Overview
This page provides complete, production-ready examples for integrating with the AgentHuman Avatar Video Streaming API. Each example demonstrates the full workflow from connection to video playback.
Browser Example (HTML + JavaScript)
A complete single-page application that connects to the avatar server and displays the talking avatar.
<!DOCTYPE html>
<html>
<head>
<title>Agent Human Avatar Video Streaming</title>
<script src="https://unpkg.com/@daily-co/daily-js"></script>
<style>
body {
font-family: Arial, sans-serif;
max-width: 800px;
margin: 50px auto;
padding: 20px;
}
#daily-container {
width: 100%;
max-width: 640px;
height: 480px;
border: 2px solid #3c83f6;
border-radius: 8px;
margin-bottom: 20px;
}
button {
margin: 10px;
padding: 10px 20px;
background: #3c83f6;
color: white;
border: none;
border-radius: 4px;
cursor: pointer;
}
button:disabled {
background: #ccc;
cursor: not-allowed;
}
button:hover:not(:disabled) {
background: #2563eb;
}
#status {
margin: 20px 0;
padding: 10px;
background: #f3f4f6;
border-radius: 4px;
font-family: monospace;
}
</style>
</head>
<body>
<h1>Agent Human Talking Avatar</h1>
<div id="daily-container"></div>
<button id="connect-btn">Connect</button>
<button id="send-audio" disabled>Send Audio</button>
<button id="interrupt-btn" disabled>Interrupt</button>
<button id="disconnect-btn" disabled>Disconnect</button>
<div id="status">Status: Not connected</div>
<script>
// --- Session configuration ---
// Placeholder values: get the real ones from your backend after it has called
// the Create Session API.
const SESSION_ID = 'your-session-id';
const ACCESS_TOKEN = 'your-access-token';
const DAILY_ROOM_URL = 'https://agenthuman.daily.co/your-room-name';
const DAILY_TOKEN = 'your-daily-token';
// From the Start Session API (use the ws_uri value exactly).
const WS_URI = 'ws://your-ws-uri';

// --- Mutable connection state, reassigned by the button handlers ---
let ws = null;
let callFrame = null;
let sessionId = null;

// --- DOM references, looked up once by element id ---
const [statusEl, connectBtn, sendAudioBtn, interruptBtn, disconnectBtn] = [
  'status',
  'connect-btn',
  'send-audio',
  'interrupt-btn',
  'disconnect-btn',
].map((id) => document.getElementById(id));
/**
 * Show a status message in the #status element and mirror it to the console.
 *
 * @param {string} msg - Text displayed after the "Status: " prefix.
 */
function updateStatus(msg) {
  statusEl.textContent = `Status: ${msg}`;
  console.log(msg);
}
/**
 * Connect button handler.
 *
 * Embeds the Daily.co call frame, joins the room, then opens the control
 * WebSocket and sends `session.init`.
 *
 * The Connect button is disabled for the whole attempt so a second click cannot
 * start a parallel connection, and a failed attempt tears down whatever it
 * created so that Connect can simply be pressed again.
 */
connectBtn.onclick = async () => {
  connectBtn.disabled = true;
  try {
    // A call frame can be left over from a previous session (for example when
    // the WebSocket closed on its own). Daily does not allow duplicate call
    // instances by default, so dispose of it before creating a new one.
    if (callFrame) {
      await callFrame.destroy();
      callFrame = null;
    }

    // 1. Create Daily.co call frame
    updateStatus('Creating Daily.co room...');
    callFrame = DailyIframe.createFrame(
      document.getElementById('daily-container'),
      {
        showLeaveButton: false,
        iframeStyle: {
          width: '100%',
          height: '100%',
        }
      }
    );

    // 2. Set up Daily.co event handlers
    callFrame.on('joined-meeting', () => {
      updateStatus('Joined Daily.co room, waiting for avatar...');
    });
    callFrame.on('participant-joined', (event) => {
      updateStatus('Avatar joined! Ready to send audio');
      sendAudioBtn.disabled = false;
      interruptBtn.disabled = false;
      disconnectBtn.disabled = false;
      connectBtn.disabled = true;
    });
    callFrame.on('track-started', (event) => {
      console.log('Track started:', event.track.kind);
    });
    callFrame.on('error', (error) => {
      console.error('Daily.co error:', error);
      updateStatus('Daily.co error: ' + error.errorMsg);
    });

    // 3. Join Daily.co room
    await callFrame.join({
      url: DAILY_ROOM_URL,
      token: DAILY_TOKEN
    });

    // 4. Connect to WebSocket for audio commands
    ws = new WebSocket(WS_URI);
    ws.onopen = () => {
      updateStatus('WebSocket connected, initializing session...');
      // Initialize session with credentials + Daily room info
      ws.send(JSON.stringify({
        type: 'session.init',
        config: {
          session_id: SESSION_ID,
          access_token: ACCESS_TOKEN,
          room: {
            platform: 'daily',
            url: DAILY_ROOM_URL,
            token: DAILY_TOKEN,
            display_name: 'AI Avatar (AH)'
          },
          video_width: 1280,
          video_height: 720
        }
      }));
    };
    ws.onmessage = (event) => {
      // A frame that is not valid JSON must not throw out of the handler.
      let message;
      try {
        message = JSON.parse(event.data);
      } catch (parseError) {
        console.warn('Ignoring non-JSON WebSocket message:', event.data);
        return;
      }
      console.log('Received:', message);
      if (message.type === 'connection.established') {
        sessionId = message.session_id;
        updateStatus('Session established, waiting for avatar to join...');
      } else if (message.type === 'agent.speak.confirmed') {
        updateStatus('Audio queued (' + message.audio_samples + ' samples)');
      } else if (message.type === 'agent.interrupt.confirmed') {
        updateStatus('Playback interrupted');
      } else if (message.type === 'error') {
        updateStatus('Error: ' + message.error);
      }
    };
    ws.onerror = (error) => {
      updateStatus('WebSocket error');
      console.error(error);
    };
    ws.onclose = () => {
      updateStatus('WebSocket disconnected');
      resetUI();
    };
  } catch (error) {
    updateStatus('Connection error: ' + ((error && error.message) || String(error)));
    console.error(error);

    // Roll back the partial connection so the next attempt starts clean.
    if (ws) {
      ws.onclose = null; // keep the error status visible instead of "disconnected"
      ws.close();
      ws = null;
    }
    if (callFrame) {
      try {
        await callFrame.destroy();
      } catch (cleanupError) {
        console.error('Failed to destroy Daily.co call frame:', cleanupError);
      }
      callFrame = null;
    }
    resetUI();
  }
};
/**
 * Send Audio button handler.
 *
 * Fetches `sample-audio.wav`, decodes it through a 48 kHz AudioContext, converts
 * the first channel to 16-bit PCM and sends it base64-encoded in an
 * `agent.speak` message.
 */
sendAudioBtn.onclick = async () => {
  if (!ws || ws.readyState !== WebSocket.OPEN) {
    updateStatus('Cannot send audio: WebSocket is not connected');
    return;
  }
  // Keep a reference: the global `ws` may be reset while we await below.
  const socket = ws;
  let audioContext = null;
  try {
    updateStatus('Loading audio file...');
    // Load audio file
    const response = await fetch('sample-audio.wav');
    if (!response.ok) {
      throw new Error('Failed to load sample-audio.wav (HTTP ' + response.status + ')');
    }
    const arrayBuffer = await response.arrayBuffer();

    // Create audio context to convert to raw PCM
    audioContext = new AudioContext({ sampleRate: 48000 });
    const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);

    // Get raw PCM data
    const channelData = audioBuffer.getChannelData(0); // Mono
    const samples = new Int16Array(channelData.length);
    // Convert float32 to int16
    for (let i = 0; i < channelData.length; i++) {
      const s = Math.max(-1, Math.min(1, channelData[i]));
      samples[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
    }

    // Encode to base64. The bytes are turned into a binary string in chunks:
    // passing the whole array to String.fromCharCode.apply() exceeds the
    // engine's argument limit for clips longer than a fraction of a second.
    // NOTE(review): Int16Array uses the platform byte order; this assumes a
    // little-endian machine and a server expecting little-endian PCM - confirm.
    const bytes = new Uint8Array(samples.buffer);
    const CHUNK_SIZE = 0x8000;
    let binary = '';
    for (let offset = 0; offset < bytes.length; offset += CHUNK_SIZE) {
      binary += String.fromCharCode.apply(
        null,
        bytes.subarray(offset, offset + CHUNK_SIZE)
      );
    }
    const base64Audio = btoa(binary);

    if (socket.readyState !== WebSocket.OPEN) {
      throw new Error('WebSocket closed while the audio was being prepared');
    }
    socket.send(JSON.stringify({
      type: 'agent.speak',
      audio: base64Audio,
      sample_rate: 48000
    }));
    updateStatus('Audio sent for generation');
  } catch (error) {
    updateStatus('Audio error: ' + ((error && error.message) || String(error)));
    console.error(error);
  } finally {
    // Release the context created for this click instead of leaving it open.
    if (audioContext) {
      audioContext.close().catch((closeError) => console.error(closeError));
    }
  }
};
/**
 * Interrupt button handler: asks the server to stop the current playback by
 * sending an `agent.interrupt` message.
 */
interruptBtn.onclick = () => {
  // The button can be enabled by a Daily 'participant-joined' event before the
  // WebSocket exists or is open, so check the socket instead of assuming it.
  if (!ws || ws.readyState !== WebSocket.OPEN) {
    updateStatus('Cannot interrupt: WebSocket is not connected');
    return;
  }
  ws.send(JSON.stringify({
    type: 'agent.interrupt'
  }));
  updateStatus('Interrupt sent');
};
/**
 * Disconnect button handler: leaves and destroys the Daily.co call frame,
 * closes the WebSocket and resets the buttons.
 *
 * The WebSocket is closed and the UI is reset even when leaving the Daily room
 * fails, so the page never ends up stuck in a half-connected state.
 */
disconnectBtn.onclick = async () => {
  // End the session via REST from your backend (recommended)
  // e.g. await fetch(`/api/end-session?session_id=${SESSION_ID}`);

  // Detach the shared state first so nothing else uses objects being torn down.
  const frame = callFrame;
  const socket = ws;
  callFrame = null;
  ws = null;

  try {
    // Leave Daily.co room
    if (frame) {
      try {
        await frame.leave();
      } finally {
        await frame.destroy();
      }
    }
  } catch (error) {
    console.error('Error while leaving the Daily.co room:', error);
  } finally {
    // Close WebSocket
    if (socket) {
      socket.close();
    }
    resetUI();
  }
};
/**
 * Put the buttons back into the "not connected" state: Connect is enabled and
 * the three session buttons are disabled.
 */
function resetUI() {
  connectBtn.disabled = false;
  for (const sessionButton of [sendAudioBtn, interruptBtn, disconnectBtn]) {
    sessionButton.disabled = true;
  }
}
</script>
</body>
</html>
Python Client Example
A complete Python client using asyncio and websockets. Video is received via Daily.co (use Daily’s Python SDK or web interface).
import asyncio
import json
import base64
import websockets
import numpy as np
import librosa
class AvatarClient:
    """WebSocket client that sends control messages for an avatar session.

    The client sends ``session.init``, ``agent.speak`` and ``agent.interrupt``
    messages and prints the server's replies. It does not receive video itself;
    the avatar is viewed through the Daily.co room.
    """

    # Sample rate used both for loading audio and in the ``agent.speak`` message,
    # kept in one place so the two can never disagree.
    SAMPLE_RATE = 48000

    def __init__(
        self,
        session_id: str,
        access_token: str,
        server_url: str,
        daily_room_url: str,
        daily_room_token: str,
        display_name: str = "AI Avatar (AH)",
        video_width: int = 1280,
        video_height: int = 720
    ):
        """Store the session credentials and room settings; no I/O happens here.

        Args:
            session_id: Session identifier sent in ``session.init``.
            access_token: Access token sent in ``session.init``.
            server_url: WebSocket URL to connect to.
            daily_room_url: URL of the Daily.co room.
            daily_room_token: Token for the Daily.co room.
            display_name: Display name sent in the room settings.
            video_width: Requested video width.
            video_height: Requested video height.
        """
        self.server_url = server_url
        self.session_id = session_id
        self.access_token = access_token
        self.daily_room_url = daily_room_url
        self.daily_room_token = daily_room_token
        self.display_name = display_name
        self.video_width = video_width
        self.video_height = video_height
        self.websocket = None

    def _require_connection(self):
        """Return the open WebSocket, or raise if connect() has not been called.

        Raises:
            RuntimeError: If there is no WebSocket connection.
        """
        if self.websocket is None:
            raise RuntimeError("Not connected: call connect() first")
        return self.websocket

    @staticmethod
    def _float_to_pcm16(audio):
        """Convert float samples to 16-bit PCM, saturating out-of-range values.

        Scaling by 32768 maps a full-scale +1.0 sample to 32768, which does not
        fit in int16 and would wrap around to -32768 (a loud click). Clipping
        before the cast saturates such samples at 32767 instead.

        Args:
            audio: Array-like of float samples, nominally in [-1.0, 1.0].

        Returns:
            numpy.ndarray of dtype int16.
        """
        scaled = np.asarray(audio, dtype=np.float64) * 32768.0
        return np.clip(scaled, -32768.0, 32767.0).astype(np.int16)

    async def connect(self):
        """Establish WebSocket connection and initialize session.

        Returns:
            True if the first reply is ``connection.established``, else False.
        """
        self.websocket = await websockets.connect(self.server_url)
        print(f"WebSocket connected to {self.server_url}")
        # Start session with credentials
        await self.websocket.send(json.dumps({
            'type': 'session.init',
            'config': {
                'session_id': self.session_id,
                'access_token': self.access_token,
                'room': {
                    'platform': 'daily',
                    'url': self.daily_room_url,
                    'token': self.daily_room_token,
                    'display_name': self.display_name
                },
                'video_width': self.video_width,
                'video_height': self.video_height
            }
        }))
        # Wait for connection established
        message = json.loads(await self.websocket.recv())
        message_type = message.get('type')
        if message_type == 'connection.established':
            print(f"Session established: {message.get('session_id')}")
            return True
        if message_type == 'error':
            print(f"Connection error: {message.get('error')}")
        return False

    async def send_audio(self, audio_path: str):
        """Send audio file for video generation.

        Args:
            audio_path: Path to audio file (will be converted to 48kHz mono PCM)

        Raises:
            RuntimeError: If called before connect().
        """
        # Fail fast, before spending time decoding the file.
        websocket = self._require_connection()
        print(f"Loading audio: {audio_path}")
        # Load and convert audio to 48kHz mono
        audio, sr = librosa.load(audio_path, sr=self.SAMPLE_RATE, mono=True)
        # Convert to 16-bit PCM
        audio_bytes = self._float_to_pcm16(audio).tobytes()
        audio_base64 = base64.b64encode(audio_bytes).decode()
        await websocket.send(json.dumps({
            'type': 'agent.speak',
            'audio': audio_base64,
            'sample_rate': self.SAMPLE_RATE
        }))
        print(f"Sent audio ({len(audio)} samples, {len(audio_bytes)} bytes)")
        # Wait for confirmation (only the next message is inspected)
        message = json.loads(await websocket.recv())
        message_type = message.get('type')
        if message_type == 'agent.speak.confirmed':
            print(f"Audio confirmed: {message.get('audio_samples')} samples")
        elif message_type == 'error':
            print(f"Error: {message.get('error')}")

    async def interrupt(self):
        """Interrupt current playback.

        Raises:
            RuntimeError: If called before connect().
        """
        websocket = self._require_connection()
        await websocket.send(json.dumps({
            'type': 'agent.interrupt'
        }))
        print("Sent interrupt signal")
        # Wait for confirmation (only the next message is inspected)
        message = json.loads(await websocket.recv())
        if message.get('type') == 'agent.interrupt.confirmed':
            print("Interrupt confirmed")

    async def listen_for_events(self, timeout: float = 60.0):
        """Listen for server events with timeout.

        Args:
            timeout: How long to listen for events (seconds)

        Raises:
            RuntimeError: If called before connect().
        """
        websocket = self._require_connection()

        async def _print_events():
            async for message in websocket:
                data = json.loads(message)
                print(f"Event: {data.get('type')}")
                if data.get('type') == 'error':
                    print(f"Error: {data.get('error')}")

        try:
            # wait_for is used instead of asyncio.timeout() so the example also
            # runs on Python versions older than 3.11.
            await asyncio.wait_for(_print_events(), timeout)
        except asyncio.TimeoutError:
            print(f"Listening timeout after {timeout}s")

    async def close(self):
        """Close connection and clean up. Safe to call more than once."""
        if self.websocket:
            await self.websocket.close()
            self.websocket = None
            print("WebSocket closed")
# Usage example
async def main():
    """Connect with placeholder credentials, send one audio file, then listen."""
    # Placeholders: replace with the values returned by the Create Session API.
    SESSION_ID = "your-session-id"
    ACCESS_TOKEN = "your-access-token"
    DAILY_ROOM_URL = "https://agenthuman.daily.co/your-room-name"
    DAILY_TOKEN = "your-daily-token"
    # From Start Session API (use the ws_uri value exactly)
    WS_URI = "ws://your-ws-uri"

    # This client only sends audio commands over the WebSocket. The video is
    # shown in the Daily.co room: use Daily's Python SDK or web interface to
    # view the avatar.
    client = AvatarClient(
        session_id=SESSION_ID,
        access_token=ACCESS_TOKEN,
        server_url=WS_URI,
        daily_room_url=DAILY_ROOM_URL,
        daily_room_token=DAILY_TOKEN
    )
    try:
        # Connect and initialize the session; nothing more to do on failure.
        connected = await client.connect()
        if not connected:
            return
        # Send audio for video generation; the video appears in the Daily.co
        # room automatically.
        await client.send_audio("speech.wav")
        # Listen for events (optional)
        await client.listen_for_events(timeout=30.0)
    finally:
        # Runs on success, on early return and on error alike.
        await client.close()


if __name__ == "__main__":
    asyncio.run(main())
Node.js Client Example
A complete Node.js client for server-side applications. Video is streamed via Daily.co room (use Daily’s SDK or web interface to view).
// Save as: avatar-client.js
// Install: npm install ws
const WebSocket = require('ws');
const fs = require('fs');
const { spawn } = require('child_process');
/**
 * WebSocket client that sends control messages for an avatar session.
 *
 * It sends `session.init`, `agent.speak` and `agent.interrupt` messages and logs
 * the server's replies. Video is not handled here; it is viewed through the
 * Daily.co room.
 */
class AvatarClient {
  /**
   * @param {string} sessionId - Session identifier sent in `session.init`.
   * @param {string} accessToken - Access token sent in `session.init`.
   * @param {string} serverUrl - WebSocket URL to connect to.
   * @param {string} dailyRoomUrl - URL of the Daily.co room.
   * @param {string} dailyToken - Token for the Daily.co room.
   * @param {string} [displayName] - Display name sent in the room settings.
   * @param {number} [videoWidth] - Requested video width.
   * @param {number} [videoHeight] - Requested video height.
   */
  constructor(sessionId, accessToken, serverUrl, dailyRoomUrl, dailyToken, displayName = 'AI Avatar (AH)', videoWidth = 1280, videoHeight = 720) {
    this.sessionId = sessionId;
    this.accessToken = accessToken;
    this.serverUrl = serverUrl;
    this.dailyRoomUrl = dailyRoomUrl;
    this.dailyToken = dailyToken;
    this.displayName = displayName;
    this.videoWidth = videoWidth;
    this.videoHeight = videoHeight;
    this.ws = null;
  }

  /**
   * Internal helper: return the socket if it is open, otherwise throw.
   *
   * @returns {object} The open WebSocket.
   * @throws {Error} If connect() has not completed or the socket has closed.
   */
  requireOpenSocket() {
    const socket = this.ws;
    if (!socket || socket.readyState !== socket.OPEN) {
      throw new Error('WebSocket is not connected; call connect() first');
    }
    return socket;
  }

  /**
   * Open the WebSocket and initialize the session.
   *
   * @returns {Promise<boolean>} Resolves with true on `connection.established`.
   *   Rejects on a server `error` message, on a socket error, or when the
   *   socket closes before the session is established.
   */
  async connect() {
    return new Promise((resolve, reject) => {
      this.ws = new WebSocket(this.serverUrl);
      this.ws.on('open', () => {
        console.log('WebSocket connected');
        // Initialize session with credentials
        this.ws.send(JSON.stringify({
          type: 'session.init',
          config: {
            session_id: this.sessionId,
            access_token: this.accessToken,
            room: {
              platform: 'daily',
              url: this.dailyRoomUrl,
              token: this.dailyToken,
              display_name: this.displayName
            },
            video_width: this.videoWidth,
            video_height: this.videoHeight
          }
        }));
      });
      this.ws.on('message', (data) => {
        // An exception thrown inside an event listener is uncaught and would
        // take the process down, so malformed frames are logged and skipped.
        let message;
        try {
          message = JSON.parse(data);
        } catch (parseError) {
          console.error('Ignoring non-JSON message from server');
          return;
        }
        console.log('Received:', message.type);
        if (message.type === 'connection.established') {
          console.log('Session established:', message.session_id);
          resolve(true);
        } else if (message.type === 'agent.speak.confirmed') {
          console.log('Audio confirmed:', message.audio_samples, 'samples');
        } else if (message.type === 'agent.interrupt.confirmed') {
          console.log('Interrupt confirmed');
        } else if (message.type === 'error') {
          console.error('Server error:', message.error);
          reject(new Error(message.error));
        }
      });
      this.ws.on('error', (error) => {
        console.error('WebSocket error:', error);
        reject(error);
      });
      this.ws.on('close', (code) => {
        // Without this the promise would stay pending forever if the server
        // hung up before sending connection.established. Once the promise has
        // settled, this reject is a no-op.
        reject(new Error(`WebSocket closed before the session was established (code ${code})`));
      });
    });
  }

  /**
   * Convert an audio file to PCM and send it in an `agent.speak` message.
   *
   * @param {string} audioPath - Path of the audio file to send.
   * @returns {Promise<void>}
   * @throws {Error} If the socket is not open or ffmpeg fails.
   */
  async sendAudio(audioPath) {
    // Check before converting so a missing connection fails immediately.
    this.requireOpenSocket();
    // Convert audio to 48kHz mono PCM using ffmpeg
    const pcmData = await this.convertAudioToPCM(audioPath);
    // Convert to base64
    const base64Audio = pcmData.toString('base64');
    // Re-check: the socket may have closed while ffmpeg was running.
    this.requireOpenSocket().send(JSON.stringify({
      type: 'agent.speak',
      audio: base64Audio,
      sample_rate: 48000
    }));
    console.log(`Sent audio: ${audioPath} (${pcmData.length} bytes)`);
  }

  /**
   * Run ffmpeg to convert a file to 48kHz, mono, 16-bit little-endian PCM.
   *
   * @param {string} audioPath - Path of the input audio file.
   * @returns {Promise<Buffer>} The raw PCM bytes read from ffmpeg's stdout.
   *   Rejects if ffmpeg cannot be started or exits with a non-zero code.
   */
  convertAudioToPCM(audioPath) {
    return new Promise((resolve, reject) => {
      const chunks = [];
      const errorOutput = [];
      // Use ffmpeg to convert to 48kHz, mono, s16le PCM
      const ffmpeg = spawn('ffmpeg', [
        '-nostdin',           // Never wait for interactive input
        '-hide_banner',
        '-loglevel', 'error', // Keep stderr to real errors only
        '-i', audioPath,
        '-ar', '48000',       // 48kHz sample rate
        '-ac', '1',           // Mono
        '-f', 's16le',        // 16-bit signed little-endian
        '-'                   // Output to stdout
      ]);
      ffmpeg.stdout.on('data', (chunk) => {
        chunks.push(chunk);
      });
      ffmpeg.stderr.on('data', (data) => {
        // Kept so a failure can be reported with ffmpeg's own explanation.
        errorOutput.push(data);
      });
      ffmpeg.on('close', (code) => {
        if (code === 0) {
          resolve(Buffer.concat(chunks));
        } else {
          const details = Buffer.concat(errorOutput).toString().trim();
          reject(new Error(`ffmpeg exited with code ${code}${details ? `: ${details}` : ''}`));
        }
      });
      ffmpeg.on('error', reject);
    });
  }

  /**
   * Ask the server to interrupt the current playback.
   *
   * @returns {Promise<void>}
   * @throws {Error} If the socket is not open.
   */
  async interrupt() {
    this.requireOpenSocket().send(JSON.stringify({
      type: 'agent.interrupt'
    }));
    console.log('Interrupt sent');
  }

  /**
   * Close the WebSocket if one was created.
   *
   * @returns {Promise<void>}
   */
  async close() {
    if (this.ws) {
      this.ws.close();
    }
    console.log('Client closed');
  }
}
// Usage example
/**
 * Connect with placeholder credentials, send one audio file, wait ten seconds
 * for the video to be generated, then close the client.
 */
async function main() {
  // Placeholders: get the real values from your backend after it has called
  // the Create Session API.
  const SESSION_ID = 'your-session-id';
  const ACCESS_TOKEN = 'your-access-token';
  const DAILY_ROOM_URL = 'https://agenthuman.daily.co/your-room-name';
  const DAILY_TOKEN = 'your-daily-token';
  // From Start Session API (use the ws_uri value exactly)
  const WS_URI = 'ws://your-ws-uri';

  // This client only sends audio commands over the WebSocket. The video is
  // shown in the Daily.co room: use Daily's Node.js SDK or web interface to
  // view the avatar.
  const client = new AvatarClient(SESSION_ID, ACCESS_TOKEN, WS_URI, DAILY_ROOM_URL, DAILY_TOKEN);
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  try {
    await client.connect();
    // Send audio file - video will appear in Daily.co room
    await client.sendAudio('speech.wav');
    // Wait a bit for video generation
    await pause(10000);
  } catch (error) {
    console.error('Error:', error);
  } finally {
    await client.close();
  }
}

// Run with: node avatar-client.js
// The example only runs when this file is the entry point, not when required.
if (require.main === module) {
  main().catch(console.error);
}

module.exports = AvatarClient;
Quick Start Guide
Step 1: Get Your Credentials
First, create a session using the Create Session endpoint to get your session_id and access_token.
Step 2: Choose Your Platform
Select the example that matches your platform:
- Browser: Use the HTML/JavaScript example
- Python: Use the Python client with asyncio
- Node.js: Use the Node.js client
Step 3: Update Credentials
Replace the placeholders in your chosen example:
const SESSION_ID = 'your-session-id'; // From Create Session API
const ACCESS_TOKEN = 'your-access-token'; // From Create Session API
Step 4: Prepare Audio
Ensure you have an audio file (WAV format recommended) to test with. The examples convert audio to 48kHz, 16-bit, mono PCM and include sample_rate: 48000 when sending agent.speak.
Step 5: Run and Test
- Browser: Open the HTML file in a modern browser (requires Daily.co SDK loaded)
- Python: Run python client.py (join Daily.co room separately to view video)
- Node.js: Run node avatar-client.js (join Daily.co room separately to view video)