
Overview

This page provides complete, working examples for integrating with the AgentHuman Avatar Video Streaming API. Each example demonstrates the full workflow, from connecting to a session through video playback.

Browser Example (HTML + JavaScript)

A complete single-page application that connects to the avatar server and displays the talking avatar.
<!DOCTYPE html>
<html>
<head>
    <title>Agent Human Avatar Video Streaming</title>
    <script src="https://unpkg.com/@daily-co/daily-js"></script>
    <style>
        body {
            font-family: Arial, sans-serif;
            max-width: 800px;
            margin: 50px auto;
            padding: 20px;
        }
        #daily-container { 
            width: 100%; 
            max-width: 640px;
            height: 480px;
            border: 2px solid #3c83f6;
            border-radius: 8px;
            margin-bottom: 20px;
        }
        button { 
            margin: 10px; 
            padding: 10px 20px;
            background: #3c83f6;
            color: white;
            border: none;
            border-radius: 4px;
            cursor: pointer;
        }
        button:disabled {
            background: #ccc;
            cursor: not-allowed;
        }
        button:hover:not(:disabled) {
            background: #2563eb;
        }
        #status {
            margin: 20px 0;
            padding: 10px;
            background: #f3f4f6;
            border-radius: 4px;
            font-family: monospace;
        }
    </style>
</head>
<body>
    <h1>Agent Human Talking Avatar</h1>
    <div id="daily-container"></div>
    <button id="connect-btn">Connect</button>
    <button id="send-audio" disabled>Send Audio</button>
    <button id="interrupt-btn" disabled>Interrupt</button>
    <button id="disconnect-btn" disabled>Disconnect</button>
    <div id="status">Status: Not connected</div>
    
    <script>
        // Get these from your backend after calling Create Session API
        const SESSION_ID = 'your-session-id';
        const ACCESS_TOKEN = 'your-access-token';
        const DAILY_ROOM_URL = 'https://agenthuman.daily.co/your-room-name';
        const DAILY_TOKEN = 'your-daily-token';
        const WS_URI = 'ws://your-ws-uri'; // From Start Session API (use the ws_uri value exactly)
        
        let ws = null;
        let callFrame = null;
        let sessionId = null;
        
        const statusEl = document.getElementById('status');
        const connectBtn = document.getElementById('connect-btn');
        const sendAudioBtn = document.getElementById('send-audio');
        const interruptBtn = document.getElementById('interrupt-btn');
        const disconnectBtn = document.getElementById('disconnect-btn');
        
        function updateStatus(msg) {
            statusEl.textContent = 'Status: ' + msg;
            console.log(msg);
        }
        
        connectBtn.onclick = async () => {
            try {
                // 1. Create Daily.co call frame
                updateStatus('Creating Daily.co call frame...');
                callFrame = DailyIframe.createFrame(
                    document.getElementById('daily-container'),
                    {
                        showLeaveButton: false,
                        iframeStyle: {
                            width: '100%',
                            height: '100%',
                        }
                    }
                );
                
                // 2. Set up Daily.co event handlers
                callFrame.on('joined-meeting', () => {
                    updateStatus('Joined Daily.co room, waiting for avatar...');
                });
                
                callFrame.on('participant-joined', (event) => {
                    updateStatus('Avatar joined! Ready to send audio');
                    sendAudioBtn.disabled = false;
                    interruptBtn.disabled = false;
                    disconnectBtn.disabled = false;
                    connectBtn.disabled = true;
                });
                
                callFrame.on('track-started', (event) => {
                    console.log('Track started:', event.track.kind);
                });
                
                callFrame.on('error', (error) => {
                    console.error('Daily.co error:', error);
                    updateStatus('Daily.co error: ' + error.errorMsg);
                });
                
                // 3. Join Daily.co room
                await callFrame.join({
                    url: DAILY_ROOM_URL,
                    token: DAILY_TOKEN
                });
                
                // 4. Connect to WebSocket for audio commands
                ws = new WebSocket(WS_URI);
                
                ws.onopen = () => {
                    updateStatus('WebSocket connected, initializing session...');
                    
                    // Initialize session with credentials + Daily room info
                    ws.send(JSON.stringify({
                        type: 'session.init',
                        config: {
                            session_id: SESSION_ID,
                            access_token: ACCESS_TOKEN,
                            room: {
                                platform: 'daily',
                                url: DAILY_ROOM_URL,
                                token: DAILY_TOKEN,
                                display_name: 'AI Avatar (AH)'
                            },
                            video_width: 1280,
                            video_height: 720
                        }
                    }));
                };
                
                ws.onmessage = (event) => {
                    const message = JSON.parse(event.data);
                    console.log('Received:', message);
                    
                    if (message.type === 'connection.established') {
                        sessionId = message.session_id;
                        updateStatus('Session established, waiting for avatar to join...');
                    } else if (message.type === 'agent.speak.confirmed') {
                        updateStatus('Audio queued (' + message.audio_samples + ' samples)');
                    } else if (message.type === 'agent.interrupt.confirmed') {
                        updateStatus('Playback interrupted');
                    } else if (message.type === 'error') {
                        updateStatus('Error: ' + message.error);
                    }
                };
                
                ws.onerror = (error) => {
                    updateStatus('WebSocket error');
                    console.error(error);
                };
                
                ws.onclose = () => {
                    updateStatus('WebSocket disconnected');
                    resetUI();
                };
                
            } catch (error) {
                updateStatus('Connection error: ' + error.message);
                console.error(error);
            }
        };
        
        sendAudioBtn.onclick = async () => {
            updateStatus('Loading audio file...');
            
            // Load audio file
            const response = await fetch('sample-audio.wav');
            const arrayBuffer = await response.arrayBuffer();
            
            // Create a 48 kHz audio context; decodeAudioData resamples decoded audio to the context rate
            const audioContext = new AudioContext({ sampleRate: 48000 });
            const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
            
            // Get raw PCM data
            const channelData = audioBuffer.getChannelData(0); // Mono
            const samples = new Int16Array(channelData.length);
            
            // Convert float32 to int16
            for (let i = 0; i < channelData.length; i++) {
                const s = Math.max(-1, Math.min(1, channelData[i]));
                samples[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
            }
            
            // Encode to base64 (convert in chunks to avoid call-stack limits on large buffers)
            const bytes = new Uint8Array(samples.buffer);
            let binary = '';
            for (let i = 0; i < bytes.length; i += 0x8000) {
                binary += String.fromCharCode.apply(null, bytes.subarray(i, i + 0x8000));
            }
            const base64Audio = btoa(binary);
            
            ws.send(JSON.stringify({
                type: 'agent.speak',
                audio: base64Audio,
                sample_rate: 48000
            }));
            
            updateStatus('Audio sent for generation');
        };
        
        interruptBtn.onclick = () => {
            ws.send(JSON.stringify({
                type: 'agent.interrupt'
            }));
            updateStatus('Interrupt sent');
        };
        
        disconnectBtn.onclick = async () => {
            // End the session via REST from your backend (recommended)
            // e.g. await fetch(`/api/end-session?session_id=${SESSION_ID}`);
            
            // Leave Daily.co room
            if (callFrame) {
                await callFrame.leave();
                await callFrame.destroy();
                callFrame = null;
            }
            
            // Close WebSocket
            if (ws) {
                ws.close();
                ws = null;
            }
            
            resetUI();
        };
        
        function resetUI() {
            connectBtn.disabled = false;
            sendAudioBtn.disabled = true;
            interruptBtn.disabled = true;
            disconnectBtn.disabled = true;
        }
    </script>
</body>
</html>

Python Client Example

A complete Python client using asyncio and websockets. Video is received via Daily.co (use Daily’s Python SDK or web interface).
import asyncio
import json
import base64
import websockets
import numpy as np
import librosa

class AvatarClient:
    def __init__(
        self, 
        session_id: str,
        access_token: str,
        server_url: str,
        daily_room_url: str,
        daily_room_token: str,
        display_name: str = "AI Avatar (AH)",
        video_width: int = 1280,
        video_height: int = 720
    ):
        self.server_url = server_url
        self.session_id = session_id
        self.access_token = access_token
        self.daily_room_url = daily_room_url
        self.daily_room_token = daily_room_token
        self.display_name = display_name
        self.video_width = video_width
        self.video_height = video_height
        self.websocket = None
    
    async def connect(self):
        """Establish WebSocket connection and initialize session"""
        self.websocket = await websockets.connect(self.server_url)
        print(f"WebSocket connected to {self.server_url}")
        
        # Start session with credentials
        await self.websocket.send(json.dumps({
            'type': 'session.init',
            'config': {
                'session_id': self.session_id,
                'access_token': self.access_token,
                'room': {
                    'platform': 'daily',
                    'url': self.daily_room_url,
                    'token': self.daily_room_token,
                    'display_name': self.display_name
                },
                'video_width': self.video_width,
                'video_height': self.video_height
            }
        }))
        
        # Wait for connection established
        message = json.loads(await self.websocket.recv())
        if message['type'] == 'connection.established':
            print(f"Session established: {message['session_id']}")
            return True
        elif message['type'] == 'error':
            print(f"Connection error: {message['error']}")
            return False
        return False
    
    
    async def send_audio(self, audio_path: str):
        """Send audio file for video generation
        
        Args:
            audio_path: Path to audio file (will be converted to 48kHz mono PCM)
        """
        print(f"Loading audio: {audio_path}")
        
        # Load and convert audio to 48kHz mono
        audio, sr = librosa.load(audio_path, sr=48000, mono=True)
        
        # Convert to 16-bit PCM
        audio_int16 = np.clip(audio * 32767.0, -32768, 32767).astype(np.int16)
        audio_bytes = audio_int16.tobytes()
        audio_base64 = base64.b64encode(audio_bytes).decode()
        
        await self.websocket.send(json.dumps({
            'type': 'agent.speak',
            'audio': audio_base64,
            'sample_rate': 48000
        }))
        
        print(f"Sent audio ({len(audio)} samples, {len(audio_bytes)} bytes)")
        
        # Wait for confirmation
        message = json.loads(await self.websocket.recv())
        if message['type'] == 'agent.speak.confirmed':
            print(f"Audio confirmed: {message.get('audio_samples')} samples")
        elif message['type'] == 'error':
            print(f"Error: {message['error']}")
    
    async def interrupt(self):
        """Interrupt current playback"""
        await self.websocket.send(json.dumps({
            'type': 'agent.interrupt'
        }))
        
        print("Sent interrupt signal")
        
        # Wait for confirmation
        message = json.loads(await self.websocket.recv())
        if message['type'] == 'agent.interrupt.confirmed':
            print("Interrupt confirmed")
    
    async def listen_for_events(self, timeout: float = 60.0):
        """Listen for server events with timeout
        
        Args:
            timeout: How long to listen for events (seconds)
        """
        try:
            async with asyncio.timeout(timeout):  # requires Python 3.11+
                async for message in self.websocket:
                    data = json.loads(message)
                    print(f"Event: {data['type']}")
                    
                    if data['type'] == 'error':
                        print(f"Error: {data.get('error')}")
        except asyncio.TimeoutError:
            print(f"Listening timeout after {timeout}s")
    
    async def close(self):
        """Close connection and clean up"""
        if self.websocket:
            await self.websocket.close()
            print("WebSocket closed")

# Usage example
async def main():
    # Your session credentials (from Create Session API)
    SESSION_ID = "your-session-id"
    ACCESS_TOKEN = "your-access-token"
    DAILY_ROOM_URL = "https://agenthuman.daily.co/your-room-name"
    DAILY_TOKEN = "your-daily-token"
    WS_URI = "ws://your-ws-uri"  # From Start Session API (use the ws_uri value exactly)
    
    # Note: Video is displayed via Daily.co room
    # Use Daily's Python SDK or web interface to view the avatar
    # The client just sends audio commands via WebSocket
    
    client = AvatarClient(
        session_id=SESSION_ID,
        access_token=ACCESS_TOKEN,
        server_url=WS_URI,
        daily_room_url=DAILY_ROOM_URL,
        daily_room_token=DAILY_TOKEN
    )
    
    try:
        # Connect and initialize session
        if await client.connect():
            # Send audio for video generation
            # Video will appear in the Daily.co room automatically
            await client.send_audio("speech.wav")
            
            # Listen for events (optional)
            await client.listen_for_events(timeout=30.0)
    finally:
        await client.close()

if __name__ == "__main__":
    asyncio.run(main())

Node.js Client Example

A complete Node.js client for server-side applications. Video is streamed via Daily.co room (use Daily’s SDK or web interface to view).
// Save as: avatar-client.js
// Install: npm install ws

const WebSocket = require('ws');
const fs = require('fs');
const { spawn } = require('child_process');

class AvatarClient {
    constructor(sessionId, accessToken, serverUrl, dailyRoomUrl, dailyToken,
                displayName = 'AI Avatar (AH)', videoWidth = 1280, videoHeight = 720) {
        this.sessionId = sessionId;
        this.accessToken = accessToken;
        this.serverUrl = serverUrl;
        this.dailyRoomUrl = dailyRoomUrl;
        this.dailyToken = dailyToken;
        this.displayName = displayName;
        this.videoWidth = videoWidth;
        this.videoHeight = videoHeight;
        this.ws = null;
    }

    async connect() {
        return new Promise((resolve, reject) => {
            this.ws = new WebSocket(this.serverUrl);

            this.ws.on('open', () => {
                console.log('WebSocket connected');
                
                // Initialize session with credentials
                this.ws.send(JSON.stringify({
                    type: 'session.init',
                    config: {
                        session_id: this.sessionId,
                        access_token: this.accessToken,
                        room: {
                            platform: 'daily',
                            url: this.dailyRoomUrl,
                            token: this.dailyToken,
                            display_name: this.displayName
                        },
                        video_width: this.videoWidth,
                        video_height: this.videoHeight
                    }
                }));
            });

            this.ws.on('message', async (data) => {
                const message = JSON.parse(data);
                console.log('Received:', message.type);

                if (message.type === 'connection.established') {
                    console.log('Session established:', message.session_id);
                    resolve(true);
                } else if (message.type === 'agent.speak.confirmed') {
                    console.log('Audio confirmed:', message.audio_samples, 'samples');
                } else if (message.type === 'agent.interrupt.confirmed') {
                    console.log('Interrupt confirmed');
                } else if (message.type === 'error') {
                    console.error('Server error:', message.error);
                    reject(new Error(message.error));
                }
            });

            this.ws.on('error', (error) => {
                console.error('WebSocket error:', error);
                reject(error);
            });
        });
    }

    async sendAudio(audioPath) {
        // Convert audio to 48kHz mono PCM using ffmpeg
        const pcmData = await this.convertAudioToPCM(audioPath);
        
        // Convert to base64
        const base64Audio = pcmData.toString('base64');

        this.ws.send(JSON.stringify({
            type: 'agent.speak',
            audio: base64Audio,
            sample_rate: 48000
        }));

        console.log(`Sent audio: ${audioPath} (${pcmData.length} bytes)`);
    }

    convertAudioToPCM(audioPath) {
        return new Promise((resolve, reject) => {
            const chunks = [];
            
            // Use ffmpeg to convert to 48kHz, mono, s16le PCM
            const ffmpeg = spawn('ffmpeg', [
                '-i', audioPath,
                '-ar', '48000',      // 48kHz sample rate
                '-ac', '1',          // Mono
                '-f', 's16le',       // 16-bit signed little-endian
                '-'                  // Output to stdout
            ]);

            ffmpeg.stdout.on('data', (chunk) => {
                chunks.push(chunk);
            });

            ffmpeg.stderr.on('data', (data) => {
                // ffmpeg writes its log output to stderr; consume it so the pipe doesn't fill
            });

            ffmpeg.on('close', (code) => {
                if (code === 0) {
                    resolve(Buffer.concat(chunks));
                } else {
                    reject(new Error(`ffmpeg exited with code ${code}`));
                }
            });

            ffmpeg.on('error', reject);
        });
    }

    async interrupt() {
        this.ws.send(JSON.stringify({
            type: 'agent.interrupt'
        }));

        console.log('Interrupt sent');
    }

    async close() {
        if (this.ws) {
            this.ws.close();
        }

        console.log('Client closed');
    }
}

// Usage example
async function main() {
    // Get these from your backend after calling Create Session API
    const SESSION_ID = 'your-session-id';
    const ACCESS_TOKEN = 'your-access-token';
    const DAILY_ROOM_URL = 'https://agenthuman.daily.co/your-room-name';
    const DAILY_TOKEN = 'your-daily-token';
    const WS_URI = 'ws://your-ws-uri'; // From Start Session API (use the ws_uri value exactly)
    
    // Note: Video is displayed via Daily.co room
    // Use Daily's Node.js SDK or web interface to view the avatar
    // This client just sends audio commands via WebSocket

    const client = new AvatarClient(SESSION_ID, ACCESS_TOKEN, WS_URI, DAILY_ROOM_URL, DAILY_TOKEN);

    try {
        await client.connect();
        
        // Send audio file - video will appear in Daily.co room
        await client.sendAudio('speech.wav');
        
        // Wait a bit for video generation
        await new Promise(resolve => setTimeout(resolve, 10000));
        
    } catch (error) {
        console.error('Error:', error);
    } finally {
        await client.close();
    }
}

// Run with: node avatar-client.js
if (require.main === module) {
    main().catch(console.error);
}

module.exports = AvatarClient;

Quick Start Guide

Step 1: Get Your Credentials

First, create a session using the Create Session endpoint to get your session_id and access_token.
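
The exact request format is documented in the Create Session and Start Session references. As a rough illustration of the backend flow, here is a minimal Node.js sketch: the base URL, endpoint paths, request bodies, and the AGENTHUMAN_API_KEY variable are placeholders (assumptions for illustration), while the response fields shown are the ones the client examples on this page consume.
// Hypothetical backend sketch (Node 18+, global fetch).
// Endpoint paths and request bodies are placeholders; see the
// Create Session / Start Session API references for the real ones.
const API_BASE = 'https://api.example.com';        // placeholder base URL
const API_KEY = process.env.AGENTHUMAN_API_KEY;    // placeholder auth variable

async function createAndStartSession() {
    // 1. Create Session -> session_id, access_token, Daily room URL + token
    const createRes = await fetch(`${API_BASE}/sessions`, {            // placeholder path
        method: 'POST',
        headers: {
            'Authorization': `Bearer ${API_KEY}`,
            'Content-Type': 'application/json'
        },
        body: JSON.stringify({ /* session options per the API reference */ })
    });
    const session = await createRes.json();

    // 2. Start Session -> ws_uri for the audio-command WebSocket
    const startRes = await fetch(`${API_BASE}/sessions/${session.session_id}/start`, {  // placeholder path
        method: 'POST',
        headers: { 'Authorization': `Bearer ${API_KEY}` }
    });
    const { ws_uri } = await startRes.json();

    // Pass these values to the client-side example; keep the API key on the server.
    return { ...session, ws_uri };
}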

Step 2: Choose Your Platform

Select the example that matches your platform:
  • Browser: Use the HTML/JavaScript example
  • Python: Use the Python client with asyncio
  • Node.js: Use the Node.js client

Step 3: Update Credentials

Replace the placeholders in your chosen example: session_id, access_token, and the Daily room URL/token come from the Create Session API, and ws_uri comes from the Start Session API. For example:
const SESSION_ID = 'your-session-id';  // From Create Session API
const ACCESS_TOKEN = 'your-access-token';  // From Create Session API

Step 4: Prepare Audio

Ensure you have an audio file (WAV format recommended) to test with. The examples convert audio to 48kHz, 16-bit, mono PCM and include sample_rate: 48000 when sending agent.speak.
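
If your source audio is not already 48 kHz mono, one way to prepare a test file is with ffmpeg, the same tool the Node.js example shells out to (filenames here are placeholders):
ffmpeg -i input.mp3 -ar 48000 -ac 1 -c:a pcm_s16le speech.wav
All three examples also resample at runtime, so pre-converting is optional but makes the input predictable.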

Step 5: Run and Test

  • Browser: Serve the HTML file from a local web server and open it in a modern browser (the page loads the Daily.co SDK from unpkg and fetches sample-audio.wav, which may fail over file://)
  • Python: Run python client.py (join Daily.co room separately to view video)
  • Node.js: Run node avatar-client.js (join Daily.co room separately to view video)