Bot with ElevenLabs TTS + OpenAI + Deepgram
A full Daily.co bot using ElevenLabs for speech synthesis, OpenAI for the LLM, and Deepgram for STT.
import asyncio
import os
from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.processors.aggregators.llm_response_universal import (
LLMContextAggregatorPair,
LLMUserAggregatorParams,
)
from pipecat.services.deepgram.stt import DeepgramSTTService
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.transports.daily.transport import DailyParams, DailyTransport
from agenthuman.api import NewSessionRequest
from agenthuman.video import AgentHumanVideoService
# Load settings from a local .env file (if present) before any of the
# os.getenv(...) lookups below read API keys and room details.
load_dotenv()
async def run_bot(transport, runner_args=None):
    """Build the avatar pipeline on ``transport`` and run it to completion.

    Frames flow through: transport input -> Deepgram STT -> user context
    aggregator -> OpenAI LLM -> ElevenLabs TTS -> AgentHuman video ->
    transport output -> assistant context aggregator.

    All service credentials are read from environment variables
    (``DEEPGRAM_API_KEY``, ``ELEVENLABS_API_KEY``, ``OPENAI_API_KEY``,
    ``AGENTHUMAN_API_KEY``).

    Args:
        transport: Transport that provides ``input()``, ``output()`` and the
            ``on_client_connected`` / ``on_client_disconnected`` events.
        runner_args: Optional runner arguments, as passed by a
            ``bot(runner_args)`` entry point. When provided, its
            ``handle_sigint`` setting is forwarded to the ``PipelineRunner``;
            when omitted, the runner is created with its defaults.
    """
    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

    tts = ElevenLabsTTSService(
        api_key=os.getenv("ELEVENLABS_API_KEY"),
        voice_id="21m00Tcm4TlvDq8ikWAM",
    )

    llm = OpenAILLMService(
        api_key=os.getenv("OPENAI_API_KEY"),
        settings=OpenAILLMService.Settings(
            system_instruction="You are a friendly AI assistant. Respond naturally and keep your answers conversational.",
        ),
    )

    # The video service is given the transport directly in addition to being
    # placed in the pipeline below.
    agentHuman = AgentHumanVideoService(
        api_key=os.getenv("AGENTHUMAN_API_KEY"),
        session_request=NewSessionRequest(
            avatar="avat_01KMDFWB9SW1QX4TVP0RT1RFYQ"
        ),
        transport=transport,
    )

    # One shared context; the aggregator pair wraps it for the user side
    # (with Silero VAD) and the assistant side.
    context = LLMContext()
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
    )

    pipeline = Pipeline([
        transport.input(),
        stt,
        user_aggregator,
        llm,
        tts,
        agentHuman,
        transport.output(),
        assistant_aggregator,
    ])

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # Seed the context with a greeting prompt, then queue an LLMRunFrame
        # so the pipeline processes it without waiting for user speech.
        context.add_message(
            {"role": "user", "content": "Say hello and briefly introduce yourself."}
        )
        await task.queue_frames([LLMRunFrame()])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        # Cancelling the task lets runner.run(task) below return.
        await task.cancel()

    # Callers that go through the pipecat runner pass runner_args; honour its
    # signal-handling preference. Direct callers get the runner defaults.
    if runner_args is None:
        runner = PipelineRunner()
    else:
        runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
    await runner.run(task)
async def main():
    """Create a Daily transport from environment settings and run the bot.

    The room URL and token come from ``DAILY_ROOM_URL`` and ``DAILY_TOKEN``.
    Video output is a live 1200x1200 stream at a 2,000,000 bitrate setting.
    """
    square_video_params = DailyParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
        video_out_enabled=True,
        video_out_is_live=True,
        video_out_width=1200,
        video_out_height=1200,
        video_out_bitrate=2_000_000,
    )
    daily_transport = DailyTransport(
        room_url=os.getenv("DAILY_ROOM_URL"),
        token=os.getenv("DAILY_TOKEN"),
        bot_name="AI Avatar",
        params=square_video_params,
    )
    await run_bot(daily_transport)
# Script entry point: drive the async main() to completion when this file is
# executed directly (not when it is imported).
if __name__ == "__main__":
    asyncio.run(main())
Bot with Cartesia TTS
Swap ElevenLabs for Cartesia — everything else stays the same.
from pipecat.services.cartesia.tts import CartesiaTTSService
# The Cartesia voice is chosen through the service's Settings object.
cartesia_settings = CartesiaTTSService.Settings(
    voice="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
)
tts = CartesiaTTSService(
    api_key=os.getenv("CARTESIA_API_KEY"),
    settings=cartesia_settings,
)
AgentHumanVideoService setup as above.
Portrait Avatar (Mobile Layout)
Use a 3:4 aspect ratio for portrait video, suitable for mobile UIs.
# Portrait output: 900 wide by 1200 high is a 3:4 frame.
portrait_params = DailyParams(
    audio_in_enabled=True,
    audio_out_enabled=True,
    video_out_enabled=True,
    video_out_is_live=True,
    video_out_width=900,
    video_out_height=1200,  # portrait dimensions
)
transport = DailyTransport(
    room_url=os.getenv("DAILY_ROOM_URL"),
    token=os.getenv("DAILY_TOKEN"),
    bot_name="AI Avatar",
    params=portrait_params,
)
# Request the avatar session with the same 3:4 ratio as the transport's
# video output dimensions.
portrait_session = NewSessionRequest(
    avatar="avat_01KMDFWB9SW1QX4TVP0RT1RFYQ",
    aspect_ratio="3:4",  # explicit portrait ratio
)
agentHuman = AgentHumanVideoService(
    api_key=os.getenv("AGENTHUMAN_API_KEY"),
    session_request=portrait_session,
    transport=transport,
)
Using Pipecat Cloud / create_transport
When deploying to Pipecat Cloud or using the pipecat.runner helpers, use create_transport to handle transport creation from RunnerArguments:
from pipecat.runner.types import RunnerArguments
from pipecat.runner.utils import create_transport
from pipecat.transports.daily.transport import DailyParams
from pipecat.transports.base_transport import TransportParams
async def bot(runner_args: RunnerArguments):
    """Create a transport from ``runner_args`` and run the bot on it.

    A parameter factory is registered for each supported transport kind
    ("daily" and "webrtc"); ``create_transport`` receives both together with
    the runner arguments and returns the transport to use.

    Args:
        runner_args: Runner arguments passed to ``create_transport`` and
            forwarded to ``run_bot``.
    """

    def daily_params():
        # Daily: live 1200x1200 video with an explicit bitrate setting.
        return DailyParams(
            audio_in_enabled=True,
            audio_out_enabled=True,
            video_out_enabled=True,
            video_out_is_live=True,
            video_out_width=1200,
            video_out_height=1200,
            video_out_bitrate=2_000_000,
        )

    def webrtc_params():
        # WebRTC: same audio/video settings, no bitrate override.
        return TransportParams(
            audio_in_enabled=True,
            audio_out_enabled=True,
            video_out_enabled=True,
            video_out_is_live=True,
            video_out_width=1200,
            video_out_height=1200,
        )

    # Factories are passed uncalled, keyed by transport kind.
    factories = {
        "daily": daily_params,
        "webrtc": webrtc_params,
    }
    transport = await create_transport(runner_args, factories)

    # NOTE(review): run_bot must accept the runner arguments as its second
    # positional parameter for this call to succeed.
    await run_bot(transport, runner_args)
# Script entry point: hand control to the pipecat runner's main() when this
# file is executed directly. The import is kept inside the guard so it only
# happens in that case.
if __name__ == "__main__":
    from pipecat.runner.run import main

    main()