Skip to main content

Agent with OpenAI + ElevenLabs

from dotenv import load_dotenv
from livekit.agents import Agent, AgentServer, AgentSession, JobContext
from livekit.plugins import deepgram, elevenlabs, openai, silero
import agenthuman

# Load variables from a .env file into the process environment before any
# plugin object is constructed (presumably where the provider API keys
# live -- confirm against your deployment).
load_dotenv()

# Server object: its rtc_session() decorator is applied to the entrypoint
# below, and it is what gets handed to cli.run_app() when run as a script.
server = AgentServer()

@server.rtc_session()
async def entrypoint(ctx: JobContext):
    """Run a voice agent with an AgentHuman avatar in the job's room.

    Builds an AgentSession from Silero VAD, Deepgram STT, an OpenAI LLM and
    ElevenLabs TTS, starts the avatar session against it, then starts the
    agent session in ``ctx.room`` and asks the agent to greet the user.

    Args:
        ctx: Job context supplied by the server; only ``ctx.room`` is used.
    """
    # Build each pipeline stage on its own line so it is easy to swap out.
    voice_activity = silero.VAD.load()
    transcriber = deepgram.STT(model="nova-3", language="multi")
    language_model = openai.LLM(model="gpt-4o-mini")
    speech = elevenlabs.TTS(
        model="eleven_multilingual_v2",
        voice_id="cgSgspJ2msm6clMCkdW9",
    )
    session = AgentSession(
        vad=voice_activity,
        stt=transcriber,
        llm=language_model,
        tts=speech,
    )

    # The avatar is started before the agent session itself.
    avatar = agenthuman.AvatarSession(
        aspect_ratio="3:4",
        avatar="avat_xxxxxxxxxxxxxxxxxxxxxxxx",
    )
    await avatar.start(session, room=ctx.room)

    assistant = Agent(instructions="You are a friendly voice assistant.")
    await session.start(agent=assistant, room=ctx.room)

    # Open the conversation proactively instead of waiting for the user.
    await session.generate_reply(
        instructions="Greet the user and ask about their day."
    )

if __name__ == "__main__":
    # Imported inside the guard, so the CLI module is only loaded when this
    # file is executed as a script rather than imported.
    from livekit.agents import cli
    # Hand the configured server to the LiveKit Agents CLI runner.
    cli.run_app(server)

Agent with Cartesia TTS

from dotenv import load_dotenv
from livekit.agents import Agent, AgentServer, AgentSession, JobContext
from livekit.plugins import cartesia, openai, silero, deepgram
import agenthuman

# Load variables from a .env file into the process environment before any
# plugin object is constructed (presumably where the provider API keys
# live -- confirm against your deployment).
load_dotenv()

# Server object: its rtc_session() decorator is applied to the entrypoint
# below, and it is what gets handed to cli.run_app() when run as a script.
server = AgentServer()

@server.rtc_session()
async def entrypoint(ctx: JobContext):
    """Run a customer-support voice agent with an AgentHuman avatar.

    Builds an AgentSession from Silero VAD, Deepgram STT, an OpenAI LLM and
    Cartesia TTS, starts the avatar session against it, then starts the
    agent session in ``ctx.room``. No opening reply is generated here.

    Args:
        ctx: Job context supplied by the server; only ``ctx.room`` is used.
    """
    # Build each pipeline stage on its own line so it is easy to swap out.
    voice_activity = silero.VAD.load()
    transcriber = deepgram.STT(model="nova-3")
    language_model = openai.LLM(model="gpt-4o-mini")
    speech = cartesia.TTS(
        voice="9626c31c-bec5-4cca-baa8-f8ba9e84c8bc",
        model="sonic-3",
    )
    session = AgentSession(
        vad=voice_activity,
        stt=transcriber,
        llm=language_model,
        tts=speech,
    )

    # The avatar is started before the agent session itself.
    avatar = agenthuman.AvatarSession(
        aspect_ratio="4:3",
        avatar="avat_xxxxxxxxxxxxxxxxxxxxxxxx",
    )
    await avatar.start(session, room=ctx.room)

    support_agent = Agent(
        instructions="You are a helpful customer support agent."
    )
    await session.start(agent=support_agent, room=ctx.room)

if __name__ == "__main__":
    # Imported inside the guard, so the CLI module is only loaded when this
    # file is executed as a script rather than imported.
    from livekit.agents import cli
    # Hand the configured server to the LiveKit Agents CLI runner.
    cli.run_app(server)

Using LiveKit Inference

from dotenv import load_dotenv
from livekit.agents import Agent, AgentServer, AgentSession, JobContext, inference
from livekit.plugins import silero
import agenthuman

# Load variables from a .env file into the process environment before any
# session component is constructed (presumably where the LiveKit
# credentials live -- confirm against your deployment).
load_dotenv()

# Server object: its rtc_session() decorator is applied to the entrypoint
# below, and it is what gets handed to cli.run_app() when run as a script.
server = AgentServer()

@server.rtc_session()
async def entrypoint(ctx: JobContext):
    """Run a voice agent with an AgentHuman avatar using LiveKit Inference.

    STT, LLM and TTS are all created through the ``inference`` module from
    "provider/model" strings; VAD still comes from the Silero plugin. The
    avatar session is started against the agent session, and the agent
    session is then started in ``ctx.room``. No opening reply is generated.

    Args:
        ctx: Job context supplied by the server; only ``ctx.room`` is used.
    """
    # Build each pipeline stage on its own line so it is easy to swap out.
    voice_activity = silero.VAD.load()
    transcriber = inference.STT("deepgram/nova-3")
    language_model = inference.LLM("openai/gpt-4.1-mini")
    speech = inference.TTS("cartesia/sonic-3")
    session = AgentSession(
        vad=voice_activity,
        stt=transcriber,
        llm=language_model,
        tts=speech,
    )

    # The avatar is started before the agent session itself.
    avatar = agenthuman.AvatarSession(
        aspect_ratio="4:3",
        avatar="avat_xxxxxxxxxxxxxxxxxxxxxxxx",
    )
    await avatar.start(session, room=ctx.room)

    assistant = Agent(instructions="You are a helpful assistant.")
    await session.start(agent=assistant, room=ctx.room)

if __name__ == "__main__":
    # Imported inside the guard, so the CLI module is only loaded when this
    # file is executed as a script rather than imported.
    from livekit.agents import cli
    # Hand the configured server to the LiveKit Agents CLI runner.
    cli.run_app(server)