Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion livekit-agents/livekit/agents/voice/room_io/_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def __init__(
*,
sample_rate: int,
num_channels: int,
frame_size_ms: int = 50,
track_publish_options: rtc.TrackPublishOptions,
track_name: str = "roomio_audio",
) -> None:
Expand All @@ -49,7 +50,7 @@ def __init__(

self._audio_buf = utils.aio.Chan[rtc.AudioFrame]()
self._audio_bstream = utils.audio.AudioByteStream(
sample_rate, num_channels, samples_per_channel=sample_rate // 20, progressive=True
sample_rate, num_channels, samples_per_channel=sample_rate * frame_size_ms // 1000, progressive=True
)

self._flush_task: asyncio.Task[None] | None = None
Expand Down
1 change: 1 addition & 0 deletions livekit-agents/livekit/agents/voice/room_io/room_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ async def start(self) -> None:
self._room,
sample_rate=output_audio_options.sample_rate,
num_channels=output_audio_options.num_channels,
frame_size_ms=output_audio_options.frame_size_ms,
track_publish_options=output_audio_options.track_publish_options,
track_name=(
output_audio_options.track_name
Expand Down
2 changes: 2 additions & 0 deletions livekit-agents/livekit/agents/voice/room_io/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ class VideoInputOptions:
class AudioOutputOptions:
sample_rate: int = 24000
num_channels: int = 1
frame_size_ms: int = 50
"""The frame size in milliseconds for audio output frames."""
track_publish_options: rtc.TrackPublishOptions = field(
default_factory=lambda: rtc.TrackPublishOptions(source=rtc.TrackSource.SOURCE_MICROPHONE)
)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,48 +1,50 @@
from __future__ import annotations

import asyncio
import base64
import json
import os
import time
import typing
import weakref
from collections.abc import AsyncIterable
from dataclasses import dataclass, field
from typing import Literal

from livekit import rtc
from livekit.agents import llm, utils
from livekit.agents.voice.room_io import AudioOutputOptions
from livekit.agents.types import (
DEFAULT_API_CONNECT_OPTIONS,
NOT_GIVEN,
APIConnectOptions,
NotGivenOr,
)
from livekit.agents.utils import audio as audio_utils, is_given
from phonic import AsyncPhonic
from phonic.conversations.socket_client import (
AsyncConversationsSocketClient,
)
from phonic.types import (
AddSystemMessagePayload,
AudioChunkPayload,
AudioChunkResponsePayload,
ConfigPayload,
GenerateReplyPayload,
InputTextPayload,
SayPayload,
ToolCallInterruptedPayload,
ToolCallOutputPayload,
ToolCallPayload,
)

from ..log import logger

Check failure on line 41 in livekit-plugins/livekit-plugins-phonic/livekit/plugins/phonic/realtime/realtime_model.py

View workflow job for this annotation

GitHub Actions / ruff

ruff (I001)

livekit-plugins/livekit-plugins-phonic/livekit/plugins/phonic/realtime/realtime_model.py:1:1: I001 Import block is un-sorted or un-formatted help: Organize imports

# Audio format constants for the Phonic realtime connection.
# Sample rates are in Hz; frame durations are in milliseconds.
PHONIC_INPUT_SAMPLE_RATE = 44100
PHONIC_OUTPUT_SAMPLE_RATE = 44100
PHONIC_NUM_CHANNELS = 1  # mono
PHONIC_INPUT_FRAME_MS = 20
PHONIC_OUTPUT_FRAME_MS = 20
# 1000 is the WebSocket "normal closure" status code (RFC 6455, section 7.4.1).
WS_CLOSE_NORMAL = 1000
# 60000 ms = 60 s. NOTE(review): presumably the maximum wait for a tool call's
# output; the code that uses it is not visible here -- confirm.
TOOL_CALL_OUTPUT_TIMEOUT_MS = 60000

Expand Down Expand Up @@ -221,6 +223,26 @@
self._sessions.add(sess)
return sess

@staticmethod
def audio_output_options(**kwargs: typing.Any) -> AudioOutputOptions:
    """Return ``AudioOutputOptions`` tuned for Phonic: 44100 Hz, mono, 20 ms frames.

    Using these options with ``RoomIO`` avoids resampling (Phonic natively outputs 44100 Hz)
    and matches the frame size to Phonic's 20 ms audio chunks, minimising buffer starvation.

    Args:
        **kwargs: Additional ``AudioOutputOptions`` fields (for example ``track_name`` or
            ``track_publish_options``). A value supplied here for ``sample_rate``,
            ``num_channels`` or ``frame_size_ms`` replaces the Phonic default rather than
            raising ``TypeError`` for a duplicated keyword argument.

    Returns:
        An ``AudioOutputOptions`` instance built from the Phonic defaults merged with
        ``kwargs``.

    Example::

        model = phonic.realtime.RealtimeModel(...)
        room_io = RoomIO(session, room=ctx.room, options=RoomOptions(
            audio_output=model.audio_output_options()
        ))
    """
    # Build the defaults as a dict so caller-provided keys override them; passing
    # them as explicit keywords next to **kwargs would fail on any overlap.
    options: dict[str, typing.Any] = {
        "sample_rate": PHONIC_OUTPUT_SAMPLE_RATE,
        "num_channels": PHONIC_NUM_CHANNELS,
        "frame_size_ms": PHONIC_OUTPUT_FRAME_MS,
    }
    options.update(kwargs)
    return AudioOutputOptions(**options)

def update_options(
self,
) -> None:
Expand Down
Loading