Skip to content

Commit 7ae0d65

Browse files
committed
added cambai tts integration
1 parent 24082b8 commit 7ae0d65

7 files changed

Lines changed: 1300 additions & 1 deletion

File tree

Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
#
2+
# Copyright (c) 2024–2025, Daily
3+
#
4+
# SPDX-License-Identifier: BSD 2-Clause License
5+
#
6+
7+
"""Camb.ai MARS-8 TTS example with interruption handling.
8+
9+
This example demonstrates:
10+
- Basic TTS synthesis with Camb.ai MARS-8
11+
- Voice selection
12+
- Speed control
13+
- Handling interruptions
14+
15+
Requirements:
16+
- CAMB_API_KEY environment variable
17+
- OPENAI_API_KEY environment variable (for LLM)
18+
- DEEPGRAM_API_KEY environment variable (for STT)
19+
20+
Usage:
21+
export CAMB_API_KEY=your_camb_api_key
22+
export OPENAI_API_KEY=your_openai_api_key
23+
export DEEPGRAM_API_KEY=your_deepgram_api_key
24+
python 07za-interruptible-camb.py --transport daily
25+
26+
For more information:
27+
- Camb.ai API docs: https://camb.mintlify.app/
28+
- Pipecat docs: https://docs.pipecat.ai/
29+
"""
30+
31+
import os
32+
33+
import aiohttp
34+
from dotenv import load_dotenv
35+
from loguru import logger
36+
37+
from pipecat.audio.vad.silero import SileroVADAnalyzer
38+
from pipecat.audio.vad.vad_analyzer import VADParams
39+
from pipecat.frames.frames import LLMRunFrame
40+
from pipecat.pipeline.pipeline import Pipeline
41+
from pipecat.pipeline.runner import PipelineRunner
42+
from pipecat.pipeline.task import PipelineParams, PipelineTask
43+
from pipecat.processors.aggregators.llm_context import LLMContext
44+
from pipecat.processors.aggregators.llm_response_universal import (
45+
LLMContextAggregatorPair,
46+
)
47+
from pipecat.runner.types import RunnerArguments
48+
from pipecat.runner.utils import create_transport
49+
from pipecat.services.camb.tts import CambTTSService
50+
from pipecat.services.deepgram.stt import DeepgramSTTService
51+
from pipecat.services.openai.llm import OpenAILLMService
52+
from pipecat.transports.base_transport import BaseTransport, TransportParams
53+
from pipecat.transports.daily.transport import DailyParams
54+
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
55+
56+
load_dotenv(override=True)
57+
58+
59+
# Transport configuration for different platforms
60+
def _voice_io_options():
    """Return the keyword arguments shared by every transport in this example.

    A new Silero VAD analyzer is built on each call, so every params object
    created through ``transport_params`` receives its own analyzer instance.
    """
    return dict(
        audio_in_enabled=True,
        audio_out_enabled=True,
        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
    )


# Maps a transport name to a zero-argument factory; nothing is constructed
# until the selected factory is actually called.
transport_params = {
    "daily": lambda: DailyParams(**_voice_io_options()),
    "twilio": lambda: FastAPIWebsocketParams(**_voice_io_options()),
    "webrtc": lambda: TransportParams(**_voice_io_options()),
}
77+
78+
79+
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """Assemble and run the Deepgram STT -> OpenAI LLM -> Camb.ai TTS pipeline.

    Args:
        transport: Transport supplying the audio input and output processors.
        runner_args: Runner arguments; provides the pipeline idle timeout and
            the SIGINT-handling flag.
    """
    logger.info("Starting Camb.ai TTS bot")

    # The Camb.ai service is handed this HTTP session, so the whole bot
    # lifetime is kept inside the session's context.
    async with aiohttp.ClientSession() as http_session:
        # Speech recognition
        speech_to_text = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

        # Speech synthesis: MARS-8-flash model (fastest), default "Attic" voice
        synthesis_settings = CambTTSService.InputParams(
            speed=1.0,  # Normal speed (0.5-2.0 range)
        )
        text_to_speech = CambTTSService(
            api_key=os.getenv("CAMB_API_KEY"),
            aiohttp_session=http_session,
            voice_id=2681,
            model="mars-8-flash",
            params=synthesis_settings,
        )

        # Language model
        language_model = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))

        # System prompt for the assistant
        system_prompt = """You are a helpful voice assistant powered by Camb.ai's MARS-8
text-to-speech technology. Your goal is to have natural conversations and demonstrate
high-quality speech synthesis. Keep your responses concise and conversational since
they will be spoken aloud. Avoid special characters, emojis, or bullet points that
can't easily be spoken."""
        conversation = [{"role": "system", "content": system_prompt}]

        # Context shared by the user-side and assistant-side aggregators
        llm_context = LLMContext(conversation)
        aggregators = LLMContextAggregatorPair(llm_context)

        # Processors in the order frames flow through them
        stages = [
            transport.input(),  # Transport user input
            speech_to_text,  # Speech-to-text
            aggregators.user(),  # User context aggregation
            language_model,  # Language model
            text_to_speech,  # Camb.ai TTS
            transport.output(),  # Transport bot output
            aggregators.assistant(),  # Assistant context aggregation
        ]

        metrics_params = PipelineParams(
            enable_metrics=True,
            enable_usage_metrics=True,
        )
        task = PipelineTask(
            Pipeline(stages),
            params=metrics_params,
            idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
        )

        @transport.event_handler("on_client_connected")
        async def on_client_connected(transport, client):
            logger.info("Client connected")
            # Kick off the conversation: add a greeting instruction to the
            # message list, then ask the LLM to run.
            greeting_request = {
                "role": "system",
                "content": "Please introduce yourself briefly and ask how you can help.",
            }
            conversation.append(greeting_request)
            await task.queue_frames([LLMRunFrame()])

        @transport.event_handler("on_client_disconnected")
        async def on_client_disconnected(transport, client):
            logger.info("Client disconnected")
            await task.cancel()

        # Run the task until it finishes or is cancelled
        pipeline_runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
        await pipeline_runner.run(task)
166+
167+
168+
async def bot(runner_args: RunnerArguments):
    """Bot entry point compatible with Pipecat Cloud.

    Builds the transport selected by the runner arguments from
    ``transport_params`` and then runs the bot on it.

    Args:
        runner_args: Arguments passed from the runner.
    """
    await run_bot(await create_transport(runner_args, transport_params), runner_args)
176+
177+
178+
async def list_available_voices():
    """Print the Camb.ai voices returned for the configured API key.

    Reads ``CAMB_API_KEY`` from the environment, fetches the voice list and
    writes one line per voice (ID, name, gender) to stdout, followed by the
    total number of voices.
    """
    divider = "-" * 50
    async with aiohttp.ClientSession() as http_session:
        catalog = await CambTTSService.list_voices(
            api_key=os.getenv("CAMB_API_KEY"),
            aiohttp_session=http_session,
        )
        print("\nAvailable Camb.ai voices:")
        print(divider)
        for entry in catalog:
            voice_id, name, gender = entry["id"], entry["name"], entry["gender"]
            print(f" ID: {voice_id}, Name: {name}, Gender: {gender}")
        print(divider)
        print(f"Total: {len(catalog)} voices\n")
194+
195+
196+
if __name__ == "__main__":
    import sys

    # Without --list-voices, hand control to the Pipecat runner's CLI;
    # with it, print the available voices and exit.
    if "--list-voices" not in sys.argv:
        from pipecat.runner.run import main

        main()
    else:
        import asyncio

        asyncio.run(list_available_voices())
Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
#!/usr/bin/env python3
2+
"""Quick test script to verify Camb.ai TTS integration works.
3+
4+
Usage:
5+
export CAMB_API_KEY=your_api_key
6+
python test_camb_quick.py
7+
"""
8+
9+
import asyncio
10+
import os
11+
import sys
12+
13+
# Add the src directory to the path so we can import the module
14+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "src"))
15+
16+
import aiohttp
17+
from dotenv import load_dotenv
18+
19+
load_dotenv()
20+
21+
22+
async def test_list_voices():
    """Check that the Camb.ai voice list can be fetched.

    Returns:
        True when the list call succeeds; False when ``CAMB_API_KEY`` is not
        set or the call (or printing its result) raises.
    """
    from pipecat.services.camb.tts import CambTTSService

    camb_key = os.getenv("CAMB_API_KEY")
    if not camb_key:
        print("ERROR: CAMB_API_KEY environment variable not set!")
        return False

    print("\n1. Testing list_voices()...")
    passed = False
    async with aiohttp.ClientSession() as http_session:
        try:
            found = await CambTTSService.list_voices(
                api_key=camb_key,
                aiohttp_session=http_session,
            )
            print(f" SUCCESS: Found {len(found)} voices")
            if found:
                first = found[0]
                print(f" First voice: ID={first['id']}, Name={first['name']}")
            passed = True
        except Exception as exc:
            # Report the failure with a full traceback, but let the caller
            # carry on with the remaining checks.
            print(f" FAILED: {exc}")
            import traceback

            traceback.print_exc()
    return passed
47+
48+
49+
async def test_tts_synthesis():
    """Synthesize one sentence with Camb.ai and check that audio comes back.

    After a successful synthesis the user is optionally prompted to save and
    play the audio. When stdin is not interactive the prompts are skipped and
    the already-successful result is kept.

    Returns:
        True if at least one byte of audio was received; False if
        ``CAMB_API_KEY`` is not set, the service yields an ``ErrorFrame``,
        no audio arrives, or an exception is raised.
    """
    from pipecat.frames.frames import ErrorFrame, TTSAudioRawFrame
    from pipecat.services.camb.tts import CambTTSService

    api_key = os.getenv("CAMB_API_KEY")
    if not api_key:
        print("ERROR: CAMB_API_KEY environment variable not set!")
        return False

    print("\n2. Testing TTS synthesis...")
    async with aiohttp.ClientSession() as session:
        tts = CambTTSService(
            api_key=api_key,
            aiohttp_session=session,
            voice_id=2681,  # Attic voice
            model="mars-8-flash",
        )

        # Manually set sample rate (normally done by StartFrame)
        tts._sample_rate = 24000

        text = "Hello! This is a test of the Camb.ai text to speech integration."
        print(f" Synthesizing: '{text}'")

        audio_bytes = 0
        frames_received = []

        try:
            async for frame in tts.run_tts(text):
                frames_received.append(type(frame).__name__)
                if isinstance(frame, TTSAudioRawFrame):
                    audio_bytes += len(frame.audio)
                elif isinstance(frame, ErrorFrame):
                    print(f" FAILED: {frame.error}")
                    return False

            print(f" Frames received: {frames_received}")
            print(f" Audio bytes received: {audio_bytes}")

            if audio_bytes <= 0:
                print(" FAILED: No audio received")
                return False

            print(" SUCCESS: TTS synthesis works!")

            # Saving and playing are optional extras. input() raises EOFError
            # when stdin is closed or not interactive; that must not turn the
            # successful synthesis above into a failure.
            try:
                save_audio = input("\n Save audio to test_output.wav? (y/n): ").strip().lower()
                if save_audio == "y":
                    await save_audio_to_file(tts, text)
                    # Offer playback only if there is a file to play.
                    if os.path.exists("test_output.wav"):
                        play_audio = input(" Play the audio? (y/n): ").strip().lower()
                        if play_audio == "y":
                            play_wav_file("test_output.wav")
            except EOFError:
                print("\n No interactive input available; skipping save/play.")

            return True

        except Exception as e:
            print(f" FAILED: {e}")
            import traceback

            traceback.print_exc()
            return False
111+
112+
113+
async def save_audio_to_file(tts, text, filepath="test_output.wav", sample_rate=24000):
    """Synthesize ``text`` and write the resulting audio to a WAV file.

    Args:
        tts: TTS service whose ``run_tts(text)`` async generator yields frames;
            only ``TTSAudioRawFrame`` frames contribute audio.
        text: Text to synthesize.
        filepath: Destination WAV path. Defaults to ``"test_output.wav"``.
        sample_rate: Sample rate in Hz written to the WAV header. Defaults to
            24000, the rate the calling test sets on the service.

    Returns:
        True if audio was received and written to ``filepath``, False if the
        service produced no audio (no file is written in that case).
    """
    import wave

    from pipecat.frames.frames import TTSAudioRawFrame

    audio_data = bytearray()
    async for frame in tts.run_tts(text):
        if isinstance(frame, TTSAudioRawFrame):
            audio_data.extend(frame.audio)

    if not audio_data:
        # Say so explicitly, so the caller does not assume a file exists.
        print(" No audio received; nothing was saved")
        return False

    # NOTE(review): the header assumes 16-bit mono PCM from the service,
    # as the original code did - confirm against the TTS output format.
    with wave.open(filepath, "wb") as wav_file:
        wav_file.setnchannels(1)  # Mono
        wav_file.setsampwidth(2)  # 16-bit
        wav_file.setframerate(sample_rate)
        wav_file.writeframes(bytes(audio_data))
    print(f" Saved to {filepath}")
    return True
130+
131+
132+
def play_wav_file(filepath):
    """Play a WAV file with the platform's command-line audio player.

    Uses ``afplay`` on macOS, ``aplay`` on Linux and PowerShell's
    ``Media.SoundPlayer`` on Windows. Playback is best-effort: problems are
    reported on stdout instead of being raised.

    Args:
        filepath: Path of the WAV file to play.
    """
    import os
    import platform
    import subprocess

    # Report a missing file up front rather than letting the external player
    # fail with a less obvious error.
    if not os.path.isfile(filepath):
        print(f" Could not play audio: {filepath} does not exist")
        return

    system = platform.system()
    if system == "Darwin":  # macOS
        command = ["afplay", filepath]
    elif system == "Linux":
        command = ["aplay", filepath]
    elif system == "Windows":
        # The path sits inside a PowerShell single-quoted string, where a
        # literal single quote must be doubled; without this a path containing
        # "'" would end the string early and alter the command.
        quoted = str(filepath).replace("'", "''")
        command = [
            "powershell",
            "-c",
            f"(New-Object Media.SoundPlayer '{quoted}').PlaySync()",
        ]
    else:
        print(f" Unsupported platform: {system}. Please play {filepath} manually.")
        return

    try:
        subprocess.run(command, check=True)
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        # OSError: player binary missing or not executable.
        # SubprocessError: player exited with a non-zero status.
        print(f" Could not play audio: {e}")
149+
150+
151+
async def main():
    """Run both checks in order, print a summary and return an exit code.

    Returns:
        0 when both checks passed, 1 otherwise.
    """
    banner = "=" * 50
    print(banner)
    print("Camb.ai TTS Integration Test")
    print(banner)

    # Test 1: List voices
    voices_ok = await test_list_voices()
    # Test 2: TTS synthesis
    synthesis_ok = await test_tts_synthesis()

    def verdict(ok):
        return "PASS" if ok else "FAIL"

    # Summary
    print("\n" + banner)
    print("Summary:")
    print(f" List voices: {verdict(voices_ok)}")
    print(f" TTS synthesis: {verdict(synthesis_ok)}")
    print(banner)

    if not (voices_ok and synthesis_ok):
        print("\nSome tests failed!")
        return 1

    print("\nAll tests passed!")
    return 0
177+
178+
179+
if __name__ == "__main__":
    # Use the value returned by main() as the process exit status.
    sys.exit(asyncio.run(main()))

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ aws = [ "aioboto3~=15.5.0", "pipecat-ai[websockets-base]" ]
5353
aws-nova-sonic = [ "aws_sdk_bedrock_runtime~=0.2.0; python_version>='3.12'" ]
5454
azure = [ "azure-cognitiveservices-speech~=1.44.0"]
5555
cartesia = [ "cartesia~=2.0.3", "pipecat-ai[websockets-base]" ]
56+
camb = [ "pipecat-ai[websockets-base]" ]
5657
cerebras = []
5758
daily = [ "daily-python~=0.23.0" ]
5859
deepgram = [ "deepgram-sdk~=4.7.0", "pipecat-ai[websockets-base]" ]

0 commit comments

Comments
 (0)