|
28 | 28 | import os |
29 | 29 | import sys |
30 | 30 |
|
31 | | -import aiohttp |
32 | 31 | from dotenv import load_dotenv |
33 | 32 | from loguru import logger |
34 | 33 |
|
@@ -66,73 +65,70 @@ async def main(voice_id: int): |
66 | 65 | # Deepgram STT for speech recognition |
67 | 66 | stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) |
68 | 67 |
|
69 | | - # Create HTTP session for Camb.ai TTS |
70 | | - async with aiohttp.ClientSession() as session: |
71 | | - # Camb.ai TTS with MARS-flash model |
72 | | - tts = CambTTSService( |
73 | | - api_key=os.getenv("CAMB_API_KEY"), |
74 | | - aiohttp_session=session, |
75 | | - voice_id=voice_id, |
76 | | - model="mars-flash", |
77 | | - ) |
| 68 | + # Camb.ai TTS with MARS-flash model (uses official SDK) |
| 69 | + tts = CambTTSService( |
| 70 | + api_key=os.getenv("CAMB_API_KEY"), |
| 71 | + voice_id=voice_id, |
| 72 | + model="mars-flash", |
| 73 | + ) |
78 | 74 |
|
79 | | - # OpenAI LLM |
80 | | - llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) |
| 75 | + # OpenAI LLM |
| 76 | + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) |
81 | 77 |
|
82 | | - # System prompt |
83 | | - messages = [ |
84 | | - { |
85 | | - "role": "system", |
86 | | - "content": """You are a helpful voice assistant powered by Camb.ai's MARS |
| 78 | + # System prompt |
| 79 | + messages = [ |
| 80 | + { |
| 81 | + "role": "system", |
| 82 | + "content": """You are a helpful voice assistant powered by Camb.ai's MARS |
87 | 83 | text-to-speech technology. Keep your responses concise and conversational since |
88 | 84 | they will be spoken aloud. Avoid special characters, emojis, or bullet points.""", |
89 | | - }, |
| 85 | + }, |
| 86 | + ] |
| 87 | + |
| 88 | + # Context management |
| 89 | + context = LLMContext(messages) |
| 90 | + context_aggregator = LLMContextAggregatorPair(context) |
| 91 | + |
| 92 | + # Build the pipeline |
| 93 | + pipeline = Pipeline( |
| 94 | + [ |
| 95 | + transport.input(), # Microphone input |
| 96 | + stt, # Speech-to-text |
| 97 | + context_aggregator.user(), # User context |
| 98 | + llm, # Language model |
| 99 | + tts, # Camb.ai TTS |
| 100 | + transport.output(), # Speaker output |
| 101 | + context_aggregator.assistant(), # Assistant context |
90 | 102 | ] |
| 103 | + ) |
91 | 104 |
|
92 | | - # Context management |
93 | | - context = LLMContext(messages) |
94 | | - context_aggregator = LLMContextAggregatorPair(context) |
95 | | - |
96 | | - # Build the pipeline |
97 | | - pipeline = Pipeline( |
98 | | - [ |
99 | | - transport.input(), # Microphone input |
100 | | - stt, # Speech-to-text |
101 | | - context_aggregator.user(), # User context |
102 | | - llm, # Language model |
103 | | - tts, # Camb.ai TTS |
104 | | - transport.output(), # Speaker output |
105 | | - context_aggregator.assistant(), # Assistant context |
106 | | - ] |
107 | | - ) |
| 105 | + # Create pipeline task |
| 106 | + # Use 24kHz sample rate to match Camb.ai TTS output |
| 107 | + task = PipelineTask( |
| 108 | + pipeline, |
| 109 | + params=PipelineParams( |
| 110 | + audio_out_sample_rate=24000, |
| 111 | + enable_metrics=True, |
| 112 | + enable_usage_metrics=True, |
| 113 | + ), |
| 114 | + ) |
108 | 115 |
|
109 | | - # Create pipeline task |
110 | | - # Use 24kHz sample rate to match Camb.ai TTS output |
111 | | - task = PipelineTask( |
112 | | - pipeline, |
113 | | - params=PipelineParams( |
114 | | - audio_out_sample_rate=24000, |
115 | | - enable_metrics=True, |
116 | | - enable_usage_metrics=True, |
117 | | - ), |
| 116 | + # Start the conversation when the pipeline is ready |
| 117 | + @task.event_handler("on_pipeline_started") |
| 118 | + async def on_pipeline_started(task, frame): |
| 119 | + messages.append( |
| 120 | + { |
| 121 | + "role": "system", |
| 122 | + "content": "Please introduce yourself briefly and ask how you can help.", |
| 123 | + } |
118 | 124 | ) |
| 125 | + await task.queue_frames([LLMRunFrame()]) |
119 | 126 |
|
120 | | - # Start the conversation when the pipeline is ready |
121 | | - @task.event_handler("on_pipeline_started") |
122 | | - async def on_pipeline_started(task, frame): |
123 | | - messages.append( |
124 | | - { |
125 | | - "role": "system", |
126 | | - "content": "Please introduce yourself briefly and ask how you can help.", |
127 | | - } |
128 | | - ) |
129 | | - await task.queue_frames([LLMRunFrame()]) |
130 | | - |
131 | | - # Run the pipeline |
132 | | - runner = PipelineRunner() |
133 | | - logger.info("Starting Camb.ai TTS bot with local audio...") |
134 | | - logger.info("Speak into your microphone to interact with the bot.") |
135 | | - await runner.run(task) |
| 127 | + # Run the pipeline |
| 128 | + runner = PipelineRunner() |
| 129 | + logger.info("Starting Camb.ai TTS bot with local audio...") |
| 130 | + logger.info("Speak into your microphone to interact with the bot.") |
| 131 | + await runner.run(task) |
136 | 132 |
|
137 | 133 |
|
138 | 134 | if __name__ == "__main__": |
|
0 commit comments