#
# Copyright (c) 2024–2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#


| 8 | +import os |
| 9 | + |
| 10 | +from dotenv import load_dotenv |
| 11 | +from loguru import logger |
| 12 | + |
| 13 | +from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 |
| 14 | +from pipecat.audio.vad.silero import SileroVADAnalyzer |
| 15 | +from pipecat.audio.vad.vad_analyzer import VADParams |
| 16 | +from pipecat.pipeline.pipeline import Pipeline |
| 17 | +from pipecat.pipeline.runner import PipelineRunner |
| 18 | +from pipecat.pipeline.task import PipelineParams, PipelineTask |
| 19 | +from pipecat.processors.aggregators.llm_context import LLMContext |
| 20 | +from pipecat.processors.aggregators.llm_response_universal import ( |
| 21 | + LLMContextAggregatorPair, |
| 22 | + LLMUserAggregatorParams, |
| 23 | +) |
| 24 | +from pipecat.runner.types import RunnerArguments |
| 25 | +from pipecat.runner.utils import create_transport |
| 26 | +from pipecat.services.cartesia.tts import CartesiaTTSService |
| 27 | +from pipecat.services.deepgram.stt import DeepgramSTTService |
| 28 | +from pipecat.services.openai.llm import OpenAILLMService |
| 29 | +from pipecat.transports.base_transport import BaseTransport, TransportParams |
| 30 | +from pipecat.transports.daily.transport import DailyParams |
| 31 | +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams |
| 32 | +from pipecat.turns.bot import TurnAnalyzerBotTurnStartStrategy |
| 33 | +from pipecat.turns.turn_start_strategies import TurnStartStrategies |
| 34 | +from pipecat.turns.user import TranscriptionUserTurnStartStrategy |
| 35 | + |
| 36 | +load_dotenv(override=True) |
| 37 | + |
| 38 | + |
| 39 | +# We store functions so objects (e.g. SileroVADAnalyzer) don't get |
| 40 | +# instantiated. The function will be called when the desired transport gets |
| 41 | +# selected. |
| 42 | +transport_params = { |
| 43 | + "daily": lambda: DailyParams( |
| 44 | + audio_in_enabled=True, |
| 45 | + audio_out_enabled=True, |
| 46 | + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), |
| 47 | + ), |
| 48 | + "twilio": lambda: FastAPIWebsocketParams( |
| 49 | + audio_in_enabled=True, |
| 50 | + audio_out_enabled=True, |
| 51 | + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), |
| 52 | + ), |
| 53 | + "webrtc": lambda: TransportParams( |
| 54 | + audio_in_enabled=True, |
| 55 | + audio_out_enabled=True, |
| 56 | + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), |
| 57 | + ), |
| 58 | +} |
| 59 | + |
| 60 | + |
| 61 | +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): |
| 62 | + logger.info(f"Starting bot") |
| 63 | + |
| 64 | + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) |
| 65 | + |
| 66 | + tts = CartesiaTTSService( |
| 67 | + api_key=os.getenv("CARTESIA_API_KEY"), |
| 68 | + voice_id="d4db5fb9-f44b-4bd1-85fa-192e0f0d75f9", # Spanish-speaking Lady |
| 69 | + ) |
| 70 | + |
| 71 | + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) |
| 72 | + |
| 73 | + messages = [ |
| 74 | + { |
| 75 | + "role": "system", |
| 76 | + "content": "You are a live translation assistant. Your sole purpose is to translate English text into Spanish. When you receive English text from the user, immediately translate it into natural, fluent Spanish. Do not add explanations, commentary, or extra information—only provide the Spanish translation of the text you receive.", |
| 77 | + }, |
| 78 | + ] |
| 79 | + |
| 80 | + context = LLMContext(messages) |
| 81 | + |
| 82 | + # We use the TranscriptionUserTurnStartStrategy to start a new user turn |
| 83 | + # every time a transcription is received. We disable interruptions, so the |
| 84 | + # user can continue speaking while the bot is transcribing, without |
| 85 | + # interrupting the bot. |
| 86 | + context_aggregator = LLMContextAggregatorPair( |
| 87 | + context, |
| 88 | + user_params=LLMUserAggregatorParams( |
| 89 | + turn_start_strategies=TurnStartStrategies( |
| 90 | + user=[TranscriptionUserTurnStartStrategy(enable_interruptions=False)], |
| 91 | + bot=[TurnAnalyzerBotTurnStartStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())], |
| 92 | + ), |
| 93 | + ), |
| 94 | + ) |
| 95 | + |
| 96 | + pipeline = Pipeline( |
| 97 | + [ |
| 98 | + transport.input(), # Transport user input |
| 99 | + stt, # STT |
| 100 | + context_aggregator.user(), # User responses |
| 101 | + llm, # LLM |
| 102 | + tts, # TTS (bot will speak the chosen language) |
| 103 | + transport.output(), # Transport bot output |
| 104 | + context_aggregator.assistant(), # Assistant spoken responses |
| 105 | + ] |
| 106 | + ) |
| 107 | + |
| 108 | + task = PipelineTask( |
| 109 | + pipeline, |
| 110 | + params=PipelineParams( |
| 111 | + enable_metrics=True, |
| 112 | + enable_usage_metrics=True, |
| 113 | + ), |
| 114 | + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, |
| 115 | + ) |
| 116 | + |
| 117 | + @transport.event_handler("on_client_connected") |
| 118 | + async def on_client_connected(transport, client): |
| 119 | + logger.info(f"Client connected") |
| 120 | + |
| 121 | + @transport.event_handler("on_client_disconnected") |
| 122 | + async def on_client_disconnected(transport, client): |
| 123 | + logger.info(f"Client disconnected") |
| 124 | + await task.cancel() |
| 125 | + |
| 126 | + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) |
| 127 | + |
| 128 | + await runner.run(task) |
| 129 | + |
| 130 | + |
| 131 | +async def bot(runner_args: RunnerArguments): |
| 132 | + """Main bot entry point compatible with Pipecat Cloud.""" |
| 133 | + transport = await create_transport(runner_args, transport_params) |
| 134 | + await run_bot(transport, runner_args) |
| 135 | + |
| 136 | + |
| 137 | +if __name__ == "__main__": |
| 138 | + from pipecat.runner.run import main |
| 139 | + |
| 140 | + main() |