44# SPDX-License-Identifier: BSD-2-Clause
55#
66
7+ """Pattern Pair Voice Switching Example with Pipecat.
8+
9+ This example demonstrates how to use the PatternPairAggregator to dynamically switch
10+ between different voices in a storytelling application. It showcases how pattern matching
11+ can be used to control TTS behavior as text streams from an LLM.
12+
13+ The example:
14+ 1. Sets up a storytelling bot with three distinct voices (narrator, male, female)
15+ 2. Uses pattern pairs (<voice>name</voice>) to trigger voice switching
16+ 3. Processes the patterns in real-time as text streams from the LLM
17+ 4. Removes the pattern tags before sending text to TTS
18+
19+ The PatternPairAggregator:
20+ - Buffers text until complete patterns are detected
21+ - Identifies content between start/end pattern pairs
22+ - Triggers callbacks when patterns are matched
23+ - Handles patterns that span multiple text chunks
24+ - Returns processed text at sentence boundaries
25+
26+ Example usage (run from pipecat root directory):
27+ $ pip install "pipecat-ai[daily,openai,cartesia,silero]"
28+ $ pip install -r dev-requirements.txt
29+ $ python examples/foundational/35-pattern-pair-voice-switching.py
30+
31+ Requirements:
32+ - OpenAI API key (for GPT-4o)
33+ - Cartesia API key (for text-to-speech)
34+ - Daily API key (for video/audio transport)
35+
36+ Environment variables (.env file):
37+ OPENAI_API_KEY=your_openai_key
38+ CARTESIA_API_KEY=your_cartesia_key
39+ DAILY_API_KEY=your_daily_key
40+
41+ Note:
42+ This example shows one application of PatternPairAggregator (voice switching),
43+ but the same approach can be used for various pattern-based text processing needs,
44+ such as formatting instructions, command recognition, or structured data extraction.
45+ """
46+
747import asyncio
848import os
949import sys
@@ -43,7 +83,7 @@ async def main():
4383 transport = DailyTransport (
4484 room_url ,
4585 token ,
46- "Storytelling Bot " ,
86+ "Multi-voice storyteller " ,
4787 DailyParams (
4888 audio_out_enabled = True ,
4989 transcription_enabled = True ,
@@ -52,12 +92,6 @@ async def main():
5292 ),
5393 )
5494
55- # Initialize TTS with narrator voice as default
56- tts = CartesiaTTSService (
57- api_key = os .getenv ("CARTESIA_API_KEY" ),
58- voice_id = VOICE_IDS ["narrator" ],
59- )
60-
6195 # Create pattern pair aggregator for voice switching
6296 pattern_aggregator = PatternPairAggregator ()
6397
@@ -81,8 +115,12 @@ def on_voice_tag(match: PatternMatch):
81115
82116 pattern_aggregator .on_pattern_match ("voice_tag" , on_voice_tag )
83117
84- # Set the pattern aggregator on the TTS service
85- tts ._text_aggregator = pattern_aggregator
118+ # Initialize TTS with narrator voice as default, passing the pattern aggregator as its text aggregator
119+ tts = CartesiaTTSService (
120+ api_key = os .getenv ("CARTESIA_API_KEY" ),
121+ voice_id = VOICE_IDS ["narrator" ],
122+ text_aggregator = pattern_aggregator ,
123+ )
86124
87125 # Initialize LLM
88126 llm = OpenAILLMService (api_key = os .getenv ("OPENAI_API_KEY" ), model = "gpt-4o" )
0 commit comments