1313import asyncio
1414import base64
1515import json
16+ import uuid
1617from typing import Any , AsyncGenerator , Dict , List , Literal , Mapping , Optional , Tuple , Union
1718
1819import aiohttp
@@ -680,6 +681,20 @@ async def _send_text(self, text: str):
680681 msg = {"text" : text , "context_id" : self ._context_id }
681682 await self ._websocket .send (json .dumps (msg ))
682683
684+ def create_context_id (self ) -> str :
685+ """Return the context ID to use for a TTS request, generating one if none is in progress.
686+
687+ Returns:
688+ The current context ID if one is set; otherwise a newly generated UUID4 string.
689+ """
690+ # If a context ID does not exist, create a new one (returned, not stored here).
691+ # If an ID exists, continue using the current ID.
692+ # NOTE(review): the ID is expected to be reset when user speech causes
693+ # an interruption; that reset happens outside this method -- confirm.
694+ if not self ._context_id :
695+ return str (uuid .uuid4 ())
696+ return self ._context_id
697+
683698 @traced_tts
684699 async def run_tts (self , text : str , context_id : str ) -> AsyncGenerator [Frame , None ]:
685700 """Generate speech from text using ElevenLabs' streaming WebSocket API.
@@ -698,31 +713,28 @@ async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, Non
698713 await self ._connect ()
699714
700715 try :
701- await self .start_ttfb_metrics ()
702- yield TTSStartedFrame (context_id = context_id )
703- self ._cumulative_time = 0
704- self ._partial_word = ""
705- self ._partial_word_start_time = 0.0
706- # If a context ID does not exist, use the provided one.
707- # If an ID exists, that means the Pipeline doesn't allow
708- # user interruptions, so continue using the current ID.
709- # When interruptions are allowed, user speech results in
710- # an interruption, which resets the context ID.
711716 if not self ._context_id :
717+ await self .start_ttfb_metrics ()
718+ yield TTSStartedFrame (context_id = context_id )
712719 self ._context_id = context_id
713- if not self .audio_context_available (self ._context_id ):
714- await self .create_audio_context (self ._context_id )
720+ self ._cumulative_time = 0
721+ self ._partial_word = ""
722+ self ._partial_word_start_time = 0.0
715723
716- # Initialize context with voice settings and pronunciation dictionaries
717- msg = {"text" : " " , "context_id" : self ._context_id }
718- if self ._voice_settings :
719- msg ["voice_settings" ] = self ._voice_settings
720- if self ._pronunciation_dictionary_locators :
721- msg ["pronunciation_dictionary_locators" ] = [
722- locator .model_dump () for locator in self ._pronunciation_dictionary_locators
723- ]
724- await self ._websocket .send (json .dumps (msg ))
725- logger .trace (f"Created new context { self ._context_id } " )
724+ if not self .audio_context_available (self ._context_id ):
725+ await self .create_audio_context (self ._context_id )
726+
727+ # Initialize context with voice settings and pronunciation dictionaries
728+ msg = {"text" : " " , "context_id" : self ._context_id }
729+ if self ._voice_settings :
730+ msg ["voice_settings" ] = self ._voice_settings
731+ if self ._pronunciation_dictionary_locators :
732+ msg ["pronunciation_dictionary_locators" ] = [
733+ locator .model_dump ()
734+ for locator in self ._pronunciation_dictionary_locators
735+ ]
736+ await self ._websocket .send (json .dumps (msg ))
737+ logger .trace (f"Created new context { self ._context_id } " )
726738
727739 await self ._send_text (text )
728740 await self .start_tts_usage_metrics (text )
0 commit comments