@@ -296,6 +296,7 @@ def __init__(
296296 self ._user_text_buffer = ""
297297 self ._assistant_text_buffer = ""
298298 self ._completed_tool_calls = set ()
299+ self ._audio_input_started = False
299300
300301 file_path = files ("pipecat.services.aws.nova_sonic" ).joinpath ("ready.wav" )
301302 with wave .open (file_path .open ("rb" ), "rb" ) as wav_file :
@@ -532,14 +533,30 @@ async def _finish_connecting_if_context_available(self):
532533 if system_instruction :
533534 await self ._send_text_event (text = system_instruction , role = Role .SYSTEM )
534535
535- # Send conversation history
536- for message in llm_connection_params ["messages" ]:
536+ # Send conversation history (except for the last message if it's from the
537+ # user, which we'll send as interactive after starting audio input)
538+ messages = llm_connection_params ["messages" ]
539+ last_user_message = None
540+ for i , message in enumerate (messages ):
537541 # logger.debug(f"Seeding conversation history with message: {message}")
538- await self ._send_text_event (text = message .text , role = message .role )
542+ is_last_message = i == len (messages ) - 1
543+ if is_last_message and message .role == Role .USER :
544+ # Save for sending after audio input starts
545+ last_user_message = message
546+ else :
547+ await self ._send_text_event (text = message .text , role = message .role )
539548
540549 # Start audio input
541550 await self ._send_audio_input_start_event ()
542551
552+ # Now send the last user message as interactive to trigger bot response
553+ if last_user_message :
554+ # logger.debug(
555+ # f"Sending last user message as interactive to trigger bot response: {last_user_message}")
556+ await self ._send_text_event (
557+ text = last_user_message .text , role = last_user_message .role , interactive = True
558+ )
559+
543560 # Start receiving events
544561 self ._receive_task = self .create_task (self ._receive_task_handler ())
545562
@@ -602,6 +619,7 @@ async def _disconnect(self):
602619 self ._user_text_buffer = ""
603620 self ._assistant_text_buffer = ""
604621 self ._completed_tool_calls = set ()
622+ self ._audio_input_started = False
605623
606624 logger .info ("Finished disconnecting" )
607625 except Exception as e :
@@ -727,8 +745,18 @@ async def _send_audio_input_start_event(self):
727745 }}
728746 '''
729747 await self ._send_client_event (audio_content_start )
748+ self ._audio_input_started = True
749+
750+ async def _send_text_event (self , text : str , role : Role , interactive : bool = False ):
751+ """Send a text event to the LLM.
730752
731- async def _send_text_event (self , text : str , role : Role ):
753+ Args:
754+ text: The text content to send.
755+ role: The role associated with the text (e.g., USER, ASSISTANT, SYSTEM).
756+ interactive: Whether the content is interactive. Defaults to False.
757+ False: conversation history or system instruction, sent prior to interactive audio
758+ True: text input sent during (or at the start of) interactive audio
759+ """
732760 if not self ._stream or not self ._prompt_name or not text :
733761 return
734762
@@ -741,7 +769,7 @@ async def _send_text_event(self, text: str, role: Role):
741769 "promptName": "{ self ._prompt_name } ",
742770 "contentName": "{ content_name } ",
743771 "type": "TEXT",
744- "interactive": true ,
772+ "interactive": { json . dumps ( interactive ) } ,
745773 "role": "{ role .value } ",
746774 "textInputConfiguration": {{
747775 "mediaType": "text/plain"
@@ -779,7 +807,7 @@ async def _send_text_event(self, text: str, role: Role):
779807 await self ._send_client_event (text_content_end )
780808
781809 async def _send_user_audio_event (self , audio : bytes ):
782- if not self ._stream :
810+ if not self ._stream or not self . _audio_input_started :
783811 return
784812
785813 blob = base64 .b64encode (audio )
0 commit comments