Skip to content

Commit 778dacc

Browse files
authored
Merge pull request #3486 from pipecat-ai/pk/fix-nova-sonic-reset-conversation
Fix `AWSNovaSonicLLMService.reset_conversation()`
2 parents f03a717 + 06b3ecd commit 778dacc

3 files changed

Lines changed: 43 additions & 6 deletions

File tree

changelog/3486.fixed.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
- Fixed `AWSNovaSonicLLMService.reset_conversation()`, which would previously error out. Now it successfully reconnects and "rehydrates" from the context object.

examples/foundational/20e-persistent-context-aws-nova-sonic.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,14 @@ async def _reset():
113113
# "content": f"{AWSNovaSonicLLMService.AWAIT_TRIGGER_ASSISTANT_RESPONSE_INSTRUCTION}",
114114
# }
115115
# )
116+
# If the last message isn't from the user, add a message asking for a recap
117+
if messages and messages[-1].get("role") != "user":
118+
messages.append(
119+
{
120+
"role": "user",
121+
"content": "Can you catch me up on what we were talking about?",
122+
}
123+
)
116124
params.context.set_messages(messages)
117125
await params.llm.reset_conversation()
118126
# await params.llm.trigger_assistant_response()

src/pipecat/services/aws/nova_sonic/llm.py

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,7 @@ def __init__(
296296
self._user_text_buffer = ""
297297
self._assistant_text_buffer = ""
298298
self._completed_tool_calls = set()
299+
self._audio_input_started = False
299300

300301
file_path = files("pipecat.services.aws.nova_sonic").joinpath("ready.wav")
301302
with wave.open(file_path.open("rb"), "rb") as wav_file:
@@ -532,14 +533,30 @@ async def _finish_connecting_if_context_available(self):
532533
if system_instruction:
533534
await self._send_text_event(text=system_instruction, role=Role.SYSTEM)
534535

535-
# Send conversation history
536-
for message in llm_connection_params["messages"]:
536+
# Send conversation history (except for the last message if it's from the
537+
# user, which we'll send as interactive after starting audio input)
538+
messages = llm_connection_params["messages"]
539+
last_user_message = None
540+
for i, message in enumerate(messages):
537541
# logger.debug(f"Seeding conversation history with message: {message}")
538-
await self._send_text_event(text=message.text, role=message.role)
542+
is_last_message = i == len(messages) - 1
543+
if is_last_message and message.role == Role.USER:
544+
# Save for sending after audio input starts
545+
last_user_message = message
546+
else:
547+
await self._send_text_event(text=message.text, role=message.role)
539548

540549
# Start audio input
541550
await self._send_audio_input_start_event()
542551

552+
# Now send the last user message as interactive to trigger bot response
553+
if last_user_message:
554+
# logger.debug(
555+
# f"Sending last user message as interactive to trigger bot response: {last_user_message}")
556+
await self._send_text_event(
557+
text=last_user_message.text, role=last_user_message.role, interactive=True
558+
)
559+
543560
# Start receiving events
544561
self._receive_task = self.create_task(self._receive_task_handler())
545562

@@ -602,6 +619,7 @@ async def _disconnect(self):
602619
self._user_text_buffer = ""
603620
self._assistant_text_buffer = ""
604621
self._completed_tool_calls = set()
622+
self._audio_input_started = False
605623

606624
logger.info("Finished disconnecting")
607625
except Exception as e:
@@ -727,8 +745,18 @@ async def _send_audio_input_start_event(self):
727745
}}
728746
'''
729747
await self._send_client_event(audio_content_start)
748+
self._audio_input_started = True
749+
750+
async def _send_text_event(self, text: str, role: Role, interactive: bool = False):
751+
"""Send a text event to the LLM.
730752
731-
async def _send_text_event(self, text: str, role: Role):
753+
Args:
754+
text: The text content to send.
755+
role: The role associated with the text (e.g., USER, ASSISTANT, SYSTEM).
756+
interactive: Whether the content is interactive. Defaults to False.
757+
False: conversation history or system instruction, sent prior to interactive audio
758+
True: text input sent during (or at the start of) interactive audio
759+
"""
732760
if not self._stream or not self._prompt_name or not text:
733761
return
734762

@@ -741,7 +769,7 @@ async def _send_text_event(self, text: str, role: Role):
741769
"promptName": "{self._prompt_name}",
742770
"contentName": "{content_name}",
743771
"type": "TEXT",
744-
"interactive": true,
772+
"interactive": {json.dumps(interactive)},
745773
"role": "{role.value}",
746774
"textInputConfiguration": {{
747775
"mediaType": "text/plain"
@@ -779,7 +807,7 @@ async def _send_text_event(self, text: str, role: Role):
779807
await self._send_client_event(text_content_end)
780808

781809
async def _send_user_audio_event(self, audio: bytes):
782-
if not self._stream:
810+
if not self._stream or not self._audio_input_started:
783811
return
784812

785813
blob = base64.b64encode(audio)

0 commit comments

Comments
 (0)