Skip to content

Commit 7a22d58

Browse files
committed
Fix "bot-llm-text" not firing when using AWS Nova Sonic
1 parent 24082b8 commit 7a22d58

1 file changed

Lines changed: 22 additions & 8 deletions

File tree

  • src/pipecat/services/aws/nova_sonic

src/pipecat/services/aws/nova_sonic/llm.py

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
LLMContextFrame,
3939
LLMFullResponseEndFrame,
4040
LLMFullResponseStartFrame,
41+
LLMTextFrame,
4142
StartFrame,
4243
TranscriptionFrame,
4344
TTSAudioRawFrame,
@@ -1077,9 +1078,7 @@ async def _report_assistant_response_text_added(self, text):
10771078
logger.debug(f"Assistant response text added: {text}")
10781079

10791080
# Report the text of the assistant response.
1080-
frame = TTSTextFrame(text, aggregated_by=AggregationType.SENTENCE)
1081-
frame.includes_inter_frame_spaces = True
1082-
await self.push_frame(frame)
1081+
await self._push_assistant_response_text_frames(text)
10831082

10841083
# HACK: here we're also buffering the assistant text ourselves as a
10851084
# backup rather than relying solely on the assistant context aggregator
@@ -1112,11 +1111,7 @@ async def _report_assistant_response_ended(self):
11121111
# TTSTextFrame would be ignored otherwise (the interruption frame
11131112
# would have cleared the assistant aggregator state).
11141113
await self.push_frame(LLMFullResponseStartFrame())
1115-
frame = TTSTextFrame(
1116-
self._assistant_text_buffer, aggregated_by=AggregationType.SENTENCE
1117-
)
1118-
frame.includes_inter_frame_spaces = True
1119-
await self.push_frame(frame)
1114+
await self._push_assistant_response_text_frames(self._assistant_text_buffer)
11201115
self._may_need_repush_assistant_text = False
11211116

11221117
# Report the end of the assistant response.
@@ -1128,6 +1123,25 @@ async def _report_assistant_response_ended(self):
11281123
# Clear out the buffered assistant text
11291124
self._assistant_text_buffer = ""
11301125

1126+
async def _push_assistant_response_text_frames(self, text: str):
    """Push assistant response text as an LLMTextFrame, then a TTSTextFrame.

    A speech-to-speech service like Nova Sonic combines LLM and TTS
    functionality, so at first glance the LLMTextFrame looks unnecessary:
    in a typical "cascade" LLM + TTS setup, LLMTextFrames would not proceed
    beyond the TTS service. RTVI, however, relies on LLMTextFrames being
    pushed to trigger its "bot-llm-text" event, so one is pushed here too.

    Args:
        text: The assistant response text to report.
    """
    # LLMTextFrame goes first, for RTVI's benefit. It is flagged as
    # not-for-context so the text isn't duplicated in the context messages.
    llm_frame = LLMTextFrame(text)
    llm_frame.append_to_context = False
    await self.push_frame(llm_frame)

    # TTSTextFrame carries the same text, marked as sentence-aggregated and
    # as already including inter-frame spaces.
    spoken_frame = TTSTextFrame(text, aggregated_by=AggregationType.SENTENCE)
    spoken_frame.includes_inter_frame_spaces = True
    await self.push_frame(spoken_frame)
1144+
11311145
#
11321146
# user transcription reporting
11331147
#

0 commit comments

Comments
 (0)