Skip to content

Commit 84c7e97

Browse files
authored
Merge pull request #3483 from pipecat-ai/aleix/throttle-user-speaking-frame
throttle user speaking frame
2 parents b11150f + ac3fa7f commit 84c7e97

3 files changed

Lines changed: 17 additions & 4 deletions

File tree

changelog/3483.changed.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
- Throttle `UserSpeakingFrame` to broadcast at most every 200ms instead of on every audio chunk, reducing frame processing overhead during user speech.

src/pipecat/transports/base_input.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
"""
1212

1313
import asyncio
14+
import time
1415
from typing import Optional
1516

1617
from loguru import logger
@@ -77,6 +78,11 @@ def __init__(self, params: TransportParams, **kwargs):
7778

7879
# Track user speaking state for interruption logic
7980
self._user_speaking = False
81+
# Last time a UserSpeakingFrame was pushed.
82+
self._user_speaking_frame_time = 0
83+
# How often a UserSpeakingFrame should be pushed (the period should be
84+
# greater than the audio chunk duration for the throttle to have any effect).
85+
self._user_speaking_frame_period = 0.2
8086

8187
# Task to process incoming audio (VAD) and push audio frames downstream
8288
# if passthrough is enabled.
@@ -423,7 +429,7 @@ async def _audio_task_handler(self):
423429
await self._deprecated_run_turn_analyzer(frame, vad_state, previous_vad_state)
424430

425431
if vad_state == VADState.SPEAKING:
426-
await self.broadcast_frame(UserSpeakingFrame)
432+
await self._user_currently_speaking()
427433

428434
# Push audio downstream if passthrough is set.
429435
if self._params.audio_in_passthrough:
@@ -444,6 +450,13 @@ async def _audio_task_handler(self):
444450
else:
445451
await self.push_frame(VADUserStoppedSpeakingFrame())
446452

453+
async def _user_currently_speaking(self):
    """Broadcast a throttled ``UserSpeakingFrame`` while the user speaks.

    Invoked for every audio chunk in which VAD reports the user speaking,
    but only re-broadcasts once ``self._user_speaking_frame_period`` seconds
    have elapsed since the previous broadcast, so downstream processors are
    not flooded on every chunk.
    """
    elapsed = time.time() - self._user_speaking_frame_time
    if elapsed < self._user_speaking_frame_period:
        # Still inside the throttle window — nothing to push yet.
        return
    await self.broadcast_frame(UserSpeakingFrame)
    self._user_speaking_frame_time = time.time()
459+
447460
#
448461
# DEPRECATED.
449462
#

src/pipecat/transports/base_output.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -403,7 +403,7 @@ def __init__(
403403
# Last time a BotSpeakingFrame was pushed.
404404
self._bot_speaking_frame_time = 0
405405
# How often a BotSpeakingFrame should be pushed (value should be
406-
# lower than the audio chunks).
406+
# greater than the audio chunk duration for the throttle to have any effect).
407407
self._bot_speaking_frame_period = 0.2
408408
# Last time the bot actually spoke.
409409
self._bot_speech_last_time = 0
@@ -644,8 +644,7 @@ async def _bot_currently_speaking(self):
644644

645645
diff_time = time.time() - self._bot_speaking_frame_time
646646
if diff_time >= self._bot_speaking_frame_period:
647-
await self._transport.push_frame(BotSpeakingFrame())
648-
await self._transport.push_frame(BotSpeakingFrame(), FrameDirection.UPSTREAM)
647+
await self._transport.broadcast_frame(BotSpeakingFrame)
649648
self._bot_speaking_frame_time = time.time()
650649

651650
self._bot_speech_last_time = time.time()

0 commit comments

Comments (0)