1010from typing import Optional
1111
1212from pipecat .frames .frames import (
13- BotSpeakingFrame ,
13+ BotStartedSpeakingFrame ,
14+ BotStoppedSpeakingFrame ,
1415 Frame ,
16+ FunctionCallCancelFrame ,
1517 FunctionCallResultFrame ,
1618 FunctionCallsStartedFrame ,
17- UserSpeakingFrame ,
1819 UserStartedSpeakingFrame ,
20+ UserStoppedSpeakingFrame ,
1921)
2022from pipecat .utils .asyncio .task_manager import BaseTaskManager
2123from pipecat .utils .base_object import BaseObject
@@ -25,14 +27,14 @@ class UserIdleController(BaseObject):
2527 """Controller for managing user idle detection.
2628
2729 This class monitors user activity and triggers an event when the user has been
28- idle (not speaking) for a configured timeout period. It only starts monitoring
29- after the first conversation activity and does not trigger while the bot is
30- speaking or function calls are in progress.
30+ idle (not speaking) for a configured timeout period after the bot finishes
31+ speaking. The timer starts when BotStoppedSpeakingFrame is received and is
32+ cancelled when someone starts speaking again (UserStartedSpeakingFrame or
33+ BotStartedSpeakingFrame).
3134
32- The controller tracks activity using continuous frames (UserSpeakingFrame and
33- BotSpeakingFrame) which are emitted repeatedly while speaking is happening, and
34- state-based tracking for function calls (FunctionCallsStartedFrame and
35- FunctionCallResultFrame) which are only sent at start and end.
35+ The timer is suppressed while a user turn is in progress to avoid false
36+ triggers during interruptions (where BotStoppedSpeakingFrame arrives while
37+ the user is still speaking).
3638
3739 Event handlers available:
3840
@@ -62,11 +64,9 @@ def __init__(
6264
6365 self ._task_manager : Optional [BaseTaskManager ] = None
6466
65- self ._conversation_started = False
66- self ._function_call_in_progress = False
67-
68- self .user_idle_event = asyncio .Event ()
69- self .user_idle_task : Optional [asyncio .Task ] = None
67+ self ._user_turn_in_progress : bool = False
68+ self ._function_calls_in_progress : int = 0
69+ self ._idle_timer_task : Optional [asyncio .Task ] = None
7070
7171 self ._register_event_handler ("on_user_turn_idle" , sync = True )
7272
@@ -85,89 +85,63 @@ async def setup(self, task_manager: BaseTaskManager):
8585 """
8686 self ._task_manager = task_manager
8787
88- if not self .user_idle_task :
89- self .user_idle_task = self .task_manager .create_task (
90- self .user_idle_task_handler (),
91- f"{ self } ::user_idle_task_handler" ,
92- )
93-
9488 async def cleanup (self ):
9589 """Cleanup the controller."""
9690 await super ().cleanup ()
97-
98- if self .user_idle_task :
99- await self .task_manager .cancel_task (self .user_idle_task )
100- self .user_idle_task = None
91+ await self ._cancel_idle_timer ()
10192
10293 async def process_frame (self , frame : Frame ):
10394 """Process an incoming frame to track user activity state.
10495
10596 Args:
10697 frame: The frame to be processed.
10798 """
108- # Start monitoring on first conversation activity
109- if not self ._conversation_started :
110- if isinstance (frame , (UserStartedSpeakingFrame , BotSpeakingFrame )):
111- self ._conversation_started = True
112- self .user_idle_event .set ()
113- else :
114- return
115-
116- # Reset idle timer on continuous activity frames
117- if isinstance (frame , (UserSpeakingFrame , BotSpeakingFrame )):
118- await self ._handle_activity (frame )
119- # Track function call state (start/end frames, not continuous)
99+ if isinstance (frame , BotStoppedSpeakingFrame ):
100+ # Only start the timer if the user isn't mid-turn and no function
101+ # calls are pending.
102+ #
103+ # Interruption case: the frame order is UserStartedSpeaking →
104+ # BotStoppedSpeaking → (user keeps talking) → UserStoppedSpeaking.
105+ # Without the user-turn guard the timer would start while the user
106+ # is still speaking.
107+ #
108+ # Function call case: normally FunctionCallsStarted arrives after
109+ # BotStoppedSpeaking and cancels the timer directly. But a race
110+ # condition can cause FunctionCallsStarted to arrive before
111+ # BotStoppedSpeaking when pushing a TTSSpeakFrame in the
112+ # on_function_calls_started event handler, so the counter guard
113+ # prevents the timer from starting while a function call is in progress.
114+ if not self ._user_turn_in_progress and self ._function_calls_in_progress == 0 :
115+ await self ._start_idle_timer ()
116+ elif isinstance (frame , BotStartedSpeakingFrame ):
117+ await self ._cancel_idle_timer ()
118+ elif isinstance (frame , UserStartedSpeakingFrame ):
119+ self ._user_turn_in_progress = True
120+ await self ._cancel_idle_timer ()
121+ elif isinstance (frame , UserStoppedSpeakingFrame ):
122+ self ._user_turn_in_progress = False
120123 elif isinstance (frame , FunctionCallsStartedFrame ):
121- await self ._handle_function_calls_started (frame )
122- elif isinstance (frame , FunctionCallResultFrame ):
123- await self ._handle_function_call_result (frame )
124-
125- async def _handle_activity (self , _ : UserSpeakingFrame | BotSpeakingFrame ):
126- """Handle continuous activity frames that should reset the idle timer.
127-
128- These frames are emitted continuously while the user or bot is speaking,
129- so we simply reset the timer whenever we receive them.
130-
131- Args:
132- frame: The activity frame to process.
133- """
134- self .user_idle_event .set ()
135-
136- async def _handle_function_calls_started (self , _ : FunctionCallsStartedFrame ):
137- """Handle function calls started event.
138-
139- Function calls can take longer than the timeout, so we track their state
140- to prevent idle callbacks while they're in progress.
141-
142- Args:
143- frame: The FunctionCallsStartedFrame to process.
144- """
145- self ._function_call_in_progress = True
146- self .user_idle_event .set ()
147-
148- async def _handle_function_call_result (self , _ : FunctionCallResultFrame ):
149- """Handle function call result event.
150-
151- Args:
152- frame: The FunctionCallResultFrame to process.
153- """
154- self ._function_call_in_progress = False
155- self .user_idle_event .set ()
156-
157- async def user_idle_task_handler (self ):
158- """Monitors for idle timeout and triggers events.
159-
160- Runs in a loop until cancelled. The idle timer is reset whenever activity
161- frames are received (UserSpeakingFrame or BotSpeakingFrame). Function calls
162- are tracked via state since they only send start/end frames. If no activity
163- is detected for the configured timeout period and no function call is in
164- progress, the on_user_turn_idle event is triggered.
165- """
166- while True :
167- try :
168- await asyncio .wait_for (self .user_idle_event .wait (), timeout = self ._user_idle_timeout )
169- self .user_idle_event .clear ()
170- except asyncio .TimeoutError :
171- # Only trigger if conversation has started and no function call is in progress
172- if self ._conversation_started and not self ._function_call_in_progress :
173- await self ._call_event_handler ("on_user_turn_idle" )
124+ self ._function_calls_in_progress += len (frame .function_calls )
125+ await self ._cancel_idle_timer ()
126+ elif isinstance (frame , (FunctionCallResultFrame , FunctionCallCancelFrame )):
127+ self ._function_calls_in_progress = max (0 , self ._function_calls_in_progress - 1 )
128+
129+ async def _start_idle_timer (self ):
130+ """Start (or restart) the idle timer."""
131+ await self ._cancel_idle_timer ()
132+ self ._idle_timer_task = self .task_manager .create_task (
133+ self ._idle_timer_expired (),
134+ f"{ self } ::idle_timer" ,
135+ )
136+
137+ async def _cancel_idle_timer (self ):
138+ """Cancel the idle timer if running."""
139+ if self ._idle_timer_task :
140+ await self .task_manager .cancel_task (self ._idle_timer_task )
141+ self ._idle_timer_task = None
142+
143+ async def _idle_timer_expired (self ):
144+ """Sleep for the timeout duration then fire the idle event."""
145+ await asyncio .sleep (self ._user_idle_timeout )
146+ self ._idle_timer_task = None
147+ await self ._call_event_handler ("on_user_turn_idle" )
0 commit comments