1919
2020from pipecat .frames .frames import (
2121 BotStartedSpeakingFrame ,
22+ ClientConnectedFrame ,
2223 InterruptionFrame ,
2324 MetricsFrame ,
2425 UserStoppedSpeakingFrame ,
@@ -80,6 +81,10 @@ class UserBotLatencyObserver(BaseObserver):
8081 on_latency_breakdown(observer, breakdown): Emitted at each
8182 ``BotStartedSpeakingFrame`` with a :class:`LatencyBreakdown`
8283 containing per-service metrics collected during the user→bot cycle.
84+ on_first_bot_speech_latency(observer, latency_seconds): Emitted once,
85+ the first time ``BotStartedSpeakingFrame`` arrives after
86+ ``ClientConnectedFrame``. Measures the time from client connection
87+ to the first bot speech.
8388 """
8489
8590 def __init__ (self , * , max_frames = 100 , ** kwargs ):
@@ -97,6 +102,10 @@ def __init__(self, *, max_frames=100, **kwargs):
97102 self ._user_stopped_time : Optional [float ] = None
98103 self ._user_turn : Optional [float ] = None
99104
105+ # First bot speech tracking
106+ self ._client_connected_time : Optional [float ] = None
107+ self ._first_bot_speech_measured : bool = False
108+
100109 # Frame deduplication (bounded deque + set pattern)
101110 self ._processed_frames : set = set ()
102111 self ._frame_history : deque = deque (maxlen = max_frames )
@@ -107,6 +116,7 @@ def __init__(self, *, max_frames=100, **kwargs):
107116
108117 self ._register_event_handler ("on_latency_measured" )
109118 self ._register_event_handler ("on_latency_breakdown" )
119+ self ._register_event_handler ("on_first_bot_speech_latency" )
110120
111121 async def on_push_frame (self , data : FramePushed ):
112122 """Process frames to track speech timing and calculate latency.
@@ -132,12 +142,21 @@ async def on_push_frame(self, data: FramePushed):
132142 if len (self ._processed_frames ) > len (self ._frame_history ):
133143 self ._processed_frames = set (self ._frame_history )
134144
145+ # Track client connection (first occurrence only)
146+ if isinstance (data .frame , ClientConnectedFrame ):
147+ if self ._client_connected_time is None :
148+ self ._client_connected_time = time .time ()
149+ return
150+
135151 # Track speech and pipeline events for latency
136152 if isinstance (data .frame , VADUserStartedSpeakingFrame ):
137153 # Reset when user starts speaking
138154 self ._user_stopped_time = None
139155 self ._user_turn = None
140156 self ._reset_accumulators ()
157+ # If user speaks before the bot's first speech, abandon the
158+ # first-bot-speech measurement — it's only meaningful for greetings.
159+ self ._first_bot_speech_measured = True
141160 elif isinstance (data .frame , VADUserStoppedSpeakingFrame ):
142161 # Record the actual time the user stopped speaking, which is
143162 # the VAD determination time minus the stop_secs silence duration
@@ -159,28 +178,41 @@ async def on_push_frame(self, data: FramePushed):
159178
async def _handle_bot_started_speaking(self):
    """React to ``BotStartedSpeakingFrame`` by reporting pending latencies.

    Two independent measurements may be reported for a single frame:

    * ``on_first_bot_speech_latency`` when a client-connected time has been
      recorded and the first-bot-speech flag is still unset. The flag is set
      here, so this event is emitted at most once by this method.
    * ``on_latency_measured`` when a user-stopped-speaking time is pending.
      The stored time is cleared once the latency has been computed.

    If at least one of them was reported, ``on_latency_breakdown`` is emitted
    with a :class:`LatencyBreakdown` built from the accumulated metrics, and
    the accumulators are reset. Otherwise the method does nothing.
    """
    reported = False

    # Client connect -> first bot speech (one-time measurement).
    first_speech_pending = not (
        self._client_connected_time is None or self._first_bot_speech_measured
    )
    if first_speech_pending:
        # The flag is flipped before the handler is awaited.
        self._first_bot_speech_measured = True
        since_connect = time.time() - self._client_connected_time
        await self._call_event_handler("on_first_bot_speech_latency", since_connect)
        reported = True

    # User stopped speaking -> bot started speaking (per-turn measurement).
    stopped_at = self._user_stopped_time
    if stopped_at is not None:
        since_user_stopped = time.time() - stopped_at
        self._user_stopped_time = None
        await self._call_event_handler("on_latency_measured", since_user_stopped)
        reported = True

    # Nothing was measured for this frame: no breakdown, accumulators untouched.
    if not reported:
        return

    await self._call_event_handler(
        "on_latency_breakdown",
        LatencyBreakdown(
            ttfb=list(self._ttfb),
            text_aggregation=self._text_aggregation,
            user_turn_secs=self._user_turn,
        ),
    )
    self._reset_accumulators()
176204
177205 def _handle_metrics_frame (self , frame : MetricsFrame ):
178206 """Extract latency metrics from a MetricsFrame.
179207
180- Only accumulates metrics when a user→bot measurement is in progress
181- (after ``VADUserStoppedSpeakingFrame``).
208+ Accumulates metrics when a measurement is in progress: either a
209+ user→bot cycle (after ``VADUserStoppedSpeakingFrame``) or the
210+ first-bot-speech window (after ``ClientConnectedFrame``).
182211 """
183- if self ._user_stopped_time is None :
212+ waiting_for_first_speech = (
213+ self ._client_connected_time is not None and not self ._first_bot_speech_measured
214+ )
215+ if self ._user_stopped_time is None and not waiting_for_first_speech :
184216 return
185217
186218 for metrics_data in frame .data :
0 commit comments