Add first-bot-speech latency to UserBotLatencyObserver

markbackman · markbackman · commit f6ed92337da7 · 2026-03-01T10:01:08.000-05:00
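Measure the time from ClientConnectedFrame to the first BotStartedSpeakingFrame,
emitting a one-time on_first_bot_speech_latency event followed by an
on_latency_breakdown event with the per-service metrics collected in that
window. The measurement is skipped if the user starts speaking first.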
diff --git a/changelog/3885.added.2.md b/changelog/3885.added.2.md
@@ -0,0 +1 @@
+- Added `on_first_bot_speech_latency` event to `UserBotLatencyObserver` measuring the time from client connection to first bot speech, including a latency breakdown with per-service metrics.
diff --git a/changelog/3885.added.md b/changelog/3885.added.md
@@ -1 +1 @@
-- Added `LatencyBreakdown` dataclass and `on_latency_breakdown` event to `UserBotLatencyObserver` for per-service latency metrics (TTFB, text aggregation, user turn duration) collected during each user-to-bot response cycle.
+- Added `on_latency_breakdown` event to `UserBotLatencyObserver` providing per-service TTFB, text aggregation, and user turn duration metrics for each user-to-bot response cycle.
diff --git a/examples/foundational/29-turn-tracking-observer.py b/examples/foundational/29-turn-tracking-observer.py
@@ -101,6 +101,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
         observers=[latency_observer, startup_observer],
     )
 
+    @latency_observer.event_handler("on_first_bot_speech_latency")
+    async def on_first_bot_speech_latency(observer, latency_seconds):
+        logger.info(f"First bot speech: {latency_seconds:.3f}s after client connected")
+
     @latency_observer.event_handler("on_latency_measured")
     async def on_latency_measured(observer, latency_seconds):
         logger.info(f"⏱️ User-to-bot latency: {latency_seconds:.3f}s")
diff --git a/src/pipecat/observers/user_bot_latency_observer.py b/src/pipecat/observers/user_bot_latency_observer.py
@@ -19,6 +19,7 @@
 
 from pipecat.frames.frames import (
     BotStartedSpeakingFrame,
+    ClientConnectedFrame,
     InterruptionFrame,
     MetricsFrame,
     UserStoppedSpeakingFrame,
@@ -80,6 +81,10 @@ class UserBotLatencyObserver(BaseObserver):
         on_latency_breakdown(observer, breakdown): Emitted at each
             ``BotStartedSpeakingFrame`` with a :class:`LatencyBreakdown`
             containing per-service metrics collected during the user→bot cycle.
+        on_first_bot_speech_latency(observer, latency_seconds): Emitted at most
+            once, at the first ``BotStartedSpeakingFrame`` after
+            ``ClientConnectedFrame``, with the seconds elapsed since connection.
+            Not emitted if the user starts speaking before the bot does.
     """
 
     def __init__(self, *, max_frames=100, **kwargs):
@@ -97,6 +102,10 @@ def __init__(self, *, max_frames=100, **kwargs):
         self._user_stopped_time: Optional[float] = None
         self._user_turn: Optional[float] = None
 
+        # First bot speech tracking
+        self._client_connected_time: Optional[float] = None
+        self._first_bot_speech_measured: bool = False
+
         # Frame deduplication (bounded deque + set pattern)
         self._processed_frames: set = set()
         self._frame_history: deque = deque(maxlen=max_frames)
@@ -107,6 +116,7 @@ def __init__(self, *, max_frames=100, **kwargs):
 
         self._register_event_handler("on_latency_measured")
         self._register_event_handler("on_latency_breakdown")
+        self._register_event_handler("on_first_bot_speech_latency")
 
     async def on_push_frame(self, data: FramePushed):
         """Process frames to track speech timing and calculate latency.
@@ -132,12 +142,21 @@ async def on_push_frame(self, data: FramePushed):
         if len(self._processed_frames) > len(self._frame_history):
             self._processed_frames = set(self._frame_history)
 
+        # Track client connection (first occurrence only)
+        if isinstance(data.frame, ClientConnectedFrame):
+            if self._client_connected_time is None:
+                self._client_connected_time = time.time()
+            return
+
         # Track speech and pipeline events for latency
         if isinstance(data.frame, VADUserStartedSpeakingFrame):
             # Reset when user starts speaking
             self._user_stopped_time = None
             self._user_turn = None
             self._reset_accumulators()
+            # If user speaks before the bot's first speech, abandon the
+            # first-bot-speech measurement — it's only meaningful for greetings.
+            self._first_bot_speech_measured = True
         elif isinstance(data.frame, VADUserStoppedSpeakingFrame):
             # Record the actual time the user stopped speaking, which is
             # the VAD determination time minus the stop_secs silence duration
@@ -159,28 +178,41 @@ async def on_push_frame(self, data: FramePushed):
 
     async def _handle_bot_started_speaking(self):
         """Handle BotStartedSpeakingFrame to emit latency and breakdown."""
-        if self._user_stopped_time is None:
-            return
+        emit_breakdown = False
 
-        latency = time.time() - self._user_stopped_time
-        self._user_stopped_time = None
-        await self._call_event_handler("on_latency_measured", latency)
+        # One-time first bot speech measurement (client connect → first speech)
+        if self._client_connected_time is not None and not self._first_bot_speech_measured:
+            self._first_bot_speech_measured = True
+            latency = time.time() - self._client_connected_time
+            await self._call_event_handler("on_first_bot_speech_latency", latency)
+            emit_breakdown = True
 
-        breakdown = LatencyBreakdown(
-            ttfb=list(self._ttfb),
-            text_aggregation=self._text_aggregation,
-            user_turn_secs=self._user_turn,
-        )
-        await self._call_event_handler("on_latency_breakdown", breakdown)
-        self._reset_accumulators()
+        if self._user_stopped_time is not None:
+            latency = time.time() - self._user_stopped_time
+            self._user_stopped_time = None
+            await self._call_event_handler("on_latency_measured", latency)
+            emit_breakdown = True
+
+        if emit_breakdown:
+            breakdown = LatencyBreakdown(
+                ttfb=list(self._ttfb),
+                text_aggregation=self._text_aggregation,
+                user_turn_secs=self._user_turn,
+            )
+            await self._call_event_handler("on_latency_breakdown", breakdown)
+            self._reset_accumulators()
 
     def _handle_metrics_frame(self, frame: MetricsFrame):
         """Extract latency metrics from a MetricsFrame.
 
-        Only accumulates metrics when a user→bot measurement is in progress
-        (after ``VADUserStoppedSpeakingFrame``).
+        Accumulates metrics when a measurement is in progress: either a
+        user→bot cycle (after ``VADUserStoppedSpeakingFrame``) or the
+        first-bot-speech window (after ``ClientConnectedFrame``).
         """
-        if self._user_stopped_time is None:
+        waiting_for_first_speech = (
+            self._client_connected_time is not None and not self._first_bot_speech_measured
+        )
+        if self._user_stopped_time is None and not waiting_for_first_speech:
             return
 
         for metrics_data in frame.data:
diff --git a/tests/test_user_bot_latency_observer.py b/tests/test_user_bot_latency_observer.py
@@ -2,6 +2,7 @@
 
 from pipecat.frames.frames import (
     BotStartedSpeakingFrame,
+    ClientConnectedFrame,
     InterruptionFrame,
     MetricsFrame,
     UserStoppedSpeakingFrame,
@@ -342,6 +343,126 @@ async def on_breakdown(obs, breakdown):
         self.assertEqual(len(latencies), 0)
         self.assertEqual(len(breakdowns), 0)
 
+    async def test_first_bot_speech_latency(self):
+        """Test first bot speech latency and breakdown from ClientConnected to BotStartedSpeaking."""
+        observer = UserBotLatencyObserver()
+        processor = IdentityFilter()
+
+        first_speech_latencies = []
+        breakdowns = []
+
+        @observer.event_handler("on_first_bot_speech_latency")
+        async def on_first_bot_speech(obs, latency_seconds):
+            first_speech_latencies.append(latency_seconds)
+
+        @observer.event_handler("on_latency_breakdown")
+        async def on_breakdown(obs, breakdown):
+            breakdowns.append(breakdown)
+
+        llm_ttfb = TTFBMetricsData(processor="OpenAILLMService#0", value=0.250)
+        tts_ttfb = TTFBMetricsData(processor="CartesiaTTSService#0", value=0.070)
+
+        frames_to_send = [
+            ClientConnectedFrame(),
+            MetricsFrame(data=[llm_ttfb]),
+            MetricsFrame(data=[tts_ttfb]),
+            BotStartedSpeakingFrame(),
+        ]
+
+        expected_down_frames = [
+            ClientConnectedFrame,
+            MetricsFrame,
+            MetricsFrame,
+            BotStartedSpeakingFrame,
+        ]
+
+        await run_test(
+            processor,
+            frames_to_send=frames_to_send,
+            expected_down_frames=expected_down_frames,
+            observers=[observer],
+        )
+
+        self.assertEqual(len(first_speech_latencies), 1)
+        self.assertGreater(first_speech_latencies[0], 0)
+        self.assertLess(first_speech_latencies[0], 1.0)
+
+        # Breakdown should also be emitted with the accumulated metrics
+        self.assertEqual(len(breakdowns), 1)
+        self.assertEqual(len(breakdowns[0].ttfb), 2)
+        self.assertEqual(breakdowns[0].ttfb[0].processor, "OpenAILLMService#0")
+        self.assertEqual(breakdowns[0].ttfb[1].processor, "CartesiaTTSService#0")
+
+    async def test_first_bot_speech_only_once(self):
+        """Test that first bot speech latency is only emitted once."""
+        observer = UserBotLatencyObserver()
+        processor = IdentityFilter()
+
+        first_speech_latencies = []
+
+        @observer.event_handler("on_first_bot_speech_latency")
+        async def on_first_bot_speech(obs, latency_seconds):
+            first_speech_latencies.append(latency_seconds)
+
+        frames_to_send = [
+            ClientConnectedFrame(),
+            BotStartedSpeakingFrame(),
+            # Second bot speech should not trigger the event again
+            VADUserStoppedSpeakingFrame(),
+            BotStartedSpeakingFrame(),
+        ]
+
+        expected_down_frames = [
+            ClientConnectedFrame,
+            BotStartedSpeakingFrame,
+            VADUserStoppedSpeakingFrame,
+            BotStartedSpeakingFrame,
+        ]
+
+        await run_test(
+            processor,
+            frames_to_send=frames_to_send,
+            expected_down_frames=expected_down_frames,
+            observers=[observer],
+        )
+
+        self.assertEqual(len(first_speech_latencies), 1)
+
+    async def test_first_bot_speech_skipped_when_user_speaks_first(self):
+        """Test that first bot speech event is not emitted when user speaks before the bot."""
+        observer = UserBotLatencyObserver()
+        processor = IdentityFilter()
+
+        first_speech_latencies = []
+
+        @observer.event_handler("on_first_bot_speech_latency")
+        async def on_first_bot_speech(obs, latency_seconds):
+            first_speech_latencies.append(latency_seconds)
+
+        frames_to_send = [
+            ClientConnectedFrame(),
+            # User speaks before bot has a chance to greet
+            VADUserStartedSpeakingFrame(),
+            VADUserStoppedSpeakingFrame(),
+            BotStartedSpeakingFrame(),
+        ]
+
+        expected_down_frames = [
+            ClientConnectedFrame,
+            VADUserStartedSpeakingFrame,
+            VADUserStoppedSpeakingFrame,
+            BotStartedSpeakingFrame,
+        ]
+
+        await run_test(
+            processor,
+            frames_to_send=frames_to_send,
+            expected_down_frames=expected_down_frames,
+            observers=[observer],
+        )
+
+        self.assertEqual(len(first_speech_latencies), 0)
+
 
 if __name__ == "__main__":
     unittest.main()

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	+- Added `on_first_bot_speech_latency` event to `UserBotLatencyObserver` measuring the time from client connection to first bot speech, including a latency breakdown with per-service metrics.
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		-- Added `LatencyBreakdown` dataclass and `on_latency_breakdown` event to `UserBotLatencyObserver` for per-service latency metrics (TTFB, text aggregation, user turn duration) collected during each user-to-bot response cycle.
	`1`	+- Added `on_latency_breakdown` event to `UserBotLatencyObserver` providing per-service TTFB, text aggregation, and user turn duration metrics for each user-to-bot response cycle.