
Commit b77a50d

Merge pull request #3529 from lukepayyapilli/fix/llm-timeout-without-retry
feat: handle exceptions for BaseOpenAILLMService
2 parents 7456a0a + 433c1b9 commit b77a50d

3 files changed: 168 additions & 1 deletion


changelog/3529.fixed.md (new file)
Lines changed: 1 addition & 0 deletions

```diff
@@ -0,0 +1 @@
+- Fixed OpenAI LLM services to emit `ErrorFrame` on completion timeout, enabling proper error handling and LLMSwitcher failover.
```
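The changelog entry summarizes the behavior change: completion timeouts now surface as `ErrorFrame`s instead of being swallowed after the event callback. As a hedged sketch of what this enables, a custom processor can watch for the frame; `FailoverObserver` is a hypothetical name, and since pipecat routes `ErrorFrame`s upstream, such an observer would sit before the LLM service in the pipeline:

```python
# Hedged sketch: observing the ErrorFrame that this fix now emits.
# FailoverObserver is a hypothetical name; ErrorFrame, Frame, FrameProcessor,
# and FrameDirection are existing pipecat types.
from pipecat.frames.frames import ErrorFrame, Frame
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor


class FailoverObserver(FrameProcessor):
    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)
        if isinstance(frame, ErrorFrame):
            # The LLM timeout is now visible as a frame rather than a silent
            # stall; an LLMSwitcher can key its failover off the same signal.
            print(f"LLM error observed: {frame.error}")
        await self.push_frame(frame, direction)
```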

src/pipecat/services/openai/base_llm.py
Lines changed: 4 additions & 1 deletion

```diff
@@ -492,8 +492,11 @@ async def process_frame(self, frame: Frame, direction: FrameDirection):
             await self.push_frame(LLMFullResponseStartFrame())
             await self.start_processing_metrics()
             await self._process_context(context)
-        except httpx.TimeoutException:
+        except httpx.TimeoutException as e:
             await self._call_event_handler("on_completion_timeout")
+            await self.push_error(error_msg="LLM completion timeout", exception=e)
+        except Exception as e:
+            await self.push_error(error_msg=f"Error during completion: {e}", exception=e)
         finally:
             await self.stop_processing_metrics()
             await self.push_frame(LLMFullResponseEndFrame())
```
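Nothing changes for existing event subscribers: a timeout still fires `on_completion_timeout` before the new `push_error` call. A minimal sketch of hooking that event, assuming pipecat's usual `event_handler` decorator registration (constructor configuration elided):

```python
# Minimal sketch, assuming pipecat's standard event-handler registration.
# The "on_completion_timeout" event name is taken from the diff above.
from pipecat.services.openai.llm import OpenAILLMService

llm = OpenAILLMService(model="gpt-4")  # api_key and other config elided


@llm.event_handler("on_completion_timeout")
async def on_completion_timeout(service):
    # Runs before push_error is awaited in the except block above.
    print(f"{service} timed out waiting for a completion")
```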

tests/test_openai_llm_timeout.py (new file)
Lines changed: 163 additions & 0 deletions
```python
#
# Copyright (c) 2024-2026, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

"""Unit tests for OpenAI LLM error handling."""

from unittest.mock import AsyncMock, patch

import httpx
import pytest

from pipecat.frames.frames import (
    LLMContextFrame,
    LLMFullResponseEndFrame,
    LLMFullResponseStartFrame,
)
from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.processors.frame_processor import FrameDirection
from pipecat.services.openai.llm import OpenAILLMService


@pytest.mark.asyncio
async def test_openai_llm_emits_error_frame_on_timeout():
    """Test that OpenAI LLM service emits ErrorFrame when a timeout occurs.

    This enables LLMSwitcher to trigger failover to backup LLMs when the
    primary LLM times out.
    """
    with patch.object(OpenAILLMService, "create_client"):
        service = OpenAILLMService(model="gpt-4")
        service._client = AsyncMock()

        # Track pushed frames and errors
        pushed_frames = []
        pushed_errors = []
        timeout_handler_called = False

        original_push_frame = service.push_frame

        async def mock_push_frame(frame, direction=FrameDirection.DOWNSTREAM):
            pushed_frames.append(frame)
            await original_push_frame(frame, direction)

        async def mock_push_error(error_msg, exception=None):
            pushed_errors.append({"error_msg": error_msg, "exception": exception})

        async def mock_timeout_handler(event_name):
            nonlocal timeout_handler_called
            if event_name == "on_completion_timeout":
                timeout_handler_called = True

        service.push_frame = mock_push_frame
        service.push_error = mock_push_error
        service._call_event_handler = AsyncMock(side_effect=mock_timeout_handler)

        # Mock _process_context to raise TimeoutException
        service._process_context = AsyncMock(
            side_effect=httpx.TimeoutException("Connection timed out")
        )

        # Mock metrics methods
        service.start_processing_metrics = AsyncMock()
        service.stop_processing_metrics = AsyncMock()
        service.start_ttfb_metrics = AsyncMock()

        # Create a context frame to process
        context = LLMContext(
            messages=[{"role": "user", "content": "Hello"}],
        )
        frame = LLMContextFrame(context=context)

        # Process the frame
        await service.process_frame(frame, FrameDirection.DOWNSTREAM)

        # Verify timeout handler was called
        service._call_event_handler.assert_called_once_with("on_completion_timeout")
        assert timeout_handler_called

        # Verify push_error was called with correct message
        assert len(pushed_errors) == 1
        assert pushed_errors[0]["error_msg"] == "LLM completion timeout"
        assert isinstance(pushed_errors[0]["exception"], httpx.TimeoutException)

        # Verify LLMFullResponseStartFrame and LLMFullResponseEndFrame were pushed
        frame_types = [type(f) for f in pushed_frames]
        assert LLMFullResponseStartFrame in frame_types
        assert LLMFullResponseEndFrame in frame_types


@pytest.mark.asyncio
async def test_openai_llm_timeout_still_pushes_end_frame():
    """Test that LLMFullResponseEndFrame is pushed even when timeout occurs.

    The finally block should ensure proper cleanup regardless of timeout.
    """
    with patch.object(OpenAILLMService, "create_client"):
        service = OpenAILLMService(model="gpt-4")
        service._client = AsyncMock()

        pushed_frames = []

        async def mock_push_frame(frame, direction=FrameDirection.DOWNSTREAM):
            pushed_frames.append(frame)

        service.push_frame = mock_push_frame
        service.push_error = AsyncMock()
        service._call_event_handler = AsyncMock()
        service._process_context = AsyncMock(side_effect=httpx.TimeoutException("Timeout"))
        service.start_processing_metrics = AsyncMock()
        service.stop_processing_metrics = AsyncMock()

        context = LLMContext(
            messages=[{"role": "user", "content": "Hello"}],
        )
        frame = LLMContextFrame(context=context)

        await service.process_frame(frame, FrameDirection.DOWNSTREAM)

        # Verify both start and end frames are pushed
        frame_types = [type(f) for f in pushed_frames]
        assert LLMFullResponseStartFrame in frame_types
        assert LLMFullResponseEndFrame in frame_types

        # Verify metrics were stopped
        service.stop_processing_metrics.assert_called_once()


@pytest.mark.asyncio
async def test_openai_llm_emits_error_frame_on_exception():
    """Test that OpenAI LLM service emits ErrorFrame when a general exception occurs.

    This enables proper error handling for API errors, rate limits, and other failures.
    """
    with patch.object(OpenAILLMService, "create_client"):
        service = OpenAILLMService(model="gpt-4")
        service._client = AsyncMock()

        pushed_errors = []

        async def mock_push_error(error_msg, exception=None):
            pushed_errors.append({"error_msg": error_msg, "exception": exception})

        service.push_frame = AsyncMock()
        service.push_error = mock_push_error
        service._call_event_handler = AsyncMock()
        service._process_context = AsyncMock(side_effect=RuntimeError("API Error"))
        service.start_processing_metrics = AsyncMock()
        service.stop_processing_metrics = AsyncMock()

        context = LLMContext(
            messages=[{"role": "user", "content": "Hello"}],
        )
        frame = LLMContextFrame(context=context)

        await service.process_frame(frame, FrameDirection.DOWNSTREAM)

        # Verify push_error was called with correct message
        assert len(pushed_errors) == 1
        assert "Error during completion" in pushed_errors[0]["error_msg"]
        assert "API Error" in pushed_errors[0]["error_msg"]
        assert isinstance(pushed_errors[0]["exception"], RuntimeError)
```
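Because `_process_context`, the client, and the metrics hooks are all mocked, the suite makes no network calls; running it with something like `pytest tests/test_openai_llm_timeout.py -v` should exercise all three cases.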
