66 changes: 63 additions & 3 deletions litellm/router_strategy/complexity_router/complexity_router.py
@@ -8,6 +8,7 @@

Inspired by ClawRouter: https://github.com/BlockRunAI/ClawRouter
"""

import re
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union

@@ -331,6 +332,57 @@ def get_model_for_tier(self, tier: ComplexityTier) -> str:
f"No model configured for tier {tier_key} and no default_model set"
)

def _get_provider_prefix(self, model: str) -> str:
"""Extract provider prefix from model name."""
if "/" in model:
return model.split("/")[0]
for prefix in ("vertex_ai", "anthropic", "bedrock", "openai", "azure", "aws"):
if model.startswith(prefix):
return prefix
Comment on lines +337 to +341 (Copilot AI, Apr 18, 2026):

_get_provider_prefix() only detects Anthropic when the model string is prefixed (e.g., anthropic/...) or starts with anthropic. In this codebase, the default ComplexityRouter tier model is claude-sonnet-4-20250514 (no provider prefix), so this will be treated as its own provider string and _should_strip_thinking_blocks() may not strip when routing away from Claude/Anthropic models. Consider using a provider-inference utility (e.g., Router/LiteLLM provider resolution) or at least a Claude heuristic (model.lower().startswith('claude') / contains claude) so Anthropic models are consistently recognized.

Suggested change
if "/" in model:
return model.split("/")[0]
for prefix in ("vertex_ai", "anthropic", "bedrock", "openai", "azure", "aws"):
if model.startswith(prefix):
return prefix
normalized_model = model.strip().lower()
if "/" in normalized_model:
return normalized_model.split("/")[0]
for prefix in ("vertex_ai", "anthropic", "bedrock", "openai", "azure", "aws"):
if normalized_model.startswith(prefix):
return prefix
if normalized_model.startswith("claude") or "claude" in normalized_model:
return "anthropic"

return model
Comment on lines +335 to +342 (Contributor):

P2 Provider-specific logic outside llms/

_get_provider_prefix and _should_strip_thinking_blocks hard-code provider names (vertex_ai, anthropic, bedrock, …) directly in the complexity-router strategy file. The project rule is to keep provider-specific code in the llms/ directory so that provider-specific concerns are centralised and new providers don't require changes in multiple places. Consider moving the stripping logic into a shared utility under llms/ and calling it from here.

Rule Used: What: Avoid writing provider-specific code outside... (source)

Comment on lines +335 to +342 (Contributor):

P2 Hardcoded provider capability list

providers_with_incompatible_thinking is a static tuple that will need a code change every time a new provider with thinking blocks is added. Per the project convention, provider capabilities should live in model_prices_and_context_window.json (and be queried via get_model_info) rather than being hardcoded, so that support for new models/providers takes effect without a litellm upgrade.

Rule Used: What: Do not hardcode model-specific flags in the ... (source)

Comment on lines +339 to +342:

P1 Resolve provider before deciding whether to strip thinking

_get_provider_prefix() falls back to returning the raw model string when there is no explicit provider prefix, but async_pre_routing_hook() is usually called with a complexity-router alias as model and many tier configs use unprefixed model IDs (e.g. claude-sonnet-*). In that common path, _should_strip_thinking_blocks() gets values like smart-router and claude-sonnet-4, neither matches ("vertex_ai", "anthropic"), and incompatible thinking blocks are not stripped, so the 400 this change is targeting can still occur.

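A minimal standalone sketch of the kind of resolution heuristic the comments above suggest (the prefix list, the `claude`/`gpt` heuristics, and the function name are illustrative assumptions, not litellm's actual provider-resolution API):

```python
# Hypothetical provider-resolution heuristic for unprefixed model IDs.
KNOWN_PREFIXES = ("vertex_ai", "anthropic", "bedrock", "openai", "azure", "aws")


def resolve_provider(model: str) -> str:
    """Best-effort provider guess; falls back to the raw model string."""
    m = model.strip().lower()
    if "/" in m:
        # Explicit prefix, e.g. "anthropic/claude-3-haiku"
        return m.split("/")[0]
    for prefix in KNOWN_PREFIXES:
        if m.startswith(prefix):
            return prefix
    # Heuristics for common unprefixed model IDs (assumptions, not exhaustive).
    if "claude" in m:
        return "anthropic"
    if m.startswith(("gpt", "o1", "o3")):
        return "openai"
    return m
```

With this, `claude-sonnet-4-20250514` resolves to `anthropic` even without a prefix, so the stripping decision is made on providers rather than raw alias strings. A production version would defer to litellm's own provider resolution instead of this heuristic.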


def _should_strip_thinking_blocks(
self, original_model: str, new_model: str
) -> bool:
"""Determine if thinking blocks should be stripped when switching models."""
original_provider = self._get_provider_prefix(original_model)
new_provider = self._get_provider_prefix(new_model)
if original_provider == new_provider:
return False
providers_with_incompatible_thinking = ("vertex_ai", "anthropic")
return (
original_provider in providers_with_incompatible_thinking
or new_provider in providers_with_incompatible_thinking
)
Comment on lines +344 to +356 (Contributor):

P1 Overly broad stripping β€” loses context for bedrock/Claude multi-turn

_should_strip_thinking_blocks returns True whenever either provider is vertex_ai or anthropic, even when the destination can handle thinking blocks just fine. For example, routing from anthropic β†’ bedrock (which serves Claude models that also support thinking) would trigger stripping. In a multi-turn conversation the thinking blocks carry reasoning context back to the model; silently dropping them degrades response quality or causes the receiving model to lose chain-of-thought continuity.

The condition should be narrowed to the actual incompatible pair instead of "at least one side is in the list":

def _should_strip_thinking_blocks(self, original_model: str, new_model: str) -> bool:
    original_provider = self._get_provider_prefix(original_model)
    new_provider = self._get_provider_prefix(new_model)
    if original_provider == new_provider:
        return False
    incompatible_pairs = {
        frozenset({"vertex_ai", "anthropic"}),
    }
    return frozenset({original_provider, new_provider}) in incompatible_pairs
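A quick standalone check of the pairwise form (provider strings are passed in directly here for illustration; the real method derives them from model names):

```python
def should_strip(original_provider: str, new_provider: str) -> bool:
    """Strip only for the specific incompatible provider pair."""
    if original_provider == new_provider:
        return False
    # frozenset makes the pair order-insensitive: vertex_ai -> anthropic
    # and anthropic -> vertex_ai both match the same entry.
    incompatible_pairs = {frozenset({"vertex_ai", "anthropic"})}
    return frozenset({original_provider, new_provider}) in incompatible_pairs
```

Under this narrower condition, anthropic to bedrock no longer triggers stripping, while both directions of the vertex_ai/anthropic switch still do.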

Comment on lines +344 to +356 (Copilot AI, Apr 18, 2026):

This implementation strips thinking blocks whenever the providers differ and either side is in (vertex_ai, anthropic). The PR description says stripping should occur specifically when switching between vertex_ai and anthropic; if that narrower behavior is intended, this should be an explicit pairwise check (vertex_ai→anthropic or anthropic→vertex_ai) rather than the current broader condition.


def _strip_thinking_blocks_from_messages(
self, messages: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
"""Strip thinking/redacted_thinking blocks from messages."""
import copy

cleaned: List[Dict[str, Any]] = []
for msg in messages:
if not isinstance(msg, dict):
cleaned.append(msg)
continue
msg_copy = copy.deepcopy(msg)
content = msg_copy.get("content")
if isinstance(content, list):
filtered = [
block
for block in content
if not (
isinstance(block, dict)
and block.get("type") in ("thinking", "redacted_thinking")
)
]
if not filtered:
continue
msg_copy["content"] = filtered
Comment on lines +378 to +382 (Contributor):

P2 Silent message drop when all content is thinking blocks

If an assistant turn contains only thinking/redacted_thinking blocks and no text, filtered will be empty and the entire message is silently dropped via continue. This can break the alternating user/assistant turn requirement that Anthropic and many other providers enforce, causing a downstream 400 error. Consider replacing the dropped message with a minimal placeholder or logging a warning so the caller is aware of the data loss.
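One possible shape for the placeholder approach (a standalone sketch; the placeholder text and helper name are assumptions, not existing litellm behavior):

```python
import copy
from typing import Any, Dict, List

THINKING_TYPES = ("thinking", "redacted_thinking")


def strip_with_placeholder(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Strip thinking blocks but never drop a turn entirely."""
    cleaned: List[Dict[str, Any]] = []
    for msg in messages:
        if not isinstance(msg, dict) or not isinstance(msg.get("content"), list):
            cleaned.append(msg)
            continue
        msg_copy = copy.deepcopy(msg)
        filtered = [
            b for b in msg_copy["content"]
            if not (isinstance(b, dict) and b.get("type") in THINKING_TYPES)
        ]
        # If everything was a thinking block, keep the turn alive with a
        # minimal text block so user/assistant alternation is preserved.
        msg_copy["content"] = filtered or [{"type": "text", "text": "(reasoning omitted)"}]
        cleaned.append(msg_copy)
    return cleaned
```

This trades a small amount of synthetic content for preserving the alternating-turn structure that some providers validate.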

cleaned.append(msg_copy)
Comment on lines +361 to +383 (Copilot AI, Apr 18, 2026):

When stripping is enabled, this function deep-copies every message dict even if it contains no list-based content blocks, which can be costly on large histories. Consider only copying when a message actually needs modification (e.g., scan for content lists containing thinking blocks first, then shallow-copy the outer message plus the filtered content) and moving the `import copy` to module scope to avoid repeated imports.
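A standalone sketch of that copy-only-when-needed variant (hypothetical helper name; it deliberately mirrors the original drop-on-empty behavior, which has its own issues noted above):

```python
from typing import Any, Dict, List

THINKING_TYPES = ("thinking", "redacted_thinking")


def _is_thinking_block(block: Any) -> bool:
    return isinstance(block, dict) and block.get("type") in THINKING_TYPES


def strip_thinking_lazily(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    cleaned: List[Dict[str, Any]] = []
    for msg in messages:
        content = msg.get("content") if isinstance(msg, dict) else None
        # Only pay for a copy when the message actually has thinking blocks.
        if isinstance(content, list) and any(_is_thinking_block(b) for b in content):
            filtered = [b for b in content if not _is_thinking_block(b)]
            if not filtered:
                continue  # same silent drop as the original implementation
            cleaned.append({**msg, "content": filtered})  # shallow copy of the turn
        else:
            cleaned.append(msg)  # untouched messages are passed through as-is
    return cleaned
```

Messages without thinking blocks are appended by reference, so large histories incur no copying cost on the common path.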

return cleaned
Comment on lines +362 to +384 (Copilot AI, Apr 18, 2026):

_strip_thinking_blocks_from_messages() duplicates the existing helper strip_thinking_blocks_from_anthropic_messages() in litellm/llms/anthropic/common_utils.py (which already handles deep-copying and omitting empty content arrays). Reusing the existing utility (or moving a provider-agnostic version to a shared module) would reduce duplication and the risk of the behaviors diverging over time.

Suggested change
import copy
cleaned: List[Dict[str, Any]] = []
for msg in messages:
if not isinstance(msg, dict):
cleaned.append(msg)
continue
msg_copy = copy.deepcopy(msg)
content = msg_copy.get("content")
if isinstance(content, list):
filtered = [
block
for block in content
if not (
isinstance(block, dict)
and block.get("type") in ("thinking", "redacted_thinking")
)
]
if not filtered:
continue
msg_copy["content"] = filtered
cleaned.append(msg_copy)
return cleaned
from litellm.llms.anthropic.common_utils import (
strip_thinking_blocks_from_anthropic_messages,
)
return strip_thinking_blocks_from_anthropic_messages(messages)

Comment on lines +358 to +384 (Contributor):

P1 Duplicates an existing function in llms/anthropic/common_utils.py

_strip_thinking_blocks_from_messages is line-for-line identical to strip_thinking_blocks_from_anthropic_messages already defined in litellm/llms/anthropic/common_utils.py (lines 780–808), including the same deepcopy approach, the same filter predicate, and the same silent-drop-on-empty logic. Having two copies means any future fix to the original (e.g. for the silent-drop issue noted in prior review) won't propagate here.

Replace the duplicated implementation with an import of the existing helper:

from litellm.llms.anthropic.common_utils import strip_thinking_blocks_from_anthropic_messages

and call it at the call site:

cleaned_messages = strip_thinking_blocks_from_anthropic_messages(messages)

Rule Used: What: Avoid writing provider-specific code outside... (source)


async def async_pre_routing_hook(
self,
model: str,
@@ -400,11 +452,19 @@ async def async_pre_routing_hook(
routed_model = self.get_model_for_tier(tier)

verbose_router_logger.info(
f"ComplexityRouter: tier={tier.value}, score={score:.3f}, "
f"signals={signals}, routed_model={routed_model}"
f"ComplexityRouter: tier={tier.value}, score={score:.3f}, signals={signals}, routed_model={routed_model}"
)

# Strip thinking blocks when switching between providers with incompatible thinking formats
cleaned_messages = messages
if self._should_strip_thinking_blocks(model, routed_model):

P2 Use the actual source provider before stripping thinking

async_pre_routing_hook() calls _should_strip_thinking_blocks(model, routed_model) using model from the request, but in normal complexity-router flow that value is the router alias (for example smart-router), not the provider of the existing assistant history. When tiers are configured with prefixed targets like anthropic/claude-*, this evaluates as a provider switch on every turn and strips thinking blocks even when the conversation stays on Anthropic, which drops valid context and breaks the β€œonly on provider switch” behavior this change is meant to enforce.


cleaned_messages = self._strip_thinking_blocks_from_messages(messages)
if cleaned_messages != messages:
verbose_router_logger.debug(
f"ComplexityRouter: stripped thinking blocks when switching from {model} to {routed_model}"
)

return PreRoutingHookResponse(
model=routed_model,
messages=messages,
messages=cleaned_messages,
)
81 changes: 64 additions & 17 deletions tests/test_litellm/router_strategy/test_complexity_router.py
@@ -3,16 +3,15 @@

Tests the rule-based complexity scoring and tier assignment logic.
"""

import os
import sys
from typing import Dict, List
from unittest.mock import MagicMock

import pytest

sys.path.insert(
0, os.path.abspath("../../..")
) # Adds the parent directory to the system path
sys.path.insert(0, os.path.abspath("../../..")) # Adds the parent directory to the system path

from litellm import Router
from litellm.router_strategy.complexity_router.complexity_router import (
@@ -321,12 +320,15 @@ async def test_pre_routing_hook_simple_message(self, complexity_router):
async def test_pre_routing_hook_complex_message(self, complexity_router):
"""Test pre-routing hook with a message containing technical content."""
messages = [
{"role": "user", "content": (
"Design a distributed microservice architecture with Kubernetes "
"orchestration, implementing proper authentication, encryption, "
"and database optimization for high throughput. Think step by step "
"about the performance implications and scalability requirements."
)}
{
"role": "user",
"content": (
"Design a distributed microservice architecture with Kubernetes "
"orchestration, implementing proper authentication, encryption, "
"and database optimization for high throughput. Think step by step "
"about the performance implications and scalability requirements."
),
}
]
result = await complexity_router.async_pre_routing_hook(
model="test-model",
@@ -376,16 +378,63 @@ async def test_pre_routing_hook_with_system_prompt(self, complexity_router):
@pytest.mark.asyncio
async def test_pre_routing_hook_reasoning_message(self, complexity_router):
"""Test pre-routing hook with reasoning markers."""
messages = [{"role": "user", "content": "Let's think step by step and reason through this problem carefully."}]
result = await complexity_router.async_pre_routing_hook(
model="test-model",
request_kwargs={},
messages=messages,
)
assert result is not None
assert result.model == "o1-preview" # REASONING tier model

@pytest.mark.asyncio
async def test_pre_routing_hook_strips_thinking_blocks_on_provider_switch(self, complexity_router):
"""Test thinking blocks are stripped when switching from vertex_ai to anthropic."""
messages = [
{"role": "user", "content": "Let's think step by step and reason through this problem carefully."}
{"role": "user", "content": "Hello!"},
{
"role": "assistant",
"content": [
{"type": "text", "text": "Sure!"},
{"type": "thinking", "thinking": "User said hello", "signature": "abc123"},
],
},
]
result = await complexity_router.async_pre_routing_hook(
model="vertex_ai/test-model",
request_kwargs={},
Comment on lines +391 to +405 (Copilot AI, Apr 18, 2026):

The docstring says this test covers switching from vertex_ai to Anthropic, but with the basic_config fixture the routed model for a simple "Hello!" prompt is gpt-4o-mini (OpenAI), not an Anthropic model. Either adjust the tier mapping / prompt so routed_model is actually Anthropic, or update the test name/docstring to reflect what’s being exercised.

messages=messages,
)
assert result is not None
# Should strip thinking blocks from assistant message
content = result.messages[1]["content"]
assert isinstance(content, list)
assert all(block["type"] == "text" for block in content)
Comment on lines +391 to +412 (Contributor):

P2 Test doesn't exercise the described bug scenario

The test routes "Hello!" (SIMPLE tier) to "gpt-4o-mini", not to an Anthropic model, so it only verifies stripping when going vertex_ai β†’ openai β€” not the original crash scenario of vertex_ai (GLM) β†’ anthropic. Adding a fixture with SIMPLE: "anthropic/claude-3-haiku-20240307" would confirm the exact provider pair from the bug report is handled.


@pytest.mark.asyncio
async def test_pre_routing_hook_preserves_thinking_blocks_on_same_provider(self, complexity_router):
"""Test thinking blocks are preserved when staying within same provider."""
messages = [
{"role": "user", "content": "Hello!"},
{
"role": "assistant",
"content": [
{"type": "text", "text": "Sure!"},
{"type": "thinking", "thinking": "User said hello", "signature": "abc123"},
],
},
]
# Using model without provider prefix for both - should preserve thinking blocks
result = await complexity_router.async_pre_routing_hook(
model="test-model",
request_kwargs={},
messages=messages,
)
assert result is not None
assert result.model == "o1-preview" # REASONING tier model
# Should preserve thinking blocks since no provider switch
content = result.messages[1]["content"]
Comment on lines +415 to +435 (Copilot AI, Apr 18, 2026):

This test claims it preserves thinking blocks "when staying within same provider" / "since no provider switch", but it actually routes from model="test-model" to the SIMPLE tier model (gpt-4o-mini) which is a different model string. If the intent is to validate the provider-equality branch (original_provider == new_provider), you’ll need a case where both model and routed_model resolve to the same provider; otherwise, update the test name/comments to match the behavior under test (no stripping because neither side is considered incompatible).

assert isinstance(content, list)
assert len(content) == 2 # Both text and thinking preserved
Comment on lines +435 to +437 (Contributor):

P1 IndexError on result.messages[1] β€” only one message in input

The messages list passed to async_pre_routing_hook contains a single user-turn at index 0. The hook returns that same list unchanged (no thinking blocks to strip, same provider), so result.messages[1] raises IndexError: list index out of range. The test claims to verify that thinking blocks are preserved, but it never actually includes any thinking blocks in the input, making both the assertion and the intent incorrect.



class TestConfigOverrides:
@@ -412,9 +461,7 @@ def test_custom_tier_boundaries(self, mock_router_instance):
complexity_router_config=config,
)
# With very low thresholds, even neutral prompts should be COMPLEX or higher
tier, score, signals = router.classify(
"Explain how HTTP works with REST APIs and distributed systems"
)
tier, score, signals = router.classify("Explain how HTTP works with REST APIs and distributed systems")
# With boundaries this low, should be at least MEDIUM (anything above -0.5)
assert tier != ComplexityTier.SIMPLE, f"Expected non-SIMPLE tier, got {tier} with score {score}"

@@ -575,22 +622,22 @@ def test_default_config_not_mutated(self, mock_router_instance):

# Get original default
original_default = ComplexityRouterConfig().default_model

# Create router with empty config and custom default_model
router1 = ComplexityRouter(
model_name="test-router-1",
litellm_router_instance=mock_router_instance,
complexity_router_config=None,
default_model="custom-fallback",
)

# Create another router without config
router2 = ComplexityRouter(
model_name="test-router-2",
litellm_router_instance=mock_router_instance,
complexity_router_config=None,
)

# Router2 should have fresh defaults, not router1's custom default_model
# Create a fresh config to check
fresh_config = ComplexityRouterConfig()