@@ -168,11 +168,13 @@ async def generate(
 
         generation_kwargs: dict[str, Any] = {}
         generation_kwargs.update(self._generation_kwargs)
-        generation_kwargs["reasoning"] = Reasoning(
-            effort=generation_kwargs.pop("reasoning_effort", None),
-            summary="auto",
-        )
-        generation_kwargs["include"] = ["reasoning.encrypted_content"]
+        reasoning_effort = generation_kwargs.pop("reasoning_effort", None)
+        if reasoning_effort is not None:
+            generation_kwargs["reasoning"] = Reasoning(
+                effort=reasoning_effort,
+                summary="auto",
+            )
+            generation_kwargs["include"] = ["reasoning.encrypted_content"]
 
         try:
             response = await self._client.responses.create(
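
Note on this hunk: previously the provider attached a reasoning block on every request (with effort=None when unset) and always requested reasoning.encrypted_content; now both keys are sent only when reasoning_effort is explicitly provided. A minimal runnable sketch of the new pattern, with Reasoning stubbed as a stand-in for the SDK type and the helper name invented for illustration:

from dataclasses import dataclass
from typing import Any

@dataclass
class Reasoning:  # stand-in for the SDK type used in the diff
    effort: str | None
    summary: str

def build_generation_kwargs(base: dict[str, Any]) -> dict[str, Any]:
    # Pop the effort first, then opt in to reasoning fields only if it was set.
    generation_kwargs = dict(base)
    reasoning_effort = generation_kwargs.pop("reasoning_effort", None)
    if reasoning_effort is not None:
        generation_kwargs["reasoning"] = Reasoning(effort=reasoning_effort, summary="auto")
        generation_kwargs["include"] = ["reasoning.encrypted_content"]
    return generation_kwargs

assert "reasoning" not in build_generation_kwargs({"temperature": 0.7})
assert build_generation_kwargs({"reasoning_effort": "high"})["include"] == ["reasoning.encrypted_content"]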
packages/kosong/tests/api_snapshot_tests/test_openai_responses.py (12 additions, 0 deletions)
@@ -379,6 +379,18 @@ async def test_openai_responses_generation_kwargs():
     assert (body["temperature"], body["max_output_tokens"]) == snapshot((0.7, 2048))
 
 
+async def test_openai_responses_omits_reasoning_by_default():
+    with respx.mock(base_url="https://api.openai.com") as mock:
+        mock.post("/v1/responses").mock(return_value=Response(200, json=make_response()))
+        provider = OpenAIResponses(model="gpt-4.1", api_key="test-key", stream=False)
+        stream = await provider.generate("", [], [Message(role="user", content="Hi")])
+        async for _ in stream:
+            pass
+        body = json.loads(mock.calls.last.request.content.decode())
+        assert "reasoning" not in body
+        assert "include" not in body
+
+
 async def test_openai_responses_with_thinking():
     with respx.mock(base_url="https://api.openai.com") as mock:
         mock.post("/v1/responses").mock(return_value=Response(200, json=make_response()))
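
The new test pins down the default path. For reference, a hypothetical sketch of the two request shapes (key names follow the diff and the OpenAI Responses API; the exact payload the provider builds may differ):

# Default path: no reasoning fields at all.
default_body = {
    "model": "gpt-4.1",
    "input": [{"role": "user", "content": "Hi"}],
}

# Opted-in path: reasoning_effort was set, so both keys appear.
reasoning_body = {
    "model": "gpt-4.1",
    "input": [{"role": "user", "content": "Hi"}],
    "reasoning": {"effort": "high", "summary": "auto"},
    "include": ["reasoning.encrypted_content"],
}

assert "reasoning" not in default_body and "include" not in default_body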
src/kimi_cli/llm.py (1 addition, 1 deletion)
@@ -224,7 +224,7 @@ def create_llm(
     # Apply thinking if specified or if model always requires thinking
     if "always_thinking" in capabilities or (thinking is True and "thinking" in capabilities):
         chat_provider = chat_provider.with_thinking("high")
-    elif thinking is False:
+    elif thinking is False and provider.type != "openai_responses":
         chat_provider = chat_provider.with_thinking("off")
     # If thinking is None and model doesn't always think, leave as-is (default behavior)
 
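
With the provider now omitting reasoning by default, forcing with_thinking("off") for openai_responses would only reintroduce a reasoning parameter, so thinking=False leaves that provider untouched. A sketch of the branch in isolation, where resolve_thinking is a hypothetical helper (the real code mutates chat_provider instead of returning a value):

def resolve_thinking(provider_type: str, thinking: bool | None, capabilities: frozenset[str]) -> str | None:
    # Return the with_thinking() argument to apply, or None to leave defaults alone.
    if "always_thinking" in capabilities or (thinking is True and "thinking" in capabilities):
        return "high"
    # openai_responses already omits reasoning unless asked, so "off" is redundant.
    if thinking is False and provider_type != "openai_responses":
        return "off"
    return None

assert resolve_thinking("openai_responses", False, frozenset()) is None
assert resolve_thinking("kimi", False, frozenset()) == "off"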
tests/core/test_create_llm.py (25 additions, 0 deletions)
@@ -3,6 +3,7 @@
 from inline_snapshot import snapshot
 from kosong.chat_provider.echo import EchoChatProvider
 from kosong.chat_provider.kimi import Kimi
+from kosong.contrib.chat_provider.openai_responses import OpenAIResponses
 from pydantic import SecretStr
 
 from kimi_cli.config import LLMModel, LLMProvider
@@ -133,3 +134,27 @@ def test_create_llm_requires_base_url_for_kimi():
     model = LLMModel(provider="kimi", model="kimi-base", max_context_size=4096)
 
     assert create_llm(provider, model) is None
+
+
+def test_create_llm_openai_responses_does_not_force_reasoning_off():
+    provider = LLMProvider(
+        type="openai_responses",
+        base_url="https://openrouter.ai/api/v1",
+        api_key=SecretStr("test-key"),
+    )
+    model = LLMModel(
+        provider="openrouter_custom",
+        model="minimax/minimax-m2.5",
+        max_context_size=128000,
+        capabilities=None,
+    )
+
+    llm = create_llm(provider, model, thinking=False)
+
+    assert llm is not None
+    assert isinstance(llm.chat_provider, OpenAIResponses)
+    assert llm.chat_provider.model_parameters == snapshot(
+        {
+            "base_url": "https://openrouter.ai/api/v1/",
+        }
+    )
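
One detail worth noting in the snapshot: the provider is configured with base_url="https://openrouter.ai/api/v1" (no trailing slash), but model_parameters records "https://openrouter.ai/api/v1/"; the trailing slash is presumably added by URL normalization in the underlying client.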