@@ -168,11 +168,13 @@ async def generate(
 
         generation_kwargs: dict[str, Any] = {}
         generation_kwargs.update(self._generation_kwargs)
-        generation_kwargs["reasoning"] = Reasoning(
-            effort=generation_kwargs.pop("reasoning_effort", None),
-            summary="auto",
-        )
-        generation_kwargs["include"] = ["reasoning.encrypted_content"]
+        reasoning_effort = generation_kwargs.pop("reasoning_effort", None)
+        if reasoning_effort is not None:
+            generation_kwargs["reasoning"] = Reasoning(
+                effort=reasoning_effort,
+                summary="auto",
+            )
+            generation_kwargs["include"] = ["reasoning.encrypted_content"]
 
         try:
             response = await self._client.responses.create(
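
Note on this hunk: previously the provider attached a reasoning block on every request (with effort=None when unset) and always requested reasoning.encrypted_content; now both keys are sent only when reasoning_effort is explicitly provided. A minimal runnable sketch of the new pattern, with Reasoning stubbed as a stand-in for the SDK type and the helper name invented for illustration:

from dataclasses import dataclass
from typing import Any

@dataclass
class Reasoning:  # stand-in for the SDK type used in the diff
    effort: str | None
    summary: str

def build_generation_kwargs(base: dict[str, Any]) -> dict[str, Any]:
    # Pop the effort first, then opt in to reasoning fields only if it was set.
    generation_kwargs = dict(base)
    reasoning_effort = generation_kwargs.pop("reasoning_effort", None)
    if reasoning_effort is not None:
        generation_kwargs["reasoning"] = Reasoning(effort=reasoning_effort, summary="auto")
        generation_kwargs["include"] = ["reasoning.encrypted_content"]
    return generation_kwargs

assert "reasoning" not in build_generation_kwargs({"temperature": 0.7})
assert build_generation_kwargs({"reasoning_effort": "high"})["include"] == ["reasoning.encrypted_content"]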
packages/kosong/tests/api_snapshot_tests/test_openai_responses.py (12 additions, 0 deletions)
@@ -379,6 +379,18 @@ async def test_openai_responses_generation_kwargs():
     assert (body["temperature"], body["max_output_tokens"]) == snapshot((0.7, 2048))
 
 
+async def test_openai_responses_omits_reasoning_by_default():
+    with respx.mock(base_url="https://api.openai.com") as mock:
+        mock.post("/v1/responses").mock(return_value=Response(200, json=make_response()))
+        provider = OpenAIResponses(model="gpt-4.1", api_key="test-key", stream=False)
+        stream = await provider.generate("", [], [Message(role="user", content="Hi")])
+        async for _ in stream:
+            pass
+        body = json.loads(mock.calls.last.request.content.decode())
+        assert "reasoning" not in body
+        assert "include" not in body
+
+
 async def test_openai_responses_with_thinking():
     with respx.mock(base_url="https://api.openai.com") as mock:
         mock.post("/v1/responses").mock(return_value=Response(200, json=make_response()))
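
The new test pins down the default path. For reference, a hypothetical sketch of the two request shapes (key names follow the diff and the OpenAI Responses API; the exact payload the provider builds may differ):

# Default path: no reasoning fields at all.
default_body = {
    "model": "gpt-4.1",
    "input": [{"role": "user", "content": "Hi"}],
}

# Opted-in path: reasoning_effort was set, so both keys appear.
reasoning_body = {
    "model": "gpt-4.1",
    "input": [{"role": "user", "content": "Hi"}],
    "reasoning": {"effort": "high", "summary": "auto"},
    "include": ["reasoning.encrypted_content"],
}

assert "reasoning" not in default_body and "include" not in default_body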
src/kimi_cli/llm.py (1 addition, 1 deletion)
@@ -224,7 +224,7 @@ def create_llm(
     # Apply thinking if specified or if model always requires thinking
     if "always_thinking" in capabilities or (thinking is True and "thinking" in capabilities):
         chat_provider = chat_provider.with_thinking("high")
-    elif thinking is False:
+    elif thinking is False and provider.type != "openai_responses":
         chat_provider = chat_provider.with_thinking("off")
     # If thinking is None and model doesn't always think, leave as-is (default behavior)
 
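
With the provider now omitting reasoning by default, forcing with_thinking("off") for openai_responses would only reintroduce a reasoning parameter, so thinking=False leaves that provider untouched. A sketch of the branch in isolation, where resolve_thinking is a hypothetical helper (the real code mutates chat_provider instead of returning a value):

def resolve_thinking(provider_type: str, thinking: bool | None, capabilities: frozenset[str]) -> str | None:
    # Return the with_thinking() argument to apply, or None to leave defaults alone.
    if "always_thinking" in capabilities or (thinking is True and "thinking" in capabilities):
        return "high"
    # openai_responses already omits reasoning unless asked, so "off" is redundant.
    if thinking is False and provider_type != "openai_responses":
        return "off"
    return None

assert resolve_thinking("openai_responses", False, frozenset()) is None
assert resolve_thinking("kimi", False, frozenset()) == "off"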
tests/core/test_create_llm.py (25 additions, 0 deletions)
@@ -3,6 +3,7 @@
 from inline_snapshot import snapshot
 from kosong.chat_provider.echo import EchoChatProvider
 from kosong.chat_provider.kimi import Kimi
+from kosong.contrib.chat_provider.openai_responses import OpenAIResponses
 from pydantic import SecretStr
 
 from kimi_cli.config import LLMModel, LLMProvider
@@ -133,3 +134,27 @@ def test_create_llm_requires_base_url_for_kimi():
     model = LLMModel(provider="kimi", model="kimi-base", max_context_size=4096)
 
     assert create_llm(provider, model) is None
+
+
+def test_create_llm_openai_responses_does_not_force_reasoning_off():
+    provider = LLMProvider(
+        type="openai_responses",
+        base_url="https://openrouter.ai/api/v1",
+        api_key=SecretStr("test-key"),
+    )
+    model = LLMModel(
+        provider="openrouter_custom",
+        model="minimax/minimax-m2.5",
+        max_context_size=128000,
+        capabilities=None,
+    )
+
+    llm = create_llm(provider, model, thinking=False)
+
+    assert llm is not None
+    assert isinstance(llm.chat_provider, OpenAIResponses)
+    assert llm.chat_provider.model_parameters == snapshot(
+        {
+            "base_url": "https://openrouter.ai/api/v1/",
+        }
+    )
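
One detail worth noting in the snapshot: the provider is configured with base_url="https://openrouter.ai/api/v1" (no trailing slash), but model_parameters records "https://openrouter.ai/api/v1/"; the trailing slash is presumably added by URL normalization in the underlying client.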