Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions providers.json
Original file line number Diff line number Diff line change
Expand Up @@ -124,13 +124,13 @@
"aliases": [
"open_router"
],
"protocol": "open_ai_completions",
"default_base_url": "https://openrouter.ai/api/v1",
"protocol": "open_router",
"default_base_url": "",
"api_key_env": "OPENROUTER_API_KEY",
"api_key_required": true,
"model_env": "OPENROUTER_MODEL",
Comment on lines +127 to 131
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in 3a0b3a5providers.json now exposes "extra_headers_env": "OPENROUTER_EXTRA_HEADERS". Users can now configure HTTP-Referer:…,X-Title:… for the built-in openrouter backend.

"default_model": "openai/gpt-4o",
"description": "OpenRouter multi-provider gateway (200+ models)",
"description": "OpenRouter multi-provider gateway (200+ models, preserves reasoning across turns)",
"setup": {
"kind": "api_key",
"secret_name": "llm_openrouter_api_key",
Expand Down Expand Up @@ -246,13 +246,13 @@
"aliases": [
"deep_seek"
],
"protocol": "open_ai_completions",
"default_base_url": "https://api.deepseek.com/v1",
"protocol": "deep_seek",
"default_base_url": "",
"api_key_env": "DEEPSEEK_API_KEY",
"api_key_required": true,
"model_env": "DEEPSEEK_MODEL",
"default_model": "deepseek-chat",
"description": "DeepSeek inference API",
"description": "DeepSeek inference API (preserves reasoning_content for thinking-mode models)",
"setup": {
"kind": "api_key",
"secret_name": "llm_deepseek_api_key",
Expand Down Expand Up @@ -325,13 +325,13 @@
"google_gemini",
"google"
],
"protocol": "open_ai_completions",
"default_base_url": "https://generativelanguage.googleapis.com/v1beta/openai",
"protocol": "gemini",
"default_base_url": "",
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Medium Severity

Gemini is still advertised as list-models capable, but the UI/setup list-models paths do not speak native Gemini.

This registry entry now exposes adapter gemini with an empty default base URL while keeping can_list_models: true. The web configure surface shows the fetch button, then blocks on a missing base URL before calling /api/llm/list_models; if a base URL is supplied, the handler falls through to generic GET {base}/models with Bearer auth rather than Gemini's native /v1beta/models?key=... shape. The setup wizard has the same issue: it falls through to fetch_openai_compatible_models(def.default_base_url.unwrap_or("")), which returns no models for the new empty default. So users configuring Gemini are told model listing is supported, but it reliably fails/falls back to manual entry.

Please either add native Gemini model-listing support in the web handler/setup wizard or set can_list_models to false for Gemini until that path exists. Add a test covering adapter: "gemini" with empty/native default base URL.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in 3a0b3a5 by setting "can_list_models": false on Gemini in providers.json. The setup wizard's fall-through path (fetch_openai_compatible_models(def.default_base_url.unwrap_or(""), …)) wouldn't speak the native /v1beta/models?key=… shape, and the web list-models handler is the same. Native Gemini list-models support is filed as a follow-up rather than blocking this PR.

"api_key_env": "GEMINI_API_KEY",
"api_key_required": true,
"model_env": "GEMINI_MODEL",
"default_model": "gemini-2.5-flash",
"description": "Google Gemini (via OpenAI-compatible endpoint)",
"description": "Google Gemini native API (preserves thought_signature on tool calls)",
"setup": {
"kind": "api_key",
"secret_name": "llm_gemini_api_key",
Expand Down
176 changes: 176 additions & 0 deletions src/llm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,9 @@ fn create_registry_provider(
ProviderProtocol::OpenAiCompletions => create_openai_compat_from_registry(config),
ProviderProtocol::Anthropic => create_anthropic_from_registry(config),
ProviderProtocol::Ollama => create_ollama_from_registry(config),
ProviderProtocol::DeepSeek => create_deepseek_from_registry(config),
ProviderProtocol::Gemini => create_gemini_from_registry(config),
ProviderProtocol::OpenRouter => create_openrouter_from_registry(config),
ProviderProtocol::GithubCopilot => {
let provider =
github_copilot::GithubCopilotProvider::new(config, request_timeout_secs)?;
Expand Down Expand Up @@ -424,6 +427,179 @@ fn create_ollama_from_registry(
Ok(Arc::new(adapter))
}

/// Build a DeepSeek provider via rig-core's dedicated DeepSeek client.
///
/// Routing through this client (rather than the generic OpenAI-compat path)
/// is what makes thinking-mode tool calling work: rig-core's DeepSeek
/// implementation captures `reasoning_content` from each response and writes
/// it back onto the assistant message in the next request. Without that
/// round-trip the API rejects the second turn with HTTP 400 ("The
/// reasoning_content in the thinking mode must be passed back to the API").
/// See #3201.
fn create_deepseek_from_registry(
config: &RegistryProviderConfig,
) -> Result<Arc<dyn LlmProvider>, LlmError> {
use rig::providers::deepseek;

let api_key = config
.api_key
.as_ref()
.map(|k| k.expose_secret().to_string())
.ok_or_else(|| LlmError::AuthFailed {
provider: config.provider_id.clone(),
})?;

let client: deepseek::Client = if config.base_url.is_empty() {
deepseek::Client::new(&api_key)
} else {
deepseek::Client::builder()
.api_key(&api_key)
.base_url(&config.base_url)
.build()
}
.map_err(|e| LlmError::RequestFailed {
provider: config.provider_id.clone(),
reason: format!("Failed to create DeepSeek client: {e}"),
})?;

let model = client.completion_model(&config.model);

tracing::debug!(
provider = %config.provider_id,
model = %config.model,
base_url = if config.base_url.is_empty() { "default" } else { &config.base_url },
"Using DeepSeek provider (preserves reasoning_content across turns)"
);

Ok(Arc::new(
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

High Severity

The dedicated clients are still wrapped in RigAdapter, which drops required reasoning/signature metadata before the next tool turn.

rig-core can now parse DeepSeek/Gemini/OpenRouter provider-specific state, but RigAdapter::extract_response() only converts AssistantContent::ToolCall into IronClaw ToolCall { id, name, arguments, reasoning } and ignores AssistantContent::Reasoning, ToolCall.signature, and ToolCall.additional_params. On the next iteration, dispatcher stores ChatMessage::assistant_with_tool_calls(...), and RigAdapter::convert_messages() rebuilds bare rig tool calls without those fields. A Gemini thinking model that returned a signed functionCall will therefore still be replayed without thought_signature; DeepSeek/OpenRouter reasoning artifacts are similarly lost, so the second turn can still fail with the same 400s this PR is intended to fix.

Please either extend IronClaw's message/tool-call types plus RigAdapter conversions to round-trip AssistantContent::Reasoning, ToolCall.signature, and OpenRouter additional params across the tool loop, or use provider-specific LlmProvider implementations. Add a caller-level integration test that simulates provider response with tool call → tool result → second provider request and asserts the echoed fields are present.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch — confirmed by reading rig-core's deepseek::Message::try_from (src/providers/deepseek.rs:299-322) and gemini::Part::From<message::ToolCall> (src/providers/gemini/completion.rs:1012-1023). The dedicated clients only round-trip inside rig-core; bridging through RigAdapter was where reasoning + signatures were dropped.

Fixed end-to-end in 3a0b3a5:

  • ChatMessage carries reasoning: Option<String>; ToolCall carries signature: Option<String>; ToolCompletionResponse carries reasoning.
  • RigAdapter::extract_response captures AssistantContent::Reasoning and per-tool-call signature.
  • RigAdapter::convert_messages re-emits AssistantContent::Reasoning and pushes ToolCall::with_signature(…) when rebuilding rig messages on the next turn.
  • Plumbed via ChatMessage::with_reasoning through dispatcher (src/agent/dispatcher.rs:783-791), job worker (src/worker/job.rs:1736-1742), container worker (src/worker/container.rs:530-535), routine engine (src/agent/routine_engine.rs:1940-1948), and the orchestrator-worker proxy (src/worker/api.rs + src/orchestrator/api.rs).
  • Regression test reasoning_and_signature_round_trip_through_chat_message in src/llm/rig_adapter.rs simulates a 2-turn tool loop end-to-end: rig response with Reasoning + signed ToolCall → IronClaw ChatMessage → rebuilt rig message, asserting both fields survive.

OpenRouter's additional_params (per-tool-call reasoning_details) is not plumbed yet — left for follow-up #3327 since it requires a typed payload field rather than the simple string round-trip used for DeepSeek/Gemini.

RigAdapter::new(model, &config.model)
.with_unsupported_params(config.unsupported_params.clone()),
))
}

/// Build an OpenRouter provider via rig-core's dedicated OpenRouter client.
///
/// Routing through this client (rather than the generic OpenAI-compat path)
/// preserves OpenRouter's `reasoning`, `reasoning_details`, and per-tool-call
/// signatures across turns. The generic OpenAI client strips all of them, so
/// any thinking-mode model accessed via OpenRouter (Claude with thinking,
/// OpenAI o-series, DeepSeek-R1, Gemini 2.5+, Qwen QwQ, …) loses its
/// reasoning artifacts on the assistant message and the next request fails
/// the same way as #3201 / #3225.
fn create_openrouter_from_registry(
config: &RegistryProviderConfig,
) -> Result<Arc<dyn LlmProvider>, LlmError> {
use rig::providers::openrouter;

let api_key = config
.api_key
.as_ref()
.map(|k| k.expose_secret().to_string())
.ok_or_else(|| LlmError::AuthFailed {
provider: config.provider_id.clone(),
})?;

// OpenRouter attribution headers (`HTTP-Referer`, `X-Title`) and any other
// user-configured extras must follow the request through.
let mut extra_headers = reqwest::header::HeaderMap::new();
for (key, value) in &config.extra_headers {
let name = match reqwest::header::HeaderName::from_bytes(key.as_bytes()) {
Ok(n) => n,
Err(e) => {
tracing::warn!(header = %key, error = %e, "Skipping extra header: invalid name");
continue;
}
};
Comment on lines +517 to +529
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

False positive — http::HeaderName::from_bytes normalizes input (see http-1.4.0/src/header/name.rs:1115: "This function normalizes the input"); uppercase ASCII parses fine and is lowercased internally via the HEADER_CHARS table. HTTP-Referer and X-Title round-trip without warnings. Added a clarifying comment in 3a0b3a5 so the warn block doesn't read as suspect.

let val = match reqwest::header::HeaderValue::from_str(value) {
Ok(v) => v,
Err(e) => {
tracing::warn!(header = %key, error = %e, "Skipping extra header: invalid value");
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in 3a0b3a5 — both header-warn sites (create_openrouter_from_registry and create_openai_compat_from_registry) now include provider = %config.provider_id so misconfigurations are easy to attribute in multi-provider deployments.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Duplicate of #discussion_r3199146864 — fixed in 3a0b3a5 (added provider_id to both header-warn sites).

continue;
}
};
extra_headers.insert(name, val);
}

let mut builder = openrouter::Client::builder().api_key(&api_key);
if !config.base_url.is_empty() {
builder = builder.base_url(&config.base_url);
}
if !extra_headers.is_empty() {
builder = builder.http_headers(extra_headers);
}

let client: openrouter::Client = builder.build().map_err(|e| LlmError::RequestFailed {
provider: config.provider_id.clone(),
reason: format!("Failed to create OpenRouter client: {e}"),
})?;

let model = client.completion_model(&config.model);

tracing::debug!(
provider = %config.provider_id,
model = %config.model,
base_url = if config.base_url.is_empty() { "default" } else { &config.base_url },
"Using OpenRouter provider (preserves reasoning + signatures across turns)"
);

Ok(Arc::new(
RigAdapter::new(model, &config.model)
.with_unsupported_params(config.unsupported_params.clone()),
))
}

/// Build a Gemini provider via rig-core's dedicated Gemini client.
///
/// Routing through this client (rather than the generic OpenAI-compat path
/// at `/v1beta/openai`) is what makes Gemini thinking-mode tool calling
/// work: rig-core's Gemini implementation round-trips `thought_signature`
/// on each `functionCall`. Without that round-trip the API rejects the
/// next turn with HTTP 400 ("Function call is missing a thought_signature
/// in functionCall parts"). See #3225.
///
/// This is API-key auth only (`GEMINI_API_KEY`). Users on Gemini OAuth go
/// through the separate `gemini_oauth` backend.
fn create_gemini_from_registry(
config: &RegistryProviderConfig,
) -> Result<Arc<dyn LlmProvider>, LlmError> {
use rig::providers::gemini;

let api_key = config
.api_key
.as_ref()
.map(|k| k.expose_secret().to_string())
.ok_or_else(|| LlmError::AuthFailed {
provider: config.provider_id.clone(),
})?;

let client: gemini::Client = if config.base_url.is_empty() {
gemini::Client::new(&api_key)
} else {
gemini::Client::builder()
.api_key(&api_key)
.base_url(&config.base_url)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

High Severity

Native Gemini will use stale OpenAI-shim base URL overrides from existing installs.

Before this PR, the UI/config default for Gemini was the OpenAI-compatible shim https://generativelanguage.googleapis.com/v1beta/openai. Existing users may have this saved in llm_builtin_overrides[gemini].base_url; config resolution still prefers DB overrides over the new empty/native registry default. This factory then passes any non-empty URL directly to gemini::Client::builder(), whose native client appends paths like /v1beta/models/{model}:generateContent. Result: an upgraded user can send native Gemini requests to .../v1beta/openai/v1beta/models/..., breaking normal Gemini calls instead of using the native default endpoint.

Please detect/migrate/ignore the old Gemini OpenAI-shim URL when ProviderProtocol::Gemini is selected, or otherwise clear stale persisted overrides. Add a regression test for resolving a saved https://generativelanguage.googleapis.com/v1beta/openai override after this protocol switch.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in 3a0b3a5. New sanitize_gemini_base_url in src/llm/mod.rs discards any persisted …/v1beta/openai (or …/v1/openai) shim URL when ProviderProtocol::Gemini is selected, with a warn! log telling the operator how to clear the stale override. Custom proxies and region endpoints pass through unchanged.

Regression: sanitize_gemini_base_url_strips_legacy_openai_shim (covers exact match, trailing slash, mixed case, and the /v1/openai variant) plus sanitize_gemini_base_url_passes_through_empty and sanitize_gemini_base_url_preserves_custom_endpoints.

.build()
}
.map_err(|e| LlmError::RequestFailed {
provider: config.provider_id.clone(),
reason: format!("Failed to create Gemini client: {e}"),
})?;

let model = client.completion_model(&config.model);

tracing::debug!(
provider = %config.provider_id,
model = %config.model,
base_url = if config.base_url.is_empty() { "default" } else { &config.base_url },
"Using Gemini provider (preserves thought_signature across turns)"
);

Ok(Arc::new(
RigAdapter::new(model, &config.model)
.with_unsupported_params(config.unsupported_params.clone()),
))
}

/// Create an OpenAI Codex provider with OAuth authentication.
///
/// This is async because it needs to ensure authentication before
Expand Down
57 changes: 57 additions & 0 deletions src/llm/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,21 @@ pub enum ProviderProtocol {
Ollama,
/// GitHub Copilot API (OpenAI-compatible with token exchange).
GithubCopilot,
/// DeepSeek API. Routes through rig-core's dedicated DeepSeek client,
/// which round-trips `reasoning_content` for thinking-mode models —
/// the generic OpenAI client strips it. (#3201)
DeepSeek,
/// Google Gemini native API. Routes through rig-core's dedicated Gemini
/// client, which round-trips `thought_signature` on tool calls —
/// the OpenAI-compat shim strips it. (#3225)
Gemini,
/// OpenRouter (multi-model gateway). Routes through rig-core's dedicated
/// OpenRouter client, which round-trips `reasoning`, `reasoning_details`
/// (Summary / Encrypted / Text), and per-tool-call signatures —
/// the generic OpenAI client strips all of them, breaking thinking-mode
/// tool calling on every reasoning model OpenRouter exposes (Claude with
/// thinking, OpenAI o-series, DeepSeek-R1, Gemini 2.5+, Qwen QwQ, …).
OpenRouter,
}

/// How the setup wizard should collect credentials for this provider.
Expand Down Expand Up @@ -478,6 +493,48 @@ mod tests {
}
}

/// Regression for #3201 / #3225 and the OpenRouter generalisation:
/// providers whose APIs return reasoning artifacts (DeepSeek's
/// `reasoning_content`, Gemini's `thought_signature`, OpenRouter's
/// `reasoning_details` + signatures) must NOT use the generic
/// `OpenAiCompletions` protocol. The OpenAI-compat path goes through
/// rig-core's OpenAI client, which strips those fields, breaking
/// multi-turn tool calling for every thinking-mode model these
/// providers expose. They must route through the dedicated rig-core
/// clients which round-trip the artifacts on the next request.
#[test]
fn reasoning_aware_providers_use_dedicated_protocol_not_openai_compat() {
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PR description already references the actual test name (reasoning_aware_providers_use_dedicated_protocol_not_openai_compat); the bot may have read a stale revision. The body has been refreshed in 3a0b3a5 along with the new round-trip regression tests.

let providers: Vec<ProviderDefinition> =
serde_json::from_str(include_str!("../../providers.json")).unwrap();
let by_id = |id: &str| providers.iter().find(|p| p.id == id).cloned();

let deepseek = by_id("deepseek").expect("deepseek entry must exist");
assert_eq!(
deepseek.protocol,
ProviderProtocol::DeepSeek,
"deepseek must use DeepSeek protocol — OpenAiCompletions strips \
reasoning_content and breaks thinking-mode tool calling (#3201)",
);

let gemini = by_id("gemini").expect("gemini entry must exist");
assert_eq!(
gemini.protocol,
ProviderProtocol::Gemini,
"gemini must use Gemini protocol — OpenAiCompletions strips \
thought_signature and breaks tool calling on thinking models (#3225)",
);

let openrouter = by_id("openrouter").expect("openrouter entry must exist");
assert_eq!(
openrouter.protocol,
ProviderProtocol::OpenRouter,
"openrouter must use OpenRouter protocol — OpenAiCompletions \
strips reasoning_details and tool-call signatures, breaking \
every thinking-mode model OpenRouter exposes (Claude with \
thinking, OpenAI o-series, DeepSeek-R1, Gemini 2.5+, Qwen QwQ)",
);
}

#[test]
fn test_openai_compatible_providers_have_base_url() {
let providers: Vec<ProviderDefinition> =
Expand Down
Loading