Skip to content

Commit 200aed1

Browse files
zmanian and claude authored
feat: configurable LLM request timeout via LLM_REQUEST_TIMEOUT_SECS (#615) (#630)
Add LLM_REQUEST_TIMEOUT_SECS env var (default: 120) to configure the HTTP request timeout for LLM API calls. Primarily useful for local models (Ollama, vLLM, LM Studio) that need more time for prompt evaluation on consumer hardware. The timeout is applied to the NearAI provider's HTTP client. Other providers (Anthropic, OpenAI) use rig-core's default client.

- Add request_timeout_secs field to LlmConfig
- Thread timeout through create_llm_provider -> NearAiChatProvider
- Add NearAiChatProvider::new_with_timeout constructor
- Add .env.example documentation
- 2 regression tests for default and custom timeout values

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 4c0275b commit 200aed1

5 files changed

Lines changed: 72 additions & 8 deletions

File tree

.env.example

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ DATABASE_POOL_SIZE=10
55
# LLM Provider
66
# LLM_BACKEND=nearai # default
77
# Possible values: nearai, ollama, openai_compatible, openai, anthropic, tinfoil
8+
# LLM_REQUEST_TIMEOUT_SECS=120 # Increase for local LLMs (Ollama, vLLM, LM Studio)
89

910
# === Anthropic Direct ===
1011
# Two auth modes:

src/config/llm.rs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,10 @@ pub struct LlmConfig {
103103
/// Resolved provider config for registry-based providers.
104104
/// `None` when backend is "nearai".
105105
pub provider: Option<RegistryProviderConfig>,
106+
/// HTTP request timeout in seconds for LLM API calls.
107+
/// Default: 120. Increase for local LLMs (Ollama, vLLM, LM Studio) that
108+
/// need more time for prompt evaluation on consumer hardware.
109+
pub request_timeout_secs: u64,
106110
}
107111

108112
/// NEAR AI configuration.
@@ -165,6 +169,7 @@ impl LlmConfig {
165169
smart_routing_cascade: false,
166170
},
167171
provider: None,
172+
request_timeout_secs: 120,
168173
}
169174
}
170175

@@ -254,6 +259,8 @@ impl LlmConfig {
254259
)?)
255260
};
256261

262+
let request_timeout_secs = parse_optional_env("LLM_REQUEST_TIMEOUT_SECS", 120)?;
263+
257264
Ok(Self {
258265
backend: if is_nearai {
259266
"nearai".to_string()
@@ -265,6 +272,7 @@ impl LlmConfig {
265272
session,
266273
nearai,
267274
provider,
275+
request_timeout_secs,
268276
})
269277
}
270278

@@ -1016,4 +1024,30 @@ mod tests {
10161024
assert_eq!(parsed, variant, "round-trip failed for {s}");
10171025
}
10181026
}
1027+
1028+
#[test]
1029+
fn test_request_timeout_defaults_to_120() {
1030+
let _guard = ENV_MUTEX.lock().expect("env mutex poisoned");
1031+
// SAFETY: Under ENV_MUTEX.
1032+
unsafe {
1033+
std::env::remove_var("LLM_REQUEST_TIMEOUT_SECS");
1034+
}
1035+
let config = LlmConfig::resolve(&Settings::default()).expect("resolve");
1036+
assert_eq!(config.request_timeout_secs, 120);
1037+
}
1038+
1039+
#[test]
1040+
fn test_request_timeout_configurable() {
1041+
let _guard = ENV_MUTEX.lock().expect("env mutex poisoned");
1042+
// SAFETY: Under ENV_MUTEX.
1043+
unsafe {
1044+
std::env::set_var("LLM_REQUEST_TIMEOUT_SECS", "300");
1045+
}
1046+
let config = LlmConfig::resolve(&Settings::default()).expect("resolve");
1047+
assert_eq!(config.request_timeout_secs, 300);
1048+
// SAFETY: Cleanup
1049+
unsafe {
1050+
std::env::remove_var("LLM_REQUEST_TIMEOUT_SECS");
1051+
}
1052+
}
10191053
}

src/llm/mod.rs

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,10 @@ pub fn create_llm_provider(
5858
config: &LlmConfig,
5959
session: Arc<SessionManager>,
6060
) -> Result<Arc<dyn LlmProvider>, LlmError> {
61+
let timeout = config.request_timeout_secs;
62+
6163
if config.backend == "nearai" || config.backend == "near_ai" || config.backend == "near" {
62-
return create_llm_provider_with_config(&config.nearai, session);
64+
return create_llm_provider_with_config(&config.nearai, session, timeout);
6365
}
6466

6567
let reg_config = config
@@ -79,6 +81,7 @@ pub fn create_llm_provider(
7981
pub fn create_llm_provider_with_config(
8082
config: &NearAiConfig,
8183
session: Arc<SessionManager>,
84+
request_timeout_secs: u64,
8285
) -> Result<Arc<dyn LlmProvider>, LlmError> {
8386
let auth_mode = if config.api_key.is_some() {
8487
"API key"
@@ -89,9 +92,14 @@ pub fn create_llm_provider_with_config(
8992
model = %config.model,
9093
base_url = %config.base_url,
9194
auth = auth_mode,
95+
timeout_secs = request_timeout_secs,
9296
"Using NEAR AI (Chat Completions API)"
9397
);
94-
Ok(Arc::new(NearAiChatProvider::new(config.clone(), session)?))
98+
Ok(Arc::new(NearAiChatProvider::new_with_timeout(
99+
config.clone(),
100+
session,
101+
request_timeout_secs,
102+
)?))
95103
}
96104

97105
/// Create a provider from a registry-resolved config.
@@ -365,7 +373,11 @@ pub fn build_provider_chain(
365373
let llm: Arc<dyn LlmProvider> = if let Some(ref cheap_model) = config.nearai.cheap_model {
366374
let mut cheap_config = config.nearai.clone();
367375
cheap_config.model = cheap_model.clone();
368-
let cheap = create_llm_provider_with_config(&cheap_config, session.clone())?;
376+
let cheap = create_llm_provider_with_config(
377+
&cheap_config,
378+
session.clone(),
379+
config.request_timeout_secs,
380+
)?;
369381
let cheap: Arc<dyn LlmProvider> = if retry_config.max_retries > 0 {
370382
Arc::new(RetryProvider::new(cheap, retry_config.clone()))
371383
} else {
@@ -397,7 +409,11 @@ pub fn build_provider_chain(
397409
}
398410
let mut fallback_config = config.nearai.clone();
399411
fallback_config.model = fallback_model.clone();
400-
let fallback = create_llm_provider_with_config(&fallback_config, session.clone())?;
412+
let fallback = create_llm_provider_with_config(
413+
&fallback_config,
414+
session.clone(),
415+
config.request_timeout_secs,
416+
)?;
401417
tracing::info!(
402418
primary = %llm.model_name(),
403419
fallback = %fallback.model_name(),
@@ -503,6 +519,7 @@ mod tests {
503519
session: SessionConfig::default(),
504520
nearai: test_nearai_config(),
505521
provider: None,
522+
request_timeout_secs: 120,
506523
}
507524
}
508525

src/llm/nearai_chat.rs

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,17 +58,28 @@ impl NearAiChatProvider {
5858
/// By default this enables tool-message flattening for compatibility with
5959
/// providers that reject `role: "tool"` messages.
6060
pub fn new(config: NearAiConfig, session: Arc<SessionManager>) -> Result<Self, LlmError> {
61-
Self::new_with_flatten(config, session, true)
61+
Self::new_with_options(config, session, true, 120)
6262
}
6363

64-
/// Create a chat completions provider with configurable tool-message flattening.
65-
pub fn new_with_flatten(
64+
/// Create a new provider with a custom request timeout.
65+
pub fn new_with_timeout(
66+
config: NearAiConfig,
67+
session: Arc<SessionManager>,
68+
request_timeout_secs: u64,
69+
) -> Result<Self, LlmError> {
70+
Self::new_with_options(config, session, true, request_timeout_secs)
71+
}
72+
73+
/// Create a chat completions provider with configurable tool-message flattening
74+
/// and request timeout.
75+
pub fn new_with_options(
6676
config: NearAiConfig,
6777
session: Arc<SessionManager>,
6878
flatten_tool_messages: bool,
79+
request_timeout_secs: u64,
6980
) -> Result<Self, LlmError> {
7081
let client = Client::builder()
71-
.timeout(std::time::Duration::from_secs(120))
82+
.timeout(std::time::Duration::from_secs(request_timeout_secs))
7283
.build()
7384
.map_err(|e| LlmError::RequestFailed {
7485
provider: "nearai_chat".to_string(),

src/setup/wizard.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1491,6 +1491,7 @@ impl SetupWizard {
14911491
smart_routing_cascade: true,
14921492
},
14931493
provider: None,
1494+
request_timeout_secs: 120,
14941495
};
14951496

14961497
match create_llm_provider(&config, session) {

0 commit comments

Comments (0)