Merge pull request #2 from florianbuetow/feature-timeout-config

feat: add configurable request timeout for LLM calls
author: Paul Bütow <1224732+snonux@users.noreply.github.com> 2026-02-02 20:55:36 +0200
committer: GitHub <noreply@github.com> 2026-02-02 20:55:36 +0200
commit: da01d65da337cc2f6c99d8236140f8fb45c6bd5e (patch)
tree: 4887ab977b8a92165e180dd3d3c7323df7e6172b
parent: 7194696eb8c4c5bd50f69df96e9a6b87cec1f049 (diff)
parent: 791cd282ec143c9bbb44eba3b2271f1573272ba5 (diff)
13 files changed, 128 insertions, 12 deletions
diff --git a/PR.md b/PR.md
new file mode 100644
index 0000000..50a5393
--- /dev/null
+++ b/PR.md
@@ -0,0 +1,65 @@
+# Add configurable request timeout for LLM calls
+
+## Motivation
+
+When using locally-hosted LLMs (via LM Studio, Ollama, or other OpenAI-compatible servers), the default 30-second HTTP timeout is often insufficient. Local models, especially larger ones like Gemma 3 27B, can take significantly longer to generate responses, resulting in "context deadline exceeded" errors.
+
+This change allows users to configure a longer timeout to accommodate slower local inference.
+
+## Changes
+
+### New configuration option
+
+Added `request_timeout` (in seconds) to the `[general]` section:
+
+```toml
+[general]
+request_timeout = 120  # seconds, default 30
+```
+
+Or via environment variable:
+```sh
+export HEXAI_REQUEST_TIMEOUT=120
+```
+
+### Implementation
+
+- Added `RequestTimeout` field (in seconds) to config structs with a default of 30 seconds; values of 0 or less fall back to that default
+- Each LLM client constructor now has a `*WithTimeout` variant that accepts the timeout
+- Original constructors keep their signatures; each now delegates to its `*WithTimeout` variant with a timeout of 0, which selects the 30-second default
+- `NewFromConfig` passes the configured timeout to clients
+
+### Files modified
+
+- `internal/appconfig/config.go` - Config field, parsing, merge logic, env var support
+- `internal/llm/provider.go` - Added `RequestTimeout` to `Config`, calls `*WithTimeout` constructors
+- `internal/llm/openai.go` - Added `newOpenAIWithTimeout`
+- `internal/llm/ollama.go` - Added `newOllamaWithTimeout`
+- `internal/llm/openrouter.go` - Added `newOpenRouterWithTimeout`
+- `internal/llm/copilot.go` - Added `newCopilotWithTimeout`
+- `internal/llm/anthropic.go` - Added `newAnthropicWithTimeout`
+- `internal/hexailsp/run.go` - Pass `RequestTimeout` to `llm.Config`
+- `internal/llmutils/client.go` - Pass `RequestTimeout` to `llm.Config`
+- `internal/lsp/server.go` - Pass `RequestTimeout` to `llm.Config`
+- `config.toml.example` - Documented option
+- `docs/configuration.md` - Added `HEXAI_REQUEST_TIMEOUT` to env vars list
+
+### Test impact
+
+No test files were modified. The original constructor signatures are preserved, so all existing tests continue to work unchanged.
+
+## Testing
+
+```sh
+# Set the timeout in the config file first (TOML, not a shell command):
+#   [general]
+#   request_timeout = 120
+
+# Test with local LLM
+cat somefile.go | hexai "review this code"
+```
+
+All existing tests pass:
+```sh
+HEXAI_TEST_SKIP_NET=1 go test ./...
+```
diff --git a/config.toml.example b/config.toml.example
index ae8110a..cc34e04 100644
--- a/config.toml.example
+++ b/config.toml.example
@@ -3,6 +3,7 @@
 [general]
 max_tokens = 4000
 max_context_tokens = 4000
+request_timeout = 30                  # LLM request timeout in seconds
 # context_mode controls how much of the current document is sent as extra context:
 # - minimal: no additional context beyond the request payload.
 # - window: include a sliding window of ~context_window_lines around the cursor.
diff --git a/docs/configuration.md b/docs/configuration.md
index 50dfbcb..54ac85f 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -12,7 +12,7 @@ Environment overrides
 - All options can be overridden by environment variables prefixed with `HEXAI_`.
 - Env values take precedence over the config file.
 - Examples:
-  - `HEXAI_PROVIDER`, `HEXAI_MAX_TOKENS`, `HEXAI_CONTEXT_MODE`, `HEXAI_CONTEXT_WINDOW_LINES`, `HEXAI_MAX_CONTEXT_TOKENS`, `HEXAI_LOG_PREVIEW_LIMIT`
+  - `HEXAI_PROVIDER`, `HEXAI_MAX_TOKENS`, `HEXAI_CONTEXT_MODE`, `HEXAI_CONTEXT_WINDOW_LINES`, `HEXAI_MAX_CONTEXT_TOKENS`, `HEXAI_LOG_PREVIEW_LIMIT`, `HEXAI_REQUEST_TIMEOUT`
   - `HEXAI_CODING_TEMPERATURE`
   - `HEXAI_COMPLETION_DEBOUNCE_MS`, `HEXAI_COMPLETION_THROTTLE_MS`
   - `HEXAI_TRIGGER_CHARACTERS` (comma-separated, e.g., `".,:,_ , "`)
diff --git a/internal/appconfig/config.go b/internal/appconfig/config.go
index 78237be..b17c5d4 100644
--- a/internal/appconfig/config.go
+++ b/internal/appconfig/config.go
@@ -27,6 +27,7 @@ type App struct {
 	ContextWindowLines int    `json:"context_window_lines" toml:"context_window_lines"`
 	MaxContextTokens   int    `json:"max_context_tokens" toml:"max_context_tokens"`
 	LogPreviewLimit    int    `json:"log_preview_limit" toml:"log_preview_limit"`
+	RequestTimeout     int    `json:"request_timeout" toml:"request_timeout"`
 	// Single knob for LSP requests; if set, overrides hardcoded temps in LSP.
 	CodingTemperature *float64 `json:"coding_temperature" toml:"coding_temperature"`
 	// Minimum identifier characters required for manual (TriggerKind=1) invoke
@@ -141,6 +142,7 @@ func newDefaultConfig() App {
 		ContextWindowLines:    120,
 		MaxContextTokens:      4000,
 		LogPreviewLimit:       100,
+		RequestTimeout:        30,
 		CodingTemperature:     &t,
 		OpenAITemperature:     &t,
 		OllamaTemperature:     &t,
@@ -256,6 +258,7 @@ type sectionGeneral struct {
 	ContextWindowLines int      `toml:"context_window_lines"`
 	MaxContextTokens   int      `toml:"max_context_tokens"`
 	CodingTemperature  *float64 `toml:"coding_temperature"`
+	RequestTimeout     int      `toml:"request_timeout"`
 }
 
 type sectionLogging struct {
@@ -419,6 +422,7 @@ func (fc *fileConfig) toApp() App {
 			ContextWindowLines: fc.General.ContextWindowLines,
 			MaxContextTokens:   fc.General.MaxContextTokens,
 			CodingTemperature:  fc.General.CodingTemperature,
+			RequestTimeout:     fc.General.RequestTimeout,
 		}
 		out.mergeBasics(&tmp)
 	}
@@ -883,6 +887,9 @@ func (a *App) mergeBasics(other *App) {
 	if other.LogPreviewLimit >= 0 {
 		a.LogPreviewLimit = other.LogPreviewLimit
 	}
+	if other.RequestTimeout > 0 {
+		a.RequestTimeout = other.RequestTimeout
+	}
 	if other.CodingTemperature != nil { // allow explicit 0.0
 		a.CodingTemperature = other.CodingTemperature
 	}
@@ -1185,6 +1192,10 @@ func loadFromEnv(logger *log.Logger) *App {
 		out.LogPreviewLimit = n
 		any = true
 	}
+	if n, ok := parseInt("HEXAI_REQUEST_TIMEOUT"); ok {
+		out.RequestTimeout = n
+		any = true
+	}
 	if n, ok := parseInt("HEXAI_MANUAL_INVOKE_MIN_PREFIX"); ok {
 		out.ManualInvokeMinPrefix = n
 		any = true
diff --git a/internal/hexailsp/run.go b/internal/hexailsp/run.go
index 1afa70a..47ed648 100644
--- a/internal/hexailsp/run.go
+++ b/internal/hexailsp/run.go
@@ -109,6 +109,7 @@ func buildClientIfNil(cfg appconfig.App, client llm.Client) llm.Client {
 	}
 	llmCfg := llm.Config{
 		Provider:              cfg.Provider,
+		RequestTimeout:        cfg.RequestTimeout,
 		OpenAIBaseURL:         cfg.OpenAIBaseURL,
 		OpenAIModel:           cfg.OpenAIModel,
 		OpenAITemperature:     cfg.OpenAITemperature,
diff --git a/internal/llm/anthropic.go b/internal/llm/anthropic.go
index c0cdc9a..a6c1454 100644
--- a/internal/llm/anthropic.go
+++ b/internal/llm/anthropic.go
@@ -90,14 +90,21 @@ var (
 // newAnthropic constructs an Anthropic client using explicit configuration values.
 // The apiKey may be empty; calls will fail until a valid key is supplied.
 func newAnthropic(baseURL, model, apiKey string, defaultTemp *float64) Client {
+	return newAnthropicWithTimeout(baseURL, model, apiKey, defaultTemp, 0)
+}
+
+func newAnthropicWithTimeout(baseURL, model, apiKey string, defaultTemp *float64, timeoutSec int) Client {
 	if strings.TrimSpace(baseURL) == "" {
 		baseURL = "https://api.anthropic.com/v1"
 	}
 	if strings.TrimSpace(model) == "" {
 		model = "claude-3-5-sonnet-20241022"
 	}
+	if timeoutSec <= 0 {
+		timeoutSec = 30
+	}
 	return anthropicClient{
-		httpClient:         &http.Client{Timeout: 30 * time.Second},
+		httpClient:         &http.Client{Timeout: time.Duration(timeoutSec) * time.Second},
 		apiKey:             apiKey,
 		baseURL:            baseURL,
 		defaultModel:       model,
diff --git a/internal/llm/copilot.go b/internal/llm/copilot.go
index b439ed3..43419ea 100644
--- a/internal/llm/copilot.go
+++ b/internal/llm/copilot.go
@@ -64,6 +64,10 @@ type copilotChatResponse struct {
 
 // Constructor (kept among the first functions by convention)
 func newCopilot(baseURL, model, apiKey string, defaultTemp *float64) Client {
+	return newCopilotWithTimeout(baseURL, model, apiKey, defaultTemp, 0)
+}
+
+func newCopilotWithTimeout(baseURL, model, apiKey string, defaultTemp *float64, timeoutSec int) Client {
 	if strings.TrimSpace(baseURL) == "" {
 		baseURL = "https://api.githubcopilot.com"
 	}
@@ -72,8 +76,11 @@ func newCopilot(baseURL, model, apiKey string, defaultTemp *float64) Client {
 		// Default to a broadly available, cost-effective option.
 		model = "gpt-4o-mini"
 	}
+	if timeoutSec <= 0 {
+		timeoutSec = 30
+	}
 	return copilotClient{
-		httpClient:         &http.Client{Timeout: 30 * time.Second},
+		httpClient:         &http.Client{Timeout: time.Duration(timeoutSec) * time.Second},
 		apiKey:             apiKey,
 		baseURL:            strings.TrimRight(baseURL, "/"),
 		defaultModel:       model,
diff --git a/internal/llm/ollama.go b/internal/llm/ollama.go
index f355166..a22dd7b 100644
--- a/internal/llm/ollama.go
+++ b/internal/llm/ollama.go
@@ -42,14 +42,21 @@ type ollamaChatResponse struct {
 
 // Constructor (kept among the first functions by convention)
 func newOllama(baseURL, model string, defaultTemp *float64) Client {
+	return newOllamaWithTimeout(baseURL, model, defaultTemp, 0)
+}
+
+func newOllamaWithTimeout(baseURL, model string, defaultTemp *float64, timeoutSec int) Client {
 	if strings.TrimSpace(baseURL) == "" {
 		baseURL = "http://localhost:11434"
 	}
 	if strings.TrimSpace(model) == "" {
 		model = "qwen3-coder:30b-a3b-q4_K_M"
 	}
+	if timeoutSec <= 0 {
+		timeoutSec = 30
+	}
 	return ollamaClient{
-		httpClient:         &http.Client{Timeout: 30 * time.Second},
+		httpClient:         &http.Client{Timeout: time.Duration(timeoutSec) * time.Second},
 		baseURL:            strings.TrimRight(baseURL, "/"),
 		defaultModel:       model,
 		chatLogger:         logging.NewChatLogger("ollama"),
diff --git a/internal/llm/openai.go b/internal/llm/openai.go
index b97111d..6bc3a7c 100644
--- a/internal/llm/openai.go
+++ b/internal/llm/openai.go
@@ -77,14 +77,21 @@ type oaStreamChunk struct {
 // newOpenAI constructs an OpenAI client using explicit configuration values.
 // The apiKey may be empty; calls will fail until a valid key is supplied.
 func newOpenAI(baseURL, model, apiKey string, defaultTemp *float64) Client {
+	return newOpenAIWithTimeout(baseURL, model, apiKey, defaultTemp, 0)
+}
+
+func newOpenAIWithTimeout(baseURL, model, apiKey string, defaultTemp *float64, timeoutSec int) Client {
 	if strings.TrimSpace(baseURL) == "" {
 		baseURL = "https://api.openai.com/v1"
 	}
 	if strings.TrimSpace(model) == "" {
 		model = "gpt-4.1"
 	}
+	if timeoutSec <= 0 {
+		timeoutSec = 30
+	}
 	return openAIClient{
-		httpClient:         &http.Client{Timeout: 30 * time.Second},
+		httpClient:         &http.Client{Timeout: time.Duration(timeoutSec) * time.Second},
 		apiKey:             apiKey,
 		baseURL:            baseURL,
 		defaultModel:       model,
diff --git a/internal/llm/openrouter.go b/internal/llm/openrouter.go
index 4aae398..21e3102 100644
--- a/internal/llm/openrouter.go
+++ b/internal/llm/openrouter.go
@@ -23,14 +23,21 @@ type openRouterClient struct {
 }
 
 func newOpenRouter(baseURL, model, apiKey string, defaultTemp *float64) Client {
+	return newOpenRouterWithTimeout(baseURL, model, apiKey, defaultTemp, 0)
+}
+
+func newOpenRouterWithTimeout(baseURL, model, apiKey string, defaultTemp *float64, timeoutSec int) Client {
 	if strings.TrimSpace(baseURL) == "" {
 		baseURL = "https://openrouter.ai/api/v1"
 	}
 	if strings.TrimSpace(model) == "" {
 		model = "openrouter/auto"
 	}
+	if timeoutSec <= 0 {
+		timeoutSec = 30
+	}
 	return openRouterClient{
-		httpClient:         &http.Client{Timeout: 30 * time.Second},
+		httpClient:         &http.Client{Timeout: time.Duration(timeoutSec) * time.Second},
 		apiKey:             apiKey,
 		baseURL:            baseURL,
 		defaultModel:       model,
diff --git a/internal/llm/provider.go b/internal/llm/provider.go
index ae840b0..297f1f3 100644
--- a/internal/llm/provider.go
+++ b/internal/llm/provider.go
@@ -64,7 +64,8 @@ func WithStop(stop ...string) RequestOption {
 
 // Config defines provider configuration read from the Hexai config file.
 type Config struct {
-	Provider string
+	Provider       string
+	RequestTimeout int // seconds; 0 means use default (30s)
 	// OpenAI options
 	OpenAIBaseURL     string
 	OpenAIModel       string
@@ -119,7 +120,7 @@ func NewFromConfig(cfg Config, openAIAPIKey, openRouterAPIKey, copilotAPIKey, an
 			v := 0.2
 			cfg.OpenAITemperature = &v
 		}
-		return newOpenAI(cfg.OpenAIBaseURL, cfg.OpenAIModel, openAIAPIKey, cfg.OpenAITemperature), nil
+		return newOpenAIWithTimeout(cfg.OpenAIBaseURL, cfg.OpenAIModel, openAIAPIKey, cfg.OpenAITemperature, cfg.RequestTimeout), nil
 	case "openrouter":
 		if strings.TrimSpace(openRouterAPIKey) == "" {
 			return nil, errors.New("missing OPENROUTER_API_KEY for provider openrouter")
@@ -128,13 +129,13 @@ func NewFromConfig(cfg Config, openAIAPIKey, openRouterAPIKey, copilotAPIKey, an
 			t := 0.2
 			cfg.OpenRouterTemperature = &t
 		}
-		return newOpenRouter(cfg.OpenRouterBaseURL, cfg.OpenRouterModel, openRouterAPIKey, cfg.OpenRouterTemperature), nil
+		return newOpenRouterWithTimeout(cfg.OpenRouterBaseURL, cfg.OpenRouterModel, openRouterAPIKey, cfg.OpenRouterTemperature, cfg.RequestTimeout), nil
 	case "ollama":
 		if cfg.OllamaTemperature == nil {
 			t := 0.2
 			cfg.OllamaTemperature = &t
 		}
-		return newOllama(cfg.OllamaBaseURL, cfg.OllamaModel, cfg.OllamaTemperature), nil
+		return newOllamaWithTimeout(cfg.OllamaBaseURL, cfg.OllamaModel, cfg.OllamaTemperature, cfg.RequestTimeout), nil
 	case "copilot":
 		if strings.TrimSpace(copilotAPIKey) == "" {
 			return nil, errors.New("missing COPILOT_API_KEY for provider copilot")
@@ -143,7 +144,7 @@ func NewFromConfig(cfg Config, openAIAPIKey, openRouterAPIKey, copilotAPIKey, an
 			t := 0.2
 			cfg.CopilotTemperature = &t
 		}
-		return newCopilot(cfg.CopilotBaseURL, cfg.CopilotModel, copilotAPIKey, cfg.CopilotTemperature), nil
+		return newCopilotWithTimeout(cfg.CopilotBaseURL, cfg.CopilotModel, copilotAPIKey, cfg.CopilotTemperature, cfg.RequestTimeout), nil
 	case "anthropic":
 		if strings.TrimSpace(anthropicAPIKey) == "" {
 			return nil, errors.New("missing ANTHROPIC_API_KEY for provider anthropic")
@@ -152,7 +153,7 @@ func NewFromConfig(cfg Config, openAIAPIKey, openRouterAPIKey, copilotAPIKey, an
 			t := 0.2
 			cfg.AnthropicTemperature = &t
 		}
-		return newAnthropic(cfg.AnthropicBaseURL, cfg.AnthropicModel, anthropicAPIKey, cfg.AnthropicTemperature), nil
+		return newAnthropicWithTimeout(cfg.AnthropicBaseURL, cfg.AnthropicModel, anthropicAPIKey, cfg.AnthropicTemperature, cfg.RequestTimeout), nil
 	default:
 		return nil, errors.New("unknown LLM provider: " + p)
 	}
diff --git a/internal/llmutils/client.go b/internal/llmutils/client.go
index 53fca9c..de65935 100644
--- a/internal/llmutils/client.go
+++ b/internal/llmutils/client.go
@@ -12,6 +12,7 @@ import (
 func NewClientFromApp(cfg appconfig.App) (llm.Client, error) {
 	llmCfg := llm.Config{
 		Provider:              cfg.Provider,
+		RequestTimeout:        cfg.RequestTimeout,
 		OpenAIBaseURL:         cfg.OpenAIBaseURL,
 		OpenAIModel:           cfg.OpenAIModel,
 		OpenAITemperature:     cfg.OpenAITemperature,
diff --git a/internal/lsp/server.go b/internal/lsp/server.go
index 127b089..bbee64f 100644
--- a/internal/lsp/server.go
+++ b/internal/lsp/server.go
@@ -220,6 +220,7 @@ func (s *Server) currentLLMClient() llm.Client {
 func newClientForProvider(cfg appconfig.App, provider string) (llm.Client, error) {
 	llmCfg := llm.Config{
 		Provider:              provider,
+		RequestTimeout:        cfg.RequestTimeout,
 		OpenAIBaseURL:         cfg.OpenAIBaseURL,
 		OpenAIModel:           cfg.OpenAIModel,
 		OpenAITemperature:     cfg.OpenAITemperature,
author	Paul Bütow <1224732+snonux@users.noreply.github.com>	2026-02-02 20:55:36 +0200
committer	GitHub <noreply@github.com>	2026-02-02 20:55:36 +0200
commit	da01d65da337cc2f6c99d8236140f8fb45c6bd5e (patch)
tree	4887ab977b8a92165e180dd3d3c7323df7e6172b
parent	7194696eb8c4c5bd50f69df96e9a6b87cec1f049 (diff)
parent	791cd282ec143c9bbb44eba3b2271f1573272ba5 (diff)