From 017b355d6632ef9fbf162fa741e7dde366b2b2db Mon Sep 17 00:00:00 2001
From: Paul Buetow <paul@buetow.org>
Date: Sun, 26 Apr 2026 09:05:36 +0300
Subject: feat: default to Ollama Cloud (kimi-k2.6) when no provider configured

Switches the in-code defaults so that hexai talks to Ollama Cloud
(https://ollama.com) with model kimi-k2.6 when no provider is configured,
instead of OpenAI. The example config, README, and configuration guide
all reflect the new recommended setup; previous OpenAI / local-Ollama
options are still documented as alternatives.

Tests that depended on the implicit "openai" default now pin the
provider explicitly so they continue to exercise the OpenAI / gpt-5
code paths they were designed to cover.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 README.md                             |  2 +-
 config.toml.example                   | 23 ++++++++++++-----------
 docs/configuration.md                 | 12 +++++++-----
 internal/hexaicli/cache_test.go       |  3 +++
 internal/hexaicli/run_test.go         |  4 ++++
 internal/hexailsp/run_test.go         | 10 +++++++++-
 internal/llm/ollama.go                |  7 +++++--
 internal/llm/provider.go              |  2 +-
 internal/llmutils/client.go           |  7 ++++---
 internal/llmutils/client_test.go      |  4 ++--
 internal/lsp/llm_request_opts_test.go |  3 +++
 11 files changed, 51 insertions(+), 26 deletions(-)

diff --git a/README.md b/README.md
index 9da94e1..e1d6699 100644
--- a/README.md
+++ b/README.md
@@ -41,7 +41,7 @@ It has got improved capabilities for Go code understanding (for example, create
   - Auto-detects Cursor, Amp, Aider (WIP), and other agents
   - OpenAI Codex CLI and Claude Code CLI have native external-editor support via `Ctrl+G`
   - Config-driven: add new agents via `[tmux_edit]` in config.toml
-* Support for OpenAI, OpenRouter, Anthropic, and Ollama
+* Support for Ollama (local + Ollama Cloud), OpenAI, OpenRouter, and Anthropic — Ollama Cloud (`kimi-k2.6` at `https://ollama.com`) is the default
 
 > **Note on hexai-mcp-server:** This component is currently experimental and not actively maintained. The author manages prompts through slash commands and meta-commands in the hexai agent system, making the MCP server redundant for its original purpose. The code is preserved for potential future enhancements with different functionality beyond prompt management. See the [MCP documentation](docs/mcp-setup.md) for reference only.
 
diff --git a/config.toml.example b/config.toml.example
index 26e817a..4f7d26a 100644
--- a/config.toml.example
+++ b/config.toml.example
@@ -68,7 +68,18 @@ chat_prefixes = ["?", "!", ":", ";"]  # single-character items
 # temperature = 0.6
 
 [provider]
-name = "openai"                      # openai | openrouter | anthropic | ollama
+# Ollama Cloud (ollama.com) is the recommended default. The API key is read
+# from HEXAI_OLLAMA_API_KEY (falling back to OLLAMA_API_KEY).
+name = "ollama"                      # ollama | openai | openrouter | anthropic
+
+[ollama]
+model       = "kimi-k2.6"
+base_url    = "https://ollama.com"
+# To run against a local Ollama server instead, point at it and pick a local
+# model. No API key is required when targeting localhost.
+#   model    = "qwen3-coder:30b-a3b-q4_K_M"
+#   base_url = "http://localhost:11434"
+temperature = 0.2
 
 [openai]
 model       = "gpt-4.1"
@@ -80,16 +91,6 @@ model       = "openrouter/auto"
 base_url    = "https://openrouter.ai/api/v1"
 temperature = 0.2
 
-[ollama]
-model       = "qwen3-coder:30b-a3b-q4_K_M"
-base_url    = "http://localhost:11434"
-# For Ollama Cloud (ollama.ai subscription), point at the cloud endpoint and
-# export an API key in the environment. The key is optional: leave it unset
-# for a local Ollama server.
-#   base_url = "https://ollama.com"
-#   export HEXAI_OLLAMA_API_KEY=...   # or OLLAMA_API_KEY
-temperature = 0.2
-
 [anthropic]
 model       = "claude-3-5-sonnet-20241022"
 base_url    = "https://api.anthropic.com/v1"
diff --git a/docs/configuration.md b/docs/configuration.md
index 06cc9fc..573d8e7 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -65,20 +65,22 @@ Runtime reloads
 
 API keys:
 
+- Ollama Cloud (recommended default): prefer `HEXAI_OLLAMA_API_KEY`, falling back to `OLLAMA_API_KEY`. The key is optional: it authenticates against Ollama Cloud and can be left unset when `[ollama] base_url` points at a local Ollama server.
 - OpenAI: prefer `HEXAI_OPENAI_API_KEY`, falling back to `OPENAI_API_KEY`.
 - OpenRouter: prefer `HEXAI_OPENROUTER_API_KEY`, falling back to `OPENROUTER_API_KEY`.
+- Anthropic: prefer `HEXAI_ANTHROPIC_API_KEY`, falling back to `ANTHROPIC_API_KEY`.
 
 Selecting a provider
 
-- Sectioned: set `[provider] name = "openai" | "openrouter" | "anthropic" | "ollama"`.
-- If omitted, Hexai defaults to `openai`.
+- Sectioned: set `[provider] name = "ollama" | "openai" | "openrouter" | "anthropic"`.
+- If omitted, Hexai defaults to `ollama` (Ollama Cloud at `https://ollama.com` with model `kimi-k2.6`).
 - Selecting `openrouter` uses https://openrouter.ai/api/v1 by default and automatically sends the required `HTTP-Referer` (`https://github.com/snonux/hexai`) and `X-Title` (`Hexai`) headers. Override the base URL via `[openrouter]` or environment variables when needed.
 
 Notes on Ollama:
 
-- Ensure the model is available locally (e.g., `ollama pull qwen3-coder:30b-a3b-q4_K_M`).
-- Alternatively, run Ollama in OpenAI‑compatible mode and use the OpenAI provider with
-  `openai_base_url` pointed at your local endpoint.
+- The default Ollama base URL is `https://ollama.com` (Ollama Cloud). Set `HEXAI_OLLAMA_API_KEY` (or `OLLAMA_API_KEY`) to authenticate.
+- To use a local Ollama server instead, set `[ollama] base_url = "http://localhost:11434"` and pick a locally pulled model (e.g. `ollama pull qwen3-coder:30b-a3b-q4_K_M`). No API key is required when targeting localhost.
+- Alternatively, run Ollama in OpenAI‑compatible mode and use the OpenAI provider with `openai_base_url` pointed at your local endpoint.
 
 Hexai Action (TUI) configuration
 
diff --git a/internal/hexaicli/cache_test.go b/internal/hexaicli/cache_test.go
index 5f00e7b..c9b83c6 100644
--- a/internal/hexaicli/cache_test.go
+++ b/internal/hexaicli/cache_test.go
@@ -75,6 +75,9 @@ func TestRun_UsesCachedResponseWithoutClientCall(t *testing.T) {
 	t.Chdir(t.TempDir())
 	t.Setenv("XDG_CONFIG_HOME", t.TempDir())
 	t.Setenv("XDG_CACHE_HOME", t.TempDir())
+	// This test asserts an "openai/gpt-4.1" cache-hit label, so pin the provider
+	// (the in-code default switched to ollama when no config is present).
+	t.Setenv("HEXAI_PROVIDER", "openai")
 
 	oldNew := newClientFromApp
 	defer func() { newClientFromApp = oldNew }()
diff --git a/internal/hexaicli/run_test.go b/internal/hexaicli/run_test.go
index 69e5d98..e2788e5 100644
--- a/internal/hexaicli/run_test.go
+++ b/internal/hexaicli/run_test.go
@@ -167,6 +167,10 @@ func TestPrintProviderInfo(t *testing.T) {
 }
 
 func TestRun_SingleProviderHeaderUsesStderr(t *testing.T) {
+	// This test asserts an "openai:gpt-4.1:" header, so pin the provider/model
+	// explicitly (the in-code default switched to ollama/kimi-k2.6).
+	t.Setenv("HEXAI_PROVIDER", "openai")
+	t.Setenv("HEXAI_OPENAI_MODEL", "gpt-4.1")
 	oldNew := newClientFromApp
 	defer func() { newClientFromApp = oldNew }()
 	newClientFromApp = func(_ appconfig.App) (llm.Client, error) {
diff --git a/internal/hexailsp/run_test.go b/internal/hexailsp/run_test.go
index badb27c..fa4d535 100644
--- a/internal/hexailsp/run_test.go
+++ b/internal/hexailsp/run_test.go
@@ -38,6 +38,11 @@ func TestRunWithFactory_UsesDefaultsAndCallsServer(t *testing.T) {
 	var stderr bytes.Buffer
 	logger := log.New(&stderr, "hexai-lsp-server ", 0)
 	cfg := appconfig.Load(nil) // defaults
+	// Pin provider to openai: the in-code default is now ollama, which would
+	// happily build a client without a key and short-circuit the missing-key
+	// assertion below. Load(nil) returns raw defaults and ignores env vars,
+	// so set the field directly on the struct.
+	cfg.Provider = "openai"
 	var gotOpts lsp.ServerOptions
 	factory := func(r io.Reader, w io.Writer, logger *log.Logger, opts lsp.ServerOptions) ServerRunner {
 		gotOpts = opts
@@ -75,7 +80,10 @@ func TestRunWithFactory_BuildsClientWhenKeysPresent(t *testing.T) {
 
 	var stderr bytes.Buffer
 	logger := log.New(&stderr, "hexai-lsp-server ", 0)
-	cfg := appconfig.Load(nil) // defaults, provider=openai by default
+	cfg := appconfig.Load(nil) // defaults
+	// Pin provider to openai (the in-code default is now ollama). Load(nil)
+	// returns raw defaults and ignores env vars, so set this on the struct.
+	cfg.Provider = "openai"
 	var got llm.Client
 	factory := func(r io.Reader, w io.Writer, logger *log.Logger, opts lsp.ServerOptions) ServerRunner {
 		got = opts.Client
diff --git a/internal/llm/ollama.go b/internal/llm/ollama.go
index 0916c06..987a258 100644
--- a/internal/llm/ollama.go
+++ b/internal/llm/ollama.go
@@ -66,11 +66,14 @@ func newOllama(baseURL, model string, defaultTemp *float64, apiKey string) Clien
 }
 
 func newOllamaWithTimeout(baseURL, model, apiKey string, defaultTemp *float64, timeoutSec int) Client {
+	// Defaults target Ollama Cloud (ollama.com); a local server is opted into
+	// by setting base_url = "http://localhost:11434" (or HEXAI_OLLAMA_BASE_URL)
+	// and an appropriate model.
 	if strings.TrimSpace(baseURL) == "" {
-		baseURL = "http://localhost:11434"
+		baseURL = "https://ollama.com"
 	}
 	if strings.TrimSpace(model) == "" {
-		model = "qwen3-coder:30b-a3b-q4_K_M"
+		model = "kimi-k2.6"
 	}
 	if timeoutSec <= 0 {
 		timeoutSec = 30
diff --git a/internal/llm/provider.go b/internal/llm/provider.go
index 255297c..d1ff404 100644
--- a/internal/llm/provider.go
+++ b/internal/llm/provider.go
@@ -152,7 +152,7 @@ func RegisterAllProviders() {
 func NewFromConfig(cfg Config, openAIAPIKey, openRouterAPIKey, anthropicAPIKey, ollamaAPIKey string) (Client, error) {
 	provider := normalizeProvider(cfg.Provider)
 	if provider == "" {
-		provider = "openai"
+		provider = "ollama"
 	}
 
 	factory, ok := lookupProviderFactory(provider)
diff --git a/internal/llmutils/client.go b/internal/llmutils/client.go
index ef24571..ccba847 100644
--- a/internal/llmutils/client.go
+++ b/internal/llmutils/client.go
@@ -8,11 +8,12 @@ import (
 	"codeberg.org/snonux/hexai/internal/llm"
 )
 
-// CanonicalProvider normalizes provider names and defaults to openai.
+// CanonicalProvider normalizes provider names and defaults to ollama (Ollama
+// Cloud at https://ollama.com when paired with the default base URL).
 func CanonicalProvider(name string) string {
 	provider := strings.ToLower(strings.TrimSpace(name))
 	if provider == "" {
-		return "openai"
+		return "ollama"
 	}
 	return provider
 }
@@ -29,7 +30,7 @@ func DefaultModelForProvider(cfg appconfig.App, provider string) string {
 		if model := strings.TrimSpace(cfg.OllamaModel); model != "" {
 			return model
 		}
-		return "qwen3-coder:30b-a3b-q4_K_M"
+		return "kimi-k2.6"
 	case "anthropic":
 		if model := strings.TrimSpace(cfg.AnthropicModel); model != "" {
 			return model
diff --git a/internal/llmutils/client_test.go b/internal/llmutils/client_test.go
index c688213..ed91584 100644
--- a/internal/llmutils/client_test.go
+++ b/internal/llmutils/client_test.go
@@ -37,7 +37,7 @@ func TestCanonicalProvider(t *testing.T) {
 	if got := CanonicalProvider("  OpenRouter "); got != "openrouter" {
 		t.Fatalf("CanonicalProvider(openrouter) = %q", got)
 	}
-	if got := CanonicalProvider(" "); got != "openai" {
+	if got := CanonicalProvider(" "); got != "ollama" {
 		t.Fatalf("CanonicalProvider(empty) = %q", got)
 	}
 }
@@ -73,7 +73,7 @@ func TestDefaultModelForProvider_Fallbacks(t *testing.T) {
 	if got := DefaultModelForProvider(cfg, "openrouter"); got != "openrouter/auto" {
 		t.Fatalf("openrouter fallback = %q", got)
 	}
-	if got := DefaultModelForProvider(cfg, "ollama"); got != "qwen3-coder:30b-a3b-q4_K_M" {
+	if got := DefaultModelForProvider(cfg, "ollama"); got != "kimi-k2.6" {
 		t.Fatalf("ollama fallback = %q", got)
 	}
 	if got := DefaultModelForProvider(cfg, "anthropic"); got != "claude-3-5-sonnet-20240620" {
diff --git a/internal/lsp/llm_request_opts_test.go b/internal/lsp/llm_request_opts_test.go
index ad87cd4..79a83fc 100644
--- a/internal/lsp/llm_request_opts_test.go
+++ b/internal/lsp/llm_request_opts_test.go
@@ -24,6 +24,9 @@ func TestRequestSpec_Gpt5_ForcesTemp1(t *testing.T) {
 	s := newTestServer()
 	one := 0.2
 	s.cfg.CodingTemperature = &one
+	// Pin Provider explicitly: the in-code default is now ollama, but the
+	// gpt-5 temperature-force rule only fires for openai.
+	s.cfg.Provider = "openai"
 	s.llmClient = fakeClient{name: "openai", model: "gpt-5.0"}
 	s.cfg.OpenAIModel = "gpt-5.0"
 
-- 
cgit v1.2.3