Phase 1: remove single in-flight LLM gate\n\n- Drop llmBusy state and busy item\n- Remove concurrency guard in completion paths\n- Allow manual invoke (TriggerKind=1) even after whitespace\n- Delete llm_busy_test; update TODO\n\nAll unit tests pass.

author: Paul Buetow <paul@buetow.org> 2025-09-03 15:55:34 +0300
committer: Paul Buetow <paul@buetow.org> 2025-09-03 15:55:34 +0300
commit: 71f0d04bd558433cebf1b05845c9fa0e2957eba8 (patch)
tree: 911a3cab02ecb212cc4221bdd24b46bdbea21fb8
parent: b089ce13904d225a77ed2a4825fa88366a57442c (diff)
5 files changed, 47 insertions, 102 deletions
diff --git a/TODO.md b/TODO.md
new file mode 100644
index 0000000..20f78c1
--- /dev/null
+++ b/TODO.md
@@ -0,0 +1,26 @@
+Implement the changes outlined in this TODO document. Mark phases as done once completed. Before marking a phase done, ensure that all unit tests pass.
+
+When implementing new code, ensure that there is also unit test coverage for it. Once unit tests pass, commit changes always to git after every phase completion.
+
+Problem statement: When implementing a LSP server for Helix Editor for Auto Code completion, what are the strategies avoiding excessive completion calls? Especially when we use an LLM in the backend it can strain the performance. Is there anything with a debounce strategy we could implement?
+
+Solutions (sub-divided into phases):
+
+To avoid excessive completion calls in a Helix Editor LSP server—especially when an LLM is on the backend—debouncing is indeed the primary strategy. Here are the key optimization techniques:
+
+Phase 1: Remove the LSP auto-completion rate limiter from the Hexai source code.
+
+Status: Done — removed the single in-flight LLM request concurrency gate so
+multiple completion requests can proceed concurrently. Also ensured manual
+invocation (TriggerKind=1) always triggers completion even after whitespace.
+Updated tests accordingly.
+
+Phase 2: Debounce completion requests: Introduce a configurable delay (e.g., 100–500 ms) before sending a completion request to the LLM. This prevents a flood of calls while typing. 
+        
+Phase 3: Throttle on the server side: Beyond debouncing, implement request throttling to cap the maximum rate of LLM calls (e.g., one per 500 ms). This is especially useful when debounce alone isn’t enough under rapid editing
+    2
+    .
+
+Phase 4: I think this is already implemented, verify: Filter incomplete triggers: Avoid sending requests for short or non-meaningful prefixes (e.g., less than 2–3 characters). This reduces noise and unnecessary LLM calls.
+
+Phase 5: I think this is already implemented, verify: Server-side caching: Cache recent completions keyed by prefix and file context. This avoids recomputation for repeated or similar queries.
diff --git a/internal/lsp/handlers.go b/internal/lsp/handlers.go
index 531b454..547be67 100644
--- a/internal/lsp/handlers.go
+++ b/internal/lsp/handlers.go
@@ -184,38 +184,7 @@ func (s *Server) reply(id json.RawMessage, result any, err *RespError) {
 
 // busyCompletionItem builds a visible, non-inserting completion item indicating
 // that an LLM request is already in flight.
-func (s *Server) busyCompletionItem() CompletionItem {
-	prov := ""
-	model := ""
-	if s.llmClient != nil {
-		prov = s.llmClient.Name()
-		model = s.llmClient.DefaultModel()
-	}
-	label := "Hexai: LLM busy"
-	if prov != "" && model != "" {
-		label += " (" + prov + ":" + model + ")"
-	}
-	return CompletionItem{
-		Label:         label,
-		Detail:        "Another request is running; only one is allowed concurrently",
-		InsertText:    "",
-		FilterText:    "",
-		SortText:      "~~~~~busy", // float to top
-		Documentation: "Hexai is processing a previous request. Please retry shortly.",
-	}
-}
-
-func (s *Server) isLLMBusy() bool {
-	s.mu.Lock()
-	defer s.mu.Unlock()
-	return s.llmBusy
-}
-
-func (s *Server) setLLMBusy(v bool) {
-	s.mu.Lock()
-	s.llmBusy = v
-	s.mu.Unlock()
-}
+// removed: previous single in-flight LLM busy gate and busy item
 
 // --- small completion cache (last ~10 entries) ---
 
@@ -329,14 +298,14 @@ func (s *Server) isTriggerEvent(p CompletionParams, current string) bool {
 			b, _ := json.Marshal(p.Context)
 			_ = json.Unmarshal(b, &ctx)
 		}
-		// If the line contains a bare ';;' (no ';;text;'), do not treat as a trigger source.
-		if strings.Contains(current, ";;") && !hasDoubleSemicolonTrigger(current) {
-			return false
-		}
-		// TriggerKind 1 = Invoked (manual) — always allow (unless bare ';;' above)
-		if ctx.TriggerKind == 1 {
-			return true
-		}
+        // If the line contains a bare ';;' (no ';;text;'), do not treat as a trigger source.
+        if strings.Contains(current, ";;") && !hasDoubleSemicolonTrigger(current) {
+            return false
+        }
+        // TriggerKind 1 = Invoked (manual). Always allow manual invoke.
+        if ctx.TriggerKind == 1 {
+            return true
+        }
 		// TriggerKind 2 is TriggerCharacter per LSP spec
 		if ctx.TriggerKind == 2 {
 			if ctx.TriggerCharacter != "" {
diff --git a/internal/lsp/handlers_completion.go b/internal/lsp/handlers_completion.go
index 73c903f..1c77024 100644
--- a/internal/lsp/handlers_completion.go
+++ b/internal/lsp/handlers_completion.go
@@ -70,9 +70,8 @@ func (s *Server) logCompletionContext(p CompletionParams, above, current, below,
 }
 
 func (s *Server) tryLLMCompletion(p CompletionParams, above, current, below, funcCtx, docStr string, hasExtra bool, extraText string) ([]CompletionItem, bool) {
-	ctx, cancel := context.WithTimeout(context.Background(), 6*time.Second)
-	defer cancel()
-	locked := false // track if we've taken the LLM busy lock
+    ctx, cancel := context.WithTimeout(context.Background(), 6*time.Second)
+    defer cancel()
 
 	inlinePrompt := lineHasInlinePrompt(current)
 	if !inlinePrompt && !s.isTriggerEvent(p, current) {
@@ -104,10 +103,10 @@ func (s *Server) tryLLMCompletion(p CompletionParams, above, current, below, fun
 		return []CompletionItem{}, true
 	}
 
-	// Provider-native path
-	if items, ok := s.tryProviderNativeCompletion(current, p, above, below, funcCtx, docStr, hasExtra, extraText, inParams); ok {
-		return items, true
-	}
+    // Provider-native path
+    if items, ok := s.tryProviderNativeCompletion(current, p, above, below, funcCtx, docStr, hasExtra, extraText, inParams); ok {
+        return items, true
+    }
 
 	// Chat path
 	messages := s.buildCompletionMessages(inlinePrompt, hasExtra, extraText, inParams, p, above, current, below, funcCtx)
@@ -121,16 +120,7 @@ func (s *Server) tryLLMCompletion(p CompletionParams, above, current, below, fun
 	if s.codingTemperature != nil {
 		opts = append(opts, llm.WithTemperature(*s.codingTemperature))
 	}
-	logging.Logf("lsp ", "completion llm=requesting model=%s", s.llmClient.DefaultModel())
-
-	// Concurrency guard for chat path as well
-	if !locked {
-		if s.isLLMBusy() {
-			return []CompletionItem{s.busyCompletionItem()}, true
-		}
-		s.setLLMBusy(true)
-		defer s.setLLMBusy(false)
-	}
+    logging.Logf("lsp ", "completion llm=requesting model=%s", s.llmClient.DefaultModel())
 
 	text, err := s.llmClient.Chat(ctx, messages, opts...)
 	if err != nil {
@@ -233,15 +223,10 @@ func (s *Server) tryProviderNativeCompletion(current string, p CompletionParams,
 		prov = s.llmClient.Name()
 	}
 	logging.Logf("lsp ", "completion path=codex provider=%s uri=%s", prov, path)
-	ctx2, cancel2 := context.WithTimeout(context.Background(), 8*time.Second)
-	defer cancel2()
-	if s.isLLMBusy() {
-		return []CompletionItem{s.busyCompletionItem()}, true
-	}
-	s.setLLMBusy(true)
-	defer s.setLLMBusy(false)
+    ctx2, cancel2 := context.WithTimeout(context.Background(), 8*time.Second)
+    defer cancel2()
 
-	suggestions, err := cc.CodeCompletion(ctx2, prompt, after, 1, lang, temp)
+    suggestions, err := cc.CodeCompletion(ctx2, prompt, after, 1, lang, temp)
 	if err == nil && len(suggestions) > 0 {
 		cleaned := strings.TrimSpace(suggestions[0])
 		if cleaned != "" {
diff --git a/internal/lsp/llm_busy_test.go b/internal/lsp/llm_busy_test.go
deleted file mode 100644
index c7cc716..0000000
--- a/internal/lsp/llm_busy_test.go
+++ /dev/null
@@ -1,32 +0,0 @@
-package lsp
-
-import (
-	"encoding/json"
-	"testing"
-)
-
-// Ensure a visible busy item is returned when a prior LLM request is in flight.
-func TestLLMBusy_YieldsBusyCompletionItem(t *testing.T) {
-	s := &Server{maxTokens: 32, triggerChars: []string{"."}, compCache: make(map[string]string)}
-	s.llmClient = &countingLLM{}
-	// Mark busy
-	s.setLLMBusy(true)
-	t.Cleanup(func() { s.setLLMBusy(false) })
-	line := "obj."
-	p := CompletionParams{Position: Position{Line: 0, Character: len(line)}, TextDocument: TextDocumentIdentifier{URI: "file://busy.go"}}
-	// Simulate manual invoke to bypass min-prefix
-	p.Context = json.RawMessage([]byte(`{"triggerKind":1}`))
-	items, ok := s.tryLLMCompletion(p, "", line, "", "", "", false, "")
-	if !ok {
-		t.Fatalf("expected ok=true")
-	}
-	if len(items) != 1 {
-		t.Fatalf("expected one busy item, got %d", len(items))
-	}
-	if items[0].InsertText != "" {
-		t.Fatalf("busy item should not insert text")
-	}
-	if items[0].Label == "" {
-		t.Fatalf("busy item should have a label")
-	}
-}
diff --git a/internal/lsp/server.go b/internal/lsp/server.go
index 4b72717..2f834ba 100644
--- a/internal/lsp/server.go
+++ b/internal/lsp/server.go
@@ -44,11 +44,8 @@ type Server struct {
 	// Minimum identifier chars required for manual invoke to bypass prefix checks
 	manualInvokeMinPrefix int
 
-	// LLM concurrency guard: allow at most one in-flight request
-	llmBusy bool
-
-	// Dispatch table for JSON-RPC methods → handler functions
-	handlers map[string]func(Request)
+    // Dispatch table for JSON-RPC methods → handler functions
+    handlers map[string]func(Request)
 }
 
 // ServerOptions collects configuration for NewServer to avoid long parameter lists.
author	Paul Buetow <paul@buetow.org>	2025-09-03 15:55:34 +0300
committer	Paul Buetow <paul@buetow.org>	2025-09-03 15:55:34 +0300
commit	71f0d04bd558433cebf1b05845c9fa0e2957eba8 (patch)
tree	911a3cab02ecb212cc4221bdd24b46bdbea21fb8
parent	b089ce13904d225a77ed2a4825fa88366a57442c (diff)