lsp: limit to one in-flight LLM query; return visible 'LLM busy' completion item with provider/model; retain chat EOL suppression

author: Paul Buetow <paul@buetow.org> 2025-08-28 23:56:28 +0300
committer: Paul Buetow <paul@buetow.org> 2025-08-28 23:56:28 +0300
commit: 32a72e6533ecf3d4e0c53137692c658b512abcd1 (patch)
tree: 91b05a597974e4da89aa7647dc207e16e1bc94a7
parent: 30d233d689f77d4688a49c6fa221ab63a8482db2 (diff)
3 files changed, 86 insertions, 6 deletions
diff --git a/internal/lsp/handlers.go b/internal/lsp/handlers.go
index f810d84..332344a 100644
--- a/internal/lsp/handlers.go
+++ b/internal/lsp/handlers.go
@@ -769,8 +769,10 @@ func (s *Server) logCompletionContext(p CompletionParams, above, current, below,
 }
 
 func (s *Server) tryLLMCompletion(p CompletionParams, above, current, below, funcCtx, docStr string, hasExtra bool, extraText string) ([]CompletionItem, bool) {
-	ctx, cancel := context.WithTimeout(context.Background(), 6*time.Second)
-	defer cancel()
+    ctx, cancel := context.WithTimeout(context.Background(), 6*time.Second)
+    defer cancel()
+    // Track if we've already acquired the LLM busy lock during this call
+    locked := false
 
     // Inline prompt markers (strict ;text; or double-; patterns) explicitly allow triggering.
     inlinePrompt := lineHasInlinePrompt(current)
@@ -883,6 +885,14 @@ func (s *Server) tryLLMCompletion(p CompletionParams, above, current, below, fun
         logging.Logf("lsp ", "completion path=codex provider=%s uri=%s", prov, path)
         ctx2, cancel2 := context.WithTimeout(context.Background(), 8*time.Second)
         defer cancel2()
+        // Concurrency guard
+        if s.isLLMBusy() {
+            return []CompletionItem{s.busyCompletionItem()}, true
+        }
+        s.setLLMBusy(true)
+        defer s.setLLMBusy(false)
+        locked = true
+
         suggestions, err := cc.CodeCompletion(ctx2, prompt, after, 1, lang, temp)
         if err == nil && len(suggestions) > 0 {
             cleaned := strings.TrimSpace(suggestions[0])
@@ -931,6 +941,15 @@ func (s *Server) tryLLMCompletion(p CompletionParams, above, current, below, fun
 		opts = append(opts, llm.WithTemperature(*s.codingTemperature))
 	}
 	logging.Logf("lsp ", "completion llm=requesting model=%s", s.llmClient.DefaultModel())
+    // Concurrency guard for chat path as well
+    if !locked {
+        if s.isLLMBusy() {
+            return []CompletionItem{s.busyCompletionItem()}, true
+        }
+        s.setLLMBusy(true)
+        defer s.setLLMBusy(false)
+    }
+
     text, err := s.llmClient.Chat(ctx, messages, opts...)
     if err != nil {
         logging.Logf("lsp ", "llm completion error: %v", err)
@@ -977,6 +996,39 @@ func (s *Server) tryLLMCompletion(p CompletionParams, above, current, below, fun
     return s.makeCompletionItems(cleaned, inParams, current, p, docStr), true
 }
 
+// busyCompletionItem builds the visible item returned while another LLM request is in flight; the label carries provider:model when both are set.
+func (s *Server) busyCompletionItem() CompletionItem {
+	label := "Hexai: LLM busy"
+	if s.llmClient != nil {
+		if prov, model := s.llmClient.Name(), s.llmClient.DefaultModel(); prov != "" && model != "" {
+			label += " (" + prov + ":" + model + ")"
+		}
+	}
+	return CompletionItem{
+		Label:         label,
+		Detail:        "Another request is running; only one is allowed concurrently",
+		InsertText:    "",
+		FilterText:    "",
+		SortText:      "~~~~~busy", // NOTE(review): '~' sorts after letters and digits, so ascending sortText order would place this last, not first — confirm intent
+		Documentation: "Hexai is processing a previous request. Please retry shortly.",
+	}
+}
+
+// isLLMBusy reports whether an LLM request is currently marked as in flight; the flag is read under s.mu.
+// NOTE(review): callers check this and then call setLLMBusy(true) under a separate lock acquisition, so two concurrent calls could both see false — consider one atomic try-acquire.
+func (s *Server) isLLMBusy() bool {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	return s.llmBusy
+}
+
+// setLLMBusy sets the in-flight flag to v while holding s.mu.
+func (s *Server) setLLMBusy(v bool) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	s.llmBusy = v
+}
+
 // --- small completion cache (last ~10 entries) ---
 
 func (s *Server) completionCacheKey(p CompletionParams, above, current, below, funcCtx string, inParams bool, hasExtra bool, extraText string) string {
diff --git a/internal/lsp/llm_busy_test.go b/internal/lsp/llm_busy_test.go
new file mode 100644
index 0000000..95123d2
--- /dev/null
+++ b/internal/lsp/llm_busy_test.go
@@ -0,0 +1,25 @@
+package lsp
+
+import (
+    "encoding/json"
+    "testing"
+)
+
+// TestLLMBusy_YieldsBusyCompletionItem checks that tryLLMCompletion returns ok=true and exactly one labelled item with empty InsertText while the busy flag is set.
+func TestLLMBusy_YieldsBusyCompletionItem(t *testing.T) {
+    s := &Server{ maxTokens: 32, triggerChars: []string{"."}, compCache: make(map[string]string) }
+    s.llmClient = &countingLLM{}
+    // Mark the LLM as busy before invoking completion; the cleanup below resets the flag.
+    s.setLLMBusy(true)
+    t.Cleanup(func(){ s.setLLMBusy(false) })
+    line := "obj."
+    p := CompletionParams{ Position: Position{ Line: 0, Character: len(line) }, TextDocument: TextDocumentIdentifier{URI: "file://busy.go"} }
+    // Simulate a manual invoke (triggerKind 1) to bypass the min-prefix check
+    p.Context = json.RawMessage([]byte(`{"triggerKind":1}`))
+    items, ok := s.tryLLMCompletion(p, "", line, "", "", "", false, "")
+    if !ok { t.Fatalf("expected ok=true") }
+    if len(items) != 1 { t.Fatalf("expected one busy item, got %d", len(items)) }
+    if items[0].InsertText != "" { t.Fatalf("busy item should not insert text") }
+    if items[0].Label == "" { t.Fatalf("busy item should have a label") }
+}
+
diff --git a/internal/lsp/server.go b/internal/lsp/server.go
index 7bc52c1..54efdf7 100644
--- a/internal/lsp/server.go
+++ b/internal/lsp/server.go
@@ -40,10 +40,13 @@ type Server struct {
 	// Small LRU cache for recent code completion outputs (keyed by context)
 	compCache      map[string]string
 	compCacheOrder []string // most-recent at end; cap ~10
-	// Outgoing JSON-RPC id counter for server-initiated requests
-	nextID int64
-	// Minimum identifier chars required for manual invoke to bypass prefix checks
-	manualInvokeMinPrefix int
+    // Outgoing JSON-RPC id counter for server-initiated requests
+    nextID int64
+    // Minimum identifier chars required for manual invoke to bypass prefix checks
+    manualInvokeMinPrefix int
+
+    // LLM concurrency guard: allow at most one in-flight request
+    llmBusy bool
 }
 
 // ServerOptions collects configuration for NewServer to avoid long parameter lists.
author	Paul Buetow <paul@buetow.org>	2025-08-28 23:56:28 +0300
committer	Paul Buetow <paul@buetow.org>	2025-08-28 23:56:28 +0300
commit	32a72e6533ecf3d4e0c53137692c658b512abcd1 (patch)
tree	91b05a597974e4da89aa7647dc207e16e1bc94a7
parent	30d233d689f77d4688a49c6fa221ab63a8482db2 (diff)