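Phase 2: add configurable completion debounce\n\n- App config: completion_debounce_ms (default 200)\n- Server: wait until there has been no input for the debounce interval before LLM calls\n- Applies to chat and provider-native completion paths\n- Also adds completion_throttle_ms (default 0, disabled) to enforce a minimum spacing between LLM calls\n- Tests: add debounce and throttle tests, adjusted to verify behavior\n\nAll unit tests pass.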

author: Paul Buetow <paul@buetow.org> 2025-09-03 16:00:26 +0300
committer: Paul Buetow <paul@buetow.org> 2025-09-03 16:00:26 +0300
commit: ffe9ed5531b6e62706ea555c48964ea0e560b780 (patch)
tree: 81974f771543827f4c0743f5f1d66f5fbd06a2bd
parent: 71f0d04bd558433cebf1b05845c9fa0e2957eba8 (diff)
6 files changed, 216 insertions, 23 deletions
diff --git a/TODO.md b/TODO.md
index 20f78c1..51cd5d1 100644
--- a/TODO.md
+++ b/TODO.md
@@ -17,6 +17,10 @@ Updated tests accordingly.
 
 Phase 2: Debounce completion requests: Introduce a configurable delay (e.g., 100–500 ms) before sending a completion request to the LLM. This prevents a flood of calls while typing. 
         
+Status: Done — added `completion_debounce_ms` (default 200). The server waits until
+there has been no input activity for at least this duration before making LLM calls
+(chat and provider-native paths). Added unit test `TestCompletionDebounce_WaitsUntilQuiet`.
+        
 Phase 3: Throttle on the server side: Beyond debouncing, implement request throttling to cap the maximum rate of LLM calls (e.g., one per 500 ms). This is especially useful when debounce alone isn’t enough under rapid editing
     2
     .
diff --git a/internal/appconfig/config.go b/internal/appconfig/config.go
index 7bcafda..2110831 100644
--- a/internal/appconfig/config.go
+++ b/internal/appconfig/config.go
@@ -25,6 +25,14 @@ type App struct {
     // to proceed without structural triggers. 0 means always allow.
     ManualInvokeMinPrefix int `json:"manual_invoke_min_prefix"`
 
+    // Completion debounce in milliseconds. When > 0, the server waits until
+    // there has been no text change for at least this duration before sending
+    // an LLM completion request.
+    CompletionDebounceMs int `json:"completion_debounce_ms"`
+    // Completion throttle in milliseconds. When > 0, enforces a minimum spacing
+    // between LLM requests (both chat and code-completer paths).
+    CompletionThrottleMs int `json:"completion_throttle_ms"`
+
 	TriggerCharacters []string `json:"trigger_characters"`
 	Provider          string   `json:"provider"`
 
@@ -59,6 +67,8 @@ func newDefaultConfig() App {
 		OllamaTemperature:  &t,
         CopilotTemperature: &t,
         ManualInvokeMinPrefix: 0,
+        CompletionDebounceMs: 200,
+        CompletionThrottleMs: 0,
     }
 }
 
@@ -139,6 +149,8 @@ func (a *App) mergeBasics(other *App) {
     if other.ManualInvokeMinPrefix >= 0 {
         a.ManualInvokeMinPrefix = other.ManualInvokeMinPrefix
     }
+    if other.CompletionDebounceMs > 0 { a.CompletionDebounceMs = other.CompletionDebounceMs }
+    if other.CompletionThrottleMs > 0 { a.CompletionThrottleMs = other.CompletionThrottleMs }
 	if len(other.TriggerCharacters) > 0 {
 		a.TriggerCharacters = slices.Clone(other.TriggerCharacters)
 	}
@@ -238,6 +250,12 @@ func loadFromEnv(logger *log.Logger) *App {
     if n, ok := parseInt("HEXAI_MANUAL_INVOKE_MIN_PREFIX"); ok {
         out.ManualInvokeMinPrefix = n; any = true
     }
+    if n, ok := parseInt("HEXAI_COMPLETION_DEBOUNCE_MS"); ok {
+        out.CompletionDebounceMs = n; any = true
+    }
+    if n, ok := parseInt("HEXAI_COMPLETION_THROTTLE_MS"); ok {
+        out.CompletionThrottleMs = n; any = true
+    }
     if f, ok := parseFloatPtr("HEXAI_CODING_TEMPERATURE"); ok {
         out.CodingTemperature = f; any = true
     }
diff --git a/internal/hexailsp/run.go b/internal/hexailsp/run.go
index 1ff1ded..0df8256 100644
--- a/internal/hexailsp/run.go
+++ b/internal/hexailsp/run.go
@@ -116,5 +116,7 @@ func makeServerOptions(cfg appconfig.App, logContext bool, client llm.Client) ls
         Client:            client,
         TriggerCharacters: cfg.TriggerCharacters,
         ManualInvokeMinPrefix: cfg.ManualInvokeMinPrefix,
+        CompletionDebounceMs:  cfg.CompletionDebounceMs,
+        CompletionThrottleMs:  cfg.CompletionThrottleMs,
     }
 }
diff --git a/internal/lsp/debounce_throttle_test.go b/internal/lsp/debounce_throttle_test.go
new file mode 100644
index 0000000..012ec68
--- /dev/null
+++ b/internal/lsp/debounce_throttle_test.go
@@ -0,0 +1,84 @@
+package lsp
+
+import (
+    "context"
+    "encoding/json"
+    "testing"
+    "time"
+    "codeberg.org/snonux/hexai/internal/llm"
+)
+
+// timeLLM is a test double that records the wall-clock time of its latest Chat call.
+type timeLLM struct{ t time.Time } // t stays zero until Chat has been invoked
+
+func (t *timeLLM) Chat(ctx context.Context, _ []llm.Message, _ ...llm.RequestOption) (string, error) {
+    t.t = time.Now() // overwritten on every call; the tests read it back afterwards
+    return "ok", nil // fixed reply; ctx, messages and options are ignored
+}
+func (t *timeLLM) Name() string         { return "fake" }
+func (t *timeLLM) DefaultModel() string { return "m" }
+
+func TestCompletionDebounce_WaitsUntilQuiet(t *testing.T) { // checks that the LLM call is delayed by the debounce window
+    s := newTestServer()
+    s.compCache = make(map[string]string)
+    s.triggerChars = []string{".", ":", "/", "_"}
+    s.maxTokens = 32
+    s.completionDebounce = 30 * time.Millisecond // short window keeps the test fast
+    s.markActivity() // simulate recent input (presumably stamps lastInput — helper not shown here)
+
+    f := &timeLLM{}
+    s.llmClient = f
+
+    line := "func f(i int) "
+    p := CompletionParams{Position: Position{Line: 0, Character: len(line)}, TextDocument: TextDocumentIdentifier{URI: "file://debounce.go"}}
+    p.Context = json.RawMessage([]byte(`{"triggerKind":1}`)) // NOTE(review): 1 looks like LSP's "Invoked" trigger kind — confirm
+
+    start := time.Now() // taken after markActivity, so the observed delay may be slightly under 30ms
+    _, ok := s.tryLLMCompletion(p, "", line, "", "", "", false, "")
+    if !ok {
+        t.Fatalf("expected ok=true")
+    }
+    if f.t.IsZero() { // a zero timestamp means Chat never ran
+        t.Fatalf("expected LLM to be called")
+    }
+    if f.t.Sub(start) < 25*time.Millisecond { // 5ms below the debounce to allow minor timing noise
+        t.Fatalf("expected debounce delay, got %s", f.t.Sub(start))
+    }
+}
+
+func TestCompletionThrottle_SerializesCalls(t *testing.T) { // checks that two back-to-back calls are spaced by the throttle interval
+    s := newTestServer()
+    s.compCache = make(map[string]string)
+    s.triggerChars = []string{".", ":", "/", "_"}
+    s.maxTokens = 32
+    s.throttleInterval = 25 * time.Millisecond // short interval keeps the test fast
+
+    // first call uses timeLLM to record time
+    f1 := &timeLLM{}
+    s.llmClient = f1
+    line := "func f(i int) "
+    p := CompletionParams{Position: Position{Line: 0, Character: len(line)}, TextDocument: TextDocumentIdentifier{URI: "file://throttle.go"}}
+    p.Context = json.RawMessage([]byte(`{"triggerKind":1}`)) // NOTE(review): 1 looks like LSP's "Invoked" trigger kind — confirm
+    start := time.Now() // reference point taken before the first call
+    if _, ok := s.tryLLMCompletion(p, "", line, "", "", "", false, ""); !ok {
+        t.Fatalf("first call expected ok=true")
+    }
+    if f1.t.IsZero() { // a zero timestamp means Chat never ran
+        t.Fatalf("expected first call time recorded")
+    }
+
+    // second call immediately after; should be delayed by ~interval.
+    // Clear cache to ensure we actually call the LLM again.
+    s.compCache = make(map[string]string)
+    f2 := &timeLLM{} // fresh recorder so only the second call's time is captured
+    s.llmClient = f2
+    if _, ok := s.tryLLMCompletion(p, "", line, "", "", "", false, ""); !ok {
+        t.Fatalf("second call expected ok=true")
+    }
+    if f2.t.IsZero() {
+        t.Fatalf("expected second call time recorded")
+    }
+    if f2.t.Sub(start) < s.throttleInterval { // measured from before the first call, so this is a lower bound on the spacing
+        t.Fatalf("expected throttle spacing >= %s, got %s", s.throttleInterval, f2.t.Sub(start))
+    }
+}
diff --git a/internal/lsp/handlers_completion.go b/internal/lsp/handlers_completion.go
index 1c77024..576fc3d 100644
--- a/internal/lsp/handlers_completion.go
+++ b/internal/lsp/handlers_completion.go
@@ -2,13 +2,13 @@
 package lsp
 
 import (
-	"context"
-	"encoding/json"
-	"fmt"
-	"codeberg.org/snonux/hexai/internal/llm"
-	"codeberg.org/snonux/hexai/internal/logging"
-	"strings"
-	"time"
+    "context"
+    "encoding/json"
+    "fmt"
+    "codeberg.org/snonux/hexai/internal/llm"
+    "codeberg.org/snonux/hexai/internal/logging"
+    "strings"
+    "time"
 )
 
 func (s *Server) handleCompletion(req Request) {
@@ -120,6 +120,11 @@ func (s *Server) tryLLMCompletion(p CompletionParams, above, current, below, fun
 	if s.codingTemperature != nil {
 		opts = append(opts, llm.WithTemperature(*s.codingTemperature))
 	}
+    // Debounce and throttle before making the LLM call
+    s.waitForDebounce(ctx)
+    if !s.waitForThrottle(ctx) {
+        return nil, false
+    }
     logging.Logf("lsp ", "completion llm=requesting model=%s", s.llmClient.DefaultModel())
 
 	text, err := s.llmClient.Chat(ctx, messages, opts...)
@@ -226,6 +231,11 @@ func (s *Server) tryProviderNativeCompletion(current string, p CompletionParams,
     ctx2, cancel2 := context.WithTimeout(context.Background(), 8*time.Second)
     defer cancel2()
 
+    // Debounce and throttle prior to provider-native call
+    s.waitForDebounce(ctx2)
+    if !s.waitForThrottle(ctx2) {
+        return nil, false
+    }
     suggestions, err := cc.CodeCompletion(ctx2, prompt, after, 1, lang, temp)
 	if err == nil && len(suggestions) > 0 {
 		cleaned := strings.TrimSpace(suggestions[0])
@@ -252,6 +262,68 @@ func (s *Server) tryProviderNativeCompletion(current string, p CompletionParams,
 	return nil, false
 }
 
+// waitForDebounce blocks until at least completionDebounce has elapsed since lastInput.
+// It returns at once if the debounce is <= 0 or lastInput is zero, and early if ctx is done while waiting.
+func (s *Server) waitForDebounce(ctx context.Context) {
+    d := s.completionDebounce
+    if d <= 0 {
+        return
+    }
+    for {
+        s.mu.RLock() // lastInput is read under the read lock
+        last := s.lastInput
+        s.mu.RUnlock()
+        if last.IsZero() { // no input timestamp recorded: nothing to debounce against
+            return
+        }
+        since := time.Since(last)
+        if since >= d { // quiet for long enough
+            return
+        }
+        rem := d - since // sleep only for the remaining part of the window
+        timer := time.NewTimer(rem)
+        select {
+        case <-ctx.Done():
+            timer.Stop()
+            return // no return value: the caller cannot tell cancellation from a quiet period
+        case <-timer.C:
+            // loop and re-evaluate in case input occurred during sleep
+        }
+    }
+}
+
+// waitForThrottle blocks until throttleInterval has passed since lastLLMCall, then records
+// the current time as lastLLMCall and returns true. Returns false if ctx is done while waiting.
+func (s *Server) waitForThrottle(ctx context.Context) bool {
+    interval := s.throttleInterval
+    if interval <= 0 {
+        return true // throttling disabled; lastLLMCall is not updated on this path
+    }
+    var wait time.Duration // only assigned and used inside the loop below
+    for {
+        s.mu.Lock()
+        next := s.lastLLMCall.Add(interval) // earliest time the next call may start
+        now := time.Now()
+        if now.Before(next) {
+            wait = next.Sub(now)
+            s.mu.Unlock() // release the lock before sleeping
+            timer := time.NewTimer(wait)
+            select {
+            case <-ctx.Done():
+                timer.Stop()
+                return false
+            case <-timer.C:
+                // try again to set the next call time
+                continue // lastLLMCall is re-read under the lock on the next iteration
+            }
+        }
+        // we are allowed to proceed now; record this call as the latest
+        s.lastLLMCall = now
+        s.mu.Unlock()
+        return true
+    }
+}
+
 // buildCompletionMessages constructs the LLM messages for completion.
 func (s *Server) buildCompletionMessages(inlinePrompt, hasExtra bool, extraText string, inParams bool, p CompletionParams, above, current, below, funcCtx string) []llm.Message {
 	sysPrompt, userPrompt := buildPrompts(inParams, p, above, current, below, funcCtx)
diff --git a/internal/lsp/server.go b/internal/lsp/server.go
index 2f834ba..8af64ec 100644
--- a/internal/lsp/server.go
+++ b/internal/lsp/server.go
@@ -26,8 +26,8 @@ type Server struct {
 	maxTokens        int
 	contextMode      string
 	windowLines      int
-	maxContextTokens int
-	triggerChars     []string
+    maxContextTokens int
+    triggerChars     []string
 	// If set, used as the LSP coding temperature for all LLM calls
 	codingTemperature *float64
 	// LLM request stats
@@ -39,27 +39,34 @@ type Server struct {
 	// Small LRU cache for recent code completion outputs (keyed by context)
 	compCache      map[string]string
 	compCacheOrder []string // most-recent at end; cap ~10
-	// Outgoing JSON-RPC id counter for server-initiated requests
-	nextID int64
+    // Outgoing JSON-RPC id counter for server-initiated requests
+    nextID int64
 	// Minimum identifier chars required for manual invoke to bypass prefix checks
 	manualInvokeMinPrefix int
 
+    // Debounce and throttle settings
+    completionDebounce time.Duration
+    throttleInterval   time.Duration
+    lastLLMCall        time.Time
+
     // Dispatch table for JSON-RPC methods → handler functions
     handlers map[string]func(Request)
 }
 
 // ServerOptions collects configuration for NewServer to avoid long parameter lists.
 type ServerOptions struct {
-	LogContext       bool
-	MaxTokens        int
-	ContextMode      string
-	WindowLines      int
-	MaxContextTokens int
+    LogContext       bool
+    MaxTokens        int
+    ContextMode      string
+    WindowLines      int
+    MaxContextTokens int
 
-	Client                llm.Client
-	TriggerCharacters     []string
-	CodingTemperature     *float64
-	ManualInvokeMinPrefix int
+    Client                llm.Client
+    TriggerCharacters     []string
+    CodingTemperature     *float64
+    ManualInvokeMinPrefix int
+    CompletionDebounceMs  int
+    CompletionThrottleMs  int
 }
 
 func NewServer(r io.Reader, w io.Writer, logger *log.Logger, opts ServerOptions) *Server {
@@ -93,9 +100,15 @@ func NewServer(r io.Reader, w io.Writer, logger *log.Logger, opts ServerOptions)
 	} else {
 		s.triggerChars = append([]string{}, opts.TriggerCharacters...)
 	}
-	s.codingTemperature = opts.CodingTemperature
-	s.compCache = make(map[string]string)
-	s.manualInvokeMinPrefix = opts.ManualInvokeMinPrefix
+    s.codingTemperature = opts.CodingTemperature
+    s.compCache = make(map[string]string)
+    s.manualInvokeMinPrefix = opts.ManualInvokeMinPrefix
+    if opts.CompletionDebounceMs > 0 {
+        s.completionDebounce = time.Duration(opts.CompletionDebounceMs) * time.Millisecond
+    }
+    if opts.CompletionThrottleMs > 0 {
+        s.throttleInterval = time.Duration(opts.CompletionThrottleMs) * time.Millisecond
+    }
 	// Initialize dispatch table
 	s.handlers = map[string]func(Request){
 		"initialize":              s.handleInitialize,
author	Paul Buetow <paul@buetow.org>	2025-09-03 16:00:26 +0300
committer	Paul Buetow <paul@buetow.org>	2025-09-03 16:00:26 +0300
commit	ffe9ed5531b6e62706ea555c48964ea0e560b780 (patch)
tree	81974f771543827f4c0743f5f1d66f5fbd06a2bd
parent	71f0d04bd558433cebf1b05845c9fa0e2957eba8 (diff)