summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Buetow <paul@buetow.org>2025-09-03 16:00:26 +0300
committerPaul Buetow <paul@buetow.org>2025-09-03 16:00:26 +0300
commitffe9ed5531b6e62706ea555c48964ea0e560b780 (patch)
tree81974f771543827f4c0743f5f1d66f5fbd06a2bd
parent71f0d04bd558433cebf1b05845c9fa0e2957eba8 (diff)
Phase 2: add configurable completion debounce

- App config: completion_debounce_ms (default 200)
- Server: wait until no input for debounce before LLM calls
- Applies to chat and provider-native completion paths
- Tests: add debounce and adjust to verify behavior

All unit tests pass.
-rw-r--r--TODO.md4
-rw-r--r--internal/appconfig/config.go18
-rw-r--r--internal/hexailsp/run.go2
-rw-r--r--internal/lsp/debounce_throttle_test.go84
-rw-r--r--internal/lsp/handlers_completion.go86
-rw-r--r--internal/lsp/server.go45
6 files changed, 216 insertions, 23 deletions
diff --git a/TODO.md b/TODO.md
index 20f78c1..51cd5d1 100644
--- a/TODO.md
+++ b/TODO.md
@@ -17,6 +17,10 @@ Updated tests accordingly.
Phase 2: Debounce completion requests: Introduce a configurable delay (e.g., 100–500 ms) before sending a completion request to the LLM. This prevents a flood of calls while typing.
+Status: Done — added `completion_debounce_ms` (default 200). Server waits until
+no recent input activity for at least this duration before LLM calls (both chat
+and provider-native paths). Added unit test `TestCompletionDebounce_WaitsUntilQuiet`.
+
Phase 3: Throttle on the server side: Beyond debouncing, implement request throttling to cap the maximum rate of LLM calls (e.g., one per 500 ms). This is especially useful when debounce alone isn’t enough under rapid editing.
diff --git a/internal/appconfig/config.go b/internal/appconfig/config.go
index 7bcafda..2110831 100644
--- a/internal/appconfig/config.go
+++ b/internal/appconfig/config.go
@@ -25,6 +25,14 @@ type App struct {
// to proceed without structural triggers. 0 means always allow.
ManualInvokeMinPrefix int `json:"manual_invoke_min_prefix"`
+ // Completion debounce in milliseconds. When > 0, the server waits until
+ // there has been no text change for at least this duration before sending
+ // an LLM completion request.
+ CompletionDebounceMs int `json:"completion_debounce_ms"`
+ // Completion throttle in milliseconds. When > 0, caps the minimum spacing
+ // between LLM requests (both chat and code-completer paths).
+ CompletionThrottleMs int `json:"completion_throttle_ms"`
+
TriggerCharacters []string `json:"trigger_characters"`
Provider string `json:"provider"`
@@ -59,6 +67,8 @@ func newDefaultConfig() App {
OllamaTemperature: &t,
CopilotTemperature: &t,
ManualInvokeMinPrefix: 0,
+ CompletionDebounceMs: 200,
+ CompletionThrottleMs: 0,
}
}
@@ -139,6 +149,8 @@ func (a *App) mergeBasics(other *App) {
if other.ManualInvokeMinPrefix >= 0 {
a.ManualInvokeMinPrefix = other.ManualInvokeMinPrefix
}
+ if other.CompletionDebounceMs > 0 { a.CompletionDebounceMs = other.CompletionDebounceMs }
+ if other.CompletionThrottleMs > 0 { a.CompletionThrottleMs = other.CompletionThrottleMs }
if len(other.TriggerCharacters) > 0 {
a.TriggerCharacters = slices.Clone(other.TriggerCharacters)
}
@@ -238,6 +250,12 @@ func loadFromEnv(logger *log.Logger) *App {
if n, ok := parseInt("HEXAI_MANUAL_INVOKE_MIN_PREFIX"); ok {
out.ManualInvokeMinPrefix = n; any = true
}
+ if n, ok := parseInt("HEXAI_COMPLETION_DEBOUNCE_MS"); ok {
+ out.CompletionDebounceMs = n; any = true
+ }
+ if n, ok := parseInt("HEXAI_COMPLETION_THROTTLE_MS"); ok {
+ out.CompletionThrottleMs = n; any = true
+ }
if f, ok := parseFloatPtr("HEXAI_CODING_TEMPERATURE"); ok {
out.CodingTemperature = f; any = true
}
diff --git a/internal/hexailsp/run.go b/internal/hexailsp/run.go
index 1ff1ded..0df8256 100644
--- a/internal/hexailsp/run.go
+++ b/internal/hexailsp/run.go
@@ -116,5 +116,7 @@ func makeServerOptions(cfg appconfig.App, logContext bool, client llm.Client) ls
Client: client,
TriggerCharacters: cfg.TriggerCharacters,
ManualInvokeMinPrefix: cfg.ManualInvokeMinPrefix,
+ CompletionDebounceMs: cfg.CompletionDebounceMs,
+ CompletionThrottleMs: cfg.CompletionThrottleMs,
}
}
diff --git a/internal/lsp/debounce_throttle_test.go b/internal/lsp/debounce_throttle_test.go
new file mode 100644
index 0000000..012ec68
--- /dev/null
+++ b/internal/lsp/debounce_throttle_test.go
@@ -0,0 +1,84 @@
+package lsp
+
+import (
+ "context"
+ "encoding/json"
+ "testing"
+ "time"
+ "codeberg.org/snonux/hexai/internal/llm"
+)
+
+// timeLLM is a fake LLM client for tests: Chat stores time.Now() in t and returns "ok" with a nil error.
+type timeLLM struct{ t time.Time } // t stays the zero time until Chat has been called
+
+func (t *timeLLM) Chat(ctx context.Context, _ []llm.Message, _ ...llm.RequestOption) (string, error) { // ctx, messages and options are ignored
+ t.t = time.Now() // timestamp of the most recent Chat invocation
+ return "ok", nil
+}
+func (t *timeLLM) Name() string { return "fake" } // fixed provider name
+func (t *timeLLM) DefaultModel() string { return "m" } // fixed model name
+
+func TestCompletionDebounce_WaitsUntilQuiet(t *testing.T) { // checks that the LLM is not called until the debounce window has elapsed
+ s := newTestServer()
+ s.compCache = make(map[string]string)
+ s.triggerChars = []string{".", ":", "/", "_"}
+ s.maxTokens = 32
+ s.completionDebounce = 30 * time.Millisecond // short window keeps the test fast
+ s.markActivity() // simulate recent input (presumably stamps lastInput with the current time; confirm against markActivity)
+
+ f := &timeLLM{} // records when Chat is reached
+ s.llmClient = f
+
+ line := "func f(i int) "
+ p := CompletionParams{Position: Position{Line: 0, Character: len(line)}, TextDocument: TextDocumentIdentifier{URI: "file://debounce.go"}} // cursor at end of line
+ p.Context = json.RawMessage([]byte(`{"triggerKind":1}`)) // LSP CompletionTriggerKind 1 = Invoked
+
+ start := time.Now() // baseline taken just before the completion request
+ _, ok := s.tryLLMCompletion(p, "", line, "", "", "", false, "")
+ if !ok {
+ t.Fatalf("expected ok=true")
+ }
+ if f.t.IsZero() { // zero time means Chat was never invoked
+ t.Fatalf("expected LLM to be called")
+ }
+ if f.t.Sub(start) < 25*time.Millisecond { // 5ms below the 30ms debounce to allow minor timing noise
+ t.Fatalf("expected debounce delay, got %s", f.t.Sub(start))
+ }
+}
+
+func TestCompletionThrottle_SerializesCalls(t *testing.T) { // checks that a second LLM call is held back by the throttle interval
+ s := newTestServer()
+ s.compCache = make(map[string]string)
+ s.triggerChars = []string{".", ":", "/", "_"}
+ s.maxTokens = 32
+ s.throttleInterval = 25 * time.Millisecond // completionDebounce is not set by this test
+
+ // First call uses a timeLLM so the test can confirm Chat was actually reached.
+ f1 := &timeLLM{}
+ s.llmClient = f1
+ line := "func f(i int) "
+ p := CompletionParams{Position: Position{Line: 0, Character: len(line)}, TextDocument: TextDocumentIdentifier{URI: "file://throttle.go"}} // cursor at end of line
+ p.Context = json.RawMessage([]byte(`{"triggerKind":1}`)) // LSP CompletionTriggerKind 1 = Invoked
+ start := time.Now() // baseline taken before the FIRST call; the final assertion measures from here
+ if _, ok := s.tryLLMCompletion(p, "", line, "", "", "", false, ""); !ok {
+ t.Fatalf("first call expected ok=true")
+ }
+ if f1.t.IsZero() { // zero time means Chat was never invoked
+ t.Fatalf("expected first call time recorded")
+ }
+
+ // Second call is issued immediately after the first; it should be delayed by ~interval.
+ // Replace the cache with an empty one to ensure we actually call the LLM again.
+ s.compCache = make(map[string]string)
+ f2 := &timeLLM{} // fresh recorder so f2.t reflects only the second call
+ s.llmClient = f2
+ if _, ok := s.tryLLMCompletion(p, "", line, "", "", "", false, ""); !ok {
+ t.Fatalf("second call expected ok=true")
+ }
+ if f2.t.IsZero() {
+ t.Fatalf("expected second call time recorded")
+ }
+ if f2.t.Sub(start) < s.throttleInterval { // elapsed time from before call one to the second Chat must be at least one interval
+ t.Fatalf("expected throttle spacing >= %s, got %s", s.throttleInterval, f2.t.Sub(start))
+ }
+}
diff --git a/internal/lsp/handlers_completion.go b/internal/lsp/handlers_completion.go
index 1c77024..576fc3d 100644
--- a/internal/lsp/handlers_completion.go
+++ b/internal/lsp/handlers_completion.go
@@ -2,13 +2,13 @@
package lsp
import (
- "context"
- "encoding/json"
- "fmt"
- "codeberg.org/snonux/hexai/internal/llm"
- "codeberg.org/snonux/hexai/internal/logging"
- "strings"
- "time"
+ "context"
+ "encoding/json"
+ "fmt"
+ "codeberg.org/snonux/hexai/internal/llm"
+ "codeberg.org/snonux/hexai/internal/logging"
+ "strings"
+ "time"
)
func (s *Server) handleCompletion(req Request) {
@@ -120,6 +120,11 @@ func (s *Server) tryLLMCompletion(p CompletionParams, above, current, below, fun
if s.codingTemperature != nil {
opts = append(opts, llm.WithTemperature(*s.codingTemperature))
}
+ // Debounce and throttle before making the LLM call
+ s.waitForDebounce(ctx)
+ if !s.waitForThrottle(ctx) {
+ return nil, false
+ }
logging.Logf("lsp ", "completion llm=requesting model=%s", s.llmClient.DefaultModel())
text, err := s.llmClient.Chat(ctx, messages, opts...)
@@ -226,6 +231,11 @@ func (s *Server) tryProviderNativeCompletion(current string, p CompletionParams,
ctx2, cancel2 := context.WithTimeout(context.Background(), 8*time.Second)
defer cancel2()
+ // Debounce and throttle prior to provider-native call
+ s.waitForDebounce(ctx2)
+ if !s.waitForThrottle(ctx2) {
+ return nil, false
+ }
suggestions, err := cc.CodeCompletion(ctx2, prompt, after, 1, lang, temp)
if err == nil && len(suggestions) > 0 {
cleaned := strings.TrimSpace(suggestions[0])
@@ -252,6 +262,68 @@ func (s *Server) tryProviderNativeCompletion(current string, p CompletionParams,
return nil, false
}
+// waitForDebounce blocks until at least completionDebounce has elapsed since s.lastInput. It returns at once when
+// the debounce is <= 0 or lastInput is zero, and also when ctx is done; it has no result, so cancellation is not reported.
+func (s *Server) waitForDebounce(ctx context.Context) {
+ d := s.completionDebounce // read once; this call does not observe later changes to the field
+ if d <= 0 {
+ return
+ }
+ for {
+ s.mu.RLock() // lastInput is read under the read lock
+ last := s.lastInput
+ s.mu.RUnlock()
+ if last.IsZero() { // no input timestamp recorded, so there is nothing to wait out
+ return
+ }
+ since := time.Since(last)
+ if since >= d { // quiet for long enough
+ return
+ }
+ rem := d - since // quiet time still required
+ timer := time.NewTimer(rem)
+ select {
+ case <-ctx.Done():
+ timer.Stop() // release the pending timer before giving up
+ return
+ case <-timer.C:
+ // Loop and re-read lastInput: if it advanced during the sleep, the remaining wait is recomputed from it.
+ }
+ }
+}
+
+// waitForThrottle enforces a minimum spacing of throttleInterval between the instants recorded in s.lastLLMCall.
+// It returns true once the caller may proceed (immediately when the interval is <= 0) and false if ctx is done while waiting.
+func (s *Server) waitForThrottle(ctx context.Context) bool {
+ interval := s.throttleInterval // read once; this call does not observe later changes to the field
+ if interval <= 0 {
+ return true
+ }
+ var wait time.Duration // only assigned and used inside the loop below
+ for {
+ s.mu.Lock() // write lock: lastLLMCall is read and possibly updated in this critical section
+ next := s.lastLLMCall.Add(interval) // earliest allowed instant; a zero lastLLMCall puts it far in the past, so the first call is never delayed
+ now := time.Now()
+ if now.Before(next) {
+ wait = next.Sub(now)
+ s.mu.Unlock() // unlock before sleeping so other callers are not blocked on the mutex
+ timer := time.NewTimer(wait)
+ select {
+ case <-ctx.Done():
+ timer.Stop() // release the pending timer before giving up
+ return false
+ case <-timer.C:
+ // Re-check under the lock: another caller may have recorded a newer lastLLMCall while this one slept.
+ continue
+ }
+ }
+ // Allowed to proceed now; record this instant as the latest call before releasing the lock.
+ s.lastLLMCall = now
+ s.mu.Unlock()
+ return true
+ }
+}
+
// buildCompletionMessages constructs the LLM messages for completion.
func (s *Server) buildCompletionMessages(inlinePrompt, hasExtra bool, extraText string, inParams bool, p CompletionParams, above, current, below, funcCtx string) []llm.Message {
sysPrompt, userPrompt := buildPrompts(inParams, p, above, current, below, funcCtx)
diff --git a/internal/lsp/server.go b/internal/lsp/server.go
index 2f834ba..8af64ec 100644
--- a/internal/lsp/server.go
+++ b/internal/lsp/server.go
@@ -26,8 +26,8 @@ type Server struct {
maxTokens int
contextMode string
windowLines int
- maxContextTokens int
- triggerChars []string
+ maxContextTokens int
+ triggerChars []string
// If set, used as the LSP coding temperature for all LLM calls
codingTemperature *float64
// LLM request stats
@@ -39,27 +39,34 @@ type Server struct {
// Small LRU cache for recent code completion outputs (keyed by context)
compCache map[string]string
compCacheOrder []string // most-recent at end; cap ~10
- // Outgoing JSON-RPC id counter for server-initiated requests
- nextID int64
+ // Outgoing JSON-RPC id counter for server-initiated requests
+ nextID int64
// Minimum identifier chars required for manual invoke to bypass prefix checks
manualInvokeMinPrefix int
+ // Debounce and throttle settings
+ completionDebounce time.Duration
+ throttleInterval time.Duration
+ lastLLMCall time.Time
+
// Dispatch table for JSON-RPC methods → handler functions
handlers map[string]func(Request)
}
// ServerOptions collects configuration for NewServer to avoid long parameter lists.
type ServerOptions struct {
- LogContext bool
- MaxTokens int
- ContextMode string
- WindowLines int
- MaxContextTokens int
+ LogContext bool
+ MaxTokens int
+ ContextMode string
+ WindowLines int
+ MaxContextTokens int
- Client llm.Client
- TriggerCharacters []string
- CodingTemperature *float64
- ManualInvokeMinPrefix int
+ Client llm.Client
+ TriggerCharacters []string
+ CodingTemperature *float64
+ ManualInvokeMinPrefix int
+ CompletionDebounceMs int
+ CompletionThrottleMs int
}
func NewServer(r io.Reader, w io.Writer, logger *log.Logger, opts ServerOptions) *Server {
@@ -93,9 +100,15 @@ func NewServer(r io.Reader, w io.Writer, logger *log.Logger, opts ServerOptions)
} else {
s.triggerChars = append([]string{}, opts.TriggerCharacters...)
}
- s.codingTemperature = opts.CodingTemperature
- s.compCache = make(map[string]string)
- s.manualInvokeMinPrefix = opts.ManualInvokeMinPrefix
+ s.codingTemperature = opts.CodingTemperature
+ s.compCache = make(map[string]string)
+ s.manualInvokeMinPrefix = opts.ManualInvokeMinPrefix
+ if opts.CompletionDebounceMs > 0 {
+ s.completionDebounce = time.Duration(opts.CompletionDebounceMs) * time.Millisecond
+ }
+ if opts.CompletionThrottleMs > 0 {
+ s.throttleInterval = time.Duration(opts.CompletionThrottleMs) * time.Millisecond
+ }
// Initialize dispatch table
s.handlers = map[string]func(Request){
"initialize": s.handleInitialize,