diff options
| author | Paul Buetow <paul@buetow.org> | 2025-09-03 16:00:26 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2025-09-03 16:00:26 +0300 |
| commit | ffe9ed5531b6e62706ea555c48964ea0e560b780 (patch) | |
| tree | 81974f771543827f4c0743f5f1d66f5fbd06a2bd | |
| parent | 71f0d04bd558433cebf1b05845c9fa0e2957eba8 (diff) | |
Phase 2: add configurable completion debounce

- App config: completion_debounce_ms (default 200)
- Server: wait until no input for debounce before LLM calls
- Applies to chat and provider-native completion paths
- Tests: add debounce and adjust to verify behavior

All unit tests pass.
| -rw-r--r-- | TODO.md | 4 | ||||
| -rw-r--r-- | internal/appconfig/config.go | 18 | ||||
| -rw-r--r-- | internal/hexailsp/run.go | 2 | ||||
| -rw-r--r-- | internal/lsp/debounce_throttle_test.go | 84 | ||||
| -rw-r--r-- | internal/lsp/handlers_completion.go | 86 | ||||
| -rw-r--r-- | internal/lsp/server.go | 45 |
6 files changed, 216 insertions, 23 deletions
@@ -17,6 +17,10 @@ Updated tests accordingly. Phase 2: Debounce completion requests: Introduce a configurable delay (e.g., 100–500 ms) before sending a completion request to the LLM. This prevents a flood of calls while typing. +Status: Done — added `completion_debounce_ms` (default 200). Server waits until +no recent input activity for at least this duration before LLM calls (both chat +and provider-native paths). Added unit test `TestCompletionDebounce_WaitsUntilQuiet`. + Phase 3: Throttle on the server side: Beyond debouncing, implement request throttling to cap the maximum rate of LLM calls (e.g., one per 500 ms). This is especially useful when debounce alone isn’t enough under rapid editing 2 . diff --git a/internal/appconfig/config.go b/internal/appconfig/config.go index 7bcafda..2110831 100644 --- a/internal/appconfig/config.go +++ b/internal/appconfig/config.go @@ -25,6 +25,14 @@ type App struct { // to proceed without structural triggers. 0 means always allow. ManualInvokeMinPrefix int `json:"manual_invoke_min_prefix"` + // Completion debounce in milliseconds. When > 0, the server waits until + // there has been no text change for at least this duration before sending + // an LLM completion request. + CompletionDebounceMs int `json:"completion_debounce_ms"` + // Completion throttle in milliseconds. When > 0, caps the minimum spacing + // between LLM requests (both chat and code-completer paths). 
+ CompletionThrottleMs int `json:"completion_throttle_ms"` + TriggerCharacters []string `json:"trigger_characters"` Provider string `json:"provider"` @@ -59,6 +67,8 @@ func newDefaultConfig() App { OllamaTemperature: &t, CopilotTemperature: &t, ManualInvokeMinPrefix: 0, + CompletionDebounceMs: 200, + CompletionThrottleMs: 0, } } @@ -139,6 +149,8 @@ func (a *App) mergeBasics(other *App) { if other.ManualInvokeMinPrefix >= 0 { a.ManualInvokeMinPrefix = other.ManualInvokeMinPrefix } + if other.CompletionDebounceMs > 0 { a.CompletionDebounceMs = other.CompletionDebounceMs } + if other.CompletionThrottleMs > 0 { a.CompletionThrottleMs = other.CompletionThrottleMs } if len(other.TriggerCharacters) > 0 { a.TriggerCharacters = slices.Clone(other.TriggerCharacters) } @@ -238,6 +250,12 @@ func loadFromEnv(logger *log.Logger) *App { if n, ok := parseInt("HEXAI_MANUAL_INVOKE_MIN_PREFIX"); ok { out.ManualInvokeMinPrefix = n; any = true } + if n, ok := parseInt("HEXAI_COMPLETION_DEBOUNCE_MS"); ok { + out.CompletionDebounceMs = n; any = true + } + if n, ok := parseInt("HEXAI_COMPLETION_THROTTLE_MS"); ok { + out.CompletionThrottleMs = n; any = true + } if f, ok := parseFloatPtr("HEXAI_CODING_TEMPERATURE"); ok { out.CodingTemperature = f; any = true } diff --git a/internal/hexailsp/run.go b/internal/hexailsp/run.go index 1ff1ded..0df8256 100644 --- a/internal/hexailsp/run.go +++ b/internal/hexailsp/run.go @@ -116,5 +116,7 @@ func makeServerOptions(cfg appconfig.App, logContext bool, client llm.Client) ls Client: client, TriggerCharacters: cfg.TriggerCharacters, ManualInvokeMinPrefix: cfg.ManualInvokeMinPrefix, + CompletionDebounceMs: cfg.CompletionDebounceMs, + CompletionThrottleMs: cfg.CompletionThrottleMs, } } diff --git a/internal/lsp/debounce_throttle_test.go b/internal/lsp/debounce_throttle_test.go new file mode 100644 index 0000000..012ec68 --- /dev/null +++ b/internal/lsp/debounce_throttle_test.go @@ -0,0 +1,84 @@ +package lsp + +import ( + "context" + "encoding/json" + 
"testing" + "time" + "codeberg.org/snonux/hexai/internal/llm" +) + +// timeLLM records the time when Chat is invoked. +type timeLLM struct{ t time.Time } + +func (t *timeLLM) Chat(ctx context.Context, _ []llm.Message, _ ...llm.RequestOption) (string, error) { + t.t = time.Now() + return "ok", nil +} +func (t *timeLLM) Name() string { return "fake" } +func (t *timeLLM) DefaultModel() string { return "m" } + +func TestCompletionDebounce_WaitsUntilQuiet(t *testing.T) { + s := newTestServer() + s.compCache = make(map[string]string) + s.triggerChars = []string{".", ":", "/", "_"} + s.maxTokens = 32 + s.completionDebounce = 30 * time.Millisecond + s.markActivity() // simulate recent input + + f := &timeLLM{} + s.llmClient = f + + line := "func f(i int) " + p := CompletionParams{Position: Position{Line: 0, Character: len(line)}, TextDocument: TextDocumentIdentifier{URI: "file://debounce.go"}} + p.Context = json.RawMessage([]byte(`{"triggerKind":1}`)) + + start := time.Now() + _, ok := s.tryLLMCompletion(p, "", line, "", "", "", false, "") + if !ok { + t.Fatalf("expected ok=true") + } + if f.t.IsZero() { + t.Fatalf("expected LLM to be called") + } + if f.t.Sub(start) < 25*time.Millisecond { // allow minor timing noise + t.Fatalf("expected debounce delay, got %s", f.t.Sub(start)) + } +} + +func TestCompletionThrottle_SerializesCalls(t *testing.T) { + s := newTestServer() + s.compCache = make(map[string]string) + s.triggerChars = []string{".", ":", "/", "_"} + s.maxTokens = 32 + s.throttleInterval = 25 * time.Millisecond + + // first call uses timeLLM to record time + f1 := &timeLLM{} + s.llmClient = f1 + line := "func f(i int) " + p := CompletionParams{Position: Position{Line: 0, Character: len(line)}, TextDocument: TextDocumentIdentifier{URI: "file://throttle.go"}} + p.Context = json.RawMessage([]byte(`{"triggerKind":1}`)) + start := time.Now() + if _, ok := s.tryLLMCompletion(p, "", line, "", "", "", false, ""); !ok { + t.Fatalf("first call expected ok=true") + } + if 
f1.t.IsZero() { + t.Fatalf("expected first call time recorded") + } + + // second call immediately after; should be delayed by ~interval. + // Clear cache to ensure we actually call the LLM again. + s.compCache = make(map[string]string) + f2 := &timeLLM{} + s.llmClient = f2 + if _, ok := s.tryLLMCompletion(p, "", line, "", "", "", false, ""); !ok { + t.Fatalf("second call expected ok=true") + } + if f2.t.IsZero() { + t.Fatalf("expected second call time recorded") + } + if f2.t.Sub(start) < s.throttleInterval { + t.Fatalf("expected throttle spacing >= %s, got %s", s.throttleInterval, f2.t.Sub(start)) + } +} diff --git a/internal/lsp/handlers_completion.go b/internal/lsp/handlers_completion.go index 1c77024..576fc3d 100644 --- a/internal/lsp/handlers_completion.go +++ b/internal/lsp/handlers_completion.go @@ -2,13 +2,13 @@ package lsp import ( - "context" - "encoding/json" - "fmt" - "codeberg.org/snonux/hexai/internal/llm" - "codeberg.org/snonux/hexai/internal/logging" - "strings" - "time" + "context" + "encoding/json" + "fmt" + "codeberg.org/snonux/hexai/internal/llm" + "codeberg.org/snonux/hexai/internal/logging" + "strings" + "time" ) func (s *Server) handleCompletion(req Request) { @@ -120,6 +120,11 @@ func (s *Server) tryLLMCompletion(p CompletionParams, above, current, below, fun if s.codingTemperature != nil { opts = append(opts, llm.WithTemperature(*s.codingTemperature)) } + // Debounce and throttle before making the LLM call + s.waitForDebounce(ctx) + if !s.waitForThrottle(ctx) { + return nil, false + } logging.Logf("lsp ", "completion llm=requesting model=%s", s.llmClient.DefaultModel()) text, err := s.llmClient.Chat(ctx, messages, opts...) 
@@ -226,6 +231,11 @@ func (s *Server) tryProviderNativeCompletion(current string, p CompletionParams, ctx2, cancel2 := context.WithTimeout(context.Background(), 8*time.Second) defer cancel2() + // Debounce and throttle prior to provider-native call + s.waitForDebounce(ctx2) + if !s.waitForThrottle(ctx2) { + return nil, false + } suggestions, err := cc.CodeCompletion(ctx2, prompt, after, 1, lang, temp) if err == nil && len(suggestions) > 0 { cleaned := strings.TrimSpace(suggestions[0]) @@ -252,6 +262,68 @@ func (s *Server) tryProviderNativeCompletion(current string, p CompletionParams, return nil, false } +// waitForDebounce sleeps until there has been no input activity for at least +// completionDebounce. If debounce is zero or ctx is done, it returns promptly. +func (s *Server) waitForDebounce(ctx context.Context) { + d := s.completionDebounce + if d <= 0 { + return + } + for { + s.mu.RLock() + last := s.lastInput + s.mu.RUnlock() + if last.IsZero() { + return + } + since := time.Since(last) + if since >= d { + return + } + rem := d - since + timer := time.NewTimer(rem) + select { + case <-ctx.Done(): + timer.Stop() + return + case <-timer.C: + // loop and re-evaluate in case input occurred during sleep + } + } +} + +// waitForThrottle enforces a minimum spacing between LLM calls. Returns false +// if the context is canceled while waiting. 
+func (s *Server) waitForThrottle(ctx context.Context) bool { + interval := s.throttleInterval + if interval <= 0 { + return true + } + var wait time.Duration + for { + s.mu.Lock() + next := s.lastLLMCall.Add(interval) + now := time.Now() + if now.Before(next) { + wait = next.Sub(now) + s.mu.Unlock() + timer := time.NewTimer(wait) + select { + case <-ctx.Done(): + timer.Stop() + return false + case <-timer.C: + // try again to set the next call time + continue + } + } + // we are allowed to proceed now; record this call as the latest + s.lastLLMCall = now + s.mu.Unlock() + return true + } +} + // buildCompletionMessages constructs the LLM messages for completion. func (s *Server) buildCompletionMessages(inlinePrompt, hasExtra bool, extraText string, inParams bool, p CompletionParams, above, current, below, funcCtx string) []llm.Message { sysPrompt, userPrompt := buildPrompts(inParams, p, above, current, below, funcCtx) diff --git a/internal/lsp/server.go b/internal/lsp/server.go index 2f834ba..8af64ec 100644 --- a/internal/lsp/server.go +++ b/internal/lsp/server.go @@ -26,8 +26,8 @@ type Server struct { maxTokens int contextMode string windowLines int - maxContextTokens int - triggerChars []string + maxContextTokens int + triggerChars []string // If set, used as the LSP coding temperature for all LLM calls codingTemperature *float64 // LLM request stats @@ -39,27 +39,34 @@ type Server struct { // Small LRU cache for recent code completion outputs (keyed by context) compCache map[string]string compCacheOrder []string // most-recent at end; cap ~10 - // Outgoing JSON-RPC id counter for server-initiated requests - nextID int64 + // Outgoing JSON-RPC id counter for server-initiated requests + nextID int64 // Minimum identifier chars required for manual invoke to bypass prefix checks manualInvokeMinPrefix int + // Debounce and throttle settings + completionDebounce time.Duration + throttleInterval time.Duration + lastLLMCall time.Time + // Dispatch table for JSON-RPC 
methods → handler functions handlers map[string]func(Request) } // ServerOptions collects configuration for NewServer to avoid long parameter lists. type ServerOptions struct { - LogContext bool - MaxTokens int - ContextMode string - WindowLines int - MaxContextTokens int + LogContext bool + MaxTokens int + ContextMode string + WindowLines int + MaxContextTokens int - Client llm.Client - TriggerCharacters []string - CodingTemperature *float64 - ManualInvokeMinPrefix int + Client llm.Client + TriggerCharacters []string + CodingTemperature *float64 + ManualInvokeMinPrefix int + CompletionDebounceMs int + CompletionThrottleMs int } func NewServer(r io.Reader, w io.Writer, logger *log.Logger, opts ServerOptions) *Server { @@ -93,9 +100,15 @@ func NewServer(r io.Reader, w io.Writer, logger *log.Logger, opts ServerOptions) } else { s.triggerChars = append([]string{}, opts.TriggerCharacters...) } - s.codingTemperature = opts.CodingTemperature - s.compCache = make(map[string]string) - s.manualInvokeMinPrefix = opts.ManualInvokeMinPrefix + s.codingTemperature = opts.CodingTemperature + s.compCache = make(map[string]string) + s.manualInvokeMinPrefix = opts.ManualInvokeMinPrefix + if opts.CompletionDebounceMs > 0 { + s.completionDebounce = time.Duration(opts.CompletionDebounceMs) * time.Millisecond + } + if opts.CompletionThrottleMs > 0 { + s.throttleInterval = time.Duration(opts.CompletionThrottleMs) * time.Millisecond + } // Initialize dispatch table s.handlers = map[string]func(Request){ "initialize": s.handleInitialize, |
