summary | refs | log | tree | commit | diff
path: root/internal
diff options
context:
space:
mode:
author: Paul Buetow <paul@buetow.org> 2026-05-13 09:58:13 +0300
committer: Paul Buetow <paul@buetow.org> 2026-05-13 09:58:13 +0300
commit: 7dd508503595b023de03d78de487b4f52c7a98f0 (patch)
tree: 37a84992aca5710710944a2773d4ff6d9b53bacd /internal
parent: 8b586811571a9a3935a73deb47e6f37bb0c9bcbf (diff)
fix: add 30s startup timeout to startTraceCmd to prevent indefinite hang on BPF attach failure
If kernel lock contention or another issue causes BPF probe attachment to stall, the TUI previously remained in the 'Attaching tracepoints...' spinner state forever.

startTraceCmdWithTimeout now races the starter goroutine against a configurable deadline (defaultStartupTimeout = 30s) and returns a TracingErrorMsg with a clear message when the deadline expires. The stuck goroutine is cleaned up when the caller cancels the trace context on the next user action (e.g. traceLifecycle.stop).

Two new tests cover the timeout and context-cancel paths.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'internal')
-rw-r--r-- internal/tui/tracelifecycle.go 45
-rw-r--r-- internal/tui/tui_test.go 52
2 files changed, 91 insertions, 6 deletions
diff --git a/internal/tui/tracelifecycle.go b/internal/tui/tracelifecycle.go
index 5877cee..0117169 100644
--- a/internal/tui/tracelifecycle.go
+++ b/internal/tui/tracelifecycle.go
@@ -51,18 +51,51 @@ func (t *traceLifecycle) stop() {
}
}
+// defaultStartupTimeout is the deadline startTraceCmd applies to the trace
+// starter (BPF probe attachment). If the starter has not returned within
+// this window the TUI receives a TracingErrorMsg instead of spinning in the
+// "Attaching tracepoints..." state indefinitely.
+//
+// The starter is not waited for after the deadline; see
+// startTraceCmdWithTimeout for how a still-running starter is wound down.
+// NOTE(review): that cleanup relies on the starter observing ctx
+// cancellation; an attach blocked inside the kernel may not — confirm.
+const defaultStartupTimeout = 30 * time.Second
+
// startTraceCmd wraps a TraceStarter in a tea.Cmd that handles context
// cancellation gracefully (returns nil so the caller does not treat a
-// user-initiated stop as an error).
+// user-initiated stop as an error). It delegates to
+// startTraceCmdWithTimeout with defaultStartupTimeout, so a starter that
+// never returns yields a TracingErrorMsg rather than hanging the TUI.
+// NOTE(review): ctx is the second parameter; Go convention puts
+// context.Context first, but the order is kept for existing callers.
func startTraceCmd(starter TraceStarter, ctx context.Context) tea.Cmd {
+ return startTraceCmdWithTimeout(starter, ctx, defaultStartupTimeout)
+}
+
+// startTraceCmdWithTimeout is the testable core of startTraceCmd. It runs
+// starter in its own goroutine and races it against timeout, so tests can
+// use a short deadline without waiting for defaultStartupTimeout.
+//
+// The returned command yields:
+//   - TracingStartedMsg when starter returns nil before the deadline;
+//   - nil when starter returns an error wrapping context.Canceled, which is
+//     treated as a user-initiated stop rather than a failure;
+//   - TracingErrorMsg for any other starter error, or when the deadline
+//     expires first.
+//
+// starter receives a context derived from ctx. That derived context is
+// cancelled as soon as the command gives up (deadline or error), so a
+// context-aware starter unwinds immediately instead of lingering, and
+// possibly finishing the attach behind an error screen, until the caller
+// cancels ctx. After a successful start the derived context is left alone
+// and ends only when ctx does.
+func startTraceCmdWithTimeout(starter TraceStarter, ctx context.Context, timeout time.Duration) tea.Cmd {
 	return func() tea.Msg {
-		if err := starter(ctx); err != nil {
-			if errors.Is(err, context.Canceled) {
-				return nil
-			}
-			return TracingErrorMsg{Err: err}
-		}
-		return TracingStartedMsg{}
+		startCtx, cancelStart := context.WithCancel(ctx)
+		started := false
+		defer func() {
+			// Keep startCtx alive only when tracing actually started;
+			// every other exit tells the starter to stop.
+			if !started {
+				cancelStart()
+			}
+		}()
+
+		// Buffered so the starter goroutine can always deliver its result
+		// and exit, even when nobody is receiving any more.
+		done := make(chan error, 1)
+		go func() { done <- starter(startCtx) }()
+
+		deadline := time.NewTimer(timeout)
+		defer deadline.Stop()
+
+		select {
+		case err := <-done:
+			switch {
+			case err == nil:
+				started = true
+				return TracingStartedMsg{}
+			case errors.Is(err, context.Canceled):
+				return nil
+			default:
+				return TracingErrorMsg{Err: err}
+			}
+		case <-deadline.C:
+			// BPF probe attachment did not complete in time. The deferred
+			// cancelStart above signals the starter to give up.
+			return TracingErrorMsg{Err: fmt.Errorf(
+				"trace startup timed out after %s: BPF probe attachment did not complete",
+				timeout,
+			)}
+		}
 	}
 }
diff --git a/internal/tui/tui_test.go b/internal/tui/tui_test.go
index f0d4c2f..ba0f8ed 100644
--- a/internal/tui/tui_test.go
+++ b/internal/tui/tui_test.go
@@ -221,6 +221,58 @@ func TestStartTraceCmdEmitsErrorMsg(t *testing.T) {
}
}
+// TestStartTraceCmdTimeoutEmitsErrorMsg checks the deadline path of
+// startTraceCmdWithTimeout: when the starter never returns on its own, the
+// command must come back with a TracingErrorMsg once the timeout elapses
+// instead of blocking the TUI forever.
+func TestStartTraceCmdTimeoutEmitsErrorMsg(t *testing.T) {
+	parent, stop := context.WithCancel(context.Background())
+	defer stop()
+
+	// Simulates a hung BPF attach: returns only once its context is done.
+	hung := func(c context.Context) error {
+		<-c.Done()
+		return c.Err()
+	}
+
+	// A short deadline keeps the test fast.
+	msg := startTraceCmdWithTimeout(hung, parent, 50*time.Millisecond)()
+
+	switch got := msg.(type) {
+	case TracingErrorMsg:
+		if got.Err == nil {
+			t.Fatal("expected non-nil error in TracingErrorMsg")
+		}
+		if !strings.Contains(got.Err.Error(), "timed out") {
+			t.Fatalf("expected timeout message, got: %v", got.Err)
+		}
+	default:
+		t.Fatalf("expected TracingErrorMsg on timeout, got %T", msg)
+	}
+}
+
+// TestStartTraceCmdContextCancelledBeforeTimeoutReturnsNil checks that a
+// context cancelled before the deadline counts as a user-initiated stop:
+// the command must return nil rather than an error message.
+func TestStartTraceCmdContextCancelledBeforeTimeoutReturnsNil(t *testing.T) {
+	parent, stop := context.WithCancel(context.Background())
+	// Cancel up front so the starter returns well before the timeout.
+	stop()
+
+	// Returns its context's error as soon as that context is done.
+	waitForCancel := func(c context.Context) error {
+		<-c.Done()
+		return c.Err()
+	}
+
+	if msg := startTraceCmdWithTimeout(waitForCancel, parent, 5*time.Second)(); msg != nil {
+		t.Fatalf("expected nil msg on context cancel, got %T: %v", msg, msg)
+	}
+}
+
func TestQuitInvokesTraceStop(t *testing.T) {
m := NewModel(-1, func(context.Context) error { return nil })
m.screen = ScreenDashboard