diff options
| -rw-r--r-- | internal/runtime/runtime.go | 4 | ||||
| -rw-r--r-- | internal/statsengine/engine.go | 62 | ||||
| -rw-r--r-- | internal/statsengine/engine_reset_test.go | 10 | ||||
| -rw-r--r-- | internal/statsengine/engine_test.go | 10 | ||||
| -rw-r--r-- | internal/statsengine/filerank.go | 15 | ||||
| -rw-r--r-- | internal/statsengine/histogram.go | 15 | ||||
| -rw-r--r-- | internal/statsengine/process.go | 15 | ||||
| -rw-r--r-- | internal/statsengine/syscall.go | 15 | ||||
| -rw-r--r-- | internal/tui/dashboard/model.go | 13 | ||||
| -rw-r--r-- | internal/tui/tui.go | 17 | ||||
| -rw-r--r-- | internal/tui/tui_test.go | 13 |
11 files changed, 144 insertions, 45 deletions
diff --git a/internal/runtime/runtime.go b/internal/runtime/runtime.go index d6500d4..e8e278e 100644 --- a/internal/runtime/runtime.go +++ b/internal/runtime/runtime.go @@ -38,8 +38,10 @@ type EventSink interface { // SnapshotSource provides statsengine snapshots for the TUI dashboard. // The core tracing engine passes a *statsengine.Engine; the TUI stores it // behind this interface so the dashboard can retrieve live snapshots. +// Snapshot returns (nil, nil) when the engine is nil. A non-nil error +// indicates that snapshot construction failed and the result must be discarded. type SnapshotSource interface { - Snapshot() *statsengine.Snapshot + Snapshot() (*statsengine.Snapshot, error) } // Snapshotter is the read-only subset of the trie contract used by consumers diff --git a/internal/statsengine/engine.go b/internal/statsengine/engine.go index 02aee37..f9602a9 100644 --- a/internal/statsengine/engine.go +++ b/internal/statsengine/engine.go @@ -5,6 +5,8 @@ import ( "sync" "time" + "golang.org/x/sync/errgroup" + "ior/internal/event" "ior/internal/types" ) @@ -206,22 +208,45 @@ func (e *Engine) captureSnapshotInputs() snapshotInputs { } // buildSubSnapshots runs all five per-category snapshot builders concurrently -// and returns their results bundled together. -func buildSubSnapshots(in snapshotInputs, elapsed time.Duration) subSnapshots { +// using errgroup so that any error from a sub-builder is captured and returned +// to the caller instead of being silently dropped. +func buildSubSnapshots(in snapshotInputs, elapsed time.Duration) (subSnapshots, error) { var ( ss subSnapshots - wg sync.WaitGroup + eg errgroup.Group ) - wg.Add(5) - go func() { defer wg.Done(); ss.syscalls = buildSyscallSnapshots(in.syscalls, elapsed) }() - go func() { defer wg.Done(); ss.files = buildFileSnapshots(in.files) }() - go func() { defer wg.Done(); ss.processes = buildProcessSnapshots(in.processes, elapsed) }() - go func() { defer wg.Done(); ss.latencyHist = buildHistogramSnapshot(in.latencyHist) }() - go func() { defer wg.Done(); ss.gapHist = buildHistogramSnapshot(in.gapHist) }() - wg.Wait() + eg.Go(func() error { + var err error + ss.syscalls, err = buildSyscallSnapshots(in.syscalls, elapsed) + return err + }) + eg.Go(func() error { + var err error + ss.files, err = buildFileSnapshots(in.files) + return err + }) + eg.Go(func() error { + var err error + ss.processes, err = buildProcessSnapshots(in.processes, elapsed) + return err + }) + eg.Go(func() error { + var err error + ss.latencyHist, err = buildHistogramSnapshot(in.latencyHist) + return err + }) + eg.Go(func() error { + var err error + ss.gapHist, err = buildHistogramSnapshot(in.gapHist) + return err + }) + + if err := eg.Wait(); err != nil { + return subSnapshots{}, err + } - return ss + return ss, nil } // populateSnapshotFields fills in the scalar fields of the snapshot from the @@ -247,15 +272,20 @@ func populateSnapshotFields(snap *Snapshot, in snapshotInputs, elapsed time.Dura // Snapshot returns an immutable point-in-time view of all stats. // It captures engine state under the lock, then builds sub-snapshots -// concurrently, and finally assembles the result without holding the lock. -func (e *Engine) Snapshot() *Snapshot { +// concurrently via errgroup, and finally assembles the result without holding +// the lock. An error is returned if any sub-builder fails. +func (e *Engine) Snapshot() (*Snapshot, error) { if e == nil { - return nil + return nil, nil } in := e.captureSnapshotInputs() elapsed := nonNegativeDuration(in.now.Sub(in.startedAt)) - ss := buildSubSnapshots(in, elapsed) + + ss, err := buildSubSnapshots(in, elapsed) + if err != nil { + return nil, err + } snap := NewSnapshot( in.latencySeries, in.gapSeries, in.throughputSeries, @@ -264,7 +294,7 @@ func (e *Engine) Snapshot() *Snapshot { ) populateSnapshotFields(&snap, in, elapsed) - return &snap + return &snap, nil } func safeMean(total uint64, count uint64) float64 { diff --git a/internal/statsengine/engine_reset_test.go b/internal/statsengine/engine_reset_test.go index e798686..c09c059 100644 --- a/internal/statsengine/engine_reset_test.go +++ b/internal/statsengine/engine_reset_test.go @@ -10,13 +10,19 @@ import ( func TestEngineResetClearsAccumulatedStats(t *testing.T) { e := NewEngine(8) e.Ingest(newEnginePair(types.SYS_ENTER_READ, 7, types.READ_CLASSIFIED, "test", 1, "/tmp/a", 7, 1000, 50)) - before := e.Snapshot() + before, err := e.Snapshot() + if err != nil { + t.Fatalf("unexpected snapshot error: %v", err) + } if before.TotalSyscalls == 0 { t.Fatalf("expected non-zero totals before reset") } e.Reset() - after := e.Snapshot() + after, err := e.Snapshot() + if err != nil { + t.Fatalf("unexpected snapshot error after reset: %v", err) + } if after.TotalSyscalls != 0 || after.TotalBytes != 0 || after.TotalErrors != 0 { t.Fatalf("expected totals cleared after reset, got %+v", after) } diff --git a/internal/statsengine/engine_test.go b/internal/statsengine/engine_test.go index 7ba8c3a..f714844 100644 --- a/internal/statsengine/engine_test.go +++ b/internal/statsengine/engine_test.go @@ -33,7 +33,10 @@ func TestEngineIngestAndSnapshotIntegration(t *testing.T) { engine.Ingest(newEnginePair(types.SYS_ENTER_COPY_FILE_RANGE, 80, types.TRANSFER_CLASSIFIED, "proc-b", 2, "/tmp/b", 20, 40, 8)) clock.Advance(1 * time.Second) - snap := engine.Snapshot() + snap, err := engine.Snapshot() + if err != nil { + t.Fatalf("unexpected snapshot error: %v", err) + } if snap == nil { t.Fatalf("expected snapshot") } @@ -79,7 +82,10 @@ func TestEngineSnapshotWithNoEvents(t *testing.T) { clock := &fakeClock{now: time.Unix(2000, 0)} engine := newEngineWithClock(10, clock.Now) - snap := engine.Snapshot() + snap, err := engine.Snapshot() + if err != nil { + t.Fatalf("unexpected snapshot error: %v", err) + } if snap == nil { t.Fatalf("expected snapshot") } diff --git a/internal/statsengine/filerank.go b/internal/statsengine/filerank.go index ef43c6e..a397381 100644 --- a/internal/statsengine/filerank.go +++ b/internal/statsengine/filerank.go @@ -93,12 +93,18 @@ func (r *fileRanker) Add(pair *event.Pair) { r.compactIfNeeded() } +// Snapshot returns a slice of FileSnapshots for all tracked files. +// It panics on build error, which should never happen for a valid ranker. func (r *fileRanker) Snapshot() []FileSnapshot { if r == nil { return nil } - return buildFileSnapshots(r.snapshotInputs()) + snap, err := buildFileSnapshots(r.snapshotInputs()) + if err != nil { + panic("buildFileSnapshots: " + err.Error()) + } + return snap } func (r *fileRanker) snapshotInputs() []fileSnapshotInput { @@ -119,7 +125,10 @@ func (r *fileRanker) snapshotInputs() []fileSnapshotInput { return inputs } -func buildFileSnapshots(inputs []fileSnapshotInput) []FileSnapshot { +// buildFileSnapshots converts raw file ranker inputs into sorted FileSnapshot +// slices. The error return is reserved for future validation; currently this +// function always succeeds. +func buildFileSnapshots(inputs []fileSnapshotInput) ([]FileSnapshot, error) { out := make([]FileSnapshot, 0, len(inputs)) for _, in := range inputs { out = append(out, in.toSnapshot()) @@ -130,7 +139,7 @@ func buildFileSnapshots(inputs []fileSnapshotInput) []FileSnapshot { } return cmp.Compare(a.Path, b.Path) }) - return out + return out, nil } func (r *fileRanker) addBytes(stats *fileRankStats, pair *event.Pair) { diff --git a/internal/statsengine/histogram.go b/internal/statsengine/histogram.go index 42460ea..4a5e3b4 100644 --- a/internal/statsengine/histogram.go +++ b/internal/statsengine/histogram.go @@ -47,12 +47,18 @@ func (h *histogram) Increment(durationNs uint64) { h.total++ } +// Snapshot returns a HistogramSnapshot of the current histogram state. +// It panics on build error, which should never happen for a valid histogram. func (h *histogram) Snapshot() HistogramSnapshot { if h == nil { return NewHistogramSnapshot(0, nil) } - return buildHistogramSnapshot(h.snapshotInputs()) + snap, err := buildHistogramSnapshot(h.snapshotInputs()) + if err != nil { + panic("buildHistogramSnapshot: " + err.Error()) + } + return snap } func (h *histogram) snapshotInputs() histogramSnapshotInput { @@ -65,7 +71,10 @@ func (h *histogram) snapshotInputs() histogramSnapshotInput { } } -func buildHistogramSnapshot(in histogramSnapshotInput) HistogramSnapshot { +// buildHistogramSnapshot converts a histogramSnapshotInput into a +// HistogramSnapshot. The error return is reserved for future validation; +// currently this function always succeeds. +func buildHistogramSnapshot(in histogramSnapshotInput) (HistogramSnapshot, error) { buckets := make([]HistogramBucketSnapshot, 0, histogramBucketCount) for i := 0; i < histogramBucketCount; i++ { lower, upper := histogramBucketRange(i) @@ -77,7 +86,7 @@ func buildHistogramSnapshot(in histogramSnapshotInput) HistogramSnapshot { }) } - return NewHistogramSnapshot(in.total, buckets) + return NewHistogramSnapshot(in.total, buckets), nil } func histogramBucketIndex(durationNs uint64) int { diff --git a/internal/statsengine/process.go b/internal/statsengine/process.go index b7eb6e7..a52be38 100644 --- a/internal/statsengine/process.go +++ b/internal/statsengine/process.go @@ -84,12 +84,18 @@ func (a *processAccumulator) Add(pair *event.Pair) { a.compactIfNeeded() } +// Snapshot returns a slice of ProcessSnapshots for all tracked processes. +// It panics on build error, which should never happen for a valid accumulator. func (a *processAccumulator) Snapshot(elapsed time.Duration) []ProcessSnapshot { if a == nil { return nil } - return buildProcessSnapshots(a.snapshotInputs(), elapsed) + snap, err := buildProcessSnapshots(a.snapshotInputs(), elapsed) + if err != nil { + panic("buildProcessSnapshots: " + err.Error()) + } + return snap } func (a *processAccumulator) snapshotInputs() []processSnapshotInput { @@ -110,7 +116,10 @@ func (a *processAccumulator) snapshotInputs() []processSnapshotInput { return inputs } -func buildProcessSnapshots(inputs []processSnapshotInput, elapsed time.Duration) []ProcessSnapshot { +// buildProcessSnapshots converts raw process accumulator inputs into sorted +// ProcessSnapshot slices. The error return is reserved for future validation; +// currently this function always succeeds. +func buildProcessSnapshots(inputs []processSnapshotInput, elapsed time.Duration) ([]ProcessSnapshot, error) { rateDiv := elapsed.Seconds() result := make([]ProcessSnapshot, 0, len(inputs)) for _, in := range inputs { @@ -125,7 +134,7 @@ func buildProcessSnapshots(inputs []processSnapshotInput, elapsed time.Duration) } return cmp.Compare(a.PID, b.PID) }) - return result + return result, nil } func (a *processAccumulator) compactIfNeeded() { diff --git a/internal/statsengine/syscall.go b/internal/statsengine/syscall.go index 2ef929a..d58e8c9 100644 --- a/internal/statsengine/syscall.go +++ b/internal/statsengine/syscall.go @@ -99,12 +99,18 @@ func (a *syscallAccumulator) Add(pair *event.Pair) { } } +// Snapshot returns a slice of SyscallSnapshots for all tracked syscalls. +// It panics on build error, which should never happen for a valid accumulator. func (a *syscallAccumulator) Snapshot(elapsed time.Duration) []SyscallSnapshot { if a == nil { return nil } - return buildSyscallSnapshots(a.snapshotInputs(), elapsed) + snap, err := buildSyscallSnapshots(a.snapshotInputs(), elapsed) + if err != nil { + panic("buildSyscallSnapshots: " + err.Error()) + } + return snap } func (a *syscallAccumulator) snapshotInputs() []syscallSnapshotInput { @@ -132,7 +138,10 @@ func (a *syscallAccumulator) snapshotInputs() []syscallSnapshotInput { return inputs } -func buildSyscallSnapshots(inputs []syscallSnapshotInput, elapsed time.Duration) []SyscallSnapshot { +// buildSyscallSnapshots converts raw syscall accumulator inputs into sorted +// SyscallSnapshot slices. The error return is reserved for future validation; +// currently this function always succeeds. +func buildSyscallSnapshots(inputs []syscallSnapshotInput, elapsed time.Duration) ([]SyscallSnapshot, error) { rateDiv := elapsed.Seconds() result := make([]SyscallSnapshot, 0, len(inputs)) for _, in := range inputs { @@ -144,7 +153,7 @@ func buildSyscallSnapshots(inputs []syscallSnapshotInput, elapsed time.Duration) } return cmp.Compare(a.Name, b.Name) }) - return result + return result, nil } func (s *syscallStats) updateMinMax(duration uint64) { diff --git a/internal/tui/dashboard/model.go b/internal/tui/dashboard/model.go index 8a3c5d4..cc2a052 100644 --- a/internal/tui/dashboard/model.go +++ b/internal/tui/dashboard/model.go @@ -874,11 +874,18 @@ func (m Model) maxProcessesRows() int { return m.snapshotOrZero().ProcessesCount() } +// snapshot returns the latest engine snapshot, or nil if the engine is nil or +// returns an error. Errors are silently dropped here because the dashboard +// renders the last successful snapshot on transient failures. func (m Model) snapshot() *statsengine.Snapshot { if m.engine == nil { return nil } - return m.engine.Snapshot() + snap, err := m.engine.Snapshot() + if err != nil { + return nil + } + return snap } func (m Model) snapshotOrZero() statsengine.Snapshot { @@ -893,10 +900,12 @@ func (m *Model) resetBaselineCmd() tea.Cmd { m.liveTrie.Reset() } + // Errors from Snapshot are silently dropped here; the dashboard will + // continue to display the last successful snapshot. var snap *statsengine.Snapshot if resettable, ok := m.engine.(resettableSnapshotSource); ok { resettable.Reset() - snap = resettable.Snapshot() + snap, _ = resettable.Snapshot() } else { snap = m.snapshot() } diff --git a/internal/tui/tui.go b/internal/tui/tui.go index eee4252..2cebaf2 100644 --- a/internal/tui/tui.go +++ b/internal/tui/tui.go @@ -225,6 +225,9 @@ func (r *runtimeBindings) advanceFilterEpoch() uint64 { return r.filterEpoch.Add(1) } +// resetDashboardSnapshotSource resets the dashboard snapshot source if it +// implements the resettable interface, then returns a fresh snapshot. Errors +// from Snapshot are silently dropped since callers handle a nil snapshot. func (r *runtimeBindings) resetDashboardSnapshotSource() *statsengine.Snapshot { src := r.dashboardSnapshotSource() if src == nil { @@ -232,10 +235,11 @@ func (r *runtimeBindings) resetDashboardSnapshotSource() *statsengine.Snapshot { } if resettable, ok := src.(interface { Reset() - Snapshot() *statsengine.Snapshot + Snapshot() (*statsengine.Snapshot, error) }); ok { resettable.Reset() - return resettable.Snapshot() + snap, _ := resettable.Snapshot() + return snap } return nil } @@ -1216,13 +1220,16 @@ type lateBoundDashboardSource struct { runtime *runtimeBindings } -func (s lateBoundDashboardSource) Snapshot() *statsengine.Snapshot { +// Snapshot returns a point-in-time dashboard snapshot from the underlying +// source, or (nil, nil) when no source is available. Errors are forwarded to +// the caller so they can decide how to handle a failed snapshot build. +func (s lateBoundDashboardSource) Snapshot() (*statsengine.Snapshot, error) { if s.runtime == nil { - return nil + return nil, nil } source := s.runtime.dashboardSnapshotSource() if source == nil { - return nil + return nil, nil } return source.Snapshot() } diff --git a/internal/tui/tui_test.go b/internal/tui/tui_test.go index 5f007e4..d76ccde 100644 --- a/internal/tui/tui_test.go +++ b/internal/tui/tui_test.go @@ -459,8 +459,8 @@ type fakeDashboardSource struct { snap *statsengine.Snapshot } -func (f fakeDashboardSource) Snapshot() *statsengine.Snapshot { - return f.snap +func (f fakeDashboardSource) Snapshot() (*statsengine.Snapshot, error) { + return f.snap, nil } type fakeResettableDashboardSource struct { @@ -468,8 +468,8 @@ type fakeResettableDashboardSource struct { resetCalls int } -func (f *fakeResettableDashboardSource) Snapshot() *statsengine.Snapshot { - return f.snap +func (f *fakeResettableDashboardSource) Snapshot() (*statsengine.Snapshot, error) { + return f.snap, nil } func (f *fakeResettableDashboardSource) Reset() { @@ -484,7 +484,10 @@ func TestDashboardRefreshPicksLateBoundSource(t *testing.T) { want := &statsengine.Snapshot{TotalSyscalls: 77} runtime.SetDashboardSnapshotSource(fakeDashboardSource{snap: want}) - got := source.Snapshot() + got, err := source.Snapshot() + if err != nil { + t.Fatalf("unexpected snapshot error: %v", err) + } if got != want { t.Fatalf("expected late-bound source to use latest runtime source") } |
