diff options
| author | Paul Buetow <paul@buetow.org> | 2026-05-13 14:41:28 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2026-05-13 14:41:28 +0300 |
| commit | de3405c275898c8cd528a636dbd40e1b685cfaa5 (patch) | |
| tree | 53711959800677aa694e18331476add24a21b8ef /internal/statsengine/engine.go | |
| parent | d392eebe5bd127e1573734321b0cabaad4182d7c (diff) | |
use errgroup instead of WaitGroup for concurrent snapshot builders
Replace sync.WaitGroup with errgroup.Group in buildSubSnapshots so errors from
sub-builders (buildSyscallSnapshots, buildFileSnapshots, buildProcessSnapshots,
buildHistogramSnapshot) are captured and propagated rather than silently dropped.
Change Engine.Snapshot() to return (*Snapshot, error), update runtime.SnapshotSource
and dashboard.SnapshotSource interfaces accordingly, and adjust all callers in
tui.go, dashboard/model.go, and the test helpers.
Each sub-builder now returns (result, error); the error return is currently
always nil but establishes the contract for future validation. The per-type
Snapshot() convenience methods (histogram, syscall, file, process) panic on
error since they are internal helpers where failure would be a programming bug.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'internal/statsengine/engine.go')
| -rw-r--r-- | internal/statsengine/engine.go | 62 |
1 files changed, 46 insertions, 16 deletions
diff --git a/internal/statsengine/engine.go b/internal/statsengine/engine.go index 02aee37..f9602a9 100644 --- a/internal/statsengine/engine.go +++ b/internal/statsengine/engine.go @@ -5,6 +5,8 @@ import ( "sync" "time" + "golang.org/x/sync/errgroup" + "ior/internal/event" "ior/internal/types" ) @@ -206,22 +208,45 @@ func (e *Engine) captureSnapshotInputs() snapshotInputs { } // buildSubSnapshots runs all five per-category snapshot builders concurrently -// and returns their results bundled together. -func buildSubSnapshots(in snapshotInputs, elapsed time.Duration) subSnapshots { +// using errgroup so that any error from a sub-builder is captured and returned +// to the caller instead of being silently dropped. +func buildSubSnapshots(in snapshotInputs, elapsed time.Duration) (subSnapshots, error) { var ( ss subSnapshots - wg sync.WaitGroup + eg errgroup.Group ) - wg.Add(5) - go func() { defer wg.Done(); ss.syscalls = buildSyscallSnapshots(in.syscalls, elapsed) }() - go func() { defer wg.Done(); ss.files = buildFileSnapshots(in.files) }() - go func() { defer wg.Done(); ss.processes = buildProcessSnapshots(in.processes, elapsed) }() - go func() { defer wg.Done(); ss.latencyHist = buildHistogramSnapshot(in.latencyHist) }() - go func() { defer wg.Done(); ss.gapHist = buildHistogramSnapshot(in.gapHist) }() - wg.Wait() + eg.Go(func() error { + var err error + ss.syscalls, err = buildSyscallSnapshots(in.syscalls, elapsed) + return err + }) + eg.Go(func() error { + var err error + ss.files, err = buildFileSnapshots(in.files) + return err + }) + eg.Go(func() error { + var err error + ss.processes, err = buildProcessSnapshots(in.processes, elapsed) + return err + }) + eg.Go(func() error { + var err error + ss.latencyHist, err = buildHistogramSnapshot(in.latencyHist) + return err + }) + eg.Go(func() error { + var err error + ss.gapHist, err = buildHistogramSnapshot(in.gapHist) + return err + }) + + if err := eg.Wait(); err != nil { + return subSnapshots{}, err + } - return ss + return ss, nil } // populateSnapshotFields fills in the scalar fields of the snapshot from the @@ -247,15 +272,20 @@ func populateSnapshotFields(snap *Snapshot, in snapshotInputs, elapsed time.Dura // Snapshot returns an immutable point-in-time view of all stats. // It captures engine state under the lock, then builds sub-snapshots -// concurrently, and finally assembles the result without holding the lock. -func (e *Engine) Snapshot() *Snapshot { +// concurrently via errgroup, and finally assembles the result without holding +// the lock. An error is returned if any sub-builder fails. +func (e *Engine) Snapshot() (*Snapshot, error) { if e == nil { - return nil + return nil, nil } in := e.captureSnapshotInputs() elapsed := nonNegativeDuration(in.now.Sub(in.startedAt)) - ss := buildSubSnapshots(in, elapsed) + + ss, err := buildSubSnapshots(in, elapsed) + if err != nil { + return nil, err + } snap := NewSnapshot( in.latencySeries, in.gapSeries, in.throughputSeries, @@ -264,7 +294,7 @@ func (e *Engine) Snapshot() *Snapshot { ) populateSnapshotFields(&snap, in, elapsed) - return &snap + return &snap, nil } func safeMean(total uint64, count uint64) float64 { |
