diff options
Diffstat (limited to 'internal/statsengine')
| -rw-r--r-- | internal/statsengine/bench_test.go | 29 | ||||
| -rw-r--r-- | internal/statsengine/syscall.go | 79 | ||||
| -rw-r--r-- | internal/statsengine/syscall_test.go | 37 |
3 files changed, 122 insertions, 23 deletions
diff --git a/internal/statsengine/bench_test.go b/internal/statsengine/bench_test.go new file mode 100644 index 0000000..27f17b1 --- /dev/null +++ b/internal/statsengine/bench_test.go @@ -0,0 +1,29 @@ +package statsengine + +import ( + "ior/internal/types" + "math/rand" + "testing" + "time" +) + +func BenchmarkSyscallAccumulatorSnapshot(b *testing.B) { + acc := newSyscallAccumulatorWithConfig(10_000, rand.New(rand.NewSource(123))) + traceIDs := []types.TraceId{ + types.SYS_ENTER_READ, + types.SYS_ENTER_WRITE, + types.SYS_ENTER_OPENAT, + types.SYS_ENTER_CLOSE, + types.SYS_ENTER_COPY_FILE_RANGE, + } + for i := 0; i < 100_000; i++ { + id := traceIDs[i%len(traceIDs)] + acc.Add(newPair(id, uint64((i%2000)+1), uint64(i%65536), 0)) + } + + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = acc.Snapshot(5 * time.Second) + } +} diff --git a/internal/statsengine/syscall.go b/internal/statsengine/syscall.go index fe54cb4..6c34f4a 100644 --- a/internal/statsengine/syscall.go +++ b/internal/statsengine/syscall.go @@ -10,6 +10,7 @@ import ( ) const syscallReservoirSampleCapDefault = 10_000 +const syscallPercentileRecomputeStepDefault = 256 type syscallAccumulator struct { byID map[types.TraceId]*syscallStats @@ -30,18 +31,26 @@ type syscallStats struct { seenLatencies uint64 samples []uint64 + + sampleVersion uint64 + lastPercentileVersion uint64 + cachedP50 uint64 + cachedP95 uint64 + cachedP99 uint64 } type syscallSnapshotInput struct { - traceID types.TraceId - name string - count uint64 - errorCount uint64 - totalBytes uint64 - totalLatency uint64 - minLatency uint64 - maxLatency uint64 - sortedSamples []uint64 + traceID types.TraceId + name string + count uint64 + errorCount uint64 + totalBytes uint64 + totalLatency uint64 + minLatency uint64 + maxLatency uint64 + p50Latency uint64 + p95Latency uint64 + p99Latency uint64 } func newSyscallAccumulator() *syscallAccumulator { @@ -101,18 +110,19 @@ func (a *syscallAccumulator) snapshotInputs() []syscallSnapshotInput { inputs := make([]syscallSnapshotInput, 0, len(a.byID)) for _, stats := range a.byID { - sorted := append([]uint64(nil), stats.samples...) - sort.Slice(sorted, func(i, j int) bool { return sorted[i] < sorted[j] }) + stats.ensurePercentiles() inputs = append(inputs, syscallSnapshotInput{ - traceID: stats.traceID, - name: stats.name, - count: stats.count, - errorCount: stats.errorCount, - totalBytes: stats.totalBytes, - totalLatency: stats.totalLatency, - minLatency: stats.minLatency, - maxLatency: stats.maxLatency, - sortedSamples: sorted, + traceID: stats.traceID, + name: stats.name, + count: stats.count, + errorCount: stats.errorCount, + totalBytes: stats.totalBytes, + totalLatency: stats.totalLatency, + minLatency: stats.minLatency, + maxLatency: stats.maxLatency, + p50Latency: stats.cachedP50, + p95Latency: stats.cachedP95, + p99Latency: stats.cachedP99, }) } return inputs @@ -146,6 +156,7 @@ func (s *syscallStats) addSample(duration uint64, cap int, rng *rand.Rand) { s.seenLatencies++ if len(s.samples) < cap { s.samples = append(s.samples, duration) + s.sampleVersion++ return } @@ -154,6 +165,28 @@ func (s *syscallStats) addSample(duration uint64, cap int, rng *rand.Rand) { return } s.samples[idx] = duration + s.sampleVersion++ +} + +func (s *syscallStats) ensurePercentiles() { + if s.lastPercentileVersion == s.sampleVersion { + return + } + if s.lastPercentileVersion != 0 && s.sampleVersion-s.lastPercentileVersion < syscallPercentileRecomputeStepDefault { + return + } + if len(s.samples) == 0 { + s.cachedP50, s.cachedP95, s.cachedP99 = 0, 0, 0 + s.lastPercentileVersion = s.sampleVersion + return + } + + sorted := append([]uint64(nil), s.samples...) + sort.Slice(sorted, func(i, j int) bool { return sorted[i] < sorted[j] }) + s.cachedP50 = samplePercentile(sorted, 0.50) + s.cachedP95 = samplePercentile(sorted, 0.95) + s.cachedP99 = samplePercentile(sorted, 0.99) + s.lastPercentileVersion = s.sampleVersion } func (s syscallSnapshotInput) toSnapshot(rateDiv float64) SyscallSnapshot { @@ -167,9 +200,9 @@ func (s syscallSnapshotInput) toSnapshot(rateDiv float64) SyscallSnapshot { LatencyMinNs: s.minLatency, LatencyMaxNs: s.maxLatency, LatencyMeanNs: float64(s.totalLatency) / float64(maxU64(s.count, 1)), - LatencyP50Ns: samplePercentile(s.sortedSamples, 0.50), - LatencyP95Ns: samplePercentile(s.sortedSamples, 0.95), - LatencyP99Ns: samplePercentile(s.sortedSamples, 0.99), + LatencyP50Ns: s.p50Latency, + LatencyP95Ns: s.p95Latency, + LatencyP99Ns: s.p99Latency, } } diff --git a/internal/statsengine/syscall_test.go b/internal/statsengine/syscall_test.go index ad29026..1ebe214 100644 --- a/internal/statsengine/syscall_test.go +++ b/internal/statsengine/syscall_test.go @@ -106,6 +106,43 @@ func TestSyscallAccumulatorZeroElapsedRate(t *testing.T) { } } +func TestSyscallAccumulatorPercentilesRecomputeAfterThreshold(t *testing.T) { + acc := newSyscallAccumulatorWithConfig(10_000, rand.New(rand.NewSource(11))) + traceID := types.SYS_ENTER_READ + + for i := 1; i <= 1000; i++ { + acc.Add(newPair(traceID, uint64(i), 0, 0)) + } + _ = acc.Snapshot(1 * time.Second) + + stats := acc.byID[traceID] + if stats == nil { + t.Fatalf("expected syscall stats") + } + initialVersion := stats.lastPercentileVersion + initialP50 := stats.cachedP50 + if initialVersion == 0 { + t.Fatalf("expected initial percentile computation") + } + + acc.Add(newPair(traceID, 50000, 0, 0)) + _ = acc.Snapshot(1 * time.Second) + if stats.lastPercentileVersion != initialVersion { + t.Fatalf("expected percentile recompute to be deferred; got %d want %d", stats.lastPercentileVersion, initialVersion) + } + if stats.cachedP50 != initialP50 { + t.Fatalf("expected cached p50 to remain unchanged before threshold") + } + + for i := 0; i < syscallPercentileRecomputeStepDefault; i++ { + acc.Add(newPair(traceID, uint64(60000+i), 0, 0)) + } + _ = acc.Snapshot(1 * time.Second) + if stats.lastPercentileVersion != stats.sampleVersion { + t.Fatalf("expected percentile recompute after threshold, last=%d sample=%d", stats.lastPercentileVersion, stats.sampleVersion) + } +} + func newPair(traceID types.TraceId, duration uint64, bytes uint64, ret int64) *event.Pair { return &event.Pair{ EnterEv: &types.RetEvent{TraceId: traceID}, |
