summaryrefslogtreecommitdiff
path: root/internal/statsengine
diff options
context:
space:
mode:
Diffstat (limited to 'internal/statsengine')
-rw-r--r--internal/statsengine/bench_test.go29
-rw-r--r--internal/statsengine/syscall.go79
-rw-r--r--internal/statsengine/syscall_test.go37
3 files changed, 122 insertions, 23 deletions
diff --git a/internal/statsengine/bench_test.go b/internal/statsengine/bench_test.go
new file mode 100644
index 0000000..27f17b1
--- /dev/null
+++ b/internal/statsengine/bench_test.go
@@ -0,0 +1,29 @@
+package statsengine
+
+import (
+ "ior/internal/types"
+ "math/rand"
+ "testing"
+ "time"
+)
+
+func BenchmarkSyscallAccumulatorSnapshot(b *testing.B) {
+ acc := newSyscallAccumulatorWithConfig(10_000, rand.New(rand.NewSource(123)))
+ traceIDs := []types.TraceId{
+ types.SYS_ENTER_READ,
+ types.SYS_ENTER_WRITE,
+ types.SYS_ENTER_OPENAT,
+ types.SYS_ENTER_CLOSE,
+ types.SYS_ENTER_COPY_FILE_RANGE,
+ }
+ for i := 0; i < 100_000; i++ {
+ id := traceIDs[i%len(traceIDs)]
+ acc.Add(newPair(id, uint64((i%2000)+1), uint64(i%65536), 0))
+ }
+
+ b.ReportAllocs()
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ _ = acc.Snapshot(5 * time.Second)
+ }
+}
diff --git a/internal/statsengine/syscall.go b/internal/statsengine/syscall.go
index fe54cb4..6c34f4a 100644
--- a/internal/statsengine/syscall.go
+++ b/internal/statsengine/syscall.go
@@ -10,6 +10,7 @@ import (
)
const syscallReservoirSampleCapDefault = 10_000
+const syscallPercentileRecomputeStepDefault = 256
type syscallAccumulator struct {
byID map[types.TraceId]*syscallStats
@@ -30,18 +31,26 @@ type syscallStats struct {
seenLatencies uint64
samples []uint64
+
+ sampleVersion uint64
+ lastPercentileVersion uint64
+ cachedP50 uint64
+ cachedP95 uint64
+ cachedP99 uint64
}
type syscallSnapshotInput struct {
- traceID types.TraceId
- name string
- count uint64
- errorCount uint64
- totalBytes uint64
- totalLatency uint64
- minLatency uint64
- maxLatency uint64
- sortedSamples []uint64
+ traceID types.TraceId
+ name string
+ count uint64
+ errorCount uint64
+ totalBytes uint64
+ totalLatency uint64
+ minLatency uint64
+ maxLatency uint64
+ p50Latency uint64
+ p95Latency uint64
+ p99Latency uint64
}
func newSyscallAccumulator() *syscallAccumulator {
@@ -101,18 +110,19 @@ func (a *syscallAccumulator) snapshotInputs() []syscallSnapshotInput {
inputs := make([]syscallSnapshotInput, 0, len(a.byID))
for _, stats := range a.byID {
- sorted := append([]uint64(nil), stats.samples...)
- sort.Slice(sorted, func(i, j int) bool { return sorted[i] < sorted[j] })
+ stats.ensurePercentiles()
inputs = append(inputs, syscallSnapshotInput{
- traceID: stats.traceID,
- name: stats.name,
- count: stats.count,
- errorCount: stats.errorCount,
- totalBytes: stats.totalBytes,
- totalLatency: stats.totalLatency,
- minLatency: stats.minLatency,
- maxLatency: stats.maxLatency,
- sortedSamples: sorted,
+ traceID: stats.traceID,
+ name: stats.name,
+ count: stats.count,
+ errorCount: stats.errorCount,
+ totalBytes: stats.totalBytes,
+ totalLatency: stats.totalLatency,
+ minLatency: stats.minLatency,
+ maxLatency: stats.maxLatency,
+ p50Latency: stats.cachedP50,
+ p95Latency: stats.cachedP95,
+ p99Latency: stats.cachedP99,
})
}
return inputs
@@ -146,6 +156,7 @@ func (s *syscallStats) addSample(duration uint64, cap int, rng *rand.Rand) {
s.seenLatencies++
if len(s.samples) < cap {
s.samples = append(s.samples, duration)
+ s.sampleVersion++
return
}
@@ -154,6 +165,28 @@ func (s *syscallStats) addSample(duration uint64, cap int, rng *rand.Rand) {
return
}
s.samples[idx] = duration
+ s.sampleVersion++
+}
+
+func (s *syscallStats) ensurePercentiles() {
+ if s.lastPercentileVersion == s.sampleVersion {
+ return
+ }
+ if s.lastPercentileVersion != 0 && s.sampleVersion-s.lastPercentileVersion < syscallPercentileRecomputeStepDefault {
+ return
+ }
+ if len(s.samples) == 0 {
+ s.cachedP50, s.cachedP95, s.cachedP99 = 0, 0, 0
+ s.lastPercentileVersion = s.sampleVersion
+ return
+ }
+
+ sorted := append([]uint64(nil), s.samples...)
+ sort.Slice(sorted, func(i, j int) bool { return sorted[i] < sorted[j] })
+ s.cachedP50 = samplePercentile(sorted, 0.50)
+ s.cachedP95 = samplePercentile(sorted, 0.95)
+ s.cachedP99 = samplePercentile(sorted, 0.99)
+ s.lastPercentileVersion = s.sampleVersion
}
func (s syscallSnapshotInput) toSnapshot(rateDiv float64) SyscallSnapshot {
@@ -167,9 +200,9 @@ func (s syscallSnapshotInput) toSnapshot(rateDiv float64) SyscallSnapshot {
LatencyMinNs: s.minLatency,
LatencyMaxNs: s.maxLatency,
LatencyMeanNs: float64(s.totalLatency) / float64(maxU64(s.count, 1)),
- LatencyP50Ns: samplePercentile(s.sortedSamples, 0.50),
- LatencyP95Ns: samplePercentile(s.sortedSamples, 0.95),
- LatencyP99Ns: samplePercentile(s.sortedSamples, 0.99),
+ LatencyP50Ns: s.p50Latency,
+ LatencyP95Ns: s.p95Latency,
+ LatencyP99Ns: s.p99Latency,
}
}
diff --git a/internal/statsengine/syscall_test.go b/internal/statsengine/syscall_test.go
index ad29026..1ebe214 100644
--- a/internal/statsengine/syscall_test.go
+++ b/internal/statsengine/syscall_test.go
@@ -106,6 +106,43 @@ func TestSyscallAccumulatorZeroElapsedRate(t *testing.T) {
}
}
+func TestSyscallAccumulatorPercentilesRecomputeAfterThreshold(t *testing.T) {
+ acc := newSyscallAccumulatorWithConfig(10_000, rand.New(rand.NewSource(11)))
+ traceID := types.SYS_ENTER_READ
+
+ for i := 1; i <= 1000; i++ {
+ acc.Add(newPair(traceID, uint64(i), 0, 0))
+ }
+ _ = acc.Snapshot(1 * time.Second)
+
+ stats := acc.byID[traceID]
+ if stats == nil {
+ t.Fatalf("expected syscall stats")
+ }
+ initialVersion := stats.lastPercentileVersion
+ initialP50 := stats.cachedP50
+ if initialVersion == 0 {
+ t.Fatalf("expected initial percentile computation")
+ }
+
+ acc.Add(newPair(traceID, 50000, 0, 0))
+ _ = acc.Snapshot(1 * time.Second)
+ if stats.lastPercentileVersion != initialVersion {
+ t.Fatalf("expected percentile recompute to be deferred; got %d want %d", stats.lastPercentileVersion, initialVersion)
+ }
+ if stats.cachedP50 != initialP50 {
+ t.Fatalf("expected cached p50 to remain unchanged before threshold")
+ }
+
+ for i := 0; i < syscallPercentileRecomputeStepDefault; i++ {
+ acc.Add(newPair(traceID, uint64(60000+i), 0, 0))
+ }
+ _ = acc.Snapshot(1 * time.Second)
+ if stats.lastPercentileVersion != stats.sampleVersion {
+ t.Fatalf("expected percentile recompute after threshold, last=%d sample=%d", stats.lastPercentileVersion, stats.sampleVersion)
+ }
+}
+
func newPair(traceID types.TraceId, duration uint64, bytes uint64, ret int64) *event.Pair {
return &event.Pair{
EnterEv: &types.RetEvent{TraceId: traceID},