summaryrefslogtreecommitdiff
path: root/internal/statsengine/filerank.go
diff options
context:
space:
mode:
Diffstat (limited to 'internal/statsengine/filerank.go')
-rw-r--r--internal/statsengine/filerank.go207
1 files changed, 207 insertions, 0 deletions
diff --git a/internal/statsengine/filerank.go b/internal/statsengine/filerank.go
new file mode 100644
index 0000000..9054ff7
--- /dev/null
+++ b/internal/statsengine/filerank.go
@@ -0,0 +1,207 @@
+package statsengine
+
+import (
+ "container/heap"
+ "ior/internal/event"
+ "ior/internal/types"
+ "sort"
+)
+
+const fileRankTopNDefault = 20
+
+type fileRanker struct {
+ topN int
+ maxSeen int
+ byPath map[string]*fileRankStats
+ topHeap fileRankHeap
+}
+
+type fileRankStats struct {
+ path string
+ accesses uint64
+ bytesRead uint64
+ bytesWritten uint64
+ totalLatency uint64
+ maxLatency uint64
+ heapIndex int
+}
+
+type fileRankHeap []*fileRankStats
+
+func newFileRanker() *fileRanker {
+ return newFileRankerWithConfig(fileRankTopNDefault)
+}
+
+func newFileRankerWithConfig(topN int) *fileRanker {
+ if topN <= 0 {
+ topN = fileRankTopNDefault
+ }
+ return newFileRankerWithLimits(topN, topN*32)
+}
+
+func newFileRankerWithLimits(topN int, maxSeen int) *fileRanker {
+ if topN <= 0 {
+ topN = fileRankTopNDefault
+ }
+ if maxSeen < topN {
+ maxSeen = topN
+ }
+
+ r := &fileRanker{
+ topN: topN,
+ maxSeen: maxSeen,
+ byPath: make(map[string]*fileRankStats),
+ }
+ heap.Init(&r.topHeap)
+ return r
+}
+
+func (r *fileRanker) Add(pair *event.Pair) {
+ if r == nil || pair == nil || pair.File == nil {
+ return
+ }
+
+ path := pair.File.Name()
+ if path == "" || path == "N:file" {
+ return
+ }
+
+ stats := r.byPath[path]
+ if stats == nil {
+ stats = &fileRankStats{path: path, heapIndex: -1}
+ r.byPath[path] = stats
+ }
+
+ stats.accesses++
+ stats.totalLatency += pair.Duration
+ if pair.Duration > stats.maxLatency {
+ stats.maxLatency = pair.Duration
+ }
+ r.addBytes(stats, pair)
+ r.updateHeap(stats)
+ r.compactIfNeeded()
+}
+
+func (r *fileRanker) Snapshot() []FileSnapshot {
+ if r == nil {
+ return nil
+ }
+
+ out := make([]FileSnapshot, 0, len(r.topHeap))
+ for _, stats := range r.topHeap {
+ out = append(out, stats.snapshot())
+ }
+
+ sort.Slice(out, func(i, j int) bool {
+ if out[i].Accesses != out[j].Accesses {
+ return out[i].Accesses > out[j].Accesses
+ }
+ return out[i].Path < out[j].Path
+ })
+
+ return out
+}
+
+func (r *fileRanker) addBytes(stats *fileRankStats, pair *event.Pair) {
+ retEv, ok := pair.ExitEv.(*types.RetEvent)
+ if !ok {
+ return
+ }
+
+ switch retEv.RetType {
+ case types.READ_CLASSIFIED:
+ stats.bytesRead += pair.Bytes
+ case types.WRITE_CLASSIFIED:
+ stats.bytesWritten += pair.Bytes
+ case types.TRANSFER_CLASSIFIED:
+ // Transfer syscalls move bytes in both directions from a file-centric view.
+ stats.bytesRead += pair.Bytes
+ stats.bytesWritten += pair.Bytes
+ }
+}
+
+func (r *fileRanker) updateHeap(stats *fileRankStats) {
+ if stats.heapIndex >= 0 {
+ heap.Fix(&r.topHeap, stats.heapIndex)
+ return
+ }
+
+ if len(r.topHeap) < r.topN {
+ heap.Push(&r.topHeap, stats)
+ return
+ }
+
+ worst := r.topHeap[0]
+ if !betterFileRank(stats, worst) {
+ return
+ }
+
+ heap.Pop(&r.topHeap)
+ heap.Push(&r.topHeap, stats)
+}
+
+func (r *fileRanker) compactIfNeeded() {
+ if len(r.byPath) <= r.maxSeen {
+ return
+ }
+
+ // Keep only currently top-ranked paths once cardinality crosses the guard.
+ kept := make(map[string]*fileRankStats, len(r.topHeap))
+ for _, stats := range r.topHeap {
+ kept[stats.path] = stats
+ }
+ r.byPath = kept
+}
+
+func (s *fileRankStats) snapshot() FileSnapshot {
+ avg := 0.0
+ if s.accesses > 0 {
+ avg = float64(s.totalLatency) / float64(s.accesses)
+ }
+
+ return FileSnapshot{
+ Path: s.path,
+ Accesses: s.accesses,
+ BytesRead: s.bytesRead,
+ BytesWritten: s.bytesWritten,
+ AvgLatencyNs: avg,
+ MaxLatencyNs: s.maxLatency,
+ }
+}
+
+func betterFileRank(a, b *fileRankStats) bool {
+ if a.accesses != b.accesses {
+ return a.accesses > b.accesses
+ }
+ return a.path < b.path
+}
+
+func (h fileRankHeap) Len() int {
+ return len(h)
+}
+
+func (h fileRankHeap) Less(i, j int) bool {
+ // Keep the worst-ranked item at root for O(log N) eviction.
+ return betterFileRank(h[j], h[i])
+}
+
+func (h fileRankHeap) Swap(i, j int) {
+ h[i], h[j] = h[j], h[i]
+ h[i].heapIndex = i
+ h[j].heapIndex = j
+}
+
+func (h *fileRankHeap) Push(x any) {
+ stats := x.(*fileRankStats)
+ stats.heapIndex = len(*h)
+ *h = append(*h, stats)
+}
+
+func (h *fileRankHeap) Pop() any {
+ old := *h
+ n := len(old)
+ stats := old[n-1]
+ stats.heapIndex = -1
+ *h = old[:n-1]
+ return stats
+}