diff options
| author | Paul Buetow <paul@buetow.org> | 2026-05-26 22:31:40 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2026-05-26 22:31:40 +0300 |
| commit | 6bfa0031cc7c903c16baaca2d0f504be26fb828c (patch) | |
| tree | 0d3c002eaed4c6e02f12cbffd7054bd07989e0fe /internal/flamegraph | |
| parent | f42d4f4f0b9d3faf38d2f3c3a9753a03440cdd24 (diff) | |
flamegraph: add LiveTrie height metric ingestion (task qo)
Diffstat (limited to 'internal/flamegraph')
| -rw-r--r-- | internal/flamegraph/livetrie.go | 109 | ||||
| -rw-r--r-- | internal/flamegraph/livetrie_test.go | 122 |
2 files changed, 180 insertions, 51 deletions
diff --git a/internal/flamegraph/livetrie.go b/internal/flamegraph/livetrie.go index a682a0a..a510a72 100644 --- a/internal/flamegraph/livetrie.go +++ b/internal/flamegraph/livetrie.go @@ -20,20 +20,22 @@ const ( ) type SnapshotNode struct { - Name string `json:"n"` - Value uint64 `json:"v"` - Total uint64 `json:"t"` - Children []*SnapshotNode `json:"c,omitempty"` + Name string `json:"n"` + Value uint64 `json:"v"` + Total uint64 `json:"t"` + HeightTotal uint64 `json:"ht,omitempty"` + Children []*SnapshotNode `json:"c,omitempty"` } // LiveTrie is a thread-safe, append-only trie used for live flamegraph snapshots. type LiveTrie struct { - mu sync.RWMutex - root *trieNode - maxDepth int - version atomic.Uint64 - fields []string - countField string + mu sync.RWMutex + root *trieNode + maxDepth int + version atomic.Uint64 + fields []string + countField string + heightField string // Snapshot cache avoids recomputing JSON when version is unchanged. cacheMu sync.Mutex @@ -48,22 +50,26 @@ type LiveTrie struct { treeCache *SnapshotNode } -// NewLiveTrie constructs an empty live trie with the configured frame/count fields. -func NewLiveTrie(fields []string, countField string) *LiveTrie { +// NewLiveTrie constructs an empty live trie with the configured frame/count/height fields. +func NewLiveTrie(fields []string, countField, heightField string) *LiveTrie { if !isLiveTrieCountField(countField) { countField = "count" } + if heightField != "" && !isLiveTrieCountField(heightField) { + heightField = "" + } return &LiveTrie{ root: &trieNode{ childMap: make(map[string]*trieNode), }, - fields: slices.Clone(fields), - countField: countField, + fields: slices.Clone(fields), + countField: countField, + heightField: heightField, } } -func (lt *LiveTrie) addLocked(frames []string, value uint64) { - insertTriePath(lt.root, frames, value, value) +func (lt *LiveTrie) addLocked(frames []string, value, heightValue uint64) { + insertTriePath(lt.root, frames, value, heightValue) if len(frames) > lt.maxDepth { lt.maxDepth = len(frames) } @@ -100,10 +106,17 @@ func (lt *LiveTrie) AddRecord(record IterRecord) { if err != nil { return } + heightValue := uint64(0) + if lt.heightField != "" { + heightValue, err = record.Cnt.ValueByName(lt.heightField) + if err != nil { + return + } + } lt.mu.Lock() frames := lt.buildFrames(record) - lt.addLocked(frames, value) + lt.addLocked(frames, value, heightValue) lt.version.Add(1) lt.mu.Unlock() } @@ -132,6 +145,14 @@ func (lt *LiveTrie) CountField() string { return field } +// HeightField returns the active metric used to aggregate node heights. +func (lt *LiveTrie) HeightField() string { + lt.mu.RLock() + field := lt.heightField + lt.mu.RUnlock() + return field +} + // SetCountField changes the active aggregation metric and starts a new baseline. func (lt *LiveTrie) SetCountField(countField string) error { field := strings.TrimSpace(countField) @@ -151,6 +172,25 @@ func (lt *LiveTrie) SetCountField(countField string) error { return nil } +// SetHeightField changes the active height metric and starts a new baseline. +func (lt *LiveTrie) SetHeightField(heightField string) error { + field := strings.TrimSpace(heightField) + if field != "" && !isLiveTrieCountField(field) { + return fmt.Errorf("invalid height field %q", heightField) + } + + lt.mu.Lock() + if lt.heightField == field { + lt.mu.Unlock() + return nil + } + lt.heightField = field + lt.resetLocked() + lt.mu.Unlock() + lt.invalidateCache() + return nil +} + // Reconfigure changes frame fields and clears accumulated data for a new baseline. func (lt *LiveTrie) Reconfigure(fields []string) error { normalized, err := normalizeLiveTrieFields(fields) @@ -303,18 +343,20 @@ func subtreeTotal(node *trieNode) uint64 { } func buildSnapshot(node *trieNode, depth int, minFraction float64, rootTotal uint64) *SnapshotNode { - snapshot, _ := buildSnapshotWithTotal(node, depth, minFraction, rootTotal, false) + snapshot, _, _ := buildSnapshotWithTotal(node, depth, minFraction, rootTotal, false) return snapshot } type childSnapshotState struct { - node *trieNode - snapshot *SnapshotNode - total uint64 + node *trieNode + snapshot *SnapshotNode + total uint64 + heightTotal uint64 } -func buildSnapshotWithTotal(node *trieNode, depth int, minFraction float64, rootTotal uint64, forceKeep bool) (*SnapshotNode, uint64) { +func buildSnapshotWithTotal(node *trieNode, depth int, minFraction float64, rootTotal uint64, forceKeep bool) (*SnapshotNode, uint64, uint64) { total := node.value + heightTotal := node.heightValue children := slices.Clone(node.children) slices.SortFunc(children, func(a, b *trieNode) int { return cmp.Compare(a.name, b.name) @@ -322,17 +364,19 @@ func buildSnapshotWithTotal(node *trieNode, depth int, minFraction float64, root childStates := make([]childSnapshotState, 0, len(children)) for _, child := range children { - childSnapshot, childTotal := buildSnapshotWithTotal(child, depth+1, minFraction, rootTotal, false) + childSnapshot, childTotal, childHeightTotal := buildSnapshotWithTotal(child, depth+1, minFraction, rootTotal, false) total += childTotal + heightTotal += childHeightTotal childStates = append(childStates, childSnapshotState{ - node: child, - snapshot: childSnapshot, - total: childTotal, + node: child, + snapshot: childSnapshot, + total: childTotal, + heightTotal: childHeightTotal, }) } if !forceKeep && depth > 0 && rootTotal > 0 && float64(total)/float64(rootTotal) < minFraction { - return nil, total + return nil, total, heightTotal } ensureFallbackVisibleChildren(childStates, depth, minFraction, rootTotal) @@ -344,14 +388,15 @@ func buildSnapshotWithTotal(node *trieNode, depth int, minFraction float64, root } snapshot := &SnapshotNode{ - Name: node.name, - Value: node.value, - Total: total, + Name: node.name, + Value: node.value, + Total: total, + HeightTotal: heightTotal, } if len(childSnapshots) > 0 { snapshot.Children = childSnapshots } - return snapshot, total + return snapshot, total, heightTotal } func ensureFallbackVisibleChildren(children []childSnapshotState, depth int, minFraction float64, rootTotal uint64) { @@ -389,7 +434,7 @@ func ensureFallbackVisibleChildren(children []childSnapshotState, depth int, min } for i := 0; i < limit; i++ { idx := candidates[i] - forced, _ := buildSnapshotWithTotal(children[idx].node, depth+1, minFraction, rootTotal, true) + forced, _, _ := buildSnapshotWithTotal(children[idx].node, depth+1, minFraction, rootTotal, true) children[idx].snapshot = forced } } diff --git a/internal/flamegraph/livetrie_test.go b/internal/flamegraph/livetrie_test.go index 6a825c0..32e2b40 100644 --- a/internal/flamegraph/livetrie_test.go +++ b/internal/flamegraph/livetrie_test.go @@ -17,7 +17,7 @@ import ( ) func TestLiveTrieIngestAndSnapshotRoundTrip(t *testing.T) { - lt := NewLiveTrie([]string{"comm", "pid"}, "count") + lt := NewLiveTrie([]string{"comm", "pid"}, "count", "count") lt.Ingest(newTestPair("svc", 42, 1001, "/tmp/a", 1, 2, 3)) snap := decodeLiveSnapshot(t, lt) @@ -34,7 +34,7 @@ func TestLiveTrieIngestAndSnapshotRoundTrip(t *testing.T) { } func TestLiveTrieIngestIsAdditive(t *testing.T) { - lt := NewLiveTrie([]string{"path"}, "bytes") + lt := NewLiveTrie([]string{"path"}, "bytes", "bytes") lt.Ingest(newTestPair("svc", 42, 1001, "/tmp/a", 1, 2, 10)) lt.Ingest(newTestPair("svc", 42, 1001, "/tmp/a", 3, 4, 15)) @@ -49,7 +49,7 @@ func TestLiveTrieIngestIsAdditive(t *testing.T) { } func TestLiveTrieIngestCopiesBeforeRecycle(t *testing.T) { - lt := NewLiveTrie([]string{"comm", "path"}, "count") + lt := NewLiveTrie([]string{"comm", "path"}, "count", "count") pair := newTestPair("svc", 42, 1001, "/tmp/a", 1, 2, 3) lt.Ingest(pair) @@ -66,7 +66,7 @@ func TestLiveTrieIngestCopiesBeforeRecycle(t *testing.T) { } func TestLiveTrieCommTracepointPathAggregatesSameSyscallAcrossPaths(t *testing.T) { - lt := NewLiveTrie([]string{"comm", "tracepoint", "path"}, "count") + lt := NewLiveTrie([]string{"comm", "tracepoint", "path"}, "count", "count") lt.AddRecord(IterRecord{ Path: "/srv/a", TraceID: types.SYS_ENTER_READ, @@ -99,7 +99,7 @@ func TestLiveTrieCommTracepointPathAggregatesSameSyscallAcrossPaths(t *testing.T } func TestLiveTrieVersionIncrementsPerIngest(t *testing.T) { - lt := NewLiveTrie([]string{"comm"}, "count") + lt := NewLiveTrie([]string{"comm"}, "count", "count") if got := lt.Version(); got != 0 { t.Fatalf("initial version = %d, want 0", got) } @@ -112,7 +112,7 @@ func TestLiveTrieVersionIncrementsPerIngest(t *testing.T) { } func TestLiveTrieAddRecordIncrementsVersion(t *testing.T) { - lt := NewLiveTrie([]string{"comm", "path", "tracepoint"}, "count") + lt := NewLiveTrie([]string{"comm", "path", "tracepoint"}, "count", "count") lt.AddRecord(IterRecord{ Path: "/tmp/demo/read", TraceID: types.SYS_ENTER_READ, @@ -132,7 +132,7 @@ func TestLiveTrieAddRecordIncrementsVersion(t *testing.T) { } func TestSeedTestFlameDataBuildsStaticFixture(t *testing.T) { - lt := NewLiveTrie([]string{"comm", "path", "tracepoint"}, "count") + lt := NewLiveTrie([]string{"comm", "path", "tracepoint"}, "count", "count") SeedTestFlameData(lt) if got := lt.Version(); got == 0 { @@ -151,7 +151,7 @@ func TestSeedTestFlameDataBuildsStaticFixture(t *testing.T) { } func TestSeedTestLiveFlameDataVariesByTick(t *testing.T) { - lt := NewLiveTrie([]string{"comm", "path", "tracepoint"}, "count") + lt := NewLiveTrie([]string{"comm", "path", "tracepoint"}, "count", "count") SeedTestLiveFlameData(lt, 0) snapTick0 := decodeLiveSnapshot(t, lt) @@ -176,7 +176,7 @@ func TestSeedTestLiveFlameDataVariesByTick(t *testing.T) { } func TestLiveTrieResetClearsDataAndAdvancesVersion(t *testing.T) { - lt := NewLiveTrie([]string{"comm"}, "count") + lt := NewLiveTrie([]string{"comm"}, "count", "count") lt.Ingest(newTestPair("svc", 42, 1001, "/tmp/a", 1, 1, 1)) lt.Ingest(newTestPair("svc", 42, 1002, "/tmp/b", 1, 1, 1)) @@ -203,7 +203,7 @@ func TestLiveTrieResetClearsDataAndAdvancesVersion(t *testing.T) { } func TestLiveTrieReconfigureChangesOrderAndResets(t *testing.T) { - lt := NewLiveTrie([]string{"comm", "pid"}, "count") + lt := NewLiveTrie([]string{"comm", "pid"}, "count", "count") lt.Ingest(newTestPair("svc", 42, 1001, "/tmp/a", 1, 1, 1)) if err := lt.Reconfigure([]string{"path", "comm"}); err != nil { @@ -224,7 +224,7 @@ func TestLiveTrieReconfigureChangesOrderAndResets(t *testing.T) { } func TestLiveTrieReconfigureRejectsInvalidFields(t *testing.T) { - lt := NewLiveTrie([]string{"comm"}, "count") + lt := NewLiveTrie([]string{"comm"}, "count", "count") cases := [][]string{ nil, @@ -241,7 +241,7 @@ func TestLiveTrieReconfigureRejectsInvalidFields(t *testing.T) { } func TestLiveTrieSetCountFieldSwitchesMetricAndResetsBaseline(t *testing.T) { - lt := NewLiveTrie([]string{"comm"}, "count") + lt := NewLiveTrie([]string{"comm"}, "count", "count") lt.Ingest(newTestPair("svc", 42, 1001, "/tmp/a", 10, 1, 64)) initial := decodeLiveSnapshot(t, lt) @@ -273,7 +273,7 @@ func TestLiveTrieSetCountFieldSwitchesMetricAndResetsBaseline(t *testing.T) { } func TestLiveTrieSetCountFieldRejectsInvalidValue(t *testing.T) { - lt := NewLiveTrie([]string{"comm"}, "count") + lt := NewLiveTrie([]string{"comm"}, "count", "count") lt.Ingest(newTestPair("svc", 42, 1001, "/tmp/a", 1, 1, 1)) beforeVersion := lt.Version() @@ -288,8 +288,92 @@ func TestLiveTrieSetCountFieldRejectsInvalidValue(t *testing.T) { } } +func TestLiveTrieHeightFieldTracksIndependentMetric(t *testing.T) { + lt := NewLiveTrie([]string{"comm"}, "count", "bytes") + lt.Ingest(newTestPair("svc", 42, 1001, "/tmp/a", 10, 1, 64)) + lt.Ingest(newTestPair("svc", 42, 1002, "/tmp/b", 10, 1, 128)) + + snap := decodeLiveSnapshot(t, lt) + if got, want := snap.Total, uint64(2); got != want { + t.Fatalf("root total = %d, want %d", got, want) + } + if got, want := snap.HeightTotal, uint64(192); got != want { + t.Fatalf("root height total = %d, want %d", got, want) + } + leaf := findSnapshotPath(t, &snap, "svc") + if got, want := leaf.Total, uint64(2); got != want { + t.Fatalf("leaf total = %d, want %d", got, want) + } + if got, want := leaf.HeightTotal, uint64(192); got != want { + t.Fatalf("leaf height total = %d, want %d", got, want) + } +} + +func TestLiveTrieSetHeightFieldSwitchesMetricAndResetsBaseline(t *testing.T) { + lt := NewLiveTrie([]string{"comm"}, "count", "count") + lt.Ingest(newTestPair("svc", 42, 1001, "/tmp/a", 10, 1, 64)) + + initial := decodeLiveSnapshot(t, lt) + if got, want := initial.HeightTotal, uint64(1); got != want { + t.Fatalf("initial height total = %d, want %d", got, want) + } + + if err := lt.SetHeightField("bytes"); err != nil { + t.Fatalf("set height field: %v", err) + } + if got, want := lt.HeightField(), "bytes"; got != want { + t.Fatalf("height field = %q, want %q", got, want) + } + + empty := decodeLiveSnapshot(t, lt) + if got := empty.Total; got != 0 { + t.Fatalf("expected reset baseline after height metric switch, total=%d", got) + } + if got := empty.HeightTotal; got != 0 { + t.Fatalf("expected reset baseline after height metric switch, height total=%d", got) + } + + lt.Ingest(newTestPair("svc", 42, 1002, "/tmp/b", 10, 1, 64)) + next := decodeLiveSnapshot(t, lt) + if got, want := next.Total, uint64(1); got != want { + t.Fatalf("total after switch = %d, want %d", got, want) + } + if got, want := next.HeightTotal, uint64(64); got != want { + t.Fatalf("height total after switch = %d, want %d", got, want) + } +} + +func TestLiveTrieSetHeightFieldRejectsInvalidValue(t *testing.T) { + lt := NewLiveTrie([]string{"comm"}, "count", "count") + lt.Ingest(newTestPair("svc", 42, 1001, "/tmp/a", 1, 1, 1)) + beforeVersion := lt.Version() + + if err := lt.SetHeightField("bogus"); err == nil { + t.Fatalf("expected invalid height field error") + } + if got, want := lt.HeightField(), "count"; got != want { + t.Fatalf("height field changed unexpectedly: got %q want %q", got, want) + } + if got := lt.Version(); got != beforeVersion { + t.Fatalf("version changed on invalid height field: got %d want %d", got, beforeVersion) + } +} + +func TestLiveTrieHeightFieldEmptyDisablesHeightTotals(t *testing.T) { + lt := NewLiveTrie([]string{"comm"}, "count", "") + lt.Ingest(newTestPair("svc", 42, 1001, "/tmp/a", 10, 1, 64)) + + snap := decodeLiveSnapshot(t, lt) + if got, want := snap.Total, uint64(1); got != want { + t.Fatalf("root total = %d, want %d", got, want) + } + if got := snap.HeightTotal; got != 0 { + t.Fatalf("root height total = %d, want 0 when height metric disabled", got) + } +} + func TestLiveTrieSnapshotJSONCaching(t *testing.T) { - lt := NewLiveTrie([]string{"comm"}, "count") + lt := NewLiveTrie([]string{"comm"}, "count", "count") lt.Ingest(newTestPair("svc", 42, 1001, "/tmp/a", 1, 1, 1)) first, version1 := lt.SnapshotJSON() @@ -304,7 +388,7 @@ func TestLiveTrieSnapshotJSONCaching(t *testing.T) { } func TestLiveTrieSnapshotJSONPrunesTinyNodes(t *testing.T) { - lt := NewLiveTrie([]string{"comm"}, "count") + lt := NewLiveTrie([]string{"comm"}, "count", "count") for i := 0; i < 2000; i++ { lt.Ingest(newTestPair("big", 42, uint32(1000+i), "/tmp/a", 1, 1, 1)) } @@ -320,7 +404,7 @@ func TestLiveTrieSnapshotJSONPrunesTinyNodes(t *testing.T) { } func TestLiveTrieSnapshotJSONKeepsFallbackChildrenWhenAllAreTinyAtRoot(t *testing.T) { - lt := NewLiveTrie([]string{"comm"}, "count") + lt := NewLiveTrie([]string{"comm"}, "count", "count") const total = 6000 for i := 0; i < total; i++ { comm := fmt.Sprintf("svc-%04d", i) @@ -337,7 +421,7 @@ func TestLiveTrieSnapshotJSONKeepsFallbackChildrenWhenAllAreTinyAtRoot(t *testin } func TestLiveTrieSnapshotJSONKeepsFallbackChildrenAtDepthOne(t *testing.T) { - lt := NewLiveTrie([]string{"comm", "pid"}, "count") + lt := NewLiveTrie([]string{"comm", "pid"}, "count", "count") const total = 6000 for i := 0; i < total; i++ { pid := uint32(100000 + i) @@ -355,7 +439,7 @@ func TestLiveTrieSnapshotJSONKeepsFallbackChildrenAtDepthOne(t *testing.T) { } func TestLiveTrieConcurrentIngestAndSnapshot(t *testing.T) { - lt := NewLiveTrie([]string{"comm", "pid"}, "count") + lt := NewLiveTrie([]string{"comm", "pid"}, "count", "count") var wg sync.WaitGroup wg.Add(2) @@ -396,7 +480,7 @@ func TestLiveTrieStressHighRateConcurrentSnapshot(t *testing.T) { maxMemGrowth = 512 << 20 ) - lt := NewLiveTrie([]string{"path", "pid"}, "count") + lt := NewLiveTrie([]string{"path", "pid"}, "count", "count") var startMem runtime.MemStats runtime.ReadMemStats(&startMem) |
