diff options
| -rw-r--r-- | .gitignore | 1 | ||||
| -rw-r--r-- | AGENTS.md | 1 | ||||
| -rw-r--r-- | Dockerfile.el8 | 85 | ||||
| -rw-r--r-- | Magefile.go | 10 | ||||
| -rw-r--r-- | README.md | 19 | ||||
| -rw-r--r-- | internal/ior_bpfsetup.go | 10 | ||||
| -rw-r--r-- | internal/probemanager/manager.go | 17 | ||||
| -rw-r--r-- | internal/probemanager/manager_test.go | 59 | ||||
| -rw-r--r-- | internal/statsengine/filerank.go | 13 | ||||
| -rw-r--r-- | internal/statsengine/process.go | 13 | ||||
| -rw-r--r-- | internal/statsengine/snapshot.go | 21 | ||||
| -rw-r--r-- | internal/statsengine/syscall.go | 25 | ||||
| -rw-r--r-- | internal/tui/dashboard/bubbles.go | 114 | ||||
| -rw-r--r-- | internal/tui/dashboard/files.go | 14 | ||||
| -rw-r--r-- | internal/tui/dashboard/files_test.go | 6 | ||||
| -rw-r--r-- | internal/tui/dashboard/icicle.go | 20 | ||||
| -rw-r--r-- | internal/tui/dashboard/model.go | 17 | ||||
| -rw-r--r-- | internal/tui/dashboard/treemap.go | 71 | ||||
| -rw-r--r-- | internal/tui/flamegraph/controls.go | 14 | ||||
| -rw-r--r-- | internal/tui/flamegraph/model_test.go | 12 | ||||
| -rwxr-xr-x | scripts/build-with-docker-el8.sh | 56 |
21 files changed, 457 insertions, 141 deletions
@@ -11,6 +11,7 @@ perltidy.ERR *.new *.tmp /ior +/ior.el8 *.ior.zst *.collapsed *.collapsed.zst @@ -17,6 +17,7 @@ If builds/tests fail with missing libbpf headers (for example `bpf/bpf.h` not fo ```bash mage build # Build BPF object + Go binary (all is an alias) mage buildDocker # Build ior inside a Rocky Linux 9 container (writes binary to repo root) +mage buildDockerEl8 # Build ior inside a Rocky Linux 8 container (writes ior.el8 to repo root) mage test # Run all tests TEST_NAME=TestEventloop mage testWithName # Run specific test mage integrationTest # Build + run integration tests in parallel (parallelism capped to NumCPU) diff --git a/Dockerfile.el8 b/Dockerfile.el8 new file mode 100644 index 0000000..870aff4 --- /dev/null +++ b/Dockerfile.el8 @@ -0,0 +1,85 @@ +FROM rockylinux:8 + +# Update GO_VERSION here to upgrade the Go toolchain baked into the image. +ARG GO_VERSION=1.26.2 + +# Rocky 8 ships full dnf already; just add plugin support for config-manager +# and builddep below. The default Rocky 8 mirrors at dl.rockylinux.org/pub/ +# stay live until 2029-05-31, so no vault redirect is needed here. +RUN dnf install -y dnf-plugins-core && \ + dnf clean all + +# Rocky 8 calls the equivalent of Rocky 9's "crb" repo "powertools"; it carries +# zlib-static / glibc-static / elfutils-libelf-devel. The baseos-source repo is +# needed to fetch the elfutils source RPM for the libelf.a build below. +RUN dnf config-manager --set-enabled powertools && \ + dnf config-manager --set-enabled baseos-source && \ + dnf install -y epel-release && \ + dnf clean all + +# Rocky 8 default clang is too old for BPF CO-RE — enable the llvm-toolset +# module stream (ships clang 17 / LLVM 17 on the latest Rocky 8 vault). +RUN dnf module enable -y llvm-toolset && \ + dnf clean all + +# Build-time toolchain: C compiler, clang/LLVM (for BPF), bpftool, BPF/elf +# headers, static archives for zlib and glibc, and packaging helpers. +# Rocky 8 ships bpftool inside the bpftool package (same as Rocky 9). +RUN dnf install -y \ + gcc clang llvm bpftool \ + elfutils-libelf-devel \ + zlib-static glibc-static libzstd-devel \ + git make cmake wget rpmdevtools && \ + dnf builddep -y elfutils && \ + dnf clean all + +# Install Go from go.dev — Rocky 8 ships an older release, ior needs 1.26+. +RUN wget -q "https://go.dev/dl/go${GO_VERSION}.linux-amd64.tar.gz" -O /tmp/go.tar.gz && \ + tar -C /usr/local -xf /tmp/go.tar.gz && \ + rm /tmp/go.tar.gz + +ENV PATH="/usr/local/go/bin:/root/go/bin:${PATH}" +ENV GOPATH="/root/go" + +# Build libelf.a from the Rocky 8 elfutils source RPM. +# Rocky 8 (like 9) does not ship libelf.a in any binary package. +RUN mkdir -p /root/src && cd /root && \ + dnf download --source elfutils-libelf && \ + rpm -ivh elfutils-*.src.rpm && \ + tar -C /root/src -xjf rpmbuild/SOURCES/elfutils-*.tar.bz2 && \ + cd /root/src/elfutils-* && \ + ./configure --enable-deterministic-archives --disable-debuginfod --disable-libdebuginfod && \ + make -C lib -j$(nproc) && \ + make -C libelf -j$(nproc) && \ + cp -v libelf/libelf.a /usr/lib64/ && \ + rm -rf /root/src /root/rpmbuild /root/elfutils-*.src.rpm + +# Build libzstd.a from upstream — libzstd-devel does not ship the static archive. +RUN wget -q https://github.com/facebook/zstd/releases/download/v1.5.5/zstd-1.5.5.tar.gz \ + -O /tmp/zstd.tar.gz && \ + tar -C /tmp -xzf /tmp/zstd.tar.gz && \ + make -C /tmp/zstd-1.5.5/lib -j$(nproc) libzstd.a && \ + cp -v /tmp/zstd-1.5.5/lib/libzstd.a /usr/lib64/ && \ + rm -rf /tmp/zstd-1.5.5 /tmp/zstd.tar.gz + +# Clone libbpfgo at the required tag and build the static archive. +# Placed at /git/libbpfgo so it is a sibling of the ior mount at /git/ior, +# matching the default LIBBPFGO=../libbpfgo path used by Magefile.go. +RUN mkdir -p /git && \ + git clone https://github.com/aquasecurity/libbpfgo /git/libbpfgo && \ + git -C /git/libbpfgo checkout v0.9.2-libbpf-1.5.1 && \ + git -C /git/libbpfgo submodule update --init --recursive && \ + make -C /git/libbpfgo libbpfgo-static + +# Install the mage build tool +RUN go install github.com/magefile/mage@latest + +# The ior source tree is mounted at /git/ior at runtime (see build-with-docker-el8.sh). +WORKDIR /git/ior + +# Generate kernel-specific tracepoint code, compile ior, then publish the +# binary as ior.el8 so it lives alongside the el9 build artifact in the repo. +# IOR_FORCE_GENERATE=1 skips the strict diff against the committed syscall-coverage +# audit, which was generated on a different kernel build than the container host. +# The container runs as root so bpftool and /sys/kernel/tracing are used directly. +CMD ["sh", "-c", "IOR_FORCE_GENERATE=1 mage generate && mage all && mv -v ior ior.el8"] diff --git a/Magefile.go b/Magefile.go index dcde50e..540515c 100644 --- a/Magefile.go +++ b/Magefile.go @@ -41,6 +41,7 @@ const ( tracepointsGoPath = "internal/tracepoints/generated_tracepoints.go" typesGoPath = "internal/types/generated_types.go" dockerBuildScript = "scripts/build-with-docker.sh" + dockerBuildScriptEl8 = "scripts/build-with-docker-el8.sh" typesHeaderPath = "internal/c/types.h" VMLINUXPath = "internal/c/vmlinux.h" benchProfilesDir = "bench-profiles" @@ -82,6 +83,15 @@ func BuildDocker() error { return sh.RunV("bash", dockerBuildScript) } +// BuildDockerEl8 builds the ior binary inside a Rocky Linux 8 Docker container +// and writes it as ior.el8 in the repo root, alongside the el9 ior binary +// produced by BuildDocker. Use this when targeting hosts with the older glibc +// shipped on RHEL/Rocky/Alma 8. The container image is built on the first +// run (~15-20 min) and reused thereafter. +func BuildDockerEl8() error { + return sh.RunV("bash", dockerBuildScriptEl8) +} + // BpfBuild builds the embedded BPF object used by the Go binary. func BpfBuild() error { if err := ensureVMLINUX(); err != nil { @@ -31,6 +31,18 @@ The demo is fully reproducible: `mage installDemoTools` once, then `sudo -v && m ## Requirements - Docker and a Linux host with a BTF-enabled kernel (`/sys/kernel/btf/vmlinux` present). +- Go (any 1.x version on `PATH`) for installing the [Mage](https://magefile.org) build tool. + +## Install Mage + +The build orchestration uses Mage. Install the `mage` binary once before any of +the build commands below: + +```shell +go install github.com/magefile/mage@latest +``` + +Make sure `$(go env GOPATH)/bin` (typically `$HOME/go/bin`) is on your `PATH`. ## Build @@ -48,6 +60,13 @@ cached image and finish in under a minute. To skip the image rebuild: ./scripts/build-with-docker.sh --run ``` +To target hosts with the older glibc on RHEL/Rocky/Alma 8, build a sibling +binary called `ior.el8` from a Rocky Linux 8 container: + +```shell +mage buildDockerEl8 +``` + For contributors who need a native build (Fedora / Rocky Linux 9), see [docs/build-rocky-linux-9.md](./docs/build-rocky-linux-9.md) and [AGENTS.md](./AGENTS.md). diff --git a/internal/ior_bpfsetup.go b/internal/ior_bpfsetup.go index 3500106..9ab9186 100644 --- a/internal/ior_bpfsetup.go +++ b/internal/ior_bpfsetup.go @@ -3,6 +3,7 @@ package internal import ( "context" "fmt" + "os" appconfig "ior/internal/config" "ior/internal/flags" @@ -65,7 +66,14 @@ func setupBPFModule(parentCtx context.Context, cfg flags.Config) (*bpf.Module, * } mgr := probemanager.NewManager(libbpfTracepointModule{module: bpfModule}) - if err := mgr.AttachAll(cfg.ShouldIAttachTracepoint, tracepoints.List); err != nil { + // Per-syscall attach failures are non-fatal: on older kernels the + // tracepoint may be absent (e.g. binary built against a newer kernel). + // We log and skip; the affected probe stays in the manager with its + // lastErr set, so States() and the TUI surface the failure. + warn := func(syscall string, err error) { + fmt.Fprintf(os.Stderr, "ior: skipping tracepoint for %s: %v\n", syscall, err) + } + if err := mgr.AttachAll(cfg.ShouldIAttachTracepoint, tracepoints.List, warn); err != nil { mgr.Close() bpfModule.Close() return nil, nil, releaseBindings, setupBPFModuleError("attach probes", err) diff --git a/internal/probemanager/manager.go b/internal/probemanager/manager.go index 288af41..677762b 100644 --- a/internal/probemanager/manager.go +++ b/internal/probemanager/manager.go @@ -79,7 +79,17 @@ func (m *Manager) Register(syscall string, pair TracepointPair) { } // AttachAll registers and attaches all tracepoint pairs selected by shouldAttach. -func (m *Manager) AttachAll(shouldAttach func(string) bool, tpNames []string) error { +// +// If onAttachError is non-nil, per-syscall attach failures are reported through +// the callback and AttachAll continues with the remaining tracepoints. This is +// the desired mode in production: when running a binary built on a newer kernel +// against an older one, some syscalls' tracepoints may be absent and the +// corresponding attach call returns ENOENT. The error is recorded on the +// probe entry (visible via States()) regardless of the callback. +// +// If onAttachError is nil, AttachAll preserves the strict legacy behavior and +// returns the first attach error to the caller. Tests rely on this mode. +func (m *Manager) AttachAll(shouldAttach func(string) bool, tpNames []string, onAttachError func(syscall string, err error)) error { if m == nil { return errors.New("probe manager is nil") } @@ -94,7 +104,10 @@ func (m *Manager) AttachAll(shouldAttach func(string) bool, tpNames []string) er continue } if err := m.Attach(syscall); err != nil { - return err + if onAttachError == nil { + return err + } + onAttachError(syscall, err) } } return nil diff --git a/internal/probemanager/manager_test.go b/internal/probemanager/manager_test.go index dc0c474..2beb11e 100644 --- a/internal/probemanager/manager_test.go +++ b/internal/probemanager/manager_test.go @@ -97,7 +97,7 @@ func TestManagerAttachAllToggleAndCounts(t *testing.T) { err := mgr.AttachAll(func(tp string) bool { return tp == "sys_enter_read" || tp == "sys_exit_read" }, []string{ "sys_enter_read", "sys_exit_read", "sys_enter_write", "sys_exit_write", - }) + }, nil) if err != nil { t.Fatalf("AttachAll returned error: %v", err) } @@ -210,7 +210,7 @@ func TestManagerAttachWaitsForDetachBeforeReturning(t *testing.T) { errs: map[string]error{}, } mgr := NewManager(attacher) - if err := mgr.AttachAll(nil, []string{"sys_enter_close", "sys_exit_close"}); err != nil { + if err := mgr.AttachAll(nil, []string{"sys_enter_close", "sys_exit_close"}, nil); err != nil { t.Fatalf("AttachAll returned error: %v", err) } @@ -281,7 +281,7 @@ func TestManagerCloseWaitsForDetachAndDoesNotDoubleDestroy(t *testing.T) { errs: map[string]error{}, } mgr := NewManager(attacher) - if err := mgr.AttachAll(nil, []string{"sys_enter_close", "sys_exit_close"}); err != nil { + if err := mgr.AttachAll(nil, []string{"sys_enter_close", "sys_exit_close"}, nil); err != nil { t.Fatalf("AttachAll returned error: %v", err) } @@ -337,7 +337,7 @@ func TestManagerDetachDestroysLinks(t *testing.T) { errs: map[string]error{}, } mgr := NewManager(attacher) - if err := mgr.AttachAll(nil, []string{"sys_enter_close", "sys_exit_close"}); err != nil { + if err := mgr.AttachAll(nil, []string{"sys_enter_close", "sys_exit_close"}, nil); err != nil { t.Fatalf("AttachAll returned error: %v", err) } if err := mgr.Detach("close"); err != nil { @@ -359,7 +359,7 @@ func TestManagerDetachFailureKeepsActiveStateForUndetachedLink(t *testing.T) { errs: map[string]error{}, } mgr := NewManager(attacher) - if err := mgr.AttachAll(nil, []string{"sys_enter_close", "sys_exit_close"}); err != nil { + if err := mgr.AttachAll(nil, []string{"sys_enter_close", "sys_exit_close"}, nil); err != nil { t.Fatalf("AttachAll returned error: %v", err) } @@ -388,7 +388,7 @@ func TestManagerClosePreventsFurtherOperations(t *testing.T) { errs: map[string]error{}, } mgr := NewManager(attacher) - if err := mgr.AttachAll(nil, []string{"sys_enter_open", "sys_exit_open"}); err != nil { + if err := mgr.AttachAll(nil, []string{"sys_enter_open", "sys_exit_open"}, nil); err != nil { t.Fatalf("AttachAll returned error: %v", err) } if err := mgr.Close(); err != nil { @@ -407,7 +407,7 @@ func TestManagerAttachAllReturnsProgramError(t *testing.T) { }, } mgr := NewManager(attacher) - err := mgr.AttachAll(nil, []string{"sys_enter_read", "sys_exit_read"}) + err := mgr.AttachAll(nil, []string{"sys_enter_read", "sys_exit_read"}, nil) if err == nil { t.Fatalf("expected attach error") } @@ -417,6 +417,45 @@ func TestManagerAttachAllReturnsProgramError(t *testing.T) { } } +// When onAttachError is supplied, AttachAll should report each per-syscall +// failure through the callback and continue attaching the remaining probes. +// This is the path that lets a binary built on a newer kernel run on an older +// one where some tracepoints don't exist. +func TestManagerAttachAllWarnAndContinue(t *testing.T) { + attacher := &fakeAttacher{ + programs: map[string]*fakeProgram{ + "handle_sys_enter_write": {}, + "handle_sys_exit_write": {}, + }, + errs: map[string]error{ + "handle_sys_enter_read": errors.New("no such tracepoint"), + }, + } + mgr := NewManager(attacher) + + var warned []string + warn := func(syscall string, err error) { + warned = append(warned, syscall+":"+err.Error()) + } + err := mgr.AttachAll(nil, []string{ + "sys_enter_read", "sys_exit_read", + "sys_enter_write", "sys_exit_write", + }, warn) + if err != nil { + t.Fatalf("AttachAll returned error despite warn callback: %v", err) + } + if len(warned) != 1 { + t.Fatalf("expected exactly 1 warning, got %d (%v)", len(warned), warned) + } + if !strings.Contains(warned[0], "read") || !strings.Contains(warned[0], "no such tracepoint") { + t.Fatalf("unexpected warning text: %q", warned[0]) + } + active, total := mgr.ActiveCount() + if active != 1 || total != 2 { + t.Fatalf("expected write attached and read skipped, got active=%d total=%d", active, total) + } +} + func TestManagerAttachAllPicksUpNewTracepointsOnLaterCall(t *testing.T) { attacher := &fakeAttacher{ programs: map[string]*fakeProgram{ @@ -429,7 +468,7 @@ func TestManagerAttachAllPicksUpNewTracepointsOnLaterCall(t *testing.T) { } mgr := NewManager(attacher) - if err := mgr.AttachAll(nil, []string{"sys_enter_read", "sys_exit_read"}); err != nil { + if err := mgr.AttachAll(nil, []string{"sys_enter_read", "sys_exit_read"}, nil); err != nil { t.Fatalf("AttachAll(read) returned error: %v", err) } states := mgr.States() @@ -437,7 +476,7 @@ func TestManagerAttachAllPicksUpNewTracepointsOnLaterCall(t *testing.T) { t.Fatalf("expected only read after first call, got %+v", states) } - if err := mgr.AttachAll(nil, []string{"sys_enter_read", "sys_exit_read", "sys_enter_write", "sys_exit_write"}); err != nil { + if err := mgr.AttachAll(nil, []string{"sys_enter_read", "sys_exit_read", "sys_enter_write", "sys_exit_write"}, nil); err != nil { t.Fatalf("AttachAll(read+write) returned error: %v", err) } states = mgr.States() @@ -458,7 +497,7 @@ func TestManagerIsActiveReflectsCurrentState(t *testing.T) { errs: map[string]error{}, } mgr := NewManager(attacher) - if err := mgr.AttachAll(nil, []string{"sys_enter_read", "sys_exit_read"}); err != nil { + if err := mgr.AttachAll(nil, []string{"sys_enter_read", "sys_exit_read"}, nil); err != nil { t.Fatalf("AttachAll returned error: %v", err) } if !mgr.IsActive("read") { diff --git a/internal/statsengine/filerank.go b/internal/statsengine/filerank.go index d24ab93..ef43c6e 100644 --- a/internal/statsengine/filerank.go +++ b/internal/statsengine/filerank.go @@ -191,12 +191,13 @@ func (s fileSnapshotInput) toSnapshot() FileSnapshot { } return FileSnapshot{ - Path: s.path, - Accesses: s.accesses, - BytesRead: s.bytesRead, - BytesWritten: s.bytesWritten, - AvgLatencyNs: avg, - MaxLatencyNs: s.maxLatency, + Path: s.path, + Accesses: s.accesses, + BytesRead: s.bytesRead, + BytesWritten: s.bytesWritten, + AvgLatencyNs: avg, + MaxLatencyNs: s.maxLatency, + TotalLatencyNs: s.totalLatency, } } diff --git a/internal/statsengine/process.go b/internal/statsengine/process.go index 3bfd019..b7eb6e7 100644 --- a/internal/statsengine/process.go +++ b/internal/statsengine/process.go @@ -174,11 +174,12 @@ func (s processSnapshotInput) toSnapshot(rateDiv float64) ProcessSnapshot { } return ProcessSnapshot{ - PID: s.pid, - Comm: s.comm, - Syscalls: s.count, - RatePerSec: safeRate(s.count, rateDiv), - Bytes: s.totalBytes, - AvgLatencyNs: avg, + PID: s.pid, + Comm: s.comm, + Syscalls: s.count, + RatePerSec: safeRate(s.count, rateDiv), + Bytes: s.totalBytes, + AvgLatencyNs: avg, + TotalLatencyNs: s.totalLatency, } } diff --git a/internal/statsengine/snapshot.go b/internal/statsengine/snapshot.go index f2b617b..7a95ab8 100644 --- a/internal/statsengine/snapshot.go +++ b/internal/statsengine/snapshot.go @@ -68,12 +68,13 @@ type SyscallSnapshot struct { Errors uint64 Bytes uint64 - LatencyMinNs uint64 - LatencyMaxNs uint64 - LatencyMeanNs float64 - LatencyP50Ns uint64 - LatencyP95Ns uint64 - LatencyP99Ns uint64 + LatencyMinNs uint64 + LatencyMaxNs uint64 + LatencyMeanNs float64 + TotalLatencyNs uint64 + LatencyP50Ns uint64 + LatencyP95Ns uint64 + LatencyP99Ns uint64 } // FileSnapshot is an aggregated per-file ranking entry. @@ -84,8 +85,9 @@ type FileSnapshot struct { BytesRead uint64 BytesWritten uint64 - AvgLatencyNs float64 - MaxLatencyNs uint64 + AvgLatencyNs float64 + MaxLatencyNs uint64 + TotalLatencyNs uint64 } // ProcessSnapshot is an aggregated per-process entry. @@ -97,7 +99,8 @@ type ProcessSnapshot struct { RatePerSec float64 Bytes uint64 - AvgLatencyNs float64 + AvgLatencyNs float64 + TotalLatencyNs uint64 } // HistogramBucketSnapshot is one bucket of a histogram snapshot. diff --git a/internal/statsengine/syscall.go b/internal/statsengine/syscall.go index 93931d1..2ef929a 100644 --- a/internal/statsengine/syscall.go +++ b/internal/statsengine/syscall.go @@ -195,18 +195,19 @@ func (s *syscallStats) ensurePercentiles() { func (s syscallSnapshotInput) toSnapshot(rateDiv float64) SyscallSnapshot { return SyscallSnapshot{ - TraceID: s.traceID, - Name: s.name, - Count: s.count, - RatePerSec: safeRate(s.count, rateDiv), - Errors: s.errorCount, - Bytes: s.totalBytes, - LatencyMinNs: s.minLatency, - LatencyMaxNs: s.maxLatency, - LatencyMeanNs: float64(s.totalLatency) / float64(maxU64(s.count, 1)), - LatencyP50Ns: s.p50Latency, - LatencyP95Ns: s.p95Latency, - LatencyP99Ns: s.p99Latency, + TraceID: s.traceID, + Name: s.name, + Count: s.count, + RatePerSec: safeRate(s.count, rateDiv), + Errors: s.errorCount, + Bytes: s.totalBytes, + LatencyMinNs: s.minLatency, + LatencyMaxNs: s.maxLatency, + LatencyMeanNs: float64(s.totalLatency) / float64(maxU64(s.count, 1)), + TotalLatencyNs: s.totalLatency, + LatencyP50Ns: s.p50Latency, + LatencyP95Ns: s.p95Latency, + LatencyP99Ns: s.p99Latency, } } diff --git a/internal/tui/dashboard/bubbles.go b/internal/tui/dashboard/bubbles.go index f50eba8..f4fa6d5 100644 --- a/internal/tui/dashboard/bubbles.go +++ b/internal/tui/dashboard/bubbles.go @@ -19,8 +19,9 @@ import ( type bubbleMetric string const ( - bubbleMetricCount bubbleMetric = "count" - bubbleMetricBytes bubbleMetric = "bytes" + bubbleMetricCount bubbleMetric = "count" + bubbleMetricBytes bubbleMetric = "bytes" + bubbleMetricDuration bubbleMetric = "duration" ) const ( @@ -32,20 +33,22 @@ const ( ) type bubbleDatum struct { - ID string - Label string - Count uint64 - Bytes uint64 - Detail string + ID string + Label string + Count uint64 + Bytes uint64 + Duration uint64 + Detail string } type bubbleNode struct { - ID string - Label string - Detail string - Count uint64 - Bytes uint64 - Value uint64 + ID string + Label string + Detail string + Count uint64 + Bytes uint64 + Duration uint64 + Value uint64 radiusSpring harmonica.Spring xSpring harmonica.Spring @@ -114,28 +117,35 @@ func (c *bubbleChart) SetViewport(width, height int) { data := make([]bubbleDatum, 0, len(c.nodes)) for _, node := range c.nodes { data = append(data, bubbleDatum{ - ID: node.ID, - Label: node.Label, - Count: node.Count, - Bytes: node.Bytes, - Detail: node.Detail, + ID: node.ID, + Label: node.Label, + Count: node.Count, + Bytes: node.Bytes, + Duration: node.Duration, + Detail: node.Detail, }) } c.SetData(data) } func (c *bubbleChart) SetMetric(metric bubbleMetric) { - if metric != bubbleMetricBytes { - metric = bubbleMetricCount + switch metric { + case bubbleMetricBytes, bubbleMetricDuration: + c.metric = metric + default: + c.metric = bubbleMetricCount } - c.metric = metric } func (c *bubbleChart) Metric() bubbleMetric { - if c.metric == bubbleMetricBytes { + switch c.metric { + case bubbleMetricBytes: return bubbleMetricBytes + case bubbleMetricDuration: + return bubbleMetricDuration + default: + return bubbleMetricCount } - return bubbleMetricCount } func (c *bubbleChart) SetStatusHint(hint string) { @@ -543,17 +553,25 @@ func (c *bubbleChart) statusLine(width int) string { } func (c *bubbleChart) metricLabel() string { - if c.Metric() == bubbleMetricBytes { + switch c.Metric() { + case bubbleMetricBytes: return "bytes" + case bubbleMetricDuration: + return "duration" + default: + return "events" } - return "events" } func (c *bubbleChart) formatMetricValue(node bubbleNode) string { - if c.Metric() == bubbleMetricBytes { + switch c.Metric() { + case bubbleMetricBytes: return formatBytes(float64(node.Bytes)) + case bubbleMetricDuration: + return formatDurationUintNs(node.Duration) + default: + return fmt.Sprintf("%d", node.Count) } - return fmt.Sprintf("%d", node.Count) } func (c *bubbleChart) palette() []color.Color { @@ -687,6 +705,7 @@ func buildBubbleTargets(data []bubbleDatum, metric bubbleMetric, width, height i Detail: datum.Detail, Count: datum.Count, Bytes: datum.Bytes, + Duration: datum.Duration, Value: value, targetRadius: targetRadius, targetX: targetX, @@ -767,10 +786,14 @@ func clampFloat(value, minValue, maxValue float64) float64 { } func bubbleValue(d bubbleDatum, metric bubbleMetric) uint64 { - if metric == bubbleMetricBytes { + switch metric { + case bubbleMetricBytes: return d.Bytes + case bubbleMetricDuration: + return d.Duration + default: + return d.Count } - return d.Count } func syscallBubbleData(snap *statsengine.Snapshot) []bubbleDatum { @@ -782,11 +805,12 @@ func syscallBubbleData(snap *statsengine.Snapshot) []bubbleDatum { for _, syscall := range rows { detail := fmt.Sprintf("rate %.1f/s, errors %d, p95 %s", syscall.RatePerSec, syscall.Errors, formatDurationUintNs(syscall.LatencyP95Ns)) data = append(data, bubbleDatum{ - ID: syscall.Name, - Label: syscall.Name, - Count: syscall.Count, - Bytes: syscall.Bytes, - Detail: detail, + ID: syscall.Name, + Label: syscall.Name, + Count: syscall.Count, + Bytes: syscall.Bytes, + Duration: syscall.TotalLatencyNs, + Detail: detail, }) } return data @@ -802,11 +826,12 @@ func filesDirBubbleData(snap *statsengine.Snapshot) []bubbleDatum { totalBytes := dir.BytesRead + dir.BytesWritten detail := fmt.Sprintf("dir %s, files %d, read %s, write %s", dir.Dir, dir.FileCount, formatBytes(float64(dir.BytesRead)), formatBytes(float64(dir.BytesWritten))) data = append(data, bubbleDatum{ - ID: dir.Dir, - Label: rootPathLabelFromFSPath(dir.Dir), - Count: dir.Accesses, - Bytes: totalBytes, - Detail: detail, + ID: dir.Dir, + Label: rootPathLabelFromFSPath(dir.Dir), + Count: dir.Accesses, + Bytes: totalBytes, + Duration: dir.TotalLatencyNs, + Detail: detail, }) } return data @@ -825,11 +850,12 @@ func processBubbleData(snap *statsengine.Snapshot) []bubbleDatum { } detail := fmt.Sprintf("pid %d, rate %.1f/s, avg %s", proc.PID, proc.RatePerSec, formatDurationNs(proc.AvgLatencyNs)) data = append(data, bubbleDatum{ - ID: fmt.Sprintf("%d/%s", proc.PID, proc.Comm), - Label: label, - Count: proc.Syscalls, - Bytes: proc.Bytes, - Detail: detail, + ID: fmt.Sprintf("%d/%s", proc.PID, proc.Comm), + Label: label, + Count: proc.Syscalls, + Bytes: proc.Bytes, + Duration: proc.TotalLatencyNs, + Detail: detail, }) } return data diff --git a/internal/tui/dashboard/files.go b/internal/tui/dashboard/files.go index df850ab..3b85a73 100644 --- a/internal/tui/dashboard/files.go +++ b/internal/tui/dashboard/files.go @@ -17,9 +17,10 @@ type DirSnapshot struct { BytesRead uint64 BytesWritten uint64 - AvgLatencyNs float64 - MaxLatencyNs uint64 - FileCount uint64 + AvgLatencyNs float64 + MaxLatencyNs uint64 + TotalLatencyNs uint64 + FileCount uint64 } type fileSortKey uint8 @@ -401,7 +402,6 @@ func aggregateFilesByDir(files []statsengine.FileSnapshot) []DirSnapshot { } dirs := make(map[string]DirSnapshot, len(files)) - weightedLatency := make(map[string]float64, len(files)) for _, f := range files { dir := filepath.Dir(f.Path) s := dirs[dir] @@ -413,14 +413,14 @@ func aggregateFilesByDir(files []statsengine.FileSnapshot) []DirSnapshot { s.MaxLatencyNs = f.MaxLatencyNs } s.FileCount++ - weightedLatency[dir] += f.AvgLatencyNs * float64(f.Accesses) + s.TotalLatencyNs += f.TotalLatencyNs dirs[dir] = s } out := make([]DirSnapshot, 0, len(dirs)) - for dir, s := range dirs { + for _, s := range dirs { if s.Accesses > 0 { - s.AvgLatencyNs = weightedLatency[dir] / float64(s.Accesses) + s.AvgLatencyNs = float64(s.TotalLatencyNs) / float64(s.Accesses) } out = append(out, s) } diff --git a/internal/tui/dashboard/files_test.go b/internal/tui/dashboard/files_test.go index 480c25f..848aa33 100644 --- a/internal/tui/dashboard/files_test.go +++ b/internal/tui/dashboard/files_test.go @@ -65,9 +65,9 @@ func TestFilePathWidthExpandsOnWideTerminal(t *testing.T) { func TestAggregateFilesByDir(t *testing.T) { files := []statsengine.FileSnapshot{ - {Path: "/var/log/a.log", Accesses: 10, BytesRead: 100, BytesWritten: 40, AvgLatencyNs: 100, MaxLatencyNs: 300}, - {Path: "/var/log/b.log", Accesses: 20, BytesRead: 200, BytesWritten: 60, AvgLatencyNs: 200, MaxLatencyNs: 500}, - {Path: "/tmp/c.log", Accesses: 5, BytesRead: 50, BytesWritten: 10, AvgLatencyNs: 1000, MaxLatencyNs: 1200}, + {Path: "/var/log/a.log", Accesses: 10, BytesRead: 100, BytesWritten: 40, AvgLatencyNs: 100, MaxLatencyNs: 300, TotalLatencyNs: 1000}, + {Path: "/var/log/b.log", Accesses: 20, BytesRead: 200, BytesWritten: 60, AvgLatencyNs: 200, MaxLatencyNs: 500, TotalLatencyNs: 4000}, + {Path: "/tmp/c.log", Accesses: 5, BytesRead: 50, BytesWritten: 10, AvgLatencyNs: 1000, MaxLatencyNs: 1200, TotalLatencyNs: 5000}, } got := aggregateFilesByDir(files) diff --git a/internal/tui/dashboard/icicle.go b/internal/tui/dashboard/icicle.go index 768783b..560bb2a 100644 --- a/internal/tui/dashboard/icicle.go +++ b/internal/tui/dashboard/icicle.go @@ -16,6 +16,7 @@ type icicleNode struct { fullPath string accesses uint64 bytes uint64 + duration uint64 children map[string]*icicleNode } @@ -123,6 +124,7 @@ func buildIcicleTree(dirs []DirSnapshot) *icicleNode { metricBytes := dir.BytesRead + dir.BytesWritten current.accesses += dir.Accesses current.bytes += metricBytes + current.duration += dir.TotalLatencyNs currentPath := "/" for _, segment := range segments { if segment == "" { @@ -144,6 +146,7 @@ func buildIcicleTree(dirs []DirSnapshot) *icicleNode { } child.accesses += dir.Accesses child.bytes += metricBytes + child.duration += dir.TotalLatencyNs current = child } } @@ -297,9 +300,14 @@ func icicleStatusLine(tiles []icicleTile, selected int, metric bubbleMetric) str selected = clampOffset(selected, len(tiles)) tile := tiles[selected] metricValue := icicleValue(tile.node, metric) - metricText := fmt.Sprintf("%d", metricValue) - if metric == bubbleMetricBytes { + var metricText string + switch metric { + case bubbleMetricBytes: metricText = formatBytes(float64(metricValue)) + case bubbleMetricDuration: + metricText = formatDurationUintNs(metricValue) + default: + metricText = fmt.Sprintf("%d", metricValue) } return fmt.Sprintf( "sel:%d/%d %s | %s=%s | accesses=%d | bytes=%s", @@ -317,8 +325,12 @@ func icicleValue(node *icicleNode, metric bubbleMetric) uint64 { if node == nil { return 0 } - if metric == bubbleMetricBytes { + switch metric { + case bubbleMetricBytes: return node.bytes + case bubbleMetricDuration: + return node.duration + default: + return node.accesses } - return node.accesses } diff --git a/internal/tui/dashboard/model.go b/internal/tui/dashboard/model.go index 850a483..42a9ad4 100644 --- a/internal/tui/dashboard/model.go +++ b/internal/tui/dashboard/model.go @@ -713,10 +713,14 @@ func sortedProcessSnapshots(rows []statsengine.ProcessSnapshot, metric bubbleMet } func processMetricValue(proc statsengine.ProcessSnapshot, metric bubbleMetric) uint64 { - if metric == bubbleMetricBytes { + switch metric { + case bubbleMetricBytes: return proc.Bytes + case bubbleMetricDuration: + return proc.TotalLatencyNs + default: + return proc.Syscalls } - return proc.Syscalls } func processSelectionLabel(proc statsengine.ProcessSnapshot) string { @@ -1289,10 +1293,15 @@ func nextVizMode(current tabVizMode, allowed []tabVizMode) tabVizMode { } func nextBubbleMetric(metric bubbleMetric) bubbleMetric { - if metric == bubbleMetricBytes { + // 3-way cycle: count (events) → bytes → duration → count. + switch metric { + case bubbleMetricCount: + return bubbleMetricBytes + case bubbleMetricBytes: + return bubbleMetricDuration + default: return bubbleMetricCount } - return bubbleMetricBytes } func tickCmd(d time.Duration) tea.Cmd { diff --git a/internal/tui/dashboard/treemap.go b/internal/tui/dashboard/treemap.go index dd62d13..03c2917 100644 --- a/internal/tui/dashboard/treemap.go +++ b/internal/tui/dashboard/treemap.go @@ -17,13 +17,14 @@ import ( const maxSyscallTreemapItems = 20 type syscallTreemapItem struct { - Name string - Count uint64 - Bytes uint64 - Errors uint64 - P95Ns uint64 - Detail string - Value uint64 + Name string + Count uint64 + Bytes uint64 + Duration uint64 + Errors uint64 + P95Ns uint64 + Detail string + Value uint64 } type syscallTreemapTile struct { @@ -111,11 +112,12 @@ func buildSyscallTreemapItems(snap *statsengine.Snapshot, metric bubbleMetric) [ items := make([]syscallTreemapItem, 0, len(syscalls)) for _, syscall := range syscalls { item := syscallTreemapItem{ - Name: syscall.Name, - Count: syscall.Count, - Bytes: syscall.Bytes, - Errors: syscall.Errors, - P95Ns: syscall.LatencyP95Ns, + Name: syscall.Name, + Count: syscall.Count, + Bytes: syscall.Bytes, + Duration: syscall.TotalLatencyNs, + Errors: syscall.Errors, + P95Ns: syscall.LatencyP95Ns, Detail: fmt.Sprintf( "rate %.1f/s, errors %d, p95 %s", syscall.RatePerSec, @@ -154,9 +156,10 @@ func buildFilesTreemapItems(snap *statsengine.Snapshot, metric bubbleMetric) []s pathLabel := rootPathLabelFromFSPath(dir.Dir) totalBytes := dir.BytesRead + dir.BytesWritten item := syscallTreemapItem{ - Name: pathLabel, - Count: dir.Accesses, - Bytes: totalBytes, + Name: pathLabel, + Count: dir.Accesses, + Bytes: totalBytes, + Duration: dir.TotalLatencyNs, Detail: fmt.Sprintf( "dir %s, files %d, read %s, write %s, max %s", dir.Dir, @@ -199,9 +202,10 @@ func buildProcessesTreemapItems(snap *statsengine.Snapshot, metric bubbleMetric) label = fmt.Sprintf("%d:%s", proc.PID, comm) } item := syscallTreemapItem{ - Name: label, - Count: proc.Syscalls, - Bytes: proc.Bytes, + Name: label, + Count: proc.Syscalls, + Bytes: proc.Bytes, + Duration: proc.TotalLatencyNs, Detail: fmt.Sprintf( "pid %d, rate %.1f/s, avg %s", proc.PID, @@ -231,10 +235,14 @@ func buildProcessesTreemapItems(snap *statsengine.Snapshot, metric bubbleMetric) } func treemapValue(item syscallTreemapItem, metric bubbleMetric) uint64 { - if metric == bubbleMetricBytes { + switch metric { + case bubbleMetricBytes: return item.Bytes + case bubbleMetricDuration: + return item.Duration + default: + return item.Count } - return item.Count } func layoutSyscallTreemap(items []syscallTreemapItem, x, y, w, h int) []syscallTreemapTile { @@ -421,13 +429,14 @@ func treemapStatusLine(items []syscallTreemapItem, selected int, metric bubbleMe } selected = clampOffset(selected, len(items)) item := items[selected] - metricValue := item.Count - if metric == bubbleMetricBytes { - metricValue = item.Bytes - } - metricText := fmt.Sprintf("%d", metricValue) - if metric == bubbleMetricBytes { - metricText = formatBytes(float64(metricValue)) + var metricText string + switch metric { + case bubbleMetricBytes: + metricText = formatBytes(float64(item.Bytes)) + case bubbleMetricDuration: + metricText = formatDurationUintNs(item.Duration) + default: + metricText = fmt.Sprintf("%d", item.Count) } status := fmt.Sprintf( "sel:%d/%d %s | %s=%s | bytes=%s", @@ -445,10 +454,14 @@ func treemapStatusLine(items []syscallTreemapItem, selected int, metric bubbleMe } func treemapMetricLabel(metric bubbleMetric) string { - if metric == bubbleMetricBytes { + switch metric { + case bubbleMetricBytes: return "bytes" + case bubbleMetricDuration: + return "duration" + default: + return "events" } - return "events" } func treemapPalette(isDark bool) []color.Color { diff --git a/internal/tui/flamegraph/controls.go b/internal/tui/flamegraph/controls.go index 2033416..bd588b3 100644 --- a/internal/tui/flamegraph/controls.go +++ b/internal/tui/flamegraph/controls.go @@ -57,8 +57,16 @@ func (m *Model) cycleFieldOrder() { } func (m *Model) toggleCountField() { - next := "bytes" - if m.countField == "bytes" { + // 3-way cycle: count → bytes → duration → count. + // durationToPrev (inter-syscall gap) is reachable via the CLI flag but + // kept out of the toolbar cycle for now. + var next string + switch m.countField { + case "count": + next = "bytes" + case "bytes": + next = "duration" + default: next = "count" } if m.liveTrie != nil { @@ -168,6 +176,8 @@ func (m Model) countFieldLabel() string { return "events" case "bytes": return "bytes" + case "duration": + return "duration" default: return m.countField } diff --git a/internal/tui/flamegraph/model_test.go b/internal/tui/flamegraph/model_test.go index c2626cd..e864e88 100644 --- a/internal/tui/flamegraph/model_test.go +++ b/internal/tui/flamegraph/model_test.go @@ -987,12 +987,20 @@ func TestControlMetricToggleReconfiguresLiveTrieCountField(t *testing.T) { } m = pressFlameKey(t, m, tea.KeyPressMsg{Code: []rune{'b'}[0], Text: "b"}) - if got, want := m.countField, "count"; got != want { + if got, want := m.countField, "duration"; got != want { t.Fatalf("expected model count field %q after second toggle, got %q", want, got) } - if got, want := liveTrie.CountField(), "count"; got != want { + if got, want := liveTrie.CountField(), "duration"; got != want { t.Fatalf("expected live trie count field %q after second toggle, got %q", want, got) } + + m = pressFlameKey(t, m, tea.KeyPressMsg{Code: []rune{'b'}[0], Text: "b"}) + if got, want := m.countField, "count"; got != want { + t.Fatalf("expected model count field %q after third toggle, got %q", want, got) + } + if got, want := liveTrie.CountField(), "count"; got != want { + t.Fatalf("expected live trie count field %q after third toggle, got %q", want, got) + } } func TestNewModelAlignsPresetIndexToLiveTrieFields(t *testing.T) { diff --git a/scripts/build-with-docker-el8.sh b/scripts/build-with-docker-el8.sh new file mode 100755 index 0000000..9ee35ba --- /dev/null +++ b/scripts/build-with-docker-el8.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +# Build the ior binary inside a Rocky Linux 8 container and write it as +# ior.el8 to the repo root. The container image is built once and reused on +# subsequent runs. Mirrors scripts/build-with-docker.sh but targets Rocky 8 +# so the produced binary runs on hosts with the older glibc shipped there. +# +# Usage: +# ./build-with-docker-el8.sh # build image + compile ior.el8 +# ./build-with-docker-el8.sh --build # force rebuild of the Docker image +# ./build-with-docker-el8.sh --run # skip image build, only compile ior.el8 +set -euo pipefail + +IMAGE="ior-builder:rocky8" +REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +DOCKERFILE="${REPO_ROOT}/Dockerfile.el8" + +# Derive the Go version from go.mod so the Docker image always matches the +# minimum toolchain declared by the project. +GO_VERSION="$(grep '^go ' "${REPO_ROOT}/go.mod" | awk '{print $2}')" + +BUILD_IMAGE=true +RUN_BUILD=true + +for arg in "$@"; do + case "$arg" in + --build) BUILD_IMAGE=true; RUN_BUILD=false ;; + --run) BUILD_IMAGE=false; RUN_BUILD=true ;; + esac +done + +if $BUILD_IMAGE; then + echo "==> Building Docker image ${IMAGE} (this takes ~15-20 min on first run)..." + docker build --platform=linux/amd64 \ + --build-arg "GO_VERSION=${GO_VERSION}" \ + -f "${DOCKERFILE}" \ + -t "${IMAGE}" \ + "${REPO_ROOT}" + echo "==> Image build complete." +fi + +if $RUN_BUILD; then + echo "==> Compiling ior.el8 inside the container..." + # --privileged gives full host capabilities. + # tracefs (/sys/kernel/tracing) and BTF (/sys/kernel/btf) are not auto-mounted + # by Docker even with --privileged, so they are mounted explicitly: + # - /sys/kernel/tracing : mage generate reads available syscall tracepoints + # - /sys/kernel/btf : mage bpfBuild reads vmlinux BTF for vmlinux.h + docker run --rm \ + --platform=linux/amd64 \ + --privileged \ + -v /sys/kernel/tracing:/sys/kernel/tracing \ + -v /sys/kernel/btf:/sys/kernel/btf \ + -v "${REPO_ROOT}:/git/ior" \ + "${IMAGE}" + echo "==> Done. Binary written to ${REPO_ROOT}/ior.el8" +fi |
