From 9310b54d439d4a1a8d4d337987aa63884df0af76 Mon Sep 17 00:00:00 2001 From: Paul Buetow Date: Wed, 20 May 2026 11:38:19 +0300 Subject: feat: add syscall aggregate sampling infrastructure (task 17) --- internal/flags/flags.go | 59 +++++++++++++++++++++------ internal/flags/sampling.go | 88 +++++++++++++++++++++++++++++++++++++++++ internal/flags/sampling_test.go | 78 ++++++++++++++++++++++++++++++++++++ 3 files changed, 212 insertions(+), 13 deletions(-) create mode 100644 internal/flags/sampling.go create mode 100644 internal/flags/sampling_test.go (limited to 'internal/flags') diff --git a/internal/flags/flags.go b/internal/flags/flags.go index 285569c..3a6456a 100644 --- a/internal/flags/flags.go +++ b/internal/flags/flags.go @@ -12,6 +12,7 @@ import ( appconfig "ior/internal/config" "ior/internal/globalfilter" "ior/internal/tracepoints" + "ior/internal/types" ) // Config captures runtime configuration parsed from CLI flags. @@ -72,6 +73,13 @@ type Config struct { // ResetTimer is the interval at which aggregate dashboard state (flamegraph // trie and stats engine) is automatically cleared; 0 disables auto-reset. ResetTimer time.Duration + // SyscallFamilySamplingRates controls in-kernel syscall sampling by family. + // Rate semantics: 0 aggregate-only, 1 emit every event, N>1 emit 1-in-N events. + SyscallFamilySamplingRates map[types.SyscallFamily]uint32 + // SyscallSamplingRates controls in-kernel syscall sampling by syscall name. + // Keys use syscall names (for example "futex"), not tracepoint names. + // Rate semantics: 0 aggregate-only, 1 emit every event, N>1 emit 1-in-N events. + SyscallSamplingRates map[string]uint32 // ShowVersion prints the banner plus version and exits without running. ShowVersion bool @@ -86,16 +94,18 @@ const DefaultResetTimer = 30 * time.Second // NewFlags returns a configuration instance initialized with project defaults. 
func NewFlags() Config { return Config{ - PidFilter: -1, - TidFilter: -1, - EventMapSize: appconfig.DefaultEventMapSize, - Duration: 900, - LiveInterval: 200 * time.Millisecond, - TUIFastRefreshInterval: 250 * time.Millisecond, - TUIExportEnable: true, - CollapsedFields: []string{"comm", "tracepoint", "path"}, - CountField: "count", - ResetTimer: DefaultResetTimer, + PidFilter: -1, + TidFilter: -1, + EventMapSize: appconfig.DefaultEventMapSize, + Duration: 900, + LiveInterval: 200 * time.Millisecond, + TUIFastRefreshInterval: 250 * time.Millisecond, + TUIExportEnable: true, + CollapsedFields: []string{"comm", "tracepoint", "path"}, + CountField: "count", + ResetTimer: DefaultResetTimer, + SyscallFamilySamplingRates: make(map[types.SyscallFamily]uint32), + SyscallSamplingRates: make(map[string]uint32), } } @@ -121,6 +131,8 @@ func (f Config) Clone() Config { out.TracepointSelector = f.TracepointSelector.Clone() out.CollapsedFields = slices.Clone(f.CollapsedFields) out.GlobalFilter = f.GlobalFilter.Clone() + out.SyscallFamilySamplingRates = cloneFamilySamplingRates(f.SyscallFamilySamplingRates) + out.SyscallSamplingRates = cloneSyscallSamplingRates(f.SyscallSamplingRates) return out } @@ -136,7 +148,7 @@ func Parse() (Config, error) { // fresh FlagSet and custom argument slices without touching global state. 
func parseFromFlagSet(fs *flag.FlagSet, args []string) (Config, error) { cfg := NewFlags() - tpsAttach, tpsExclude, fields := registerFlags(fs, &cfg) + tpsAttach, tpsExclude, fields, familySampling, syscallSampling := registerFlags(fs, &cfg) if err := fs.Parse(args); err != nil { return Config{}, err @@ -144,6 +156,9 @@ func parseFromFlagSet(fs *flag.FlagSet, args []string) (Config, error) { if err := resolvePostParseFields(&cfg, tpsAttach, tpsExclude, fields); err != nil { return Config{}, err } + if err := resolveSamplingRates(&cfg, familySampling, syscallSampling); err != nil { + return Config{}, err + } if err := validateConfig(cfg); err != nil { return Config{}, err } @@ -152,7 +167,7 @@ func parseFromFlagSet(fs *flag.FlagSet, args []string) (Config, error) { // registerFlags binds all CLI flags to cfg and returns the string pointers for // fields that require post-parse resolution (tracepoint regexes, collapse fields). -func registerFlags(fs *flag.FlagSet, cfg *Config) (tpsAttach, tpsExclude, fields *string) { +func registerFlags(fs *flag.FlagSet, cfg *Config) (tpsAttach, tpsExclude, fields, familySampling, syscallSampling *string) { validFields := collapse.ValidFields() validCounts := collapse.ValidCountFields() @@ -180,12 +195,16 @@ func registerFlags(fs *flag.FlagSet, cfg *Config) (tpsAttach, tpsExclude, fields fs.BoolVar(&cfg.TUIExportEnable, "tuiExport", cfg.TUIExportEnable, "Enable TUI CSV snapshot export files (separate from Parquet recording)") fs.DurationVar(&cfg.ResetTimer, "resetTimer", cfg.ResetTimer, "Auto-reset interval for aggregate dashboard state (flamegraph trie + stats engine); set to 0 to disable") + familySampling = fs.String("syscall-sampling-families", "", + "Per-family sampling rates, for example \"Time=100,Misc=0\" (0=aggregate-only, 1=all, N=1-in-N)") + syscallSampling = fs.String("syscall-sampling-syscalls", "", + "Per-syscall sampling rates, for example \"futex=0,clock_gettime=200\" (overrides family rates)") 
fs.BoolVar(&cfg.ShowVersion, "version", false, "Print version banner and exit") fields = fs.String("fields", "", fmt.Sprintf("Comma separated list of fields to collapse, valid are: %v", validFields)) fs.StringVar(&cfg.CountField, "count", cfg.CountField, fmt.Sprintf("Count field to collapse, valid are: %v", validCounts)) - return tpsAttach, tpsExclude, fields + return tpsAttach, tpsExclude, fields, familySampling, syscallSampling } // resolvePostParseFields compiles the tracepoint selector and collapse field @@ -220,6 +239,20 @@ func resolvePostParseFields(cfg *Config, tpsAttach, tpsExclude, fields *string) return nil } +func resolveSamplingRates(cfg *Config, familySampling, syscallSampling *string) error { + familyRates, err := parseFamilySamplingRates(*familySampling) + if err != nil { + return err + } + syscallRates, err := parseSyscallSamplingRates(*syscallSampling) + if err != nil { + return err + } + cfg.SyscallFamilySamplingRates = familyRates + cfg.SyscallSamplingRates = syscallRates + return nil +} + // validateConfig checks numeric/duration bounds that cannot be enforced by the // flag package itself and returns a descriptive error on the first violation. 
// parseSamplingEntries parses a comma-separated list of name=rate pairs, such
// as "futex=0,clock_gettime=200", into a map keyed by the trimmed name.
//
// Blank input yields an empty, non-nil map, and empty list items (for example
// the one produced by a trailing comma) are ignored. An entry without '=' or
// with an empty name is rejected, as is a rate that is not a base-10 integer
// fitting in a uint32.
//
// Names keep the casing the user typed, but repeated names are matched
// case-insensitively and the last occurrence wins: "Time=1,time=2" yields
// {"time": 2}. The callers fold case while ranging over the returned map
// (parseSyscallSamplingRates lower-cases keys; the family lookup appears to be
// case-insensitive as well), so returning both spellings would let Go's
// randomised map iteration order decide which rate survives.
func parseSamplingEntries(raw string) (map[string]uint32, error) {
	out := make(map[string]uint32)
	raw = strings.TrimSpace(raw)
	if raw == "" {
		return out, nil
	}

	// spelling maps each lower-cased name to the spelling currently stored in out.
	spelling := make(map[string]string)
	for _, part := range strings.Split(raw, ",") {
		part = strings.TrimSpace(part)
		if part == "" {
			continue
		}
		key, valueRaw, ok := strings.Cut(part, "=")
		if !ok {
			return nil, fmt.Errorf("invalid sampling entry %q: expected name=rate", part)
		}
		key = strings.TrimSpace(key)
		if key == "" {
			return nil, fmt.Errorf("invalid sampling entry %q: empty name", part)
		}
		// bitSize 32 makes ParseUint reject values that would not fit in uint32.
		rate, err := strconv.ParseUint(strings.TrimSpace(valueRaw), 10, 32)
		if err != nil {
			return nil, fmt.Errorf("invalid sampling rate for %q: %w", key, err)
		}

		// Drop an earlier, differently-cased spelling of the same name so that
		// exactly one entry per name reaches the caller.
		folded := strings.ToLower(key)
		if prev, seen := spelling[folded]; seen && prev != key {
			delete(out, prev)
		}
		spelling[folded] = key
		out[key] = uint32(rate)
	}
	return out, nil
}
nil { + t.Fatal("expected parse error") + } + if !strings.Contains(err.Error(), "expected name=rate") { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestParseSamplingSyscallRejectsUnknownName(t *testing.T) { + _, err := parseForTest(t, "-syscall-sampling-syscalls", "not_a_syscall=2") + if err == nil { + t.Fatal("expected parse error") + } + if !strings.Contains(err.Error(), "invalid syscall in sampling map") { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestCloneDeepCopiesSamplingMaps(t *testing.T) { + cfg := NewFlags() + cfg.SyscallFamilySamplingRates[types.FamilyTime] = 5 + cfg.SyscallSamplingRates["futex"] = 9 + + cloned := cfg.Clone() + cloned.SyscallFamilySamplingRates[types.FamilyTime] = 100 + cloned.SyscallSamplingRates["futex"] = 1 + + if got := cfg.SyscallFamilySamplingRates[types.FamilyTime]; got != 5 { + t.Fatalf("original family rate mutated: got %d, want 5", got) + } + if got := cfg.SyscallSamplingRates["futex"]; got != 9 { + t.Fatalf("original syscall rate mutated: got %d, want 9", got) + } +} -- cgit v1.2.3