From 97a1b7a907c3237445643b95496bf84404e5cf4c Mon Sep 17 00:00:00 2001 From: Paul Buetow Date: Sat, 30 May 2026 11:00:53 +0300 Subject: test(generate): lock in mkdirat path capture at args[1] Audit of mkdirat(2) found the tracing implementation correct: the generated BPF handler reads the pathname from args[1] (after the dirfd at args[0]), while the sibling mkdir(2) reads from args[0] (no dirfd). Both are KindPathname / FamilyFS with an UNCLASSIFIED return, consistent with mknod/mknodat and docs/syscall-tracing-plan.md. The arg index is data-driven from the kernel format via FieldNumber, so no source change was needed. Add lock-in unit tests and real-format fixtures asserting: - mkdirat captures the path from args[1], NOT args[0] (negative guard) - mkdir captures the path from args[0] - mkdirat/mkdir/mknodat share FamilyFS and KindPathname - FieldNumber(pathname) = 1 for mkdirat, 0 for mkdir Co-Authored-By: Claude Opus 4.8 --- internal/generate/codegen_test.go | 60 +++++++++++++++++++++++++++++++++++ internal/generate/testdata.go | 67 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 127 insertions(+) (limited to 'internal/generate') diff --git a/internal/generate/codegen_test.go b/internal/generate/codegen_test.go index 880d67d..cd34c63 100644 --- a/internal/generate/codegen_test.go +++ b/internal/generate/codegen_test.go @@ -594,6 +594,66 @@ func TestGenerateMqOpenHandler(t *testing.T) { requireContains(t, output, "ev->flags = ctx->args[1];") } +// TestGenerateMkdiratHandlerCapturesPathFromArgs1 locks in that mkdirat(2) is a +// KindPathname event whose path is read from args[1]. mkdirat(dirfd, pathname, +// mode) places the dirfd at args[0] and the real filesystem path at args[1]; +// reading args[0] would capture the dir fd (often AT_FDCWD) instead of the path. +// The arg index is data-driven from the kernel format (FieldNumber of the +// "pathname" field), so this guards against a regression in that derivation. +func TestGenerateMkdiratHandlerCapturesPathFromArgs1(t *testing.T) { + output := generateFromPair(t, FormatMkdirat, FormatExitMkdirat) + + requireContains(t, output, `SEC("tracepoint/syscalls/sys_enter_mkdirat")`) + requireContains(t, output, "struct path_event *ev") + requireContains(t, output, "ev->event_type = ENTER_PATH_EVENT;") + requireContains(t, output, "ev->trace_id = SYS_ENTER_MKDIRAT;") + requireContains(t, output, "bpf_probe_read_user_str(ev->pathname, sizeof(ev->pathname), (void*)ctx->args[1]);") + // Negative guard: the path must NOT be read from args[0] (the dirfd). + requireNotContains(t, output, "bpf_probe_read_user_str(ev->pathname, sizeof(ev->pathname), (void*)ctx->args[0]);") + // Return value is a 0/-1 status code, not a byte count: UNCLASSIFIED. + requireContains(t, output, "ev->trace_id = SYS_EXIT_MKDIRAT;") + requireContains(t, output, "ev->ret_type = UNCLASSIFIED;") +} + +// TestGenerateMkdirHandlerCapturesPathFromArgs0 locks in that the sibling +// mkdir(2) — which has no dirfd — reads its pathname from args[0]. Contrasting +// it with mkdirat above ensures the two never collapse onto a shared arg index. +func TestGenerateMkdirHandlerCapturesPathFromArgs0(t *testing.T) { + output := generateFromPair(t, FormatMkdir, FormatExitMkdir) + + requireContains(t, output, `SEC("tracepoint/syscalls/sys_enter_mkdir")`) + requireContains(t, output, "struct path_event *ev") + requireContains(t, output, "ev->trace_id = SYS_ENTER_MKDIR;") + requireContains(t, output, "bpf_probe_read_user_str(ev->pathname, sizeof(ev->pathname), (void*)ctx->args[0]);") + requireContains(t, output, "ev->trace_id = SYS_EXIT_MKDIR;") + requireContains(t, output, "ev->ret_type = UNCLASSIFIED;") +} + +// TestMkdiratFamilyAndKindMatchSiblings locks in that mkdirat and its siblings +// mkdir/mknodat share the same FS family and pathname kind classification. A +// drift here (e.g. mkdirat slipping into Misc) would split related directory/ +// node-creation syscalls across families in the dashboard. +func TestMkdiratFamilyAndKindMatchSiblings(t *testing.T) { + for _, syscall := range []string{"mkdirat", "mkdir", "mknodat"} { + if got := ClassifySyscallFamily("sys_enter_" + syscall); got != FamilyFS { + t.Errorf("%s family = %q, want %q", syscall, got, FamilyFS) + } + } + + mkdirat := mustParseOne(t, FormatMkdirat) + if r := ClassifyFormat(&mkdirat); r.Kind != KindPathname || r.PathnameField != "pathname" { + t.Errorf("mkdirat classified as kind=%d field=%q, want KindPathname/pathname", r.Kind, r.PathnameField) + } + if n := mkdirat.FieldNumber("pathname"); n != 1 { + t.Errorf("mkdirat FieldNumber(pathname) = %d, want 1", n) + } + + mkdir := mustParseOne(t, FormatMkdir) + if n := mkdir.FieldNumber("pathname"); n != 0 { + t.Errorf("mkdir FieldNumber(pathname) = %d, want 0", n) + } +} + func TestGenerateExecHandler(t *testing.T) { output := generateFromPair(t, FormatExecveat, FormatExitExecveat) diff --git a/internal/generate/testdata.go b/internal/generate/testdata.go index 25392b1..5b38cfd 100644 --- a/internal/generate/testdata.go +++ b/internal/generate/testdata.go @@ -2052,3 +2052,70 @@ format: print fmt: "0x%lx", REC->ret ` + +// FormatMkdirat is real sysfs data for mkdirat(2): the pathname argument sits +// at args[1], AFTER the dirfd at args[0]. Captured from a Linux 7.0 kernel, +// which also exposes the __data_loc __pathname_val trailing field. The +// generator must read the path from args[1] (the dfd at args[0] is NOT a path). +const FormatMkdirat = `name: sys_enter_mkdirat +ID: 899 +format: + field:unsigned short common_type; offset:0; size:2; signed:0; + field:unsigned char common_flags; offset:2; size:1; signed:0; + field:unsigned char common_preempt_count; offset:3; size:1; signed:0; + field:int common_pid; offset:4; size:4; signed:1; + + field:int __syscall_nr; offset:8; size:4; signed:1; + field:int dfd; offset:16; size:8; signed:0; + field:const char * pathname; offset:24; size:8; signed:0; + field:umode_t mode; offset:32; size:8; signed:0; + field:__data_loc char[] __pathname_val; offset:40; size:4; signed:0; + +print fmt: "dfd: 0x%08lx, pathname: 0x%08lx, mode: 0x%08lx", ((unsigned long)(REC->dfd)), ((unsigned long)(REC->pathname)), ((unsigned long)(REC->mode)) +` + +const FormatExitMkdirat = `name: sys_exit_mkdirat +ID: 898 +format: + field:unsigned short common_type; offset:0; size:2; signed:0; + field:unsigned char common_flags; offset:2; size:1; signed:0; + field:unsigned char common_preempt_count; offset:3; size:1; signed:0; + field:int common_pid; offset:4; size:4; signed:1; + + field:int __syscall_nr; offset:8; size:4; signed:1; + field:long ret; offset:16; size:8; signed:1; + +print fmt: "0x%lx", REC->ret +` + +// FormatMkdir is the sibling mkdir(2): it has NO dirfd, so the pathname is the +// first argument at args[0]. This is the key contrast with mkdirat above and +// guards against accidentally sharing a single arg index between the two. +const FormatMkdir = `name: sys_enter_mkdir +ID: 901 +format: + field:unsigned short common_type; offset:0; size:2; signed:0; + field:unsigned char common_flags; offset:2; size:1; signed:0; + field:unsigned char common_preempt_count; offset:3; size:1; signed:0; + field:int common_pid; offset:4; size:4; signed:1; + + field:int __syscall_nr; offset:8; size:4; signed:1; + field:const char * pathname; offset:16; size:8; signed:0; + field:umode_t mode; offset:24; size:8; signed:0; + +print fmt: "pathname: 0x%08lx, mode: 0x%08lx", ((unsigned long)(REC->pathname)), ((unsigned long)(REC->mode)) +` + +const FormatExitMkdir = `name: sys_exit_mkdir +ID: 900 +format: + field:unsigned short common_type; offset:0; size:2; signed:0; + field:unsigned char common_flags; offset:2; size:1; signed:0; + field:unsigned char common_preempt_count; offset:3; size:1; signed:0; + field:int common_pid; offset:4; size:4; signed:1; + + field:int __syscall_nr; offset:8; size:4; signed:1; + field:long ret; offset:16; size:8; signed:1; + +print fmt: "0x%lx", REC->ret +` -- cgit v1.2.3