From 18c8e8f5f3d7cc9bbbcb9b8b9be65477110363a7 Mon Sep 17 00:00:00 2001 From: Paul Buetow Date: Fri, 29 May 2026 22:32:55 +0300 Subject: test(generate): lock in access/faccessat path classification Audit of access(2) found the tracing implementation already correct: FS family, KindPathname capturing the real path, and an UNCLASSIFIED int 0/-1 ret_event on exit. access(2) captures its path from args[0] (no dirfd), while siblings faccessat(2)/faccessat2(2) capture from args[1] (dfd precedes the path). mage generate produces no diff and the docs/integration coverage already match. Add unit lock-in tests mirroring prior syscall audits: - FormatAccess/FormatFaccessat tracepoint fixtures (real kernel formats). - classify tests asserting both classify as KindPathname/"filename". - family_test cluster asserting access/faccessat/faccessat2 stay FamilyFS. - codegen test proving access reads ctx->args[0] while faccessat reads ctx->args[1], guarding against a wrong-arg or dropped-path regression. Co-Authored-By: Claude Opus 4.8 --- internal/generate/classify_test.go | 31 ++++++++++++++++++++++++++ internal/generate/codegen_test.go | 29 ++++++++++++++++++++++++ internal/generate/family_test.go | 13 +++++++++++ internal/generate/testdata.go | 45 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 118 insertions(+) diff --git a/internal/generate/classify_test.go b/internal/generate/classify_test.go index 838d643..25f1960 100644 --- a/internal/generate/classify_test.go +++ b/internal/generate/classify_test.go @@ -103,6 +103,37 @@ func TestClassifyPathnameUtime(t *testing.T) { } } +// TestClassifyPathnameAccess locks in that access(2)'s args[0] argument +// (kernel field "filename") is captured as a real filesystem path. access(2) +// checks the calling process's permissions for a file by path; the path is at +// args[0] (there is no dirfd), so it must classify as KindPathname with the +// path wired to the "filename" field. If this regresses to a non-path kind, +// access's pathname would silently stop being captured. +func TestClassifyPathnameAccess(t *testing.T) { + r := classifyFromData(t, FormatAccess) + if r.Kind != KindPathname { + t.Errorf("access: got kind %d, want KindPathname", r.Kind) + } + if r.PathnameField != "filename" { + t.Errorf("access: PathnameField = %q, want filename", r.PathnameField) + } +} + +// TestClassifyPathnameFaccessat locks in that faccessat(2) — access(2)'s +// dirfd-relative sibling — also classifies as KindPathname with the path wired +// to the "filename" field. The path is at args[1] (args[0] is the dirfd); the +// argument-index difference from access(2) is verified separately in the +// codegen tests (TestGenerateAccessFaccessatHandlers). +func TestClassifyPathnameFaccessat(t *testing.T) { + r := classifyFromData(t, FormatFaccessat) + if r.Kind != KindPathname { + t.Errorf("faccessat: got kind %d, want KindPathname", r.Kind) + } + if r.PathnameField != "filename" { + t.Errorf("faccessat: PathnameField = %q, want filename", r.PathnameField) + } +} + func TestClassifyNameRename(t *testing.T) { r := classifyFromData(t, FormatRename) if r.Kind != KindName { diff --git a/internal/generate/codegen_test.go b/internal/generate/codegen_test.go index 814f114..de64e3c 100644 --- a/internal/generate/codegen_test.go +++ b/internal/generate/codegen_test.go @@ -602,6 +602,35 @@ func TestGeneratePathnameHandler(t *testing.T) { requireContains(t, output, "bpf_probe_read_user_str(ev->pathname, sizeof(ev->pathname), (void*)ctx->args[0]);") } +// TestGenerateAccessFaccessatHandlers locks in the generated BPF C for +// access(2) and its dirfd-relative sibling faccessat(2). Both capture a real +// path into a path_event's pathname member, but from DIFFERENT argument slots: +// access(2) has no dirfd so its path is at args[0], whereas faccessat(2) takes +// dfd at args[0] and the path at args[1]. This guards against a regression that +// would read the wrong arg (e.g. capturing faccessat's dirfd as a path, or +// dropping access's path entirely). The exit side is a ret_event (int 0/-1, +// UNCLASSIFIED) — verified via the shared ret_event handler shape. +func TestGenerateAccessFaccessatHandlers(t *testing.T) { + exitAccess := strings.Replace(FormatExitRead, "sys_exit_read", "sys_exit_access", 1) + exitAccess = strings.Replace(exitAccess, "ID: 843", "ID: 816", 1) + accessOut := generateFromPair(t, FormatAccess, exitAccess) + requireContains(t, accessOut, `SEC("tracepoint/syscalls/sys_enter_access")`) + requireContains(t, accessOut, "struct path_event *ev") + requireContains(t, accessOut, "ev->event_type = ENTER_PATH_EVENT;") + requireContains(t, accessOut, "ev->trace_id = SYS_ENTER_ACCESS;") + // access(2): path (filename) is at args[0] — no dirfd precedes it. + requireContains(t, accessOut, "bpf_probe_read_user_str(ev->pathname, sizeof(ev->pathname), (void*)ctx->args[0]);") + + exitFaccessat := strings.Replace(FormatExitRead, "sys_exit_read", "sys_exit_faccessat", 1) + exitFaccessat = strings.Replace(exitFaccessat, "ID: 843", "ID: 820", 1) + faccessatOut := generateFromPair(t, FormatFaccessat, exitFaccessat) + requireContains(t, faccessatOut, `SEC("tracepoint/syscalls/sys_enter_faccessat")`) + requireContains(t, faccessatOut, "struct path_event *ev") + requireContains(t, faccessatOut, "ev->trace_id = SYS_ENTER_FACCESSAT;") + // faccessat(2): dfd is at args[0], so the path (filename) is at args[1]. + requireContains(t, faccessatOut, "bpf_probe_read_user_str(ev->pathname, sizeof(ev->pathname), (void*)ctx->args[1]);") +} + func TestGenerateFcntlHandler(t *testing.T) { output := generateFromPair(t, FormatFcntl, FormatExitFcntl) diff --git a/internal/generate/family_test.go b/internal/generate/family_test.go index 47b7685..cba4f8e 100644 --- a/internal/generate/family_test.go +++ b/internal/generate/family_test.go @@ -59,6 +59,19 @@ func TestClassifySyscallFamily(t *testing.T) { {"sys_exit_gettimeofday", FamilyTime}, {"sys_enter_sched_yield", FamilySched}, {"sys_enter_openat", FamilyFS}, + // access(2) checks the calling process's permissions for a file named by + // a real filesystem path (pathname at args[0]; no dirfd). Its siblings + // faccessat(2)/faccessat2(2) perform the same check relative to a dirfd + // (path at args[1]). All three are filesystem-metadata syscalls and must + // stay in FamilyFS together — assert the whole cluster so a stray + // reclassification of any one trips this test. Keep in sync with the FS + // list in docs/syscall-tracing-plan.md. + {"sys_enter_access", FamilyFS}, + {"sys_exit_access", FamilyFS}, + {"sys_enter_faccessat", FamilyFS}, + {"sys_exit_faccessat", FamilyFS}, + {"sys_enter_faccessat2", FamilyFS}, + {"sys_exit_faccessat2", FamilyFS}, // utime(2)/utimes(2) change a file's access and modification times by // path (filename at args[0] is a real filesystem path, captured as // KindPathname). They are filesystem-metadata syscalls and share diff --git a/internal/generate/testdata.go b/internal/generate/testdata.go index 8c2b1ee..0941f97 100644 --- a/internal/generate/testdata.go +++ b/internal/generate/testdata.go @@ -389,6 +389,51 @@ format: print fmt: "filename: 0x%08lx, times: 0x%08lx", ((unsigned long)(REC->filename)), ((unsigned long)(REC->times)) ` +// FormatAccess mirrors the real sys_enter_access tracepoint format. access(2) +// checks the calling process's permissions for a file; its first argument +// "filename" is a genuine const char * filesystem path at args[0] (there is no +// dirfd), so access classifies as KindPathname with PathnameField "filename" +// and the path is captured from args[0]. The trailing __data_loc field is the +// kernel's own copy of the string and is ignored by the classifier. +const FormatAccess = `name: sys_enter_access +ID: 817 +format: + field:unsigned short common_type; offset:0; size:2; signed:0; + field:unsigned char common_flags; offset:2; size:1; signed:0; + field:unsigned char common_preempt_count; offset:3; size:1; signed:0; + field:int common_pid; offset:4; size:4; signed:1; + + field:int __syscall_nr; offset:8; size:4; signed:1; + field:const char * filename; offset:16; size:8; signed:0; + field:int mode; offset:24; size:8; signed:0; + field:__data_loc char[] __filename_val; offset:32; size:4; signed:0; + +print fmt: "filename: 0x%08lx \"%s\", mode: 0x%08lx", ((unsigned long)(REC->filename)), __get_str(__filename_val), ((unsigned long)(REC->mode)) +` + +// FormatFaccessat mirrors the real sys_enter_faccessat tracepoint format. +// faccessat(2) is access(2) relative to a directory file descriptor: its first +// argument is "dfd" (the dirfd, args[0]) and the real path "filename" is at +// args[1]. It must therefore classify as KindPathname with PathnameField +// "filename" while capturing the path from args[1] (not args[0]) — the key +// difference from access(2), whose path is at args[0]. +const FormatFaccessat = `name: sys_enter_faccessat +ID: 821 +format: + field:unsigned short common_type; offset:0; size:2; signed:0; + field:unsigned char common_flags; offset:2; size:1; signed:0; + field:unsigned char common_preempt_count; offset:3; size:1; signed:0; + field:int common_pid; offset:4; size:4; signed:1; + + field:int __syscall_nr; offset:8; size:4; signed:1; + field:int dfd; offset:16; size:8; signed:0; + field:const char * filename; offset:24; size:8; signed:0; + field:int mode; offset:32; size:8; signed:0; + field:__data_loc char[] __filename_val; offset:40; size:4; signed:0; + +print fmt: "dfd: 0x%08lx, filename: 0x%08lx \"%s\", mode: 0x%08lx", ((unsigned long)(REC->dfd)), ((unsigned long)(REC->filename)), __get_str(__filename_val), ((unsigned long)(REC->mode)) +` + const FormatDup3 = `name: sys_enter_dup3 ID: 922 format: -- cgit v1.2.3