diff options
| -rw-r--r-- | cmd/ioworkload/scenario_pidfd.go | 54 | ||||
| -rw-r--r-- | cmd/ioworkload/scenario_readwrite.go | 44 | ||||
| -rw-r--r-- | cmd/ioworkload/scenarios.go | 3 | ||||
| -rw-r--r-- | integrationtests/pidfd_test.go | 17 | ||||
| -rw-r--r-- | integrationtests/readwrite_test.go | 43 |
5 files changed, 160 insertions, 1 deletion
diff --git a/cmd/ioworkload/scenario_pidfd.go b/cmd/ioworkload/scenario_pidfd.go
index 2aafced..fd3df5a 100644
--- a/cmd/ioworkload/scenario_pidfd.go
+++ b/cmd/ioworkload/scenario_pidfd.go
@@ -75,6 +75,26 @@ func pidfdGetfdFailure() error {
 	return nil
 }
 
+// pidfdSendSignal opens a pidfd for the current process and issues a
+// pidfd_send_signal liveness probe against it. Signal 0 is a special "no signal"
+// value: the kernel performs only the permission/existence checks and delivers
+// NOTHING, so targeting our own process is completely safe (no signal handler
+// runs and the process is not affected). The scenario exercises the enter
+// fd_event (pidfd at args[0]) and the exit ret_event (UNCLASSIFIED) end-to-end.
+func pidfdSendSignal() error {
+	pidfd, err := pidfdOpen(os.Getpid(), 0)
+	if err != nil {
+		return fmt.Errorf("pidfd_open self: %w", err)
+	}
+	defer syscall.Close(pidfd)
+
+	// pidfd_send_signal(pidfd, sig=0, info=NULL, flags=0): liveness probe only.
+	if err := pidfdSendSignalRaw(pidfd, 0, 0, 0); err != nil {
+		return fmt.Errorf("pidfd_send_signal: %w", err)
+	}
+	return nil
+}
+
 func pidfdOpen(pid int, flags uintptr) (int, error) {
 	syscallNr, err := pidfdOpenSyscallNr()
 	if err != nil {
@@ -112,6 +132,30 @@ func pidfdGetfdSyscallNr() (uintptr, error) {
 	return pidfdGetfdSyscallNrForArch(runtime.GOARCH)
 }
 
+func pidfdSendSignalRaw(pidfd int, sig int, info uintptr, flags uintptr) error {
+	syscallNr, err := pidfdSendSignalSyscallNr()
+	if err != nil {
+		return err
+	}
+	_, _, errno := syscall.Syscall6(
+		syscallNr,
+		uintptr(pidfd),
+		uintptr(sig),
+		info,
+		flags,
+		0,
+		0,
+	)
+	if errno != 0 {
+		return errno
+	}
+	return nil
+}
+
+func pidfdSendSignalSyscallNr() (uintptr, error) {
+	return pidfdSendSignalSyscallNrForArch(runtime.GOARCH)
+}
+
 func pidfdOpenSyscallNrForArch(arch string) (uintptr, error) {
 	// Go's syscall package does not expose pidfd constants on all toolchains.
 	switch arch {
@@ -131,3 +175,13 @@ func pidfdGetfdSyscallNrForArch(arch string) (uintptr, error) {
 		return 0, fmt.Errorf("pidfd_getfd syscall number not defined for GOARCH=%s", arch)
 	}
 }
+
+func pidfdSendSignalSyscallNrForArch(arch string) (uintptr, error) {
+	// Go's syscall package does not expose pidfd constants on all toolchains.
+	switch arch {
+	case "amd64", "arm64":
+		return 424, nil
+	default:
+		return 0, fmt.Errorf("pidfd_send_signal syscall number not defined for GOARCH=%s", arch)
+	}
+}
diff --git a/cmd/ioworkload/scenario_readwrite.go b/cmd/ioworkload/scenario_readwrite.go
index 21fba8e..53a366e 100644
--- a/cmd/ioworkload/scenario_readwrite.go
+++ b/cmd/ioworkload/scenario_readwrite.go
@@ -385,6 +385,50 @@ func readwriteReadaheadEbadf() error {
 	return nil
 }
 
+// readwriteFadvise64 opens a file, writes data, then calls fadvise64(2) on it.
+// fadvise64(fd, offset, len, advice) declares an access-pattern hint for the
+// file's page cache; offset=0/len=0 means "the whole file". It returns 0 on
+// success / -1 on error and transfers NO bytes to userspace, so ior classifies
+// it KindFd / UNCLASSIFIED (offset/len are hint parameters, not bytes moved).
+// The scenario exercises the enter fd_event (fd at args[0]) and the exit
+// ret_event end-to-end. unix.Fadvise wraps the per-arch fadvise64 syscall.
+func readwriteFadvise64() error {
+	dir, cleanup, err := makeTempDir("readwrite-fadvise64")
+	if err != nil {
+		return err
+	}
+	defer cleanup()
+
+	path := filepath.Join(dir, "fadvise64file.txt")
+	fd, err := syscall.Open(path, syscall.O_RDWR|syscall.O_CREAT, 0o644)
+	if err != nil {
+		return fmt.Errorf("open: %w", err)
+	}
+	defer syscall.Close(fd)
+
+	if _, err := syscall.Write(fd, []byte("fadvise64 test data")); err != nil {
+		return fmt.Errorf("write: %w", err)
+	}
+
+	// fadvise64(fd, offset=0, len=0, FADV_NORMAL): hint over the whole file.
+	if err := unix.Fadvise(fd, 0, 0, unix.FADV_NORMAL); err != nil {
+		return fmt.Errorf("fadvise64: %w", err)
+	}
+	return nil
+}
+
+// readwriteFadvise64Ebadf calls fadvise64(2) on an invalid fd.
+// The syscall fails with EBADF, but ior captures the enter_fadvise64 tracepoint
+// because arguments are read on syscall entry before the kernel returns an error.
+func readwriteFadvise64Ebadf() error {
+	for i := 0; i < 5; i++ {
+		if err := unix.Fadvise(99999, 0, 0, unix.FADV_NORMAL); err == nil {
+			return fmt.Errorf("expected EBADF, but fadvise64 succeeded")
+		}
+	}
+	return nil
+}
+
 // cachestatRange mirrors the kernel's struct cachestat_range, the second
 // cachestat(2) argument: { __u64 off; __u64 len; }. off=0/len=0 means "the
 // whole file".
diff --git a/cmd/ioworkload/scenarios.go b/cmd/ioworkload/scenarios.go
index 9cfda58..cc3123f 100644
--- a/cmd/ioworkload/scenarios.go
+++ b/cmd/ioworkload/scenarios.go
@@ -31,6 +31,8 @@ var scenarios = map[string]func() error{
 	"readwrite-pwrite-invalid": readwritePwriteInvalid,
 	"readwrite-readahead": readwriteReadahead,
 	"readwrite-readahead-ebadf": readwriteReadaheadEbadf,
+	"readwrite-fadvise64": readwriteFadvise64,
+	"readwrite-fadvise64-ebadf": readwriteFadvise64Ebadf,
 	"readwrite-cachestat": readwriteCachestat,
 	"retbytes-phase-a": retbytesPhaseA,
 	"socket-basic": socketBasic,
@@ -149,6 +151,7 @@ var scenarios = map[string]func() error{
 	"truncate-ftruncate-ebadf": truncateFtruncateEbadf,
 	"pidfd-getfd-success": pidfdGetfdSuccess,
 	"pidfd-getfd-failure": pidfdGetfdFailure,
+	"pidfd-send-signal": pidfdSendSignal,
 	"security-keys-ptrace-perf": securityKeysPtracePerf,
 	"security-landlock": securityLandlockCreateRuleset,
 	"security-getrandom": securityGetrandom,
diff --git a/integrationtests/pidfd_test.go b/integrationtests/pidfd_test.go
index 8df2e13..cbe671d 100644
--- a/integrationtests/pidfd_test.go
+++ b/integrationtests/pidfd_test.go
@@ -2,7 +2,7 @@ package integrationtests
 
 import "testing"
 
-var pidfdTraceArgs = []string{"-trace-syscalls", "pidfd_open,pidfd_getfd,openat,write,close"}
+var pidfdTraceArgs = []string{"-trace-syscalls", "pidfd_open,pidfd_getfd,pidfd_send_signal,openat,write,close"}
 
 // TestPidfdGetfdSuccess asserts the resolved path of the pidfd_getfd event is
 // the duplicated source file, NOT the pidfd's anon_inode.
@@ -36,3 +36,18 @@ func TestPidfdGetfdFailure(t *testing.T) {
 		},
 	}, pidfdTraceArgs)
 }
+
+// TestPidfdSendSignal asserts ior captures the enter_pidfd_send_signal
+// tracepoint when ioworkload issues a pidfd_send_signal liveness probe (sig 0)
+// against its own pidfd. The BPF enter handler captures args[0] = the pidfd
+// (FamilyIPC, KindFd); the exit is UNCLASSIFIED. Signal 0 delivers nothing, so
+// the probe is safe to target self.
+func TestPidfdSendSignal(t *testing.T) {
+	runScenarioResultWithIorArgs(t, "pidfd-send-signal", []ExpectedEvent{
+		{
+			Tracepoint: "enter_pidfd_send_signal",
+			Comm:       "ioworkload",
+			MinCount:   1,
+		},
+	}, pidfdTraceArgs)
+}
diff --git a/integrationtests/readwrite_test.go b/integrationtests/readwrite_test.go
index 8bfa539..636cc5c 100644
--- a/integrationtests/readwrite_test.go
+++ b/integrationtests/readwrite_test.go
@@ -302,6 +302,49 @@ func TestReadwriteReadaheadEbadf(t *testing.T) {
 	}, 0)
 }
 
+func TestReadwriteFadvise64(t *testing.T) {
+	// fadvise64(2) is KindFd / UNCLASSIFIED: it declares a page-cache access hint
+	// and returns 0/-1 (no byte count, transfers no bytes to userspace), so the
+	// tracer must attribute zero bytes (not misread the 0/-1 return or the
+	// offset/len hint parameters as a byte count) while still capturing the fd
+	// (args[0]) on enter and timing the syscall.
+	result, _ := runScenarioResult(t, "readwrite-fadvise64", []ExpectedEvent{
+		{
+			PathContains: "fadvise64file.txt",
+			Tracepoint:   "enter_fadvise64",
+			Comm:         "ioworkload",
+			MinCount:     1,
+		},
+	})
+	exp := ExpectedEvent{
+		PathContains: "fadvise64file.txt",
+		Tracepoint:   "enter_fadvise64",
+		Comm:         "ioworkload",
+	}
+	// UNCLASSIFIED: no byte count is attributed for a successful fadvise64.
+	assertEventBytesEqual(t, result, exp, 0)
+	// Timing is captured end-to-end (enter/exit paired into a duration).
+	assertEventDurationPositive(t, result, exp)
+}
+
+func TestReadwriteFadvise64Ebadf(t *testing.T) {
+	// fadvise64 on an invalid fd fails with EBADF, but ior still captures the
+	// enter_fadvise64 tracepoint because arguments are read on syscall entry
+	// before the kernel returns the error. The UNCLASSIFIED -1 return must not
+	// be attributed as bytes.
+	result, _ := runScenarioResult(t, "readwrite-fadvise64-ebadf", []ExpectedEvent{
+		{
+			Tracepoint: "enter_fadvise64",
+			Comm:       "ioworkload",
+			MinCount:   1,
+		},
+	})
+	assertEventBytesEqual(t, result, ExpectedEvent{
+		Tracepoint: "enter_fadvise64",
+		Comm:       "ioworkload",
+	}, 0)
+}
+
 func TestReadwriteCachestat(t *testing.T) {
 	// cachestat(2) is KindFd / UNCLASSIFIED: it queries page-cache residency for
 	// a file and returns 0/-1 (no byte count, no I/O bytes to userspace), so the
