diff options
| author | Paul Buetow <paul@buetow.org> | 2026-05-30 16:20:36 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2026-05-30 16:20:36 +0300 |
| commit | 65d276b67e65427e8cd25fd45b142e6fff1259f0 (patch) | |
| tree | 3e7d60dd6e0862fbc8060d98f340135db63edc4d | |
| parent | 4e6d9e1a6c74e0bdb4d89df10ef22a664f84737f (diff) | |
sendfile64: capture out_fd instead of dropping both fds
sendfile64(out_fd, in_fd, offset, count) transfers bytes between two file
descriptors in the kernel and returns the number of bytes written to out_fd.
Its tracepoint fields carry no field literally named "fd", so it fell through
to KindNull and captured no descriptor at all, which is inconsistent with its
sibling copy_file_range (KindFd) and the read/write/sendto/recvfrom families.
Add an explicit sys_enter_sendfile64 -> KindFd override that captures out_fd
(args[0], the destination the bytes are written to), matching the single-fd
KindFd convention. The return value stays TransferClassified, consistent with
copy_file_range/splice/tee/vmsplice. The family stays Network: sendfile is
historically socket-oriented, whereas copy_file_range (family FS) is purely file-to-file.
Update docs/syscall-tracing-plan.md (move sendfile64 from null to fd kind),
regenerate C/Go artifacts, fix the phase-A classify assertion, and add
TestClassifySendfile64CapturesOutFd as a lock-in + negative test. The existing
TestRetbytesPhaseA integration test still passes with the runtime change.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
| -rw-r--r-- | docs/syscall-tracing-plan.md | 4 | ||||
| -rw-r--r-- | internal/c/generated_tracepoints.c | 7 | ||||
| -rw-r--r-- | internal/c/generated_tracepoints_result.txt | 2 | ||||
| -rw-r--r-- | internal/generate/classify.go | 13 | ||||
| -rw-r--r-- | internal/generate/classify_test.go | 52 | ||||
| -rw-r--r-- | internal/tracepoints/generated_tracepoints.go | 2 |
6 files changed, 71 insertions, 9 deletions
diff --git a/docs/syscall-tracing-plan.md b/docs/syscall-tracing-plan.md index 38173fe..6d0d0cf 100644 --- a/docs/syscall-tracing-plan.md +++ b/docs/syscall-tracing-plan.md @@ -61,14 +61,14 @@ sudo ./ior -trace-syscalls openat,recvmsg,nanosleep -no-trace-kinds null - eventfd: `epoll_create`, `epoll_create1`, `eventfd`, `eventfd2`, `fanotify_init`, `fsmount`, `fsopen`, `inotify_init`, `inotify_init1`, `landlock_create_ruleset`, `memfd_create`, `memfd_secret`, `signalfd`, `signalfd4`, `timerfd_create`, `userfaultfd` - exec: `execve`, `execveat` - fcntl: `fcntl` -- fd: `bind`, `cachestat`, `close`, `connect`, `copy_file_range`, `dup`, `dup2`, `epoll_pwait`, `epoll_pwait2`, `epoll_wait`, `fadvise64`, `fallocate`, `fchdir`, `fchmod`, `fchown`, `fdatasync`, `fgetxattr`, `finit_module`, `flistxattr`, `flock`, `fremovexattr`, `fsconfig`, `fsetxattr`, `fstatfs`, `fsync`, `ftruncate`, `getdents`, `getdents64`, `getpeername`, `getsockname`, `getsockopt`, `inotify_add_watch`, `inotify_rm_watch`, `io_uring_enter`, `io_uring_register`, `ioctl`, `kexec_file_load`, `landlock_add_rule`, `landlock_restrict_self`, `listen`, `lseek`, `mmap`, `mq_getsetattr`, `mq_notify`, `mq_timedreceive`, `mq_timedsend`, `newfstat`, `pidfd_getfd`, `pidfd_send_signal`, `pread64`, `preadv`, `preadv2`, `process_madvise`, `process_mrelease`, `pwrite64`, `pwritev`, `pwritev2`, `quotactl_fd`, `read`, `readahead`, `readv`, `recvfrom`, `recvmmsg`, `recvmsg`, `sendmmsg`, `sendmsg`, `sendto`, `setns`, `setsockopt`, `shutdown`, `sync_file_range`, `syncfs`, `vmsplice`, `write`, `writev` +- fd: `bind`, `cachestat`, `close`, `connect`, `copy_file_range`, `dup`, `dup2`, `epoll_pwait`, `epoll_pwait2`, `epoll_wait`, `fadvise64`, `fallocate`, `fchdir`, `fchmod`, `fchown`, `fdatasync`, `fgetxattr`, `finit_module`, `flistxattr`, `flock`, `fremovexattr`, `fsconfig`, `fsetxattr`, `fstatfs`, `fsync`, `ftruncate`, `getdents`, `getdents64`, `getpeername`, `getsockname`, `getsockopt`, `inotify_add_watch`, `inotify_rm_watch`, 
`io_uring_enter`, `io_uring_register`, `ioctl`, `kexec_file_load`, `landlock_add_rule`, `landlock_restrict_self`, `listen`, `lseek`, `mmap`, `mq_getsetattr`, `mq_notify`, `mq_timedreceive`, `mq_timedsend`, `newfstat`, `pidfd_getfd`, `pidfd_send_signal`, `pread64`, `preadv`, `preadv2`, `process_madvise`, `process_mrelease`, `pwrite64`, `pwritev`, `pwritev2`, `quotactl_fd`, `read`, `readahead`, `readv`, `recvfrom`, `recvmmsg`, `recvmsg`, `sendfile64`, `sendmmsg`, `sendmsg`, `sendto`, `setns`, `setsockopt`, `shutdown`, `sync_file_range`, `syncfs`, `vmsplice`, `write`, `writev` - futex: `futex`, `futex_requeue`, `futex_wait`, `futex_waitv`, `futex_wake` - keyctl: `add_key`, `keyctl`, `request_key` - mem: `brk`, `madvise`, `map_shadow_stack`, `mincore`, `mlock`, `mlock2`, `mprotect`, `mremap`, `mseal`, `munlock`, `munmap`, `pkey_mprotect`, `remap_file_pages` - module: `delete_module`, `init_module` - mq-open: `mq_open` - name: `link`, `linkat`, `rename`, `renameat`, `renameat2`, `symlink`, `symlinkat` -- null: `adjtimex`, `alarm`, `arch_prctl`, `capget`, `capset`, `clock_adjtime`, `clock_getres`, `clock_gettime`, `clock_settime`, `exit`, `exit_group`, `get_mempolicy`, `get_robust_list`, `getcpu`, `getcwd`, `getegid`, `geteuid`, `getgid`, `getgroups`, `getitimer`, `getpgid`, `getpgrp`, `getpid`, `getppid`, `getpriority`, `getrandom`, `getresgid`, `getresuid`, `getrlimit`, `getrusage`, `getsid`, `gettid`, `gettimeofday`, `getuid`, `io_cancel`, `io_destroy`, `io_getevents`, `io_pgetevents`, `io_setup`, `io_submit`, `io_uring_setup`, `ioperm`, `iopl`, `ioprio_get`, `ioprio_set`, `kexec_load`, `kill`, `listmount`, `listns`, `lsm_get_self_attr`, `lsm_list_modules`, `lsm_set_self_attr`, `mbind`, `membarrier`, `migrate_pages`, `mlockall`, `modify_ldt`, `move_pages`, `msync`, `munlockall`, `newuname`, `pause`, `personality`, `pkey_alloc`, `pkey_free`, `prlimit64`, `process_vm_readv`, `process_vm_writev`, `reboot`, `restart_syscall`, `rseq`, `rt_sigaction`, `rt_sigpending`, 
`rt_sigprocmask`, `rt_sigqueueinfo`, `rt_sigreturn`, `rt_sigsuspend`, `rt_sigtimedwait`, `rt_tgsigqueueinfo`, `sched_get_priority_max`, `sched_get_priority_min`, `sched_getaffinity`, `sched_getattr`, `sched_getparam`, `sched_getscheduler`, `sched_rr_get_interval`, `sched_setaffinity`, `sched_setattr`, `sched_setparam`, `sched_setscheduler`, `sched_yield`, `sendfile64`, `set_mempolicy`, `set_mempolicy_home_node`, `set_robust_list`, `set_tid_address`, `setdomainname`, `setfsgid`, `setfsuid`, `setgid`, `setgroups`, `sethostname`, `setitimer`, `setpgid`, `setpriority`, `setregid`, `setresgid`, `setresuid`, `setreuid`, `setrlimit`, `setsid`, `settimeofday`, `setuid`, `sigaltstack`, `splice`, `statmount`, `sync`, `sysfs`, `sysinfo`, `syslog`, `tee`, `tgkill`, `time`, `timerfd_gettime`, `timerfd_settime`, `times`, `tkill`, `umask`, `unshare`, `uprobe`, `uretprobe`, `ustat`, `vhangup` +- null: `adjtimex`, `alarm`, `arch_prctl`, `capget`, `capset`, `clock_adjtime`, `clock_getres`, `clock_gettime`, `clock_settime`, `exit`, `exit_group`, `get_mempolicy`, `get_robust_list`, `getcpu`, `getcwd`, `getegid`, `geteuid`, `getgid`, `getgroups`, `getitimer`, `getpgid`, `getpgrp`, `getpid`, `getppid`, `getpriority`, `getrandom`, `getresgid`, `getresuid`, `getrlimit`, `getrusage`, `getsid`, `gettid`, `gettimeofday`, `getuid`, `io_cancel`, `io_destroy`, `io_getevents`, `io_pgetevents`, `io_setup`, `io_submit`, `io_uring_setup`, `ioperm`, `iopl`, `ioprio_get`, `ioprio_set`, `kexec_load`, `kill`, `listmount`, `listns`, `lsm_get_self_attr`, `lsm_list_modules`, `lsm_set_self_attr`, `mbind`, `membarrier`, `migrate_pages`, `mlockall`, `modify_ldt`, `move_pages`, `msync`, `munlockall`, `newuname`, `pause`, `personality`, `pkey_alloc`, `pkey_free`, `prlimit64`, `process_vm_readv`, `process_vm_writev`, `reboot`, `restart_syscall`, `rseq`, `rt_sigaction`, `rt_sigpending`, `rt_sigprocmask`, `rt_sigqueueinfo`, `rt_sigreturn`, `rt_sigsuspend`, `rt_sigtimedwait`, `rt_tgsigqueueinfo`, 
`sched_get_priority_max`, `sched_get_priority_min`, `sched_getaffinity`, `sched_getattr`, `sched_getparam`, `sched_getscheduler`, `sched_rr_get_interval`, `sched_setaffinity`, `sched_setattr`, `sched_setparam`, `sched_setscheduler`, `sched_yield`, `set_mempolicy`, `set_mempolicy_home_node`, `set_robust_list`, `set_tid_address`, `setdomainname`, `setfsgid`, `setfsuid`, `setgid`, `setgroups`, `sethostname`, `setitimer`, `setpgid`, `setpriority`, `setregid`, `setresgid`, `setresuid`, `setreuid`, `setrlimit`, `setsid`, `settimeofday`, `setuid`, `sigaltstack`, `splice`, `statmount`, `sync`, `sysfs`, `sysinfo`, `syslog`, `tee`, `tgkill`, `time`, `timerfd_gettime`, `timerfd_settime`, `times`, `tkill`, `umask`, `unshare`, `uprobe`, `uretprobe`, `ustat`, `vhangup` - open: `open`, `open_tree`, `open_tree_attr`, `openat`, `openat2` - open-by-handle-at: `open_by_handle_at` - pathname: `access`, `acct`, `chdir`, `chmod`, `chown`, `chroot`, `creat`, `faccessat`, `faccessat2`, `fanotify_mark`, `fchmodat`, `fchmodat2`, `fchownat`, `file_getattr`, `file_setattr`, `fspick`, `futimesat`, `getxattr`, `getxattrat`, `lchown`, `lgetxattr`, `listxattr`, `listxattrat`, `llistxattr`, `lremovexattr`, `lsetxattr`, `mkdir`, `mkdirat`, `mknod`, `mknodat`, `mount`, `mount_setattr`, `mq_unlink`, `name_to_handle_at`, `newfstatat`, `newlstat`, `newstat`, `pivot_root`, `quotactl`, `readlink`, `readlinkat`, `removexattr`, `removexattrat`, `rmdir`, `setxattr`, `setxattrat`, `statfs`, `statx`, `swapoff`, `swapon`, `truncate`, `umount`, `unlink`, `unlinkat`, `utime`, `utimensat`, `utimes` diff --git a/internal/c/generated_tracepoints.c b/internal/c/generated_tracepoints.c index fbf690c..a8437df 100644 --- a/internal/c/generated_tracepoints.c +++ b/internal/c/generated_tracepoints.c @@ -10118,7 +10118,7 @@ int handle_sys_exit_pwritev2(struct syscall_trace_exit *ctx) { return 0; } -/// sys_enter_sendfile64 is a struct null_event (kind=null) +/// sys_enter_sendfile64 is a struct fd_event (kind=fd) 
SEC("tracepoint/syscalls/sys_enter_sendfile64") int handle_sys_enter_sendfile64(struct syscall_trace_enter *ctx) { __u32 pid, tid; @@ -10128,15 +10128,16 @@ int handle_sys_enter_sendfile64(struct syscall_trace_enter *ctx) { if (!ior_on_syscall_enter(tid, SYS_ENTER_SENDFILE64)) return 0; - struct null_event *ev = bpf_ringbuf_reserve(&event_map, sizeof(struct null_event), 0); + struct fd_event *ev = bpf_ringbuf_reserve(&event_map, sizeof(struct fd_event), 0); if (!ev) return 0; - ev->event_type = ENTER_NULL_EVENT; + ev->event_type = ENTER_FD_EVENT; ev->trace_id = SYS_ENTER_SENDFILE64; ev->pid = pid; ev->tid = tid; ev->time = bpf_ktime_get_boot_ns(); + ev->fd = (__s32)ctx->args[0]; bpf_ringbuf_submit(ev, 0); return 0; diff --git a/internal/c/generated_tracepoints_result.txt b/internal/c/generated_tracepoints_result.txt index f59a820..3587939 100644 --- a/internal/c/generated_tracepoints_result.txt +++ b/internal/c/generated_tracepoints_result.txt @@ -279,7 +279,7 @@ sys_enter_semctl is a struct null_event (kind=sysv-op) sys_enter_semget is a struct null_event (kind=sysv-id) sys_enter_semop is a struct null_event (kind=sysv-op) sys_enter_semtimedop is a struct null_event (kind=sysv-op) -sys_enter_sendfile64 is a struct null_event (kind=null) +sys_enter_sendfile64 is a struct fd_event (kind=fd) sys_enter_sendmmsg is a struct fd_event (kind=fd) sys_enter_sendmsg is a struct fd_event (kind=fd) sys_enter_sendto is a struct fd_event (kind=fd) diff --git a/internal/generate/classify.go b/internal/generate/classify.go index b7e9c0f..f85cb93 100644 --- a/internal/generate/classify.go +++ b/internal/generate/classify.go @@ -246,7 +246,18 @@ var nameOnlyKindsTable = map[string]TracepointKind{ // fd_event. This lets the runtime honour the upper bound and the // CLOSE_RANGE_CLOEXEC flag instead of closing every fd >= first. 
"sys_enter_close_range": KindTwoFd, - "sys_enter_statmount": KindNull, + // sendfile64(out_fd, in_fd, offset, count) transfers bytes between two file + // descriptors inside the kernel and returns the number of bytes written to + // out_fd (TransferClassified, see retClassifications). Its tracepoint fields + // (out_fd, in_fd, offset, count) carry no field literally named "fd", so + // without an explicit override it would fall through to KindNull and capture + // no descriptor at all — unlike its sibling copy_file_range, which is a + // KindFd event. Capture out_fd (args[0], the destination the bytes are + // written to) so sendfile64 attributes its transfer to a concrete fd, matching + // the single-fd KindFd convention used for copy_file_range and the + // read/write/sendto/recvfrom families. + "sys_enter_sendfile64": KindFd, + "sys_enter_statmount": KindNull, "sys_enter_listmount": KindNull, "sys_enter_listns": KindNull, diff --git a/internal/generate/classify_test.go b/internal/generate/classify_test.go index 25d01b4..f161ef8 100644 --- a/internal/generate/classify_test.go +++ b/internal/generate/classify_test.go @@ -985,6 +985,56 @@ func TestClassifyN7NameOnlyKinds(t *testing.T) { } } +// TestClassifySendfile64CapturesOutFd locks in the sendfile64 audit (task az): +// sendfile64(out_fd, in_fd, offset, count) transfers bytes between two file +// descriptors inside the kernel and returns the count written to out_fd. Its +// real tracepoint fields carry no field literally named "fd", so without the +// explicit nameOnlyKindsTable override it would fall through to KindNull and +// capture no descriptor — inconsistent with its sibling copy_file_range (KindFd) +// and the read/write/sendto/recvfrom families. This test pins that sendfile64 is +// a KindFd event capturing out_fd (args[0], the write destination) and that the +// generated C emits exactly that capture, never a null_event. 
+func TestClassifySendfile64CapturesOutFd(t *testing.T) { + // Realistic enter layout from /sys/kernel/tracing for sys_enter_sendfile64. + enter := &Format{ + Name: "sys_enter_sendfile64", + ExternalFields: []Field{ + {Type: "long", Name: "__syscall_nr"}, + {Type: "int", Name: "out_fd"}, + {Type: "int", Name: "in_fd"}, + {Type: "off_t *", Name: "offset"}, + {Type: "size_t", Name: "count"}, + }, + } + r := ClassifyFormat(enter) + if r.Kind != KindFd { + t.Fatalf("sendfile64: got kind %d, want KindFd (must not fall back to KindNull)", r.Kind) + } + // Negative guard: out_fd/in_fd must not be mistaken for a two-fd event; the + // audit deliberately keeps sendfile64 single-fd like copy_file_range. + if r.Kind == KindTwoFd || r.Kind == KindNull { + t.Fatalf("sendfile64: kind %d, want single-fd KindFd, not two-fd/null", r.Kind) + } + + // Generated C must capture out_fd at args[0] (the byte-write destination) via + // a struct fd_event, never a struct null_event. + output := GenerateTracepointsC(phaseAFormats("sendfile64", 9500)) + if !strings.Contains(output, "/// sys_enter_sendfile64 is a struct fd_event") { + t.Fatalf("sys_enter_sendfile64 should be a struct fd_event:\n%s", output) + } + if strings.Contains(output, "/// sys_enter_sendfile64 is a struct null_event") { + t.Fatalf("sys_enter_sendfile64 must not be a struct null_event:\n%s", output) + } + if !strings.Contains(output, "ev->fd = (__s32)ctx->args[0];") { + t.Fatalf("sys_enter_sendfile64 should capture out_fd from args[0]:\n%s", output) + } + // Return value stays TransferClassified: sendfile64 moves bytes between two + // fds, consistent with copy_file_range/splice/tee/vmsplice. 
+ if c := ClassifyRet("sys_exit_sendfile64"); c != TransferClassified { + t.Fatalf("sendfile64 ret: got %v, want TransferClassified", c) + } +} + func TestClassifyG7NameOnlyKinds(t *testing.T) { tests := []struct { name string @@ -2000,7 +2050,7 @@ func TestClassifyPhaseAByteSyscallPairsAccepted(t *testing.T) { {"recvmsg", "struct fd_event", "READ_CLASSIFIED"}, {"sendto", "struct fd_event", "WRITE_CLASSIFIED"}, {"sendmsg", "struct fd_event", "WRITE_CLASSIFIED"}, - {"sendfile64", "struct null_event", "TRANSFER_CLASSIFIED"}, + {"sendfile64", "struct fd_event", "TRANSFER_CLASSIFIED"}, {"splice", "struct null_event", "TRANSFER_CLASSIFIED"}, {"tee", "struct null_event", "TRANSFER_CLASSIFIED"}, {"process_vm_readv", "struct null_event", "READ_CLASSIFIED"}, diff --git a/internal/tracepoints/generated_tracepoints.go b/internal/tracepoints/generated_tracepoints.go index b0f9112..38b1f4c 100644 --- a/internal/tracepoints/generated_tracepoints.go +++ b/internal/tracepoints/generated_tracepoints.go @@ -1388,7 +1388,7 @@ var syscallKinds = map[string]string{ "semget": "sysv-id", "semop": "sysv-op", "semtimedop": "sysv-op", - "sendfile64": "null", + "sendfile64": "fd", "sendmmsg": "fd", "sendmsg": "fd", "sendto": "fd", |
