diff options
| author | Paul Buetow <paul@buetow.org> | 2026-05-28 10:43:37 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2026-05-28 10:43:37 +0300 |
| commit | ff8774b5ce3f6b37e5152d0dc06ae46b7a36d1da (patch) | |
| tree | 7224ccb001a0945216d6e30b5b9c326396ceba76 | |
| parent | 99e99c6ea35ae97e84d727449f9ad7c4c0a9fa23 (diff) | |
close_range: honor last bound and CLOSE_RANGE_CLOEXEC flag
close_range was captured as a single-fd fd_event carrying only `first`, so
the runtime evicted every tracked fd >= `first`, ignoring both the `last`
upper bound and the flags. Bounded calls wrongly dropped still-open higher
fds, and CLOSE_RANGE_CLOEXEC (which keeps fds open) was treated as a full
close.
Reclassify close_range to the two_fd_event kind, mapping fd_a/fd_b/extra to
first/last/flags. The runtime now evicts only the tracked fds in the
inclusive [first, last] range (a negative `last`, which is how ~0U reads
through the signed 32-bit fd_b field, means unbounded) and skips eviction
when CLOSE_RANGE_CLOEXEC is set or the syscall fails.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
| -rw-r--r-- | cmd/ioworkload/scenario_close.go | 46 | ||||
| -rw-r--r-- | cmd/ioworkload/scenarios.go | 1 | ||||
| -rw-r--r-- | docs/syscall-tracing-plan.md | 4 | ||||
| -rw-r--r-- | integrationtests/close_test.go | 17 | ||||
| -rw-r--r-- | internal/c/generated_tracepoints.c | 10 | ||||
| -rw-r--r-- | internal/c/generated_tracepoints_result.txt | 2 | ||||
| -rw-r--r-- | internal/eventloop_exit.go | 42 | ||||
| -rw-r--r-- | internal/eventloop_state.go | 27 | ||||
| -rw-r--r-- | internal/eventloop_test.go | 128 | ||||
| -rw-r--r-- | internal/generate/classify.go | 11 | ||||
| -rw-r--r-- | internal/generate/classify_test.go | 1 | ||||
| -rw-r--r-- | internal/tracepoints/generated_tracepoints.go | 2 |
12 files changed, 246 insertions, 45 deletions
diff --git a/cmd/ioworkload/scenario_close.go b/cmd/ioworkload/scenario_close.go index fc5044c..1be5376 100644 --- a/cmd/ioworkload/scenario_close.go +++ b/cmd/ioworkload/scenario_close.go @@ -64,6 +64,52 @@ func closeRange() error { return nil } +// closeRangeBounded opens a contiguous block of low fds plus one higher fd, +// then closes only the low block via close_range(first, last, 0) where last is +// strictly below the higher fd. It writes to the higher fd afterwards to prove +// it stayed open. This exercises close_range's upper-bound handling end to end: +// ior must keep the higher fd tracked rather than evicting everything >= first. +func closeRangeBounded() error { + dir, cleanup, err := makeTempDir("close-range-bounded") + if err != nil { + return err + } + defer cleanup() + + var lowFds []int + for i := range 3 { + path := filepath.Join(dir, fmt.Sprintf("closerangelow-%d.txt", i)) + fd, err := syscall.Open(path, syscall.O_RDWR|syscall.O_CREAT, 0o644) + if err != nil { + return fmt.Errorf("open low %d: %w", i, err) + } + lowFds = append(lowFds, fd) + } + + highPath := filepath.Join(dir, "closerangehigh.txt") + highFd, err := syscall.Open(highPath, syscall.O_RDWR|syscall.O_CREAT, 0o644) + if err != nil { + return fmt.Errorf("open high: %w", err) + } + defer syscall.Close(highFd) + + if highFd <= lowFds[len(lowFds)-1] { + return fmt.Errorf("high fd %d not above low fds %v", highFd, lowFds) + } + + first := uintptr(lowFds[0]) + last := uintptr(lowFds[len(lowFds)-1]) + if _, _, errno := syscall.Syscall(sysCloseRange, first, last, 0); errno != 0 { + return fmt.Errorf("close_range: %w", errno) + } + + // highFd is above last, so it must still be open and usable. + if _, err := syscall.Write(highFd, []byte("still-open")); err != nil { + return fmt.Errorf("write high fd: %w", err) + } + return nil +} + // closeInvalidFd attempts to close a very high fd number that is not open. 
// The close fails with EBADF, but ior should capture the enter_close tracepoint // because arguments are read on syscall entry before the kernel returns an error. diff --git a/cmd/ioworkload/scenarios.go b/cmd/ioworkload/scenarios.go index ba444ef..534c9b4 100644 --- a/cmd/ioworkload/scenarios.go +++ b/cmd/ioworkload/scenarios.go @@ -43,6 +43,7 @@ var scenarios = map[string]func() error{ "family-mixed": familyMixed, "close-basic": closeBasic, "close-range": closeRange, + "close-range-bounded": closeRangeBounded, "close-invalid-fd": closeInvalidFd, "close-double-close": closeDoubleClose, "close-range-empty": closeRangeEmpty, diff --git a/docs/syscall-tracing-plan.md b/docs/syscall-tracing-plan.md index 25e1af3..f7fb430 100644 --- a/docs/syscall-tracing-plan.md +++ b/docs/syscall-tracing-plan.md @@ -61,7 +61,7 @@ sudo ./ior -trace-syscalls openat,recvmsg,nanosleep -no-trace-kinds null - eventfd: `epoll_create`, `epoll_create1`, `eventfd`, `eventfd2`, `fanotify_init`, `fsmount`, `fsopen`, `inotify_init`, `inotify_init1`, `landlock_create_ruleset`, `memfd_create`, `memfd_secret`, `signalfd`, `signalfd4`, `timerfd_create`, `userfaultfd` - exec: `execve`, `execveat` - fcntl: `fcntl` -- fd: `bind`, `cachestat`, `close`, `close_range`, `connect`, `copy_file_range`, `dup`, `dup2`, `epoll_pwait`, `epoll_pwait2`, `epoll_wait`, `fadvise64`, `fallocate`, `fchdir`, `fchmod`, `fchown`, `fdatasync`, `fgetxattr`, `finit_module`, `flistxattr`, `flock`, `fremovexattr`, `fsconfig`, `fsetxattr`, `fstatfs`, `fsync`, `ftruncate`, `getdents`, `getdents64`, `getpeername`, `getsockname`, `getsockopt`, `inotify_add_watch`, `inotify_rm_watch`, `io_uring_enter`, `io_uring_register`, `ioctl`, `kexec_file_load`, `landlock_add_rule`, `landlock_restrict_self`, `listen`, `lseek`, `mmap`, `mq_getsetattr`, `mq_notify`, `mq_timedreceive`, `mq_timedsend`, `newfstat`, `pidfd_getfd`, `pidfd_send_signal`, `pread64`, `preadv`, `preadv2`, `process_madvise`, `process_mrelease`, `pwrite64`, `pwritev`, 
`pwritev2`, `quotactl_fd`, `read`, `readahead`, `readv`, `recvfrom`, `recvmmsg`, `recvmsg`, `sendmmsg`, `sendmsg`, `sendto`, `setns`, `setsockopt`, `shutdown`, `sync_file_range`, `syncfs`, `vmsplice`, `write`, `writev` +- fd: `bind`, `cachestat`, `close`, `connect`, `copy_file_range`, `dup`, `dup2`, `epoll_pwait`, `epoll_pwait2`, `epoll_wait`, `fadvise64`, `fallocate`, `fchdir`, `fchmod`, `fchown`, `fdatasync`, `fgetxattr`, `finit_module`, `flistxattr`, `flock`, `fremovexattr`, `fsconfig`, `fsetxattr`, `fstatfs`, `fsync`, `ftruncate`, `getdents`, `getdents64`, `getpeername`, `getsockname`, `getsockopt`, `inotify_add_watch`, `inotify_rm_watch`, `io_uring_enter`, `io_uring_register`, `ioctl`, `kexec_file_load`, `landlock_add_rule`, `landlock_restrict_self`, `listen`, `lseek`, `mmap`, `mq_getsetattr`, `mq_notify`, `mq_timedreceive`, `mq_timedsend`, `newfstat`, `pidfd_getfd`, `pidfd_send_signal`, `pread64`, `preadv`, `preadv2`, `process_madvise`, `process_mrelease`, `pwrite64`, `pwritev`, `pwritev2`, `quotactl_fd`, `read`, `readahead`, `readv`, `recvfrom`, `recvmmsg`, `recvmsg`, `sendmmsg`, `sendmsg`, `sendto`, `setns`, `setsockopt`, `shutdown`, `sync_file_range`, `syncfs`, `vmsplice`, `write`, `writev` - futex: `futex`, `futex_requeue`, `futex_wait`, `futex_waitv`, `futex_wake` - keyctl: `add_key`, `keyctl`, `request_key` - mem: `brk`, `madvise`, `map_shadow_stack`, `mincore`, `mlock`, `mlock2`, `mprotect`, `mremap`, `mseal`, `munlock`, `munmap`, `pkey_mprotect`, `remap_file_pages` @@ -86,7 +86,7 @@ sudo ./ior -trace-syscalls openat,recvmsg,nanosleep -no-trace-kinds null - sysv-id: `msgget`, `semget`, `shmget` - sysv-op: `msgctl`, `msgrcv`, `msgsnd`, `semctl`, `semop`, `semtimedop`, `shmat`, `shmctl`, `shmdt` - timer-obj: `timer_create`, `timer_delete`, `timer_getoverrun`, `timer_gettime`, `timer_settime` -- two-fd: `kcmp`, `move_mount` +- two-fd: `close_range`, `kcmp`, `move_mount` ## Bytes vs Non-Bytes Classification diff --git a/integrationtests/close_test.go 
b/integrationtests/close_test.go index 3689fb8..36db476 100644 --- a/integrationtests/close_test.go +++ b/integrationtests/close_test.go @@ -24,6 +24,23 @@ func TestCloseRange(t *testing.T) { }) } +func TestCloseRangeBounded(t *testing.T) { + runScenario(t, "close-range-bounded", []ExpectedEvent{ + { + PathContains: "closerangelow-", + Tracepoint: "enter_close_range", + Comm: "ioworkload", + MinCount: 1, + }, + { + PathContains: "closerangehigh.txt", + Tracepoint: "enter_write", + Comm: "ioworkload", + MinCount: 1, + }, + }) +} + func TestCloseInvalidFd(t *testing.T) { runScenario(t, "close-invalid-fd", []ExpectedEvent{ { diff --git a/internal/c/generated_tracepoints.c b/internal/c/generated_tracepoints.c index 8dfd53f..48c1f84 100644 --- a/internal/c/generated_tracepoints.c +++ b/internal/c/generated_tracepoints.c @@ -7549,7 +7549,7 @@ int handle_sys_exit_sysfs(struct syscall_trace_exit *ctx) { return 0; } -/// sys_enter_close_range is a struct fd_event (kind=fd) +/// sys_enter_close_range is a struct two_fd_event (kind=two-fd) SEC("tracepoint/syscalls/sys_enter_close_range") int handle_sys_enter_close_range(struct syscall_trace_enter *ctx) { __u32 pid, tid; @@ -7559,16 +7559,18 @@ int handle_sys_enter_close_range(struct syscall_trace_enter *ctx) { if (!ior_on_syscall_enter(tid, SYS_ENTER_CLOSE_RANGE)) return 0; - struct fd_event *ev = bpf_ringbuf_reserve(&event_map, sizeof(struct fd_event), 0); + struct two_fd_event *ev = bpf_ringbuf_reserve(&event_map, sizeof(struct two_fd_event), 0); if (!ev) return 0; - ev->event_type = ENTER_FD_EVENT; + ev->event_type = ENTER_TWO_FD_EVENT; ev->trace_id = SYS_ENTER_CLOSE_RANGE; ev->pid = pid; ev->tid = tid; ev->time = bpf_ktime_get_boot_ns(); - ev->fd = (__s32)ctx->args[0]; + ev->fd_a = (__s32)ctx->args[0]; + ev->fd_b = (__s32)ctx->args[1]; + ev->extra = (__u64)ctx->args[2]; bpf_ringbuf_submit(ev, 0); return 0; diff --git a/internal/c/generated_tracepoints_result.txt b/internal/c/generated_tracepoints_result.txt index 
4d44867..7918006 100644 --- a/internal/c/generated_tracepoints_result.txt +++ b/internal/c/generated_tracepoints_result.txt @@ -24,7 +24,7 @@ sys_enter_clock_settime is a struct null_event (kind=null) sys_enter_clone is a struct null_event (kind=proc) sys_enter_clone3 is a struct null_event (kind=proc) sys_enter_close is a struct fd_event (kind=fd) -sys_enter_close_range is a struct fd_event (kind=fd) +sys_enter_close_range is a struct two_fd_event (kind=two-fd) sys_enter_connect is a struct fd_event (kind=fd) sys_enter_copy_file_range is a struct fd_event (kind=fd) sys_enter_creat is a struct path_event (kind=pathname) diff --git a/internal/eventloop_exit.go b/internal/eventloop_exit.go index cb11074..a5b38d4 100644 --- a/internal/eventloop_exit.go +++ b/internal/eventloop_exit.go @@ -118,8 +118,10 @@ func (e *eventLoop) handlePathExit(ep *event.Pair, pathEv *types.PathEvent) bool } // handleFdExit processes exit events for fd-based syscalls. It resolves the fd -// to a file, applies close/close_range state transitions, filters the pair, and -// handles dup/pidfd_getfd fd-transfer operations before finalising bytes. +// to a file, applies the close state transition, filters the pair, and handles +// dup/pidfd_getfd fd-transfer operations before finalising bytes. close_range is +// not handled here: it carries (first, last, flags) and is routed through +// handleTwoFdExit so the upper bound and flags are honoured. func (e *eventLoop) handleFdExit(ep *event.Pair, fdEv *types.FdEvent) bool { fd := fdEv.Fd ep.File = e.fdState().resolve(fd, fdEv.Pid) @@ -134,21 +136,11 @@ func (e *eventLoop) handleFdExit(ep *event.Pair, fdEv *types.FdEvent) bool { return true } -// applyFdCloseState updates fd-tracking state for close and close_range syscalls. +// applyFdCloseState updates fd-tracking state for the close syscall. 
func (e *eventLoop) applyFdCloseState(ep *event.Pair, fd int32, pid uint32) { if ep.Is(types.SYS_ENTER_CLOSE) { e.fdState().delete(fd) e.fdState().deleteProcFdCache(fd, pid) - return - } - if ep.Is(types.SYS_ENTER_CLOSE_RANGE) { - // close_range provides (first, last), but fd_event only carries the first - // argument, so we approximate by closing all tracked fds >= first. - retEv, ok := ep.ExitEv.(*types.RetEvent) - if ok && retEv.Ret == 0 { - e.fdState().closeRangeFrom(fd) - e.fdState().deleteProcFdCacheFrom(fd, pid) - } } } @@ -385,9 +377,33 @@ func (e *eventLoop) handlePollExit(ep *event.Pair, pollEv *types.PollEvent) bool func (e *eventLoop) handleTwoFdExit(ep *event.Pair, twoFdEv *types.TwoFdEvent) bool { ep.File = e.fdState().resolve(twoFdEv.FdA, twoFdEv.Pid) + if ep.Is(types.SYS_ENTER_CLOSE_RANGE) { + e.applyCloseRangeState(ep, twoFdEv) + } return e.finishPairForTid(ep, twoFdEv.GetTid()) } +// closeRangeCloexec mirrors CLOSE_RANGE_CLOEXEC from <linux/close_range.h>: when +// set, close_range only marks the descriptors close-on-exec instead of closing +// them, so the fds stay open and must remain tracked. +const closeRangeCloexec = 1 << 2 + +// applyCloseRangeState evicts the fds closed by a successful close_range. The +// enter event carries (first, last, flags) in fd_a/fd_b/extra. fd_b is an __s32 +// view of the unsigned "last" argument, so a negative value (e.g. ~0U meaning +// "close everything from first up") is treated as having no upper bound. 
+func (e *eventLoop) applyCloseRangeState(ep *event.Pair, ev *types.TwoFdEvent) { + retEv, ok := ep.ExitEv.(*types.RetEvent) + if !ok || retEv.Ret != 0 { + return + } + if ev.Extra&closeRangeCloexec != 0 { + return + } + e.fdState().closeRange(ev.FdA, ev.FdB) + e.fdState().deleteProcFdCacheRange(ev.FdA, ev.FdB, ev.Pid) +} + func (e *eventLoop) handleMemExit(ep *event.Pair, memEv *types.MemEvent) bool { return e.finishPairForTid(ep, memEv.GetTid()) } diff --git a/internal/eventloop_state.go b/internal/eventloop_state.go index a277e31..40e11c2 100644 --- a/internal/eventloop_state.go +++ b/internal/eventloop_state.go @@ -58,11 +58,19 @@ func (t *fdTracker) delete(fd int32) { delete(t.files, fd) } -func (t *fdTracker) closeRangeFrom(first int32) { +// closeRange removes all tracked fds in the inclusive range [first, last], as +// closed by close_range(2). A negative last means "no upper bound": close_range's +// last argument is an unsigned int, so the common close-everything form ~0U +// arrives here as a negative __s32 and must close every tracked fd >= first. +func (t *fdTracker) closeRange(first, last int32) { for fd := range t.files { - if fd >= first { - delete(t.files, fd) + if fd < first { + continue } + if last >= 0 && fd > last { + continue + } + delete(t.files, fd) } } @@ -113,16 +121,23 @@ func (t *fdTracker) deleteProcFdCache(fd int32, pid uint32) { t.deleteCacheKey(procFdCacheKey(pid, fd)) } -func (t *fdTracker) deleteProcFdCacheFrom(first int32, pid uint32) { +// deleteProcFdCacheRange drops cached procfs resolutions for pid's fds in the +// inclusive range [first, last]. A negative last means "no upper bound" (see +// closeRange for why close_range's last argument can arrive negative). 
+func (t *fdTracker) deleteProcFdCacheRange(first, last int32, pid uint32) { if t.procFdCache == nil { return } for key := range t.procFdCache { cachePid := uint32(key >> 32) cacheFd := int32(uint32(key)) - if cachePid == pid && cacheFd >= first { - t.deleteCacheKey(key) + if cachePid != pid || cacheFd < first { + continue + } + if last >= 0 && cacheFd > last { + continue } + t.deleteCacheKey(key) } } diff --git a/internal/eventloop_test.go b/internal/eventloop_test.go index 473a107..b768fcb 100644 --- a/internal/eventloop_test.go +++ b/internal/eventloop_test.go @@ -186,39 +186,114 @@ func TestHandleFdExitCloseClearsProcFdCache(t *testing.T) { verifyProcFdNotCached(t, el, pid, fd) } -func TestHandleFdExitCloseRangeClearsProcFdCacheRange(t *testing.T) { +func TestHandleTwoFdExitCloseRangeClearsProcFdCacheRange(t *testing.T) { el := mustNewEventLoop(t, eventLoopConfig{}) pid := uint32(2002) - el.fdState().setProcFdCache(10, pid, file.NewFd(10, "keep", syscall.O_RDONLY)) + el.fdState().setProcFdCache(10, pid, file.NewFd(10, "keep-below", syscall.O_RDONLY)) el.fdState().setProcFdCache(20, pid, file.NewFd(20, "drop", syscall.O_RDONLY)) el.fdState().setProcFdCache(30, pid, file.NewFd(30, "drop", syscall.O_RDONLY)) + el.fdState().setProcFdCache(40, pid, file.NewFd(40, "keep-above", syscall.O_RDONLY)) el.fdState().setProcFdCache(20, pid+1, file.NewFd(20, "other-pid", syscall.O_RDONLY)) - enter := &types.FdEvent{ - TraceId: types.SYS_ENTER_CLOSE_RANGE, - Pid: pid, - Tid: pid, - Fd: 20, + // close_range(20, 30, 0): only the inclusive [20,30] window for pid is evicted. 
+ enter := &types.TwoFdEvent{ + EventType: types.ENTER_TWO_FD_EVENT, + TraceId: types.SYS_ENTER_CLOSE_RANGE, + Pid: pid, + Tid: pid, + FdA: 20, + FdB: 30, + Extra: 0, } exit := &types.RetEvent{ - TraceId: types.SYS_EXIT_CLOSE_RANGE, - Pid: pid, - Tid: pid, - Ret: 0, + EventType: types.EXIT_RET_EVENT, + TraceId: types.SYS_EXIT_CLOSE_RANGE, + Pid: pid, + Tid: pid, + Ret: 0, } ep := &event.Pair{EnterEv: enter, ExitEv: exit} - if ok := el.handleFdExit(ep, enter); !ok { - t.Fatal("handleFdExit(close_range) returned false") + if ok := el.handleTwoFdExit(ep, enter); !ok { + t.Fatal("handleTwoFdExit(close_range) returned false") } verifyProcFdCached(t, el, pid, 10) verifyProcFdNotCached(t, el, pid, 20) verifyProcFdNotCached(t, el, pid, 30) + verifyProcFdCached(t, el, pid, 40) verifyProcFdCached(t, el, pid+1, 20) } +func TestHandleTwoFdExitCloseRangeCloexecKeepsFds(t *testing.T) { + el := mustNewEventLoop(t, eventLoopConfig{}) + el.fdState().set(5, file.NewFd(5, "stays-open", syscall.O_RDONLY)) + el.fdState().set(6, file.NewFd(6, "stays-open", syscall.O_RDONLY)) + + // close_range(5, 6, CLOSE_RANGE_CLOEXEC): the kernel only marks the fds + // close-on-exec, so they remain open and must stay tracked. 
+ enter := &types.TwoFdEvent{ + EventType: types.ENTER_TWO_FD_EVENT, + TraceId: types.SYS_ENTER_CLOSE_RANGE, + Pid: 3003, + Tid: 3003, + FdA: 5, + FdB: 6, + Extra: closeRangeCloexec, + } + exit := &types.RetEvent{ + EventType: types.EXIT_RET_EVENT, + TraceId: types.SYS_EXIT_CLOSE_RANGE, + Pid: 3003, + Tid: 3003, + Ret: 0, + } + ep := &event.Pair{EnterEv: enter, ExitEv: exit} + + if ok := el.handleTwoFdExit(ep, enter); !ok { + t.Fatal("handleTwoFdExit(close_range cloexec) returned false") + } + + verifyFileDescriptor(t, el, 5, "stays-open") + verifyFileDescriptor(t, el, 6, "stays-open") +} + +func TestHandleTwoFdExitCloseRangeUnboundedClosesAll(t *testing.T) { + el := mustNewEventLoop(t, eventLoopConfig{}) + el.fdState().set(2, file.NewFd(2, "keep-below", syscall.O_RDONLY)) + el.fdState().set(7, file.NewFd(7, "drop", syscall.O_RDONLY)) + el.fdState().set(900, file.NewFd(900, "drop-high", syscall.O_RDONLY)) + + // close_range(3, ~0U, 0): the unsigned UINT_MAX upper bound arrives as a + // negative __s32, meaning "close everything from fd 3 up". 
+ enter := &types.TwoFdEvent{ + EventType: types.ENTER_TWO_FD_EVENT, + TraceId: types.SYS_ENTER_CLOSE_RANGE, + Pid: 4004, + Tid: 4004, + FdA: 3, + FdB: -1, + Extra: 0, + } + exit := &types.RetEvent{ + EventType: types.EXIT_RET_EVENT, + TraceId: types.SYS_EXIT_CLOSE_RANGE, + Pid: 4004, + Tid: 4004, + Ret: 0, + } + ep := &event.Pair{EnterEv: enter, ExitEv: exit} + + if ok := el.handleTwoFdExit(ep, enter); !ok { + t.Fatal("handleTwoFdExit(close_range unbounded) returned false") + } + + verifyFileDescriptor(t, el, 2, "keep-below") + verifyFdNotTracked(t, el, 7) + verifyFdNotTracked(t, el, 900) +} + func TestFreezePairForEmissionCopiesFdFile(t *testing.T) { el := mustNewEventLoop(t, eventLoopConfig{}) fdFile := file.NewFd(9, "/tmp/x", syscall.O_RDONLY) @@ -423,6 +498,27 @@ func makeExitFdEvent(t *testing.T, time uint64, pid, tid uint32, fd int32, trace return ev, bytes } +// makeEnterTwoFdEvent builds an enter two_fd_event and its wire bytes. For +// close_range the three fields carry (first, last, flags). 
+func makeEnterTwoFdEvent(t *testing.T, time uint64, pid, tid uint32, fdA, fdB int32, extra uint64, traceId types.TraceId) (types.TwoFdEvent, []byte) { + ev := types.TwoFdEvent{ + EventType: types.ENTER_TWO_FD_EVENT, + TraceId: traceId, + Time: time, + Pid: pid, + Tid: tid, + FdA: fdA, + FdB: fdB, + Extra: extra, + } + + bytes, err := ev.Bytes() + if err != nil { + t.Error(err) + } + return ev, bytes +} + // Helper function to create exit RetEvent func makeExitRetEvent(t *testing.T, time uint64, pid, tid uint32, traceId types.TraceId, ret int64) (types.RetEvent, []byte) { ev := types.RetEvent{ @@ -590,7 +686,8 @@ func makeCloseRangeEventTestData(t *testing.T) (td testData) { openExitBytes3, _ = openExitEv3.Bytes() td.rawTracepoints = append(td.rawTracepoints, openExitBytes3) - enterCloseRange, enterCloseRangeBytes := makeEnterFdEvent(t, defaulTime+600, defaultPid, defaultTid, fd2, types.SYS_ENTER_CLOSE_RANGE) + // close_range(fd2, fd3, 0): closes the inclusive window [fd2, fd3], leaving fd1 tracked. + enterCloseRange, enterCloseRangeBytes := makeEnterTwoFdEvent(t, defaulTime+600, defaultPid, defaultTid, fd2, fd3, 0, types.SYS_ENTER_CLOSE_RANGE) td.rawTracepoints = append(td.rawTracepoints, enterCloseRangeBytes) exitCloseRange, exitCloseRangeBytes := makeExitRetEvent(t, defaulTime+700, defaultPid, defaultTid, types.SYS_EXIT_CLOSE_RANGE, 0) @@ -671,7 +768,8 @@ func makeCloseRangeFailureTestData(t *testing.T) (td testData) { openExitBytes2, _ = openExitEv2.Bytes() td.rawTracepoints = append(td.rawTracepoints, openExitBytes2) - enterCloseRange, enterCloseRangeBytes := makeEnterFdEvent(t, defaulTime+400, defaultPid, defaultTid, fd1, types.SYS_ENTER_CLOSE_RANGE) + // close_range(fd1, fd2, 0) that fails (ret=-1): no fds should be evicted. 
+ enterCloseRange, enterCloseRangeBytes := makeEnterTwoFdEvent(t, defaulTime+400, defaultPid, defaultTid, fd1, fd2, 0, types.SYS_ENTER_CLOSE_RANGE) td.rawTracepoints = append(td.rawTracepoints, enterCloseRangeBytes) exitCloseRange, exitCloseRangeBytes := makeExitRetEvent(t, defaulTime+500, defaultPid, defaultTid, types.SYS_EXIT_CLOSE_RANGE, -1) diff --git a/internal/generate/classify.go b/internal/generate/classify.go index 7dcbf5a..b7e9c0f 100644 --- a/internal/generate/classify.go +++ b/internal/generate/classify.go @@ -241,9 +241,14 @@ var nameOnlyKindsTable = map[string]TracepointKind{ "sys_enter_epoll_ctl": KindEpollCtl, "sys_enter_move_mount": KindTwoFd, - "sys_enter_statmount": KindNull, - "sys_enter_listmount": KindNull, - "sys_enter_listns": KindNull, + // close_range(first, last, flags) needs all three arguments, so it is a + // two_fd_event (fd_a=first, fd_b=last, extra=flags) rather than a single-fd + // fd_event. This lets the runtime honour the upper bound and the + // CLOSE_RANGE_CLOEXEC flag instead of closing every fd >= first. 
+ "sys_enter_close_range": KindTwoFd, + "sys_enter_statmount": KindNull, + "sys_enter_listmount": KindNull, + "sys_enter_listns": KindNull, "sys_enter_poll": KindPoll, "sys_enter_ppoll": KindPoll, diff --git a/internal/generate/classify_test.go b/internal/generate/classify_test.go index 618274a..46947e2 100644 --- a/internal/generate/classify_test.go +++ b/internal/generate/classify_test.go @@ -1401,6 +1401,7 @@ func TestClassifySyscallPairAccepted(t *testing.T) { {"mount", FormatMount, FormatExitMount, KindPathname}, {"umount", FormatUmount, FormatExitUmount, KindPathname}, {"move_mount", FormatMoveMount, FormatExitMoveMount, KindTwoFd}, + {"close_range", syntheticEnter("close_range", 9322), syntheticExit("close_range", 9321), KindTwoFd}, {"kcmp", syntheticEnter("kcmp", 9324), syntheticExit("kcmp", 9323), KindTwoFd}, {"kexec_file_load", syntheticEnter("kexec_file_load", 9326), syntheticExit("kexec_file_load", 9325), KindFd}, {"membarrier", syntheticEnter("membarrier", 9328), syntheticExit("membarrier", 9327), KindNull}, diff --git a/internal/tracepoints/generated_tracepoints.go b/internal/tracepoints/generated_tracepoints.go index fd3ac75..5ff2f5c 100644 --- a/internal/tracepoints/generated_tracepoints.go +++ b/internal/tracepoints/generated_tracepoints.go @@ -1135,7 +1135,7 @@ var syscallKinds = map[string]string{ "clone": "proc", "clone3": "proc", "close": "fd", - "close_range": "fd", + "close_range": "two-fd", "connect": "fd", "copy_file_range": "fd", "creat": "pathname", |
