From d33d2ef1186096dddf2836f83e91b833866ef3bd Mon Sep 17 00:00:00 2001 From: Paul Buetow Date: Tue, 9 Jun 2026 22:28:33 +0300 Subject: test: add mount_setattr coverage and assert vmsplice TRANSFER byte count mount_setattr(2) was the only new-mount-API sibling not exercised end-to-end (unlike move_mount/fsmount/fspick/open_tree), and vmsplice(2) had zero end-to-end assertion despite being TRANSFER_CLASSIFIED. uj0 (mount_setattr): add a best-effort RawSyscall6(SYS_MOUNT_SETATTR) call to mountfsManagement() aimed at the scenario mount point with AT_FDCWD and a MountAttr requesting MOUNT_ATTR_RDONLY. It needs CAP_SYS_ADMIN (Linux 5.12+) and the path is not a mount, so it returns EPERM/EINVAL, but its sys_enter_ tracepoint fires on kernel entry before any check -- the same best-effort pattern used for the other mount-API calls. Add mount_setattr to mountfsTraceArgs and assert enter_mount_setattr MinCount>=1 in TestMountFsManagementSyscalls. bl0 (vmsplice): add a deterministic retbytesVmsplice driver to the phase-A workload (mirroring the getdents64/readlinkat drivers): a spaced retry loop that gathers a fixed 18-byte user iovec into a fresh pipe via vmsplice and drains it each iteration. vmsplice is TRANSFER_CLASSIFIED, so the exit reports ctx->ret = bytes moved. Add vmsplice to retbytesTraceArgs and assert enter_vmsplice presence plus assertEventBytesAtLeast(payloadLen=18) and a positive duration in TestRetbytesPhaseA, locking in the TRANSFER byte attribution like its splice/tee siblings. Coverage hardening only; classification/tracing verified correct by inspection (mount_setattr=FamilyFS/KindPathname/UNCLASSIFIED, vmsplice=FamilyNetwork/KindFd/TRANSFER_CLASSIFIED). Verified: TEST_NAME=TestMountFsManagementSyscalls mage testWithName PASS (enter_mount_setattr captured); TEST_NAME=TestRetbytesPhaseA mage testWithName PASS (vmsplice bytes>=18 asserted). 
Co-Authored-By: Claude Opus 4.8 --- cmd/ioworkload/scenario_mountfs.go | 14 ++++++++ cmd/ioworkload/scenario_retbytes.go | 72 +++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) (limited to 'cmd') diff --git a/cmd/ioworkload/scenario_mountfs.go b/cmd/ioworkload/scenario_mountfs.go index 5053dca..78d0bf6 100644 --- a/cmd/ioworkload/scenario_mountfs.go +++ b/cmd/ioworkload/scenario_mountfs.go @@ -93,6 +93,20 @@ func mountfsManagement() error { if fd, _, errno := syscall.RawSyscall(unix.SYS_OPEN_TREE, atFDCWD, uintptr(unsafe.Pointer(mountPath)), uintptr(unix.OPEN_TREE_CLONE|unix.OPEN_TREE_CLOEXEC)); errno == 0 { _ = syscall.Close(int(fd)) } + + // mount_setattr(dirfd, path, flags, attr, size) changes the per-mount + // attributes of an existing mount. It is a KindPathname syscall: args[1] is + // the path. We aim it at the scenario mount point with AT_FDCWD, requesting + // MOUNT_ATTR_RDONLY, but it requires CAP_SYS_ADMIN (Linux 5.12+) and the + // path is not even a mount here, so it returns EPERM/EINVAL unprivileged. + // That is fine: like its mount-API siblings above, the + // sys_enter_mount_setattr tracepoint fires on kernel entry before any + // permission or validity check, so MinCount>=1 holds regardless of errno. + // attr/size carry the MountAttr struct and its size so the kernel parses the + // call before failing; the call mutates no real mount. 
+ attr := unix.MountAttr{Attr_set: unix.MOUNT_ATTR_RDONLY} + _, _, _ = syscall.RawSyscall6(unix.SYS_MOUNT_SETATTR, atFDCWD, uintptr(unsafe.Pointer(mountPath)), 0, uintptr(unsafe.Pointer(&attr)), unsafe.Sizeof(attr), 0) + _, _, _ = syscall.RawSyscall6(unix.SYS_MOUNT, uintptr(unsafe.Pointer(none)), uintptr(unsafe.Pointer(mountPath)), uintptr(unsafe.Pointer(tmpfs)), 0, 0, 0) _, _, _ = syscall.RawSyscall(unix.SYS_UMOUNT2, uintptr(unsafe.Pointer(mountPath)), 0, 0) _, _, _ = syscall.RawSyscall(unix.SYS_UMOUNT2, uintptr(unsafe.Pointer(mountPath)), uintptr(unix.MNT_DETACH), 0) diff --git a/cmd/ioworkload/scenario_retbytes.go b/cmd/ioworkload/scenario_retbytes.go index c1da350..192b9b5 100644 --- a/cmd/ioworkload/scenario_retbytes.go +++ b/cmd/ioworkload/scenario_retbytes.go @@ -46,9 +46,81 @@ func retbytesPhaseA() error { if err := retbytesReadlinkat(); err != nil { return err } + if err := retbytesVmsplice(); err != nil { + return err + } return retbytesProcessVM() } +// retbytesVmspliceLen is the exact number of bytes vmsplice gathers from the +// user iovec into the pipe on every iteration. It is far below the default +// pipe capacity (64 KiB) so vmsplice never blocks, and it is the value the +// TRANSFER_CLASSIFIED exit byte-count assertion in TestRetbytesPhaseA pins. +const retbytesVmspliceLen = 18 + +// retbytesVmsplice gathers a fixed-size user buffer into a pipe via vmsplice(2) +// and drains the pipe each iteration. vmsplice is TRANSFER_CLASSIFIED, so a +// successful gather into the pipe returns ctx->ret = bytes moved (here exactly +// retbytesVmspliceLen). This drives the exit byte-count assertion in +// TestRetbytesPhaseA, locking in the byte attribution like its splice/tee +// siblings. The buffer is far smaller than the pipe capacity, so vmsplice +// cannot block; draining the read end leaves no data behind. 
+func retbytesVmsplice() error { + buf := []byte("phase-a-vmsplice!!") // retbytesVmspliceLen bytes + if len(buf) != retbytesVmspliceLen { + return fmt.Errorf("vmsplice payload is %d bytes, want %d", len(buf), retbytesVmspliceLen) + } + + // Re-issue vmsplice in a short window so ior has enough time to attach and + // capture an enter/exit pair under high parallel integration load. Each + // iteration creates a fresh pipe, gathers the same buffer, drains it, and + // closes both ends so descriptors and data never accumulate. + for i := 0; i < 40; i++ { + if err := retbytesVmspliceOnce(buf); err != nil { + return err + } + time.Sleep(25 * time.Millisecond) + } + return nil +} + +// retbytesVmspliceOnce performs a single vmsplice of buf into a fresh pipe and +// drains it. It returns an error if vmsplice fails or moves the wrong number of +// bytes, so the workload fails loudly rather than silently skewing the assert. +func retbytesVmspliceOnce(buf []byte) error { + pipe := make([]int, 2) + if err := syscall.Pipe2(pipe, syscall.O_CLOEXEC); err != nil { + return fmt.Errorf("pipe2 for vmsplice: %w", err) + } + defer syscall.Close(pipe[0]) + defer syscall.Close(pipe[1]) + + iov := syscall.Iovec{Base: &buf[0], Len: uint64(len(buf))} + n, _, errno := syscall.Syscall6( + syscall.SYS_VMSPLICE, + uintptr(pipe[1]), + uintptr(unsafe.Pointer(&iov)), + 1, // one iovec segment + 0, // no SPLICE_F_* flags needed for this tiny, non-blocking gather + 0, 0, + ) + runtime.KeepAlive(buf) + if errno != 0 { + return fmt.Errorf("vmsplice: %w", errno) + } + if int(n) != len(buf) { + return fmt.Errorf("vmsplice moved %d bytes, want %d", n, len(buf)) + } + + // Drain whatever vmsplice placed into the pipe so it never approaches its + // capacity across iterations. 
+ drain := make([]byte, int(n)) + if _, err := syscall.Read(pipe[0], drain); err != nil { + return fmt.Errorf("drain vmsplice pipe: %w", err) + } + return nil +} + // retbytesGetdents opens a non-empty directory and reads its entries via // getdents64(2). getdents/getdents64 are READ_CLASSIFIED, so a successful call // on a populated directory returns ctx->ret > 0 (bytes filled into the dirent -- cgit v1.2.3