diff options
| author | Paul Buetow <paul@buetow.org> | 2026-06-09 22:28:33 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2026-06-09 22:28:33 +0300 |
| commit | d33d2ef1186096dddf2836f83e91b833866ef3bd (patch) | |
| tree | 2155f579b7d42d5ad1487d99c211508e28af4938 | |
| parent | f601dc90fcef3f270c55a9612c5f0326dbd0f391 (diff) | |
test: add mount_setattr coverage and assert vmsplice TRANSFER byte count
mount_setattr(2) was the only new-mount-API sibling not exercised
end-to-end (unlike move_mount/fsmount/fspick/open_tree), and vmsplice(2)
had no end-to-end assertion at all despite being TRANSFER_CLASSIFIED.
uj0 (mount_setattr): add a best-effort RawSyscall6(SYS_MOUNT_SETATTR)
call to mountfsManagement() aimed at the scenario mount point with
AT_FDCWD and a MountAttr requesting MOUNT_ATTR_RDONLY. The syscall (added
in Linux 5.12) needs CAP_SYS_ADMIN and the path is not a mount, so it
returns EPERM/EINVAL, but its sys_enter_ tracepoint fires on kernel entry before
any check -- the same best-effort pattern used for the other mount-API
calls. Add mount_setattr to mountfsTraceArgs and assert
enter_mount_setattr MinCount>=1 in TestMountFsManagementSyscalls.
bl0 (vmsplice): add a deterministic retbytesVmsplice driver to the
phase-A workload (mirroring the getdents64/readlinkat drivers): a spaced
retry loop that gathers a fixed 18-byte user iovec into a fresh pipe via
vmsplice and drains it each iteration. vmsplice is TRANSFER_CLASSIFIED,
so the exit reports ctx->ret = bytes moved. Add vmsplice to
retbytesTraceArgs and assert enter_vmsplice presence plus
assertEventBytesAtLeast(payloadLen=18) and a positive duration in
TestRetbytesPhaseA, locking in the TRANSFER byte attribution like its
splice/tee siblings.
Coverage hardening only; classification/tracing verified correct by
inspection (mount_setattr=FamilyFS/KindPathname/UNCLASSIFIED,
vmsplice=FamilyNetwork/KindFd/TRANSFER_CLASSIFIED).
Verified: TEST_NAME=TestMountFsManagementSyscalls mage testWithName PASS
(enter_mount_setattr captured); TEST_NAME=TestRetbytesPhaseA mage
testWithName PASS (vmsplice bytes>=18 asserted).
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | cmd/ioworkload/scenario_mountfs.go | 14 |
| -rw-r--r-- | cmd/ioworkload/scenario_retbytes.go | 72 |
| -rw-r--r-- | integrationtests/mountfs_test.go | 8 |
| -rw-r--r-- | integrationtests/retbytes_test.go | 11 |
4 files changed, 103 insertions, 2 deletions
diff --git a/cmd/ioworkload/scenario_mountfs.go b/cmd/ioworkload/scenario_mountfs.go index 5053dca..78d0bf6 100644 --- a/cmd/ioworkload/scenario_mountfs.go +++ b/cmd/ioworkload/scenario_mountfs.go @@ -93,6 +93,20 @@ func mountfsManagement() error { if fd, _, errno := syscall.RawSyscall(unix.SYS_OPEN_TREE, atFDCWD, uintptr(unsafe.Pointer(mountPath)), uintptr(unix.OPEN_TREE_CLONE|unix.OPEN_TREE_CLOEXEC)); errno == 0 { _ = syscall.Close(int(fd)) } + + // mount_setattr(dirfd, path, flags, attr, size) changes the per-mount + // attributes of an existing mount. It is a KindPathname syscall: args[1] is + // the path. We aim it at the scenario mount point with AT_FDCWD, requesting + // MOUNT_ATTR_RDONLY, but it requires CAP_SYS_ADMIN (Linux 5.12+) and the + // path is not even a mount here, so it returns EPERM/EINVAL unprivileged. + // That is fine: like its mount-API siblings above, the sys_enter_ + // mount_setattr tracepoint fires on kernel entry before any permission or + // validity check, so MinCount>=1 holds regardless of errno. attr/size carry + // the MountAttr struct and its size so the kernel parses the call before + // failing; the call mutates no real mount. 
+ attr := unix.MountAttr{Attr_set: unix.MOUNT_ATTR_RDONLY} + _, _, _ = syscall.RawSyscall6(unix.SYS_MOUNT_SETATTR, atFDCWD, uintptr(unsafe.Pointer(mountPath)), 0, uintptr(unsafe.Pointer(&attr)), unsafe.Sizeof(attr), 0) + _, _, _ = syscall.RawSyscall6(unix.SYS_MOUNT, uintptr(unsafe.Pointer(none)), uintptr(unsafe.Pointer(mountPath)), uintptr(unsafe.Pointer(tmpfs)), 0, 0, 0) _, _, _ = syscall.RawSyscall(unix.SYS_UMOUNT2, uintptr(unsafe.Pointer(mountPath)), 0, 0) _, _, _ = syscall.RawSyscall(unix.SYS_UMOUNT2, uintptr(unsafe.Pointer(mountPath)), uintptr(unix.MNT_DETACH), 0) diff --git a/cmd/ioworkload/scenario_retbytes.go b/cmd/ioworkload/scenario_retbytes.go index c1da350..192b9b5 100644 --- a/cmd/ioworkload/scenario_retbytes.go +++ b/cmd/ioworkload/scenario_retbytes.go @@ -46,9 +46,81 @@ func retbytesPhaseA() error { if err := retbytesReadlinkat(); err != nil { return err } + if err := retbytesVmsplice(); err != nil { + return err + } return retbytesProcessVM() } +// retbytesVmspliceLen is the exact number of bytes vmsplice gathers from the +// user iovec into the pipe on every iteration. It is far below the default +// pipe capacity (64 KiB) so vmsplice never blocks, and it is the value the +// TRANSFER_CLASSIFIED exit byte-count assertion in TestRetbytesPhaseA pins. +const retbytesVmspliceLen = 18 + +// retbytesVmsplice gathers a fixed-size user buffer into a pipe via vmsplice(2) +// and drains the pipe each iteration. vmsplice is TRANSFER_CLASSIFIED, so a +// successful gather into the pipe returns ctx->ret = bytes moved (here exactly +// retbytesVmspliceLen). This drives the exit byte-count assertion in +// TestRetbytesPhaseA, locking in the byte attribution like its splice/tee +// siblings. The buffer is far smaller than the pipe capacity, so vmsplice +// cannot block; draining the read end leaves no data behind. 
+func retbytesVmsplice() error { + buf := []byte("phase-a-vmsplice!!") // retbytesVmspliceLen bytes + if len(buf) != retbytesVmspliceLen { + return fmt.Errorf("vmsplice payload is %d bytes, want %d", len(buf), retbytesVmspliceLen) + } + + // Re-issue vmsplice in a short window so ior has enough time to attach and + // capture an enter/exit pair under high parallel integration load. Each + // iteration creates a fresh pipe, gathers the same buffer, drains it, and + // closes both ends so descriptors and data never accumulate. + for i := 0; i < 40; i++ { + if err := retbytesVmspliceOnce(buf); err != nil { + return err + } + time.Sleep(25 * time.Millisecond) + } + return nil +} + +// retbytesVmspliceOnce performs a single vmsplice of buf into a fresh pipe and +// drains it. It returns an error if vmsplice fails or moves the wrong number of +// bytes, so the workload fails loudly rather than silently skewing the assert. +func retbytesVmspliceOnce(buf []byte) error { + pipe := make([]int, 2) + if err := syscall.Pipe2(pipe, syscall.O_CLOEXEC); err != nil { + return fmt.Errorf("pipe2 for vmsplice: %w", err) + } + defer syscall.Close(pipe[0]) + defer syscall.Close(pipe[1]) + + iov := syscall.Iovec{Base: &buf[0], Len: uint64(len(buf))} + n, _, errno := syscall.Syscall6( + syscall.SYS_VMSPLICE, + uintptr(pipe[1]), + uintptr(unsafe.Pointer(&iov)), + 1, // one iovec segment + 0, // no SPLICE_F_* flags needed for this tiny, non-blocking gather + 0, 0, + ) + runtime.KeepAlive(buf) + if errno != 0 { + return fmt.Errorf("vmsplice: %w", errno) + } + if int(n) != len(buf) { + return fmt.Errorf("vmsplice moved %d bytes, want %d", n, len(buf)) + } + + // Drain whatever vmsplice placed into the pipe so it never approaches its + // capacity across iterations. 
+ drain := make([]byte, int(n)) + if _, err := syscall.Read(pipe[0], drain); err != nil { + return fmt.Errorf("drain vmsplice pipe: %w", err) + } + return nil +} + // retbytesGetdents opens a non-empty directory and reads its entries via // getdents64(2). getdents/getdents64 are READ_CLASSIFIED, so a successful call // on a populated directory returns ctx->ret > 0 (bytes filled into the dirent diff --git a/integrationtests/mountfs_test.go b/integrationtests/mountfs_test.go index 69a19fd..a32cda7 100644 --- a/integrationtests/mountfs_test.go +++ b/integrationtests/mountfs_test.go @@ -4,7 +4,7 @@ import "testing" var mountfsTraceArgs = []string{ "-trace-syscalls", - "mount,umount,move_mount,fsopen,fsconfig,fspick,open_tree,fsmount,pivot_root,quotactl,quotactl_fd,statmount,listmount,listns,swapon,swapoff", + "mount,umount,move_mount,fsopen,fsconfig,fspick,open_tree,mount_setattr,fsmount,pivot_root,quotactl,quotactl_fd,statmount,listmount,listns,swapon,swapoff", } func TestMountFsManagementSyscalls(t *testing.T) { @@ -20,6 +20,12 @@ func TestMountFsManagementSyscalls(t *testing.T) { {Tracepoint: "enter_fsconfig", MinCount: 1}, {Tracepoint: "enter_fspick", MinCount: 1}, {Tracepoint: "enter_open_tree", MinCount: 1}, + // mount_setattr (KindPathname, path@arg1) changes per-mount attributes + // of an existing mount and needs CAP_SYS_ADMIN (Linux 5.12+), so it + // returns EPERM/EINVAL in the scenario. Its sys_enter_ tracepoint fires + // on kernel entry regardless of permission/validity, so MinCount>=1 + // holds even though the call itself fails. 
+ {Tracepoint: "enter_mount_setattr", MinCount: 1}, {Tracepoint: "enter_fsmount", MinCount: 1}, {Tracepoint: "enter_pivot_root", MinCount: 1}, {Tracepoint: "enter_quotactl", MinCount: 1}, diff --git a/integrationtests/retbytes_test.go b/integrationtests/retbytes_test.go index a7cb45a..9b2b55c 100644 --- a/integrationtests/retbytes_test.go +++ b/integrationtests/retbytes_test.go @@ -2,7 +2,7 @@ package integrationtests import "testing" -var retbytesTraceArgs = []string{"-trace-syscalls", "sendto,recvfrom,sendmsg,recvmsg,sendmmsg,recvmmsg,sendfile64,splice,tee,process_vm_writev,process_vm_readv,socketpair,pipe2,openat,write,read,close,lseek,fcntl,unlinkat,mkdirat,getdents64,readlinkat,symlink"} +var retbytesTraceArgs = []string{"-trace-syscalls", "sendto,recvfrom,sendmsg,recvmsg,sendmmsg,recvmmsg,sendfile64,splice,tee,vmsplice,process_vm_writev,process_vm_readv,socketpair,pipe2,openat,write,read,close,lseek,fcntl,unlinkat,mkdirat,getdents64,readlinkat,symlink"} func TestRetbytesPhaseA(t *testing.T) { const payloadLen = uint64(18) @@ -21,6 +21,7 @@ func TestRetbytesPhaseA(t *testing.T) { {Tracepoint: "enter_process_vm_readv", Comm: "ioworkload", MinCount: 1}, {Tracepoint: "enter_getdents64", Comm: "ioworkload", MinCount: 1}, {Tracepoint: "enter_readlinkat", Comm: "ioworkload", MinCount: 1}, + {Tracepoint: "enter_vmsplice", Comm: "ioworkload", MinCount: 1}, }, retbytesTraceArgs) for _, tracepoint := range []string{ @@ -62,4 +63,12 @@ func TestRetbytesPhaseA(t *testing.T) { readlinkatExp := ExpectedEvent{Tracepoint: "enter_readlinkat", Comm: "ioworkload"} assertEventBytesAtLeast(t, result, readlinkatExp, 1) assertEventDurationPositive(t, result, readlinkatExp) + + // vmsplice is TRANSFER_CLASSIFIED: a successful gather of the user iovec + // into the pipe reports ctx->ret = bytes moved. The retbytes driver gathers + // exactly payloadLen (18) bytes each iteration, so the exit byte count is at + // least that. This locks in the TRANSFER byte attribution like splice/tee. 
+ vmspliceExp := ExpectedEvent{Tracepoint: "enter_vmsplice", Comm: "ioworkload"} + assertEventBytesAtLeast(t, result, vmspliceExp, payloadLen) + assertEventDurationPositive(t, result, vmspliceExp) } |
