summaryrefslogtreecommitdiff
path: root/cmd
diff options
context:
space:
mode:
author Paul Buetow <paul@buetow.org> 2026-06-09 22:28:33 +0300
committer Paul Buetow <paul@buetow.org> 2026-06-09 22:28:33 +0300
commit d33d2ef1186096dddf2836f83e91b833866ef3bd (patch)
tree 2155f579b7d42d5ad1487d99c211508e28af4938 /cmd
parent f601dc90fcef3f270c55a9612c5f0326dbd0f391 (diff)
test: add mount_setattr coverage and assert vmsplice TRANSFER byte count
mount_setattr(2) was the only new-mount-API sibling not exercised end-to-end (unlike move_mount/fsmount/fspick/open_tree), and vmsplice(2) had zero end-to-end assertion despite being TRANSFER_CLASSIFIED.

uj0 (mount_setattr): add a best-effort RawSyscall6(SYS_MOUNT_SETATTR) call to mountfsManagement() aimed at the scenario mount point with AT_FDCWD and a MountAttr requesting MOUNT_ATTR_RDONLY. It needs CAP_SYS_ADMIN (Linux 5.12+) and the path is not a mount, so it returns EPERM/EINVAL, but its sys_enter_ tracepoint fires on kernel entry before any check -- the same best-effort pattern used for the other mount-API calls. Add mount_setattr to mountfsTraceArgs and assert enter_mount_setattr MinCount>=1 in TestMountFsManagementSyscalls.

bl0 (vmsplice): add a deterministic retbytesVmsplice driver to the phase-A workload (mirroring the getdents64/readlinkat drivers): a spaced retry loop that gathers a fixed 18-byte user iovec into a fresh pipe via vmsplice and drains it each iteration. vmsplice is TRANSFER_CLASSIFIED, so the exit reports ctx->ret = bytes moved. Add vmsplice to retbytesTraceArgs and assert enter_vmsplice presence plus assertEventBytesAtLeast(payloadLen=18) and a positive duration in TestRetbytesPhaseA, locking in the TRANSFER byte attribution like its splice/tee siblings.

Coverage hardening only; classification/tracing verified correct by inspection (mount_setattr=FamilyFS/KindPathname/UNCLASSIFIED, vmsplice=FamilyNetwork/KindFd/TRANSFER_CLASSIFIED).

Verified: TEST_NAME=TestMountFsManagementSyscalls mage testWithName PASS (enter_mount_setattr captured); TEST_NAME=TestRetbytesPhaseA mage testWithName PASS (vmsplice bytes>=18 asserted).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Diffstat (limited to 'cmd')
-rw-r--r-- cmd/ioworkload/scenario_mountfs.go 14
-rw-r--r-- cmd/ioworkload/scenario_retbytes.go 72
2 files changed, 86 insertions, 0 deletions
diff --git a/cmd/ioworkload/scenario_mountfs.go b/cmd/ioworkload/scenario_mountfs.go
index 5053dca..78d0bf6 100644
--- a/cmd/ioworkload/scenario_mountfs.go
+++ b/cmd/ioworkload/scenario_mountfs.go
@@ -93,6 +93,20 @@ func mountfsManagement() error {
if fd, _, errno := syscall.RawSyscall(unix.SYS_OPEN_TREE, atFDCWD, uintptr(unsafe.Pointer(mountPath)), uintptr(unix.OPEN_TREE_CLONE|unix.OPEN_TREE_CLOEXEC)); errno == 0 {
_ = syscall.Close(int(fd))
}
+
+ // mount_setattr(dirfd, path, flags, attr, size) changes the per-mount
+ // attributes of an existing mount. It is a KindPathname syscall: args[1] is
+ // the path. We aim it at the scenario mount point with AT_FDCWD, requesting
+ // MOUNT_ATTR_RDONLY, but the syscall (Linux 5.12+) requires CAP_SYS_ADMIN
+ // and the path is not even a mount here, so it returns EPERM/EINVAL
+ // unprivileged. That is fine: like its mount-API siblings above, the
+ // sys_enter_mount_setattr tracepoint fires on kernel entry before any
+ // permission or validity check, so MinCount>=1 holds regardless of errno.
+ // attr/size carry the MountAttr struct and its size so the kernel parses
+ // the call before failing; the call mutates no real mount.
+ attr := unix.MountAttr{Attr_set: unix.MOUNT_ATTR_RDONLY}
+ _, _, _ = syscall.RawSyscall6(unix.SYS_MOUNT_SETATTR, atFDCWD, uintptr(unsafe.Pointer(mountPath)), 0, uintptr(unsafe.Pointer(&attr)), unsafe.Sizeof(attr), 0)
+
_, _, _ = syscall.RawSyscall6(unix.SYS_MOUNT, uintptr(unsafe.Pointer(none)), uintptr(unsafe.Pointer(mountPath)), uintptr(unsafe.Pointer(tmpfs)), 0, 0, 0)
_, _, _ = syscall.RawSyscall(unix.SYS_UMOUNT2, uintptr(unsafe.Pointer(mountPath)), 0, 0)
_, _, _ = syscall.RawSyscall(unix.SYS_UMOUNT2, uintptr(unsafe.Pointer(mountPath)), uintptr(unix.MNT_DETACH), 0)
diff --git a/cmd/ioworkload/scenario_retbytes.go b/cmd/ioworkload/scenario_retbytes.go
index c1da350..192b9b5 100644
--- a/cmd/ioworkload/scenario_retbytes.go
+++ b/cmd/ioworkload/scenario_retbytes.go
@@ -46,9 +46,81 @@ func retbytesPhaseA() error {
if err := retbytesReadlinkat(); err != nil {
return err
}
+ if err := retbytesVmsplice(); err != nil {
+ return err
+ }
return retbytesProcessVM()
}
+// retbytesVmspliceLen is the exact number of bytes vmsplice gathers from the
+// user iovec into the pipe on every iteration. It is far below the default
+// pipe capacity (64 KiB) so vmsplice never blocks, and it is the value the
+// TRANSFER_CLASSIFIED exit byte-count assertion in TestRetbytesPhaseA pins.
+const retbytesVmspliceLen = 18
+
+// retbytesVmsplice gathers a fixed-size user buffer into a pipe via vmsplice(2)
+// and drains the pipe each iteration. vmsplice is TRANSFER_CLASSIFIED, so a
+// successful gather into the pipe returns ctx->ret = bytes moved (here exactly
+// retbytesVmspliceLen). This drives the exit byte-count assertion in
+// TestRetbytesPhaseA, locking in the byte attribution like its splice/tee
+// siblings. The buffer is far smaller than the pipe capacity, so vmsplice
+// cannot block; draining the read end leaves no data behind.
+func retbytesVmsplice() error {
+ buf := []byte("phase-a-vmsplice!!") // retbytesVmspliceLen bytes
+ if len(buf) != retbytesVmspliceLen {
+ return fmt.Errorf("vmsplice payload is %d bytes, want %d", len(buf), retbytesVmspliceLen)
+ }
+
+ // Re-issue vmsplice in a short window so ior has enough time to attach and
+ // capture an enter/exit pair under high parallel integration load. Each
+ // iteration creates a fresh pipe, gathers the same buffer, drains it, and
+ // closes both ends so descriptors and data never accumulate.
+ for i := 0; i < 40; i++ {
+ if err := retbytesVmspliceOnce(buf); err != nil {
+ return err
+ }
+ time.Sleep(25 * time.Millisecond)
+ }
+ return nil
+}
+
+// retbytesVmspliceOnce performs a single vmsplice of buf into a fresh pipe and
+// drains it. It returns an error if vmsplice fails or moves the wrong number of
+// bytes, so the workload fails loudly rather than silently skewing the assert.
+func retbytesVmspliceOnce(buf []byte) error {
+ pipe := make([]int, 2)
+ if err := syscall.Pipe2(pipe, syscall.O_CLOEXEC); err != nil {
+ return fmt.Errorf("pipe2 for vmsplice: %w", err)
+ }
+ defer syscall.Close(pipe[0])
+ defer syscall.Close(pipe[1])
+
+ iov := syscall.Iovec{Base: &buf[0], Len: uint64(len(buf))}
+ n, _, errno := syscall.Syscall6(
+ syscall.SYS_VMSPLICE,
+ uintptr(pipe[1]),
+ uintptr(unsafe.Pointer(&iov)),
+ 1, // one iovec segment
+ 0, // no SPLICE_F_* flags needed for this tiny, non-blocking gather
+ 0, 0,
+ )
+ runtime.KeepAlive(buf)
+ if errno != 0 {
+ return fmt.Errorf("vmsplice: %w", errno)
+ }
+ if int(n) != len(buf) {
+ return fmt.Errorf("vmsplice moved %d bytes, want %d", n, len(buf))
+ }
+
+ // Drain whatever vmsplice placed into the pipe so it never approaches its
+ // capacity across iterations.
+ drain := make([]byte, int(n))
+ if _, err := syscall.Read(pipe[0], drain); err != nil {
+ return fmt.Errorf("drain vmsplice pipe: %w", err)
+ }
+ return nil
+}
+
// retbytesGetdents opens a non-empty directory and reads its entries via
// getdents64(2). getdents/getdents64 are READ_CLASSIFIED, so a successful call
// on a populated directory returns ctx->ret > 0 (bytes filled into the dirent