summaryrefslogtreecommitdiff
path: root/cmd
diff options
context:
space:
mode:
Diffstat (limited to 'cmd')
-rw-r--r-- cmd/ioworkload/scenario_mountfs.go 14
-rw-r--r-- cmd/ioworkload/scenario_retbytes.go 72
2 files changed, 86 insertions, 0 deletions
diff --git a/cmd/ioworkload/scenario_mountfs.go b/cmd/ioworkload/scenario_mountfs.go
index 5053dca..78d0bf6 100644
--- a/cmd/ioworkload/scenario_mountfs.go
+++ b/cmd/ioworkload/scenario_mountfs.go
@@ -93,6 +93,20 @@ func mountfsManagement() error {
if fd, _, errno := syscall.RawSyscall(unix.SYS_OPEN_TREE, atFDCWD, uintptr(unsafe.Pointer(mountPath)), uintptr(unix.OPEN_TREE_CLONE|unix.OPEN_TREE_CLOEXEC)); errno == 0 {
_ = syscall.Close(int(fd))
}
+
+ // mount_setattr(dirfd, path, flags, attr, size) changes the per-mount
+ // attributes of an existing mount. It is a KindPathname syscall: args[1] is
+ // the path. We aim it at the scenario mount point with AT_FDCWD, requesting
+ // MOUNT_ATTR_RDONLY, but it requires CAP_SYS_ADMIN (Linux 5.12+) and the
+ // path is not even a mount here, so it returns EPERM/EINVAL unprivileged.
+ // That is fine: like its mount-API siblings above, the
+ // sys_enter_mount_setattr tracepoint fires on kernel entry before any
+ // permission or validity check, so MinCount>=1 holds regardless of errno.
+ // attr/size carry the MountAttr struct and its size so the kernel parses
+ // the call before failing; the call mutates no real mount.
+ attr := unix.MountAttr{Attr_set: unix.MOUNT_ATTR_RDONLY}
+ _, _, _ = syscall.RawSyscall6(unix.SYS_MOUNT_SETATTR, atFDCWD, uintptr(unsafe.Pointer(mountPath)), 0, uintptr(unsafe.Pointer(&attr)), unsafe.Sizeof(attr), 0)
+
_, _, _ = syscall.RawSyscall6(unix.SYS_MOUNT, uintptr(unsafe.Pointer(none)), uintptr(unsafe.Pointer(mountPath)), uintptr(unsafe.Pointer(tmpfs)), 0, 0, 0)
_, _, _ = syscall.RawSyscall(unix.SYS_UMOUNT2, uintptr(unsafe.Pointer(mountPath)), 0, 0)
_, _, _ = syscall.RawSyscall(unix.SYS_UMOUNT2, uintptr(unsafe.Pointer(mountPath)), uintptr(unix.MNT_DETACH), 0)
diff --git a/cmd/ioworkload/scenario_retbytes.go b/cmd/ioworkload/scenario_retbytes.go
index c1da350..192b9b5 100644
--- a/cmd/ioworkload/scenario_retbytes.go
+++ b/cmd/ioworkload/scenario_retbytes.go
@@ -46,9 +46,81 @@ func retbytesPhaseA() error {
if err := retbytesReadlinkat(); err != nil {
return err
}
+ if err := retbytesVmsplice(); err != nil {
+ return err
+ }
return retbytesProcessVM()
}
+// retbytesVmspliceLen is the exact number of bytes each vmsplice call gathers
+// from the user iovec into the pipe. retbytesVmsplice refuses to run unless
+// its payload literal is exactly this long. The value is far below the default
+// pipe capacity (64 KiB), so vmsplice never blocks, and it is the byte count
+// that the TRANSFER_CLASSIFIED exit assertion in TestRetbytesPhaseA pins.
+const retbytesVmspliceLen = 18
+
+// retbytesVmsplice repeatedly pushes a fixed payload through vmsplice(2).
+//
+// Every round hands the same retbytesVmspliceLen-byte buffer to
+// retbytesVmspliceOnce, which gathers it into a brand-new pipe, drains it and
+// closes both ends, so neither descriptors nor data pile up. vmsplice is
+// TRANSFER_CLASSIFIED: a successful gather returns ctx->ret = bytes moved,
+// which is what the exit byte-count assertion in TestRetbytesPhaseA checks,
+// just as for the splice/tee siblings. The payload is tiny next to the pipe
+// capacity, so the call cannot block.
+//
+// The rounds are spread over a short window so ior has time to attach and
+// capture an enter/exit pair even under heavy parallel integration load.
+func retbytesVmsplice() error {
+	const (
+		rounds = 40
+		pause  = 25 * time.Millisecond
+	)
+
+	// Guard against the literal and the pinned constant drifting apart.
+	payload := []byte("phase-a-vmsplice!!")
+	if got := len(payload); got != retbytesVmspliceLen {
+		return fmt.Errorf("vmsplice payload is %d bytes, want %d", got, retbytesVmspliceLen)
+	}
+
+	for round := 0; round < rounds; round++ {
+		err := retbytesVmspliceOnce(payload)
+		if err != nil {
+			return err
+		}
+		time.Sleep(pause)
+	}
+	return nil
+}
+
+// retbytesVmspliceOnce performs a single vmsplice(2) of buf into a fresh pipe
+// and then drains the pipe.
+//
+// It returns an error when buf is empty, when pipe2, vmsplice or the draining
+// read fails, or when vmsplice or the drain moves a byte count other than
+// len(buf), so the workload fails loudly rather than silently skewing the
+// byte-count assertion. Both pipe ends are closed before returning.
+func retbytesVmspliceOnce(buf []byte) error {
+	// &buf[0] below would panic on an empty slice; report it as an error.
+	if len(buf) == 0 {
+		return fmt.Errorf("vmsplice payload is %d bytes, want at least 1", len(buf))
+	}
+
+	var fds [2]int
+	if err := syscall.Pipe2(fds[:], syscall.O_CLOEXEC); err != nil {
+		return fmt.Errorf("pipe2 for vmsplice: %w", err)
+	}
+	// Close errors are ignored on purpose: the pipe is scratch state.
+	defer syscall.Close(fds[0])
+	defer syscall.Close(fds[1])
+
+	// Iovec.Len is uint64 on 64-bit targets but uint32 on 32-bit ones; SetLen
+	// performs the conversion that matches the build architecture.
+	iov := syscall.Iovec{Base: &buf[0]}
+	iov.SetLen(len(buf))
+	n, _, errno := syscall.Syscall6(
+		syscall.SYS_VMSPLICE,
+		uintptr(fds[1]),
+		uintptr(unsafe.Pointer(&iov)),
+		1, // one iovec segment
+		0, // no SPLICE_F_* flags needed for this tiny, non-blocking gather
+		0, 0,
+	)
+	// iov.Base is the only reference handed to the kernel; keep buf reachable
+	// until the syscall has returned.
+	runtime.KeepAlive(buf)
+	if errno != 0 {
+		return fmt.Errorf("vmsplice: %w", errno)
+	}
+	if int(n) != len(buf) {
+		return fmt.Errorf("vmsplice moved %d bytes, want %d", n, len(buf))
+	}
+
+	// Drain what vmsplice placed into the pipe and confirm nothing is left
+	// behind; a short read would mean the pipe did not hold the full payload.
+	drained, err := syscall.Read(fds[0], make([]byte, len(buf)))
+	if err != nil {
+		return fmt.Errorf("drain vmsplice pipe: %w", err)
+	}
+	if drained != len(buf) {
+		return fmt.Errorf("drained %d bytes from vmsplice pipe, want %d", drained, len(buf))
+	}
+	return nil
+}
+
// retbytesGetdents opens a non-empty directory and reads its entries via
// getdents64(2). getdents/getdents64 are READ_CLASSIFIED, so a successful call
// on a populated directory returns ctx->ret > 0 (bytes filled into the dirent