diff options
| field | value | date |
|---|---|---|
| author | Paul Buetow <paul@buetow.org> | 2026-02-23 21:57:35 +0200 |
| committer | Paul Buetow <paul@buetow.org> | 2026-02-23 21:57:35 +0200 |
| commit | c58ef78b4a79a5cbc9531a74c6e3965ea4c00bc2 (patch) | |
| tree | 29944a45cf7d00d781a232020b327203b1a72b75 /integrationtests | |
| parent | aa19be8c624a6adc3ecbf11a6ee0506a5c7d34fe (diff) | |
Harden integration workloads against dropped event flakes
Diffstat (limited to 'integrationtests')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | integrationtests/cmd/ioworkload/scenario_close.go | 9 |
| -rw-r--r-- | integrationtests/cmd/ioworkload/scenario_dir.go | 22 |
| -rw-r--r-- | integrationtests/cmd/ioworkload/scenario_iouring.go | 46 |
| -rw-r--r-- | integrationtests/cmd/ioworkload/scenario_open.go | 46 |
| -rw-r--r-- | integrationtests/cmd/ioworkload/scenario_rename.go | 22 |
| -rw-r--r-- | integrationtests/cmd/ioworkload/scenario_stat.go | 33 |
| -rw-r--r-- | integrationtests/cmd/ioworkload/scenario_sync.go | 24 |
| -rw-r--r-- | integrationtests/cmd/ioworkload/scenario_truncate.go | 12 |
| -rw-r--r-- | integrationtests/cmd/ioworkload/scenario_unlink.go | 20 |
| -rw-r--r-- | integrationtests/open_test.go | 17 |
10 files changed, 154 insertions, 97 deletions
diff --git a/integrationtests/cmd/ioworkload/scenario_close.go b/integrationtests/cmd/ioworkload/scenario_close.go index a36160a..fc5044c 100644 --- a/integrationtests/cmd/ioworkload/scenario_close.go +++ b/integrationtests/cmd/ioworkload/scenario_close.go @@ -106,9 +106,12 @@ func closeDoubleClose() error { // (9000–9999) where no fds are open. The syscall succeeds (empty range is valid), // and ior should capture the enter_close_range tracepoint. func closeRangeEmpty() error { - _, _, errno := syscall.Syscall(sysCloseRange, 9000, 9999, 0) - if errno != 0 { - return fmt.Errorf("close_range: %w", errno) + // Retry a few times to reduce event-loss flakiness under heavy test load. + for i := 0; i < 5; i++ { + _, _, errno := syscall.Syscall(sysCloseRange, 9000, 9999, 0) + if errno != 0 { + return fmt.Errorf("close_range: %w", errno) + } } return nil } diff --git a/integrationtests/cmd/ioworkload/scenario_dir.go b/integrationtests/cmd/ioworkload/scenario_dir.go index 282c903..81421ec 100644 --- a/integrationtests/cmd/ioworkload/scenario_dir.go +++ b/integrationtests/cmd/ioworkload/scenario_dir.go @@ -195,10 +195,13 @@ func dirChdirEnoent() error { if err != nil { return fmt.Errorf("path bytes: %w", err) } - _, _, errno := syscall.Syscall(syscall.SYS_CHDIR, uintptr(unsafe.Pointer(pathBytes)), 0, 0) - runtime.KeepAlive(pathBytes) - if errno == 0 { - return fmt.Errorf("expected ENOENT, but chdir succeeded") + // Retry a few times to reduce dropped-event flakiness under high load. + for i := 0; i < 5; i++ { + _, _, errno := syscall.Syscall(syscall.SYS_CHDIR, uintptr(unsafe.Pointer(pathBytes)), 0, 0) + runtime.KeepAlive(pathBytes) + if errno == 0 { + return fmt.Errorf("expected ENOENT, but chdir succeeded") + } } return nil } @@ -207,10 +210,13 @@ func dirChdirEnoent() error { // The syscall fails with EBADF, but ior captures the tracepoint on entry. 
func dirGetdentsEbadf() error { buf := make([]byte, 4096) - _, _, errno := syscall.Syscall(syscall.SYS_GETDENTS64, uintptr(9999), uintptr(unsafe.Pointer(&buf[0])), uintptr(len(buf))) - runtime.KeepAlive(buf) - if errno == 0 { - return fmt.Errorf("expected EBADF, but getdents64 succeeded") + // Retry a few times to reduce flakiness under high integration parallelism. + for i := 0; i < 5; i++ { + _, _, errno := syscall.Syscall(syscall.SYS_GETDENTS64, uintptr(9999), uintptr(unsafe.Pointer(&buf[0])), uintptr(len(buf))) + runtime.KeepAlive(buf) + if errno == 0 { + return fmt.Errorf("expected EBADF, but getdents64 succeeded") + } } return nil } diff --git a/integrationtests/cmd/ioworkload/scenario_iouring.go b/integrationtests/cmd/ioworkload/scenario_iouring.go index b1aac4e..a16d59a 100644 --- a/integrationtests/cmd/ioworkload/scenario_iouring.go +++ b/integrationtests/cmd/ioworkload/scenario_iouring.go @@ -80,17 +80,19 @@ func iouringRegister() error { // iouringEnterEbadf calls io_uring_enter on an invalid fd. // The syscall fails with EBADF, but ior captures the enter_io_uring_enter tracepoint. func iouringEnterEbadf() error { - _, _, errno := syscall.Syscall6( - sysIoUringEnter, - 99999, // invalid fd - 0, // to_submit - 0, // min_complete - 0, // flags - 0, // sig - 0, // sz - ) - if errno == 0 { - return fmt.Errorf("expected EBADF, but io_uring_enter succeeded") + for i := 0; i < 5; i++ { + _, _, errno := syscall.Syscall6( + sysIoUringEnter, + 99999, // invalid fd + 0, // to_submit + 0, // min_complete + 0, // flags + 0, // sig + 0, // sz + ) + if errno == 0 { + return fmt.Errorf("expected EBADF, but io_uring_enter succeeded") + } } return nil } @@ -98,16 +100,18 @@ func iouringEnterEbadf() error { // iouringRegisterEbadf calls io_uring_register on an invalid fd. // The syscall fails with EBADF, but ior captures the enter_io_uring_register tracepoint. 
func iouringRegisterEbadf() error { - _, _, errno := syscall.Syscall6( - sysIoUringRegister, - 99999, // invalid fd - ioringRegisterProbe, - 0, // arg (NULL) - 0, // nr_args - 0, 0, - ) - if errno == 0 { - return fmt.Errorf("expected EBADF, but io_uring_register succeeded") + for i := 0; i < 5; i++ { + _, _, errno := syscall.Syscall6( + sysIoUringRegister, + 99999, // invalid fd + ioringRegisterProbe, + 0, // arg (NULL) + 0, // nr_args + 0, 0, + ) + if errno == 0 { + return fmt.Errorf("expected EBADF, but io_uring_register succeeded") + } } return nil } diff --git a/integrationtests/cmd/ioworkload/scenario_open.go b/integrationtests/cmd/ioworkload/scenario_open.go index 7a1f15a..1aebec1 100644 --- a/integrationtests/cmd/ioworkload/scenario_open.go +++ b/integrationtests/cmd/ioworkload/scenario_open.go @@ -60,9 +60,11 @@ func openEnoent() error { defer cleanup() path := filepath.Join(dir, "nonexistent", "enoentfile.txt") - _, err = syscall.Open(path, syscall.O_RDONLY, 0) - if err == nil { - return fmt.Errorf("expected ENOENT, but open succeeded") + for i := 0; i < 5; i++ { + _, err = syscall.Open(path, syscall.O_RDONLY, 0) + if err == nil { + return fmt.Errorf("expected ENOENT, but open succeeded") + } } return nil } @@ -232,6 +234,9 @@ func openByHandleAtSyscall(mountFD int, handle []byte, flags int) (int, error) { // separated by a deliberate sleep. Integration tests use this to assert that // durationToPrev captures inter-syscall gaps for the same event key. 
func openDurationGap() error { + runtime.LockOSThread() + defer runtime.UnlockOSThread() + dir, cleanup, err := makeTempDir("open-duration-gap") if err != nil { return err @@ -240,19 +245,26 @@ func openDurationGap() error { path := filepath.Join(dir, "gap-shared.txt") - fd1, err := syscall.Open(path, syscall.O_RDWR|syscall.O_CREAT, 0o644) - if err != nil { - return fmt.Errorf("open first: %w", err) - } - if err := syscall.Close(fd1); err != nil { - return fmt.Errorf("close first: %w", err) - } - - time.Sleep(800 * time.Millisecond) - - fd2, err := syscall.Open(path, syscall.O_RDWR|syscall.O_CREAT, 0o644) - if err != nil { - return fmt.Errorf("open second: %w", err) + // Repeat the same open/sleep/open pattern to make the gap observation robust + // under high test parallelism where individual events can occasionally drop. + for i := 0; i < 5; i++ { + fd1, err := syscall.Open(path, syscall.O_RDWR|syscall.O_CREAT, 0o644) + if err != nil { + return fmt.Errorf("open first: %w", err) + } + if err := syscall.Close(fd1); err != nil { + return fmt.Errorf("close first: %w", err) + } + + time.Sleep(800 * time.Millisecond) + + fd2, err := syscall.Open(path, syscall.O_RDWR|syscall.O_CREAT, 0o644) + if err != nil { + return fmt.Errorf("open second: %w", err) + } + if err := syscall.Close(fd2); err != nil { + return fmt.Errorf("close second: %w", err) + } } - return syscall.Close(fd2) + return nil } diff --git a/integrationtests/cmd/ioworkload/scenario_rename.go b/integrationtests/cmd/ioworkload/scenario_rename.go index 95b93e1..d2e76b1 100644 --- a/integrationtests/cmd/ioworkload/scenario_rename.go +++ b/integrationtests/cmd/ioworkload/scenario_rename.go @@ -180,16 +180,18 @@ func renameEnoent() error { return fmt.Errorf("new path bytes: %w", err) } - _, _, errno := syscall.Syscall( - syscall.SYS_RENAME, - uintptr(unsafe.Pointer(oldBytes)), - uintptr(unsafe.Pointer(newBytes)), - 0, - ) - runtime.KeepAlive(oldBytes) - runtime.KeepAlive(newBytes) - if errno == 0 { - return 
fmt.Errorf("expected ENOENT, but rename succeeded") + for i := 0; i < 5; i++ { + _, _, errno := syscall.Syscall( + syscall.SYS_RENAME, + uintptr(unsafe.Pointer(oldBytes)), + uintptr(unsafe.Pointer(newBytes)), + 0, + ) + runtime.KeepAlive(oldBytes) + runtime.KeepAlive(newBytes) + if errno == 0 { + return fmt.Errorf("expected ENOENT, but rename succeeded") + } } return nil } diff --git a/integrationtests/cmd/ioworkload/scenario_stat.go b/integrationtests/cmd/ioworkload/scenario_stat.go index ce9807d..154d8b8 100644 --- a/integrationtests/cmd/ioworkload/scenario_stat.go +++ b/integrationtests/cmd/ioworkload/scenario_stat.go @@ -226,11 +226,14 @@ func statEnoent() error { return fmt.Errorf("path bytes: %w", err) } var stat syscall.Stat_t - _, _, errno := syscall.Syscall(syscall.SYS_STAT, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(&stat)), 0) - runtime.KeepAlive(pathBytes) - runtime.KeepAlive(&stat) - if errno == 0 { - return fmt.Errorf("expected ENOENT, but stat succeeded") + // Retry a few times to reduce dropped-event flakiness under high parallelism. 
+ for i := 0; i < 5; i++ { + _, _, errno := syscall.Syscall(syscall.SYS_STAT, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(&stat)), 0) + runtime.KeepAlive(pathBytes) + runtime.KeepAlive(&stat) + if errno == 0 { + return fmt.Errorf("expected ENOENT, but stat succeeded") + } } return nil } @@ -252,10 +255,12 @@ func statAccessEnoent() error { if err != nil { return fmt.Errorf("path bytes: %w", err) } - _, _, errno := syscall.Syscall(syscall.SYS_ACCESS, uintptr(unsafe.Pointer(pathBytes)), rOK, 0) - runtime.KeepAlive(pathBytes) - if errno == 0 { - return fmt.Errorf("expected ENOENT, but access succeeded") + for i := 0; i < 5; i++ { + _, _, errno := syscall.Syscall(syscall.SYS_ACCESS, uintptr(unsafe.Pointer(pathBytes)), rOK, 0) + runtime.KeepAlive(pathBytes) + if errno == 0 { + return fmt.Errorf("expected ENOENT, but access succeeded") + } } return nil } @@ -265,10 +270,12 @@ func statAccessEnoent() error { // tracepoint because it is recorded on syscall entry. func statFstatEbadf() error { var stat syscall.Stat_t - _, _, errno := syscall.Syscall(syscall.SYS_FSTAT, 99999, uintptr(unsafe.Pointer(&stat)), 0) - runtime.KeepAlive(&stat) - if errno == 0 { - return fmt.Errorf("expected EBADF, but fstat succeeded") + for i := 0; i < 5; i++ { + _, _, errno := syscall.Syscall(syscall.SYS_FSTAT, 99999, uintptr(unsafe.Pointer(&stat)), 0) + runtime.KeepAlive(&stat) + if errno == 0 { + return fmt.Errorf("expected EBADF, but fstat succeeded") + } } return nil } diff --git a/integrationtests/cmd/ioworkload/scenario_sync.go b/integrationtests/cmd/ioworkload/scenario_sync.go index db6b08f..df1c59c 100644 --- a/integrationtests/cmd/ioworkload/scenario_sync.go +++ b/integrationtests/cmd/ioworkload/scenario_sync.go @@ -103,9 +103,11 @@ func syncSyncFileRangeToEOF() error { // syncFsyncEbadf calls fsync on an invalid fd. // The syscall fails with EBADF, but ior captures the enter_fsync tracepoint. 
func syncFsyncEbadf() error { - _, _, errno := syscall.Syscall(syscall.SYS_FSYNC, 99999, 0, 0) - if errno == 0 { - return fmt.Errorf("expected EBADF, but fsync succeeded") + for i := 0; i < 5; i++ { + _, _, errno := syscall.Syscall(syscall.SYS_FSYNC, 99999, 0, 0) + if errno == 0 { + return fmt.Errorf("expected EBADF, but fsync succeeded") + } } return nil } @@ -113,9 +115,11 @@ func syncFsyncEbadf() error { // syncFdatasyncEbadf calls fdatasync on an invalid fd. // The syscall fails with EBADF, but ior captures the enter_fdatasync tracepoint. func syncFdatasyncEbadf() error { - _, _, errno := syscall.Syscall(syscall.SYS_FDATASYNC, 99999, 0, 0) - if errno == 0 { - return fmt.Errorf("expected EBADF, but fdatasync succeeded") + for i := 0; i < 5; i++ { + _, _, errno := syscall.Syscall(syscall.SYS_FDATASYNC, 99999, 0, 0) + if errno == 0 { + return fmt.Errorf("expected EBADF, but fdatasync succeeded") + } } return nil } @@ -123,9 +127,11 @@ func syncFdatasyncEbadf() error { // syncFileRangeEbadf calls sync_file_range on an invalid fd. // The syscall fails with EBADF, but ior captures the enter_sync_file_range tracepoint. 
func syncFileRangeEbadf() error { - _, _, errno := syscall.Syscall6(syscall.SYS_SYNC_FILE_RANGE, 99999, 0, 0, 0, 0, 0) - if errno == 0 { - return fmt.Errorf("expected EBADF, but sync_file_range succeeded") + for i := 0; i < 5; i++ { + _, _, errno := syscall.Syscall6(syscall.SYS_SYNC_FILE_RANGE, 99999, 0, 0, 0, 0, 0) + if errno == 0 { + return fmt.Errorf("expected EBADF, but sync_file_range succeeded") + } } return nil } diff --git a/integrationtests/cmd/ioworkload/scenario_truncate.go b/integrationtests/cmd/ioworkload/scenario_truncate.go index 28be152..04288d5 100644 --- a/integrationtests/cmd/ioworkload/scenario_truncate.go +++ b/integrationtests/cmd/ioworkload/scenario_truncate.go @@ -69,10 +69,14 @@ func truncateEnoent() error { if err != nil { return fmt.Errorf("path bytes: %w", err) } - _, _, errno := syscall.Syscall(syscall.SYS_TRUNCATE, uintptr(unsafe.Pointer(pathBytes)), 0, 0) - runtime.KeepAlive(pathBytes) - if errno == 0 { - return fmt.Errorf("expected ENOENT, but truncate succeeded") + // Retry a few times to make this test resilient under high integration + // parallelism where a single failed syscall event can be dropped. 
+ for i := 0; i < 5; i++ { + _, _, errno := syscall.Syscall(syscall.SYS_TRUNCATE, uintptr(unsafe.Pointer(pathBytes)), 0, 0) + runtime.KeepAlive(pathBytes) + if errno == 0 { + return fmt.Errorf("expected ENOENT, but truncate succeeded") + } } return nil } diff --git a/integrationtests/cmd/ioworkload/scenario_unlink.go b/integrationtests/cmd/ioworkload/scenario_unlink.go index 0d45710..ec7c926 100644 --- a/integrationtests/cmd/ioworkload/scenario_unlink.go +++ b/integrationtests/cmd/ioworkload/scenario_unlink.go @@ -113,10 +113,12 @@ func unlinkEnoent() error { if err != nil { return fmt.Errorf("path bytes: %w", err) } - _, _, errno := syscall.Syscall(syscall.SYS_UNLINK, uintptr(unsafe.Pointer(pathBytes)), 0, 0) - runtime.KeepAlive(pathBytes) - if errno == 0 { - return fmt.Errorf("expected ENOENT, but unlink succeeded") + for i := 0; i < 5; i++ { + _, _, errno := syscall.Syscall(syscall.SYS_UNLINK, uintptr(unsafe.Pointer(pathBytes)), 0, 0) + runtime.KeepAlive(pathBytes) + if errno == 0 { + return fmt.Errorf("expected ENOENT, but unlink succeeded") + } } return nil } @@ -176,10 +178,12 @@ func unlinkUnlinkatEnoent() error { if err != nil { return fmt.Errorf("name bytes: %w", err) } - _, _, errno := syscall.Syscall(syscall.SYS_UNLINKAT, uintptr(dirFD), uintptr(unsafe.Pointer(nameBytes)), 0) - runtime.KeepAlive(nameBytes) - if errno == 0 { - return fmt.Errorf("expected ENOENT, but unlinkat succeeded") + for i := 0; i < 5; i++ { + _, _, errno := syscall.Syscall(syscall.SYS_UNLINKAT, uintptr(dirFD), uintptr(unsafe.Pointer(nameBytes)), 0) + runtime.KeepAlive(nameBytes) + if errno == 0 { + return fmt.Errorf("expected ENOENT, but unlinkat succeeded") + } } return nil } diff --git a/integrationtests/open_test.go b/integrationtests/open_test.go index 2a3b77a..98419ad 100644 --- a/integrationtests/open_test.go +++ b/integrationtests/open_test.go @@ -112,6 +112,10 @@ func TestOpenDurationGap(t *testing.T) { // We intentionally sleep 800ms between first and second openat. 
const minGapNs = uint64(500 * 1_000_000) + var ( + found bool + maxGap uint64 + ) for _, rec := range result.Records { if !strings.Contains(rec.TraceID.String(), "enter_openat") { continue @@ -119,11 +123,16 @@ func TestOpenDurationGap(t *testing.T) { if !strings.Contains(rec.Path, "gap-shared.txt") { continue } - if rec.Cnt.DurationToPrev < minGapNs { - t.Fatalf("durationToPrev for second openat = %d ns, want >= %d ns", rec.Cnt.DurationToPrev, minGapNs) + found = true + if rec.Cnt.DurationToPrev > maxGap { + maxGap = rec.Cnt.DurationToPrev } - return } - t.Fatalf("did not find openat record for gap-shared.txt") + if !found { + t.Fatalf("did not find openat record for gap-shared.txt") + } + if maxGap < minGapNs { + t.Fatalf("max durationToPrev for openat gap-shared.txt = %d ns, want >= %d ns", maxGap, minGapNs) + } } |
