summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Buetow <paul@buetow.org>2026-06-01 15:00:44 +0300
committerPaul Buetow <paul@buetow.org>2026-06-01 15:00:44 +0300
commit6a872804d93b822d530e9df93547f2fec0a8ea50 (patch)
tree4665b0aec25491cb094b71a099fedcdb15c03928
parent55aa404fc93deeff27205b3cc9af407ab071be4b (diff)
test(integration): add Sched family tracing coverage
Add a self-targeted, non-disruptive sched-basic ioworkload scenario and a dedicated TestSchedBasic integration test. The scenario pins to one OS thread (LockOSThread) and exercises only safe Sched syscalls: sched_yield; sched_getaffinity then sched_setaffinity re-applying the identical mask (a no-op); and read-only sched_getscheduler, sched_getparam, sched_getattr, sched_get_priority_max/min, and sched_rr_get_interval. sched_setscheduler, sched_setattr, and sched_setparam are intentionally excluded. The test scopes -trace-syscalls to the sched_* family, guards on PID and comm, and asserts enter_ tracepoints fire (MinCount>=1) for sched_yield, sched_getaffinity, sched_getscheduler, and sched_getparam. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
-rw-r--r--cmd/ioworkload/scenario_sched.go173
-rw-r--r--cmd/ioworkload/scenarios.go1
-rw-r--r--integrationtests/sched_test.go39
3 files changed, 213 insertions, 0 deletions
diff --git a/cmd/ioworkload/scenario_sched.go b/cmd/ioworkload/scenario_sched.go
new file mode 100644
index 0000000..9f7c530
--- /dev/null
+++ b/cmd/ioworkload/scenario_sched.go
@@ -0,0 +1,173 @@
+package main
+
+import (
+ "fmt"
+ "runtime"
+ "syscall"
+ "unsafe"
+
+ "golang.org/x/sys/unix"
+)
+
+// schedSchedOther is SCHED_OTHER (the default time-sharing policy, value 0). In
+// this file it is passed only to sched_get_priority_max and
+// sched_get_priority_min as the policy to query; it is never used to set or
+// change a scheduling policy.
+const schedSchedOther = 0
+
+// schedAttrSize is the size in bytes we advertise for struct sched_attr. The
+// kernel uses the size to version the struct; SCHED_ATTR_SIZE_VER0 (48) is the
+// original layout and is accepted on every kernel that has sched_getattr. It is
+// stored in schedAttr.Size and also passed as sched_getattr's size argument;
+// the schedAttr struct declared below is laid out to occupy exactly 48 bytes.
+const schedAttrSize = 48
+
+// schedParam mirrors struct sched_param: a single int32 scheduling priority
+// (0 under SCHED_OTHER). It is used only as the output buffer for
+// sched_getparam, so its initial contents do not matter — the kernel fills it.
+type schedParam struct {
+ Priority int32
+}
+
+// schedAttr mirrors the v0 layout of struct sched_attr, used here as the output
+// buffer for sched_getattr. The caller pre-sets Size and passes the same byte
+// count as the syscall's size argument; every other field is written by the
+// kernel. The fields total 48 bytes, matching schedAttrSize.
+type schedAttr struct {
+ Size uint32
+ Policy uint32
+ Flags uint64
+ Nice int32
+ Priority uint32
+ RuntimeNs uint64
+ DeadlineNs uint64
+ PeriodNs uint64
+}
+
+// schedBasic exercises the SAFE, NON-DISRUPTIVE members of the Sched syscall
+// family, entirely self-targeted (pid 0 == the calling thread), so it affects no
+// other process and — crucially — never alters this process's scheduling state:
+//
+// - sched_yield yields the CPU once (no lasting effect).
+// - sched_getaffinity (pid 0) reads the current CPU affinity mask.
+// - sched_setaffinity (pid 0) re-applies the EXACT mask just read, so the
+// affinity is left byte-for-byte unchanged.
+// - sched_getscheduler (pid 0) reads the current scheduling policy.
+// - sched_getparam (pid 0) reads the current scheduling parameters.
+// - sched_getattr (pid 0) reads the extended scheduling attributes.
+// - sched_get_priority_max/min query the priority range for SCHED_OTHER.
+// - sched_rr_get_interval (pid 0) reads the round-robin quantum for this thread.
+//
+// INTENTIONALLY EXCLUDED (documented so the reasons travel with the code):
+// - sched_setscheduler / sched_setattr: switching to SCHED_FIFO/SCHED_RR
+// requires CAP_SYS_NICE and would move this thread to real-time scheduling,
+// which is disruptive and could starve the host. The only set-style call
+// made here is sched_setaffinity with the mask just read (see above); the
+// scheduling policy itself is never written.
+// - sched_setparam: changing scheduling parameters is only meaningful for
+// real-time policies; it offers no behavioural gain here and is not worth
+// the risk.
+//
+// The steps run in a fixed order and the first failing step's error is returned.
+// LockOSThread pins this goroutine to one OS thread so that "pid 0" (the calling
+// thread) is the same thread for every call made here.
+func schedBasic() error {
+ runtime.LockOSThread()
+ defer runtime.UnlockOSThread()
+
+ if err := schedYieldOnce(); err != nil {
+ return err
+ }
+ if err := schedRoundtripAffinity(); err != nil {
+ return err
+ }
+ if err := schedQueryPolicy(); err != nil {
+ return err
+ }
+ return schedQueryPriorityRange()
+}
+
+// schedYieldOnce issues sched_yield(2) via a raw syscall (golang.org/x/sys/unix
+// ships no wrapper) and returns any errno wrapped; yielding leaves no lasting state.
+func schedYieldOnce() error {
+ if _, _, errno := syscall.RawSyscall(unix.SYS_SCHED_YIELD, 0, 0, 0); errno != 0 {
+ return fmt.Errorf("sched_yield: %w", errno)
+ }
+ return nil
+}
+
+// schedRoundtripAffinity reads the calling thread's CPU affinity mask with
+// sched_getaffinity(pid 0) and writes that same, unmodified mask back with
+// sched_setaffinity(pid 0), so the affinity ends up unchanged; the write exists
+// purely to fire the tracepoint. The first failing call's error is returned.
+func schedRoundtripAffinity() error {
+ var mask unix.CPUSet
+ if err := unix.SchedGetaffinity(0, &mask); err != nil {
+ return fmt.Errorf("sched_getaffinity: %w", err)
+ }
+ // Write back the mask exactly as read: no effective change.
+ if err := unix.SchedSetaffinity(0, &mask); err != nil {
+ return fmt.Errorf("sched_setaffinity (restore same mask): %w", err)
+ }
+ return nil
+}
+
+// schedQueryPolicy reads — but never modifies — this thread's scheduling policy
+// and parameters via three raw syscalls rather than golang.org/x/sys/unix
+// wrappers: sched_getscheduler, sched_getparam, and sched_getattr. Each targets
+// pid 0 (the calling thread); results are discarded, only errno is checked.
+func schedQueryPolicy() error {
+ // sched_getscheduler returns the policy as the syscall return value, which
+ // is discarded here; a failure is reported through a non-zero errno.
+ if _, _, errno := syscall.RawSyscall(unix.SYS_SCHED_GETSCHEDULER, 0, 0, 0); errno != 0 {
+ return fmt.Errorf("sched_getscheduler: %w", errno)
+ }
+
+ var param schedParam
+ if _, _, errno := syscall.RawSyscall(
+ unix.SYS_SCHED_GETPARAM, 0, uintptr(unsafe.Pointer(&param)), 0,
+ ); errno != 0 {
+ return fmt.Errorf("sched_getparam: %w", errno)
+ }
+
+ attr := schedAttr{Size: schedAttrSize}
+ if _, _, errno := syscall.RawSyscall6(
+ unix.SYS_SCHED_GETATTR,
+ 0, // pid 0: this thread
+ uintptr(unsafe.Pointer(&attr)), // buffer the kernel fills in
+ schedAttrSize, // advertised buffer size
+ 0, // flags (must be 0)
+ 0, 0,
+ ); errno != 0 {
+ return fmt.Errorf("sched_getattr: %w", errno)
+ }
+ return nil
+}
+
+// schedQueryPriorityRange issues the remaining read-only Sched queries:
+// sched_get_priority_max/min for SCHED_OTHER (the priority range, a constant
+// property of the policy) and sched_rr_get_interval(pid 0) for this thread's
+// round-robin time quantum. Returned values are discarded; only errno is checked.
+func schedQueryPriorityRange() error {
+ if _, _, errno := syscall.RawSyscall(
+ unix.SYS_SCHED_GET_PRIORITY_MAX, schedSchedOther, 0, 0,
+ ); errno != 0 {
+ return fmt.Errorf("sched_get_priority_max: %w", errno)
+ }
+ if _, _, errno := syscall.RawSyscall(
+ unix.SYS_SCHED_GET_PRIORITY_MIN, schedSchedOther, 0, 0,
+ ); errno != 0 {
+ return fmt.Errorf("sched_get_priority_min: %w", errno)
+ }
+
+ // sched_rr_get_interval writes the round-robin quantum into the timespec
+ // passed as its second argument. For non-RR policies the kernel still
+ // returns a value (often the base slice), so this is a harmless read.
+ var ts unix.Timespec
+ if _, _, errno := syscall.RawSyscall(
+ unix.SYS_SCHED_RR_GET_INTERVAL, 0, uintptr(unsafe.Pointer(&ts)), 0,
+ ); errno != 0 {
+ return fmt.Errorf("sched_rr_get_interval: %w", errno)
+ }
+ return nil
+}
diff --git a/cmd/ioworkload/scenarios.go b/cmd/ioworkload/scenarios.go
index 3505984..c11f25d 100644
--- a/cmd/ioworkload/scenarios.go
+++ b/cmd/ioworkload/scenarios.go
@@ -139,6 +139,7 @@ var scenarios = map[string]func() error{
"aio-submit": aioSubmit,
"signals-basic": signalsBasic,
"misc-basic": miscBasic,
+ "sched-basic": schedBasic,
}
func makeTempDir(prefix string) (string, func(), error) {
diff --git a/integrationtests/sched_test.go b/integrationtests/sched_test.go
new file mode 100644
index 0000000..74cd545
--- /dev/null
+++ b/integrationtests/sched_test.go
@@ -0,0 +1,39 @@
+package integrationtests
+
+import "testing"
+
+// schedTraceArgs restricts tracing to the Sched-family syscalls the sched-basic
+// workload issues, so the captured output is dominated by those calls. The
+// tracer names each tracepoint after the underlying kernel syscall, so the
+// names below match the sched_* syscall names verbatim. sched_setscheduler,
+// sched_setattr, and sched_setparam are intentionally absent: the scenario never
+// invokes them (a real-time switch needs CAP_SYS_NICE and would be disruptive).
+var schedTraceArgs = []string{
+ "-trace-syscalls",
+ "sched_yield,sched_getaffinity,sched_setaffinity,sched_getscheduler,sched_getparam,sched_getattr,sched_get_priority_max,sched_get_priority_min,sched_rr_get_interval",
+}
+
+// TestSchedBasic verifies the Sched syscall family is traced end-to-end. The
+// sched-basic workload self-targets every call (pid 0 == the calling thread,
+// pinned with LockOSThread): it yields once, reads its CPU affinity mask and
+// re-applies the IDENTICAL mask (a byte-for-byte no-op), then queries — but
+// never modifies — its scheduling policy, parameters, extended attributes,
+// priority range, and round-robin interval, so it is safe and unprivileged.
+// Of the nine traced syscalls, four (sched_yield, sched_getaffinity, sched_getscheduler,
+// sched_getparam) must each appear at least once as an enter event attributed to ioworkload.
+func TestSchedBasic(t *testing.T) {
+ h := newTestHarness(t)
+ result, pid, err := h.RunWithIorArgs("sched-basic", defaultDuration, schedTraceArgs)
+ if err != nil {
+ t.Fatalf("run scenario sched-basic: %v", err)
+ }
+
+ AssertNoUnexpectedPID(t, result, pid)
+ AssertNoUnexpectedComm(t, result, "ioworkload")
+ AssertEventsPresent(t, result, []ExpectedEvent{
+ {Tracepoint: "enter_sched_yield", Comm: "ioworkload", MinCount: 1},
+ {Tracepoint: "enter_sched_getaffinity", Comm: "ioworkload", MinCount: 1},
+ {Tracepoint: "enter_sched_getscheduler", Comm: "ioworkload", MinCount: 1},
+ {Tracepoint: "enter_sched_getparam", Comm: "ioworkload", MinCount: 1},
+ })
+}