diff options
Diffstat (limited to 'internal/generate')
| -rw-r--r-- | internal/generate/bpfhandler.go | 9 | ||||
| -rw-r--r-- | internal/generate/codegen.go | 27 | ||||
| -rw-r--r-- | internal/generate/codegen_test.go | 61 |
3 files changed, 74 insertions, 23 deletions
diff --git a/internal/generate/bpfhandler.go b/internal/generate/bpfhandler.go index 1dff4d6..071e11d 100644 --- a/internal/generate/bpfhandler.go +++ b/internal/generate/bpfhandler.go @@ -36,10 +36,11 @@ func generateBPFHandler(tp GeneratedTracepoint) string { // between kernel-assigned enter/exit IDs. enterName := enterConstForHandler(f.Name, isEnter) - // Noreturn syscalls (exit, exit_group) get a special enter hook that skips - // the syscall_enter_state_map write. Their exit handler is suppressed (see - // codegen.go), so nothing would ever clear a recorded enter-state entry; - // recording it would only leak stale per-tid entries in the bounded map. + // Noreturn syscalls (exit, exit_group, rt_sigreturn) get a special enter + // hook that skips the syscall_enter_state_map write. Their exit handler is + // suppressed (see codegen.go), so nothing would ever clear a recorded + // enter-state entry; recording it would only leak stale per-tid entries in + // the bounded map. noreturn := isEnter && isNoreturnSyscall(syscallName(f.Name)) return renderHandler(f.Name, ctxStruct, eventStruct, comment, eventTypeConst, extra, isEnter, noreturn, enterName) diff --git a/internal/generate/codegen.go b/internal/generate/codegen.go index 339dc1f..5b9ea7c 100644 --- a/internal/generate/codegen.go +++ b/internal/generate/codegen.go @@ -126,9 +126,10 @@ func classifySyscall(sc Syscall) ([]GeneratedTracepoint, string) { result = append(result, GeneratedTracepoint{Format: sc.Enter, Classification: enterClass}) } // Emit the exit handler only for syscalls that can actually return. - // Noreturn syscalls (exit, exit_group) never return to userspace, so their - // sys_exit tracepoint never fires; emitting a handler would be dead code in - // the generated BPF program. We still emit their enter handler above. + // Noreturn syscalls (exit, exit_group, rt_sigreturn) never return to the + // syscall site, so their sys_exit tracepoint never fires; emitting a handler + // would be dead code in the generated BPF program. We still emit their enter + // handler above. if sc.Exit != nil && !isNoreturnSyscall(sc.Name) { result = append(result, GeneratedTracepoint{Format: sc.Exit, Classification: exitClass}) } @@ -150,13 +151,25 @@ func isEnterRejected(kind TracepointKind) bool { return !lookupKind(kind).enterAccepted } -// noreturnSyscalls lists syscalls that never return control to userspace. +// noreturnSyscalls lists syscalls that never return to the syscall site. // Their sys_exit tracepoint can never fire, so the generator suppresses the // matching exit handler (see classifySyscall) to avoid dead code in the -// generated BPF program. +// generated BPF program, and the enter handler uses the noreturn enter hook +// that skips the (otherwise un-reclaimable) syscall_enter_state_map write. +// +// - exit / exit_group terminate the thread/process; control never returns. +// - rt_sigreturn restores the pre-signal execution context off the signal +// stack frame and resumes the interrupted instruction. It does NOT return +// to the instruction after the rt_sigreturn syscall, so the kernel never +// fires sys_exit_rt_sigreturn. Verified empirically against +// /sys/kernel/tracing: sys_enter_rt_sigreturn fires once per signal-handler +// return while sys_exit_rt_sigreturn never does. The man page (sigreturn(2)) +// states plainly that "sigreturn() never returns". rt_sigreturn is emitted +// by the signal trampoline, not called directly by applications. var noreturnSyscalls = map[string]bool{ - "exit": true, - "exit_group": true, + "exit": true, + "exit_group": true, + "rt_sigreturn": true, } // isNoreturnSyscall reports whether the named syscall never returns and thus diff --git a/internal/generate/codegen_test.go b/internal/generate/codegen_test.go index 9545447..474b3d5 100644 --- a/internal/generate/codegen_test.go +++ b/internal/generate/codegen_test.go @@ -1161,15 +1161,47 @@ func TestClassifyRetSyncUnclassified(t *testing.T) { // TestSyncIsNotNoreturn locks in that bare sync(2) is NOT treated as a noreturn // syscall: it is void but returns control to userspace, so its exit handler must -// be generated (see TestGenerateSyncHandler). Only exit(2)/exit_group(2) are -// noreturn. This guards against sync accidentally being added to the noreturn -// suppression list, which would silently drop its exit events. +// be generated (see TestGenerateSyncHandler). Only exit(2)/exit_group(2)/ +// rt_sigreturn(2) are noreturn. This guards against sync accidentally being added +// to the noreturn suppression list, which would silently drop its exit events. func TestSyncIsNotNoreturn(t *testing.T) { if isNoreturnSyscall("sync") { t.Error("sync must not be noreturn: it is void but DOES return, so its exit handler must be emitted") } } +// TestRtSigreturnIsNoreturn locks in that rt_sigreturn(2) is treated as a +// noreturn syscall. rt_sigreturn restores the pre-signal execution context off +// the signal-stack frame and resumes the interrupted instruction; it does NOT +// return to the instruction after the syscall, so the kernel never fires +// sys_exit_rt_sigreturn (verified empirically against /sys/kernel/tracing: +// sys_enter_rt_sigreturn fires once per signal-handler return, sys_exit never +// does). man sigreturn(2): "sigreturn() never returns". Suppressing the dead +// exit handler also stops the bounded syscall_enter_state_map from leaking a +// per-tid entry on every signal-handler return. +func TestRtSigreturnIsNoreturn(t *testing.T) { + if !isNoreturnSyscall("rt_sigreturn") { + t.Error("rt_sigreturn must be noreturn: it never returns to the syscall site, so sys_exit_rt_sigreturn never fires and its exit handler must be suppressed") + } +} + +// TestRtSigSiblingsAreNotNoreturn is the contrast to TestRtSigreturnIsNoreturn: +// every OTHER rt_sig* syscall returns normally to its caller, so it must NOT be +// in the noreturn set or its exit events (and durations) would be silently +// dropped. Only rt_sigreturn is the kernel/libc signal-trampoline return path. +func TestRtSigSiblingsAreNotNoreturn(t *testing.T) { + siblings := []string{ + "rt_sigaction", "rt_sigprocmask", "rt_sigpending", + "rt_sigsuspend", "rt_sigtimedwait", "rt_sigqueueinfo", + "rt_tgsigqueueinfo", + } + for _, s := range siblings { + if isNoreturnSyscall(s) { + t.Errorf("%s must not be noreturn: it returns normally, so its exit handler must be emitted", s) + } + } +} + func TestGenerateIoUringEnterHandler(t *testing.T) { output := generateFromPair(t, FormatIoUringEnter, FormatExitIoUringEnter) @@ -2059,17 +2091,22 @@ func TestGenerateFallbackNullHandler(t *testing.T) { requireContains(t, output, "ev->event_type = EXIT_RET_EVENT;") } -// TestGenerateExitNoreturnHandlers locks in how the noreturn process-exit -// syscalls are generated. Per exit(2)/exit_group(2): both take a single -// `int status` argument and never return. ior classifies them as KindNull -// (FamilyProcess), so: +// TestGenerateExitNoreturnHandlers locks in how the noreturn syscalls are +// generated. exit(2)/exit_group(2) take a single `int status` argument and +// never return (they terminate the thread/process). rt_sigreturn(2) takes no +// meaningful arguments and never returns to the syscall site: it restores the +// pre-signal execution context off the signal-stack frame and resumes the +// interrupted instruction (man sigreturn(2): "sigreturn() never returns"; +// verified empirically against /sys/kernel/tracing where sys_enter_rt_sigreturn +// fires once per signal-handler return while sys_exit_rt_sigreturn never does). +// All three are KindNull, so: // - The enter handler emits a struct null_event and intentionally does NOT -// capture the int status arg (it is not an I/O resource like an fd/path). -// - The kernel still exposes sys_exit_{exit,exit_group} tracepoints, but -// those handlers can never fire at runtime because the syscall does not -// return. The generator suppresses the dead exit handlers. +// capture any arg (status/whatever is not an I/O resource like an fd/path). +// - The kernel still exposes the sys_exit_<name> tracepoints, but those +// handlers can never fire at runtime because the syscall does not return. +// The generator suppresses the dead exit handlers. func TestGenerateExitNoreturnHandlers(t *testing.T) { - for _, syscall := range []string{"exit", "exit_group"} { + for _, syscall := range []string{"exit", "exit_group", "rt_sigreturn"} { t.Run(syscall, func(t *testing.T) { output := GenerateTracepointsC(mustParseAll(t, syntheticPair(syscall))) |
