diff options
| author | Paul Buetow <paul@buetow.org> | 2026-05-30 10:13:17 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2026-05-30 10:13:17 +0300 |
| commit | 768e53d90be2d15242266b898023c9c39dacf47d (patch) | |
| tree | 94bc78a2c02979e868836b127b4d6bbb6beb213c /internal/generate/bpfhandler.go | |
| parent | 23579dbdac320884bcdd670e46744b5f4ab45d5d (diff) | |
fix(z10): skip enter-state write for noreturn syscalls
After p10 suppressed the sys_exit_exit/sys_exit_exit_group handlers, the
enter handlers for exit/exit_group still called ior_on_syscall_enter,
which writes a per-tid entry into syscall_enter_state_map. With the exit
handler gone, nothing ever called bpf_map_delete_elem on that entry, so stale
per-tid state accumulated in the bounded (32768-entry) map on hosts churning
through many distinct tids and could starve legitimate inserts.
Add ior_on_noreturn_syscall_enter in internal/c/filter.c: it only makes
the sampling decision (ior_should_emit_trace) and deliberately does NOT
record enter-state. The code generator now emits this hook for noreturn
enter handlers (detected via isNoreturnSyscall(syscallName(name))) so the
enter null_event is still emitted while the dead, unreclaimable map write
is skipped. Regenerated generated_tracepoints.c accordingly.
Extend TestGenerateExitNoreturnHandlers with a negative assertion (noreturn
enter handlers must not call ior_on_syscall_enter) and add
TestGenerateReturningSyscallEnterRecordsState as a positive contrast.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Diffstat (limited to 'internal/generate/bpfhandler.go')
| -rw-r--r-- | internal/generate/bpfhandler.go | 20 |
1 file changed, 17 insertions, 3 deletions
diff --git a/internal/generate/bpfhandler.go b/internal/generate/bpfhandler.go index 3d3db4a..7b8fea0 100644 --- a/internal/generate/bpfhandler.go +++ b/internal/generate/bpfhandler.go @@ -36,7 +36,13 @@ func generateBPFHandler(tp GeneratedTracepoint) string { // between kernel-assigned enter/exit IDs. enterName := enterConstForHandler(f.Name, isEnter) - return renderHandler(f.Name, ctxStruct, eventStruct, comment, eventTypeConst, extra, isEnter, enterName) + // Noreturn syscalls (exit, exit_group) get a special enter hook that skips + // the syscall_enter_state_map write. Their exit handler is suppressed (see + // codegen.go), so nothing would ever clear a recorded enter-state entry; + // recording it would only leak stale per-tid entries in the bounded map. + noreturn := isEnter && isNoreturnSyscall(syscallName(f.Name)) + + return renderHandler(f.Name, ctxStruct, eventStruct, comment, eventTypeConst, extra, isEnter, noreturn, enterName) } // enterConstForHandler returns the C #define constant name for the @@ -51,7 +57,7 @@ func enterConstForHandler(name string, isEnter bool) string { return strings.Replace(upper, "SYS_EXIT_", "SYS_ENTER_", 1) } -func renderHandler(name, ctxStruct, eventStruct, comment, eventTypeConst, extra string, isEnter bool, enterName string) string { +func renderHandler(name, ctxStruct, eventStruct, comment, eventTypeConst, extra string, isEnter, noreturn bool, enterName string) string { var b strings.Builder fmt.Fprintf(&b, "/// %s is a struct %s\n", name, comment) fmt.Fprintf(&b, "SEC(\"tracepoint/syscalls/%s\")\n", name) @@ -60,7 +66,15 @@ func renderHandler(name, ctxStruct, eventStruct, comment, eventTypeConst, extra b.WriteString(" if (filter(&pid, &tid))\n") b.WriteString(" return 0;\n") b.WriteString("\n") - if isEnter { + if isEnter && noreturn { + // Noreturn enter: only the sampling decision, no enter-state write. 
The + // syscall never returns, so its exit handler is suppressed and nothing + // would ever look up or delete a recorded enter-state entry. Skipping + // the write avoids leaking stale per-tid entries in the bounded + // syscall_enter_state_map; the enter null_event is still emitted below. + fmt.Fprintf(&b, " if (!ior_on_noreturn_syscall_enter(%s))\n", strings.ToUpper(name)) + b.WriteString(" return 0;\n") + } else if isEnter { fmt.Fprintf(&b, " if (!ior_on_syscall_enter(tid, %s))\n", strings.ToUpper(name)) b.WriteString(" return 0;\n") } else { |
