diff options
| -rw-r--r-- | internal/c/filter.c | 15 | ||||
| -rw-r--r-- | internal/c/generated_tracepoints.c | 4 | ||||
| -rw-r--r-- | internal/generate/bpfhandler.go | 20 | ||||
| -rw-r--r-- | internal/generate/codegen_test.go | 41 |
4 files changed, 75 insertions, 5 deletions
diff --git a/internal/c/filter.c b/internal/c/filter.c index 5585c12..48907d8 100644 --- a/internal/c/filter.c +++ b/internal/c/filter.c @@ -78,6 +78,21 @@ static __always_inline int ior_on_syscall_enter(__u32 tid, __u32 enter_trace_id) return state.emit_event != 0; } +// ior_on_noreturn_syscall_enter is the enter hook for noreturn syscalls +// (exit, exit_group). Unlike ior_on_syscall_enter it deliberately does NOT +// write a per-tid entry into syscall_enter_state_map. A noreturn syscall never +// returns to userspace, so its sys_exit tracepoint never fires and the matching +// exit handler is suppressed by the generator (see internal/generate/codegen.go +// isNoreturnSyscall). With no exit handler, nothing would ever look up or +// bpf_map_delete_elem that enter-state entry, so recording it would only leave +// stale per-tid entries crowding the bounded (32768) map on hosts churning many +// distinct tids. We still honor the sampling decision so the enter null_event is +// emitted (or dropped) exactly as a normal syscall's enter would be, but without +// the dead, unreclaimable map write. +static __always_inline int ior_on_noreturn_syscall_enter(__u32 enter_trace_id) { + return ior_should_emit_trace(enter_trace_id); +} + static __always_inline int ior_on_syscall_exit(__u32 tid, __u32 enter_trace_id, __s64 ret) { __u64 now; __u64 duration = 0; diff --git a/internal/c/generated_tracepoints.c b/internal/c/generated_tracepoints.c index 51e30be..e38e1af 100644 --- a/internal/c/generated_tracepoints.c +++ b/internal/c/generated_tracepoints.c @@ -18753,7 +18753,7 @@ int handle_sys_enter_exit(struct syscall_trace_enter *ctx) { if (filter(&pid, &tid)) return 0; - if (!ior_on_syscall_enter(tid, SYS_ENTER_EXIT)) + if (!ior_on_noreturn_syscall_enter(SYS_ENTER_EXIT)) return 0; struct null_event *ev = bpf_ringbuf_reserve(&event_map, sizeof(struct null_event), 0); @@ -18777,7 +18777,7 @@ int handle_sys_enter_exit_group(struct syscall_trace_enter *ctx) { if (filter(&pid, &tid)) return 0; - if (!ior_on_syscall_enter(tid, SYS_ENTER_EXIT_GROUP)) + if (!ior_on_noreturn_syscall_enter(SYS_ENTER_EXIT_GROUP)) return 0; struct null_event *ev = bpf_ringbuf_reserve(&event_map, sizeof(struct null_event), 0); diff --git a/internal/generate/bpfhandler.go b/internal/generate/bpfhandler.go index 3d3db4a..7b8fea0 100644 --- a/internal/generate/bpfhandler.go +++ b/internal/generate/bpfhandler.go @@ -36,7 +36,13 @@ func generateBPFHandler(tp GeneratedTracepoint) string { // between kernel-assigned enter/exit IDs. enterName := enterConstForHandler(f.Name, isEnter) - return renderHandler(f.Name, ctxStruct, eventStruct, comment, eventTypeConst, extra, isEnter, enterName) + // Noreturn syscalls (exit, exit_group) get a special enter hook that skips + // the syscall_enter_state_map write. Their exit handler is suppressed (see + // codegen.go), so nothing would ever clear a recorded enter-state entry; + // recording it would only leak stale per-tid entries in the bounded map. + noreturn := isEnter && isNoreturnSyscall(syscallName(f.Name)) + + return renderHandler(f.Name, ctxStruct, eventStruct, comment, eventTypeConst, extra, isEnter, noreturn, enterName) } // enterConstForHandler returns the C #define constant name for the @@ -51,7 +57,7 @@ func enterConstForHandler(name string, isEnter bool) string { return strings.Replace(upper, "SYS_EXIT_", "SYS_ENTER_", 1) } -func renderHandler(name, ctxStruct, eventStruct, comment, eventTypeConst, extra string, isEnter bool, enterName string) string { +func renderHandler(name, ctxStruct, eventStruct, comment, eventTypeConst, extra string, isEnter, noreturn bool, enterName string) string { var b strings.Builder fmt.Fprintf(&b, "/// %s is a struct %s\n", name, comment) fmt.Fprintf(&b, "SEC(\"tracepoint/syscalls/%s\")\n", name) @@ -60,7 +66,15 @@ func renderHandler(name, ctxStruct, eventStruct, comment, eventTypeConst, extra b.WriteString(" if (filter(&pid, &tid))\n") b.WriteString(" return 0;\n") b.WriteString("\n") - if isEnter { + if isEnter && noreturn { + // Noreturn enter: only the sampling decision, no enter-state write. The + // syscall never returns, so its exit handler is suppressed and nothing + // would ever look up or delete a recorded enter-state entry. Skipping + // the write avoids leaking stale per-tid entries in the bounded + // syscall_enter_state_map; the enter null_event is still emitted below. + fmt.Fprintf(&b, " if (!ior_on_noreturn_syscall_enter(%s))\n", strings.ToUpper(name)) + b.WriteString(" return 0;\n") + } else if isEnter { fmt.Fprintf(&b, " if (!ior_on_syscall_enter(tid, %s))\n", strings.ToUpper(name)) b.WriteString(" return 0;\n") } else { diff --git a/internal/generate/codegen_test.go b/internal/generate/codegen_test.go index 7e0e122..7f9c223 100644 --- a/internal/generate/codegen_test.go +++ b/internal/generate/codegen_test.go @@ -1409,10 +1409,51 @@ func TestGenerateExitNoreturnHandlers(t *testing.T) { if strings.Contains(enterBody, "ctx->args[") { t.Errorf("%s: enter handler unexpectedly captures an arg; the int status must be ignored", syscall) } + + // Regression guard (task z10): the noreturn enter handler must emit + // the enter null_event WITHOUT recording enter-state. Because the + // exit handler is suppressed, nothing would ever look up or delete a + // syscall_enter_state_map entry, so recording one would leak a stale + // per-tid entry in the bounded map. The handler must therefore call + // the dedicated ior_on_noreturn_syscall_enter hook (which only makes + // the sampling decision) and must NOT call the state-recording + // ior_on_syscall_enter that normal returning syscalls use. + requireContains(t, output, "ior_on_noreturn_syscall_enter("+strings.ToUpper("sys_enter_"+syscall)+")") + if strings.Contains(enterBody, "ior_on_syscall_enter(") { + t.Errorf("%s: noreturn enter handler must not record enter-state "+ + "(found ior_on_syscall_enter, which writes syscall_enter_state_map)", syscall) + } }) } } +// TestGenerateReturningSyscallEnterRecordsState is the positive contrast to +// TestGenerateExitNoreturnHandlers: a normal returning syscall's enter handler +// DOES record enter-state via ior_on_syscall_enter (so its later exit handler +// can pair durations and delete the entry), and must NOT use the noreturn hook. +func TestGenerateReturningSyscallEnterRecordsState(t *testing.T) { + syscall := "sched_get_priority_min" // a returning KindNull syscall + output := GenerateTracepointsC(mustParseAll(t, syntheticPair(syscall))) + + enterSec := `SEC("tracepoint/syscalls/sys_enter_` + syscall + `")` + enterStart := strings.Index(output, enterSec) + if enterStart < 0 { + t.Fatalf("%s: enter handler not found", syscall) + } + enterEnd := strings.Index(output[enterStart+len(enterSec):], `SEC("tracepoint/`) + enterBody := output[enterStart:] + if enterEnd >= 0 { + enterBody = output[enterStart : enterStart+len(enterSec)+enterEnd] + } + + if !strings.Contains(enterBody, "ior_on_syscall_enter(tid, "+strings.ToUpper("sys_enter_"+syscall)+")") { + t.Errorf("%s: returning syscall enter handler must record enter-state via ior_on_syscall_enter", syscall) + } + if strings.Contains(enterBody, "ior_on_noreturn_syscall_enter(") { + t.Errorf("%s: returning syscall enter handler must not use the noreturn hook", syscall) + } +} + // TestGenerateSchedGetPriorityMinHandler locks in how sched_get_priority_min // (and its identical sibling sched_get_priority_max) are generated. Per // sched_get_priority_min(2): `int sched_get_priority_min(int policy)` takes a |
