summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- internal/c/filter.c 15
-rw-r--r-- internal/c/generated_tracepoints.c 4
-rw-r--r-- internal/generate/bpfhandler.go 20
-rw-r--r-- internal/generate/codegen_test.go 41
4 files changed, 75 insertions, 5 deletions
diff --git a/internal/c/filter.c b/internal/c/filter.c
index 5585c12..48907d8 100644
--- a/internal/c/filter.c
+++ b/internal/c/filter.c
@@ -78,6 +78,21 @@ static __always_inline int ior_on_syscall_enter(__u32 tid, __u32 enter_trace_id)
return state.emit_event != 0;
}
+// ior_on_noreturn_syscall_enter is the enter hook for noreturn syscalls
+// (exit, exit_group). Unlike ior_on_syscall_enter it deliberately does NOT
+// write a per-tid entry into syscall_enter_state_map. A noreturn syscall never
+// returns to userspace, so its sys_exit tracepoint never fires and the matching
+// exit handler is suppressed by the generator (see internal/generate/codegen.go
+// isNoreturnSyscall). With no exit handler, nothing would ever look up or
+// delete (bpf_map_delete_elem) that enter-state entry, so recording it would only
+// leave stale per-tid entries crowding the bounded (32768) map on hosts churning many
+// distinct tids. We still honor the sampling decision so the enter null_event is
+// emitted (or dropped) exactly as a normal syscall's enter would be, but without
+// the dead, unreclaimable map write.
+static __always_inline int ior_on_noreturn_syscall_enter(__u32 enter_trace_id) {
+ return ior_should_emit_trace(enter_trace_id);
+}
+
static __always_inline int ior_on_syscall_exit(__u32 tid, __u32 enter_trace_id, __s64 ret) {
__u64 now;
__u64 duration = 0;
diff --git a/internal/c/generated_tracepoints.c b/internal/c/generated_tracepoints.c
index 51e30be..e38e1af 100644
--- a/internal/c/generated_tracepoints.c
+++ b/internal/c/generated_tracepoints.c
@@ -18753,7 +18753,7 @@ int handle_sys_enter_exit(struct syscall_trace_enter *ctx) {
if (filter(&pid, &tid))
return 0;
- if (!ior_on_syscall_enter(tid, SYS_ENTER_EXIT))
+ if (!ior_on_noreturn_syscall_enter(SYS_ENTER_EXIT))
return 0;
struct null_event *ev = bpf_ringbuf_reserve(&event_map, sizeof(struct null_event), 0);
@@ -18777,7 +18777,7 @@ int handle_sys_enter_exit_group(struct syscall_trace_enter *ctx) {
if (filter(&pid, &tid))
return 0;
- if (!ior_on_syscall_enter(tid, SYS_ENTER_EXIT_GROUP))
+ if (!ior_on_noreturn_syscall_enter(SYS_ENTER_EXIT_GROUP))
return 0;
struct null_event *ev = bpf_ringbuf_reserve(&event_map, sizeof(struct null_event), 0);
diff --git a/internal/generate/bpfhandler.go b/internal/generate/bpfhandler.go
index 3d3db4a..7b8fea0 100644
--- a/internal/generate/bpfhandler.go
+++ b/internal/generate/bpfhandler.go
@@ -36,7 +36,13 @@ func generateBPFHandler(tp GeneratedTracepoint) string {
// between kernel-assigned enter/exit IDs.
enterName := enterConstForHandler(f.Name, isEnter)
- return renderHandler(f.Name, ctxStruct, eventStruct, comment, eventTypeConst, extra, isEnter, enterName)
+ // Noreturn syscalls (exit, exit_group) get a special enter hook that skips
+ // the syscall_enter_state_map write. Their exit handler is suppressed (see
+ // codegen.go), so nothing would ever clear a recorded enter-state entry;
+ // recording it would only leak stale per-tid entries in the bounded map.
+ noreturn := isEnter && isNoreturnSyscall(syscallName(f.Name))
+
+ return renderHandler(f.Name, ctxStruct, eventStruct, comment, eventTypeConst, extra, isEnter, noreturn, enterName)
}
// enterConstForHandler returns the C #define constant name for the
@@ -51,7 +57,7 @@ func enterConstForHandler(name string, isEnter bool) string {
return strings.Replace(upper, "SYS_EXIT_", "SYS_ENTER_", 1)
}
-func renderHandler(name, ctxStruct, eventStruct, comment, eventTypeConst, extra string, isEnter bool, enterName string) string {
+func renderHandler(name, ctxStruct, eventStruct, comment, eventTypeConst, extra string, isEnter, noreturn bool, enterName string) string {
var b strings.Builder
fmt.Fprintf(&b, "/// %s is a struct %s\n", name, comment)
fmt.Fprintf(&b, "SEC(\"tracepoint/syscalls/%s\")\n", name)
@@ -60,7 +66,15 @@ func renderHandler(name, ctxStruct, eventStruct, comment, eventTypeConst, extra
b.WriteString(" if (filter(&pid, &tid))\n")
b.WriteString(" return 0;\n")
b.WriteString("\n")
- if isEnter {
+ if isEnter && noreturn {
+ // Noreturn enter: only the sampling decision, no enter-state write. The
+ // syscall never returns, so its exit handler is suppressed and nothing
+ // would ever look up or delete a recorded enter-state entry. Skipping
+ // the write avoids leaking stale per-tid entries in the bounded
+ // syscall_enter_state_map; the enter null_event is still emitted below.
+ fmt.Fprintf(&b, " if (!ior_on_noreturn_syscall_enter(%s))\n", strings.ToUpper(name))
+ b.WriteString(" return 0;\n")
+ } else if isEnter {
fmt.Fprintf(&b, " if (!ior_on_syscall_enter(tid, %s))\n", strings.ToUpper(name))
b.WriteString(" return 0;\n")
} else {
diff --git a/internal/generate/codegen_test.go b/internal/generate/codegen_test.go
index 7e0e122..7f9c223 100644
--- a/internal/generate/codegen_test.go
+++ b/internal/generate/codegen_test.go
@@ -1409,10 +1409,51 @@ func TestGenerateExitNoreturnHandlers(t *testing.T) {
if strings.Contains(enterBody, "ctx->args[") {
t.Errorf("%s: enter handler unexpectedly captures an arg; the int status must be ignored", syscall)
}
+
+ // Regression guard (task z10): the noreturn enter handler must emit
+ // the enter null_event WITHOUT recording enter-state. Because the
+ // exit handler is suppressed, nothing would ever look up or delete a
+ // syscall_enter_state_map entry, so recording one would leak a stale
+ // per-tid entry in the bounded map. The handler must therefore call
+ // the dedicated ior_on_noreturn_syscall_enter hook (which only makes
+ // the sampling decision) and must NOT call the state-recording
+ // ior_on_syscall_enter that normal returning syscalls use.
+ requireContains(t, output, "ior_on_noreturn_syscall_enter("+strings.ToUpper("sys_enter_"+syscall)+")")
+ if strings.Contains(enterBody, "ior_on_syscall_enter(") {
+ t.Errorf("%s: noreturn enter handler must not record enter-state "+
+ "(found ior_on_syscall_enter, which writes syscall_enter_state_map)", syscall)
+ }
})
}
}
+// TestGenerateReturningSyscallEnterRecordsState is the positive contrast to
+// TestGenerateExitNoreturnHandlers: a normal returning syscall's enter handler
+// DOES record enter-state via ior_on_syscall_enter (so its later exit handler
+// can pair durations and delete the entry), and must NOT use the noreturn hook.
+func TestGenerateReturningSyscallEnterRecordsState(t *testing.T) {
+ syscall := "sched_get_priority_min" // a returning KindNull syscall
+ output := GenerateTracepointsC(mustParseAll(t, syntheticPair(syscall)))
+
+ enterSec := `SEC("tracepoint/syscalls/sys_enter_` + syscall + `")`
+ enterStart := strings.Index(output, enterSec)
+ if enterStart < 0 {
+ t.Fatalf("%s: enter handler not found", syscall)
+ }
+ enterEnd := strings.Index(output[enterStart+len(enterSec):], `SEC("tracepoint/`)
+ enterBody := output[enterStart:]
+ if enterEnd >= 0 {
+ enterBody = output[enterStart : enterStart+len(enterSec)+enterEnd]
+ }
+
+ if !strings.Contains(enterBody, "ior_on_syscall_enter(tid, "+strings.ToUpper("sys_enter_"+syscall)+")") {
+ t.Errorf("%s: returning syscall enter handler must record enter-state via ior_on_syscall_enter", syscall)
+ }
+ if strings.Contains(enterBody, "ior_on_noreturn_syscall_enter(") {
+ t.Errorf("%s: returning syscall enter handler must not use the noreturn hook", syscall)
+ }
+}
+
// TestGenerateSchedGetPriorityMinHandler locks in how sched_get_priority_min
// (and its identical sibling sched_get_priority_max) are generated. Per
// sched_get_priority_min(2): `int sched_get_priority_min(int policy)` takes a