summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--cmd/ioworkload/scenario_retbytes.go59
-rw-r--r--docs/syscall-tracing-plan.md10
-rw-r--r--integrationtests/retbytes_test.go8
-rw-r--r--internal/c/generated_tracepoints.c8
-rw-r--r--internal/c/generated_tracepoints_result.txt4
-rw-r--r--internal/generate/classify.go2
-rw-r--r--internal/generate/classify_test.go18
-rw-r--r--internal/generate/retclassify_test.go17
8 files changed, 109 insertions, 17 deletions
diff --git a/cmd/ioworkload/scenario_retbytes.go b/cmd/ioworkload/scenario_retbytes.go
index fa0b677..d9b2984 100644
--- a/cmd/ioworkload/scenario_retbytes.go
+++ b/cmd/ioworkload/scenario_retbytes.go
@@ -12,14 +12,24 @@ import (
+// Raw syscall numbers and the payload size used by the retbytes scenarios.
+// NOTE(review): 307/310/311 look like the x86_64 syscall table entries for
+// sendmmsg/process_vm_readv/process_vm_writev — confirm this file is only
+// built for amd64, since other architectures use different numbers.
const (
sysProcessVMReadv = 310
sysProcessVMWritev = 311
+ sysSendmmsg = 307
retbytesPayloadLen = 18
)
+// mmsghdr is the per-message element passed by address to the raw
+// sendmmsg/recvmmsg syscalls in this file: one message header followed by a
+// 32-bit length slot.
+// NOTE(review): presumably mirrors the kernel's struct mmsghdr layout on
+// 64-bit Linux — confirm for every architecture this file is built on.
+type mmsghdr struct {
+ hdr syscall.Msghdr
+ len uint32 // presumably written by the kernel with the per-message byte count; not read in the visible code
+ _ uint32 // explicit padding after len — presumably keeps the element size 8-byte aligned; TODO confirm
+}
+
// retbytesPhaseA exercises byte-classified syscalls that use generic ret_event exits.
func retbytesPhaseA() error {
if err := retbytesSocketIO(); err != nil {
return err
}
+ if err := retbytesBatchSocketIO(); err != nil {
+ return err
+ }
if err := retbytesSendfile(); err != nil {
return err
}
@@ -68,6 +78,44 @@ func retbytesSocketIO() error {
return nil
}
+// retbytesBatchSocketIO sends two datagrams across an AF_UNIX/SOCK_DGRAM
+// socketpair with a single sendmmsg call and reads them back with a single
+// recvmmsg call. It returns an error when the socketpair cannot be created,
+// when either syscall fails, or when fewer messages than requested were
+// transferred.
+func retbytesBatchSocketIO() error {
+ fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_DGRAM, 0)
+ if err != nil {
+ return fmt.Errorf("batch socketpair: %w", err)
+ }
+ defer syscall.Close(fds[0])
+ defer syscall.Close(fds[1])
+
+ payloadA := []byte("batch-one")
+ payloadB := []byte("batch-two")
+ sendMsgs := mmsgSlice(payloadA, payloadB)
+ // Arguments: fd, pointer to the first mmsghdr, number of headers; the
+ // remaining arguments are left at 0.
+ n, _, errno := syscall.Syscall6(sysSendmmsg, uintptr(fds[0]), uintptr(unsafe.Pointer(&sendMsgs[0])), uintptr(len(sendMsgs)), 0, 0, 0)
+ if errno != 0 {
+ return fmt.Errorf("sendmmsg: %w", errno)
+ }
+ // The return value is compared against the number of headers, i.e. it is
+ // treated as a message count rather than a byte count.
+ if n != uintptr(len(sendMsgs)) {
+ return fmt.Errorf("sendmmsg sent %d messages, want %d", n, len(sendMsgs))
+ }
+ // The syscall only received raw uintptr addresses, so keep the payloads
+ // and headers reachable until it has returned.
+ runtime.KeepAlive(payloadA)
+ runtime.KeepAlive(payloadB)
+ runtime.KeepAlive(sendMsgs)
+
+ // Receive buffers are sized exactly like the payloads sent above.
+ recvA := make([]byte, len(payloadA))
+ recvB := make([]byte, len(payloadB))
+ recvMsgs := mmsgSlice(recvA, recvB)
+ // NOTE(review): the trailing 0 arguments presumably mean "no flags, no
+ // timeout" per recvmmsg(2) — confirm; both datagrams are already queued
+ // by the send above.
+ n, _, errno = syscall.Syscall6(syscall.SYS_RECVMMSG, uintptr(fds[1]), uintptr(unsafe.Pointer(&recvMsgs[0])), uintptr(len(recvMsgs)), 0, 0, 0)
+ if errno != 0 {
+ return fmt.Errorf("recvmmsg: %w", errno)
+ }
+ if n != uintptr(len(recvMsgs)) {
+ return fmt.Errorf("recvmmsg received %d messages, want %d", n, len(recvMsgs))
+ }
+ // Same reachability guard as on the send side.
+ runtime.KeepAlive(recvA)
+ runtime.KeepAlive(recvB)
+ runtime.KeepAlive(recvMsgs)
+ return nil
+}
+
func retbytesSendfile() error {
dir, cleanup, err := makeTempDir("retbytes-sendfile")
if err != nil {
@@ -202,6 +250,17 @@ func openPayloadFile(path string) (int, error) {
return fd, nil
}
+// mmsgSlice builds one mmsghdr per buffer, each carrying a single iovec that
+// points at that buffer. The returned headers reference the iovec slice, and
+// the iovecs reference the caller's buffers, so the caller must keep the
+// buffers and the returned slice alive for the duration of the syscall.
+//
+// A zero-length (or nil) buffer yields an iovec with a nil base and length 0
+// instead of panicking on the out-of-range &buf[0].
+func mmsgSlice(bufs ...[]byte) []mmsghdr {
+	msgs := make([]mmsghdr, len(bufs))
+	iovs := make([]syscall.Iovec, len(bufs))
+	for i, buf := range bufs {
+		// Only take the address of the first byte when there is one; an
+		// empty buffer keeps the zero-valued iovec.
+		if len(buf) > 0 {
+			iovs[i] = syscall.Iovec{Base: &buf[0], Len: uint64(len(buf))}
+		}
+		msgs[i].hdr.Iov = &iovs[i]
+		msgs[i].hdr.Iovlen = 1
+	}
+	return msgs
+}
+
func processVMReadv(pid int, local, remote []byte) (int, error) {
localIov := syscall.Iovec{Base: &local[0], Len: uint64(len(local))}
remoteIov := syscall.Iovec{Base: &remote[0], Len: uint64(len(remote))}
diff --git a/docs/syscall-tracing-plan.md b/docs/syscall-tracing-plan.md
index 18ac033..ecf11b5 100644
--- a/docs/syscall-tracing-plan.md
+++ b/docs/syscall-tracing-plan.md
@@ -98,15 +98,15 @@ These are conspicuously absent from ior (which already has read/write but not so
| `setsockopt` | yes | no | yes (optlen) | extend `KindFd` | level/optname | P2 |
| `sendto` | yes | no | **yes** (len) | `KindRet` + `WriteClassified` (already in `retClassifications`!) | sockaddr family | P1 |
| `sendmsg` | yes | no | **yes** (iov total) | `KindRet` + `WriteClassified` (already mapped) | flags | P1 |
-| `sendmmsg` | yes | no | **yes** (sum of msgs) | `KindRet` + `WriteClassified` (already mapped) | vlen, flags | P1 |
+| `sendmmsg` | yes | no | **yes** (sum of msgs) | defer byte classification until payload bytes can be computed from message vectors | vlen, flags | P1 |
| `recvfrom` | yes | no | **yes** (len) | `KindRet` + `ReadClassified` (already mapped) | sockaddr family | P1 |
| `recvmsg` | yes | no | **yes** (iov total) | `KindRet` + `ReadClassified` (already mapped) | flags | P1 |
-| `recvmmsg` | yes | no | **yes** (sum of msgs) | `KindRet` + `ReadClassified` (already mapped) | vlen, flags, timeout | P1 |
+| `recvmmsg` | yes | no | **yes** (sum of msgs) | defer byte classification until payload bytes can be computed from message vectors | vlen, flags, timeout | P1 |
| `sendfile64` | yes (both in/out fd) | no | **yes** (count) | `KindRet` + `TransferClassified` (already mapped) | both fds | P1 |
| `splice` | yes (both fds) | no | **yes** (len) | `KindRet` + `TransferClassified` (already mapped) | both fds, flags | P1 |
| `tee` | yes (both fds) | no | **yes** (len) | `KindRet` + `TransferClassified` (already mapped) | both fds, flags | P1 |
-> Note: `RetClassification` already lists the recv/send/sendfile/splice/tee/process_vm_* families. The classifier just refuses them today because `shouldIgnore`/`exactIgnores` short-circuits earlier in `classify.go`. **Removing those ignores is the cheapest possible win** — bytes accounting drops in for free.
+> Note: `RetClassification` covers single-message recv/send, sendfile/splice/tee, and process_vm_* families. Batched `sendmmsg`/`recvmmsg` are not safe to classify through generic return-value byte accounting because their return value is a message count, not a payload byte count.
### 3.2 IPC — pipes, eventfd, signalfd, message queues, shared mem, semaphores
@@ -355,7 +355,7 @@ Out of the ~230 currently-ignored syscalls, **fd as argument** appears in:
- Security: `landlock_add_rule` (ruleset_fd), `landlock_restrict_self`, `kexec_file_load`
- Mount: `move_mount` (two), `fsmount` (fsfd)
- Perf: `perf_event_open` (group_fd)
-- Already-mapped Ret-classified bytes-carrying entries that block on `shouldIgnore` only: all send/recv variants, `sendfile64`, `splice`, `tee`, `vmsplice` (already traced), `process_vm_readv`, `process_vm_writev`
+- Already-mapped Ret-classified bytes-carrying entries that are blocked only by `shouldIgnore`: single-message send/recv variants, `sendfile64`, `splice`, `tee`, `vmsplice` (already traced), `process_vm_readv`, `process_vm_writev`. `sendmmsg`/`recvmmsg` need message-vector byte accounting before they can join this set.
### 4.2 Which syscalls return an fd? (Summary)
@@ -442,7 +442,7 @@ Tracing `futex`, `clock_gettime`, `epoll_wait`, `nanosleep`, and `read`/`write`
A pragmatic, low-risk order of work — each step ships independent value:
**Phase A — "free wins"** (no new kind needed, just unblock ignores)
-- Network read/write bytes: enable `sendto`/`sendmsg`/`sendmmsg`/`recvfrom`/`recvmsg`/`recvmmsg`, `sendfile64`, `splice`, `tee`, `process_vm_readv`, `process_vm_writev`. These already appear in `retClassifications`; only `shouldIgnore` blocks them. Need a `KindRet` exit handler and minimal enter wiring.
+- Network read/write bytes: enable `sendto`/`sendmsg`/`recvfrom`/`recvmsg`, `sendfile64`, `splice`, `tee`, `process_vm_readv`, `process_vm_writev`. These can use `retClassifications` directly because their return values are payload bytes. Defer `sendmmsg`/`recvmmsg` byte totals until enter-state/iovec accounting can compute payload bytes rather than message counts.
**Phase B — high-impact families** (new kinds, but small set, very visible payoff)
- `socket`/`socketpair`/`accept[4]`/`bind`/`connect`/`listen`/`shutdown` + getsock*/setsock*
diff --git a/integrationtests/retbytes_test.go b/integrationtests/retbytes_test.go
index 2e2ea1d..c6f06d8 100644
--- a/integrationtests/retbytes_test.go
+++ b/integrationtests/retbytes_test.go
@@ -10,6 +10,8 @@ func TestRetbytesPhaseA(t *testing.T) {
{Tracepoint: "enter_recvfrom", Comm: "ioworkload", MinCount: 1},
{Tracepoint: "enter_sendmsg", Comm: "ioworkload", MinCount: 1},
{Tracepoint: "enter_recvmsg", Comm: "ioworkload", MinCount: 1},
+ {Tracepoint: "enter_sendmmsg", Comm: "ioworkload", MinCount: 1},
+ {Tracepoint: "enter_recvmmsg", Comm: "ioworkload", MinCount: 1},
{Tracepoint: "enter_sendfile64", Comm: "ioworkload", MinCount: 1},
{Tracepoint: "enter_splice", Comm: "ioworkload", MinCount: 1},
{Tracepoint: "enter_tee", Comm: "ioworkload", MinCount: 1},
@@ -32,4 +34,10 @@ func TestRetbytesPhaseA(t *testing.T) {
assertEventBytesAtLeast(t, result, exp, payloadLen)
assertEventDurationPositive(t, result, exp)
}
+
+ for _, tracepoint := range []string{"enter_sendmmsg", "enter_recvmmsg"} {
+ exp := ExpectedEvent{Tracepoint: tracepoint, Comm: "ioworkload"}
+ assertEventBytesEqual(t, result, exp, 0)
+ assertEventDurationPositive(t, result, exp)
+ }
}
diff --git a/internal/c/generated_tracepoints.c b/internal/c/generated_tracepoints.c
index be6606d..980f91d 100644
--- a/internal/c/generated_tracepoints.c
+++ b/internal/c/generated_tracepoints.c
@@ -1431,7 +1431,7 @@ int handle_sys_enter_sendmmsg(struct syscall_trace_enter *ctx) {
return 0;
}
-/// sys_exit_sendmmsg is a struct ret_event (WRITE_CLASSIFIED)
+/// sys_exit_sendmmsg is a struct ret_event (UNCLASSIFIED)
SEC("tracepoint/syscalls/sys_exit_sendmmsg")
int handle_sys_exit_sendmmsg(struct syscall_trace_exit *ctx) {
__u32 pid, tid;
@@ -1448,7 +1448,7 @@ int handle_sys_exit_sendmmsg(struct syscall_trace_exit *ctx) {
ev->tid = tid;
ev->time = bpf_ktime_get_boot_ns();
ev->ret = ctx->ret;
- ev->ret_type = WRITE_CLASSIFIED;
+ ev->ret_type = UNCLASSIFIED;
bpf_ringbuf_submit(ev, 0);
return 0;
@@ -1521,7 +1521,7 @@ int handle_sys_enter_recvmmsg(struct syscall_trace_enter *ctx) {
return 0;
}
-/// sys_exit_recvmmsg is a struct ret_event (READ_CLASSIFIED)
+/// sys_exit_recvmmsg is a struct ret_event (UNCLASSIFIED)
SEC("tracepoint/syscalls/sys_exit_recvmmsg")
int handle_sys_exit_recvmmsg(struct syscall_trace_exit *ctx) {
__u32 pid, tid;
@@ -1538,7 +1538,7 @@ int handle_sys_exit_recvmmsg(struct syscall_trace_exit *ctx) {
ev->tid = tid;
ev->time = bpf_ktime_get_boot_ns();
ev->ret = ctx->ret;
- ev->ret_type = READ_CLASSIFIED;
+ ev->ret_type = UNCLASSIFIED;
bpf_ringbuf_submit(ev, 0);
return 0;
diff --git a/internal/c/generated_tracepoints_result.txt b/internal/c/generated_tracepoints_result.txt
index 2cc1e52..ea4f2d1 100644
--- a/internal/c/generated_tracepoints_result.txt
+++ b/internal/c/generated_tracepoints_result.txt
@@ -608,7 +608,7 @@ sys_exit_readlinkat is a struct ret_event (READ_CLASSIFIED)
sys_exit_readv is a struct ret_event (READ_CLASSIFIED)
sys_exit_reboot is a struct ret_event (UNCLASSIFIED)
sys_exit_recvfrom is a struct ret_event (READ_CLASSIFIED)
-sys_exit_recvmmsg is a struct ret_event (READ_CLASSIFIED)
+sys_exit_recvmmsg is a struct ret_event (UNCLASSIFIED)
sys_exit_recvmsg is a struct ret_event (READ_CLASSIFIED)
sys_exit_remap_file_pages is a struct ret_event (UNCLASSIFIED)
sys_exit_removexattr is a struct ret_event (UNCLASSIFIED)
@@ -647,7 +647,7 @@ sys_exit_semget is a struct ret_event (UNCLASSIFIED)
sys_exit_semop is a struct ret_event (UNCLASSIFIED)
sys_exit_semtimedop is a struct ret_event (UNCLASSIFIED)
sys_exit_sendfile64 is a struct ret_event (TRANSFER_CLASSIFIED)
-sys_exit_sendmmsg is a struct ret_event (WRITE_CLASSIFIED)
+sys_exit_sendmmsg is a struct ret_event (UNCLASSIFIED)
sys_exit_sendmsg is a struct ret_event (WRITE_CLASSIFIED)
sys_exit_sendto is a struct ret_event (WRITE_CLASSIFIED)
sys_exit_set_mempolicy is a struct ret_event (UNCLASSIFIED)
diff --git a/internal/generate/classify.go b/internal/generate/classify.go
index b96ee0d..7768ea7 100644
--- a/internal/generate/classify.go
+++ b/internal/generate/classify.go
@@ -169,7 +169,6 @@ var retClassifications = map[string]RetClassification{
"readlink": ReadClassified,
"readlinkat": ReadClassified,
"readv": ReadClassified,
- "recvmmsg": ReadClassified,
"recvmsg": ReadClassified,
"recvfrom": ReadClassified,
"syslog": ReadClassified,
@@ -184,7 +183,6 @@ var retClassifications = map[string]RetClassification{
"pwrite64": WriteClassified,
"pwritev": WriteClassified,
"pwritev2": WriteClassified,
- "sendmmsg": WriteClassified,
"sendmsg": WriteClassified,
"sendto": WriteClassified,
"write": WriteClassified,
diff --git a/internal/generate/classify_test.go b/internal/generate/classify_test.go
index 4dd216e..ea7d662 100644
--- a/internal/generate/classify_test.go
+++ b/internal/generate/classify_test.go
@@ -372,11 +372,27 @@ func TestClassifyPhaseAByteSyscallPairsAccepted(t *testing.T) {
}
}
+// TestBatchMessageSyscallPairsDeferByteClassification verifies that sendmmsg
+// and recvmmsg are still accepted by the generator (the output contains
+// neither "Ignoring" nor "Skipping") while their exit handlers are emitted
+// as UNCLASSIFIED ret_events, i.e. without byte classification.
+func TestBatchMessageSyscallPairsDeferByteClassification(t *testing.T) {
+ tests := []string{"sendmmsg", "recvmmsg"}
+ for i, name := range tests {
+ t.Run(name, func(t *testing.T) {
+ // Each subtest gets its own enter ID (9100, 9102, ...); presumably the
+ // step of 2 leaves room for the matching exit ID — verify in phaseAFormats.
+ output := GenerateTracepointsC(phaseAFormats(name, 9100+i*2))
+ if strings.Contains(output, "Ignoring") || strings.Contains(output, "Skipping") {
+ t.Fatalf("syscall %s was not accepted:\n%s", name, output)
+ }
+ // The generated doc comment is the marker for the chosen classification.
+ if !strings.Contains(output, "/// sys_exit_"+name+" is a struct ret_event (UNCLASSIFIED)") {
+ t.Fatalf("sys_exit_%s should be generated without byte classification:\n%s", name, output)
+ }
+ })
+ }
+}
+
func phaseAFormats(name string, enterID int) []Format {
enterFields := []Field{
{Type: "long", Name: "__syscall_nr"},
}
- if name == "sendto" || name == "recvfrom" || name == "sendmsg" || name == "recvmsg" {
+ if name == "sendto" || name == "recvfrom" || name == "sendmsg" || name == "recvmsg" ||
+ name == "sendmmsg" || name == "recvmmsg" {
enterFields = append(enterFields, Field{Type: "int", Name: "fd"})
}
diff --git a/internal/generate/retclassify_test.go b/internal/generate/retclassify_test.go
index 9a75a15..1c5b2ac 100644
--- a/internal/generate/retclassify_test.go
+++ b/internal/generate/retclassify_test.go
@@ -7,7 +7,7 @@ func TestClassifyRetRead(t *testing.T) {
"fgetxattr", "flistxattr", "getdents", "getdents64", "getxattr",
"lgetxattr", "listxattr", "llistxattr", "pread64", "preadv",
"preadv2", "process_vm_readv", "read", "readlink", "readlinkat",
- "readv", "recvmmsg", "recvmsg", "recvfrom", "syslog",
+ "readv", "recvmsg", "recvfrom", "syslog",
}
for _, name := range reads {
if got := ClassifyRet("sys_exit_" + name); got != ReadClassified {
@@ -19,7 +19,7 @@ func TestClassifyRetRead(t *testing.T) {
func TestClassifyRetWrite(t *testing.T) {
writes := []string{
"process_vm_writev", "pwrite64", "pwritev", "pwritev2",
- "sendmmsg", "sendmsg", "sendto", "write", "writev",
+ "sendmsg", "sendto", "write", "writev",
}
for _, name := range writes {
if got := ClassifyRet("sys_exit_" + name); got != WriteClassified {
@@ -43,7 +43,7 @@ func TestClassifyRetUnclassified(t *testing.T) {
unclassified := []string{
"openat", "close", "rename", "unlink", "fcntl", "dup", "dup2", "dup3",
"mkdir", "rmdir", "chmod", "chown", "chdir", "stat", "lseek",
- "truncate", "fallocate", "mmap", "fsync", "flock",
+ "truncate", "fallocate", "mmap", "fsync", "flock", "recvmmsg", "sendmmsg",
}
for _, name := range unclassified {
if got := ClassifyRet("sys_exit_" + name); got != Unclassified {
@@ -52,6 +52,17 @@ func TestClassifyRetUnclassified(t *testing.T) {
}
}
+// TestBatchMessageSyscallsDeferredFromRetByteClassification pins that
+// ClassifyRet reports Unclassified for the exit tracepoints of recvmmsg and
+// sendmmsg, one subtest per syscall.
+func TestBatchMessageSyscallsDeferredFromRetByteClassification(t *testing.T) {
+ tests := []string{"recvmmsg", "sendmmsg"}
+ for _, name := range tests {
+ t.Run(name, func(t *testing.T) {
+ // ClassifyRet is given the full exit tracepoint name, not the bare syscall name.
+ if got := ClassifyRet("sys_exit_" + name); got != Unclassified {
+ t.Fatalf("ClassifyRet(sys_exit_%s) = %q, want %q", name, got, Unclassified)
+ }
+ })
+ }
+}
+
func TestClassifyRetCaseInsensitive(t *testing.T) {
if got := ClassifyRet("sys_exit_READ"); got != ReadClassified {
t.Errorf("ClassifyRet(sys_exit_READ) = %q, want READ_CLASSIFIED", got)