From 7031211501884555139351bb676fc0592c9df14c Mon Sep 17 00:00:00 2001
From: Paul Buetow <paul@buetow.org>
Date: Tue, 9 Jun 2026 22:18:42 +0300
Subject: feat(parquet): surface epoll_ctl op/target-fd/events metadata

epoll_ctl's BPF handler already decodes the operation (args[1]),
target descriptor (args[2]), and requested event mask (args[3]->events)
into an EpollCtlEvent, but the single resolved-epfd `fd` column was the
only epoll detail reaching the output schema. Consumers could see neither
which descriptor was registered nor which operation was performed.

Surface the metadata as three additive, backward-compatible columns,
mirroring the existing dedicated optional-column convention used by
requested_sleep_ns and address_space_bytes:

- epoll_op (String): ADD/MOD/DEL, or the raw decimal for unknown ops;
  empty for non-epoll_ctl rows.
- epoll_target_fd (Int32): registered descriptor (args[2]); 0 otherwise.
- epoll_events (UInt32): requested event mask; 0 otherwise.

Data flows EpollCtlEvent -> event.Pair (new EpollCtl/HasEpoll fields,
populated in handleEpollCtlExit) -> streamrow.Row -> parquet.Record.
The op-to-string mapping lives on event.EpollCtl.OpName.

Docs (docs/parquet-querying.md) and the Magefile parquetValidate column
list are updated in lockstep (this also adds the previously undocumented
address_space_bytes/requested_sleep_ns columns). The polling parquet
integration test now asserts epoll_ctl rows carry a decoded op and a
valid target fd, and that other syscalls leave epoll_op empty.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 integrationtests/polling_test.go | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'integrationtests')

diff --git a/integrationtests/polling_test.go b/integrationtests/polling_test.go
index d6b520c..c2f42d0 100644
--- a/integrationtests/polling_test.go
+++ b/integrationtests/polling_test.go
@@ -60,8 +60,19 @@ func TestPollingEpollReadyCountInParquet(t *testing.T) {
 	}
 	var sawPwait2 bool
 	var sawPwait2ReadyCount bool
+	var sawEpollCtlOp bool
 	for _, row := range rows {
 		switch row.Syscall {
+		case "epoll_ctl":
+			// The workload registers descriptors via epoll_ctl; at least one
+			// successful row must surface a decoded op and a non-negative target
+			// fd, which is reported separately from the resolved epfd column.
+			if row.EpollOp != "" && row.Ret == 0 {
+				sawEpollCtlOp = true
+				if row.EpollTargetFD < 0 {
+					t.Fatalf("epoll_ctl row has op %q but target fd %d < 0", row.EpollOp, row.EpollTargetFD)
+				}
+			}
 		case "epoll_wait", "epoll_pwait", "poll", "ppoll", "select", "pselect6":
 			if row.Ret > 0 {
 				wantReadyCount[row.Syscall] = true
@@ -77,9 +88,17 @@ func TestPollingEpollReadyCountInParquet(t *testing.T) {
 			if row.Bytes != 0 {
 				t.Fatalf("%s bytes = %d, want 0 for ready-count events", row.Syscall, row.Bytes)
 			}
+			// epoll_ctl metadata must stay empty for non-epoll_ctl syscalls.
+			if row.EpollOp != "" {
+				t.Fatalf("%s row has unexpected epoll_op %q", row.Syscall, row.EpollOp)
+			}
 		}
 	}
 
+	if !sawEpollCtlOp {
+		t.Fatalf("expected at least one successful epoll_ctl row with decoded op/target-fd in parquet output")
+	}
+
 	for syscall, ok := range wantReadyCount {
 		if !ok {
 			t.Fatalf("expected %s row with positive ready-count ret in parquet output", syscall)
-- 
cgit v1.2.3