diff options
| Mode | File | Lines changed |
| --- | --- | --- |
| -rw-r--r-- | Magefile.go | 7 |
| -rw-r--r-- | docs/parquet-querying.md | 24 |
| -rw-r--r-- | integrationtests/polling_test.go | 19 |
| -rw-r--r-- | internal/event/pair.go | 38 |
| -rw-r--r-- | internal/eventloop_exit.go | 9 |
| -rw-r--r-- | internal/parquet/schema.go | 10 |
| -rw-r--r-- | internal/streamrow/row.go | 16 |
7 files changed, 114 insertions, 9 deletions
diff --git a/Magefile.go b/Magefile.go index 2a996eb..0396a9e 100644 --- a/Magefile.go +++ b/Magefile.go @@ -1164,11 +1164,14 @@ func runClickHouseQuery(dir, file, sql string) (string, error) { return strings.TrimSpace(out), nil } -// expectedParquetColumns lists the 15 column names that the parquet schema must contain. +// expectedParquetColumns lists the column names that the parquet schema must +// contain. Keep in lockstep with parquet.Record (internal/parquet/schema.go). var expectedParquetColumns = []string{ "seq", "time_ns", "gap_ns", "latency_ns", "comm", "pid", "tid", "syscall", "family", "fd", "ret", - "bytes", "file", "is_error", "filter_epoch", + "bytes", "address_space_bytes", "requested_sleep_ns", + "file", "is_error", "filter_epoch", + "epoll_op", "epoll_target_fd", "epoll_events", } // parquetSchemaCheck verifies that all expectedParquetColumns appear in the diff --git a/docs/parquet-querying.md b/docs/parquet-querying.md index 4c31474..2ebf16e 100644 --- a/docs/parquet-querying.md +++ b/docs/parquet-querying.md @@ -30,9 +30,14 @@ state, no installation needed beyond Docker. | `fd` | Int32 | File descriptor | | `ret` | Int64 | Return value (negative = errno) | | `bytes` | UInt64 | Bytes transferred (0 if not applicable) | +| `address_space_bytes` | UInt64 | Memory-region extent for memory syscalls (e.g. 
`munmap`/`mremap`); 0 otherwise | +| `requested_sleep_ns` | Int64 | Requested sleep duration for nanosleep-style syscalls; 0 otherwise | | `file` | String | File path (empty if not resolved) | | `is_error` | Bool | True when `ret` is a negative errno | | `filter_epoch` | UInt64 | Filter generation at capture time | +| `epoll_op` | String | `epoll_ctl` operation (`ADD`/`MOD`/`DEL`); empty for other syscalls | +| `epoll_target_fd` | Int32 | `epoll_ctl` target descriptor being registered (args[2]); 0 for other syscalls | +| `epoll_events` | UInt32 | `epoll_ctl` requested event mask (args[3]->events); 0 for other syscalls | --- @@ -78,12 +83,17 @@ pid UInt32 tid UInt32 syscall String family String -fd Int32 -ret Int64 -bytes UInt64 -file String -is_error Bool -filter_epoch UInt64 +fd Int32 +ret Int64 +bytes UInt64 +address_space_bytes UInt64 +requested_sleep_ns Int64 +file String +is_error Bool +filter_epoch UInt64 +epoll_op String +epoll_target_fd Int32 +epoll_events UInt32 ``` ### Row count @@ -220,6 +230,6 @@ PARQUET_FILE=ior-recording-20260313-170234.parquet env GOTOOLCHAIN=auto mage par ``` It checks: -1. All 14 expected columns are present +1. All 20 expected columns are present 2. Row count > 0 3. `seq` is monotonically ordered and `time_ns` is non-zero diff --git a/integrationtests/polling_test.go b/integrationtests/polling_test.go index d6b520c..c2f42d0 100644 --- a/integrationtests/polling_test.go +++ b/integrationtests/polling_test.go @@ -60,8 +60,19 @@ func TestPollingEpollReadyCountInParquet(t *testing.T) { } var sawPwait2 bool var sawPwait2ReadyCount bool + var sawEpollCtlOp bool for _, row := range rows { switch row.Syscall { + case "epoll_ctl": + // The workload registers descriptors via epoll_ctl; at least one + // successful row must surface a decoded op and a non-negative target + // fd distinct from the resolved epfd column. 
+ if row.EpollOp != "" && row.Ret == 0 { + sawEpollCtlOp = true + if row.EpollTargetFD < 0 { + t.Fatalf("epoll_ctl row has op %q but target fd %d < 0", row.EpollOp, row.EpollTargetFD) + } + } case "epoll_wait", "epoll_pwait", "poll", "ppoll", "select", "pselect6": if row.Ret > 0 { wantReadyCount[row.Syscall] = true @@ -77,9 +88,17 @@ func TestPollingEpollReadyCountInParquet(t *testing.T) { if row.Bytes != 0 { t.Fatalf("%s bytes = %d, want 0 for ready-count events", row.Syscall, row.Bytes) } + // epoll_ctl metadata must stay empty for non-epoll_ctl syscalls. + if row.EpollOp != "" { + t.Fatalf("%s row has unexpected epoll_op %q", row.Syscall, row.EpollOp) + } } } + if !sawEpollCtlOp { + t.Fatalf("expected at least one successful epoll_ctl row with decoded op/target-fd in parquet output") + } + for syscall, ok := range wantReadyCount { if !ok { t.Fatalf("expected %s row with positive ready-count ret in parquet output", syscall) diff --git a/internal/event/pair.go b/internal/event/pair.go index 523f961..afc9bed 100644 --- a/internal/event/pair.go +++ b/internal/event/pair.go @@ -31,6 +31,44 @@ type Pair struct { AddressSpaceBytes uint64 // RequestedSleepNs tracks requested sleep duration for nanosleep-style syscalls. RequestedSleepNs int64 + // Epoll carries epoll_ctl control metadata (op, target fd, requested event + // mask). It is only populated for epoll_ctl pairs; HasEpoll reports whether + // it is set. The Pair-level File still resolves to the epoll instance (epfd); + // Epoll.TargetFD is the descriptor being registered/modified/removed. + Epoll EpollCtl + HasEpoll bool +} + +// EpollCtl holds the decoded epoll_ctl arguments surfaced from the BPF +// EpollCtlEvent: the operation (EPOLL_CTL_ADD/MOD/DEL), the target fd +// (args[2]), and the requested epoll event mask (args[3]->events). +type EpollCtl struct { + Op int32 + TargetFD int32 + Events uint32 +} + +// Linux epoll_ctl op values from <sys/epoll.h>. 
+const ( + epollCtlAdd = 1 + epollCtlDel = 2 + epollCtlMod = 3 +) + +// OpName renders the epoll_ctl operation as a human-readable token +// (ADD/DEL/MOD). Unknown values fall back to their decimal form so the +// raw op is never lost. +func (c EpollCtl) OpName() string { + switch c.Op { + case epollCtlAdd: + return "ADD" + case epollCtlDel: + return "DEL" + case epollCtlMod: + return "MOD" + default: + return strconv.FormatInt(int64(c.Op), 10) + } } func NewPair(enterEv Event) *Pair { diff --git a/internal/eventloop_exit.go b/internal/eventloop_exit.go index ae085c6..105d9ac 100644 --- a/internal/eventloop_exit.go +++ b/internal/eventloop_exit.go @@ -385,7 +385,16 @@ func (e *eventLoop) handleEventfdExit(ep *event.Pair, eventfdEv *types.EventfdEv } func (e *eventLoop) handleEpollCtlExit(ep *event.Pair, epollCtlEv *types.EpollCtlEvent) bool { + // File resolves to the epoll instance (epfd); the decoded op/target-fd/events + // are surfaced separately via ep.Epoll so consumers can see which descriptor + // was registered and the operation performed. ep.File = e.fdState().resolve(epollCtlEv.Epfd, epollCtlEv.Pid) + ep.Epoll = event.EpollCtl{ + Op: epollCtlEv.Op, + TargetFD: epollCtlEv.Fd, + Events: epollCtlEv.Events, + } + ep.HasEpoll = true return e.finishPairForTid(ep, epollCtlEv.GetTid()) } diff --git a/internal/parquet/schema.go b/internal/parquet/schema.go index f39361c..b7ed381 100644 --- a/internal/parquet/schema.go +++ b/internal/parquet/schema.go @@ -30,6 +30,13 @@ type Record struct { File string `parquet:"file"` IsError bool `parquet:"is_error"` FilterEpoch uint64 `parquet:"filter_epoch"` + // EpollOp/EpollTargetFD/EpollEvents surface epoll_ctl control metadata: the + // operation (ADD/MOD/DEL), the target descriptor registered (args[2]), and + // the requested event mask (args[3]->events). EpollOp is empty and the + // numeric fields are zero for all non-epoll_ctl rows. 
+ EpollOp string `parquet:"epoll_op"` + EpollTargetFD int32 `parquet:"epoll_target_fd"` + EpollEvents uint32 `parquet:"epoll_events"` } // FileMetadata captures constant metadata written once into the parquet file. @@ -74,6 +81,9 @@ func RecordFromStream(row streamrow.Row, filterEpoch uint64) Record { File: row.FileName, IsError: row.IsError, FilterEpoch: filterEpoch, + EpollOp: row.EpollOp, + EpollTargetFD: row.EpollTargetFD, + EpollEvents: row.EpollEvents, } } diff --git a/internal/streamrow/row.go b/internal/streamrow/row.go index a6ccdf7..c846346 100644 --- a/internal/streamrow/row.go +++ b/internal/streamrow/row.go @@ -30,6 +30,13 @@ type Row struct { RetVal int64 IsError bool FD int32 + // EpollOp is the epoll_ctl operation as a readable token (ADD/MOD/DEL), + // empty for non-epoll_ctl rows. EpollTargetFD and EpollEvents hold the + // registered descriptor (args[2]) and requested event mask (args[3]->events) + // for epoll_ctl rows; both are zero when EpollOp is empty. + EpollOp string + EpollTargetFD int32 + EpollEvents uint32 } func (r Row) SyscallValue() string { @@ -122,6 +129,15 @@ func New(seq uint64, pair *event.Pair) Row { row.FD = fd } + // Surface epoll_ctl control metadata when present. The Pair's FD/File still + // reflect the epoll instance (epfd); these fields expose the target fd and + // operation so consumers can see which descriptor was registered. + if pair.HasEpoll { + row.EpollOp = pair.Epoll.OpName() + row.EpollTargetFD = pair.Epoll.TargetFD + row.EpollEvents = pair.Epoll.Events + } + if retEv, ok := pair.ExitEv.(*types.RetEvent); ok { row.RetVal = retEv.Ret row.IsError = retEv.Ret < 0 |
