diff options
| author | Paul Buetow <paul@buetow.org> | 2026-03-01 23:33:46 +0200 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2026-03-01 23:33:46 +0200 |
| commit | 3690b89082215ff5c1fc84110074cf08b1b8909c (patch) | |
| tree | e89f32688b64a467cb804e9df615a32cc6c2522c | |
| parent | 3a438f0381a21c0055db3cce3164ff0ef5970cc0 (diff) | |
Route non-fatal event-loop warnings to TUI
| -rw-r--r-- | internal/eventloop.go | 79 | ||||
| -rw-r--r-- | internal/eventloop_error_handling_test.go | 115 | ||||
| -rw-r--r-- | internal/flamegraph/iordatacollector.go | 10 | ||||
| -rw-r--r-- | internal/ior.go | 7 | ||||
| -rw-r--r-- | internal/tui/eventstream/streamevent.go | 16 | ||||
| -rw-r--r-- | internal/tui/eventstream/streamevent_test.go | 26 |
6 files changed, 233 insertions, 20 deletions
diff --git a/internal/eventloop.go b/internal/eventloop.go index 6d4b065..f303f65 100644 --- a/internal/eventloop.go +++ b/internal/eventloop.go @@ -43,6 +43,7 @@ type eventLoop struct { flamegraph flamegraph.IorDataCollector // Storing all paths in a map structure for analysis liveTrie *flamegraph.LiveTrie printCb func(ep *event.Pair) // Callback to print the event + warningCb func(message string) // Optional callback for non-fatal event processing warnings cfg eventLoopConfig // Statistics @@ -144,7 +145,12 @@ func (e *eventLoop) run(ctx context.Context, rawCh <-chan []byte) { if e.cfg.flamegraphEnable { fmt.Println("Waiting for flamegraph") - <-e.flamegraph.Done + if err := <-e.flamegraph.Done; err != nil { + e.notifyWarning(fmt.Sprintf("Flamegraph generation failed: %v", err)) + if e.warningCb == nil { + fmt.Println("Flamegraph generation failed:", err) + } + } } } @@ -182,7 +188,8 @@ func (e *eventLoop) processRawEvent(raw []byte, ch chan<- *event.Pair) { evType := EventType(raw[0]) handler, ok := e.rawHandlers[evType] if !ok { - panic(fmt.Sprintf("unhandled event type %v: %v", evType, raw)) + e.notifyWarning(fmt.Sprintf("Dropped unhandled raw event type %d", evType)) + return } handler(raw, ch) } @@ -279,6 +286,7 @@ func (e *eventLoop) tracepointExited(exitEv event.Event, ch chan<- *event.Pair) // enterEv:SYS_ENTER_OPEN => exitEv:SYS_EXIT_OPEN if ep.EnterEv.GetTraceId()-1 != ep.ExitEv.GetTraceId() { e.numTracepointMismatches++ + e.notifyWarning("Dropped tracepoint pair with mismatched enter/exit IDs") ep.Recycle() return } @@ -286,9 +294,14 @@ func (e *eventLoop) tracepointExited(exitEv event.Event, ch chan<- *event.Pair) switch v := ep.EnterEv.(type) { case *OpenEvent: openEv := ep.EnterEv.(*OpenEvent) + retEvent, ok := ep.ExitEv.(*RetEvent) + if !ok { + e.recyclePair(ep, "Dropped malformed open exit event") + return + } comm := types.StringValue(openEv.Comm[:]) ep.Comm = comm - if fd := int32(ep.ExitEv.(*RetEvent).Ret); fd >= 0 { + if fd := int32(retEvent.Ret); fd >= 0 { file := file.NewFd(fd, types.StringValue(openEv.Filename[:]), v.Flags) e.files[fd] = file ep.File = file @@ -319,7 +332,12 @@ func (e *eventLoop) tracepointExited(exitEv event.Event, ch chan<- *event.Pair) nameEvent := ep.EnterEv.(*PathEvent) if ep.Is(SYS_ENTER_CREAT) { - if fd := int32(ep.ExitEv.(*RetEvent).Ret); fd >= 0 { + retEvent, ok := ep.ExitEv.(*RetEvent) + if !ok { + e.recyclePair(ep, "Dropped malformed creat exit event") + return + } + if fd := int32(retEvent.Ret); fd >= 0 { file := file.NewFd(fd, types.StringValue(nameEvent.Pathname[:]), syscall.O_CREAT|syscall.O_WRONLY|syscall.O_TRUNC) e.files[fd] = file @@ -360,10 +378,16 @@ func (e *eventLoop) tracepointExited(exitEv event.Event, ch chan<- *event.Pair) if ep.Is(SYS_ENTER_DUP) || ep.Is(SYS_ENTER_DUP2) { fdFile, ok := ep.File.(file.FdFile) if !ok { - panic("expected a file.FdFile") + e.recyclePair(ep, "Dropped malformed dup source event") + return + } + retEvent, ok := ep.ExitEv.(*RetEvent) + if !ok { + e.recyclePair(ep, "Dropped malformed dup exit event") + return } // Duplicating fd - newFd := int32(ep.ExitEv.(*RetEvent).Ret) + newFd := int32(retEvent.Ret) if newFd != -1 { e.files[newFd] = fdFile.Dup(newFd) } @@ -371,7 +395,8 @@ func (e *eventLoop) tracepointExited(exitEv event.Event, ch chan<- *event.Pair) if ep.Is(SYS_ENTER_PIDFD_GETFD) { retEv, ok := ep.ExitEv.(*RetEvent) if !ok { - panic("expected *types.RetEvent") + e.recyclePair(ep, "Dropped malformed pidfd_getfd exit event") + return } if newFd := int32(retEv.Ret); newFd >= 0 { transferredFile := file.NewFdWithPid(newFd, v.Pid) @@ -400,9 +425,15 @@ func (e *eventLoop) tracepointExited(exitEv event.Event, ch chan<- *event.Pair) // Duplicating fd fdFile, ok := ep.File.(file.FdFile) if !ok { - panic("expected a file.FdFile") + e.recyclePair(ep, "Dropped malformed dup3 source event") + return + } + retEvent, ok := ep.ExitEv.(*RetEvent) + if !ok { + e.recyclePair(ep, "Dropped malformed dup3 exit event") + return } - newFd := int32(ep.ExitEv.(*RetEvent).Ret) + newFd := int32(retEvent.Ret) if newFd != -1 { duppedFdFile := fdFile.Dup(newFd) duppedFdFile.AddFlags(dup3Event.Flags & syscall.O_CLOEXEC) @@ -413,7 +444,8 @@ func (e *eventLoop) tracepointExited(exitEv event.Event, ch chan<- *event.Pair) tid := ep.EnterEv.GetTid() retEvent, ok := ep.ExitEv.(*RetEvent) if !ok { - panic("expected *types.RetEvent for open_by_handle_at exit") + e.recyclePair(ep, "Dropped malformed open_by_handle_at exit event") + return } if fd := int32(retEvent.Ret); fd >= 0 { @@ -441,7 +473,8 @@ func (e *eventLoop) tracepointExited(exitEv event.Event, ch chan<- *event.Pair) if ep.Is(SYS_ENTER_IO_URING_SETUP) { retEvent, ok := exitEv.(*types.RetEvent) if !ok { - panic("expected *types.RetEvent") + e.recyclePair(ep, "Dropped malformed io_uring_setup exit event") + return } if fd := int32(retEvent.Ret); fd >= 0 { fdFile := file.NewFdWithPid(fd, v.Pid) @@ -452,7 +485,8 @@ func (e *eventLoop) tracepointExited(exitEv event.Event, ch chan<- *event.Pair) if ep.Is(SYS_ENTER_GETCWD) { retEvent, ok := ep.ExitEv.(*types.RetEvent) if !ok { - panic("expected *types.RetEvent") + e.recyclePair(ep, "Dropped malformed getcwd exit event") + return } if retEvent.Ret > 0 { if cwd, err := os.Readlink(fmt.Sprintf("/proc/%d/cwd", ep.EnterEv.GetTid())); err == nil { @@ -481,7 +515,8 @@ func (e *eventLoop) tracepointExited(exitEv event.Event, ch chan<- *event.Pair) retEvent, ok := exitEv.(*types.RetEvent) if !ok { - panic("expected *types.RetEvent") + e.recyclePair(ep, "Dropped malformed fcntl exit event") + return } // Syscall returned -1, nothing was changed with the fd if retEvent.Ret == -1 { @@ -490,7 +525,8 @@ func (e *eventLoop) tracepointExited(exitEv event.Event, ch chan<- *event.Pair) fdFile, ok := ep.File.(file.FdFile) if !ok { - panic("expected a file.FdFile") + e.recyclePair(ep, "Dropped malformed fcntl file event") + return } // See fcntl(2) for implementation details @@ -511,7 +547,8 @@ func (e *eventLoop) tracepointExited(exitEv event.Event, ch chan<- *event.Pair) } default: - panic(fmt.Sprintf("unknown type: %v", v)) + e.recyclePair(ep, "Dropped malformed enter event") + return } prevPairTime, _ := e.prevPairTimes[ep.EnterEv.GetTid()] ep.CalculateDurations(prevPairTime) @@ -519,6 +556,18 @@ func (e *eventLoop) tracepointExited(exitEv event.Event, ch chan<- *event.Pair) ch <- ep } +func (e *eventLoop) recyclePair(ep *event.Pair, warning string) { + e.notifyWarning(warning) + ep.Recycle() +} + +func (e *eventLoop) notifyWarning(message string) { + if e.warningCb == nil || message == "" { + return + } + e.warningCb(message) +} + func (e *eventLoop) comm(tid uint32) string { if comm, ok := e.comms[tid]; ok { return comm diff --git a/internal/eventloop_error_handling_test.go b/internal/eventloop_error_handling_test.go new file mode 100644 index 0000000..12f9b2f --- /dev/null +++ b/internal/eventloop_error_handling_test.go @@ -0,0 +1,115 @@ +package internal + +import ( + "testing" + + "ior/internal/event" + "ior/internal/types" +) + +func TestTracepointExitedMalformedOpenExitDoesNotPanicAndNotifies(t *testing.T) { + el := newEventLoop(eventLoopConfig{}) + warnings := make(chan string, 1) + el.warningCb = func(message string) { warnings <- message } + + enterEv, enterRaw := makeEnterOpenEvent(t, defaulTime, defaultPid, defaultTid) + el.tracepointEntered(types.NewOpenEvent(enterRaw)) + + _, exitRaw := makeExitNullEvent(t, defaulTime+1, defaultPid, defaultTid, types.SYS_EXIT_OPEN) + exitEv := types.NewNullEvent(exitRaw) + pairCh := make(chan *event.Pair, 1) + + defer func() { + if r := recover(); r != nil { + t.Fatalf("tracepointExited panicked: %v", r) + } + }() + + el.tracepointExited(exitEv, pairCh) + + select { + case ep := <-pairCh: + t.Fatalf("unexpected event produced: %v", ep) + default: + } + + select { + case msg := <-warnings: + if msg == "" { + t.Fatalf("expected non-empty warning message") + } + default: + t.Fatalf("expected warning notification") + } + + if _, ok := el.enterEvs[enterEv.Tid]; ok { + t.Fatalf("expected enter event to be removed for tid %d", enterEv.Tid) + } +} + +func TestTracepointExitedMalformedOpenByHandleAtExitDoesNotPanicAndNotifies(t *testing.T) { + el := newEventLoop(eventLoopConfig{}) + warnings := make(chan string, 1) + el.warningCb = func(message string) { warnings <- message } + + _, enterRaw := makeEnterOpenByHandleAtEvent(t, defaulTime, defaultPid, defaultTid, 0) + el.tracepointEntered(types.NewOpenByHandleAtEvent(enterRaw)) + + _, exitRaw := makeExitNullEvent(t, defaulTime+1, defaultPid, defaultTid, types.SYS_EXIT_OPEN_BY_HANDLE_AT) + exitEv := types.NewNullEvent(exitRaw) + pairCh := make(chan *event.Pair, 1) + + defer func() { + if r := recover(); r != nil { + t.Fatalf("tracepointExited panicked: %v", r) + } + }() + + el.tracepointExited(exitEv, pairCh) + + select { + case ep := <-pairCh: + t.Fatalf("unexpected event produced: %v", ep) + default: + } + + select { + case msg := <-warnings: + if msg == "" { + t.Fatalf("expected non-empty warning message") + } + default: + t.Fatalf("expected warning notification") + } +} + +func TestProcessRawEventUnknownTypeDoesNotPanicAndNotifies(t *testing.T) { + el := newEventLoop(eventLoopConfig{}) + warnings := make(chan string, 1) + el.warningCb = func(message string) { warnings <- message } + + pairCh := make(chan *event.Pair, 1) + + defer func() { + if r := recover(); r != nil { + t.Fatalf("processRawEvent panicked: %v", r) + } + }() + + el.processRawEvent([]byte{255}, pairCh) + + select { + case ep := <-pairCh: + t.Fatalf("unexpected event produced: %v", ep) + default: + } + + select { + case msg := <-warnings: + if msg == "" { + t.Fatalf("expected non-empty warning message") + } + default: + t.Fatalf("expected warning notification") + } +} diff --git a/internal/flamegraph/iordatacollector.go b/internal/flamegraph/iordatacollector.go index a63b742..948af97 100644 --- a/internal/flamegraph/iordatacollector.go +++ b/internal/flamegraph/iordatacollector.go @@ -5,7 +5,6 @@ import ( "fmt" "ior/internal/event" "ior/internal/flags" - "os" "runtime" "sync" ) @@ -13,14 +12,14 @@ import ( type IorDataCollector struct { flags flags.Flags Ch chan *event.Pair - Done chan struct{} + Done chan error workers []worker } func New() IorDataCollector { f := IorDataCollector{ Ch: make(chan *event.Pair, 4096), - Done: make(chan struct{}), + Done: make(chan error, 1), } numWorkers := runtime.NumCPU() / 4 if numWorkers == 0 { @@ -52,8 +51,9 @@ func (f IorDataCollector) Start(ctx context.Context) { } } if err := iod.serializeToFile(); err != nil { - fmt.Println(err) - os.Exit(2) + f.Done <- err + return } + f.Done <- nil }() } diff --git a/internal/ior.go b/internal/ior.go index 0243ae7..51373c4 100644 --- a/internal/ior.go +++ b/internal/ior.go @@ -190,6 +190,13 @@ func tuiTraceStarterFromRunTrace( streamEvents <- eventstream.NewStreamEvent(ep.EnterEv.GetTime(), ep) ep.Recycle() } + el.warningCb = func(message string) { + // Drop warning notifications if the stream channel is saturated. + select { + case streamEvents <- eventstream.NewWarningEvent(message): + default: + } + } }) close(streamEvents) errCh <- err diff --git a/internal/tui/eventstream/streamevent.go b/internal/tui/eventstream/streamevent.go index a5c3d14..dbe04dd 100644 --- a/internal/tui/eventstream/streamevent.go +++ b/internal/tui/eventstream/streamevent.go @@ -3,6 +3,7 @@ package eventstream import ( "ior/internal/event" "ior/internal/types" + "time" ) type StreamEvent struct { @@ -49,3 +50,18 @@ func NewStreamEvent(seq uint64, pair *event.Pair) StreamEvent { return e } + +// NewWarningEvent creates a synthetic stream row for non-fatal runtime warnings. +func NewWarningEvent(message string) StreamEvent { + now := uint64(time.Now().UnixNano()) + return StreamEvent{ + Seq: now, + TimeNs: now, + Syscall: "warning", + Comm: "ior", + FileName: message, + FD: UnknownFD, + RetVal: -1, + IsError: true, + } +} diff --git a/internal/tui/eventstream/streamevent_test.go b/internal/tui/eventstream/streamevent_test.go index 6be7407..6131fed 100644 --- a/internal/tui/eventstream/streamevent_test.go +++ b/internal/tui/eventstream/streamevent_test.go @@ -94,3 +94,29 @@ func TestNewStreamEventWithoutRetEvent(t *testing.T) { t.Fatalf("FD = %d, want %d", got.FD, UnknownFD) } } + +func TestNewWarningEventPopulatesFields(t *testing.T) { + got := NewWarningEvent("Dropped malformed event") + + if got.Syscall != "warning" { + t.Fatalf("Syscall = %q, want warning", got.Syscall) + } + if got.Comm != "ior" { + t.Fatalf("Comm = %q, want ior", got.Comm) + } + if got.FileName != "Dropped malformed event" { + t.Fatalf("FileName = %q, want warning text", got.FileName) + } + if got.FD != UnknownFD { + t.Fatalf("FD = %d, want %d", got.FD, UnknownFD) + } + if got.RetVal != -1 { + t.Fatalf("RetVal = %d, want -1", got.RetVal) + } + if !got.IsError { + t.Fatalf("IsError = false, want true") + } + if got.Seq == 0 || got.TimeNs == 0 { + t.Fatalf("Seq/TimeNs = %d/%d, want non-zero", got.Seq, got.TimeNs) + } +} |
