diff options
| author | Paul Buetow <paul@buetow.org> | 2026-05-13 14:31:59 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2026-05-13 14:31:59 +0300 |
| commit | ed7cf2505d92e05411d476b445bda45cab9aaf89 (patch) | |
| tree | 9de12631cda85c4336c0d8abf6266351fb0eb6fc /internal | |
| parent | 42645a4889c1e45ad2ab85e0a371ef8e1054062e (diff) | |
feat(eventloop): add panic recovery to events() goroutine for resilience
Wrap processRawEvent calls in a new processRawEventSafe() helper that
uses defer/recover to catch any panic from a callback and convert it into
a warning notification via warningCb, preventing a single bad event from
crashing the whole process. Added TestEventsPanicInCallbackIsRecoveredAndNotified
to verify the recovery behaviour end-to-end.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'internal')
| -rw-r--r-- | internal/eventloop_events_test.go | 50 | ||||
| -rw-r--r-- | internal/eventloop_runtime.go | 18 |
2 files changed, 67 insertions, 1 deletion
```diff
diff --git a/internal/eventloop_events_test.go b/internal/eventloop_events_test.go
index 4515164..881402e 100644
--- a/internal/eventloop_events_test.go
+++ b/internal/eventloop_events_test.go
@@ -2,8 +2,12 @@ package internal
 
 import (
 	"context"
+	"strings"
 	"testing"
 	"time"
+
+	"ior/internal/event"
+	"ior/internal/types"
 )
 
 func TestEventsStopsOnContextCancelWithoutRawData(t *testing.T) {
@@ -24,6 +28,52 @@ func TestEventsStopsOnContextCancelWithoutRawData(t *testing.T) {
 	}
 }
 
+// TestEventsPanicInCallbackIsRecoveredAndNotified verifies that a panic inside
+// a raw event handler does not crash the events() goroutine. The goroutine
+// must recover, emit a warning via warningCb, and continue processing
+// subsequent events rather than closing the output channel prematurely.
+func TestEventsPanicInCallbackIsRecoveredAndNotified(t *testing.T) {
+	el := mustNewEventLoop(t, eventLoopConfig{synchronousRawProcessing: false})
+	warnings := make(chan string, 4)
+	el.warningCb = func(message string) { warnings <- message }
+
+	// Install a handler for ENTER_OPEN_EVENT that always panics.
+	el.rawHandlers[types.ENTER_OPEN_EVENT] = func(_ []byte, _ chan<- *event.Pair) {
+		panic("injected test panic")
+	}
+
+	rawCh := make(chan []byte, 4)
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	out := el.events(ctx, rawCh)
+
+	// Send a raw payload whose first byte is ENTER_OPEN_EVENT to trigger the panic handler.
+	rawCh <- []byte{byte(types.ENTER_OPEN_EVENT)}
+
+	// The goroutine must survive; wait briefly and then cancel context cleanly.
+	select {
+	case msg := <-warnings:
+		if !strings.Contains(msg, "injected test panic") {
+			t.Fatalf("unexpected warning message: %q", msg)
+		}
+	case <-time.After(500 * time.Millisecond):
+		t.Fatal("timed out waiting for panic-recovery warning")
+	}
+
+	// Cancel context and confirm the channel closes normally (goroutine is still alive).
+	cancel()
+
+	select {
+	case _, ok := <-out:
+		if ok {
+			t.Fatal("expected output channel to be closed after cancellation")
+		}
+	case <-time.After(500 * time.Millisecond):
+		t.Fatal("timed out waiting for output channel to close after cancellation")
+	}
+}
+
 func TestEventsIgnoresEmptyRawPayload(t *testing.T) {
 	el := mustNewEventLoop(t, eventLoopConfig{})
 	rawCh := make(chan []byte, 1)
diff --git a/internal/eventloop_runtime.go b/internal/eventloop_runtime.go
index 85a90a1..01bc798 100644
--- a/internal/eventloop_runtime.go
+++ b/internal/eventloop_runtime.go
@@ -3,6 +3,7 @@ package internal
 import (
 	"context"
 	"fmt"
+	"runtime/debug"
 	"time"
 
 	"ior/internal/event"
@@ -85,7 +86,9 @@ func (e *eventLoop) events(ctx context.Context, rawCh <-chan []byte) <-chan *eve
 				if len(raw) == 0 {
 					continue
 				}
-				e.processRawEvent(raw, ch)
+				// Recover from any panic inside a callback so a single
+				// bad event cannot crash the entire process.
+				e.processRawEventSafe(raw, ch)
 			case <-ctx.Done():
 				fmt.Println("Stopping event loop")
 				return
@@ -96,6 +99,19 @@ func (e *eventLoop) events(ctx context.Context, rawCh <-chan []byte) <-chan *eve
 	return ch
 }
 
+// processRawEventSafe calls processRawEvent and recovers from any panic,
+// converting it into a warning notification so that one misbehaving event
+// does not crash the whole process.
+func (e *eventLoop) processRawEventSafe(raw []byte, ch chan<- *event.Pair) {
+	defer func() {
+		if r := recover(); r != nil {
+			stack := debug.Stack()
+			e.notifyWarning(fmt.Sprintf("Recovered panic in processRawEvent: %v\n%s", r, stack))
+		}
+	}()
+	e.processRawEvent(raw, ch)
+}
+
 func (e *eventLoop) processRawEvent(raw []byte, ch chan<- *event.Pair) {
 	if len(raw) == 0 {
 		return
```
