From 0aa3222cef46d527bb9437afa9ddd90f3a80a9d8 Mon Sep 17 00:00:00 2001 From: Paul Buetow Date: Sat, 28 Jun 2025 19:52:43 +0300 Subject: refactor: consolidate optimization flags into DTAIL_TURBOBOOST_ENABLE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace DTAIL_CHANNELLESS_GREP and DTAIL_OPTIMIZED_READER with single flag - Rename documentation to TURBOBOOST_OPTIMIZATION.md - Fix channel-less adapter to use blocking sends (prevent data loss) - Update logging messages to reference "turbo boost" mode The DTAIL_TURBOBOOST_ENABLE variable now controls all performance optimizations and can be extended to other commands in the future. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- CHANNELLESS_GREP_IMPLEMENTATION.md | 103 --------------------- TURBOBOOST_OPTIMIZATION.md | 118 ++++++++++++++++++++++++ internal/server/handlers/channelless_adapter.go | 14 +-- internal/server/handlers/readcommand.go | 20 ++-- 4 files changed, 131 insertions(+), 124 deletions(-) delete mode 100644 CHANNELLESS_GREP_IMPLEMENTATION.md create mode 100644 TURBOBOOST_OPTIMIZATION.md diff --git a/CHANNELLESS_GREP_IMPLEMENTATION.md b/CHANNELLESS_GREP_IMPLEMENTATION.md deleted file mode 100644 index af79d9c..0000000 --- a/CHANNELLESS_GREP_IMPLEMENTATION.md +++ /dev/null @@ -1,103 +0,0 @@ -# Channel-less dgrep Implementation - -## Overview - -This document describes the channel-less implementation of dgrep that was created to address performance bottlenecks caused by channel overhead in the original implementation. - -## Problem Statement - -The original dgrep implementation used multiple channels in a pipeline: -- `rawLines chan *bytes.Buffer` (buffer: 100) - Raw lines read from file -- `lines chan *line.Line` (buffer: 100) - Filtered lines to send to client - -This created several performance issues: -1. Fixed channel buffer sizes causing blocking under high throughput -2. 
Context switching overhead between goroutines -3. Channel synchronization overhead -4. Memory allocations for channel operations - -## Solution - -The channel-less implementation replaces the channel pipeline with direct function calls using a `LineProcessor` interface. - -### Key Components - -1. **LineProcessor Interface** (`internal/io/line/processor.go`) - - Defines methods for processing lines without channels - - `ProcessLine()` - Handle a single line - - `Flush()` - Ensure buffered data is written - - `Close()` - Clean up resources - -2. **GrepLineProcessor** (`internal/server/handlers/lineprocessor.go`) - - Implements LineProcessor for grep operations - - Writes directly to the network connection - - Uses internal buffering for efficiency (64KB buffer) - - Thread-safe with mutex protection - -3. **Modified File Reading** (`internal/io/fs/readfile_processor.go`) - - `StartWithProcessor()` - Channel-less file reading - - Direct callbacks instead of channel sends - - Inline regex filtering without goroutines - -4. **Optimized File Reading** (`internal/io/fs/readfile_processor_optimized.go`) - - Uses buffered line reading instead of byte-by-byte - - Custom scanner with 256KB buffer - - Efficient handling of long lines - - Special optimization for tail mode - -### Feature Flags - -The implementation can be controlled via environment variables: -- `DTAIL_CHANNELLESS_GREP=yes` - Enable channel-less grep implementation -- `DTAIL_OPTIMIZED_READER=yes` - Use optimized buffered reader - -### Benefits - -1. **Reduced Latency**: No channel queuing delays -2. **Lower Memory Usage**: No channel buffers -3. **Better CPU Efficiency**: Fewer context switches -4. **Simpler Code Flow**: Direct processing without goroutine coordination -5. 
**Predictable Performance**: No channel blocking - -### Backward Compatibility - -- Original channel-based implementation remains available -- Same command-line interface -- Protocol compatibility maintained -- All integration tests pass unchanged - -### Performance Testing - -Use the provided script to compare performance: - -```bash -./test_channelless_performance.sh -``` - -This will test: -1. Original channel-based implementation -2. Channel-less implementation -3. Optimized channel-less implementation - -### Usage - -To use the channel-less implementation: - -```bash -# Enable channel-less grep -export DTAIL_CHANNELLESS_GREP=yes - -# Also enable optimized reader -export DTAIL_OPTIMIZED_READER=yes - -# Run dgrep normally -dgrep -regex "pattern" file.log -``` - -### Future Improvements - -1. Extend channel-less approach to other commands (dcat, dtail) -2. Add configurable buffer sizes -3. Implement zero-copy optimizations -4. Add performance metrics collection -5. Consider using io_uring on Linux for async I/O \ No newline at end of file diff --git a/TURBOBOOST_OPTIMIZATION.md b/TURBOBOOST_OPTIMIZATION.md new file mode 100644 index 0000000..f13943f --- /dev/null +++ b/TURBOBOOST_OPTIMIZATION.md @@ -0,0 +1,118 @@ +# DTail Turbo Boost Optimization + +## Overview + +This document describes the turbo boost optimization feature that provides significant performance improvements for DTail operations by using channel-less processing and optimized I/O. + +## Problem Statement + +The original dgrep implementation used multiple channels in a pipeline: +- `rawLines chan *bytes.Buffer` (buffer: 100) - Raw lines read from file +- `lines chan *line.Line` (buffer: 100) - Filtered lines to send to client + +This created several performance issues: +1. Fixed channel buffer sizes causing blocking under high throughput +2. Context switching overhead between goroutines +3. Channel synchronization overhead +4. 
Memory allocations for channel operations + +## Solution + +The channel-less implementation replaces the channel pipeline with direct function calls using a `LineProcessor` interface. + +### Key Components + +1. **LineProcessor Interface** (`internal/io/line/processor.go`) + - Defines methods for processing lines without channels + - `ProcessLine()` - Handle a single line + - `Flush()` - Ensure buffered data is written + - `Close()` - Clean up resources + +2. **GrepLineProcessor** (`internal/server/handlers/lineprocessor.go`) + - Implements LineProcessor for grep operations + - Writes directly to the network connection + - Uses internal buffering for efficiency (64KB buffer) + - Thread-safe with mutex protection + +3. **Modified File Reading** (`internal/io/fs/readfile_processor.go`) + - `StartWithProcessor()` - Channel-less file reading + - Direct callbacks instead of channel sends + - Inline regex filtering without goroutines + +4. **Optimized File Reading** (`internal/io/fs/readfile_processor_optimized.go`) + - Uses buffered line reading instead of byte-by-byte + - Custom scanner with 256KB buffer + - Efficient handling of long lines + - Special optimization for tail mode + +### Feature Flags + +The implementation is controlled via a single environment variable: +- `DTAIL_TURBOBOOST_ENABLE=yes` - Enable channel-less processing and the optimized buffered reader +- This flag replaces the former `DTAIL_CHANNELLESS_GREP` and `DTAIL_OPTIMIZED_READER` variables + +### Benefits + +1. **Reduced Latency**: No channel queuing delays +2. **Lower Memory Usage**: No channel buffers +3. **Better CPU Efficiency**: Fewer context switches +4. **Simpler Code Flow**: Direct processing without goroutine coordination +5. 
**Predictable Performance**: No channel blocking + +### Backward Compatibility + +- Original channel-based implementation remains available +- Same command-line interface +- Protocol compatibility maintained +- All integration tests pass unchanged + +### Performance Testing + +Use the provided script to compare performance: + +```bash +./test_channelless_performance.sh +``` + +This will test: +1. Original channel-based implementation +2. Channel-less implementation +3. Optimized channel-less implementation + +### Usage + +To use the channel-less implementation, enable turbo boost: + +```bash +# Enable turbo boost (replaces DTAIL_CHANNELLESS_GREP +# and DTAIL_OPTIMIZED_READER) +export DTAIL_TURBOBOOST_ENABLE=yes + +# Turbo boost applies in server mode only, not serverless mode + +# Run dgrep normally +dgrep -regex "pattern" file.log +``` + +### Future Improvements + +1. Extend channel-less approach to other commands (dcat, dtail) +2. Add configurable buffer sizes +3. Implement zero-copy optimizations +4. Add performance metrics collection +5. Consider using io_uring on Linux for async I/O + +## Usage + +To enable turbo boost optimizations: + +```bash +export DTAIL_TURBOBOOST_ENABLE=yes +``` + +This enables: +- Channel-less implementation for grep and cat operations +- Optimized buffered I/O reader (256KB buffer) +- Buffer pooling to reduce memory allocations + +The turbo boost mode is designed to be extended to other DTail commands in the future. 
diff --git a/internal/server/handlers/channelless_adapter.go b/internal/server/handlers/channelless_adapter.go index 9e5bc9c..a950408 100644 --- a/internal/server/handlers/channelless_adapter.go +++ b/internal/server/handlers/channelless_adapter.go @@ -2,10 +2,8 @@ package handlers import ( "bytes" - "fmt" "github.com/mimecast/dtail/internal/io/line" - "github.com/mimecast/dtail/internal/io/pool" ) // ChannellessLineProcessor adapts the channel-less processor to work with the existing handler infrastructure @@ -30,15 +28,9 @@ func (p *ChannellessLineProcessor) ProcessLine(lineContent *bytes.Buffer, lineNu // Create a line object that matches what the original implementation expects l := line.New(lineContent, lineNum, 100, sourceID) - // Send through the channel - select { - case p.lines <- l: - return nil - default: - // Channel full, recycle the buffer - pool.RecycleBytesBuffer(lineContent) - return fmt.Errorf("lines channel full") - } + // Send through the channel (blocking to ensure no lines are lost) + p.lines <- l + return nil } // Flush does nothing for this implementation diff --git a/internal/server/handlers/readcommand.go b/internal/server/handlers/readcommand.go index 7a351ba..abdbe9c 100644 --- a/internal/server/handlers/readcommand.go +++ b/internal/server/handlers/readcommand.go @@ -166,14 +166,14 @@ func (r *readCommand) read(ctx context.Context, ltx lcontext.LContext, } } - // Check if we should use the channel-less implementation - channellessEnabled := config.Env("DTAIL_CHANNELLESS_GREP") - dlog.Server.Info(r.server.user, "Channel-less check: enabled=", channellessEnabled, "mode=", r.mode) + // Check if we should use the turbo boost optimizations + turboBoostEnabled := config.Env("DTAIL_TURBOBOOST_ENABLE") + dlog.Server.Info(r.server.user, "Turbo boost check: enabled=", turboBoostEnabled, "mode=", r.mode) // Only enable channel-less for server mode, not serverless mode // Use the serverless field directly as it's more reliable - if 
channellessEnabled && (r.mode == omode.CatClient || r.mode == omode.GrepClient) && !r.server.serverless { + if turboBoostEnabled && (r.mode == omode.CatClient || r.mode == omode.GrepClient) && !r.server.serverless { // Log to stderr for testing verification - only in server mode - fmt.Fprintf(os.Stderr, "[DTAIL] Using channel-less implementation for %s\n", path) + fmt.Fprintf(os.Stderr, "[DTAIL] Turbo boost enabled: using channel-less implementation for %s\n", path) r.readWithProcessor(ctx, ltx, path, globID, re, reader) return } @@ -217,13 +217,13 @@ func (r *readCommand) readWithProcessor(ctx context.Context, ltx lcontext.LConte lines := r.server.lines aggregate := r.server.aggregate - // Use the optimized version if available - useOptimized := config.Env("DTAIL_OPTIMIZED_READER") + // Use the optimized version if turbo boost is enabled + turboBoostEnabled := config.Env("DTAIL_TURBOBOOST_ENABLE") // Log to stderr for testing verification - only in server mode if !r.server.serverless { - if useOptimized { - fmt.Fprintf(os.Stderr, "[DTAIL] Using optimized reader for %s\n", path) + if turboBoostEnabled { + fmt.Fprintf(os.Stderr, "[DTAIL] Turbo boost enabled: using optimized reader for %s\n", path) } else { fmt.Fprintf(os.Stderr, "[DTAIL] Using standard processor reader for %s\n", path) } @@ -240,7 +240,7 @@ func (r *readCommand) readWithProcessor(ctx context.Context, ltx lcontext.LConte defer processor.Close() var err error - if useOptimized { + if turboBoostEnabled { err = reader.StartWithProcessorOptimized(ctx, ltx, processor, re) } else { err = reader.StartWithProcessor(ctx, ltx, processor, re) -- cgit v1.2.3