From 0aa3222cef46d527bb9437afa9ddd90f3a80a9d8 Mon Sep 17 00:00:00 2001
From: Paul Buetow <paul@buetow.org>
Date: Sat, 28 Jun 2025 19:52:43 +0300
Subject: refactor: consolidate optimization flags into DTAIL_TURBOBOOST_ENABLE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Replace DTAIL_CHANNELLESS_GREP and DTAIL_OPTIMIZED_READER with single flag
- Rename documentation to TURBOBOOST_OPTIMIZATION.md
- Fix channel-less adapter to use blocking sends (prevent data loss)
- Update logging messages to reference "turbo boost" mode

The DTAIL_TURBOBOOST_ENABLE variable now controls all performance
optimizations and can be extended to other commands in the future.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 CHANNELLESS_GREP_IMPLEMENTATION.md              | 103 ---------------------
 TURBOBOOST_OPTIMIZATION.md                      | 118 ++++++++++++++++++++++++
 internal/server/handlers/channelless_adapter.go |  14 +--
 internal/server/handlers/readcommand.go         |  20 ++--
 4 files changed, 131 insertions(+), 124 deletions(-)
 delete mode 100644 CHANNELLESS_GREP_IMPLEMENTATION.md
 create mode 100644 TURBOBOOST_OPTIMIZATION.md

diff --git a/CHANNELLESS_GREP_IMPLEMENTATION.md b/CHANNELLESS_GREP_IMPLEMENTATION.md
deleted file mode 100644
index af79d9c..0000000
--- a/CHANNELLESS_GREP_IMPLEMENTATION.md
+++ /dev/null
@@ -1,103 +0,0 @@
-# Channel-less dgrep Implementation
-
-## Overview
-
-This document describes the channel-less implementation of dgrep that was created to address performance bottlenecks caused by channel overhead in the original implementation.
-
-## Problem Statement
-
-The original dgrep implementation used multiple channels in a pipeline:
-- `rawLines chan *bytes.Buffer` (buffer: 100) - Raw lines read from file
-- `lines chan *line.Line` (buffer: 100) - Filtered lines to send to client
-
-This created several performance issues:
-1. Fixed channel buffer sizes causing blocking under high throughput
-2. Context switching overhead between goroutines
-3. Channel synchronization overhead
-4. Memory allocations for channel operations
-
-## Solution
-
-The channel-less implementation replaces the channel pipeline with direct function calls using a `LineProcessor` interface.
-
-### Key Components
-
-1. **LineProcessor Interface** (`internal/io/line/processor.go`)
-   - Defines methods for processing lines without channels
-   - `ProcessLine()` - Handle a single line
-   - `Flush()` - Ensure buffered data is written
-   - `Close()` - Clean up resources
-
-2. **GrepLineProcessor** (`internal/server/handlers/lineprocessor.go`)
-   - Implements LineProcessor for grep operations
-   - Writes directly to the network connection
-   - Uses internal buffering for efficiency (64KB buffer)
-   - Thread-safe with mutex protection
-
-3. **Modified File Reading** (`internal/io/fs/readfile_processor.go`)
-   - `StartWithProcessor()` - Channel-less file reading
-   - Direct callbacks instead of channel sends
-   - Inline regex filtering without goroutines
-
-4. **Optimized File Reading** (`internal/io/fs/readfile_processor_optimized.go`)
-   - Uses buffered line reading instead of byte-by-byte
-   - Custom scanner with 256KB buffer
-   - Efficient handling of long lines
-   - Special optimization for tail mode
-
-### Feature Flags
-
-The implementation can be controlled via environment variables:
-- `DTAIL_CHANNELLESS_GREP=yes` - Enable channel-less grep implementation
-- `DTAIL_OPTIMIZED_READER=yes` - Use optimized buffered reader
-
-### Benefits
-
-1. **Reduced Latency**: No channel queuing delays
-2. **Lower Memory Usage**: No channel buffers
-3. **Better CPU Efficiency**: Fewer context switches
-4. **Simpler Code Flow**: Direct processing without goroutine coordination
-5. **Predictable Performance**: No channel blocking
-
-### Backward Compatibility
-
-- Original channel-based implementation remains available
-- Same command-line interface
-- Protocol compatibility maintained
-- All integration tests pass unchanged
-
-### Performance Testing
-
-Use the provided script to compare performance:
-
-```bash
-./test_channelless_performance.sh
-```
-
-This will test:
-1. Original channel-based implementation
-2. Channel-less implementation
-3. Optimized channel-less implementation
-
-### Usage
-
-To use the channel-less implementation:
-
-```bash
-# Enable channel-less grep
-export DTAIL_CHANNELLESS_GREP=yes
-
-# Also enable optimized reader
-export DTAIL_OPTIMIZED_READER=yes
-
-# Run dgrep normally
-dgrep -regex "pattern" file.log
-```
-
-### Future Improvements
-
-1. Extend channel-less approach to other commands (dcat, dtail)
-2. Add configurable buffer sizes
-3. Implement zero-copy optimizations
-4. Add performance metrics collection
-5. Consider using io_uring on Linux for async I/O
\ No newline at end of file
diff --git a/TURBOBOOST_OPTIMIZATION.md b/TURBOBOOST_OPTIMIZATION.md
new file mode 100644
index 0000000..f13943f
--- /dev/null
+++ b/TURBOBOOST_OPTIMIZATION.md
@@ -0,0 +1,118 @@
+# DTail Turbo Boost Optimization
+
+## Overview
+
+This document describes the turbo boost optimization feature that provides significant performance improvements for DTail operations by using channel-less processing and optimized I/O.
+
+## Problem Statement
+
+The original dgrep implementation used multiple channels in a pipeline:
+- `rawLines chan *bytes.Buffer` (buffer: 100) - Raw lines read from file
+- `lines chan *line.Line` (buffer: 100) - Filtered lines to send to client
+
+This created several performance issues:
+1. Fixed channel buffer sizes causing blocking under high throughput
+2. Context switching overhead between goroutines
+3. Channel synchronization overhead
+4. Memory allocations for channel operations
+
+## Solution
+
+The channel-less implementation replaces the channel pipeline with direct function calls using a `LineProcessor` interface.
+
+### Key Components
+
+1. **LineProcessor Interface** (`internal/io/line/processor.go`)
+   - Defines methods for processing lines without channels
+   - `ProcessLine()` - Handle a single line
+   - `Flush()` - Ensure buffered data is written
+   - `Close()` - Clean up resources
+
+2. **GrepLineProcessor** (`internal/server/handlers/lineprocessor.go`)
+   - Implements LineProcessor for grep operations
+   - Writes directly to the network connection
+   - Uses internal buffering for efficiency (64KB buffer)
+   - Thread-safe with mutex protection
+
+3. **Modified File Reading** (`internal/io/fs/readfile_processor.go`)
+   - `StartWithProcessor()` - Channel-less file reading
+   - Direct callbacks instead of channel sends
+   - Inline regex filtering without goroutines
+
+4. **Optimized File Reading** (`internal/io/fs/readfile_processor_optimized.go`)
+   - Uses buffered line reading instead of byte-by-byte
+   - Custom scanner with 256KB buffer
+   - Efficient handling of long lines
+   - Special optimization for tail mode
+
+### Feature Flags
+
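+The implementation is controlled via a single environment variable, read by the DTail server:
+- `DTAIL_TURBOBOOST_ENABLE=yes` - Enable the channel-less implementation together with the optimized buffered reader (cat and grep operations, server mode only)
+- This flag replaces the former `DTAIL_CHANNELLESS_GREP` and `DTAIL_OPTIMIZED_READER` variables, which are no longer read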
+
+### Benefits
+
+1. **Reduced Latency**: No channel queuing delays
+2. **Lower Memory Usage**: No channel buffers
+3. **Better CPU Efficiency**: Fewer context switches
+4. **Simpler Code Flow**: Direct processing without goroutine coordination
+5. **Predictable Performance**: No channel blocking
+
+### Backward Compatibility
+
+- Original channel-based implementation remains available
+- Same command-line interface
+- Protocol compatibility maintained
+- All integration tests pass unchanged
+
+### Performance Testing
+
+Use the provided script to compare performance:
+
+```bash
+./test_channelless_performance.sh
+```
+
+This will test:
+1. Original channel-based implementation
+2. Channel-less implementation
+3. Optimized channel-less implementation
+
+### Usage
+
+To use the channel-less implementation:
+
+```bash
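+# Enable turbo boost: channel-less processing plus the optimized reader
+export DTAIL_TURBOBOOST_ENABLE=yes
+
+# The former DTAIL_CHANNELLESS_GREP and DTAIL_OPTIMIZED_READER variables
+# are no longer read; the single flag above replaces both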
+
+# Run dgrep normally
+dgrep -regex "pattern" file.log
+```
+
+### Future Improvements
+
+1. Extend channel-less approach to other commands (dcat, dtail)
+2. Add configurable buffer sizes
+3. Implement zero-copy optimizations
+4. Add performance metrics collection
+5. Consider using io_uring on Linux for async I/O
+
+## Enabling Turbo Boost
+
+To enable turbo boost optimizations:
+
+```bash
+export DTAIL_TURBOBOOST_ENABLE=yes
+```
+
+This enables:
+- Channel-less implementation for grep and cat operations
+- Optimized buffered I/O reader (256KB buffer)
+- Buffer pooling to reduce memory allocations
+
+The turbo boost mode is designed to be extended to other DTail commands in the future.
diff --git a/internal/server/handlers/channelless_adapter.go b/internal/server/handlers/channelless_adapter.go
index 9e5bc9c..a950408 100644
--- a/internal/server/handlers/channelless_adapter.go
+++ b/internal/server/handlers/channelless_adapter.go
@@ -2,10 +2,8 @@ package handlers
 
 import (
 	"bytes"
-	"fmt"
 
 	"github.com/mimecast/dtail/internal/io/line"
-	"github.com/mimecast/dtail/internal/io/pool"
 )
 
 // ChannellessLineProcessor adapts the channel-less processor to work with the existing handler infrastructure
@@ -30,15 +28,9 @@ func (p *ChannellessLineProcessor) ProcessLine(lineContent *bytes.Buffer, lineNu
 	// Create a line object that matches what the original implementation expects
 	l := line.New(lineContent, lineNum, 100, sourceID)
 	
-	// Send through the channel
-	select {
-	case p.lines <- l:
-		return nil
-	default:
-		// Channel full, recycle the buffer
-		pool.RecycleBytesBuffer(lineContent)
-		return fmt.Errorf("lines channel full")
-	}
+	// Send through the channel (blocking to ensure no lines are lost)
+	p.lines <- l
+	return nil
 }
 
 // Flush does nothing for this implementation
diff --git a/internal/server/handlers/readcommand.go b/internal/server/handlers/readcommand.go
index 7a351ba..abdbe9c 100644
--- a/internal/server/handlers/readcommand.go
+++ b/internal/server/handlers/readcommand.go
@@ -166,14 +166,14 @@ func (r *readCommand) read(ctx context.Context, ltx lcontext.LContext,
 		}
 	}
 
-	// Check if we should use the channel-less implementation
-	channellessEnabled := config.Env("DTAIL_CHANNELLESS_GREP")
-	dlog.Server.Info(r.server.user, "Channel-less check: enabled=", channellessEnabled, "mode=", r.mode)
+	// Check if we should use the turbo boost optimizations
+	turboBoostEnabled := config.Env("DTAIL_TURBOBOOST_ENABLE")
+	dlog.Server.Info(r.server.user, "Turbo boost check: enabled=", turboBoostEnabled, "mode=", r.mode)
 	// Only enable channel-less for server mode, not serverless mode
 	// Use the serverless field directly as it's more reliable
-	if channellessEnabled && (r.mode == omode.CatClient || r.mode == omode.GrepClient) && !r.server.serverless {
+	if turboBoostEnabled && (r.mode == omode.CatClient || r.mode == omode.GrepClient) && !r.server.serverless {
 		// Log to stderr for testing verification - only in server mode
-		fmt.Fprintf(os.Stderr, "[DTAIL] Using channel-less implementation for %s\n", path)
+		fmt.Fprintf(os.Stderr, "[DTAIL] Turbo boost enabled: using channel-less implementation for %s\n", path)
 		r.readWithProcessor(ctx, ltx, path, globID, re, reader)
 		return
 	}
@@ -217,13 +217,13 @@ func (r *readCommand) readWithProcessor(ctx context.Context, ltx lcontext.LConte
 	lines := r.server.lines
 	aggregate := r.server.aggregate
 
-	// Use the optimized version if available
-	useOptimized := config.Env("DTAIL_OPTIMIZED_READER")
+	// Use the optimized version if turbo boost is enabled
+	turboBoostEnabled := config.Env("DTAIL_TURBOBOOST_ENABLE")
 	
 	// Log to stderr for testing verification - only in server mode
 	if !r.server.serverless {
-		if useOptimized {
-			fmt.Fprintf(os.Stderr, "[DTAIL] Using optimized reader for %s\n", path)
+		if turboBoostEnabled {
+			fmt.Fprintf(os.Stderr, "[DTAIL] Turbo boost enabled: using optimized reader for %s\n", path)
 		} else {
 			fmt.Fprintf(os.Stderr, "[DTAIL] Using standard processor reader for %s\n", path)
 		}
@@ -240,7 +240,7 @@ func (r *readCommand) readWithProcessor(ctx context.Context, ltx lcontext.LConte
 		defer processor.Close()
 
 		var err error
-		if useOptimized {
+		if turboBoostEnabled {
 			err = reader.StartWithProcessorOptimized(ctx, ltx, processor, re)
 		} else {
 			err = reader.StartWithProcessor(ctx, ltx, processor, re)
-- 
cgit v1.2.3