Add comprehensive benchmarking framework for DTail

- Create benchmark framework to measure performance of dcat, dgrep, and dmap
- Generate test files of 10MB, 100MB, and 1GB with configurable patterns
- Support benchmarking with gzip and zstd compressed files
- Implement tool-specific benchmarks:
  * DCat: Simple reading, multiple files, compressed files
  * DGrep: Pattern matching, regex complexity, context lines, inverted grep
  * DMap: Aggregations, group by operations, complex queries, time intervals
- Track performance metrics: throughput (MB/sec), lines/sec, memory usage
- Save results in multiple formats: JSON, CSV, and Markdown reports
- Add Makefile targets: benchmark, benchmark-quick, benchmark-full
- Support environment variables for configuration (sizes, timeouts, etc.)
- Automatically clean up temporary .tmp files after benchmarks

The framework provides consistent performance testing across the DTail toolset
and enables tracking performance regressions between commits.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
author: Paul Buetow <paul@buetow.org> 2025-06-25 23:10:24 +0300
committer: Paul Buetow <paul@buetow.org> 2025-06-25 23:10:24 +0300
commit: 41ec9cf2942edc7be58d78e49a050131bb2faf8c (patch)
tree: a3f9dbd423c120f76e629f06524381476e948e9a /benchmarks/dcat_benchmark_test.go
parent: 281360144171c98641f50e938c439915c9b2580a (diff)
1 files changed, 320 insertions, 0 deletions
diff --git a/benchmarks/dcat_benchmark_test.go b/benchmarks/dcat_benchmark_test.go
new file mode 100644
index 0000000..189cd0b
--- /dev/null
+++ b/benchmarks/dcat_benchmark_test.go
@@ -0,0 +1,320 @@
+package benchmarks
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"testing"
+	"time"
+)
+
+// BenchmarkDCatSimple benchmarks simple file reading
+func BenchmarkDCatSimple(b *testing.B) {
+	cleanup := SetupBenchmark(b)
+	defer cleanup()
+	
+	sizes := GetBenchmarkSizes()
+	
+	for _, size := range sizes {
+		b.Run(fmt.Sprintf("Size=%s", size), func(b *testing.B) {
+			// Generate test file
+			config := TestDataConfig{
+				Size:          size,
+				Format:        SimpleLogFormat,
+				Compression:   NoCompression,
+				LineVariation: 50,
+			}
+			
+			testFile := GenerateTestFile(b, config)
+			defer os.Remove(testFile)
+			
+			fileSize, _ := GetFileSize(testFile)
+			lineCount, _ := CountFileLines(testFile)
+			
+			// Warmup
+			WarmupCommand(b, "dcat", "--plain", "--cfg", "none", testFile)
+			
+			b.ResetTimer()
+			
+			// Run benchmark
+			totalDuration := time.Duration(0)
+			for i := 0; i < b.N; i++ {
+				result, err := RunBenchmarkCommand(b, "dcat", "--plain", "--cfg", "none", testFile)
+				if err != nil {
+					b.Fatalf("Command failed: %v", err)
+				}
+				totalDuration += result.Duration
+			}
+			
+			avgDuration := totalDuration / time.Duration(b.N)
+			throughput := CalculateThroughput(fileSize, avgDuration)
+			linesPerSec := CalculateLinesPerSecond(lineCount, avgDuration)
+			
+			// Report metrics
+			b.ReportMetric(throughput, "MB/sec")
+			b.ReportMetric(linesPerSec, "lines/sec")
+			
+			// Save result
+			benchResult := BenchmarkResult{
+				Timestamp:   time.Now(),
+				Tool:        "dcat",
+				Operation:   fmt.Sprintf("Simple_%s", size),
+				FileSize:    fileSize,
+				Duration:    avgDuration,
+				Throughput:  throughput,
+				LinesPerSec: linesPerSec,
+			}
+			SaveResults([]BenchmarkResult{benchResult})
+		})
+	}
+}
+
+// BenchmarkDCatMultipleFiles measures dcat reading many files in a single
+// invocation, with sub-benchmarks for 10, 50 and 100 input files.
+//
+// Every input file is generated with a requested size of Small / 10. The
+// "MB/sec" and "lines/sec" metrics are computed from the combined size and
+// line count of all files, and the file count is reported as "files".
+func BenchmarkDCatMultipleFiles(b *testing.B) {
+	cleanup := SetupBenchmark(b)
+	defer cleanup()
+
+	numFiles := []int{10, 50, 100}
+	fileSize := Small / 10 // 1MB each
+
+	for _, num := range numFiles {
+		b.Run(fmt.Sprintf("Files=%d", num), func(b *testing.B) {
+			testFiles := make([]string, 0, num)
+			// Remove whatever was generated once the sub-benchmark ends, even
+			// if a later step aborts it. Removal is best effort.
+			defer func() {
+				for _, f := range testFiles {
+					os.Remove(f)
+				}
+			}()
+
+			// All files share the same generation settings.
+			config := TestDataConfig{
+				Size:          FileSize(fileSize),
+				Format:        SimpleLogFormat,
+				Compression:   NoCompression,
+				LineVariation: 50,
+			}
+
+			// Generate test files
+			var totalSize int64
+			totalLines := 0
+			for i := 0; i < num; i++ {
+				testFile := GenerateTestFile(b, config)
+				testFiles = append(testFiles, testFile)
+
+				// The derived metrics depend on accurate totals, so abort
+				// instead of silently adding zeros.
+				size, err := GetFileSize(testFile)
+				if err != nil {
+					b.Fatalf("Getting size of %s failed: %v", testFile, err)
+				}
+				lines, err := CountFileLines(testFile)
+				if err != nil {
+					b.Fatalf("Counting lines of %s failed: %v", testFile, err)
+				}
+				totalSize += size
+				totalLines += lines
+			}
+
+			// Warmup
+			args := append([]string{"--plain", "--cfg", "none"}, testFiles...)
+			WarmupCommand(b, "dcat", args...)
+
+			// Exclude file generation and warmup from the measured time.
+			b.ResetTimer()
+
+			// Run benchmark
+			var totalDuration time.Duration
+			for i := 0; i < b.N; i++ {
+				result, err := RunBenchmarkCommand(b, "dcat", args...)
+				if err != nil {
+					b.Fatalf("Command failed: %v", err)
+				}
+				totalDuration += result.Duration
+			}
+
+			// Keep metric calculation and result persistence out of ns/op.
+			b.StopTimer()
+
+			avgDuration := totalDuration / time.Duration(b.N)
+			throughput := CalculateThroughput(totalSize, avgDuration)
+			linesPerSec := CalculateLinesPerSecond(totalLines, avgDuration)
+
+			// Report metrics
+			b.ReportMetric(throughput, "MB/sec")
+			b.ReportMetric(linesPerSec, "lines/sec")
+			b.ReportMetric(float64(num), "files")
+
+			// Save result
+			SaveResults([]BenchmarkResult{{
+				Timestamp:   time.Now(),
+				Tool:        "dcat",
+				Operation:   fmt.Sprintf("MultiFile_%d", num),
+				FileSize:    totalSize,
+				Duration:    avgDuration,
+				Throughput:  throughput,
+				LinesPerSec: linesPerSec,
+			}})
+		})
+	}
+}
+
+// BenchmarkDCatCompressed benchmarks reading compressed files
+func BenchmarkDCatCompressed(b *testing.B) {
+	cleanup := SetupBenchmark(b)
+	defer cleanup()
+	
+	compressions := []struct {
+		name string
+		typ  CompressionType
+	}{
+		{"none", NoCompression},
+		{"gzip", GzipCompression},
+		{"zstd", ZstdCompression},
+	}
+	
+	sizes := GetBenchmarkSizes()
+	if IsQuickMode() {
+		sizes = []FileSize{Small}
+	}
+	
+	for _, size := range sizes {
+		for _, comp := range compressions {
+			b.Run(fmt.Sprintf("Size=%s/Compression=%s", size, comp.name), func(b *testing.B) {
+				// Generate test file
+				config := TestDataConfig{
+					Size:          size,
+					Format:        SimpleLogFormat,
+					Compression:   comp.typ,
+					LineVariation: 50,
+				}
+				
+				testFile := GenerateTestFile(b, config)
+				defer os.Remove(testFile)
+				
+				// Get uncompressed size for throughput calculation
+				uncompressedSize := int64(size)
+				compressedSize, _ := GetFileSize(testFile)
+				compressionRatio := float64(uncompressedSize) / float64(compressedSize)
+				
+				// Estimate line count (compressed files are harder to count)
+				approxLineCount := int(size) / 150
+				
+				// Warmup
+				WarmupCommand(b, "dcat", "--plain", "--cfg", "none", testFile)
+				
+				b.ResetTimer()
+				
+				// Run benchmark
+				totalDuration := time.Duration(0)
+				for i := 0; i < b.N; i++ {
+					result, err := RunBenchmarkCommand(b, "dcat", "--plain", "--cfg", "none", testFile)
+					if err != nil {
+						b.Fatalf("Command failed: %v", err)
+					}
+					totalDuration += result.Duration
+				}
+				
+				avgDuration := totalDuration / time.Duration(b.N)
+				// Throughput based on uncompressed size
+				throughput := CalculateThroughput(uncompressedSize, avgDuration)
+				linesPerSec := CalculateLinesPerSecond(approxLineCount, avgDuration)
+				
+				// Report metrics
+				b.ReportMetric(throughput, "MB/sec")
+				b.ReportMetric(linesPerSec, "lines/sec")
+				b.ReportMetric(compressionRatio, "compression_ratio")
+				
+				// Save result
+				benchResult := BenchmarkResult{
+					Timestamp:   time.Now(),
+					Tool:        "dcat",
+					Operation:   fmt.Sprintf("Compressed_%s_%s", comp.name, size),
+					FileSize:    uncompressedSize,
+					Duration:    avgDuration,
+					Throughput:  throughput,
+					LinesPerSec: linesPerSec,
+				}
+				SaveResults([]BenchmarkResult{benchResult})
+			})
+		}
+	}
+}
+
+// BenchmarkDCatServerMode is meant to compare dcat in serverless mode against
+// dcat talking to a dserver, with one sub-benchmark per size/mode combination.
+//
+// The whole benchmark is skipped when no "dserver" file exists one directory
+// above the current working directory. Server mode is not implemented yet, so
+// its sub-benchmarks are skipped and only the serverless mode is measured.
+// Sizes come from GetBenchmarkSizes and are reduced to Small when IsQuickMode
+// reports true.
+func BenchmarkDCatServerMode(b *testing.B) {
+	cleanup := SetupBenchmark(b)
+	defer cleanup()
+
+	// Skip if dserver binary doesn't exist
+	dserverPath := filepath.Join("..", "dserver")
+	if _, err := os.Stat(dserverPath); err != nil {
+		b.Skip("dserver binary not found, skipping server mode benchmarks")
+	}
+
+	modes := []struct {
+		name   string
+		server bool
+	}{
+		{"serverless", false},
+		{"server", true},
+	}
+
+	sizes := GetBenchmarkSizes()
+	if IsQuickMode() {
+		sizes = []FileSize{Small}
+	}
+
+	for _, size := range sizes {
+		for _, mode := range modes {
+			b.Run(fmt.Sprintf("Size=%s/Mode=%s", size, mode.name), func(b *testing.B) {
+				if mode.server {
+					// Note: In a real implementation, we'd need to:
+					// 1. Start dserver in background
+					// 2. Wait for it to be ready
+					// 3. Run dcat with --servers flag
+					// 4. Stop dserver after benchmark
+					// For now, we'll skip the actual server mode implementation.
+					// Skip before generating any test data so that no file is
+					// written and counted only to be thrown away.
+					b.Skip("Server mode benchmarking requires additional setup")
+				}
+
+				// Generate test file
+				config := TestDataConfig{
+					Size:          size,
+					Format:        SimpleLogFormat,
+					Compression:   NoCompression,
+					LineVariation: 50,
+				}
+
+				testFile := GenerateTestFile(b, config)
+				// Best-effort cleanup; a failed removal does not affect the result.
+				defer os.Remove(testFile)
+
+				// The derived metrics are meaningless without the real size and
+				// line count, so abort instead of silently computing from zeros.
+				fileSize, err := GetFileSize(testFile)
+				if err != nil {
+					b.Fatalf("Getting size of %s failed: %v", testFile, err)
+				}
+				lineCount, err := CountFileLines(testFile)
+				if err != nil {
+					b.Fatalf("Counting lines of %s failed: %v", testFile, err)
+				}
+
+				args := []string{"--plain", "--cfg", "none", testFile}
+
+				// Warmup
+				WarmupCommand(b, "dcat", args...)
+
+				// Exclude file generation and warmup from the measured time.
+				b.ResetTimer()
+
+				// Run benchmark
+				var totalDuration time.Duration
+				for i := 0; i < b.N; i++ {
+					result, err := RunBenchmarkCommand(b, "dcat", args...)
+					if err != nil {
+						b.Fatalf("Command failed: %v", err)
+					}
+					totalDuration += result.Duration
+				}
+
+				// Keep metric calculation and result persistence out of ns/op.
+				b.StopTimer()
+
+				avgDuration := totalDuration / time.Duration(b.N)
+				throughput := CalculateThroughput(fileSize, avgDuration)
+				linesPerSec := CalculateLinesPerSecond(lineCount, avgDuration)
+
+				// Report metrics
+				b.ReportMetric(throughput, "MB/sec")
+				b.ReportMetric(linesPerSec, "lines/sec")
+
+				// Save result
+				SaveResults([]BenchmarkResult{{
+					Timestamp:   time.Now(),
+					Tool:        "dcat",
+					Operation:   fmt.Sprintf("%s_%s", mode.name, size),
+					FileSize:    fileSize,
+					Duration:    avgDuration,
+					Throughput:  throughput,
+					LinesPerSec: linesPerSec,
+				}})
+			})
+		}
+	}
+}
author	Paul Buetow <paul@buetow.org>	2025-06-25 23:10:24 +0300
committer	Paul Buetow <paul@buetow.org>	2025-06-25 23:10:24 +0300
commit	41ec9cf2942edc7be58d78e49a050131bb2faf8c (patch)
tree	a3f9dbd423c120f76e629f06524381476e948e9a /benchmarks/dcat_benchmark_test.go
parent	281360144171c98641f50e938c439915c9b2580a (diff)