summaryrefslogtreecommitdiff
path: root/benchmarks
diff options
context:
space:
mode:
authorPaul Buetow <paul@buetow.org>2025-06-26 13:49:38 +0300
committerPaul Buetow <paul@buetow.org>2025-06-26 13:49:38 +0300
commit6664996ced62c77e0c62bc1619662cbed7fccff6 (patch)
treeb995d8aa34aa68ec8f97c4be417ac96e6c6abf48 /benchmarks
parent72828b8c5f575cfc7c7c27c5a5d3b7fd9225b625 (diff)
feat: add profiling framework with command echoing
Created a comprehensive profiling framework for dtail commands (dcat, dgrep, dmap) to analyze CPU usage and memory allocations. The framework now prints all executed commands to stdout for full transparency. Key features: - Integrated Go profiling (CPU, memory, allocations) into all three commands - Created profile.sh bash script for analyzing pprof profiles - Added multiple Makefile targets for different profiling scenarios - Automated profiling scripts with command echoing - Support for different data sizes (quick, normal, full) - Special handling for dmap MapReduce format All profiling commands are now echoed to stdout before execution, making it easy to understand what the framework is doing and reproduce commands manually. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
Diffstat (limited to 'benchmarks')
-rw-r--r--benchmarks/PROFILING.md372
-rw-r--r--benchmarks/README.md92
-rwxr-xr-xbenchmarks/benchmark.sh318
-rw-r--r--benchmarks/generate_profile_data.go159
-rwxr-xr-xbenchmarks/profile_benchmarks.sh152
-rwxr-xr-xbenchmarks/profile_dmap.sh121
-rw-r--r--benchmarks/profile_example.go307
-rwxr-xr-xbenchmarks/profile_quick.sh86
-rw-r--r--benchmarks/profile_runner.go233
9 files changed, 1840 insertions, 0 deletions
diff --git a/benchmarks/PROFILING.md b/benchmarks/PROFILING.md
new file mode 100644
index 0000000..04ed933
--- /dev/null
+++ b/benchmarks/PROFILING.md
@@ -0,0 +1,372 @@
+# DTail Profiling Framework
+
+This document describes the profiling framework for dtail commands (dcat, dgrep, dmap) to analyze CPU usage and memory allocations.
+
+## Overview
+
+The profiling framework provides:
+- CPU profiling to identify performance bottlenecks
+- Memory profiling to track allocations and detect leaks
+- Integration with existing benchmarks
+- Analysis tools for profile interpretation
+
+## Quick Start
+
+### 1. Build the Tools
+
+```bash
+make build # Builds all tools including dprofile
+```
+
+### 2. Run Commands with Profiling
+
+Each command now supports profiling flags:
+
+```bash
+# Profile dcat
+./dcat -profile -profiledir profiles -plain -cfg none /path/to/file.log
+
+# Profile dgrep with specific profiling types
+./dgrep -cpuprofile -memprofile -profiledir profiles -regex "error" /path/to/file.log
+
+# Profile dmap
+./dmap -profile -query "select count(*) from data.csv"
+```
+
+### 3. Analyze Profiles
+
+Use the included `profile.sh` script for quick analysis:
+
+```bash
+# Analyze CPU profile
+./profiling/profile.sh profiles/dcat_cpu_20240101_120000.prof
+
+# Show top 20 functions
+./profiling/profile.sh -top 20 profiles/dgrep_mem_20240101_120000.prof
+
+# Sort by cumulative time/allocations
+./profiling/profile.sh -cum profiles/dmap_cpu_20240101_120000.prof
+
+# List all profiles
+./profiling/profile.sh -list profiles/
+
+# Open web browser with flame graph
+./profiling/profile.sh -web profiles/dcat_cpu_*.prof
+```
+
+## Profiling Options
+
+### Command-line Flags
+
+All dtail commands support these profiling flags:
+
+- `-cpuprofile`: Enable CPU profiling only
+- `-memprofile`: Enable memory profiling only
+- `-profile`: Enable both CPU and memory profiling
+- `-profiledir <dir>`: Directory to store profiles (default: "profiles")
+
+### Profile Types
+
+1. **CPU Profile** (`*_cpu_*.prof`)
+ - Samples CPU usage during execution
+ - Identifies hot functions and code paths
+ - Useful for optimizing computational bottlenecks
+
+2. **Memory Profile** (`*_mem_*.prof`)
+ - Captures heap allocations at end of execution
+ - Shows memory usage by function
+ - Helps identify memory leaks
+
+3. **Allocation Profile** (`*_alloc_*.prof`)
+ - Tracks all allocations during execution
+ - More detailed than memory profile
+ - Useful for reducing allocation pressure
+
+## Using with Benchmarks
+
+### Automated Profiling Script
+
+Run the included profiling script:
+
+```bash
+cd benchmarks
+./profile_benchmarks.sh
+```
+
+This script:
+- Generates test data of various sizes
+- Profiles dcat and dgrep with different workloads
+- Stores profiles in the `profiles` directory
+- Provides analysis commands
+
+For dmap profiling (requires MapReduce format):
+```bash
+cd benchmarks
+./profile_dmap.sh
+```
+
+### Using Make Targets
+
+```bash
+# Quick profiling with immediate results
+make profile-quick
+
+# Profile individual commands
+make profile-dcat
+make profile-dgrep
+make profile-dmap # Uses MapReduce format
+
+# Full automated profiling
+make profile-auto
+```
+
+### Benchmark Integration
+
+Run profiling-enabled benchmarks:
+
+```bash
+cd benchmarks
+go test -bench="WithProfiling" -benchtime=1x
+```
+
+### Custom Profile Runner
+
+Use the profile runner in your benchmarks:
+
+```go
+import "github.com/mimecast/dtail/benchmarks"
+
+func BenchmarkMyFeature(b *testing.B) {
+ benchmarks.ProfileBenchmark(b, "MyFeature", "dcat",
+ "--plain", "--cfg", "none", "testfile.log")
+}
+```
+
+## Profile Analysis
+
+### Using go tool pprof
+
+For interactive analysis:
+
+```bash
+# Interactive mode
+go tool pprof profiles/dcat_cpu_*.prof
+
+# Common pprof commands:
+# top - Show top functions
+# list func - Show source code for function
+# web - Generate SVG graph
+# peek func - Show callers/callees of function
+```
+
+Generate visualizations:
+
+```bash
+# Interactive web UI with flame graph (the graph view requires graphviz)
+go tool pprof -http=:8080 profiles/dcat_cpu_*.prof
+
+# Generate SVG
+go tool pprof -svg profiles/dgrep_mem_*.prof > profile.svg
+
+# Generate text report
+go tool pprof -text profiles/dmap_alloc_*.prof > report.txt
+```
+
+### Using profile.sh
+
+The `profile.sh` script provides quick summaries:
+
+```bash
+# List all profiles
+./profiling/profile.sh -list profiles/
+
+# Analyze specific profile
+./profiling/profile.sh profiles/dcat_cpu_20240101_120000.prof
+
+# Get help
+./profiling/profile.sh -help
+```
+
+## Optimization Workflow
+
+1. **Baseline Performance**
+ ```bash
+ # Run benchmarks without profiling
+ cd benchmarks
+ go test -bench="BenchmarkDCat" -benchtime=10s
+ ```
+
+2. **Profile Execution**
+ ```bash
+ # Run with profiling
+ ./dcat -profile -profiledir profiles large_file.log
+ ```
+
+3. **Identify Bottlenecks**
+ ```bash
+ # Analyze CPU profile
+ ./dprofile -profile profiles/dcat_cpu_*.prof -top 10
+
+ # Check memory allocations
+ go tool pprof -alloc_space profiles/dcat_alloc_*.prof
+ ```
+
+4. **Optimize Code**
+ - Focus on functions with high Flat% (direct CPU usage)
+ - Reduce allocations in hot paths
+ - Consider buffering and pooling
+
+5. **Verify Improvements**
+ ```bash
+ # Re-run benchmarks after optimization
+ go test -bench="BenchmarkDCat" -benchtime=10s
+ ```
+
+## Common Performance Issues
+
+### CPU Bottlenecks
+
+Look for:
+- Regex compilation in loops
+- Excessive string operations
+- Inefficient algorithms (O(n²) or worse)
+- Unnecessary type conversions
+
+Example optimization:
+```go
+// Before: Regex compiled every time
+for _, line := range lines {
+ if regexp.MustCompile(pattern).MatchString(line) {
+ // ...
+ }
+}
+
+// After: Compile once
+re := regexp.MustCompile(pattern)
+for _, line := range lines {
+ if re.MatchString(line) {
+ // ...
+ }
+}
+```
+
+### Memory Issues
+
+Common patterns:
+- String concatenation in loops
+- Large temporary slices
+- Unclosed resources
+- Excessive goroutines
+
+Example optimization:
+```go
+// Before: Many allocations
+result := ""
+for _, s := range strings {
+ result += s + "\n"
+}
+
+// After: Single allocation
+var buf strings.Builder
+buf.Grow(estimatedSize)
+for _, s := range strings {
+ buf.WriteString(s)
+ buf.WriteByte('\n')
+}
+result := buf.String()
+```
+
+## Tips and Best Practices
+
+1. **Profile Real Workloads**
+ - Use production-like data sizes
+ - Test with actual file formats
+ - Include network operations if relevant
+
+2. **Compare Profiles**
+ ```bash
+ # Compare before/after optimization
+ go tool pprof -diff_base=before.prof after.prof
+ ```
+
+3. **Focus on Hot Paths**
+ - Optimize functions with >5% CPU usage first
+ - Small improvements in hot paths have big impact
+
+4. **Memory Profiling**
+ - Use `-alloc_space` for total allocations
+ - Use `-inuse_space` for current heap usage
+ - Check for growing heap over time
+
+5. **Benchmark Regularly**
+ - Add profiling to CI/CD pipeline
+ - Track performance over releases
+ - Set performance regression alerts
+
+## Troubleshooting
+
+### No profiles generated
+- Check write permissions for profile directory
+- Ensure command completes successfully
+- Verify profiling flags are correct
+
+### Empty or small profiles
+- Run command with larger workload
+- Increase execution time
+- Check if command exits too quickly
+
+### Analysis tools fail
+- Ensure profile format is valid
+- Check Go version compatibility
+- Verify graphviz is installed for visualizations
+
+## Advanced Usage
+
+### Custom Profiling Points
+
+Add profiling snapshots in code:
+
+```go
+import "github.com/mimecast/dtail/internal/profiling"
+
+func processLargeFile() {
+ profiler := profiling.GetProfiler() // Assumes global profiler
+
+ // Take memory snapshot before processing
+ profiler.Snapshot("before_processing")
+
+ // ... process file ...
+
+ // Take snapshot after
+ profiler.Snapshot("after_processing")
+}
+```
+
+### Continuous Profiling
+
+For long-running operations:
+
+```go
+// Start periodic metrics logging
+ticker := time.NewTicker(30 * time.Second)
+go func() {
+ for range ticker.C {
+ profiler.LogMetrics("periodic")
+ }
+}()
+defer ticker.Stop()
+```
+
+## Contributing
+
+When adding new features:
+1. Include benchmark tests
+2. Run profiling before submitting PR
+3. Document any performance implications
+4. Add profiling examples for new commands
+
+## References
+
+- [Go Profiling Documentation](https://go.dev/blog/pprof)
+- [pprof Tool Guide](https://github.com/google/pprof)
+- [Go Performance Tips](https://go.dev/wiki/Performance) \ No newline at end of file
diff --git a/benchmarks/README.md b/benchmarks/README.md
index 0b030d4..dfb2627 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -31,6 +31,36 @@ go test -bench=BenchmarkQuick ./benchmarks
go test -bench=. ./benchmarks
```
+### Creating Baseline Snapshots
+```bash
+# Create a baseline before making changes (will prompt for name)
+make benchmark-baseline
+
+# Create a quick baseline (small files only, will prompt for name)
+make benchmark-baseline-quick
+
+# Create a baseline with a descriptive tag (no prompt)
+./benchmarks/benchmark.sh baseline --tag "before-optimization"
+
+# Create a baseline interactively (will prompt if no tag provided)
+./benchmarks/benchmark.sh baseline
+
+# Create a comprehensive baseline (3x iterations)
+./benchmarks/benchmark.sh full-baseline --tag "v1.0-release"
+```
+
+### Comparing Performance
+```bash
+# Compare with a specific baseline using make
+make benchmark-compare BASELINE=benchmarks/baselines/baseline_20240125_143022.txt
+
+# Use the benchmark script for more options
+./benchmarks/benchmark.sh compare benchmarks/baselines/baseline_20240125_143022.txt
+
+# List available baselines
+./benchmarks/benchmark.sh list
+```
+
### Specific Tool Benchmarks
```bash
# DCat benchmarks only
@@ -130,6 +160,68 @@ Benchmarks create large temporary files. Ensure sufficient disk space (>2GB).
### Timeout errors
Increase timeout: `DTAIL_BENCH_TIMEOUT=60m go test -bench=. ./benchmarks`
+## Baseline Management
+
+The benchmarking framework includes tools for creating and comparing performance baselines:
+
+### Creating Baselines
+Baselines capture the complete benchmark output including:
+- Git commit hash
+- Timestamp
+- All benchmark results with timing and memory allocation data
+- Descriptive names for easy identification
+
+The system will prompt for a meaningful baseline name to ensure proper documentation:
+
+```bash
+# Simple baseline (prompts for name)
+make benchmark-baseline
+> Enter a descriptive name for this baseline: before-cache-optimization
+
+# Quick baseline for rapid testing (prompts for name)
+make benchmark-baseline-quick
+> Enter a descriptive name for this baseline: initial-performance-check
+
+# Tagged baseline with description (no prompt)
+./benchmarks/benchmark.sh baseline --tag "before-refactoring"
+
+# Full baseline with multiple iterations
+./benchmarks/benchmark.sh full-baseline --memory --tag "release-v2.0"
+```
+
+Baseline files are named with the pattern:
+`baseline_YYYYMMDD_HHMMSS_descriptive-name.txt`
+
+### Comparing Performance
+Compare current performance against a baseline to detect regressions or improvements:
+
+```bash
+# Using make
+make benchmark-compare BASELINE=benchmarks/baselines/baseline_20240125_143022.txt
+
+# Using benchmark script (provides benchstat analysis if available)
+./benchmarks/benchmark.sh compare benchmarks/baselines/baseline_20240125_143022.txt
+```
+
+### Managing Baselines
+```bash
+# List all baselines
+./benchmarks/benchmark.sh list
+
+# View a specific baseline
+./benchmarks/benchmark.sh show benchmarks/baselines/baseline_20240125_143022.txt
+
+# Clean old baselines (keeps last 10)
+./benchmarks/benchmark.sh clean
+```
+
+### Best Practices for Baselines
+1. Create a baseline before starting optimization work
+2. Tag baselines with descriptive names (e.g., "before-cache-impl", "v1.0-release")
+3. Use full baselines for release comparisons
+4. Commit important baseline files to version control for team reference
+5. Run benchmarks on consistent hardware for accurate comparisons
+
## Contributing
When adding new benchmarks:
diff --git a/benchmarks/benchmark.sh b/benchmarks/benchmark.sh
new file mode 100755
index 0000000..1b4a71f
--- /dev/null
+++ b/benchmarks/benchmark.sh
@@ -0,0 +1,318 @@
+#!/bin/bash
+# Benchmark management script for DTail
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+BASELINES_DIR="${SCRIPT_DIR}/baselines"
+TIMESTAMP=$(date +%Y%m%d_%H%M%S)
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+# Function to print usage
+#
+# Writes the help text to stdout. The heredoc delimiter is unquoted, so $0 in
+# the text is expanded to the name the script was invoked with.
+# NOTE(review): -c/--cpu-profile and -v/--verbose are advertised here, but the
+# option parsers in create_baseline and create_full_baseline have no case for
+# them, so they fall through to the catch-all branch and are ignored.
+usage() {
+ cat << EOF
+DTail Benchmark Management Tool
+
+Usage: $0 [command] [options]
+
+Commands:
+ baseline Create a new baseline snapshot
+ compare [baseline] Compare current performance against a baseline
+ list List available baselines
+ show [baseline] Display a baseline file
+ clean Remove old baselines (keeps last 10)
+ full-baseline Create a comprehensive baseline (all benchmarks, 3x iterations)
+
+Options:
+ -o, --output FILE Save benchmark output to custom file
+ -t, --tag TAG Add a descriptive tag to baseline filename
+ -q, --quick Run quick benchmarks only
+ -m, --memory Include memory profiling
+ -c, --cpu-profile Generate CPU profile
+ -v, --verbose Show detailed output
+
+Examples:
+ # Create a baseline before optimization
+ $0 baseline --tag "before-optimization"
+
+ # Compare current performance with baseline
+ $0 compare benchmarks/baselines/baseline_20240125_143022_before-optimization.txt
+
+ # Create full baseline with memory stats
+ $0 full-baseline --memory --tag "v1.0-release"
+
+EOF
+}
+
+# Function to ensure baselines directory exists
+#
+# Creates $BASELINES_DIR (and any missing parents); a no-op when the directory
+# is already present because of 'mkdir -p'.
+ensure_baselines_dir() {
+ mkdir -p "$BASELINES_DIR"
+}
+
+# Function to create baseline
+create_baseline() {
+ local tag=""
+ local bench_args="-bench=."
+ local output_file=""
+ local memory_profile=""
+
+ # Parse arguments
+ while [[ $# -gt 0 ]]; do
+ case $1 in
+ -t|--tag)
+ tag="_$2"
+ shift 2
+ ;;
+ -q|--quick)
+ bench_args="-bench=BenchmarkQuick"
+ shift
+ ;;
+ -m|--memory)
+ memory_profile="-benchmem"
+ shift
+ ;;
+ -o|--output)
+ output_file="$2"
+ shift 2
+ ;;
+ *)
+ shift
+ ;;
+ esac
+ done
+
+ # If no tag provided, ask for one
+ if [ -z "$tag" ]; then
+ echo -e "${YELLOW}Creating benchmark baseline...${NC}"
+ read -p "Enter a descriptive name for this baseline (e.g. 'before-optimization', 'v1.0-release'): " tag_input
+ if [ -z "$tag_input" ]; then
+ echo -e "${RED}Error: Baseline name cannot be empty${NC}"
+ exit 1
+ fi
+ # Clean the tag input
+ tag="_$(echo "$tag_input" | tr ' ' '_' | tr -cd '[:alnum:]._-')"
+ fi
+
+ ensure_baselines_dir
+
+ if [ -z "$output_file" ]; then
+ output_file="${BASELINES_DIR}/baseline_${TIMESTAMP}${tag}.txt"
+ fi
+
+ echo -e "${GREEN}Creating baseline: ${output_file}${NC}"
+ echo "Git commit: $(git rev-parse --short HEAD)" > "$output_file"
+ echo "Date: $(date)" >> "$output_file"
+ echo "Tag: ${tag#_}" >> "$output_file"
+ echo "----------------------------------------" >> "$output_file"
+
+ cd "$SCRIPT_DIR/.."
+ make build
+ go test $bench_args $memory_profile ./benchmarks -count=1 | tee -a "$output_file"
+
+ echo -e "${GREEN}Baseline created: ${output_file}${NC}"
+}
+
+# Function to create full baseline
+#
+# Like create_baseline, but always runs every benchmark with -benchtime=3x and
+# stores the result in a file whose name ends in "_full.txt".
+#
+# Options:
+#   -t, --tag TAG   descriptive tag appended to the baseline file name
+#   -m, --memory    pass -benchmem to 'go test'
+# Any other argument is ignored. When no tag is given the user is prompted.
+# Exits non-zero on invalid input or when the benchmark run fails.
+create_full_baseline() {
+    local tag=""
+    local tag_input=""
+    local memory_profile=""
+    local test_status=0
+
+    # Parse arguments
+    while [[ $# -gt 0 ]]; do
+        case $1 in
+            -t|--tag)
+                # Without this check 'shift 2' fails on a missing value and
+                # 'set -e' ends the script without any explanation.
+                if [[ $# -lt 2 ]]; then
+                    echo -e "${RED}Error: Option $1 requires a value${NC}"
+                    exit 1
+                fi
+                tag_input="$2"
+                shift 2
+                ;;
+            -m|--memory)
+                memory_profile="-benchmem"
+                shift
+                ;;
+            *)
+                shift
+                ;;
+        esac
+    done
+
+    # If no tag provided, ask for one
+    if [ -z "$tag_input" ]; then
+        echo -e "${YELLOW}Creating comprehensive benchmark baseline...${NC}"
+        # '|| true': EOF on stdin must reach the empty-name check below
+        # instead of tripping 'set -e'.
+        read -r -p "Enter a descriptive name for this baseline (e.g. 'before-optimization', 'v1.0-release'): " tag_input || true
+    fi
+
+    # Clean the tag regardless of where it came from: it becomes part of a
+    # file name, so characters such as '/' must not survive.
+    tag_input="$(printf '%s' "$tag_input" | tr ' ' '_' | tr -cd '[:alnum:]._-')"
+    if [ -z "$tag_input" ]; then
+        echo -e "${RED}Error: Baseline name cannot be empty${NC}"
+        exit 1
+    fi
+    tag="_${tag_input}"
+
+    ensure_baselines_dir
+
+    local output_file="${BASELINES_DIR}/baseline_${TIMESTAMP}${tag}_full.txt"
+
+    echo -e "${GREEN}Creating comprehensive baseline: ${output_file}${NC}"
+    echo "Git commit: $(git rev-parse --short HEAD)" > "$output_file"
+    echo "Date: $(date)" >> "$output_file"
+    echo "Tag: ${tag#_} (full)" >> "$output_file"
+    echo "----------------------------------------" >> "$output_file"
+
+    cd "$SCRIPT_DIR/.."
+    make build
+
+    # Run with multiple iterations for stability. The pipeline's exit status
+    # is that of 'tee', so the 'go test' status is taken from PIPESTATUS.
+    go test -bench=. ${memory_profile:+"$memory_profile"} -benchtime=3x ./benchmarks -count=1 | tee -a "$output_file"
+    test_status=${PIPESTATUS[0]}
+    if [ "$test_status" -ne 0 ]; then
+        echo -e "${RED}Error: Benchmarks failed with exit code ${test_status}${NC}"
+        exit "$test_status"
+    fi
+
+    echo -e "${GREEN}Full baseline created: ${output_file}${NC}"
+}
+
+# Function to compare with baseline
+compare_baseline() {
+ local baseline_file="$1"
+
+ if [ -z "$baseline_file" ]; then
+ echo -e "${RED}Error: No baseline file specified${NC}"
+ echo "Available baselines:"
+ list_baselines
+ exit 1
+ fi
+
+ if [ ! -f "$baseline_file" ]; then
+ echo -e "${RED}Error: Baseline file not found: $baseline_file${NC}"
+ exit 1
+ fi
+
+ ensure_baselines_dir
+ local current_file="${BASELINES_DIR}/current_${TIMESTAMP}.txt"
+
+ echo -e "${YELLOW}Running current benchmarks...${NC}"
+ echo "Git commit: $(git rev-parse --short HEAD)" > "$current_file"
+ echo "Date: $(date)" >> "$current_file"
+ echo "----------------------------------------" >> "$current_file"
+
+ cd "$SCRIPT_DIR/.."
+ make build
+ go test -bench=. -benchmem ./benchmarks -count=1 | tee -a "$current_file"
+
+ echo -e "\n${YELLOW}=== Performance Comparison ===${NC}"
+
+ # Use benchstat if available
+ if command -v benchstat >/dev/null 2>&1; then
+ benchstat "$baseline_file" "$current_file"
+ else
+ echo -e "${YELLOW}benchstat not found. Install with:${NC}"
+ echo " go install golang.org/x/perf/cmd/benchstat@latest"
+ echo -e "\n${YELLOW}Showing simple comparison:${NC}"
+
+ # Extract benchmark results for comparison
+ echo -e "\nBaseline ($(basename "$baseline_file")):"
+ grep "^Benchmark" "$baseline_file" | head -10
+
+ echo -e "\nCurrent:"
+ grep "^Benchmark" "$current_file" | head -10
+ fi
+
+ # Save comparison report
+ local report_file="${BASELINES_DIR}/comparison_${TIMESTAMP}.txt"
+ {
+ echo "Comparison Report"
+ echo "================"
+ echo "Baseline: $baseline_file"
+ echo "Current: $current_file"
+ echo "Date: $(date)"
+ echo ""
+ if command -v benchstat >/dev/null 2>&1; then
+ benchstat "$baseline_file" "$current_file"
+ else
+ diff -u "$baseline_file" "$current_file" || true
+ fi
+ } > "$report_file"
+
+ echo -e "\n${GREEN}Comparison report saved: $report_file${NC}"
+}
+
+# Function to list baselines
+list_baselines() {
+ ensure_baselines_dir
+
+ echo -e "${YELLOW}Available baselines:${NC}"
+ if [ -d "$BASELINES_DIR" ]; then
+ ls -la "$BASELINES_DIR"/*.txt 2>/dev/null | awk '{print $9, $6, $7, $8}' | column -t || echo "No baselines found"
+ else
+ echo "No baselines found"
+ fi
+}
+
+# Function to show baseline content
+#
+# Opens the baseline file given as $1 in 'less'. Without an argument the
+# available baselines are listed and the script exits with status 1; a path
+# that is not a regular file is reported and also exits with status 1.
+show_baseline() {
+    local target="$1"
+
+    if [[ -z "$target" ]]; then
+        echo -e "${RED}Error: No baseline file specified${NC}"
+        list_baselines
+        exit 1
+    fi
+
+    if [[ ! -f "$target" ]]; then
+        echo -e "${RED}Error: Baseline file not found: $target${NC}"
+        exit 1
+    fi
+
+    less "$target"
+}
+
+# Function to clean old baselines
+clean_baselines() {
+ ensure_baselines_dir
+
+ echo -e "${YELLOW}Cleaning old baselines (keeping last 10)...${NC}"
+
+ # Count files
+ local file_count=$(ls -1 "$BASELINES_DIR"/*.txt 2>/dev/null | wc -l)
+
+ if [ "$file_count" -gt 10 ]; then
+ # Remove oldest files, keeping last 10
+ ls -t "$BASELINES_DIR"/*.txt | tail -n +11 | xargs rm -v
+ echo -e "${GREEN}Cleanup complete${NC}"
+ else
+ echo "No cleanup needed (only $file_count baselines found)"
+ fi
+}
+
+# Main command handling
+#
+# Dispatches on the first argument. Commands that accept further arguments
+# drop the command name with 'shift' and forward the rest to their function.
+# ${1:-} makes a call without any argument match the catch-all branch, which
+# prints an error followed by the usage text and exits with status 1.
+case "${1:-}" in
+ baseline)
+ shift
+ create_baseline "$@"
+ ;;
+ full-baseline)
+ shift
+ create_full_baseline "$@"
+ ;;
+ compare)
+ shift
+ compare_baseline "$@"
+ ;;
+ list)
+ list_baselines
+ ;;
+ show)
+ shift
+ show_baseline "$@"
+ ;;
+ clean)
+ clean_baselines
+ ;;
+ -h|--help|help)
+ usage
+ ;;
+ *)
+ echo -e "${RED}Error: Unknown command '${1:-}'${NC}"
+ usage
+ exit 1
+ ;;
+esac \ No newline at end of file
diff --git a/benchmarks/generate_profile_data.go b/benchmarks/generate_profile_data.go
new file mode 100644
index 0000000..0b34047
--- /dev/null
+++ b/benchmarks/generate_profile_data.go
@@ -0,0 +1,159 @@
+package main
+
+import (
+ "flag"
+ "fmt"
+ "log"
+ "math/rand"
+ "os"
+ "strconv"
+ "strings"
+ "time"
+)
+
+// main reads the -size, -output and -format flags and writes a synthetic
+// data file of roughly the requested size in the selected format. It exits
+// through log.Fatalf when the size cannot be parsed or the format is unknown.
+func main() {
+	size := flag.String("size", "10MB", "Size of the file (e.g., 10MB, 100MB, 1GB)")
+	output := flag.String("output", "test.log", "Output file path")
+	format := flag.String("format", "log", "Format: log or csv")
+	flag.Parse()
+
+	sizeBytes, err := parseSize(*size)
+	if err != nil {
+		log.Fatalf("Invalid size: %v", err)
+	}
+
+	// Pick the generator that matches the requested format.
+	var generate func(string, int64)
+	switch *format {
+	case "log":
+		generate = generateLogFile
+	case "csv":
+		generate = generateCSVFile
+	default:
+		log.Fatalf("Unknown format: %s", *format)
+	}
+	generate(*output, sizeBytes)
+
+	fmt.Printf("Generated %s file: %s\n", *size, *output)
+}
+
+func parseSize(size string) (int64, error) {
+ size = strings.ToUpper(size)
+ multiplier := int64(1)
+
+ if strings.HasSuffix(size, "GB") {
+ multiplier = 1024 * 1024 * 1024
+ size = strings.TrimSuffix(size, "GB")
+ } else if strings.HasSuffix(size, "MB") {
+ multiplier = 1024 * 1024
+ size = strings.TrimSuffix(size, "MB")
+ } else if strings.HasSuffix(size, "KB") {
+ multiplier = 1024
+ size = strings.TrimSuffix(size, "KB")
+ }
+
+ base, err := strconv.ParseInt(size, 10, 64)
+ if err != nil {
+ return 0, err
+ }
+
+ return base * multiplier, nil
+}
+
+func generateLogFile(filename string, targetSize int64) {
+ f, err := os.Create(filename)
+ if err != nil {
+ log.Fatal(err)
+ }
+ defer f.Close()
+
+ // Sample log lines
+ logLevels := []string{"INFO", "WARN", "ERROR", "DEBUG"}
+ actions := []string{
+ "Processing request",
+ "Handling connection",
+ "Executing query",
+ "Loading configuration",
+ "Updating cache",
+ "Validating input",
+ "Sending response",
+ "Checking permissions",
+ }
+
+ bytesWritten := int64(0)
+ lineNum := 0
+ startTime := time.Now()
+
+ for bytesWritten < targetSize {
+ lineNum++
+ timestamp := startTime.Add(time.Duration(lineNum) * time.Millisecond).Format("2006-01-02 15:04:05.000")
+ level := logLevels[rand.Intn(len(logLevels))]
+ action := actions[rand.Intn(len(actions))]
+ userID := rand.Intn(1000)
+ requestID := fmt.Sprintf("req-%d", lineNum)
+ duration := rand.Intn(5000)
+
+ line := fmt.Sprintf("[%s] %s - %s for user%d (request: %s, duration: %dms)\n",
+ timestamp, level, action, userID, requestID, duration)
+
+ n, err := f.WriteString(line)
+ if err != nil {
+ log.Fatal(err)
+ }
+ bytesWritten += int64(n)
+
+ // Add some variety with stack traces for errors
+ if level == "ERROR" && rand.Float32() < 0.3 {
+ stackTrace := fmt.Sprintf(" Stack trace:\n at function1() file1.go:123\n at function2() file2.go:456\n at main() main.go:789\n")
+ n, err := f.WriteString(stackTrace)
+ if err != nil {
+ log.Fatal(err)
+ }
+ bytesWritten += int64(n)
+ }
+ }
+}
+
+// generateCSVFile creates (or truncates) filename, writes a CSV header and
+// then appends synthetic rows until at least targetSize bytes (header
+// included) have been written. The final row is always written completely,
+// so the file may be slightly larger than targetSize. Any I/O error
+// terminates the program via log.Fatal.
+func generateCSVFile(filename string, targetSize int64) {
+	f, err := os.Create(filename)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	// Write header; count what was actually written rather than assuming
+	// the whole header made it to disk.
+	n, err := f.WriteString("timestamp,user,action,duration,status,category\n")
+	if err != nil {
+		log.Fatal(err)
+	}
+	bytesWritten := int64(n)
+
+	actions := []string{"login", "query", "update", "delete", "logout", "search", "export", "import"}
+	statuses := []string{"success", "failure", "timeout", "pending"}
+	categories := []string{"web", "api", "batch", "admin"}
+
+	lineNum := 0
+	startTime := time.Now()
+
+	for bytesWritten < targetSize {
+		lineNum++
+		// Timestamps advance by one second per generated row.
+		timestamp := startTime.Add(time.Duration(lineNum) * time.Second).Format("2006-01-02 15:04:05")
+		user := fmt.Sprintf("user%d", rand.Intn(100))
+		action := actions[rand.Intn(len(actions))]
+		duration := 100 + rand.Intn(9900)
+		status := statuses[rand.Intn(len(statuses))]
+		category := categories[rand.Intn(len(categories))]
+
+		line := fmt.Sprintf("%s,%s,%s,%d,%s,%s\n",
+			timestamp, user, action, duration, status, category)
+
+		n, err = f.WriteString(line)
+		if err != nil {
+			log.Fatal(err)
+		}
+		bytesWritten += int64(n)
+	}
+
+	// Close explicitly instead of deferring: a deferred Close would drop its
+	// error, and for a freshly written file that error can mean lost data.
+	if err := f.Close(); err != nil {
+		log.Fatal(err)
+	}
+} \ No newline at end of file
diff --git a/benchmarks/profile_benchmarks.sh b/benchmarks/profile_benchmarks.sh
new file mode 100755
index 0000000..a78182d
--- /dev/null
+++ b/benchmarks/profile_benchmarks.sh
@@ -0,0 +1,152 @@
+#!/bin/bash
+
+# Profile benchmarks script for dtail commands
+# This script runs profiling on dcat, dgrep, and dmap with various workloads
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cd "$SCRIPT_DIR"
+
+# Colors for output
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+# Default values; each one can be overridden through the environment.
+PROFILE_DIR="${PROFILE_DIR:-profiles}"
+TEST_DATA_DIR="${TEST_DATA_DIR:-testdata}"
+# Number of times every profiling scenario is repeated.
+PROFILE_RUNS="${PROFILE_RUNS:-3}"
+
+# Create directories
+mkdir -p "$PROFILE_DIR"
+mkdir -p "$TEST_DATA_DIR"
+
+echo -e "${GREEN}DTail Profiling Framework${NC}"
+echo "=========================="
+echo
+
+# Function to generate test data
+generate_test_data() {
+ local size=$1
+ local filename=$2
+
+ if [ ! -f "$filename" ]; then
+ echo -e "${YELLOW}Generating test data: $filename (${size})${NC}"
+ # Use the standalone generator
+ echo " Command: go run generate_profile_data.go -size \"${size}\" -output \"$filename\" -format log"
+ go run generate_profile_data.go -size "${size}" -output "$filename" -format log
+ fi
+}
+
+# Function to run profiling
+run_profile() {
+ local cmd=$1
+ local name=$2
+ local args=$3
+
+ echo -e "${GREEN}Profiling $cmd - $name${NC}"
+
+ for i in $(seq 1 $PROFILE_RUNS); do
+ echo " Run $i/$PROFILE_RUNS..."
+ echo " Command: timeout 30s $cmd -profile -profiledir $PROFILE_DIR $args"
+
+ # Run with CPU and memory profiling with timeout
+ timeout 30s $cmd -profile -profiledir "$PROFILE_DIR" $args > /dev/null 2>&1
+ local exit_code=$?
+
+ if [ $exit_code -eq 124 ]; then
+ echo -e " ${YELLOW}Warning: Run $i timed out after 30s${NC}"
+ elif [ $exit_code -ne 0 ]; then
+ echo -e " ${RED}Error: Run $i failed with exit code $exit_code${NC}"
+ fi
+
+ # Small delay between runs
+ sleep 1
+ done
+
+ echo
+}
+
+# Generate test data
+echo -e "${GREEN}Preparing test data...${NC}"
+generate_test_data "10MB" "$TEST_DATA_DIR/small.log"
+generate_test_data "100MB" "$TEST_DATA_DIR/medium.log"
+generate_test_data "1GB" "$TEST_DATA_DIR/large.log"
+
+# Generate CSV data for dmap (smaller size for faster processing)
+if [ ! -f "$TEST_DATA_DIR/test.csv" ]; then
+ echo -e "${YELLOW}Generating CSV test data${NC}"
+ echo " Command: go run generate_profile_data.go -size \"10MB\" -output \"$TEST_DATA_DIR/test.csv\" -format csv"
+ go run generate_profile_data.go -size "10MB" -output "$TEST_DATA_DIR/test.csv" -format csv
+fi
+
+echo
+
+# Build commands
+echo -e "${GREEN}Building commands...${NC}"
+echo " Command: cd .. && make dcat dgrep dmap"
+cd ..
+make dcat dgrep dmap
+cd "$SCRIPT_DIR"
+
+echo
+
+# Profile dcat
+echo -e "${GREEN}=== Profiling dcat ===${NC}"
+run_profile "../dcat" "small_file" "-plain -cfg none $TEST_DATA_DIR/small.log"
+run_profile "../dcat" "medium_file" "-plain -cfg none $TEST_DATA_DIR/medium.log"
+# Skip large file for faster profiling - uncomment if needed
+# run_profile "../dcat" "large_file" "-plain -cfg none $TEST_DATA_DIR/large.log"
+
+# Profile dgrep
+echo -e "${GREEN}=== Profiling dgrep ===${NC}"
+run_profile "../dgrep" "simple_regex" "-plain -cfg none -regex 'user[0-9]+' $TEST_DATA_DIR/medium.log"
+run_profile "../dgrep" "complex_regex" "-plain -cfg none -regex '\\d{4}-\\d{2}-\\d{2}.*login.*\\d{3}' $TEST_DATA_DIR/medium.log"
+run_profile "../dgrep" "with_context" "-plain -cfg none -regex 'login' -before 2 -after 2 $TEST_DATA_DIR/medium.log"
+
+# Profile dmap
+echo -e "${GREEN}=== Profiling dmap ===${NC}"
+# Note: dmap uses a special query format for MapReduce operations
+# For CSV files, we need to specify the format and fields correctly
+echo -e "${YELLOW}Note: Skipping dmap profiling - requires specific log format${NC}"
+echo -e "${YELLOW}To profile dmap, use files in MapReduce format with queries like:${NC}"
+echo -e "${YELLOW} from STATS select count(\$line) group by \$hostname${NC}"
+
+echo
+echo -e "${GREEN}Profiling complete!${NC}"
+echo
+
+# Analyze profiles
+echo -e "${GREEN}=== Profile Analysis ===${NC}"
+echo "Profile files generated in: $PROFILE_DIR"
+echo
+
+# List recent profiles
+echo "Recent CPU profiles:"
+ls -lt "$PROFILE_DIR"/*_cpu_*.prof 2>/dev/null | head -5 || echo " No CPU profiles found"
+
+echo
+echo "Recent memory profiles:"
+ls -lt "$PROFILE_DIR"/*_mem_*.prof 2>/dev/null | head -5 || echo " No memory profiles found"
+
+echo
+echo "Recent allocation profiles:"
+ls -lt "$PROFILE_DIR"/*_alloc_*.prof 2>/dev/null | head -5 || echo " No allocation profiles found"
+
+echo
+echo -e "${GREEN}To analyze a profile, use:${NC}"
+echo " go tool pprof <profile_file>"
+echo " ../profiling/profile.sh <profile_file>"
+echo
+echo -e "${GREEN}Examples:${NC}"
+echo " # Interactive analysis"
+echo " go tool pprof $PROFILE_DIR/dcat_cpu_*.prof"
+echo
+echo " # Generate flame graph"
+echo " go tool pprof -http=:8080 $PROFILE_DIR/dcat_cpu_*.prof"
+echo
+echo " # Quick summary with dprofile"
+echo " ../profiling/profile.sh $PROFILE_DIR/dcat_cpu_*.prof"
+echo \ No newline at end of file
diff --git a/benchmarks/profile_dmap.sh b/benchmarks/profile_dmap.sh
new file mode 100755
index 0000000..89d148a
--- /dev/null
+++ b/benchmarks/profile_dmap.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+
+# Profile script specifically for dmap with MapReduce format data
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cd "$SCRIPT_DIR"
+
+# Colors for output
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+# Default values
+PROFILE_DIR="${PROFILE_DIR:-profiles}"
+TEST_DATA_DIR="${TEST_DATA_DIR:-testdata}"
+
+# Create directories
+mkdir -p "$PROFILE_DIR"
+mkdir -p "$TEST_DATA_DIR"
+
+echo -e "${GREEN}DTail dmap Profiling${NC}"
+echo "===================="
+echo
+
+# generate_mapreduce_data FILE LINES
+#
+# Creates FILE in the pipe-separated STATS record format queried further down
+# in this script. Nothing happens when FILE already exists, so repeated runs
+# reuse the same input. The file holds three fixed seed records followed by
+# LINES generated records.
+generate_mapreduce_data() {
+    local filename=$1
+    local lines=$2
+    local i hostname hour min sec timestamp
+    local goroutines openFiles connections currentConnections lifetimeConnections
+
+    if [ -f "$filename" ]; then
+        return 0
+    fi
+
+    echo -e "${YELLOW}Generating MapReduce format test data: $filename${NC}"
+    echo " Command: Creating $filename with $lines lines"
+
+    # One redirection for the whole group opens the file once instead of
+    # re-opening it for append on every generated record.
+    {
+        cat << EOF
+STATS|earth|2024-01-01T10:00:00.000Z|goroutines:50;openFiles:120;connections:15;currentConnections:5;lifetimeConnections:1500
+STATS|mars|2024-01-01T10:00:01.000Z|goroutines:45;openFiles:110;connections:12;currentConnections:4;lifetimeConnections:1200
+STATS|venus|2024-01-01T10:00:02.000Z|goroutines:60;openFiles:130;connections:20;currentConnections:8;lifetimeConnections:2000
+EOF
+
+        # Repeat the pattern to create a larger file
+        for i in $(seq 1 "$lines"); do
+            hostname="host$((i % 10))"
+            # Simple timestamp generation without the date command. The hour
+            # is wrapped after adding the 10:00 offset so it stays in 00-23
+            # however many lines are requested.
+            hour=$(((10 + i / 3600) % 24))
+            min=$(((i / 60) % 60))
+            sec=$((i % 60))
+            timestamp=$(printf "2024-01-01T%02d:%02d:%02d.000Z" "$hour" "$min" "$sec")
+            goroutines=$((40 + i % 40))
+            openFiles=$((100 + i % 50))
+            connections=$((10 + i % 20))
+            currentConnections=$((i % 10))
+            lifetimeConnections=$((1000 + i))
+
+            echo "STATS|$hostname|$timestamp|goroutines:$goroutines;openFiles:$openFiles;connections:$connections;currentConnections:$currentConnections;lifetimeConnections:$lifetimeConnections"
+        done
+    } > "$filename"
+}
+
+# Generate test data
+echo -e "${GREEN}Preparing MapReduce test data...${NC}"
+generate_mapreduce_data "$TEST_DATA_DIR/stats_small.log" 1000
+generate_mapreduce_data "$TEST_DATA_DIR/stats_medium.log" 10000
+
+# Build dmap
+echo -e "${GREEN}Building commands...${NC}"
+echo " Command: cd .. && make dmap"
+cd ..
+make dmap 2>/dev/null || true
+cd "$SCRIPT_DIR"
+
+echo
+
+# Profile different dmap queries
+echo -e "${GREEN}Profiling dmap queries...${NC}"
+
+# Query 1: Simple count
+echo -e "\n${YELLOW}Query: Count by hostname${NC}"
+QUERY="from STATS select count(\$line) group by \$hostname outfile $TEST_DATA_DIR/count_output.csv"
+echo "Command: timeout 30s ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log"
+timeout 30s ../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -5
+
+# Query 2: Aggregations
+echo -e "\n${YELLOW}Query: Sum and average${NC}"
+QUERY="from STATS select sum(\$goroutines),avg(\$goroutines) group by \$hostname outfile $TEST_DATA_DIR/sum_avg_output.csv"
+echo "Command: timeout 30s ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log"
+timeout 30s ../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -5
+
+# Query 3: Min/Max
+echo -e "\n${YELLOW}Query: Min and max${NC}"
+QUERY="from STATS select min(currentConnections),max(lifetimeConnections) group by \$hostname outfile $TEST_DATA_DIR/min_max_output.csv"
+echo "Command: timeout 30s ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log"
+timeout 30s ../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -5
+
+echo
+echo -e "${GREEN}Analyzing dmap profiles...${NC}"
+
+# Find and analyze latest dmap profiles
+DMAP_CPU=$(ls -t "$PROFILE_DIR"/dmap_cpu_*.prof 2>/dev/null | head -1)
+if [ -n "$DMAP_CPU" ]; then
+ echo -e "\nCPU Profile: $(basename "$DMAP_CPU")"
+ ../profiling/profile.sh -top 5 "$DMAP_CPU" 2>/dev/null || echo " Analysis failed"
+fi
+
+DMAP_MEM=$(ls -t "$PROFILE_DIR"/dmap_mem_*.prof 2>/dev/null | head -1)
+if [ -n "$DMAP_MEM" ]; then
+ echo -e "\nMemory Profile: $(basename "$DMAP_MEM")"
+ ../profiling/profile.sh -top 5 "$DMAP_MEM" 2>/dev/null || echo " Analysis failed"
+fi
+
+echo
+echo -e "${GREEN}dmap profiling complete!${NC}"
+echo
+echo "To analyze profiles in detail:"
+echo " go tool pprof $PROFILE_DIR/dmap_cpu_*.prof"
+echo " go tool pprof -alloc_space $PROFILE_DIR/dmap_mem_*.prof"
+
+# Cleanup temporary output files
+rm -f "$TEST_DATA_DIR"/*_output.csv \ No newline at end of file
diff --git a/benchmarks/profile_example.go b/benchmarks/profile_example.go
new file mode 100644
index 0000000..d187a5a
--- /dev/null
+++ b/benchmarks/profile_example.go
@@ -0,0 +1,307 @@
+package main
+
+import (
+ "fmt"
+ "log"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "strings"
+ "time"
+)
+
+// main is a worked example of the profiling framework: it generates sample
+// input, profiles dcat, dgrep and dmap against it, and then summarises the
+// resulting CPU profiles to point at performance bottlenecks.
+func main() {
+	fmt.Println("DTail Profiling Example")
+	fmt.Println("======================")
+	fmt.Println()
+
+	// The plain log file feeds both dcat and dgrep; it is removed on return.
+	logPath := createTestData()
+	defer os.Remove(logPath)
+
+	fmt.Println("1. Profiling dcat...")
+	profileDCat(logPath)
+
+	fmt.Println("\n2. Profiling dgrep...")
+	profileDGrep(logPath)
+
+	// dmap gets its own CSV input, likewise removed on return.
+	csvPath := createCSVData()
+	defer os.Remove(csvPath)
+	fmt.Println("\n3. Profiling dmap...")
+	profileDMap(csvPath)
+
+	fmt.Println("\n4. Analyzing profiles...")
+	analyzeProfiles()
+}
+
+func createTestData() string {
+ filename := "test_data.log"
+ f, err := os.Create(filename)
+ if err != nil {
+ log.Fatal(err)
+ }
+ defer f.Close()
+
+ // Generate 100MB of log data
+ for i := 0; i < 1000000; i++ {
+ timestamp := time.Now().Format("2006-01-02 15:04:05.000")
+ level := []string{"INFO", "WARN", "ERROR", "DEBUG"}[i%4]
+ fmt.Fprintf(f, "[%s] %s - Processing request %d from user%d\n",
+ timestamp, level, i, i%1000)
+ }
+
+ return filename
+}
+
+func createCSVData() string {
+ filename := "test_data.csv"
+ f, err := os.Create(filename)
+ if err != nil {
+ log.Fatal(err)
+ }
+ defer f.Close()
+
+ // Header
+ fmt.Fprintln(f, "timestamp,user,action,duration,status")
+
+ // Generate data
+ for i := 0; i < 100000; i++ {
+ timestamp := time.Now().Add(time.Duration(i) * time.Second).Format("2006-01-02 15:04:05")
+ user := fmt.Sprintf("user%d", i%100)
+ action := []string{"login", "query", "update", "logout"}[i%4]
+ duration := 100 + i%900
+ status := []string{"success", "failure"}[i%2]
+
+ fmt.Fprintf(f, "%s,%s,%s,%d,%s\n", timestamp, user, action, duration, status)
+ }
+
+ return filename
+}
+
+// profileDCat runs ../dcat over testFile with profiling enabled, prints the
+// wall-clock duration, and lists every dcat profile present in the profiles
+// directory. A failing run is reported together with its combined output.
+func profileDCat(testFile string) {
+	// Run dcat with profiling
+	cmd := exec.Command("../dcat",
+		"-profile",
+		"-profiledir", "profiles",
+		"-plain",
+		"-cfg", "none",
+		testFile)
+
+	start := time.Now()
+	output, err := cmd.CombinedOutput()
+	duration := time.Since(start)
+
+	if err != nil {
+		fmt.Printf("Error: %v\n", err)
+		fmt.Printf("Output: %s\n", output)
+		return
+	}
+
+	fmt.Printf(" Completed in %v\n", duration)
+
+	// Glob only fails on a malformed pattern; this one is a constant.
+	profiles, _ := filepath.Glob("profiles/dcat_*.prof")
+	for _, p := range profiles {
+		info, err := os.Stat(p)
+		if err != nil {
+			// The file may have disappeared since the glob. Skip it
+			// instead of calling Size on a nil FileInfo.
+			continue
+		}
+		fmt.Printf(" Generated: %s (%d KB)\n", filepath.Base(p), info.Size()/1024)
+	}
+}
+
+// profileDGrep runs ../dgrep over testFile with profiling enabled, searching
+// for ERROR or WARN with two lines of context on either side, then prints the
+// wall-clock duration and how often either keyword occurs in the output.
+func profileDGrep(testFile string) {
+	args := []string{
+		"-profile",
+		"-profiledir", "profiles",
+		"-plain",
+		"-cfg", "none",
+		"-regex", "ERROR|WARN",
+		"-before", "2",
+		"-after", "2",
+		testFile,
+	}
+
+	began := time.Now()
+	raw, runErr := exec.Command("../dgrep", args...).CombinedOutput()
+	elapsed := time.Since(began)
+
+	if runErr != nil {
+		fmt.Printf("Error: %v\n", runErr)
+		fmt.Printf("Output: %s\n", raw)
+		return
+	}
+
+	fmt.Printf(" Completed in %v\n", elapsed)
+
+	// Keyword occurrences are counted across the whole output, context
+	// lines included.
+	text := string(raw)
+	total := 0
+	for _, keyword := range []string{"ERROR", "WARN"} {
+		total += strings.Count(text, keyword)
+	}
+	fmt.Printf(" Found %d matches\n", total)
+}
+
+// profileDMap runs ../dmap with profiling enabled once per sample query
+// against csvFile and prints the wall-clock duration of each run. A failing
+// query is reported together with its combined output, matching the dcat
+// and dgrep helpers, and the remaining queries are still attempted.
+func profileDMap(csvFile string) {
+	queries := []string{
+		fmt.Sprintf("select count(*) from %s", csvFile),
+		fmt.Sprintf("select user, count(*) from %s group by user", csvFile),
+		fmt.Sprintf("select action, avg(duration), max(duration) from %s group by action", csvFile),
+	}
+
+	for i, query := range queries {
+		fmt.Printf(" Query %d: %s\n", i+1, truncateQuery(query))
+
+		cmd := exec.Command("../dmap",
+			"-profile",
+			"-profiledir", "profiles",
+			"-plain",
+			"-cfg", "none",
+			"-query", query)
+
+		start := time.Now()
+		output, err := cmd.CombinedOutput()
+		duration := time.Since(start)
+
+		if err != nil {
+			fmt.Printf(" Error: %v\n", err)
+			// Without the tool's own output a failure cannot be diagnosed.
+			fmt.Printf(" Output: %s\n", output)
+			continue
+		}
+
+		fmt.Printf(" Completed in %v\n", duration)
+	}
+}
+
+func truncateQuery(query string) string {
+ if len(query) > 50 {
+ return query[:47] + "..."
+ }
+ return query
+}
+
+// analyzeProfiles picks, for each of dcat, dgrep and dmap, the most recently
+// modified CPU profile in the profiles directory, runs ../profiling/profile.sh
+// on it, prints the table rows of the report, and hands the full report to
+// suggestOptimizations. Tools without a profile are skipped.
+func analyzeProfiles() {
+	candidates, _ := filepath.Glob("profiles/*_cpu_*.prof")
+	if len(candidates) == 0 {
+		fmt.Println("No CPU profiles found")
+		return
+	}
+
+	for _, tool := range []string{"dcat", "dgrep", "dmap"} {
+		// Select the newest profile belonging to this tool.
+		marker := tool + "_cpu_"
+		var (
+			newest    string
+			newestMod time.Time
+		)
+		for _, path := range candidates {
+			if !strings.Contains(path, marker) {
+				continue
+			}
+			st, statErr := os.Stat(path)
+			if statErr != nil || !st.ModTime().After(newestMod) {
+				continue
+			}
+			newest, newestMod = path, st.ModTime()
+		}
+		if newest == "" {
+			continue
+		}
+
+		fmt.Printf("\nAnalyzing %s CPU profile:\n", tool)
+
+		raw, runErr := exec.Command("../profiling/profile.sh", "-top", "5", newest).CombinedOutput()
+		if runErr != nil {
+			fmt.Printf(" Error analyzing: %v\n", runErr)
+			continue
+		}
+		report := string(raw)
+
+		// The table starts at the line naming both "Function" and "Flat"
+		// and ends at the first blank line after it.
+		seenHeader := false
+		for _, row := range strings.Split(report, "\n") {
+			if !seenHeader {
+				if !strings.Contains(row, "Function") || !strings.Contains(row, "Flat") {
+					continue
+				}
+				seenHeader = true
+			}
+			if strings.Contains(row, "%") || strings.Contains(row, "---") {
+				fmt.Printf(" %s\n", row)
+			}
+			if row == "" {
+				break
+			}
+		}
+
+		suggestOptimizations(tool, report)
+	}
+}
+
+func suggestOptimizations(tool string, analysis string) {
+ fmt.Printf("\n Optimization suggestions for %s:\n", tool)
+
+ // Common patterns to look for
+ suggestions := []struct {
+ pattern string
+ suggestion string
+ }{
+ {"regexp.Compile", " - Pre-compile regex patterns instead of compiling in loops"},
+ {"strings.Join", " - Use strings.Builder for string concatenation"},
+ {"runtime.mallocgc", " - High allocation rate; consider object pooling"},
+ {"syscall", " - I/O bottleneck; consider buffering or async I/O"},
+ {"runtime.gcBgMarkWorker", " - High GC pressure; reduce allocations"},
+ }
+
+ foundAny := false
+ for _, s := range suggestions {
+ if strings.Contains(analysis, s.pattern) {
+ fmt.Println(s.suggestion)
+ foundAny = true
+ }
+ }
+
+ if !foundAny {
+ fmt.Println(" - Profile looks good; no obvious bottlenecks found")
+ }
+}
+
+// ExampleBenchmarkWithProfiling prints a sample benchmark, as Go source text,
+// that shows how CPU and heap profiling can be wired into a testing.B
+// benchmark, followed by the go test command line to run it. The printed
+// code is illustration only; nothing in it is executed here.
+func ExampleBenchmarkWithProfiling() {
+ // This would typically be in a _test.go file
+ fmt.Println(`
+Example benchmark with profiling:
+
+func BenchmarkDCatLargeFile(b *testing.B) {
+ // Enable profiling for this specific benchmark
+ if *cpuprofile != "" {
+ f, _ := os.Create(*cpuprofile)
+ pprof.StartCPUProfile(f)
+ defer pprof.StopCPUProfile()
+ }
+
+ // Generate test file
+ testFile := generateLargeFile(b)
+ defer os.Remove(testFile)
+
+ b.ResetTimer()
+
+ for i := 0; i < b.N; i++ {
+ cmd := exec.Command("./dcat", "-plain", testFile)
+ cmd.Run()
+ }
+
+ if *memprofile != "" {
+ f, _ := os.Create(*memprofile)
+ runtime.GC()
+ pprof.WriteHeapProfile(f)
+ f.Close()
+ }
+}
+
+Run with: go test -bench=BenchmarkDCatLargeFile -cpuprofile=cpu.prof -memprofile=mem.prof
+`)
+} \ No newline at end of file
diff --git a/benchmarks/profile_quick.sh b/benchmarks/profile_quick.sh
new file mode 100755
index 0000000..1aa9425
--- /dev/null
+++ b/benchmarks/profile_quick.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+
+# Quick profile script for dtail commands
+# This runs profiling with smaller datasets for faster results
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cd "$SCRIPT_DIR"
+
+# Colors for output
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+# Default values
+PROFILE_DIR="${PROFILE_DIR:-profiles}"
+TEST_DATA_DIR="${TEST_DATA_DIR:-testdata}"
+
+# Create directories
+mkdir -p "$PROFILE_DIR"
+mkdir -p "$TEST_DATA_DIR"
+
+echo -e "${GREEN}DTail Quick Profiling${NC}"
+echo "====================="
+echo
+
+# Generate test data if needed
+if [ ! -f "$TEST_DATA_DIR/quick_test.log" ]; then
+ echo -e "${YELLOW}Generating test data...${NC}"
+ echo " Command: go run generate_profile_data.go -size \"10MB\" -output \"$TEST_DATA_DIR/quick_test.log\" -format log"
+ go run generate_profile_data.go -size "10MB" -output "$TEST_DATA_DIR/quick_test.log" -format log
+ echo " Command: go run generate_profile_data.go -size \"10MB\" -output \"$TEST_DATA_DIR/quick_test.csv\" -format csv"
+ go run generate_profile_data.go -size "10MB" -output "$TEST_DATA_DIR/quick_test.csv" -format csv
+fi
+
+# Build commands
+echo -e "${GREEN}Building commands...${NC}"
+echo " Command: cd .. && make dcat dgrep dmap"
+cd ..
+make dcat dgrep dmap 2>/dev/null || true
+cd "$SCRIPT_DIR"
+
+echo
+echo -e "${GREEN}Running quick profiles...${NC}"
+
+# summarize_cpu_profile TOOL
+#
+# Prints the name of the newest CPU profile for TOOL in $PROFILE_DIR together
+# with a short profile.sh summary. Does nothing when no such profile exists.
+summarize_cpu_profile() {
+    local tool=$1
+    local cpu_profile
+
+    cpu_profile=$(ls -t "$PROFILE_DIR"/"$tool"_cpu_*.prof 2>/dev/null | head -1)
+    if [ -z "$cpu_profile" ]; then
+        return 0
+    fi
+
+    echo " Generated: $(basename "$cpu_profile")"
+    echo " Analysis: ../profiling/profile.sh -top 3 $cpu_profile"
+    # grep exits non-zero when the summary header is absent. Under `set -e`
+    # that would abort the script before the remaining tools are profiled,
+    # so a missing summary is tolerated here.
+    ../profiling/profile.sh -top 3 "$cpu_profile" | grep -A 5 "Top 3 functions" || true
+}
+
+# Profile dcat
+echo -e "\n${YELLOW}Profiling dcat...${NC}"
+echo "Command: ../dcat -profile -profiledir $PROFILE_DIR -plain -cfg none $TEST_DATA_DIR/quick_test.log"
+../dcat -profile -profiledir "$PROFILE_DIR" -plain -cfg none "$TEST_DATA_DIR/quick_test.log" > /dev/null 2>&1
+summarize_cpu_profile dcat
+
+# Profile dgrep
+echo -e "\n${YELLOW}Profiling dgrep...${NC}"
+echo "Command: ../dgrep -profile -profiledir $PROFILE_DIR -plain -cfg none -regex \"user[0-9]+\" $TEST_DATA_DIR/quick_test.log"
+../dgrep -profile -profiledir "$PROFILE_DIR" -plain -cfg none -regex "user[0-9]+" "$TEST_DATA_DIR/quick_test.log" > /dev/null 2>&1
+summarize_cpu_profile dgrep
+
+# Profile dmap
+echo -e "\n${YELLOW}Profiling dmap...${NC}"
+echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"select count(*) from $TEST_DATA_DIR/quick_test.csv\""
+../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "select count(*) from $TEST_DATA_DIR/quick_test.csv" > /dev/null 2>&1
+summarize_cpu_profile dmap
+
+echo
+echo -e "${GREEN}Quick profiling complete!${NC}"
+echo
+echo "To analyze in detail:"
+echo " go tool pprof $PROFILE_DIR/<profile_file>"
+echo " make profile-flamegraph PROFILE=$PROFILE_DIR/<profile_file>"
+echo \ No newline at end of file
diff --git a/benchmarks/profile_runner.go b/benchmarks/profile_runner.go
new file mode 100644
index 0000000..2da122b
--- /dev/null
+++ b/benchmarks/profile_runner.go
@@ -0,0 +1,233 @@
+package benchmarks
+
+import (
+ "fmt"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "strings"
+ "testing"
+ "time"
+)
+
+// ProfileConfig holds the settings RunProfiledCommand uses when launching a
+// tool under profiling.
+type ProfileConfig struct {
+ // EnableCPU requests profiling. If either EnableCPU or EnableMem is set,
+ // the tool is started with the -profile and -profiledir flags.
+ EnableCPU bool
+ // EnableMem requests profiling; it has the same effect on the command
+ // line as EnableCPU.
+ EnableMem bool
+ // ProfileDir is the directory passed via -profiledir; the captured
+ // command output is written there as well.
+ ProfileDir string
+ // Iterations is the number of iterations.
+ // NOTE(review): not read by any code in this file - confirm intended use.
+ Iterations int
+}
+
+// ProfileResult describes the outcome of one RunProfiledCommand invocation.
+type ProfileResult struct {
+ // Tool is the name of the tool that was run.
+ Tool string
+ // Operation is the tool's arguments joined with underscores.
+ Operation string
+ // Duration is the wall-clock time the command took to run.
+ Duration time.Duration
+ // CPUProfile, MemProfile and AllocProfile are the paths of the matching
+ // profile files found after the run; empty when none was found.
+ CPUProfile string
+ MemProfile string
+ AllocProfile string
+ // ExitCode is the exit code reported by the process state.
+ ExitCode int
+ // Error is the error returned from running the command, if any.
+ Error error
+}
+
+// DefaultProfileConfig returns default profiling configuration
+func DefaultProfileConfig() ProfileConfig {
+ return ProfileConfig{
+ EnableCPU: true,
+ EnableMem: true,
+ ProfileDir: "profiles",
+ Iterations: 1,
+ }
+}
+
+// RunProfiledCommand runs a command with profiling enabled
+func RunProfiledCommand(b *testing.B, config ProfileConfig, tool string, args ...string) (*ProfileResult, error) {
+ // Ensure profile directory exists
+ if err := os.MkdirAll(config.ProfileDir, 0755); err != nil {
+ return nil, fmt.Errorf("creating profile dir: %w", err)
+ }
+
+ // Build command path
+ cmdPath := filepath.Join("..", tool)
+
+ // Add profiling flags
+ profileArgs := []string{}
+ if config.EnableCPU || config.EnableMem {
+ profileArgs = append(profileArgs, "-profile")
+ profileArgs = append(profileArgs, "-profiledir", config.ProfileDir)
+ }
+
+ // Combine all arguments
+ allArgs := append(profileArgs, args...)
+
+ // Create command
+ cmd := exec.Command(cmdPath, allArgs...)
+
+ // Set up output capture
+ outputFile := filepath.Join(config.ProfileDir, fmt.Sprintf("%s_output_%s.log",
+ tool, time.Now().Format("20060102_150405")))
+ output, err := os.Create(outputFile)
+ if err != nil {
+ return nil, fmt.Errorf("creating output file: %w", err)
+ }
+ defer output.Close()
+
+ cmd.Stdout = output
+ cmd.Stderr = output
+
+ // Record start time
+ start := time.Now()
+
+ // Run command
+ err = cmd.Run()
+
+ // Record duration
+ duration := time.Since(start)
+
+ result := &ProfileResult{
+ Tool: tool,
+ Operation: strings.Join(args, "_"),
+ Duration: duration,
+ ExitCode: cmd.ProcessState.ExitCode(),
+ Error: err,
+ }
+
+ // Find generated profile files
+ timestamp := time.Now().Format("20060102_1504")
+ profiles, _ := filepath.Glob(filepath.Join(config.ProfileDir,
+ fmt.Sprintf("%s_*_%s*.prof", tool, timestamp)))
+
+ for _, profile := range profiles {
+ if strings.Contains(profile, "_cpu_") {
+ result.CPUProfile = profile
+ } else if strings.Contains(profile, "_mem_") {
+ result.MemProfile = profile
+ } else if strings.Contains(profile, "_alloc_") {
+ result.AllocProfile = profile
+ }
+ }
+
+ return result, nil
+}
+
+// ProfileBenchmark registers a sub-benchmark named name+"_profiled" that runs
+// tool once under profiling with the default configuration, logs the paths of
+// the generated profiles, and, when ../profiling/profile.sh exists, logs a
+// short analysis of the CPU and memory profiles.
+//
+// For dcat and dgrep a medium-sized test log is generated for the run and
+// every "__TESTFILE__" argument is replaced with its path. The benchmark
+// fails when the run cannot be set up or the command exits unsuccessfully.
+func ProfileBenchmark(b *testing.B, name string, tool string, args ...string) {
+	config := DefaultProfileConfig()
+
+	b.Run(name+"_profiled", func(b *testing.B) {
+		// The testing package may call this function several times with a
+		// growing b.N. Work on a private copy so the placeholder is still
+		// present in args on the next call instead of pointing at a test
+		// file that has already been removed.
+		runArgs := append([]string(nil), args...)
+
+		// Generate test data if needed
+		if tool == "dcat" || tool == "dgrep" {
+			testConfig := TestDataConfig{
+				Size:          Medium,
+				Format:        SimpleLogFormat,
+				Compression:   NoCompression,
+				LineVariation: 50,
+			}
+			testFile := GenerateTestFile(b, testConfig)
+			defer os.Remove(testFile)
+
+			// Replace placeholder in args
+			for i, arg := range runArgs {
+				if arg == "__TESTFILE__" {
+					runArgs[i] = testFile
+				}
+			}
+		}
+
+		// Run profiled command. A non-nil err means the run could not be set
+		// up and result is nil, so it must be checked before result is used.
+		result, err := RunProfiledCommand(b, config, tool, runArgs...)
+		if err != nil {
+			b.Fatalf("Command failed: %v", err)
+		}
+		if result.Error != nil && result.ExitCode != 0 {
+			b.Fatalf("Command failed: %v", result.Error)
+		}
+
+		// Report results
+		b.Logf("Profile run completed in %v", result.Duration)
+		if result.CPUProfile != "" {
+			b.Logf("CPU profile: %s", result.CPUProfile)
+		}
+		if result.MemProfile != "" {
+			b.Logf("Memory profile: %s", result.MemProfile)
+		}
+		if result.AllocProfile != "" {
+			b.Logf("Allocation profile: %s", result.AllocProfile)
+		}
+
+		// Analyze profiles if profile.sh is available
+		dprofilePath := filepath.Join("..", "profiling", "profile.sh")
+		if _, err := os.Stat(dprofilePath); err == nil {
+			if result.CPUProfile != "" {
+				analyzeProfile(b, dprofilePath, result.CPUProfile, "CPU")
+			}
+			if result.MemProfile != "" {
+				analyzeProfile(b, dprofilePath, result.MemProfile, "Memory")
+			}
+		}
+	})
+}
+
+// analyzeProfile runs the script at dprofilePath with "-top 5" on profilePath
+// and logs every output line that contains a percent sign or the word "Top".
+// A failing script is logged and otherwise ignored.
+func analyzeProfile(b *testing.B, dprofilePath, profilePath, profileType string) {
+	b.Logf("\n%s Profile Analysis:", profileType)
+
+	report, runErr := exec.Command(dprofilePath, "-top", "5", profilePath).CombinedOutput()
+	if runErr != nil {
+		b.Logf("Failed to analyze profile: %v", runErr)
+		return
+	}
+
+	// Print top functions
+	for _, row := range strings.Split(string(report), "\n") {
+		if !strings.Contains(row, "%") && !strings.Contains(row, "Top") {
+			continue
+		}
+		b.Log(row)
+	}
+}
+
+// Profiling benchmarks for each tool
+
+// BenchmarkDCatWithProfiling profiles a plain dcat run over the generated
+// test file.
+func BenchmarkDCatWithProfiling(b *testing.B) {
+	dcatArgs := []string{"--plain", "--cfg", "none", "__TESTFILE__"}
+	ProfileBenchmark(b, "Simple", "dcat", dcatArgs...)
+}
+
+// BenchmarkDGrepWithProfiling profiles dgrep matching "error|warning" in the
+// generated test file.
+func BenchmarkDGrepWithProfiling(b *testing.B) {
+	dgrepArgs := []string{
+		"--plain",
+		"--cfg", "none",
+		"-regex", "error|warning",
+		"__TESTFILE__",
+	}
+	ProfileBenchmark(b, "Regex", "dgrep", dgrepArgs...)
+}
+
+// BenchmarkDMapWithProfiling profiles a dmap row-count query over a
+// 10000-row CSV file created in the system temp directory; the file is
+// removed when the benchmark returns.
+func BenchmarkDMapWithProfiling(b *testing.B) {
+	// dmap needs CSV input, so generate it first.
+	csvPath := filepath.Join(os.TempDir(), "dmap_test.csv")
+	generateCSVTestData(b, csvPath, 10000)
+	defer os.Remove(csvPath)
+
+	countQuery := fmt.Sprintf("select count(*) from %s", csvPath)
+	ProfileBenchmark(b, "Count", "dmap",
+		"--plain",
+		"--cfg", "none",
+		"-query", countQuery)
+}
+
+// generateCSVTestData writes a CSV file named filename for dmap: a header
+// line followed by rows data lines with the columns timestamp, user, action
+// and duration. Any I/O error fails the benchmark immediately.
+func generateCSVTestData(b *testing.B, filename string, rows int) {
+	actions := []string{"login", "query", "logout"}
+
+	f, err := os.Create(filename)
+	if err != nil {
+		b.Fatalf("Failed to create CSV file: %v", err)
+	}
+
+	// Write header
+	if _, err := fmt.Fprintln(f, "timestamp,user,action,duration"); err != nil {
+		b.Fatalf("Failed to write CSV file: %v", err)
+	}
+
+	// Read the clock once so consecutive rows are exactly one second apart.
+	base := time.Now()
+	for i := 0; i < rows; i++ {
+		timestamp := base.Add(time.Duration(i) * time.Second).Format("2006-01-02 15:04:05")
+		user := fmt.Sprintf("user%d", i%100)
+		duration := 100 + i%500
+
+		if _, err := fmt.Fprintf(f, "%s,%s,%s,%d\n", timestamp, user, actions[i%len(actions)], duration); err != nil {
+			b.Fatalf("Failed to write CSV file: %v", err)
+		}
+	}
+
+	// Close explicitly so that a failed flush to disk is not silently lost.
+	if err := f.Close(); err != nil {
+		b.Fatalf("Failed to close CSV file: %v", err)
+	}
+} \ No newline at end of file