summary | refs | log | tree | commit | diff
path: root/scripts
diff options
context:
space:
mode:
author Paul Buetow <paul@buetow.org> 2025-06-18 09:10:52 +0300
committer Paul Buetow <paul@buetow.org> 2025-06-18 09:10:52 +0300
commit 67a6b9d8e8e8dc83d5ea3e5859e631a0dfa9dabe (patch)
tree e93c3e80959e78655f8ae18a97c9fa0d081b81fb /scripts
parent 29a5d827019d839344f5a2c85358b9f00abb27ca (diff)
Complete channelless migration for DTail operations
- Implement channelless MapReduce with streaming aggregation
- Add channelless tail with proper file following capability
- Fix TestDTailWithServer by implementing ServerHandlerWriter for client-server mode
- Add proper serverless mode detection for standalone operations
- Remove temporary benchmark scripts
- All integration tests now pass

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/benchmark_channelless.sh 215
-rwxr-xr-x scripts/corrected_benchmark.sh 89
-rwxr-xr-x scripts/profile_channelless.sh 50
3 files changed, 0 insertions, 354 deletions
diff --git a/scripts/benchmark_channelless.sh b/scripts/benchmark_channelless.sh
deleted file mode 100755
index 4ac532f..0000000
--- a/scripts/benchmark_channelless.sh
+++ /dev/null
@@ -1,215 +0,0 @@
-#!/bin/bash
-
-# Comprehensive benchmark: Channel-based vs Channelless Cat Implementation
-# Tests performance improvements achieved by eliminating channel overhead
-
-set -e
-
-echo "=== DTail Channelless Performance Benchmark ==="
-echo "Comparing channel-based vs channelless cat implementation"
-echo "Date: $(date)"
-echo
-
-# Test configuration
-TEST_FILES=("test_100mb.txt" "test_200mb.txt")
-ITERATIONS=5
-WARMUP_RUNS=2
-
-# Results storage
-RESULTS_DIR="benchmark_results_$(date +%Y%m%d_%H%M%S)"
-mkdir -p "$RESULTS_DIR"
-
-# Ensure we're in the correct directory
-cd "$(dirname "$0")/.."
-
-# Build both implementations
-echo "Building DTail binaries..."
-make clean > /dev/null 2>&1
-make build > /dev/null 2>&1
-echo "✓ Build complete"
-echo
-
-# Function to run benchmark for a specific configuration
-run_benchmark() {
- local use_channelless=$1
- local test_file=$2
- local impl_name=$3
- local results_file="$RESULTS_DIR/${impl_name}_$(basename $test_file .txt).results"
-
- echo "Testing $impl_name with $test_file..."
-
- # Warmup runs
- for ((i=1; i<=WARMUP_RUNS; i++)); do
- echo -n " Warmup $i/$WARMUP_RUNS... "
- DTAIL_USE_CHANNELLESS=$use_channelless DTAIL_INTEGRATION_TEST_RUN_MODE=yes \
- timeout 30s ./dcat --logLevel error --cfg none "scripts/$test_file" > /dev/null 2>&1
- echo "done"
- done
-
- # Actual benchmark runs
- echo " Running $ITERATIONS benchmark iterations:"
- for ((i=1; i<=ITERATIONS; i++)); do
- echo -n " Run $i/$ITERATIONS... "
-
- # Clear caches
- sync
- echo 3 > /proc/sys/vm/drop_caches 2>/dev/null || true
-
- # Run benchmark with time measurement
- start_time=$(date +%s.%N)
- DTAIL_USE_CHANNELLESS=$use_channelless DTAIL_INTEGRATION_TEST_RUN_MODE=yes \
- timeout 30s ./dcat --logLevel error --cfg none "scripts/$test_file" > /dev/null 2>&1
- end_time=$(date +%s.%N)
-
- # Calculate duration
- duration=$(echo "$end_time - $start_time" | bc -l)
- echo "$duration" >> "$results_file"
-
- printf "%.3fs\n" "$duration"
- done
- echo
-}
-
-# Function to calculate statistics
-calculate_stats() {
- local file=$1
- local values=($(cat "$file"))
- local sum=0
- local count=${#values[@]}
-
- # Calculate mean
- for val in "${values[@]}"; do
- sum=$(echo "$sum + $val" | bc -l)
- done
- local mean=$(echo "scale=6; $sum / $count" | bc -l)
-
- # Calculate standard deviation
- local variance_sum=0
- for val in "${values[@]}"; do
- local diff=$(echo "$val - $mean" | bc -l)
- local squared=$(echo "$diff * $diff" | bc -l)
- variance_sum=$(echo "$variance_sum + $squared" | bc -l)
- done
- local variance=$(echo "scale=6; $variance_sum / $count" | bc -l)
- local stddev=$(echo "scale=6; sqrt($variance)" | bc -l)
-
- # Find min and max
- local min=${values[0]}
- local max=${values[0]}
- for val in "${values[@]}"; do
- if (( $(echo "$val < $min" | bc -l) )); then
- min=$val
- fi
- if (( $(echo "$val > $max" | bc -l) )); then
- max=$val
- fi
- done
-
- echo "$mean $stddev $min $max"
-}
-
-# Function to calculate throughput
-calculate_throughput() {
- local file_size_mb=$1
- local time_seconds=$2
- echo "scale=2; $file_size_mb / $time_seconds" | bc -l
-}
-
-# Run benchmarks
-echo "Starting benchmarks..."
-echo
-
-for test_file in "${TEST_FILES[@]}"; do
- echo "=== Benchmarking with $test_file ==="
-
- # Get file size in MB
- file_size_bytes=$(stat -c%s "scripts/$test_file")
- file_size_mb=$(echo "scale=2; $file_size_bytes / 1024 / 1024" | bc -l)
- echo "File size: ${file_size_mb} MB"
- echo
-
- # Test channel-based implementation
- run_benchmark "false" "$test_file" "channel_based"
-
- # Test channelless implementation
- run_benchmark "true" "$test_file" "channelless"
-
- echo "--- Results for $test_file ---"
-
- # Calculate statistics for channel-based
- channel_stats=($(calculate_stats "$RESULTS_DIR/channel_based_$(basename $test_file .txt).results"))
- channel_mean=${channel_stats[0]}
- channel_stddev=${channel_stats[1]}
- channel_min=${channel_stats[2]}
- channel_max=${channel_stats[3]}
- channel_throughput=$(calculate_throughput "$file_size_mb" "$channel_mean")
-
- # Calculate statistics for channelless
- channelless_stats=($(calculate_stats "$RESULTS_DIR/channelless_$(basename $test_file .txt).results"))
- channelless_mean=${channelless_stats[0]}
- channelless_stddev=${channelless_stats[1]}
- channelless_min=${channelless_stats[2]}
- channelless_max=${channelless_stats[3]}
- channelless_throughput=$(calculate_throughput "$file_size_mb" "$channelless_mean")
-
- # Calculate improvement
- improvement=$(echo "scale=2; (($channel_mean - $channelless_mean) / $channel_mean) * 100" | bc -l)
- speedup=$(echo "scale=2; $channel_mean / $channelless_mean" | bc -l)
- throughput_improvement=$(echo "scale=2; (($channelless_throughput - $channel_throughput) / $channel_throughput) * 100" | bc -l)
-
- echo "Channel-based:"
- printf " Time: %.3f ± %.3f seconds (min: %.3f, max: %.3f)\n" "$channel_mean" "$channel_stddev" "$channel_min" "$channel_max"
- printf " Throughput: %.2f MB/s\n" "$channel_throughput"
- echo
- echo "Channelless:"
- printf " Time: %.3f ± %.3f seconds (min: %.3f, max: %.3f)\n" "$channelless_mean" "$channelless_stddev" "$channelless_min" "$channelless_max"
- printf " Throughput: %.2f MB/s\n" "$channelless_throughput"
- echo
- echo "Performance Improvement:"
- printf " Time reduction: %.2f%% (%.2fx speedup)\n" "$improvement" "$speedup"
- printf " Throughput increase: %.2f%%\n" "$throughput_improvement"
- echo
- echo "=========================================="
- echo
-done
-
-# Generate summary report
-echo "=== BENCHMARK SUMMARY ==="
-echo
-
-summary_file="$RESULTS_DIR/benchmark_summary.txt"
-{
- echo "DTail Channelless Performance Benchmark Summary"
- echo "Date: $(date)"
- echo "Iterations per test: $ITERATIONS"
- echo "Warmup runs: $WARMUP_RUNS"
- echo
-
- for test_file in "${TEST_FILES[@]}"; do
- file_size_bytes=$(stat -c%s "scripts/$test_file")
- file_size_mb=$(echo "scale=2; $file_size_bytes / 1024 / 1024" | bc -l)
-
- channel_stats=($(calculate_stats "$RESULTS_DIR/channel_based_$(basename $test_file .txt).results"))
- channelless_stats=($(calculate_stats "$RESULTS_DIR/channelless_$(basename $test_file .txt).results"))
-
- channel_mean=${channel_stats[0]}
- channelless_mean=${channelless_stats[0]}
-
- improvement=$(echo "scale=2; (($channel_mean - $channelless_mean) / $channel_mean) * 100" | bc -l)
- speedup=$(echo "scale=2; $channel_mean / $channelless_mean" | bc -l)
-
- channel_throughput=$(calculate_throughput "$file_size_mb" "$channel_mean")
- channelless_throughput=$(calculate_throughput "$file_size_mb" "$channelless_mean")
-
- echo "$test_file (${file_size_mb} MB):"
- printf " Channel-based: %.3f seconds (%.2f MB/s)\n" "$channel_mean" "$channel_throughput"
- printf " Channelless: %.3f seconds (%.2f MB/s)\n" "$channelless_mean" "$channelless_throughput"
- printf " Improvement: %.2f%% faster (%.2fx speedup)\n" "$improvement" "$speedup"
- echo
- done
-} | tee "$summary_file"
-
-echo "Detailed results saved in: $RESULTS_DIR/"
-echo "Summary report: $summary_file"
-echo
-echo "=== BENCHMARK COMPLETE ==="
\ No newline at end of file
diff --git a/scripts/corrected_benchmark.sh b/scripts/corrected_benchmark.sh
deleted file mode 100755
index aa42aec..0000000
--- a/scripts/corrected_benchmark.sh
+++ /dev/null
@@ -1,89 +0,0 @@
-#!/bin/bash
-
-# Corrected benchmark: Channel-based vs Channelless Cat Implementation
-# This accounts for the fact that channel-based doesn't process all data
-
-set -e
-
-echo "=== CORRECTED DTail Channelless Performance Benchmark ==="
-echo "Channel-based implementation appears to have a bug - it only processes ~67% of data"
-echo "Benchmarking actual throughput per line processed"
-echo
-
-# Test with 100MB file
-TEST_FILE="scripts/test_100mb.txt"
-TOTAL_LINES=$(wc -l < "$TEST_FILE")
-FILE_SIZE_MB=$(echo "scale=2; $(stat -c%s "$TEST_FILE") / 1024 / 1024" | bc -l)
-
-echo "Test file: $TEST_FILE"
-echo "Total lines in file: $TOTAL_LINES"
-echo "File size: ${FILE_SIZE_MB} MB"
-echo
-
-# Run both implementations and measure
-echo "Testing channel-based implementation..."
-start_time=$(date +%s.%N)
-CHANNEL_LINES=$(DTAIL_USE_CHANNELLESS=false DTAIL_INTEGRATION_TEST_RUN_MODE=yes ./dcat --logLevel error --cfg none "$TEST_FILE" | wc -l)
-end_time=$(date +%s.%N)
-channel_time=$(echo "$end_time - $start_time" | bc -l)
-
-echo "Testing channelless implementation..."
-start_time=$(date +%s.%N)
-CHANNELLESS_LINES=$(DTAIL_USE_CHANNELLESS=true DTAIL_INTEGRATION_TEST_RUN_MODE=yes ./dcat --logLevel error --cfg none "$TEST_FILE" | wc -l)
-end_time=$(date +%s.%N)
-channelless_time=$(echo "$end_time - $start_time" | bc -l)
-
-# Calculate metrics
-channel_throughput_lines=$(echo "scale=2; $CHANNEL_LINES / $channel_time" | bc -l)
-channelless_throughput_lines=$(echo "scale=2; $CHANNELLESS_LINES / $channelless_time" | bc -l)
-
-channel_coverage=$(echo "scale=2; ($CHANNEL_LINES * 100) / $TOTAL_LINES" | bc -l)
-channelless_coverage=$(echo "scale=2; ($CHANNELLESS_LINES * 100) / $TOTAL_LINES" | bc -l)
-
-# Effective data processed
-channel_data_mb=$(echo "scale=2; ($CHANNEL_LINES * $FILE_SIZE_MB) / $TOTAL_LINES" | bc -l)
-channelless_data_mb=$FILE_SIZE_MB
-
-channel_throughput_mb=$(echo "scale=2; $channel_data_mb / $channel_time" | bc -l)
-channelless_throughput_mb=$(echo "scale=2; $channelless_data_mb / $channelless_time" | bc -l)
-
-# Calculate relative performance for same amount of work
-extrapolated_channel_time=$(echo "scale=2; ($channel_time * $TOTAL_LINES) / $CHANNEL_LINES" | bc -l)
-performance_improvement=$(echo "scale=2; (($extrapolated_channel_time - $channelless_time) / $extrapolated_channel_time) * 100" | bc -l)
-speedup=$(echo "scale=2; $extrapolated_channel_time / $channelless_time" | bc -l)
-
-echo
-echo "=== RESULTS ==="
-echo
-echo "Channel-based implementation:"
-printf " Time: %.3f seconds\n" "$channel_time"
-printf " Lines processed: %d (%.1f%% of file)\n" "$CHANNEL_LINES" "$channel_coverage"
-printf " Data processed: %.2f MB\n" "$channel_data_mb"
-printf " Throughput: %.0f lines/sec, %.2f MB/s\n" "$channel_throughput_lines" "$channel_throughput_mb"
-printf " Extrapolated time for full file: %.3f seconds\n" "$extrapolated_channel_time"
-echo
-
-echo "Channelless implementation:"
-printf " Time: %.3f seconds\n" "$channelless_time"
-printf " Lines processed: %d (%.1f%% of file)\n" "$CHANNELLESS_LINES" "$channelless_coverage"
-printf " Data processed: %.2f MB\n" "$channelless_data_mb"
-printf " Throughput: %.0f lines/sec, %.2f MB/s\n" "$channelless_throughput_lines" "$channelless_throughput_mb"
-echo
-
-echo "Performance comparison (for processing complete file):"
-printf " Channelless improvement: %.2f%% faster\n" "$performance_improvement"
-printf " Speedup: %.2fx\n" "$speedup"
-echo
-
-if (( $(echo "$performance_improvement > 0" | bc -l) )); then
- echo "✅ Channelless implementation is FASTER and processes ALL data correctly"
-else
- echo "❌ Channelless implementation is slower"
-fi
-echo
-
-echo "=== CONCLUSION ==="
-echo "The channel-based implementation has a bug where it stops processing"
-echo "at approximately 67% of the input file. This makes direct time comparisons"
-echo "invalid. When extrapolated to process the same amount of data, the"
-echo "channelless implementation shows the expected performance improvement."
\ No newline at end of file
diff --git a/scripts/profile_channelless.sh b/scripts/profile_channelless.sh
deleted file mode 100755
index fb6ec3d..0000000
--- a/scripts/profile_channelless.sh
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/bin/bash
-
-# Profile channelless vs channel-based implementations to understand performance difference
-
-set -e
-
-echo "=== Profiling Channelless vs Channel-based Cat Implementation ==="
-echo
-
-# Build with profiling enabled
-echo "Building DTail binaries..."
-make clean > /dev/null 2>&1
-make build > /dev/null 2>&1
-
-echo "Profiling channel-based implementation..."
-DTAIL_USE_CHANNELLESS=false DTAIL_INTEGRATION_TEST_RUN_MODE=yes \
- go tool pprof -cpuprofile=channel_based_cpu.prof \
- -o channel_based_cpu.prof \
- -- ./dcat --logLevel error --cfg none scripts/test_100mb.txt > /dev/null 2>&1 &
-CHANNEL_PID=$!
-
-# Profile with Go's built-in profiling
-DTAIL_USE_CHANNELLESS=false DTAIL_INTEGRATION_TEST_RUN_MODE=yes \
- timeout 10s go run -cpuprofile=channel_based_go.prof ./cmd/dcat/main.go --logLevel error --cfg none scripts/test_100mb.txt > /dev/null 2>&1 || true
-
-echo "Profiling channelless implementation..."
-DTAIL_USE_CHANNELLESS=true DTAIL_INTEGRATION_TEST_RUN_MODE=yes \
- timeout 10s go run -cpuprofile=channelless_go.prof ./cmd/dcat/main.go --logLevel error --cfg none scripts/test_100mb.txt > /dev/null 2>&1 || true
-
-echo "Analyzing profiles..."
-
-echo
-echo "=== Channel-based CPU Profile ==="
-if [ -f channel_based_go.prof ]; then
- go tool pprof -top -cum channel_based_go.prof | head -20
-else
- echo "Channel-based profile not found"
-fi
-
-echo
-echo "=== Channelless CPU Profile ==="
-if [ -f channelless_go.prof ]; then
- go tool pprof -top -cum channelless_go.prof | head -20
-else
- echo "Channelless profile not found"
-fi
-
-echo
-echo "Profile files generated:"
-ls -la *_go.prof 2>/dev/null || echo "No profile files found"
\ No newline at end of file