diff options
| author | Paul Buetow <paul@buetow.org> | 2025-06-26 21:10:07 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2025-06-26 21:10:07 +0300 |
| commit | 513c70e297059822384140ee7e5939d20fd0bdc1 (patch) | |
| tree | d6619230b54c4956d138c17c43df0fc72bb6f71a /benchmarks | |
| parent | 4a657e44e7111d7d3b9a9ba5e453901e19af2ecb (diff) | |
refactor: move profiling scripts from benchmarks/ to profiling/

- Moved profile_benchmarks.sh, profile_dmap.sh, and profile_quick.sh
  to the profiling/ directory where they belong
- Updated Makefile targets to reference new locations
- Fixed profile_dmap.sh to remove outfile clauses since they're not
  needed for profiling and were preventing proper execution
- Updated .gitignore to exclude generated files in profiling/

This better separates benchmarking (performance comparison) from
profiling (performance analysis).

🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
Diffstat (limited to 'benchmarks')
| -rwxr-xr-x | benchmarks/profile_benchmarks.sh | 211 | ||||
| -rwxr-xr-x | benchmarks/profile_dmap.sh | 155 | ||||
| -rwxr-xr-x | benchmarks/profile_quick.sh | 86 |
3 files changed, 0 insertions, 452 deletions
diff --git a/benchmarks/profile_benchmarks.sh b/benchmarks/profile_benchmarks.sh deleted file mode 100755 index 6be86cd..0000000 --- a/benchmarks/profile_benchmarks.sh +++ /dev/null @@ -1,211 +0,0 @@ -#!/bin/bash - -# Profile benchmarks script for dtail commands -# This script runs profiling on dcat, dgrep, and dmap with various workloads - -set -e - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -cd "$SCRIPT_DIR" - -# Colors for output -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -RED='\033[0;31m' -NC='\033[0m' # No Color - -# Default values -PROFILE_DIR="${PROFILE_DIR:-profiles}" -TEST_DATA_DIR="${TEST_DATA_DIR:-testdata}" -PROFILE_RUNS=1 - -# Create directories -mkdir -p "$PROFILE_DIR" -mkdir -p "$TEST_DATA_DIR" - -echo -e "${GREEN}DTail Profiling Framework${NC}" -echo "==========================" -echo - -# Function to generate test data -generate_test_data() { - local size=$1 - local filename=$2 - - if [ ! -f "$filename" ]; then - echo -e "${YELLOW}Generating test data: $filename (${size})${NC}" - # Use the standalone generator - echo " Command: go run cmd/generate_profile_data.go -size \"${size}\" -output \"$filename\" -format log" - go run cmd/generate_profile_data.go -size "${size}" -output "$filename" -format log - fi -} - -# Function to run profiling -run_profile() { - local cmd=$1 - local name=$2 - local args=$3 - - echo -e "${GREEN}Profiling $cmd - $name${NC}" - - for i in $(seq 1 $PROFILE_RUNS); do - echo " Run $i/$PROFILE_RUNS..." - echo " Command: timeout 30s $cmd -profile -profiledir $PROFILE_DIR $args" - - # Run with CPU and memory profiling with timeout - timeout 30s $cmd -profile -profiledir "$PROFILE_DIR" $args > /dev/null 2>&1 - local exit_code=$? 
- - if [ $exit_code -eq 124 ]; then - echo -e " ${YELLOW}Warning: Run $i timed out after 30s${NC}" - elif [ $exit_code -ne 0 ]; then - echo -e " ${RED}Error: Run $i failed with exit code $exit_code${NC}" - fi - - # Small delay between runs - sleep 1 - done - - echo -} - -# Special function for profiling dmap which runs continuously -run_profile_dmap() { - local cmd=$1 - local name=$2 - local args=$3 - - echo -e "${GREEN}Profiling $cmd - $name${NC}" - - for i in $(seq 1 $PROFILE_RUNS); do - echo " Run $i/$PROFILE_RUNS..." - echo " Command: $cmd -profile -profiledir $PROFILE_DIR $args (will interrupt after 3s)" - - # Run dmap in background, wait a bit for it to process, then interrupt it - $cmd -profile -profiledir "$PROFILE_DIR" $args > /dev/null 2>&1 & - local pid=$! - - # Wait for dmap to process the file and generate initial results - sleep 3 - - # Send interrupt signal to make it exit cleanly - # We expect this to return non-zero, so we ignore the exit code - kill -INT $pid 2>/dev/null || true - wait $pid 2>/dev/null || true - - echo " Completed" - - # Small delay between runs - sleep 1 - done - - echo -} - -# Generate test data -echo -e "${GREEN}Preparing test data...${NC}" -generate_test_data "1MB" "$TEST_DATA_DIR/small.log" -generate_test_data "10MB" "$TEST_DATA_DIR/medium.log" -# Skip large file for faster testing -# generate_test_data "1GB" "$TEST_DATA_DIR/large.log" - -# Generate CSV data for dmap (smaller size for faster processing) -if [ ! -f "$TEST_DATA_DIR/test.csv" ]; then - echo -e "${YELLOW}Generating CSV test data${NC}" - echo " Command: go run cmd/generate_profile_data.go -size \"10MB\" -output \"$TEST_DATA_DIR/test.csv\" -format csv" - go run cmd/generate_profile_data.go -size "10MB" -output "$TEST_DATA_DIR/test.csv" -format csv -fi - -echo - -# Build commands -echo -e "${GREEN}Building commands...${NC}" -echo " Command: cd .. && make dcat dgrep dmap" -cd .. 
-make dcat dgrep dmap -cd "$SCRIPT_DIR" - -echo - -# Profile dcat -echo -e "${GREEN}=== Profiling dcat ===${NC}" -run_profile "../dcat" "small_file" "-plain -cfg none $TEST_DATA_DIR/small.log" -# Skip medium file for faster profiling -# run_profile "../dcat" "medium_file" "-plain -cfg none $TEST_DATA_DIR/medium.log" -# Skip large file for faster profiling - uncomment if needed -# run_profile "../dcat" "large_file" "-plain -cfg none $TEST_DATA_DIR/large.log" - -# Profile dgrep -echo -e "${GREEN}=== Profiling dgrep ===${NC}" -run_profile "../dgrep" "simple_regex" "-plain -cfg none -regex 'user[0-9]+' $TEST_DATA_DIR/small.log" -# Use small file for faster profiling -# run_profile "../dgrep" "complex_regex" "-plain -cfg none -regex '\\d{4}-\\d{2}-\\d{2}.*login.*\\d{3}' $TEST_DATA_DIR/medium.log" -# run_profile "../dgrep" "with_context" "-plain -cfg none -regex 'login' -before 2 -after 2 $TEST_DATA_DIR/medium.log" - -# Profile dmap -echo -e "${GREEN}=== Profiling dmap ===${NC}" - -# Generate DTail default format test data for dmap -if [ ! 
-f "$TEST_DATA_DIR/dtail_format.log" ]; then - echo -e "${YELLOW}Generating DTail format test data for dmap${NC}" - echo " Command: Creating DTail format log file" - # Generate DTail default format log lines - for i in $(seq 1 1000); do - hostname="host$((i % 10))" - goroutines=$((40 + i % 40)) - cgocalls=$((i % 100)) - cpus=$((1 + i % 8)) - loadavg=$(printf "%.2f" $(echo "scale=2; $i % 100 / 100" | bc)) - uptime="${i}h0m0s" - connections=$((i % 10)) - lifetime=$((1000 + i)) - - echo "INFO|$(date +%m%d-%H%M%S)|1|stats.go:56|$cpus|$goroutines|$cgocalls|$loadavg|$uptime|MAPREDUCE:STATS|currentConnections=$connections|lifetimeConnections=$lifetime" - done > "$TEST_DATA_DIR/dtail_format.log" -fi - -# Profile dmap with DTail format -run_profile_dmap "../dmap" "simple_count" "-plain -cfg none -query 'from STATS select count(*)' -files $TEST_DATA_DIR/dtail_format.log" -run_profile_dmap "../dmap" "aggregations" "-plain -cfg none -query 'from STATS select sum(\$goroutines),avg(\$cgocalls),max(lifetimeConnections)' -files $TEST_DATA_DIR/dtail_format.log" -run_profile_dmap "../dmap" "group_by_connections" "-plain -cfg none -query 'from STATS select currentConnections,count(*) group by currentConnections' -files $TEST_DATA_DIR/dtail_format.log" - -# Also test CSV format -echo -e "\n${YELLOW}Testing CSV format with dmap${NC}" -run_profile_dmap "../dmap" "csv_query" "-plain -cfg none -query 'select user,action,count(*) where status=\"success\" group by user,action logformat csv' -files $TEST_DATA_DIR/test.csv" - -echo -echo -e "${GREEN}Profiling complete!${NC}" -echo - -# Analyze profiles -echo -e "${GREEN}=== Profile Analysis ===${NC}" -echo "Profile files generated in: $PROFILE_DIR" -echo - -# List recent profiles -echo "Recent CPU profiles:" -ls -lt "$PROFILE_DIR"/*_cpu_*.prof 2>/dev/null | head -5 || echo " No CPU profiles found" - -echo -echo "Recent memory profiles:" -ls -lt "$PROFILE_DIR"/*_mem_*.prof 2>/dev/null | head -5 || echo " No memory profiles found" - -echo -echo 
"Recent allocation profiles:" -ls -lt "$PROFILE_DIR"/*_alloc_*.prof 2>/dev/null | head -5 || echo " No allocation profiles found" - -echo -echo -e "${GREEN}To analyze a profile, use:${NC}" -echo " go tool pprof <profile_file>" -echo " ../profiling/profile.sh <profile_file>" -echo -echo -e "${GREEN}Examples:${NC}" -echo " # Interactive analysis" -echo " go tool pprof $PROFILE_DIR/dcat_cpu_*.prof" -echo -echo " # Generate flame graph" -echo " go tool pprof -http=:8080 $PROFILE_DIR/dcat_cpu_*.prof" -echo -echo " # Quick summary with dprofile" -echo " ../profiling/profile.sh $PROFILE_DIR/dcat_cpu_*.prof" -echo
\ No newline at end of file diff --git a/benchmarks/profile_dmap.sh b/benchmarks/profile_dmap.sh deleted file mode 100755 index 4983826..0000000 --- a/benchmarks/profile_dmap.sh +++ /dev/null @@ -1,155 +0,0 @@ -#!/bin/bash - -# Profile script specifically for dmap with MapReduce format data - -set -e - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -cd "$SCRIPT_DIR" - -# Colors for output -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -RED='\033[0;31m' -NC='\033[0m' # No Color - -# Default values -PROFILE_DIR="${PROFILE_DIR:-profiles}" -TEST_DATA_DIR="${TEST_DATA_DIR:-testdata}" - -# Create directories -mkdir -p "$PROFILE_DIR" -mkdir -p "$TEST_DATA_DIR" - -echo -e "${GREEN}DTail dmap Profiling${NC}" -echo "====================" -echo - -# Function to generate MapReduce format test data (generickv format) -generate_mapreduce_data() { - local filename=$1 - local lines=$2 - - if [ ! -f "$filename" ]; then - echo -e "${YELLOW}Generating MapReduce format test data: $filename${NC}" - echo " Command: Creating $filename with $lines lines (generickv format)" - - # Generate data in generickv format: field1=value1|field2=value2|... 
- for i in $(seq 1 $lines); do - hostname="host$((i % 10))" - # Simple timestamp generation without date command - hour=$((10 + (i / 3600) % 24)) - min=$(((i / 60) % 60)) - sec=$((i % 60)) - timestamp=$(printf "2024-01-01T%02d:%02d:%02d.000Z" $hour $min $sec) - goroutines=$((40 + i % 40)) - openFiles=$((100 + i % 50)) - connections=$((10 + i % 20)) - currentConnections=$((i % 10)) - lifetimeConnections=$((1000 + i)) - - echo "table=STATS|hostname=$hostname|timestamp=$timestamp|goroutines=$goroutines|openFiles=$openFiles|connections=$connections|currentConnections=$currentConnections|lifetimeConnections=$lifetimeConnections" >> "$filename" - done - fi -} - -# Generate test data in DTail default format instead -echo -e "${GREEN}Preparing MapReduce test data...${NC}" - -# Function to generate DTail default format test data -generate_dtail_format_data() { - local filename=$1 - local lines=$2 - - if [ ! -f "$filename" ]; then - echo -e "${YELLOW}Generating DTail default format test data: $filename${NC}" - echo " Command: Creating $filename with $lines lines (DTail default format)" - - # Generate DTail default format log lines - for i in $(seq 1 $lines); do - hostname="host$((i % 10))" - goroutines=$((40 + i % 40)) - cgocalls=$((i % 100)) - cpus=$((1 + i % 8)) - loadavg=$(printf "%.2f" $(echo "scale=2; $i % 100 / 100" | bc)) - uptime="${i}h0m0s" - connections=$((i % 10)) - lifetime=$((1000 + i)) - - # DTail default format: INFO|date-time|pid|caller|cpus|goroutines|cgocalls|loadavg|uptime|MAPREDUCE:STATS|key=value|... - echo "INFO|$(date +%m%d-%H%M%S)|1|stats.go:56|$cpus|$goroutines|$cgocalls|$loadavg|$uptime|MAPREDUCE:STATS|hostname=$hostname|currentConnections=$connections|lifetimeConnections=$lifetime" >> "$filename" - done - fi -} - -generate_dtail_format_data "$TEST_DATA_DIR/stats_small.log" 100 -generate_dtail_format_data "$TEST_DATA_DIR/stats_medium.log" 1000 - -# Build dmap -echo -e "${GREEN}Building commands...${NC}" -echo " Command: cd .. && make dmap" -cd .. 
-make dmap 2>/dev/null || true -cd "$SCRIPT_DIR" - -echo - -# Profile different dmap queries -echo -e "${GREEN}Profiling dmap queries...${NC}" - -# Query 1: Simple count -echo -e "\n${YELLOW}Query: Count by hostname${NC}" -QUERY="from STATS select count(\$line) group by hostname outfile $TEST_DATA_DIR/count_output.csv" -echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log (will interrupt after 3s)" -# Run dmap in background and interrupt after 3 seconds -../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -10 & -DMAP_PID=$! -sleep 3 -kill -INT $DMAP_PID 2>/dev/null || true -wait $DMAP_PID 2>/dev/null || true - -# Query 2: Aggregations -echo -e "\n${YELLOW}Query: Sum and average${NC}" -QUERY="from STATS select sum(\$goroutines),avg(\$goroutines) group by hostname outfile $TEST_DATA_DIR/sum_avg_output.csv" -echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log (will interrupt after 3s)" -../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -10 & -DMAP_PID=$! -sleep 3 -kill -INT $DMAP_PID 2>/dev/null || true -wait $DMAP_PID 2>/dev/null || true - -# Query 3: Min/Max -echo -e "\n${YELLOW}Query: Min and max${NC}" -QUERY="from STATS select min(currentConnections),max(lifetimeConnections) group by hostname outfile $TEST_DATA_DIR/min_max_output.csv" -echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log (will interrupt after 3s)" -../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -10 & -DMAP_PID=$! 
-sleep 3 -kill -INT $DMAP_PID 2>/dev/null || true -wait $DMAP_PID 2>/dev/null || true - -echo -echo -e "${GREEN}Analyzing dmap profiles...${NC}" - -# Find and analyze latest dmap profiles -DMAP_CPU=$(ls -t "$PROFILE_DIR"/dmap_cpu_*.prof 2>/dev/null | head -1) -if [ -n "$DMAP_CPU" ]; then - echo -e "\nCPU Profile: $(basename "$DMAP_CPU")" - ../profiling/profile.sh -top 5 "$DMAP_CPU" 2>/dev/null || echo " Analysis failed" -fi - -DMAP_MEM=$(ls -t "$PROFILE_DIR"/dmap_mem_*.prof 2>/dev/null | head -1) -if [ -n "$DMAP_MEM" ]; then - echo -e "\nMemory Profile: $(basename "$DMAP_MEM")" - ../profiling/profile.sh -top 5 "$DMAP_MEM" 2>/dev/null || echo " Analysis failed" -fi - -echo -echo -e "${GREEN}dmap profiling complete!${NC}" -echo -echo "To analyze profiles in detail:" -echo " go tool pprof $PROFILE_DIR/dmap_cpu_*.prof" -echo " go tool pprof -alloc_space $PROFILE_DIR/dmap_mem_*.prof" - -# Cleanup temporary output files -rm -f "$TEST_DATA_DIR"/*_output.csv
\ No newline at end of file diff --git a/benchmarks/profile_quick.sh b/benchmarks/profile_quick.sh deleted file mode 100755 index 1aa9425..0000000 --- a/benchmarks/profile_quick.sh +++ /dev/null @@ -1,86 +0,0 @@ -#!/bin/bash - -# Quick profile script for dtail commands -# This runs profiling with smaller datasets for faster results - -set -e - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -cd "$SCRIPT_DIR" - -# Colors for output -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -NC='\033[0m' # No Color - -# Default values -PROFILE_DIR="${PROFILE_DIR:-profiles}" -TEST_DATA_DIR="${TEST_DATA_DIR:-testdata}" - -# Create directories -mkdir -p "$PROFILE_DIR" -mkdir -p "$TEST_DATA_DIR" - -echo -e "${GREEN}DTail Quick Profiling${NC}" -echo "=====================" -echo - -# Generate test data if needed -if [ ! -f "$TEST_DATA_DIR/quick_test.log" ]; then - echo -e "${YELLOW}Generating test data...${NC}" - echo " Command: go run generate_profile_data.go -size \"10MB\" -output \"$TEST_DATA_DIR/quick_test.log\" -format log" - go run generate_profile_data.go -size "10MB" -output "$TEST_DATA_DIR/quick_test.log" -format log - echo " Command: go run generate_profile_data.go -size \"10MB\" -output \"$TEST_DATA_DIR/quick_test.csv\" -format csv" - go run generate_profile_data.go -size "10MB" -output "$TEST_DATA_DIR/quick_test.csv" -format csv -fi - -# Build commands -echo -e "${GREEN}Building commands...${NC}" -echo " Command: cd .. && make dcat dgrep dmap" -cd .. 
-make dcat dgrep dmap 2>/dev/null || true -cd "$SCRIPT_DIR" - -echo -echo -e "${GREEN}Running quick profiles...${NC}" - -# Profile dcat -echo -e "\n${YELLOW}Profiling dcat...${NC}" -echo "Command: ../dcat -profile -profiledir $PROFILE_DIR -plain -cfg none $TEST_DATA_DIR/quick_test.log" -../dcat -profile -profiledir "$PROFILE_DIR" -plain -cfg none "$TEST_DATA_DIR/quick_test.log" > /dev/null 2>&1 -DCAT_CPU=$(ls -t "$PROFILE_DIR"/dcat_cpu_*.prof 2>/dev/null | head -1) -if [ -n "$DCAT_CPU" ]; then - echo " Generated: $(basename "$DCAT_CPU")" - echo " Analysis: ../profiling/profile.sh -top 3 $DCAT_CPU" - ../profiling/profile.sh -top 3 "$DCAT_CPU" | grep -A 5 "Top 3 functions" -fi - -# Profile dgrep -echo -e "\n${YELLOW}Profiling dgrep...${NC}" -echo "Command: ../dgrep -profile -profiledir $PROFILE_DIR -plain -cfg none -regex \"user[0-9]+\" $TEST_DATA_DIR/quick_test.log" -../dgrep -profile -profiledir "$PROFILE_DIR" -plain -cfg none -regex "user[0-9]+" "$TEST_DATA_DIR/quick_test.log" > /dev/null 2>&1 -DGREP_CPU=$(ls -t "$PROFILE_DIR"/dgrep_cpu_*.prof 2>/dev/null | head -1) -if [ -n "$DGREP_CPU" ]; then - echo " Generated: $(basename "$DGREP_CPU")" - echo " Analysis: ../profiling/profile.sh -top 3 $DGREP_CPU" - ../profiling/profile.sh -top 3 "$DGREP_CPU" | grep -A 5 "Top 3 functions" -fi - -# Profile dmap -echo -e "\n${YELLOW}Profiling dmap...${NC}" -echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"select count(*) from $TEST_DATA_DIR/quick_test.csv\"" -../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "select count(*) from $TEST_DATA_DIR/quick_test.csv" > /dev/null 2>&1 -DMAP_CPU=$(ls -t "$PROFILE_DIR"/dmap_cpu_*.prof 2>/dev/null | head -1) -if [ -n "$DMAP_CPU" ]; then - echo " Generated: $(basename "$DMAP_CPU")" - echo " Analysis: ../profiling/profile.sh -top 3 $DMAP_CPU" - ../profiling/profile.sh -top 3 "$DMAP_CPU" | grep -A 5 "Top 3 functions" -fi - -echo -echo -e "${GREEN}Quick profiling complete!${NC}" -echo -echo 
"To analyze in detail:" -echo " go tool pprof $PROFILE_DIR/<profile_file>" -echo " make profile-flamegraph PROFILE=$PROFILE_DIR/<profile_file>" -echo
\ No newline at end of file |
