diff options
| author | Paul Buetow <paul@buetow.org> | 2025-06-26 21:10:07 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2025-06-26 21:10:07 +0300 |
| commit | 513c70e297059822384140ee7e5939d20fd0bdc1 (patch) | |
| tree | d6619230b54c4956d138c17c43df0fc72bb6f71a /profiling | |
| parent | 4a657e44e7111d7d3b9a9ba5e453901e19af2ecb (diff) | |
refactor: move profiling scripts from benchmarks/ to profiling/
- Moved profile_benchmarks.sh, profile_dmap.sh, and profile_quick.sh
to the profiling/ directory where they belong
- Updated Makefile targets to reference new locations
- Fixed profile_dmap.sh to remove outfile clauses since they're not
needed for profiling and were preventing proper execution
- Updated .gitignore to exclude generated files in profiling/
This better separates benchmarking (performance comparison) from
profiling (performance analysis).
🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
Diffstat (limited to 'profiling')
| -rwxr-xr-x | profiling/profile_benchmarks.sh | 211 | ||||
| -rwxr-xr-x | profiling/profile_dmap.sh | 154 | ||||
| -rwxr-xr-x | profiling/profile_quick.sh | 86 |
3 files changed, 451 insertions, 0 deletions
#!/bin/bash
#
# File: profiling/profile_benchmarks.sh (new file, mode 100755)
#
# Profile benchmarks script for dtail commands.
# Runs dcat, dgrep and dmap with -profile against generated test data and
# then lists the profile files found in the profile directory.
#
# Environment:
#   PROFILE_DIR    directory passed to -profiledir (default: profiles)
#   TEST_DATA_DIR  directory holding generated input files (default: testdata)
#
# Both defaults are relative to this script's directory, because the script
# changes into that directory first.

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"

# Colors for output
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m' # No Color

# Default values
PROFILE_DIR="${PROFILE_DIR:-profiles}"
TEST_DATA_DIR="${TEST_DATA_DIR:-testdata}"
PROFILE_RUNS=1

# Create directories
mkdir -p "$PROFILE_DIR"
mkdir -p "$TEST_DATA_DIR"

echo -e "${GREEN}DTail Profiling Framework${NC}"
echo "=========================="
echo

#######################################
# Generate a log-format test file with the standalone generator, unless the
# file already exists.
# Arguments: $1 - size passed to the generator's -size flag (e.g. "1MB")
#            $2 - output file path
#######################################
generate_test_data() {
  local size=$1
  local filename=$2

  if [ ! -f "$filename" ]; then
    echo -e "${YELLOW}Generating test data: $filename (${size})${NC}"
    # Use the standalone generator
    echo " Command: go run cmd/generate_profile_data.go -size \"${size}\" -output \"$filename\" -format log"
    go run cmd/generate_profile_data.go -size "${size}" -output "$filename" -format log
  fi
}

#######################################
# Write synthetic log lines in the DTail default format
# (INFO|date-time|pid|caller|cpus|goroutines|cgocalls|loadavg|uptime|
#  MAPREDUCE:STATS|key=value|...).
# Arguments: $1 - output file path
#            $2 - number of lines to write
#######################################
generate_dtail_format_data() {
  local filename=$1
  local lines=$2
  local tmp="${filename}.tmp"
  local stamp i goroutines cgocalls cpus loadavg uptime connections lifetime

  # One timestamp for the whole file: avoids forking date once per line.
  stamp=$(date +%m%d-%H%M%S)

  for ((i = 1; i <= lines; i++)); do
    goroutines=$((40 + i % 40))
    cgocalls=$((i % 100))
    cpus=$((1 + i % 8))
    # 0.00 .. 0.99. The previous bc expression ("scale=2; $i % 100 / 100")
    # always produced 0, because bc's % operator honours scale.
    printf -v loadavg '0.%02d' "$((i % 100))"
    uptime="${i}h0m0s"
    connections=$((i % 10))
    lifetime=$((1000 + i))

    echo "INFO|$stamp|1|stats.go:56|$cpus|$goroutines|$cgocalls|$loadavg|$uptime|MAPREDUCE:STATS|currentConnections=$connections|lifetimeConnections=$lifetime"
  done > "$tmp"

  # Rename only after the loop finished, so an interrupted run does not leave
  # a truncated file that later runs would treat as complete.
  mv -- "$tmp" "$filename"
}

#######################################
# Profile a command that terminates on its own.
# Arguments: $1    - path of the command to run
#            $2    - human-readable name of the workload
#            $3... - arguments for the command, one shell word each
# A failing or timed-out run is reported but does not abort the script.
#######################################
run_profile() {
  local cmd=$1
  local name=$2
  shift 2

  local shown=''
  if (($# > 0)); then
    shown=$(printf '%q ' "$@")
  fi
  local i exit_code

  echo -e "${GREEN}Profiling $cmd - $name${NC}"

  for ((i = 1; i <= PROFILE_RUNS; i++)); do
    echo " Run $i/$PROFILE_RUNS..."
    echo " Command: timeout 30s $cmd -profile -profiledir $PROFILE_DIR ${shown% }"

    # Run with CPU and memory profiling with timeout.
    # The status is captured via "||" because under "set -e" a bare failing
    # command would terminate the script before $? could be inspected.
    exit_code=0
    timeout 30s "$cmd" -profile -profiledir "$PROFILE_DIR" "$@" > /dev/null 2>&1 || exit_code=$?

    if [ "$exit_code" -eq 124 ]; then
      echo -e " ${YELLOW}Warning: Run $i timed out after 30s${NC}"
    elif [ "$exit_code" -ne 0 ]; then
      echo -e " ${RED}Error: Run $i failed with exit code $exit_code${NC}"
    fi

    # Small delay between runs
    sleep 1
  done

  echo
}

#######################################
# Special function for profiling dmap which runs continuously: start it in
# the background, let it work for 3 seconds, then interrupt it.
# Arguments: $1    - path of the command to run
#            $2    - human-readable name of the workload
#            $3... - arguments for the command, one shell word each
#######################################
run_profile_dmap() {
  local cmd=$1
  local name=$2
  shift 2

  local shown=''
  if (($# > 0)); then
    shown=$(printf '%q ' "$@")
  fi
  local i pid

  echo -e "${GREEN}Profiling $cmd - $name${NC}"

  for ((i = 1; i <= PROFILE_RUNS; i++)); do
    echo " Run $i/$PROFILE_RUNS..."
    echo " Command: $cmd -profile -profiledir $PROFILE_DIR ${shown% } (will interrupt after 3s)"

    # Run dmap in background, wait a bit for it to process, then interrupt it
    "$cmd" -profile -profiledir "$PROFILE_DIR" "$@" > /dev/null 2>&1 &
    pid=$!

    # Wait for dmap to process the file and generate initial results
    sleep 3

    # Send interrupt signal to make it exit cleanly
    # We expect this to return non-zero, so we ignore the exit code
    # NOTE(review): bash starts background commands with SIGINT ignored when
    # job control is off; this presumably relies on the command installing
    # its own SIGINT handler - confirm.
    kill -INT "$pid" 2>/dev/null || true
    wait "$pid" 2>/dev/null || true

    echo " Completed"

    # Small delay between runs
    sleep 1
  done

  echo
}

#######################################
# List the five most recent profile files of one kind, or print a fallback
# message when none exist.
# Arguments: $1 - kind infix used in the file name (cpu, mem, alloc)
#            $2 - label used in the fallback message
#######################################
list_recent_profiles() {
  local kind=$1
  local label=$2

  # Test for matches explicitly: "ls ... | head -5 || echo ..." never reached
  # the fallback, because the pipeline's status is that of head.
  if compgen -G "$PROFILE_DIR/*_${kind}_*.prof" > /dev/null; then
    ls -lt "$PROFILE_DIR"/*_"${kind}"_*.prof | head -5
  else
    echo " No $label profiles found"
  fi
}

# Generate test data
echo -e "${GREEN}Preparing test data...${NC}"
generate_test_data "1MB" "$TEST_DATA_DIR/small.log"
generate_test_data "10MB" "$TEST_DATA_DIR/medium.log"
# Skip large file for faster testing
# generate_test_data "1GB" "$TEST_DATA_DIR/large.log"

# Generate CSV data for dmap (smaller size for faster processing)
if [ ! -f "$TEST_DATA_DIR/test.csv" ]; then
  echo -e "${YELLOW}Generating CSV test data${NC}"
  echo " Command: go run cmd/generate_profile_data.go -size \"10MB\" -output \"$TEST_DATA_DIR/test.csv\" -format csv"
  go run cmd/generate_profile_data.go -size "10MB" -output "$TEST_DATA_DIR/test.csv" -format csv
fi

echo

# Build commands (in a subshell, so the working directory is left untouched)
echo -e "${GREEN}Building commands...${NC}"
echo " Command: cd .. && make dcat dgrep dmap"
(cd .. && make dcat dgrep dmap)

echo

# Profile dcat
echo -e "${GREEN}=== Profiling dcat ===${NC}"
run_profile "../dcat" "small_file" -plain -cfg none "$TEST_DATA_DIR/small.log"
# Skip medium file for faster profiling
# run_profile "../dcat" "medium_file" -plain -cfg none "$TEST_DATA_DIR/medium.log"
# Skip large file for faster profiling - uncomment if needed
# run_profile "../dcat" "large_file" -plain -cfg none "$TEST_DATA_DIR/large.log"

# Profile dgrep
echo -e "${GREEN}=== Profiling dgrep ===${NC}"
run_profile "../dgrep" "simple_regex" -plain -cfg none -regex 'user[0-9]+' "$TEST_DATA_DIR/small.log"
# Use small file for faster profiling
# run_profile "../dgrep" "complex_regex" -plain -cfg none -regex '\d{4}-\d{2}-\d{2}.*login.*\d{3}' "$TEST_DATA_DIR/medium.log"
# run_profile "../dgrep" "with_context" -plain -cfg none -regex 'login' -before 2 -after 2 "$TEST_DATA_DIR/medium.log"

# Profile dmap
echo -e "${GREEN}=== Profiling dmap ===${NC}"

# Generate DTail default format test data for dmap
if [ ! -f "$TEST_DATA_DIR/dtail_format.log" ]; then
  echo -e "${YELLOW}Generating DTail format test data for dmap${NC}"
  echo " Command: Creating DTail format log file"
  generate_dtail_format_data "$TEST_DATA_DIR/dtail_format.log" 1000
fi

# Profile dmap with DTail format.
# Each query is passed as a single word; the "$" inside the single quotes is
# meant literally and must not be expanded by the shell.
run_profile_dmap "../dmap" "simple_count" -plain -cfg none \
  -query 'from STATS select count(*)' \
  -files "$TEST_DATA_DIR/dtail_format.log"
# shellcheck disable=SC2016
run_profile_dmap "../dmap" "aggregations" -plain -cfg none \
  -query 'from STATS select sum($goroutines),avg($cgocalls),max(lifetimeConnections)' \
  -files "$TEST_DATA_DIR/dtail_format.log"
run_profile_dmap "../dmap" "group_by_connections" -plain -cfg none \
  -query 'from STATS select currentConnections,count(*) group by currentConnections' \
  -files "$TEST_DATA_DIR/dtail_format.log"

# Also test CSV format
echo -e "\n${YELLOW}Testing CSV format with dmap${NC}"
run_profile_dmap "../dmap" "csv_query" -plain -cfg none \
  -query 'select user,action,count(*) where status="success" group by user,action logformat csv' \
  -files "$TEST_DATA_DIR/test.csv"

echo
echo -e "${GREEN}Profiling complete!${NC}"
echo

# Analyze profiles
echo -e "${GREEN}=== Profile Analysis ===${NC}"
echo "Profile files generated in: $PROFILE_DIR"
echo

# List recent profiles
echo "Recent CPU profiles:"
list_recent_profiles cpu "CPU"

echo
echo "Recent memory profiles:"
list_recent_profiles mem "memory"

echo
echo "Recent allocation profiles:"
list_recent_profiles alloc "allocation"

echo
echo -e "${GREEN}To analyze a profile, use:${NC}"
echo " go tool pprof <profile_file>"
echo " ../profiling/profile.sh <profile_file>"
echo
echo -e "${GREEN}Examples:${NC}"
echo " # Interactive analysis"
echo " go tool pprof $PROFILE_DIR/dcat_cpu_*.prof"
echo
echo " # Generate flame graph"
echo " go tool pprof -http=:8080 $PROFILE_DIR/dcat_cpu_*.prof"
echo
echo " # Quick summary with dprofile"
echo " ../profiling/profile.sh $PROFILE_DIR/dcat_cpu_*.prof"
echo
#!/bin/bash
#
# File: profiling/profile_dmap.sh (new file, mode 100755)
#
# Profile script specifically for dmap with MapReduce format data.
# Generates DTail default format log files, runs three dmap queries with
# -profile (interrupting each after 3 seconds) and summarizes the newest CPU
# and memory profiles with ../profiling/profile.sh.
#
# Environment:
#   PROFILE_DIR    directory passed to -profiledir (default: profiles)
#   TEST_DATA_DIR  directory holding generated input files (default: testdata)

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"

# Colors for output
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m' # No Color

# Default values
PROFILE_DIR="${PROFILE_DIR:-profiles}"
TEST_DATA_DIR="${TEST_DATA_DIR:-testdata}"

# Create directories
mkdir -p "$PROFILE_DIR"
mkdir -p "$TEST_DATA_DIR"

echo -e "${GREEN}DTail dmap Profiling${NC}"
echo "===================="
echo

#######################################
# Generate MapReduce format test data (generickv format:
# field1=value1|field2=value2|...), unless the file already exists.
# Not called anywhere in this script; kept for manual use.
# Arguments: $1 - output file path
#            $2 - number of lines to write
#######################################
generate_mapreduce_data() {
  local filename=$1
  local lines=$2
  local tmp="${filename}.tmp"
  local i hostname hour min sec timestamp goroutines openFiles connections
  local currentConnections lifetimeConnections

  if [ ! -f "$filename" ]; then
    echo -e "${YELLOW}Generating MapReduce format test data: $filename${NC}"
    echo " Command: Creating $filename with $lines lines (generickv format)"

    for ((i = 1; i <= lines; i++)); do
      hostname="host$((i % 10))"
      # Simple timestamp generation without date command.
      # The modulo covers the whole sum so the hour stays within 0..23.
      hour=$(((10 + i / 3600) % 24))
      min=$(((i / 60) % 60))
      sec=$((i % 60))
      printf -v timestamp '2024-01-01T%02d:%02d:%02d.000Z' "$hour" "$min" "$sec"
      goroutines=$((40 + i % 40))
      openFiles=$((100 + i % 50))
      connections=$((10 + i % 20))
      currentConnections=$((i % 10))
      lifetimeConnections=$((1000 + i))

      echo "table=STATS|hostname=$hostname|timestamp=$timestamp|goroutines=$goroutines|openFiles=$openFiles|connections=$connections|currentConnections=$currentConnections|lifetimeConnections=$lifetimeConnections"
    done > "$tmp"

    # Rename only once complete, so an interrupted run leaves no partial
    # file behind that the -f check above would accept.
    mv -- "$tmp" "$filename"
  fi
}

#######################################
# Generate DTail default format test data, unless the file already exists.
# Format: INFO|date-time|pid|caller|cpus|goroutines|cgocalls|loadavg|uptime|
#         MAPREDUCE:STATS|key=value|...
# Arguments: $1 - output file path
#            $2 - number of lines to write
#######################################
generate_dtail_format_data() {
  local filename=$1
  local lines=$2
  local tmp="${filename}.tmp"
  local stamp i hostname goroutines cgocalls cpus loadavg uptime connections lifetime

  if [ ! -f "$filename" ]; then
    echo -e "${YELLOW}Generating DTail default format test data: $filename${NC}"
    echo " Command: Creating $filename with $lines lines (DTail default format)"

    # One timestamp for the whole file: avoids forking date once per line.
    stamp=$(date +%m%d-%H%M%S)

    for ((i = 1; i <= lines; i++)); do
      hostname="host$((i % 10))"
      goroutines=$((40 + i % 40))
      cgocalls=$((i % 100))
      cpus=$((1 + i % 8))
      # 0.00 .. 0.99. The previous bc expression ("scale=2; $i % 100 / 100")
      # always produced 0, because bc's % operator honours scale.
      printf -v loadavg '0.%02d' "$((i % 100))"
      uptime="${i}h0m0s"
      connections=$((i % 10))
      lifetime=$((1000 + i))

      echo "INFO|$stamp|1|stats.go:56|$cpus|$goroutines|$cgocalls|$loadavg|$uptime|MAPREDUCE:STATS|hostname=$hostname|currentConnections=$connections|lifetimeConnections=$lifetime"
    done > "$tmp"

    mv -- "$tmp" "$filename"
  fi
}

#######################################
# Print the newest file matching $PROFILE_DIR/<prefix>_*.prof, or an empty
# line when there is none.
# Arguments: $1 - file name prefix, e.g. dmap_cpu
#######################################
latest_profile() {
  local prefix=$1
  local newest=''
  local candidate

  for candidate in "$PROFILE_DIR"/"$prefix"_*.prof; do
    # An unmatched glob stays literal; skip it.
    [ -e "$candidate" ] || continue
    if [ -z "$newest" ] || [ "$candidate" -nt "$newest" ]; then
      newest=$candidate
    fi
  done

  printf '%s\n' "$newest"
}

#######################################
# Run one dmap query with profiling enabled, interrupt it after 3 seconds
# and show the first 10 lines of its output.
# Arguments: $1 - title shown in the heading
#            $2 - dmap query string
#######################################
profile_query() {
  local title=$1
  local query=$2
  local data_file="$TEST_DATA_DIR/stats_small.log"
  local output pid

  echo -e "\n${YELLOW}Query: ${title}${NC}"
  echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$query\" -files $data_file (will interrupt after 3s)"

  output=$(mktemp)

  # dmap is started as a plain background command (not "dmap | head &"), so
  # that $! is dmap's PID. With a background pipeline $! is the PID of the
  # last stage, and the interrupt below would hit head instead of dmap.
  ../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$query" -files "$data_file" > "$output" 2>&1 &
  pid=$!

  sleep 3

  # NOTE(review): bash starts background commands with SIGINT ignored when
  # job control is off; this presumably relies on dmap installing its own
  # SIGINT handler - confirm.
  kill -INT "$pid" 2>/dev/null || true
  wait "$pid" 2>/dev/null || true

  head -10 "$output"
  rm -f -- "$output"
}

#######################################
# Summarize the newest dmap profile of one kind with profile.sh.
# Arguments: $1 - heading label, e.g. "CPU Profile"
#            $2 - file name prefix, e.g. dmap_cpu
#######################################
analyze_latest() {
  local label=$1
  local prefix=$2
  local profile

  profile=$(latest_profile "$prefix")
  if [ -n "$profile" ]; then
    echo -e "\n${label}: $(basename "$profile")"
    ../profiling/profile.sh -top 5 "$profile" 2>/dev/null || echo " Analysis failed"
  fi
}

# Generate test data in DTail default format instead
echo -e "${GREEN}Preparing MapReduce test data...${NC}"
generate_dtail_format_data "$TEST_DATA_DIR/stats_small.log" 100
generate_dtail_format_data "$TEST_DATA_DIR/stats_medium.log" 1000

# Build dmap. The build itself stays best-effort (an existing binary is
# fine), but profiling without any binary is pointless, so check for it.
echo -e "${GREEN}Building commands...${NC}"
echo " Command: cd .. && make dmap"
(cd .. && make dmap 2>/dev/null) || true
if [ ! -x ../dmap ]; then
  echo -e "${RED}Error: ../dmap not found or not executable${NC}" >&2
  exit 1
fi

echo

# Profile different dmap queries
echo -e "${GREEN}Profiling dmap queries...${NC}"

# The "$" in the queries is part of the query text, hence single quotes.
# Query 1: Simple count
# shellcheck disable=SC2016
profile_query "Count by hostname" 'from STATS select count($line) group by hostname'

# Query 2: Aggregations
# shellcheck disable=SC2016
profile_query "Sum and average" 'from STATS select sum($goroutines),avg($goroutines) group by hostname'

# Query 3: Min/Max
profile_query "Min and max" 'from STATS select min(currentConnections),max(lifetimeConnections) group by hostname'

echo
echo -e "${GREEN}Analyzing dmap profiles...${NC}"

# Find and analyze latest dmap profiles
analyze_latest "CPU Profile" dmap_cpu
analyze_latest "Memory Profile" dmap_mem

echo
echo -e "${GREEN}dmap profiling complete!${NC}"
echo
echo "To analyze profiles in detail:"
echo " go tool pprof $PROFILE_DIR/dmap_cpu_*.prof"
echo " go tool pprof -alloc_space $PROFILE_DIR/dmap_mem_*.prof"

# No cleanup needed - no output files are created during profiling

# ============================================================================
#!/bin/bash
#
# File: profiling/profile_quick.sh (new file, mode 100755)
#
# Quick profile script for dtail commands.
# This runs profiling with smaller datasets for faster results: one run each
# of dcat, dgrep and dmap, followed by a short summary of the newest CPU
# profile of each tool.
#
# Environment:
#   PROFILE_DIR    directory passed to -profiledir (default: profiles)
#   TEST_DATA_DIR  directory holding generated input files (default: testdata)

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"

# Colors for output
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Default values
PROFILE_DIR="${PROFILE_DIR:-profiles}"
TEST_DATA_DIR="${TEST_DATA_DIR:-testdata}"

# Create directories
mkdir -p "$PROFILE_DIR"
mkdir -p "$TEST_DATA_DIR"

echo -e "${GREEN}DTail Quick Profiling${NC}"
echo "====================="
echo

#######################################
# Print the newest file matching $PROFILE_DIR/<prefix>_*.prof, or an empty
# line when there is none.
# Arguments: $1 - file name prefix, e.g. dcat_cpu
#######################################
latest_profile() {
  local prefix=$1
  local newest=''
  local candidate

  for candidate in "$PROFILE_DIR"/"$prefix"_*.prof; do
    # An unmatched glob stays literal; skip it.
    [ -e "$candidate" ] || continue
    if [ -z "$newest" ] || [ "$candidate" -nt "$newest" ]; then
      newest=$candidate
    fi
  done

  printf '%s\n' "$newest"
}

#######################################
# Show a short summary of the newest CPU profile of one tool.
# Arguments: $1 - tool name (dcat, dgrep, dmap)
#######################################
summarize_latest_cpu_profile() {
  local tool=$1
  local profile

  profile=$(latest_profile "${tool}_cpu")
  if [ -n "$profile" ]; then
    echo " Generated: $(basename "$profile")"
    echo " Analysis: ../profiling/profile.sh -top 3 $profile"
    # grep exits non-zero when the heading is absent; without the fallback
    # "set -e" would end the whole script silently at this point.
    ../profiling/profile.sh -top 3 "$profile" | grep -A 5 "Top 3 functions" \
      || echo " Analysis failed"
  fi
}

# Generate test data if needed. Each file is checked on its own, so a
# missing CSV is regenerated even when the log file already exists.
# NOTE(review): profile_benchmarks.sh in the same directory runs
# "go run cmd/generate_profile_data.go", while this script uses
# "generate_profile_data.go" - confirm which path is correct.
data_announced=0
for data_format in log csv; do
  data_file="$TEST_DATA_DIR/quick_test.$data_format"
  if [ ! -f "$data_file" ]; then
    if [ "$data_announced" -eq 0 ]; then
      echo -e "${YELLOW}Generating test data...${NC}"
      data_announced=1
    fi
    echo " Command: go run generate_profile_data.go -size \"10MB\" -output \"$data_file\" -format $data_format"
    go run generate_profile_data.go -size "10MB" -output "$data_file" -format "$data_format"
  fi
done

# Build commands (best effort; in a subshell so the working directory is
# left untouched)
echo -e "${GREEN}Building commands...${NC}"
echo " Command: cd .. && make dcat dgrep dmap"
(cd .. && make dcat dgrep dmap 2>/dev/null) || true

echo
echo -e "${GREEN}Running quick profiles...${NC}"

# A failing tool is reported and the script carries on with the next one;
# a bare failing command would abort the script under "set -e" without any
# message, because the tool's output is discarded.

# Profile dcat
echo -e "\n${YELLOW}Profiling dcat...${NC}"
echo "Command: ../dcat -profile -profiledir $PROFILE_DIR -plain -cfg none $TEST_DATA_DIR/quick_test.log"
../dcat -profile -profiledir "$PROFILE_DIR" -plain -cfg none "$TEST_DATA_DIR/quick_test.log" > /dev/null 2>&1 \
  || echo -e " ${YELLOW}Warning: dcat exited with status $?${NC}"
summarize_latest_cpu_profile dcat

# Profile dgrep
echo -e "\n${YELLOW}Profiling dgrep...${NC}"
echo "Command: ../dgrep -profile -profiledir $PROFILE_DIR -plain -cfg none -regex \"user[0-9]+\" $TEST_DATA_DIR/quick_test.log"
../dgrep -profile -profiledir "$PROFILE_DIR" -plain -cfg none -regex "user[0-9]+" "$TEST_DATA_DIR/quick_test.log" > /dev/null 2>&1 \
  || echo -e " ${YELLOW}Warning: dgrep exited with status $?${NC}"
summarize_latest_cpu_profile dgrep

# Profile dmap
# NOTE(review): unlike the dmap invocations in profile_benchmarks.sh and
# profile_dmap.sh, this one passes no -files argument and is not
# interrupted - confirm that it terminates on its own.
echo -e "\n${YELLOW}Profiling dmap...${NC}"
echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"select count(*) from $TEST_DATA_DIR/quick_test.csv\""
../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "select count(*) from $TEST_DATA_DIR/quick_test.csv" > /dev/null 2>&1 \
  || echo -e " ${YELLOW}Warning: dmap exited with status $?${NC}"
summarize_latest_cpu_profile dmap

echo
echo -e "${GREEN}Quick profiling complete!${NC}"
echo
echo "To analyze in detail:"
echo " go tool pprof $PROFILE_DIR/<profile_file>"
echo " make profile-flamegraph PROFILE=$PROFILE_DIR/<profile_file>"
echo
