From 6491d425b98e62fb75a271bf34ad2686cd4e842c Mon Sep 17 00:00:00 2001
From: Paul Buetow
Date: Thu, 26 Jun 2025 15:34:04 +0300
Subject: fix: handle dmap continuous execution in profiling framework
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

dmap is designed to run continuously and report MapReduce results at
intervals, which caused it to hang during profiling.

Fixed by:
- Added run_profile_dmap() function that runs dmap in background
- Sends SIGINT after 3 seconds to cleanly exit dmap
- Updated all dmap profiling calls to use the new function
- Applied fix to both profile_benchmarks.sh and profile_dmap.sh

This ensures dmap can be profiled successfully without timing out.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude
---
 benchmarks/profile_benchmarks.sh | 40 ++++++++++++++++++++++++++++++++++++----
 benchmarks/profile_dmap.sh       | 25 +++++++++++++++++++------
 2 files changed, 55 insertions(+), 10 deletions(-)

(limited to 'benchmarks')

diff --git a/benchmarks/profile_benchmarks.sh b/benchmarks/profile_benchmarks.sh
index b0bcf64..976aaf7 100755
--- a/benchmarks/profile_benchmarks.sh
+++ b/benchmarks/profile_benchmarks.sh
@@ -69,6 +69,38 @@ run_profile() {
     echo
 }
 
+# Special function for profiling dmap which runs continuously
+run_profile_dmap() {
+    local cmd=$1
+    local name=$2
+    local args=$3
+
+    echo -e "${GREEN}Profiling $cmd - $name${NC}"
+
+    for i in $(seq 1 $PROFILE_RUNS); do
+        echo " Run $i/$PROFILE_RUNS..."
+        echo " Command: $cmd -profile -profiledir $PROFILE_DIR $args (will interrupt after 3s)"
+
+        # Run dmap in background, wait a bit for it to process, then interrupt it
+        $cmd -profile -profiledir "$PROFILE_DIR" $args > /dev/null 2>&1 &
+        local pid=$!
+
+        # Wait for dmap to process the file and generate initial results
+        sleep 3
+
+        # Send interrupt signal to make it exit cleanly
+        kill -INT $pid 2>/dev/null
+        wait $pid 2>/dev/null
+
+        echo " Completed"
+
+        # Small delay between runs
+        sleep 1
+    done
+
+    echo
+}
+
 # Generate test data
 echo -e "${GREEN}Preparing test data...${NC}"
 generate_test_data "1MB" "$TEST_DATA_DIR/small.log"
@@ -132,13 +164,13 @@ if [ ! -f "$TEST_DATA_DIR/dtail_format.log" ]; then
 fi
 
 # Profile dmap with DTail format
-run_profile "../dmap" "simple_count" "-plain -cfg none -query 'from STATS select count(*)' -files $TEST_DATA_DIR/dtail_format.log"
-run_profile "../dmap" "aggregations" "-plain -cfg none -query 'from STATS select sum(\$goroutines),avg(\$cgocalls),max(lifetimeConnections)' -files $TEST_DATA_DIR/dtail_format.log"
-run_profile "../dmap" "group_by_connections" "-plain -cfg none -query 'from STATS select currentConnections,count(*) group by currentConnections' -files $TEST_DATA_DIR/dtail_format.log"
+run_profile_dmap "../dmap" "simple_count" "-plain -cfg none -query 'from STATS select count(*)' -files $TEST_DATA_DIR/dtail_format.log"
+run_profile_dmap "../dmap" "aggregations" "-plain -cfg none -query 'from STATS select sum(\$goroutines),avg(\$cgocalls),max(lifetimeConnections)' -files $TEST_DATA_DIR/dtail_format.log"
+run_profile_dmap "../dmap" "group_by_connections" "-plain -cfg none -query 'from STATS select currentConnections,count(*) group by currentConnections' -files $TEST_DATA_DIR/dtail_format.log"
 
 # Also test CSV format
 echo -e "\n${YELLOW}Testing CSV format with dmap${NC}"
-run_profile "../dmap" "csv_query" "-plain -cfg none -query 'select user,action,count(*) where status=\"success\" group by user,action logformat csv' -files $TEST_DATA_DIR/test.csv"
+run_profile_dmap "../dmap" "csv_query" "-plain -cfg none -query 'select user,action,count(*) where status=\"success\" group by user,action logformat csv' -files $TEST_DATA_DIR/test.csv"
 
 echo
 echo -e "${GREEN}Profiling complete!${NC}"
diff --git a/benchmarks/profile_dmap.sh b/benchmarks/profile_dmap.sh
index a3a1151..904c793 100755
--- a/benchmarks/profile_dmap.sh
+++ b/benchmarks/profile_dmap.sh
@@ -100,20 +100,33 @@ echo -e "${GREEN}Profiling dmap queries...${NC}"
 # Query 1: Simple count
 echo -e "\n${YELLOW}Query: Count by hostname${NC}"
 QUERY="from STATS select count(\$line) group by hostname outfile $TEST_DATA_DIR/count_output.csv"
-echo "Command: timeout 10s ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log"
-timeout 10s ../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -10
+echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log (will interrupt after 3s)"
+# Run dmap in background and interrupt after 3 seconds
+../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -10 &
+DMAP_PID=$!
+sleep 3
+kill -INT $DMAP_PID 2>/dev/null
+wait $DMAP_PID 2>/dev/null
 
 # Query 2: Aggregations
 echo -e "\n${YELLOW}Query: Sum and average${NC}"
 QUERY="from STATS select sum(\$goroutines),avg(\$goroutines) group by hostname outfile $TEST_DATA_DIR/sum_avg_output.csv"
-echo "Command: timeout 10s ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log"
-timeout 10s ../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -10
+echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log (will interrupt after 3s)"
+../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -10 &
+DMAP_PID=$!
+sleep 3
+kill -INT $DMAP_PID 2>/dev/null
+wait $DMAP_PID 2>/dev/null
 
 # Query 3: Min/Max
 echo -e "\n${YELLOW}Query: Min and max${NC}"
 QUERY="from STATS select min(currentConnections),max(lifetimeConnections) group by hostname outfile $TEST_DATA_DIR/min_max_output.csv"
-echo "Command: timeout 10s ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log"
-timeout 10s ../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -10
+echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log (will interrupt after 3s)"
+../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -10 &
+DMAP_PID=$!
+sleep 3
+kill -INT $DMAP_PID 2>/dev/null
+wait $DMAP_PID 2>/dev/null
 
 echo
 echo -e "${GREEN}Analyzing dmap profiles...${NC}"
-- 
cgit v1.2.3