summary refs log tree commit diff
path: root/benchmarks
diff options
context:
space:
mode:
author: Paul Buetow <paul@buetow.org> 2025-06-26 15:34:04 +0300
committer: Paul Buetow <paul@buetow.org> 2025-06-26 15:34:04 +0300
commit: 6491d425b98e62fb75a271bf34ad2686cd4e842c (patch)
tree: 9f0d4e6434c0eb35335bf8875e70ad6a8d84bd0c /benchmarks
parent: a26d91c804b3d6c774c049868847b536d03aef1a (diff)
fix: handle dmap continuous execution in profiling framework
dmap is designed to run continuously and report MapReduce results at
intervals, which caused it to hang during profiling.

Fixed by:
- Added run_profile_dmap() function that runs dmap in background
- Sends SIGINT after 3 seconds to cleanly exit dmap
- Updated all dmap profiling calls to use the new function
- Applied fix to both profile_benchmarks.sh and profile_dmap.sh

This ensures dmap can be profiled successfully without timing out.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
Diffstat (limited to 'benchmarks')
-rwxr-xr-x benchmarks/profile_benchmarks.sh 40
-rwxr-xr-x benchmarks/profile_dmap.sh 25
2 files changed, 55 insertions, 10 deletions
diff --git a/benchmarks/profile_benchmarks.sh b/benchmarks/profile_benchmarks.sh
index b0bcf64..976aaf7 100755
--- a/benchmarks/profile_benchmarks.sh
+++ b/benchmarks/profile_benchmarks.sh
@@ -69,6 +69,38 @@ run_profile() {
echo
}
+# Special function for profiling dmap which runs continuously
+run_profile_dmap() {
+ local cmd=$1
+ local name=$2
+ local args=$3
+
+ echo -e "${GREEN}Profiling $cmd - $name${NC}"
+
+ for i in $(seq 1 $PROFILE_RUNS); do
+ echo " Run $i/$PROFILE_RUNS..."
+ echo " Command: $cmd -profile -profiledir $PROFILE_DIR $args (will interrupt after 3s)"
+
+ # Run dmap in background, wait a bit for it to process, then interrupt it
+ $cmd -profile -profiledir "$PROFILE_DIR" $args > /dev/null 2>&1 &
+ local pid=$!
+
+ # Wait for dmap to process the file and generate initial results
+ sleep 3
+
+ # Send interrupt signal to make it exit cleanly
+ kill -INT $pid 2>/dev/null
+ wait $pid 2>/dev/null
+
+ echo " Completed"
+
+ # Small delay between runs
+ sleep 1
+ done
+
+ echo
+}
+
# Generate test data
echo -e "${GREEN}Preparing test data...${NC}"
generate_test_data "1MB" "$TEST_DATA_DIR/small.log"
@@ -132,13 +164,13 @@ if [ ! -f "$TEST_DATA_DIR/dtail_format.log" ]; then
fi
# Profile dmap with DTail format
-run_profile "../dmap" "simple_count" "-plain -cfg none -query 'from STATS select count(*)' -files $TEST_DATA_DIR/dtail_format.log"
-run_profile "../dmap" "aggregations" "-plain -cfg none -query 'from STATS select sum(\$goroutines),avg(\$cgocalls),max(lifetimeConnections)' -files $TEST_DATA_DIR/dtail_format.log"
-run_profile "../dmap" "group_by_connections" "-plain -cfg none -query 'from STATS select currentConnections,count(*) group by currentConnections' -files $TEST_DATA_DIR/dtail_format.log"
+run_profile_dmap "../dmap" "simple_count" "-plain -cfg none -query 'from STATS select count(*)' -files $TEST_DATA_DIR/dtail_format.log"
+run_profile_dmap "../dmap" "aggregations" "-plain -cfg none -query 'from STATS select sum(\$goroutines),avg(\$cgocalls),max(lifetimeConnections)' -files $TEST_DATA_DIR/dtail_format.log"
+run_profile_dmap "../dmap" "group_by_connections" "-plain -cfg none -query 'from STATS select currentConnections,count(*) group by currentConnections' -files $TEST_DATA_DIR/dtail_format.log"
# Also test CSV format
echo -e "\n${YELLOW}Testing CSV format with dmap${NC}"
-run_profile "../dmap" "csv_query" "-plain -cfg none -query 'select user,action,count(*) where status=\"success\" group by user,action logformat csv' -files $TEST_DATA_DIR/test.csv"
+run_profile_dmap "../dmap" "csv_query" "-plain -cfg none -query 'select user,action,count(*) where status=\"success\" group by user,action logformat csv' -files $TEST_DATA_DIR/test.csv"
echo
echo -e "${GREEN}Profiling complete!${NC}"
diff --git a/benchmarks/profile_dmap.sh b/benchmarks/profile_dmap.sh
index a3a1151..904c793 100755
--- a/benchmarks/profile_dmap.sh
+++ b/benchmarks/profile_dmap.sh
@@ -100,20 +100,33 @@ echo -e "${GREEN}Profiling dmap queries...${NC}"
# Query 1: Simple count
echo -e "\n${YELLOW}Query: Count by hostname${NC}"
QUERY="from STATS select count(\$line) group by hostname outfile $TEST_DATA_DIR/count_output.csv"
-echo "Command: timeout 10s ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log"
-timeout 10s ../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -10
+echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log (will interrupt after 3s)"
+# Run dmap in background and interrupt after 3 seconds
+../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -10 &
+DMAP_PID=$!
+sleep 3
+kill -INT $DMAP_PID 2>/dev/null
+wait $DMAP_PID 2>/dev/null
# Query 2: Aggregations
echo -e "\n${YELLOW}Query: Sum and average${NC}"
QUERY="from STATS select sum(\$goroutines),avg(\$goroutines) group by hostname outfile $TEST_DATA_DIR/sum_avg_output.csv"
-echo "Command: timeout 10s ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log"
-timeout 10s ../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -10
+echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log (will interrupt after 3s)"
+../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -10 &
+DMAP_PID=$!
+sleep 3
+kill -INT $DMAP_PID 2>/dev/null
+wait $DMAP_PID 2>/dev/null
# Query 3: Min/Max
echo -e "\n${YELLOW}Query: Min and max${NC}"
QUERY="from STATS select min(currentConnections),max(lifetimeConnections) group by hostname outfile $TEST_DATA_DIR/min_max_output.csv"
-echo "Command: timeout 10s ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log"
-timeout 10s ../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -10
+echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log (will interrupt after 3s)"
+../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -10 &
+DMAP_PID=$!
+sleep 3
+kill -INT $DMAP_PID 2>/dev/null
+wait $DMAP_PID 2>/dev/null
echo
echo -e "${GREEN}Analyzing dmap profiles...${NC}"