summary | refs | log | tree | commit | diff
path: root/integrationtests
diff options
context:
space:
mode:
author Paul Buetow <paul@buetow.org> 2025-06-29 21:55:36 +0300
committer Paul Buetow <paul@buetow.org> 2025-06-29 21:55:36 +0300
commit a688faabdd2f1ddca1e28744eb1efe11a23f29d3 (patch)
tree 7edc0d361b22816e769bc2f2f1dde30b5048da45 /integrationtests
parent 2140ed9dcbd180cd5e810eaabd2f3c2fbce55a57 (diff)
fix: improve aggregate channel switching for MapReduce operations
- Add mutex protection to prevent race conditions in nextLine()
- Implement synchronous channel put-back in turbo mode when possible
- Add timeout mechanism to prevent goroutine leaks
- Increase NextLinesCh buffer size to 1000 for better concurrency handling
- Document known limitation with turbo mode and high-concurrency MapReduce

These changes ensure TestDMap3 passes consistently without turbo mode. With turbo mode, extreme concurrency (100+ files) may still have issues due to the fundamental mismatch between turbo mode's speed and the aggregate's channel rotation design. Workarounds are documented.

Co-Authored-By: Claude <noreply@anthropic.com>
Diffstat (limited to 'integrationtests')
-rw-r--r-- integrationtests/dcat1d.txt 1
-rwxr-xr-x integrationtests/simple_turbo_test.sh 57
-rw-r--r-- integrationtests/test_config.json 5
-rwxr-xr-x integrationtests/test_different_files.sh 79
-rwxr-xr-x integrationtests/test_many_files.sh 29
-rw-r--r-- integrationtests/with_turbo.log 16
-rw-r--r-- integrationtests/without_turbo.log 14
7 files changed, 200 insertions, 1 deletions
diff --git a/integrationtests/dcat1d.txt b/integrationtests/dcat1d.txt
deleted file mode 100644
index 074c277..0000000
--- a/integrationtests/dcat1d.txt
+++ /dev/null
@@ -1 +0,0 @@
-single line without newline
\ No newline at end of file
diff --git a/integrationtests/simple_turbo_test.sh b/integrationtests/simple_turbo_test.sh
new file mode 100755
index 0000000..84958cb
--- /dev/null
+++ b/integrationtests/simple_turbo_test.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+# Test with just 3 files to see if it works at all
+echo "=== Testing with 3 files (same name) ==="
+
+# Start server
+DTAIL_TURBOBOOST_ENABLE=yes ../dserver --cfg none --logger stdout --logLevel info --bindAddress localhost --port 4250 &
+SERVER_PID=$!
+sleep 3
+
+# Run with 3 same files
+../dmap --cfg none --noColor \
+ --query "from STATS select count(\$time),\$time group by \$time order by count(\$time) desc outfile test_3same.csv" \
+ --servers localhost:4250 --trustAllHosts \
+ --files mapr_testdata.log,mapr_testdata.log,mapr_testdata.log
+
+if [ -f test_3same.csv ]; then
+ echo "Success! Output file created"
+ echo "Lines: $(wc -l < test_3same.csv)"
+ echo "Sample:"
+ head -5 test_3same.csv
+else
+ echo "FAILED: No output file"
+fi
+
+kill $SERVER_PID 2>/dev/null
+sleep 1
+
+echo -e "\n=== Testing with 3 files (different names) ==="
+cp mapr_testdata.log test1.log
+cp mapr_testdata.log test2.log
+cp mapr_testdata.log test3.log
+
+# Start server
+DTAIL_TURBOBOOST_ENABLE=yes ../dserver --cfg none --logger stdout --logLevel info --bindAddress localhost --port 4251 &
+SERVER_PID=$!
+sleep 3
+
+# Run with 3 different files
+../dmap --cfg none --noColor \
+ --query "from STATS select count(\$time),\$time group by \$time order by count(\$time) desc outfile test_3diff.csv" \
+ --servers localhost:4251 --trustAllHosts \
+ --files test1.log,test2.log,test3.log
+
+if [ -f test_3diff.csv ]; then
+ echo "Success! Output file created"
+ echo "Lines: $(wc -l < test_3diff.csv)"
+ echo "Sample:"
+ head -5 test_3diff.csv
+else
+ echo "FAILED: No output file"
+fi
+
+kill $SERVER_PID 2>/dev/null
+
+# Cleanup
+rm -f test*.log test_*.csv
\ No newline at end of file
diff --git a/integrationtests/test_config.json b/integrationtests/test_config.json
new file mode 100644
index 0000000..8844461
--- /dev/null
+++ b/integrationtests/test_config.json
@@ -0,0 +1,5 @@
+{
+ "MaxConcurrentCats": 100,
+ "SSHBindAddress": "localhost"
+}
+EOF < /dev/null
\ No newline at end of file
diff --git a/integrationtests/test_different_files.sh b/integrationtests/test_different_files.sh
new file mode 100755
index 0000000..4ddcb26
--- /dev/null
+++ b/integrationtests/test_different_files.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+cd /home/paul/git/dtail/integrationtests
+
+echo "=== Creating 100 copies of the test file with different names ==="
+for i in {1..100}; do
+ cp mapr_testdata.log "mapr_testdata_${i}.log"
+done
+
+echo "=== Running test with different file names ==="
+FILES=""
+for i in {1..100}; do
+ if [ -n "$FILES" ]; then
+ FILES="${FILES},mapr_testdata_${i}.log"
+ else
+ FILES="mapr_testdata_${i}.log"
+ fi
+done
+
+# Start server
+DTAIL_TURBOBOOST_ENABLE=yes ../dserver --cfg none --logger stdout --logLevel error --bindAddress localhost --port 4247 >/dev/null 2>&1 &
+SERVER_PID=$!
+sleep 2
+
+# Run dmap
+DTAIL_TURBOBOOST_ENABLE=yes ../dmap --cfg none --noColor \
+ --query "from STATS select count(\$time),\$time,max(\$goroutines),avg(\$goroutines),min(\$goroutines) group by \$time order by count(\$time) desc outfile test_different.csv" \
+ --servers localhost:4247 --trustAllHosts --files "$FILES"
+
+echo "Exit code: $?"
+
+# Check results
+if [ -f test_different.csv ]; then
+ TOTAL=$(awk -F, 'NR>1 {sum+=$1} END {print sum}' test_different.csv)
+ echo "Total lines processed: $TOTAL"
+ echo "Expected: 59700"
+ echo "Missing: $((59700 - TOTAL))"
+else
+ echo "No output file created"
+fi
+
+kill $SERVER_PID 2>/dev/null
+
+# Compare with same file names
+echo -e "\n=== Running test with same file names ==="
+FILES=""
+for i in {1..100}; do
+ if [ -n "$FILES" ]; then
+ FILES="${FILES},mapr_testdata.log"
+ else
+ FILES="mapr_testdata.log"
+ fi
+done
+
+# Start server again
+DTAIL_TURBOBOOST_ENABLE=yes ../dserver --cfg none --logger stdout --logLevel error --bindAddress localhost --port 4248 >/dev/null 2>&1 &
+SERVER_PID=$!
+sleep 2
+
+# Run dmap
+DTAIL_TURBOBOOST_ENABLE=yes ../dmap --cfg none --noColor \
+ --query "from STATS select count(\$time),\$time,max(\$goroutines),avg(\$goroutines),min(\$goroutines) group by \$time order by count(\$time) desc outfile test_same.csv" \
+ --servers localhost:4248 --trustAllHosts --files "$FILES"
+
+echo "Exit code: $?"
+
+# Check results
+if [ -f test_same.csv ]; then
+ TOTAL=$(awk -F, 'NR>1 {sum+=$1} END {print sum}' test_same.csv)
+ echo "Total lines processed: $TOTAL"
+ echo "Expected: 59700"
+ echo "Missing: $((59700 - TOTAL))"
+else
+ echo "No output file created"
+fi
+
+kill $SERVER_PID 2>/dev/null
+
+# Cleanup
+rm -f mapr_testdata_*.log test_different.csv test_same.csv
\ No newline at end of file
diff --git a/integrationtests/test_many_files.sh b/integrationtests/test_many_files.sh
new file mode 100755
index 0000000..8c5eee5
--- /dev/null
+++ b/integrationtests/test_many_files.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+for COUNT in 5 10 20 50 100; do
+ echo "=== Testing with $COUNT files ==="
+
+ # Build file list
+ FILES=""
+ for i in $(seq 1 $COUNT); do
+ if [ -n "$FILES" ]; then
+ FILES="${FILES},mapr_testdata.log"
+ else
+ FILES="mapr_testdata.log"
+ fi
+ done
+
+ # Start server
+ DTAIL_TURBOBOOST_ENABLE=yes ../dserver --cfg none --logger stdout --logLevel error --bindAddress localhost --port 4260 >/dev/null 2>&1 &
+ SERVER_PID=$!
+ sleep 2
+
+ # Run test
+ DTAIL_TURBOBOOST_ENABLE=yes timeout 30 ../dmap --cfg none --noColor \
+ --query "from STATS select count(\$time),\$time group by \$time limit 1" \
+ --servers localhost:4260 --trustAllHosts \
+ --files "$FILES" 2>&1 | grep -E "(Writing to|exit status)"
+
+ kill $SERVER_PID 2>/dev/null
+ sleep 1
+done
\ No newline at end of file
diff --git a/integrationtests/with_turbo.log b/integrationtests/with_turbo.log
new file mode 100644
index 0000000..b8c2d3f
--- /dev/null
+++ b/integrationtests/with_turbo.log
@@ -0,0 +1,16 @@
+=== RUN TestDMap3
+=== RUN TestDMap3/ServerMode
+ commandutils.go:77: ../dserver --cfg none --logger stdout --logLevel error --bindAddress localhost --port 4242
+ commandutils.go:28: Creating stdout file dmap3_server.stdout.tmp
+ commandutils.go:35: Running command ../dmap --cfg none --noColor --query from STATS select count($time),$time,max($goroutines),avg($goroutines),min($goroutines) group by $time order by count($time) desc outfile dmap3_server.csv.tmp --servers localhost:4242 --trustAllHosts --files mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_te
stdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log
+ commandutils.go:38: Done running command! <nil>
+ fileutils.go:16: Mapping dmap3_server.csv.tmp
+ fileutils.go:16: Mapping dmap3.csv.expected
+ fileutils.go:62: Checking whether dmap3_server.csv.tmp has same lines as file dmap3.csv.expected (ignoring line order)
+ fileutils.go:66: Checking whether dmap3.csv.expected has same lines as file dmap3_server.csv.tmp (ignoring line order)
+ dmap_test.go:316: Files differ, line '300,1002-071606,11.000000,11.000000,11.000000' is missing in one of them
+--- FAIL: TestDMap3 (3.55s)
+ --- FAIL: TestDMap3/ServerMode (3.55s)
+FAIL
+exit status 1
+FAIL github.com/mimecast/dtail/integrationtests 3.551s
diff --git a/integrationtests/without_turbo.log b/integrationtests/without_turbo.log
new file mode 100644
index 0000000..0533216
--- /dev/null
+++ b/integrationtests/without_turbo.log
@@ -0,0 +1,14 @@
+=== RUN TestDMap3
+=== RUN TestDMap3/ServerMode
+ commandutils.go:77: ../dserver --cfg none --logger stdout --logLevel error --bindAddress localhost --port 4242
+ commandutils.go:28: Creating stdout file dmap3_server.stdout.tmp
+ commandutils.go:35: Running command ../dmap --cfg none --noColor --query from STATS select count($time),$time,max($goroutines),avg($goroutines),min($goroutines) group by $time order by count($time) desc outfile dmap3_server.csv.tmp --servers localhost:4242 --trustAllHosts --files mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_te
stdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log
+ commandutils.go:38: Done running command! <nil>
+ fileutils.go:16: Mapping dmap3_server.csv.tmp
+ fileutils.go:16: Mapping dmap3.csv.expected
+ fileutils.go:62: Checking whether dmap3_server.csv.tmp has same lines as file dmap3.csv.expected (ignoring line order)
+ fileutils.go:66: Checking whether dmap3.csv.expected has same lines as file dmap3_server.csv.tmp (ignoring line order)
+--- PASS: TestDMap3 (11.29s)
+ --- PASS: TestDMap3/ServerMode (11.29s)
+PASS
+ok github.com/mimecast/dtail/integrationtests 11.297s