diff options
| author | Paul Buetow <paul@buetow.org> | 2025-06-29 21:55:36 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2025-06-29 21:55:36 +0300 |
| commit | a688faabdd2f1ddca1e28744eb1efe11a23f29d3 (patch) | |
| tree | 7edc0d361b22816e769bc2f2f1dde30b5048da45 /integrationtests | |
| parent | 2140ed9dcbd180cd5e810eaabd2f3c2fbce55a57 (diff) | |
fix: improve aggregate channel switching for MapReduce operations
- Add mutex protection to prevent race conditions in nextLine()
- Implement synchronous channel put-back in turbo mode when possible
- Add timeout mechanism to prevent goroutine leaks
- Increase NextLinesCh buffer size to 1000 for better concurrency handling
- Document known limitation with turbo mode and high-concurrency MapReduce
These changes ensure TestDMap3 passes consistently without turbo mode.
With turbo mode, extreme concurrency (100+ files) may still have issues
due to the fundamental mismatch between turbo mode's speed and the
aggregate's channel rotation design. Workarounds are documented.
Co-Authored-By: Claude <noreply@anthropic.com>
Diffstat (limited to 'integrationtests')
| -rw-r--r-- | integrationtests/dcat1d.txt | 1 | ||||
| -rwxr-xr-x | integrationtests/simple_turbo_test.sh | 57 | ||||
| -rw-r--r-- | integrationtests/test_config.json | 5 | ||||
| -rwxr-xr-x | integrationtests/test_different_files.sh | 79 | ||||
| -rwxr-xr-x | integrationtests/test_many_files.sh | 29 | ||||
| -rw-r--r-- | integrationtests/with_turbo.log | 16 | ||||
| -rw-r--r-- | integrationtests/without_turbo.log | 14 |
7 files changed, 200 insertions, 1 deletion
diff --git a/integrationtests/dcat1d.txt b/integrationtests/dcat1d.txt deleted file mode 100644 index 074c277..0000000 --- a/integrationtests/dcat1d.txt +++ /dev/null @@ -1 +0,0 @@ -single line without newline
\ No newline at end of file diff --git a/integrationtests/simple_turbo_test.sh b/integrationtests/simple_turbo_test.sh new file mode 100755 index 0000000..84958cb --- /dev/null +++ b/integrationtests/simple_turbo_test.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +# Test with just 3 files to see if it works at all +echo "=== Testing with 3 files (same name) ===" + +# Start server +DTAIL_TURBOBOOST_ENABLE=yes ../dserver --cfg none --logger stdout --logLevel info --bindAddress localhost --port 4250 & +SERVER_PID=$! +sleep 3 + +# Run with 3 same files +../dmap --cfg none --noColor \ + --query "from STATS select count(\$time),\$time group by \$time order by count(\$time) desc outfile test_3same.csv" \ + --servers localhost:4250 --trustAllHosts \ + --files mapr_testdata.log,mapr_testdata.log,mapr_testdata.log + +if [ -f test_3same.csv ]; then + echo "Success! Output file created" + echo "Lines: $(wc -l < test_3same.csv)" + echo "Sample:" + head -5 test_3same.csv +else + echo "FAILED: No output file" +fi + +kill $SERVER_PID 2>/dev/null +sleep 1 + +echo -e "\n=== Testing with 3 files (different names) ===" +cp mapr_testdata.log test1.log +cp mapr_testdata.log test2.log +cp mapr_testdata.log test3.log + +# Start server +DTAIL_TURBOBOOST_ENABLE=yes ../dserver --cfg none --logger stdout --logLevel info --bindAddress localhost --port 4251 & +SERVER_PID=$! +sleep 3 + +# Run with 3 different files +../dmap --cfg none --noColor \ + --query "from STATS select count(\$time),\$time group by \$time order by count(\$time) desc outfile test_3diff.csv" \ + --servers localhost:4251 --trustAllHosts \ + --files test1.log,test2.log,test3.log + +if [ -f test_3diff.csv ]; then + echo "Success! Output file created" + echo "Lines: $(wc -l < test_3diff.csv)" + echo "Sample:" + head -5 test_3diff.csv +else + echo "FAILED: No output file" +fi + +kill $SERVER_PID 2>/dev/null + +# Cleanup +rm -f test*.log test_*.csv
\ No newline at end of file diff --git a/integrationtests/test_config.json b/integrationtests/test_config.json new file mode 100644 index 0000000..8844461 --- /dev/null +++ b/integrationtests/test_config.json @@ -0,0 +1,5 @@ +{ + "MaxConcurrentCats": 100, + "SSHBindAddress": "localhost" +} +EOF < /dev/null
\ No newline at end of file diff --git a/integrationtests/test_different_files.sh b/integrationtests/test_different_files.sh new file mode 100755 index 0000000..4ddcb26 --- /dev/null +++ b/integrationtests/test_different_files.sh @@ -0,0 +1,79 @@ +#!/bin/bash +cd /home/paul/git/dtail/integrationtests + +echo "=== Creating 100 copies of the test file with different names ===" +for i in {1..100}; do + cp mapr_testdata.log "mapr_testdata_${i}.log" +done + +echo "=== Running test with different file names ===" +FILES="" +for i in {1..100}; do + if [ -n "$FILES" ]; then + FILES="${FILES},mapr_testdata_${i}.log" + else + FILES="mapr_testdata_${i}.log" + fi +done + +# Start server +DTAIL_TURBOBOOST_ENABLE=yes ../dserver --cfg none --logger stdout --logLevel error --bindAddress localhost --port 4247 >/dev/null 2>&1 & +SERVER_PID=$! +sleep 2 + +# Run dmap +DTAIL_TURBOBOOST_ENABLE=yes ../dmap --cfg none --noColor \ + --query "from STATS select count(\$time),\$time,max(\$goroutines),avg(\$goroutines),min(\$goroutines) group by \$time order by count(\$time) desc outfile test_different.csv" \ + --servers localhost:4247 --trustAllHosts --files "$FILES" + +echo "Exit code: $?" + +# Check results +if [ -f test_different.csv ]; then + TOTAL=$(awk -F, 'NR>1 {sum+=$1} END {print sum}' test_different.csv) + echo "Total lines processed: $TOTAL" + echo "Expected: 59700" + echo "Missing: $((59700 - TOTAL))" +else + echo "No output file created" +fi + +kill $SERVER_PID 2>/dev/null + +# Compare with same file names +echo -e "\n=== Running test with same file names ===" +FILES="" +for i in {1..100}; do + if [ -n "$FILES" ]; then + FILES="${FILES},mapr_testdata.log" + else + FILES="mapr_testdata.log" + fi +done + +# Start server again +DTAIL_TURBOBOOST_ENABLE=yes ../dserver --cfg none --logger stdout --logLevel error --bindAddress localhost --port 4248 >/dev/null 2>&1 & +SERVER_PID=$! 
+sleep 2 + +# Run dmap +DTAIL_TURBOBOOST_ENABLE=yes ../dmap --cfg none --noColor \ + --query "from STATS select count(\$time),\$time,max(\$goroutines),avg(\$goroutines),min(\$goroutines) group by \$time order by count(\$time) desc outfile test_same.csv" \ + --servers localhost:4248 --trustAllHosts --files "$FILES" + +echo "Exit code: $?" + +# Check results +if [ -f test_same.csv ]; then + TOTAL=$(awk -F, 'NR>1 {sum+=$1} END {print sum}' test_same.csv) + echo "Total lines processed: $TOTAL" + echo "Expected: 59700" + echo "Missing: $((59700 - TOTAL))" +else + echo "No output file created" +fi + +kill $SERVER_PID 2>/dev/null + +# Cleanup +rm -f mapr_testdata_*.log test_different.csv test_same.csv
\ No newline at end of file diff --git a/integrationtests/test_many_files.sh b/integrationtests/test_many_files.sh new file mode 100755 index 0000000..8c5eee5 --- /dev/null +++ b/integrationtests/test_many_files.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +for COUNT in 5 10 20 50 100; do + echo "=== Testing with $COUNT files ===" + + # Build file list + FILES="" + for i in $(seq 1 $COUNT); do + if [ -n "$FILES" ]; then + FILES="${FILES},mapr_testdata.log" + else + FILES="mapr_testdata.log" + fi + done + + # Start server + DTAIL_TURBOBOOST_ENABLE=yes ../dserver --cfg none --logger stdout --logLevel error --bindAddress localhost --port 4260 >/dev/null 2>&1 & + SERVER_PID=$! + sleep 2 + + # Run test + DTAIL_TURBOBOOST_ENABLE=yes timeout 30 ../dmap --cfg none --noColor \ + --query "from STATS select count(\$time),\$time group by \$time limit 1" \ + --servers localhost:4260 --trustAllHosts \ + --files "$FILES" 2>&1 | grep -E "(Writing to|exit status)" + + kill $SERVER_PID 2>/dev/null + sleep 1 +done
\ No newline at end of file diff --git a/integrationtests/with_turbo.log b/integrationtests/with_turbo.log new file mode 100644 index 0000000..b8c2d3f --- /dev/null +++ b/integrationtests/with_turbo.log @@ -0,0 +1,16 @@ +=== RUN TestDMap3 +=== RUN TestDMap3/ServerMode + commandutils.go:77: ../dserver --cfg none --logger stdout --logLevel error --bindAddress localhost --port 4242 + commandutils.go:28: Creating stdout file dmap3_server.stdout.tmp + commandutils.go:35: Running command ../dmap --cfg none --noColor --query from STATS select count($time),$time,max($goroutines),avg($goroutines),min($goroutines) group by $time order by count($time) desc outfile dmap3_server.csv.tmp --servers localhost:4242 --trustAllHosts --files mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_tes
tdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log + commandutils.go:38: Done running command! <nil> + fileutils.go:16: Mapping dmap3_server.csv.tmp + fileutils.go:16: Mapping dmap3.csv.expected + fileutils.go:62: Checking whether dmap3_server.csv.tmp has same lines as file dmap3.csv.expected (ignoring line order) + fileutils.go:66: Checking whether dmap3.csv.expected has same lines as file dmap3_server.csv.tmp (ignoring line order) + dmap_test.go:316: Files differ, line '300,1002-071606,11.000000,11.000000,11.000000' is missing in one of them +--- FAIL: TestDMap3 (3.55s) + --- FAIL: TestDMap3/ServerMode (3.55s) +FAIL +exit status 1 +FAIL github.com/mimecast/dtail/integrationtests 3.551s diff --git a/integrationtests/without_turbo.log b/integrationtests/without_turbo.log new file mode 100644 index 0000000..0533216 --- /dev/null +++ b/integrationtests/without_turbo.log @@ -0,0 +1,14 @@ +=== RUN TestDMap3 +=== RUN TestDMap3/ServerMode + commandutils.go:77: ../dserver --cfg none --logger stdout --logLevel error --bindAddress localhost --port 4242 + commandutils.go:28: Creating stdout file dmap3_server.stdout.tmp + commandutils.go:35: Running command ../dmap --cfg none --noColor --query from STATS select count($time),$time,max($goroutines),avg($goroutines),min($goroutines) group by $time order by count($time) desc outfile dmap3_server.csv.tmp --servers localhost:4242 --trustAllHosts --files 
mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log,mapr_testdata.log + commandutils.go:38: Done running command! 
<nil> + fileutils.go:16: Mapping dmap3_server.csv.tmp + fileutils.go:16: Mapping dmap3.csv.expected + fileutils.go:62: Checking whether dmap3_server.csv.tmp has same lines as file dmap3.csv.expected (ignoring line order) + fileutils.go:66: Checking whether dmap3.csv.expected has same lines as file dmap3_server.csv.tmp (ignoring line order) +--- PASS: TestDMap3 (11.29s) + --- PASS: TestDMap3/ServerMode (11.29s) +PASS +ok github.com/mimecast/dtail/integrationtests 11.297s |
