summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
author Paul Buetow <paul@buetow.org> 2025-06-19 10:10:23 +0300
committer Paul Buetow <paul@buetow.org> 2025-06-19 10:10:23 +0300
commit fdd68ef02bb17988631e11ad581df9b65ce65b81 (patch)
tree efd024056b5b34fc8ad8e5e42bbf5f91f53dc0fb /scripts
parent 077bdbfe10e1f955e2a869516fde55be1bff371b (diff)
Implement line ending preservation and address CLAUDE comments
- Fix server-side line ending preservation in plain mode by updating basehandler to not add protocol delimiters, preserving original CRLF/LF line endings
- Add comprehensive documentation to ProcessLine methods in all processors
- Remove all CLAUDE comments and replace with proper function documentation
- Update DCat test to include --quiet flag for cleaner server output
- Clean up PGO script and report files from scripts directory
- Improve code formatting and consistency across processor files

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/pgo.sh 159
-rw-r--r-- scripts/pgo_report.txt 68
2 files changed, 0 insertions, 227 deletions
diff --git a/scripts/pgo.sh b/scripts/pgo.sh
deleted file mode 100755
index a590313..0000000
--- a/scripts/pgo.sh
+++ /dev/null
@@ -1,159 +0,0 @@
-#!/bin/bash
-
-# Performance Guided Optimization (PGO) script for dgrep
-# This script implements true PGO using Go's -pgo compiler flag:
-# 1. Build baseline version
-# 2. Generate CPU profile for training
-# 3. Rebuild with PGO using the profile
-# 4. Compare before/after performance
-
-set -e
-
-# Global variables
-setup_environment() {
- # Get the directory where this script is located
- SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
- # Get the project root directory (parent of scripts)
- PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
-
- # Change to project root to run commands
- cd "$PROJECT_ROOT"
-
- # Define paths for all PGO files in scripts directory
- PGO_DIR="$SCRIPT_DIR"
- TEST_FILE="$PGO_DIR/test_100mb.txt"
- BASELINE_CPU_PROF="$PGO_DIR/pgo_baseline_cpu.prof"
- BASELINE_MEM_PROF="$PGO_DIR/pgo_baseline_mem.prof"
- TRAINING_PROF="$PGO_DIR/pgo_training.prof"
- OPTIMIZED_CPU_PROF="$PGO_DIR/pgo_optimized_cpu.prof"
- OPTIMIZED_MEM_PROF="$PGO_DIR/pgo_optimized_mem.prof"
- REPORT_FILE="$PGO_DIR/pgo_report.txt"
-
- echo "=== Starting Profile Guided Optimization (PGO) for dgrep ==="
- echo "Working directory: $PROJECT_ROOT"
- echo "PGO files location: $PGO_DIR"
-}
-
-create_test_file() {
- echo "1. Creating test file if needed..."
- if [ ! -f "$TEST_FILE" ]; then
- echo "Creating 100MB test file with 1M lines..."
- for i in $(seq 1 1000000); do
- echo "$i: This is a test line with INFO level logging and some extra content to make it realistic"
- done > "$TEST_FILE"
- fi
-}
-
-build_baseline() {
- echo "2. Building baseline version (without PGO)..."
- # Clean any existing binaries
- rm -f dgrep dgrep_pgo dcat dmap dtail dserver dtailhealth
- go build -tags '' -o dgrep ./cmd/dgrep/main.go
-}
-
-collect_training_data() {
- echo "3. Running baseline performance test and collecting training profile..."
- echo " - Generating baseline CPU and memory profiles..."
- ./dgrep --plain -regex "INFO" -files "$TEST_FILE" -cpuprofile "$BASELINE_CPU_PROF" -memprofile "$BASELINE_MEM_PROF" > /dev/null
-
- echo " - Collecting training profile for PGO..."
- ./dgrep --plain -regex "INFO" -files "$TEST_FILE" -cpuprofile "$TRAINING_PROF" > /dev/null
-}
-
-build_pgo_optimized() {
- echo "4. Building PGO-optimized version using training profile..."
- go build -tags '' -pgo="$TRAINING_PROF" -o dgrep_pgo ./cmd/dgrep/main.go
-}
-
-run_pgo_performance_test() {
- echo "5. Running PGO-optimized performance test..."
- echo " - Generating optimized CPU and memory profiles..."
- ./dgrep_pgo --plain -regex "INFO" -files "$TEST_FILE" -cpuprofile "$OPTIMIZED_CPU_PROF" -memprofile "$OPTIMIZED_MEM_PROF" > /dev/null
-}
-
-run_performance_comparison() {
- echo "6. Running performance comparison..."
- echo "=== PROFILE GUIDED OPTIMIZATION REPORT ===" > "$REPORT_FILE"
- echo "Generated: $(date)" >> "$REPORT_FILE"
- echo "" >> "$REPORT_FILE"
-
- echo "BASELINE (without PGO):" >> "$REPORT_FILE"
- echo "Baseline performance (5 iterations):" >> "$REPORT_FILE"
- for i in 1 2 3 4 5; do
- echo " Iteration $i:"
- { time ./dgrep --plain -regex "INFO" -files "$TEST_FILE" > /dev/null; } 2>&1 | grep real >> "$REPORT_FILE"
- done
-
- echo "" >> "$REPORT_FILE"
- echo "PGO-OPTIMIZED:" >> "$REPORT_FILE"
- echo "PGO-optimized performance (5 iterations):" >> "$REPORT_FILE"
- for i in 1 2 3 4 5; do
- echo " Iteration $i:"
- { time ./dgrep_pgo --plain -regex "INFO" -files "$TEST_FILE" > /dev/null; } 2>&1 | grep real >> "$REPORT_FILE"
- done
-}
-
-generate_detailed_analysis() {
- echo "7. Adding detailed profile analysis..."
- echo "" >> "$REPORT_FILE"
- echo "DETAILED ANALYSIS:" >> "$REPORT_FILE"
- echo "" >> "$REPORT_FILE"
- echo "Baseline CPU Profile:" >> "$REPORT_FILE"
- go tool pprof -top "$BASELINE_CPU_PROF" | head -10 >> "$REPORT_FILE"
- echo "" >> "$REPORT_FILE"
- echo "PGO-Optimized CPU Profile:" >> "$REPORT_FILE"
- go tool pprof -top "$OPTIMIZED_CPU_PROF" | head -10 >> "$REPORT_FILE"
- echo "" >> "$REPORT_FILE"
- echo "Baseline Memory Profile:" >> "$REPORT_FILE"
- go tool pprof -top "$BASELINE_MEM_PROF" | head -10 >> "$REPORT_FILE"
- echo "" >> "$REPORT_FILE"
- echo "PGO-Optimized Memory Profile:" >> "$REPORT_FILE"
- go tool pprof -top "$OPTIMIZED_MEM_PROF" | head -10 >> "$REPORT_FILE"
-}
-
-cleanup() {
- echo "8. Cleaning up..."
- rm -f dgrep_pgo
-}
-
-show_summary() {
- echo "=== PGO Complete! ==="
- echo "Results saved to: $REPORT_FILE"
- echo "Profile files generated:"
- echo " - Baseline: $BASELINE_CPU_PROF, $BASELINE_MEM_PROF"
- echo " - Training: $TRAINING_PROF"
- echo " - Optimized: $OPTIMIZED_CPU_PROF, $OPTIMIZED_MEM_PROF"
- echo ""
- echo "Test file location: $TEST_FILE"
- echo ""
- echo "PGO Process:"
- echo " ✓ Built baseline version without PGO"
- echo " ✓ Collected CPU profile for training"
- echo " ✓ Rebuilt with Go's -pgo flag using training profile"
- echo " ✓ Compared baseline vs PGO-optimized performance"
- echo ""
-
- # Show performance comparison from report
- echo "=== Performance Comparison ==="
- echo "Check $REPORT_FILE for detailed before/after comparison"
- grep -A 20 "BASELINE (without PGO)" "$REPORT_FILE" | head -10
- echo "..."
- grep -A 20 "PGO-OPTIMIZED" "$REPORT_FILE" | head -10
-}
-
-# Main execution flow
-main() {
- setup_environment
- create_test_file
- build_baseline
- collect_training_data
- build_pgo_optimized
- run_pgo_performance_test
- run_performance_comparison
- generate_detailed_analysis
- cleanup
- show_summary
-}
-
-# Run the main function
-main "$@"
\ No newline at end of file
diff --git a/scripts/pgo_report.txt b/scripts/pgo_report.txt
deleted file mode 100644
index d6e1a83..0000000
--- a/scripts/pgo_report.txt
+++ /dev/null
@@ -1,68 +0,0 @@
-=== PROFILE GUIDED OPTIMIZATION REPORT ===
-Generated: Mon 16 Jun 23:18:37 EEST 2025
-
-BASELINE (without PGO):
-Baseline performance (5 iterations):
-real 0m3.040s
-real 0m3.029s
-real 0m3.032s
-real 0m3.030s
-real 0m3.031s
-
-PGO-OPTIMIZED:
-PGO-optimized performance (5 iterations):
-real 0m3.035s
-real 0m3.033s
-real 0m3.033s
-real 0m3.034s
-real 0m3.031s
-
-DETAILED ANALYSIS:
-
-Baseline CPU Profile:
-File: dgrep
-Build ID: c4f25989f74683061bfabfc72b383431d1aeeb23
-Type: cpu
-Time: 2025-06-16 23:17:42 EEST
-Duration: 3.20s, Total samples = 8.73s (272.51%)
-Showing nodes accounting for 7.32s, 83.85% of 8.73s total
-Dropped 174 nodes (cum <= 0.04s)
- flat flat% sum% cum cum%
- 2.23s 25.54% 25.54% 2.23s 25.54% internal/runtime/syscall.Syscall6
- 0.37s 4.24% 29.78% 1.01s 11.57% runtime.selectgo
-
-PGO-Optimized CPU Profile:
-File: dgrep_pgo
-Build ID: 106bf00e9fe2a0beaaf9b0e80a5e7e14aae84c40
-Type: cpu
-Time: 2025-06-16 23:18:34 EEST
-Duration: 3.11s, Total samples = 8.66s (278.78%)
-Showing nodes accounting for 7.41s, 85.57% of 8.66s total
-Dropped 152 nodes (cum <= 0.04s)
- flat flat% sum% cum cum%
- 2.17s 25.06% 25.06% 2.17s 25.06% internal/runtime/syscall.Syscall6
- 0.51s 5.89% 30.95% 1.31s 15.13% runtime.selectgo
-
-Baseline Memory Profile:
-File: dgrep
-Build ID: c4f25989f74683061bfabfc72b383431d1aeeb23
-Type: inuse_space
-Time: 2025-06-16 23:17:45 EEST
-Showing nodes accounting for 66.08MB, 100% of 66.08MB total
- flat flat% sum% cum cum%
- 33MB 49.94% 49.94% 60.84MB 92.06% time.NewTimer
- 27.83MB 42.12% 92.06% 27.83MB 42.12% time.newTimer
- 1.72MB 2.61% 94.67% 1.72MB 2.61% runtime/pprof.StartCPUProfile
- 1.50MB 2.27% 96.94% 1.50MB 2.27% runtime.allocm
-
-PGO-Optimized Memory Profile:
-File: dgrep_pgo
-Build ID: 106bf00e9fe2a0beaaf9b0e80a5e7e14aae84c40
-Type: inuse_space
-Time: 2025-06-16 23:18:37 EEST
-Showing nodes accounting for 80.57MB, 100% of 80.57MB total
- flat flat% sum% cum cum%
- 42.35MB 52.57% 52.57% 42.35MB 52.57% time.newTimer
- 32.50MB 40.34% 92.91% 74.86MB 92.91% time.NewTimer
- 2MB 2.49% 95.39% 2MB 2.49% runtime.allocm
- 1.16MB 1.44% 96.83% 1.16MB 1.44% runtime/pprof.StartCPUProfile