summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Paul Buetow <paul@buetow.org>, 2025-06-26 22:40:27 +0300
committer: Paul Buetow <paul@buetow.org>, 2025-06-26 22:40:27 +0300
commit: afba86489b00a2f5ac4d39b2853c2c51c2931536 (patch)
tree: 934ab9c6805dbc17bf71cd4b30f827c10a58a192
parent: 91e4743ce5ddab8f6359009bb45e243d4726bcdb (diff)
Remove bash scripts and update documentation to use dtail-tools
Following the successful refactoring to Go-based tooling, this commit:

1. Removes all obsolete bash scripts:
   - benchmarks/benchmark.sh
   - profiling/profile.sh
   - profiling/profile_benchmarks.sh
   - profiling/profile_dmap.sh
   - profiling/profile_quick.sh

2. Updates all documentation to use dtail-tools:
   - README.md: Updated benchmark commands to use dtail-tools
   - PROFILING.md: Updated profiling instructions to use dtail-tools

3. Updates Go code references:
   - profile_runner.go: Uses dtail-tools instead of profile.sh
   - profile_example.go: Uses dtail-tools for profile analysis

The new dtail-tools provides all the functionality of the old bash scripts
with better cross-platform compatibility, error handling, and maintainability.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
-rw-r--r-- benchmarks/PROFILING.md 76
-rw-r--r-- benchmarks/README.md 22
-rwxr-xr-x benchmarks/benchmark.sh 318
-rw-r--r-- benchmarks/cmd/profile_example.go 6
-rw-r--r-- benchmarks/profile_runner.go 23
-rwxr-xr-x profiling/profile.sh 210
-rwxr-xr-x profiling/profile_benchmarks.sh 179
-rwxr-xr-x profiling/profile_dmap.sh 142
-rwxr-xr-x profiling/profile_quick.sh 89
9 files changed, 66 insertions, 999 deletions
diff --git a/benchmarks/PROFILING.md b/benchmarks/PROFILING.md
index 04ed933..7925fb3 100644
--- a/benchmarks/PROFILING.md
+++ b/benchmarks/PROFILING.md
@@ -35,23 +35,20 @@ Each command now supports profiling flags:
### 3. Analyze Profiles
-Use the included `profile.sh` script for quick analysis:
+Use dtail-tools for quick analysis:
```bash
-# Analyze CPU profile
-./profiling/profile.sh profiles/dcat_cpu_20240101_120000.prof
-
-# Show top 20 functions
-./profiling/profile.sh -top 20 profiles/dgrep_mem_20240101_120000.prof
-
-# Sort by cumulative time/allocations
-./profiling/profile.sh -cum profiles/dmap_cpu_20240101_120000.prof
-
# List all profiles
-./profiling/profile.sh -list profiles/
+./dtail-tools profile -mode list
+
+# Analyze a specific profile
+./dtail-tools profile -mode analyze profiles/dcat_cpu_20240101_120000.prof
# Open web browser with flame graph
-./profiling/profile.sh -web profiles/dcat_cpu_*.prof
+./dtail-tools profile -mode analyze profiles/dcat_cpu_*.prof -web
+
+# You can also use go tool pprof directly:
+go tool pprof profiles/dcat_cpu_20240101_120000.prof
```
## Profiling Options
@@ -84,26 +81,26 @@ All dtail commands support these profiling flags:
## Using with Benchmarks
-### Automated Profiling Script
+### Automated Profiling
-Run the included profiling script:
+Run profiling using dtail-tools:
```bash
-cd benchmarks
-./profile_benchmarks.sh
+# Quick profiling with small datasets
+./dtail-tools profile -mode quick
+
+# Full profiling suite
+./dtail-tools profile -mode full
+
+# Profile dmap specifically (with MapReduce format)
+./dtail-tools profile -mode dmap
```
-This script:
+This tool:
- Generates test data of various sizes
-- Profiles dcat and dgrep with different workloads
+- Profiles dcat, dgrep, and dmap with different workloads
- Stores profiles in the `profiles` directory
-- Provides analysis commands
-
-For dmap profiling (requires MapReduce format):
-```bash
-cd benchmarks
-./profile_dmap.sh
-```
+- Provides immediate analysis of results
### Using Make Targets
@@ -111,13 +108,20 @@ cd benchmarks
# Quick profiling with immediate results
make profile-quick
-# Profile individual commands
-make profile-dcat
-make profile-dgrep
-make profile-dmap # Uses MapReduce format
+# Full profiling suite
+make profile-all
+
+# Profile dmap specifically
+make profile-dmap
+
+# List available profiles
+make profile-list
+
+# Analyze a specific profile
+make profile-analyze PROFILE=profiles/dcat_cpu_*.prof
-# Full automated profiling
-make profile-auto
+# Open web interface for profile
+make profile-web PROFILE=profiles/dcat_cpu_*.prof
```
### Benchmark Integration
@@ -172,19 +176,19 @@ go tool pprof -svg profiles/dgrep_mem_*.prof > profile.svg
go tool pprof -text profiles/dmap_alloc_*.prof > report.txt
```
-### Using profile.sh
+### Using dtail-tools profile
-The `profile.sh` script provides quick summaries:
+The dtail-tools profile command provides quick summaries:
```bash
# List all profiles
-./profiling/profile.sh -list profiles/
+./dtail-tools profile -mode list
# Analyze specific profile
-./profiling/profile.sh profiles/dcat_cpu_20240101_120000.prof
+./dtail-tools profile -mode analyze profiles/dcat_cpu_20240101_120000.prof
# Get help
-./profiling/profile.sh -help
+./dtail-tools profile -h
```
## Optimization Workflow
diff --git a/benchmarks/README.md b/benchmarks/README.md
index dfb2627..3494086 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -40,13 +40,13 @@ make benchmark-baseline
make benchmark-baseline-quick
# Create a baseline with a descriptive tag (no prompt)
-./benchmarks/benchmark.sh baseline --tag "before-optimization"
+./dtail-tools benchmark -mode baseline -tag "before-optimization"
# Create a baseline interactively (will prompt if no tag provided)
-./benchmarks/benchmark.sh baseline
+make benchmark-baseline
# Create a comprehensive baseline (3x iterations)
-./benchmarks/benchmark.sh full-baseline --tag "v1.0-release"
+./dtail-tools benchmark -mode baseline -iterations 3x -tag "v1.0-release"
```
### Comparing Performance
@@ -55,10 +55,10 @@ make benchmark-baseline-quick
make benchmark-compare BASELINE=benchmarks/baselines/baseline_20240125_143022.txt
# Use the benchmark script for more options
-./benchmarks/benchmark.sh compare benchmarks/baselines/baseline_20240125_143022.txt
+./dtail-tools benchmark -mode compare -baseline benchmarks/baselines/baseline_20240125_143022.txt
# List available baselines
-./benchmarks/benchmark.sh list
+./dtail-tools benchmark -mode list
```
### Specific Tool Benchmarks
@@ -183,10 +183,10 @@ make benchmark-baseline-quick
> Enter a descriptive name for this baseline: initial-performance-check
# Tagged baseline with description (no prompt)
-./benchmarks/benchmark.sh baseline --tag "before-refactoring"
+./dtail-tools benchmark -mode baseline -tag "before-refactoring"
# Full baseline with multiple iterations
-./benchmarks/benchmark.sh full-baseline --memory --tag "release-v2.0"
+./dtail-tools benchmark -mode baseline -iterations 3x -memory -tag "release-v2.0"
```
Baseline files are named with the pattern:
@@ -200,19 +200,19 @@ Compare current performance against a baseline to detect regressions or improvem
make benchmark-compare BASELINE=benchmarks/baselines/baseline_20240125_143022.txt
# Using benchmark script (provides benchstat analysis if available)
-./benchmarks/benchmark.sh compare benchmarks/baselines/baseline_20240125_143022.txt
+./dtail-tools benchmark -mode compare -baseline benchmarks/baselines/baseline_20240125_143022.txt
```
### Managing Baselines
```bash
# List all baselines
-./benchmarks/benchmark.sh list
+./dtail-tools benchmark -mode list
# View a specific baseline
-./benchmarks/benchmark.sh show benchmarks/baselines/baseline_20240125_143022.txt
+cat benchmarks/baselines/baseline_20240125_143022.txt
# Clean old baselines (keeps last 10)
-./benchmarks/benchmark.sh clean
+./dtail-tools benchmark -mode clean
```
### Best Practices for Baselines
diff --git a/benchmarks/benchmark.sh b/benchmarks/benchmark.sh
deleted file mode 100755
index 1b4a71f..0000000
--- a/benchmarks/benchmark.sh
+++ /dev/null
@@ -1,318 +0,0 @@
-#!/bin/bash
-# Benchmark management script for DTail
-
-set -e
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-BASELINES_DIR="${SCRIPT_DIR}/baselines"
-TIMESTAMP=$(date +%Y%m%d_%H%M%S)
-
-# Colors for output
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-NC='\033[0m' # No Color
-
-# Function to print usage
-usage() {
- cat << EOF
-DTail Benchmark Management Tool
-
-Usage: $0 [command] [options]
-
-Commands:
- baseline Create a new baseline snapshot
- compare [baseline] Compare current performance against a baseline
- list List available baselines
- show [baseline] Display a baseline file
- clean Remove old baselines (keeps last 10)
- full-baseline Create a comprehensive baseline (all benchmarks, 3x iterations)
-
-Options:
- -o, --output FILE Save benchmark output to custom file
- -t, --tag TAG Add a descriptive tag to baseline filename
- -q, --quick Run quick benchmarks only
- -m, --memory Include memory profiling
- -c, --cpu-profile Generate CPU profile
- -v, --verbose Show detailed output
-
-Examples:
- # Create a baseline before optimization
- $0 baseline --tag "before-optimization"
-
- # Compare current performance with baseline
- $0 compare benchmarks/baselines/baseline_20240125_143022_before-optimization.txt
-
- # Create full baseline with memory stats
- $0 full-baseline --memory --tag "v1.0-release"
-
-EOF
-}
-
-# Function to ensure baselines directory exists
-ensure_baselines_dir() {
- mkdir -p "$BASELINES_DIR"
-}
-
-# Function to create baseline
-create_baseline() {
- local tag=""
- local bench_args="-bench=."
- local output_file=""
- local memory_profile=""
-
- # Parse arguments
- while [[ $# -gt 0 ]]; do
- case $1 in
- -t|--tag)
- tag="_$2"
- shift 2
- ;;
- -q|--quick)
- bench_args="-bench=BenchmarkQuick"
- shift
- ;;
- -m|--memory)
- memory_profile="-benchmem"
- shift
- ;;
- -o|--output)
- output_file="$2"
- shift 2
- ;;
- *)
- shift
- ;;
- esac
- done
-
- # If no tag provided, ask for one
- if [ -z "$tag" ]; then
- echo -e "${YELLOW}Creating benchmark baseline...${NC}"
- read -p "Enter a descriptive name for this baseline (e.g. 'before-optimization', 'v1.0-release'): " tag_input
- if [ -z "$tag_input" ]; then
- echo -e "${RED}Error: Baseline name cannot be empty${NC}"
- exit 1
- fi
- # Clean the tag input
- tag="_$(echo "$tag_input" | tr ' ' '_' | tr -cd '[:alnum:]._-')"
- fi
-
- ensure_baselines_dir
-
- if [ -z "$output_file" ]; then
- output_file="${BASELINES_DIR}/baseline_${TIMESTAMP}${tag}.txt"
- fi
-
- echo -e "${GREEN}Creating baseline: ${output_file}${NC}"
- echo "Git commit: $(git rev-parse --short HEAD)" > "$output_file"
- echo "Date: $(date)" >> "$output_file"
- echo "Tag: ${tag#_}" >> "$output_file"
- echo "----------------------------------------" >> "$output_file"
-
- cd "$SCRIPT_DIR/.."
- make build
- go test $bench_args $memory_profile ./benchmarks -count=1 | tee -a "$output_file"
-
- echo -e "${GREEN}Baseline created: ${output_file}${NC}"
-}
-
-# Function to create full baseline
-create_full_baseline() {
- local tag=""
- local memory_profile=""
-
- # Parse arguments
- while [[ $# -gt 0 ]]; do
- case $1 in
- -t|--tag)
- tag="_$2"
- shift 2
- ;;
- -m|--memory)
- memory_profile="-benchmem"
- shift
- ;;
- *)
- shift
- ;;
- esac
- done
-
- # If no tag provided, ask for one
- if [ -z "$tag" ]; then
- echo -e "${YELLOW}Creating comprehensive benchmark baseline...${NC}"
- read -p "Enter a descriptive name for this baseline (e.g. 'before-optimization', 'v1.0-release'): " tag_input
- if [ -z "$tag_input" ]; then
- echo -e "${RED}Error: Baseline name cannot be empty${NC}"
- exit 1
- fi
- # Clean the tag input
- tag="_$(echo "$tag_input" | tr ' ' '_' | tr -cd '[:alnum:]._-')"
- fi
-
- ensure_baselines_dir
-
- local output_file="${BASELINES_DIR}/baseline_${TIMESTAMP}${tag}_full.txt"
-
- echo -e "${GREEN}Creating comprehensive baseline: ${output_file}${NC}"
- echo "Git commit: $(git rev-parse --short HEAD)" > "$output_file"
- echo "Date: $(date)" >> "$output_file"
- echo "Tag: ${tag#_} (full)" >> "$output_file"
- echo "----------------------------------------" >> "$output_file"
-
- cd "$SCRIPT_DIR/.."
- make build
-
- # Run with multiple iterations for stability
- go test -bench=. $memory_profile -benchtime=3x ./benchmarks -count=1 | tee -a "$output_file"
-
- echo -e "${GREEN}Full baseline created: ${output_file}${NC}"
-}
-
-# Function to compare with baseline
-compare_baseline() {
- local baseline_file="$1"
-
- if [ -z "$baseline_file" ]; then
- echo -e "${RED}Error: No baseline file specified${NC}"
- echo "Available baselines:"
- list_baselines
- exit 1
- fi
-
- if [ ! -f "$baseline_file" ]; then
- echo -e "${RED}Error: Baseline file not found: $baseline_file${NC}"
- exit 1
- fi
-
- ensure_baselines_dir
- local current_file="${BASELINES_DIR}/current_${TIMESTAMP}.txt"
-
- echo -e "${YELLOW}Running current benchmarks...${NC}"
- echo "Git commit: $(git rev-parse --short HEAD)" > "$current_file"
- echo "Date: $(date)" >> "$current_file"
- echo "----------------------------------------" >> "$current_file"
-
- cd "$SCRIPT_DIR/.."
- make build
- go test -bench=. -benchmem ./benchmarks -count=1 | tee -a "$current_file"
-
- echo -e "\n${YELLOW}=== Performance Comparison ===${NC}"
-
- # Use benchstat if available
- if command -v benchstat >/dev/null 2>&1; then
- benchstat "$baseline_file" "$current_file"
- else
- echo -e "${YELLOW}benchstat not found. Install with:${NC}"
- echo " go install golang.org/x/perf/cmd/benchstat@latest"
- echo -e "\n${YELLOW}Showing simple comparison:${NC}"
-
- # Extract benchmark results for comparison
- echo -e "\nBaseline ($(basename "$baseline_file")):"
- grep "^Benchmark" "$baseline_file" | head -10
-
- echo -e "\nCurrent:"
- grep "^Benchmark" "$current_file" | head -10
- fi
-
- # Save comparison report
- local report_file="${BASELINES_DIR}/comparison_${TIMESTAMP}.txt"
- {
- echo "Comparison Report"
- echo "================"
- echo "Baseline: $baseline_file"
- echo "Current: $current_file"
- echo "Date: $(date)"
- echo ""
- if command -v benchstat >/dev/null 2>&1; then
- benchstat "$baseline_file" "$current_file"
- else
- diff -u "$baseline_file" "$current_file" || true
- fi
- } > "$report_file"
-
- echo -e "\n${GREEN}Comparison report saved: $report_file${NC}"
-}
-
-# Function to list baselines
-list_baselines() {
- ensure_baselines_dir
-
- echo -e "${YELLOW}Available baselines:${NC}"
- if [ -d "$BASELINES_DIR" ]; then
- ls -la "$BASELINES_DIR"/*.txt 2>/dev/null | awk '{print $9, $6, $7, $8}' | column -t || echo "No baselines found"
- else
- echo "No baselines found"
- fi
-}
-
-# Function to show baseline content
-show_baseline() {
- local baseline_file="$1"
-
- if [ -z "$baseline_file" ]; then
- echo -e "${RED}Error: No baseline file specified${NC}"
- list_baselines
- exit 1
- fi
-
- if [ ! -f "$baseline_file" ]; then
- echo -e "${RED}Error: Baseline file not found: $baseline_file${NC}"
- exit 1
- fi
-
- less "$baseline_file"
-}
-
-# Function to clean old baselines
-clean_baselines() {
- ensure_baselines_dir
-
- echo -e "${YELLOW}Cleaning old baselines (keeping last 10)...${NC}"
-
- # Count files
- local file_count=$(ls -1 "$BASELINES_DIR"/*.txt 2>/dev/null | wc -l)
-
- if [ "$file_count" -gt 10 ]; then
- # Remove oldest files, keeping last 10
- ls -t "$BASELINES_DIR"/*.txt | tail -n +11 | xargs rm -v
- echo -e "${GREEN}Cleanup complete${NC}"
- else
- echo "No cleanup needed (only $file_count baselines found)"
- fi
-}
-
-# Main command handling
-case "${1:-}" in
- baseline)
- shift
- create_baseline "$@"
- ;;
- full-baseline)
- shift
- create_full_baseline "$@"
- ;;
- compare)
- shift
- compare_baseline "$@"
- ;;
- list)
- list_baselines
- ;;
- show)
- shift
- show_baseline "$@"
- ;;
- clean)
- clean_baselines
- ;;
- -h|--help|help)
- usage
- ;;
- *)
- echo -e "${RED}Error: Unknown command '${1:-}'${NC}"
- usage
- exit 1
- ;;
-esac \ No newline at end of file
diff --git a/benchmarks/cmd/profile_example.go b/benchmarks/cmd/profile_example.go
index f996565..01d4a66 100644
--- a/benchmarks/cmd/profile_example.go
+++ b/benchmarks/cmd/profile_example.go
@@ -219,9 +219,9 @@ func analyzeProfiles() {
fmt.Printf("\nAnalyzing %s CPU profile:\n", tool)
- // Run profile.sh
- cmd := exec.Command("../profiling/profile.sh",
- "-top", "5",
+ // Run dtail-tools profile analyze
+ cmd := exec.Command("../dtail-tools",
+ "profile", "-mode", "analyze",
latestProfile)
output, err := cmd.CombinedOutput()
diff --git a/benchmarks/profile_runner.go b/benchmarks/profile_runner.go
index 2da122b..e7934dd 100644
--- a/benchmarks/profile_runner.go
+++ b/benchmarks/profile_runner.go
@@ -157,34 +157,35 @@ func ProfileBenchmark(b *testing.B, name string, tool string, args ...string) {
b.Logf("Allocation profile: %s", result.AllocProfile)
}
- // Analyze profiles if profile.sh is available
- dprofilePath := filepath.Join("..", "profiling", "profile.sh")
- if _, err := os.Stat(dprofilePath); err == nil {
+ // Analyze profiles using dtail-tools
+ dtailToolsPath := filepath.Join("..", "dtail-tools")
+ if _, err := os.Stat(dtailToolsPath); err == nil {
if result.CPUProfile != "" {
- analyzeProfile(b, dprofilePath, result.CPUProfile, "CPU")
+ analyzeProfileWithTools(b, dtailToolsPath, result.CPUProfile, "CPU")
}
if result.MemProfile != "" {
- analyzeProfile(b, dprofilePath, result.MemProfile, "Memory")
+ analyzeProfileWithTools(b, dtailToolsPath, result.MemProfile, "Memory")
}
}
})
}
-// analyzeProfile runs profile.sh on a profile file
-func analyzeProfile(b *testing.B, dprofilePath, profilePath, profileType string) {
+// analyzeProfileWithTools runs dtail-tools profile analyze on a profile file
+func analyzeProfileWithTools(b *testing.B, dtailToolsPath, profilePath, profileType string) {
b.Logf("\n%s Profile Analysis:", profileType)
- cmd := exec.Command(dprofilePath, "-top", "5", profilePath)
+ cmd := exec.Command(dtailToolsPath, "profile", "-mode", "analyze", profilePath)
output, err := cmd.CombinedOutput()
if err != nil {
b.Logf("Failed to analyze profile: %v", err)
return
}
- // Print top functions
+ // Print analysis output
lines := strings.Split(string(output), "\n")
- for _, line := range lines {
- if strings.Contains(line, "%") || strings.Contains(line, "Top") {
+ // Print first 10 lines of analysis
+ for i, line := range lines {
+ if i < 10 && line != "" {
b.Log(line)
}
}
diff --git a/profiling/profile.sh b/profiling/profile.sh
deleted file mode 100755
index d0be9e2..0000000
--- a/profiling/profile.sh
+++ /dev/null
@@ -1,210 +0,0 @@
-#!/bin/bash
-
-# dprofile - Simple profile analysis script for dtail
-# A lightweight wrapper around go tool pprof
-
-set -e
-
-# Colors
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-RED='\033[0;31m'
-NC='\033[0m' # No Color
-
-# Default values
-TOP_N=10
-SORT_BY="flat"
-LIST_MODE=false
-PROFILE_PATH=""
-
-# Usage function
-usage() {
- echo "dprofile - Analyze pprof profiles"
- echo ""
- echo "Usage:"
- echo " dprofile <profile> # Analyze a profile"
- echo " dprofile -list [directory] # List profiles in directory"
- echo " dprofile -top N <profile> # Show top N functions (default: 10)"
- echo " dprofile -cum <profile> # Sort by cumulative value"
- echo " dprofile -web <profile> # Open web browser with flame graph"
- echo " dprofile -text <profile> # Full text report"
- echo " dprofile -help # Show this help"
- echo ""
- echo "Examples:"
- echo " dprofile profiles/dcat_cpu_*.prof"
- echo " dprofile -top 20 -cum profiles/dgrep_mem_*.prof"
- echo " dprofile -list profiles/"
- echo " dprofile -web profiles/dmap_cpu_*.prof"
- exit 1
-}
-
-# Parse arguments
-while [[ $# -gt 0 ]]; do
- case $1 in
- -help|--help|-h)
- usage
- ;;
- -list|--list)
- LIST_MODE=true
- shift
- if [[ $# -gt 0 && ! "$1" =~ ^- ]]; then
- PROFILE_DIR="$1"
- shift
- else
- PROFILE_DIR="."
- fi
- ;;
- -top|--top)
- shift
- TOP_N="$1"
- shift
- ;;
- -cum|--cum)
- SORT_BY="cum"
- shift
- ;;
- -web|--web)
- shift
- if [[ $# -eq 0 ]]; then
- echo "Error: -web requires a profile file"
- exit 1
- fi
- echo -e "${GREEN}Opening web browser for $1...${NC}"
- echo "Press Ctrl+C to stop the server"
- exec go tool pprof -http=:8080 "$1"
- ;;
- -text|--text)
- shift
- if [[ $# -eq 0 ]]; then
- echo "Error: -text requires a profile file"
- exit 1
- fi
- exec go tool pprof -text "$1"
- ;;
- -*)
- echo "Unknown option: $1"
- usage
- ;;
- *)
- PROFILE_PATH="$1"
- shift
- ;;
- esac
-done
-
-# List mode
-if $LIST_MODE; then
- echo -e "${GREEN}Profile files in $PROFILE_DIR:${NC}"
- echo ""
-
- # Group by tool and type
- declare -A profiles
-
- for file in "$PROFILE_DIR"/*.prof; do
- if [[ -f "$file" ]]; then
- basename=$(basename "$file")
- # Extract tool and type (e.g., dcat_cpu -> "dcat cpu")
- if [[ $basename =~ ^([^_]+)_([^_]+)_.*\.prof$ ]]; then
- tool="${BASH_REMATCH[1]}"
- type="${BASH_REMATCH[2]}"
- key="$tool:$type"
-
- if [[ -z "${profiles[$key]}" ]]; then
- profiles[$key]="$file"
- else
- profiles[$key]="${profiles[$key]}|$file"
- fi
- fi
- fi
- done
-
- # Display grouped profiles
- current_tool=""
- for key in $(echo "${!profiles[@]}" | tr ' ' '\n' | sort); do
- IFS=':' read -r tool type <<< "$key"
-
- if [[ "$tool" != "$current_tool" ]]; then
- [[ -n "$current_tool" ]] && echo
- echo -e "${YELLOW}$tool profiles:${NC}"
- current_tool="$tool"
- fi
-
- echo " $type:"
- IFS='|' read -ra files <<< "${profiles[$key]}"
- for file in "${files[@]}"; do
- size=$(ls -lh "$file" 2>/dev/null | awk '{print $5}')
- timestamp=$(basename "$file" | grep -oE '[0-9]{8}_[0-9]{6}' || echo "unknown")
- echo " $(basename "$file") ($size) - $timestamp"
- done
- done
-
- [[ -z "$current_tool" ]] && echo " No profile files found in $PROFILE_DIR"
- exit 0
-fi
-
-# Check if profile path provided
-if [[ -z "$PROFILE_PATH" ]]; then
- usage
-fi
-
-# Check if file exists
-if [[ ! -f "$PROFILE_PATH" ]]; then
- echo -e "${RED}Error: Profile file not found: $PROFILE_PATH${NC}"
- exit 1
-fi
-
-# Detect profile type
-PROFILE_TYPE="unknown"
-if go tool pprof -raw "$PROFILE_PATH" 2>/dev/null | grep -q "samples/count"; then
- PROFILE_TYPE="cpu"
-elif go tool pprof -raw "$PROFILE_PATH" 2>/dev/null | grep -q "alloc_space"; then
- PROFILE_TYPE="memory"
-elif go tool pprof -raw "$PROFILE_PATH" 2>/dev/null | grep -q "inuse_space"; then
- PROFILE_TYPE="memory"
-fi
-
-# Analyze profile
-echo -e "${GREEN}Profile Analysis: $PROFILE_PATH${NC}"
-echo "Type: $PROFILE_TYPE"
-echo ""
-
-# Get top functions
-echo "Top $TOP_N functions (sorted by $SORT_BY):"
-echo "================================================================"
-
-# Use different flags based on sort order
-if [[ "$SORT_BY" == "cum" ]]; then
- echo "# Command: go tool pprof -top -cum -nodecount=$TOP_N $PROFILE_PATH"
- go tool pprof -top -cum -nodecount="$TOP_N" "$PROFILE_PATH" 2>/dev/null | \
- grep -E "^[[:space:]]*[0-9]+" | head -n "$TOP_N" || true
-else
- echo "# Command: go tool pprof -top -nodecount=$TOP_N $PROFILE_PATH"
- go tool pprof -top -nodecount="$TOP_N" "$PROFILE_PATH" 2>/dev/null | \
- grep -E "^[[:space:]]*[0-9]+" | head -n "$TOP_N" || true
-fi
-
-echo ""
-
-# Provide helpful tips based on profile type
-if [[ "$PROFILE_TYPE" == "cpu" ]]; then
- echo -e "${YELLOW}CPU Profile Tips:${NC}"
- echo "- flat: time spent in the function itself"
- echo "- cum: time spent in the function and its callees"
- echo "- Focus on functions with high flat% for optimization"
- echo ""
- echo "Interactive exploration:"
- echo " go tool pprof $PROFILE_PATH"
- echo ""
- echo "Generate flame graph:"
- echo " dprofile -web $PROFILE_PATH"
-elif [[ "$PROFILE_TYPE" == "memory" ]]; then
- echo -e "${YELLOW}Memory Profile Tips:${NC}"
- echo "- Shows memory allocations by function"
- echo "- Focus on unexpected allocations in hot paths"
- echo ""
- echo "View all allocations:"
- echo " go tool pprof -alloc_space $PROFILE_PATH"
- echo ""
- echo "View in-use memory:"
- echo " go tool pprof -inuse_space $PROFILE_PATH"
-fi \ No newline at end of file
diff --git a/profiling/profile_benchmarks.sh b/profiling/profile_benchmarks.sh
deleted file mode 100755
index 78cc587..0000000
--- a/profiling/profile_benchmarks.sh
+++ /dev/null
@@ -1,179 +0,0 @@
-#!/bin/bash
-
-# Profile benchmarks script for dtail commands
-# This script runs profiling on dcat, dgrep, and dmap with various workloads
-
-set -e
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-cd "$SCRIPT_DIR"
-
-# Colors for output
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-RED='\033[0;31m'
-NC='\033[0m' # No Color
-
-# Default values
-PROFILE_DIR="${PROFILE_DIR:-profiles}"
-TEST_DATA_DIR="${TEST_DATA_DIR:-testdata}"
-PROFILE_RUNS=1
-
-# Create directories
-mkdir -p "$PROFILE_DIR"
-mkdir -p "$TEST_DATA_DIR"
-
-echo -e "${GREEN}DTail Profiling Framework${NC}"
-echo "=========================="
-echo
-
-# Function to generate test data
-generate_test_data() {
- local size=$1
- local filename=$2
-
- if [ ! -f "$filename" ]; then
- echo -e "${YELLOW}Generating test data: $filename (${size})${NC}"
- # Use the standalone generator
- echo " Command: go run ../benchmarks/cmd/generate_profile_data.go -size \"${size}\" -output \"$filename\" -format log"
- go run ../benchmarks/cmd/generate_profile_data.go -size "${size}" -output "$filename" -format log
- fi
-}
-
-# Function to run profiling
-run_profile() {
- local cmd=$1
- local name=$2
- local args=$3
-
- echo -e "${GREEN}Profiling $cmd - $name${NC}"
-
- for i in $(seq 1 $PROFILE_RUNS); do
- echo " Run $i/$PROFILE_RUNS..."
- echo " Command: timeout 30s $cmd -profile -profiledir $PROFILE_DIR $args"
-
- # Run with CPU and memory profiling with timeout
- timeout 30s $cmd -profile -profiledir "$PROFILE_DIR" $args > /dev/null 2>&1
- local exit_code=$?
-
- if [ $exit_code -eq 124 ]; then
- echo -e " ${YELLOW}Warning: Run $i timed out after 30s${NC}"
- elif [ $exit_code -ne 0 ]; then
- echo -e " ${RED}Error: Run $i failed with exit code $exit_code${NC}"
- fi
-
- # Small delay between runs
- sleep 1
- done
-
- echo
-}
-
-
-# Generate test data
-echo -e "${GREEN}Preparing test data...${NC}"
-generate_test_data "10MB" "$TEST_DATA_DIR/small.log"
-generate_test_data "100MB" "$TEST_DATA_DIR/medium.log"
-# Skip large file for faster testing
-# generate_test_data "1GB" "$TEST_DATA_DIR/large.log"
-
-# Generate CSV data for dmap (smaller size for faster processing)
-if [ ! -f "$TEST_DATA_DIR/test.csv" ]; then
- echo -e "${YELLOW}Generating CSV test data${NC}"
- echo " Command: go run ../benchmarks/cmd/generate_profile_data.go -size \"50MB\" -output \"$TEST_DATA_DIR/test.csv\" -format csv"
- go run ../benchmarks/cmd/generate_profile_data.go -size "50MB" -output "$TEST_DATA_DIR/test.csv" -format csv
-fi
-
-echo
-
-# Build commands
-echo -e "${GREEN}Building commands...${NC}"
-echo " Command: cd .. && make dcat dgrep dmap"
-cd ..
-make dcat dgrep dmap
-cd "$SCRIPT_DIR"
-
-echo
-
-# Profile dcat
-echo -e "${GREEN}=== Profiling dcat ===${NC}"
-run_profile "../dcat" "small_file" "-plain -cfg none $TEST_DATA_DIR/small.log"
-# Skip medium file for faster profiling
-# run_profile "../dcat" "medium_file" "-plain -cfg none $TEST_DATA_DIR/medium.log"
-# Skip large file for faster profiling - uncomment if needed
-# run_profile "../dcat" "large_file" "-plain -cfg none $TEST_DATA_DIR/large.log"
-
-# Profile dgrep
-echo -e "${GREEN}=== Profiling dgrep ===${NC}"
-run_profile "../dgrep" "simple_regex" "-plain -cfg none -regex 'user[0-9]+' $TEST_DATA_DIR/small.log"
-# Use small file for faster profiling
-# run_profile "../dgrep" "complex_regex" "-plain -cfg none -regex '\\d{4}-\\d{2}-\\d{2}.*login.*\\d{3}' $TEST_DATA_DIR/medium.log"
-# run_profile "../dgrep" "with_context" "-plain -cfg none -regex 'login' -before 2 -after 2 $TEST_DATA_DIR/medium.log"
-
-# Profile dmap
-echo -e "${GREEN}=== Profiling dmap ===${NC}"
-
-# Generate DTail default format test data for dmap
-if [ ! -f "$TEST_DATA_DIR/dtail_format.log" ]; then
- echo -e "${YELLOW}Generating DTail format test data for dmap${NC}"
- echo " Command: Creating DTail format log file"
- # Generate DTail default format log lines (100K lines for meaningful profiling)
- for i in $(seq 1 100000); do
- hostname="host$((i % 10))"
- goroutines=$((40 + i % 40))
- cgocalls=$((i % 100))
- cpus=$((1 + i % 8))
- loadavg=$(printf "%.2f" $(echo "scale=2; $i % 100 / 100" | bc))
- uptime="${i}h0m0s"
- connections=$((i % 10))
- lifetime=$((1000 + i))
-
- echo "INFO|$(date +%m%d-%H%M%S)|1|stats.go:56|$cpus|$goroutines|$cgocalls|$loadavg|$uptime|MAPREDUCE:STATS|currentConnections=$connections|lifetimeConnections=$lifetime"
- done > "$TEST_DATA_DIR/dtail_format.log"
-fi
-
-# Profile dmap with DTail format
-run_profile "../dmap" "simple_count" "-plain -cfg none -query 'from STATS select count(*)' -files $TEST_DATA_DIR/dtail_format.log"
-run_profile "../dmap" "aggregations" "-plain -cfg none -query 'from STATS select sum(\$goroutines),avg(\$cgocalls),max(lifetimeConnections)' -files $TEST_DATA_DIR/dtail_format.log"
-run_profile "../dmap" "group_by_connections" "-plain -cfg none -query 'from STATS select currentConnections,count(*) group by currentConnections' -files $TEST_DATA_DIR/dtail_format.log"
-
-# Also test CSV format
-echo -e "\n${YELLOW}Testing CSV format with dmap${NC}"
-run_profile "../dmap" "csv_query" "-plain -cfg none -query 'select user,action,count(*) where status=\"success\" group by user,action logformat csv' -files $TEST_DATA_DIR/test.csv"
-
-echo
-echo -e "${GREEN}Profiling complete!${NC}"
-echo
-
-# Analyze profiles
-echo -e "${GREEN}=== Profile Analysis ===${NC}"
-echo "Profile files generated in: $PROFILE_DIR"
-echo
-
-# List recent profiles
-echo "Recent CPU profiles:"
-ls -lt "$PROFILE_DIR"/*_cpu_*.prof 2>/dev/null | head -5 || echo " No CPU profiles found"
-
-echo
-echo "Recent memory profiles:"
-ls -lt "$PROFILE_DIR"/*_mem_*.prof 2>/dev/null | head -5 || echo " No memory profiles found"
-
-echo
-echo "Recent allocation profiles:"
-ls -lt "$PROFILE_DIR"/*_alloc_*.prof 2>/dev/null | head -5 || echo " No allocation profiles found"
-
-echo
-echo -e "${GREEN}To analyze a profile, use:${NC}"
-echo " go tool pprof <profile_file>"
-echo " ../profiling/profile.sh <profile_file>"
-echo
-echo -e "${GREEN}Examples:${NC}"
-echo " # Interactive analysis"
-echo " go tool pprof $PROFILE_DIR/dcat_cpu_*.prof"
-echo
-echo " # Generate flame graph"
-echo " go tool pprof -http=:8080 $PROFILE_DIR/dcat_cpu_*.prof"
-echo
-echo " # Quick summary with dprofile"
-echo " ../profiling/profile.sh $PROFILE_DIR/dcat_cpu_*.prof"
-echo \ No newline at end of file
diff --git a/profiling/profile_dmap.sh b/profiling/profile_dmap.sh
deleted file mode 100755
index 2702dee..0000000
--- a/profiling/profile_dmap.sh
+++ /dev/null
@@ -1,142 +0,0 @@
-#!/bin/bash
-
-# Profile script specifically for dmap with MapReduce format data
-
-set -e
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-cd "$SCRIPT_DIR"
-
-# Colors for output
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-RED='\033[0;31m'
-NC='\033[0m' # No Color
-
-# Default values
-PROFILE_DIR="${PROFILE_DIR:-profiles}"
-TEST_DATA_DIR="${TEST_DATA_DIR:-testdata}"
-
-# Create directories
-mkdir -p "$PROFILE_DIR"
-mkdir -p "$TEST_DATA_DIR"
-
-echo -e "${GREEN}DTail dmap Profiling${NC}"
-echo "===================="
-echo
-
-# Function to generate MapReduce format test data (generickv format)
-generate_mapreduce_data() {
- local filename=$1
- local lines=$2
-
- if [ ! -f "$filename" ]; then
- echo -e "${YELLOW}Generating MapReduce format test data: $filename${NC}"
- echo " Command: Creating $filename with $lines lines (generickv format)"
-
- # Generate data in generickv format: field1=value1|field2=value2|...
- for i in $(seq 1 $lines); do
- hostname="host$((i % 10))"
- # Simple timestamp generation without date command
- hour=$((10 + (i / 3600) % 24))
- min=$(((i / 60) % 60))
- sec=$((i % 60))
- timestamp=$(printf "2024-01-01T%02d:%02d:%02d.000Z" $hour $min $sec)
- goroutines=$((40 + i % 40))
- openFiles=$((100 + i % 50))
- connections=$((10 + i % 20))
- currentConnections=$((i % 10))
- lifetimeConnections=$((1000 + i))
-
- echo "table=STATS|hostname=$hostname|timestamp=$timestamp|goroutines=$goroutines|openFiles=$openFiles|connections=$connections|currentConnections=$currentConnections|lifetimeConnections=$lifetimeConnections" >> "$filename"
- done
- fi
-}
-
-# Generate test data in DTail default format instead
-echo -e "${GREEN}Preparing MapReduce test data...${NC}"
-
-# Function to generate DTail default format test data
-generate_dtail_format_data() {
- local filename=$1
- local lines=$2
-
- if [ ! -f "$filename" ]; then
- echo -e "${YELLOW}Generating DTail default format test data: $filename${NC}"
- echo " Command: Creating $filename with $lines lines (DTail default format)"
-
- # Generate DTail default format log lines
- for i in $(seq 1 $lines); do
- hostname="host$((i % 10))"
- goroutines=$((40 + i % 40))
- cgocalls=$((i % 100))
- cpus=$((1 + i % 8))
- loadavg=$(printf "%.2f" $(echo "scale=2; $i % 100 / 100" | bc))
- uptime="${i}h0m0s"
- connections=$((i % 10))
- lifetime=$((1000 + i))
-
- # DTail default format: INFO|date-time|pid|caller|cpus|goroutines|cgocalls|loadavg|uptime|MAPREDUCE:STATS|key=value|...
- echo "INFO|$(date +%m%d-%H%M%S)|1|stats.go:56|$cpus|$goroutines|$cgocalls|$loadavg|$uptime|MAPREDUCE:STATS|hostname=$hostname|currentConnections=$connections|lifetimeConnections=$lifetime" >> "$filename"
- done
- fi
-}
-
-generate_dtail_format_data "$TEST_DATA_DIR/stats_small.log" 1000
-generate_dtail_format_data "$TEST_DATA_DIR/stats_medium.log" 1000000
-
-# Build dmap
-echo -e "${GREEN}Building commands...${NC}"
-echo " Command: cd .. && make dmap"
-cd ..
-make dmap 2>/dev/null || true
-cd "$SCRIPT_DIR"
-
-echo
-
-# Profile different dmap queries
-echo -e "${GREEN}Profiling dmap queries...${NC}"
-
-# Query 1: Simple count
-echo -e "\n${YELLOW}Query: Count by hostname${NC}"
-QUERY="from STATS select count(\$line) group by hostname"
-echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log"
-# Run dmap and let it complete naturally
-../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -10
-
-# Query 2: Aggregations
-echo -e "\n${YELLOW}Query: Sum and average${NC}"
-QUERY="from STATS select sum(\$goroutines),avg(\$goroutines) group by hostname"
-echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log"
-../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -10
-
-# Query 3: Min/Max
-echo -e "\n${YELLOW}Query: Min and max${NC}"
-QUERY="from STATS select min(currentConnections),max(lifetimeConnections) group by hostname"
-echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/stats_small.log"
-../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/stats_small.log" 2>&1 | head -10
-
-echo
-echo -e "${GREEN}Analyzing dmap profiles...${NC}"
-
-# Find and analyze latest dmap profiles
-DMAP_CPU=$(ls -t "$PROFILE_DIR"/dmap_cpu_*.prof 2>/dev/null | head -1)
-if [ -n "$DMAP_CPU" ]; then
- echo -e "\nCPU Profile: $(basename "$DMAP_CPU")"
- ../profiling/profile.sh -top 5 "$DMAP_CPU" 2>/dev/null || echo " Analysis failed"
-fi
-
-DMAP_MEM=$(ls -t "$PROFILE_DIR"/dmap_mem_*.prof 2>/dev/null | head -1)
-if [ -n "$DMAP_MEM" ]; then
- echo -e "\nMemory Profile: $(basename "$DMAP_MEM")"
- ../profiling/profile.sh -top 5 "$DMAP_MEM" 2>/dev/null || echo " Analysis failed"
-fi
-
-echo
-echo -e "${GREEN}dmap profiling complete!${NC}"
-echo
-echo "To analyze profiles in detail:"
-echo " go tool pprof $PROFILE_DIR/dmap_cpu_*.prof"
-echo " go tool pprof -alloc_space $PROFILE_DIR/dmap_mem_*.prof"
-
-# No cleanup needed - no output files are created during profiling
diff --git a/profiling/profile_quick.sh b/profiling/profile_quick.sh
deleted file mode 100755
index 249b73c..0000000
--- a/profiling/profile_quick.sh
+++ /dev/null
@@ -1,89 +0,0 @@
-#!/bin/bash
-
-# Quick profile script for dtail commands
-# This runs profiling with smaller datasets for faster results
-
-set -e
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-cd "$SCRIPT_DIR"
-
-# Colors for output
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-NC='\033[0m' # No Color
-
-# Default values
-PROFILE_DIR="${PROFILE_DIR:-profiles}"
-TEST_DATA_DIR="${TEST_DATA_DIR:-testdata}"
-
-# Create directories
-mkdir -p "$PROFILE_DIR"
-mkdir -p "$TEST_DATA_DIR"
-
-echo -e "${GREEN}DTail Quick Profiling${NC}"
-echo "====================="
-echo
-
-# Generate test data if needed
-if [ ! -f "$TEST_DATA_DIR/quick_test.log" ]; then
- echo -e "${YELLOW}Generating test data...${NC}"
- echo " Command: go run ../benchmarks/cmd/generate_profile_data.go -size \"10MB\" -output \"$TEST_DATA_DIR/quick_test.log\" -format log"
- go run ../benchmarks/cmd/generate_profile_data.go -size "10MB" -output "$TEST_DATA_DIR/quick_test.log" -format log
- echo " Command: go run ../benchmarks/cmd/generate_profile_data.go -size \"10MB\" -output \"$TEST_DATA_DIR/quick_test.csv\" -format csv"
- go run ../benchmarks/cmd/generate_profile_data.go -size "10MB" -output "$TEST_DATA_DIR/quick_test.csv" -format csv
-fi
-
-# Build commands
-echo -e "${GREEN}Building commands...${NC}"
-echo " Command: cd .. && make dcat dgrep dmap"
-cd ..
-make dcat dgrep dmap 2>/dev/null || true
-cd "$SCRIPT_DIR"
-
-echo
-echo -e "${GREEN}Running quick profiles...${NC}"
-
-# Profile dcat
-echo -e "\n${YELLOW}Profiling dcat...${NC}"
-echo "Command: ../dcat -profile -profiledir $PROFILE_DIR -plain -cfg none $TEST_DATA_DIR/quick_test.log"
-../dcat -profile -profiledir "$PROFILE_DIR" -plain -cfg none "$TEST_DATA_DIR/quick_test.log" > /dev/null 2>&1
-DCAT_CPU=$(ls -t "$PROFILE_DIR"/dcat_cpu_*.prof 2>/dev/null | head -1)
-if [ -n "$DCAT_CPU" ]; then
- echo " Generated: $(basename "$DCAT_CPU")"
- echo " Analysis: ../profiling/profile.sh -top 3 $DCAT_CPU"
- ../profiling/profile.sh -top 3 "$DCAT_CPU" | grep -A 5 "Top 3 functions"
-fi
-
-# Profile dgrep
-echo -e "\n${YELLOW}Profiling dgrep...${NC}"
-echo "Command: ../dgrep -profile -profiledir $PROFILE_DIR -plain -cfg none -regex \"user[0-9]+\" $TEST_DATA_DIR/quick_test.log"
-../dgrep -profile -profiledir "$PROFILE_DIR" -plain -cfg none -regex "user[0-9]+" "$TEST_DATA_DIR/quick_test.log" > /dev/null 2>&1
-DGREP_CPU=$(ls -t "$PROFILE_DIR"/dgrep_cpu_*.prof 2>/dev/null | head -1)
-if [ -n "$DGREP_CPU" ]; then
- echo " Generated: $(basename "$DGREP_CPU")"
- echo " Analysis: ../profiling/profile.sh -top 3 $DGREP_CPU"
- ../profiling/profile.sh -top 3 "$DGREP_CPU" | grep -A 5 "Top 3 functions"
-fi
-
-# Profile dmap (use proper MapReduce query on CSV file)
-echo -e "\n${YELLOW}Profiling dmap...${NC}"
-QUERY="select count($line),avg($duration) group by $user logformat csv"
-echo "Command: ../dmap -profile -profiledir $PROFILE_DIR -plain -cfg none -query \"$QUERY\" -files $TEST_DATA_DIR/quick_test.csv"
-# Run dmap and let it complete naturally
-../dmap -profile -profiledir "$PROFILE_DIR" -plain -cfg none -query "$QUERY" -files "$TEST_DATA_DIR/quick_test.csv" > /dev/null 2>&1
-
-DMAP_CPU=$(ls -t "$PROFILE_DIR"/dmap_cpu_*.prof 2>/dev/null | head -1)
-if [ -n "$DMAP_CPU" ]; then
- echo " Generated: $(basename "$DMAP_CPU")"
- echo " Analysis: ../profiling/profile.sh -top 3 $DMAP_CPU"
- ../profiling/profile.sh -top 3 "$DMAP_CPU" | grep -A 5 "Top 3 functions"
-fi
-
-echo
-echo -e "${GREEN}Quick profiling complete!${NC}"
-echo
-echo "To analyze in detail:"
-echo " go tool pprof $PROFILE_DIR/<profile_file>"
-echo " make profile-flamegraph PROFILE=$PROFILE_DIR/<profile_file>"
-echo \ No newline at end of file