summaryrefslogtreecommitdiff
path: root/internal/tools/profile
diff options
context:
space:
mode:
author Paul Buetow <paul@buetow.org> 2025-06-26 22:19:22 +0300
committer Paul Buetow <paul@buetow.org> 2025-06-26 22:19:22 +0300
commit 947e08e4f9e3c9c44b346adff4eb6d68fa79a726 (patch)
tree cfa94aba72f91d26657de09b7a5b6a23eff10fd7 /internal/tools/profile
parent 1e643ac66765fc0ab4224335191731d8b77fece2 (diff)
Refactor profiling and benchmarking tools from bash to Go
This major refactoring replaces all bash-based profiling and benchmarking scripts with a unified Go tool (dtail-tools) that provides:
- Better cross-platform compatibility
- Improved error handling and reliability
- Structured data generation for test files
- Consistent command-line interface
- Easier maintenance and extensibility

Key changes:
- Created dtail-tools command with profile and benchmark subcommands
- Implemented common utilities for data generation and file operations
- Updated Makefile to use the new Go-based tools
- Maintained backward compatibility with existing make targets
- Fixed ParseSize to handle single-letter suffixes (10M, 1G, etc.)

The new tool supports all previous functionality:
- profile-quick, profile-all, profile-dmap
- benchmark creation, comparison, and management
- Test data generation with multiple formats
- Profile analysis and listing

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
Diffstat (limited to 'internal/tools/profile')
-rw-r--r--internal/tools/profile/analyze.go221
-rw-r--r--internal/tools/profile/profile.go358
2 files changed, 579 insertions, 0 deletions
diff --git a/internal/tools/profile/analyze.go b/internal/tools/profile/analyze.go
new file mode 100644
index 0000000..f27841a
--- /dev/null
+++ b/internal/tools/profile/analyze.go
@@ -0,0 +1,221 @@
+package profile
+
+import (
+ "bufio"
+ "flag"
+ "fmt"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "sort"
+ "strings"
+
+ "github.com/mimecast/dtail/internal/tools/common"
+)
+
// ProfileInfo describes one profile file discovered on disk.
type ProfileInfo struct {
	// Path is the file path exactly as returned by the directory glob.
	Path string
	// Tool is the first underscore-separated component of the file name.
	Tool string
	// Type is the second underscore-separated component of the file name
	// (cpu, mem, alloc).
	Type string
	// ModTime is the modification time rendered as "2006-01-02 15:04:05",
	// so lexical order equals chronological order.
	ModTime string
	// Size is the file size in bytes.
	Size int64
}
+
+func runAnalyze(cfg *Config) error {
+ args := flag.Args()
+ if len(args) == 0 {
+ return fmt.Errorf("no profile file specified")
+ }
+
+ profilePath := args[0]
+ if !common.FileExists(profilePath) {
+ return fmt.Errorf("profile file not found: %s", profilePath)
+ }
+
+ // Determine if web mode requested
+ for _, arg := range args[1:] {
+ if arg == "-web" || arg == "--web" {
+ return openWebProfile(profilePath)
+ }
+ }
+
+ // Default to text analysis
+ return analyzeProfile(profilePath, args[1:]...)
+}
+
+func listProfiles(cfg *Config) error {
+ common.PrintSection("Available Profiles")
+
+ profiles, err := findProfiles(cfg.ProfileDir)
+ if err != nil {
+ return err
+ }
+
+ if len(profiles) == 0 {
+ fmt.Printf("No profiles found in %s\n", cfg.ProfileDir)
+ return nil
+ }
+
+ // Group by tool
+ byTool := make(map[string][]ProfileInfo)
+ for _, p := range profiles {
+ byTool[p.Tool] = append(byTool[p.Tool], p)
+ }
+
+ // Sort tools
+ var tools []string
+ for tool := range byTool {
+ tools = append(tools, tool)
+ }
+ sort.Strings(tools)
+
+ // Display profiles
+ for _, tool := range tools {
+ fmt.Printf("\n%s profiles:\n", tool)
+ toolProfiles := byTool[tool]
+
+ // Sort by modification time (newest first)
+ sort.Slice(toolProfiles, func(i, j int) bool {
+ return toolProfiles[i].ModTime > toolProfiles[j].ModTime
+ })
+
+ for _, p := range toolProfiles {
+ fmt.Printf(" %-8s %s %8s %s\n",
+ p.Type, p.ModTime, common.FormatSize(p.Size), filepath.Base(p.Path))
+ }
+ }
+
+ fmt.Printf("\nTotal: %d profiles\n", len(profiles))
+ fmt.Printf("\nUsage: dtail-tools profile -mode analyze <profile_file>\n")
+
+ return nil
+}
+
+func findProfiles(dir string) ([]ProfileInfo, error) {
+ var profiles []ProfileInfo
+
+ pattern := filepath.Join(dir, "*.prof")
+ matches, err := filepath.Glob(pattern)
+ if err != nil {
+ return nil, err
+ }
+
+ for _, path := range matches {
+ info, err := os.Stat(path)
+ if err != nil {
+ continue
+ }
+
+ // Parse filename to extract tool and type
+ base := filepath.Base(path)
+ parts := strings.Split(base, "_")
+ if len(parts) < 3 {
+ continue
+ }
+
+ tool := parts[0]
+ profType := parts[1]
+
+ profiles = append(profiles, ProfileInfo{
+ Path: path,
+ Tool: tool,
+ Type: profType,
+ ModTime: info.ModTime().Format("2006-01-02 15:04:05"),
+ Size: info.Size(),
+ })
+ }
+
+ return profiles, nil
+}
+
+func analyzeProfile(profilePath string, args ...string) error {
+ // Detect profile type
+ isMemProfile := strings.Contains(profilePath, "_mem_") || strings.Contains(profilePath, "_alloc_")
+
+ fmt.Printf("Analyzing %s\n", profilePath)
+ fmt.Println(strings.Repeat("-", 60))
+
+ // Default analysis
+ if err := showTopFunctions(profilePath, 10, isMemProfile); err != nil {
+ return err
+ }
+
+ // Show tips
+ fmt.Println("\nAnalysis tips:")
+ if isMemProfile {
+ fmt.Println(" - Use -alloc_space to see total allocations")
+ fmt.Println(" - Use -alloc_objects to see allocation counts")
+ fmt.Println(" - Use -inuse_space to see current memory usage")
+ } else {
+ fmt.Println(" - Use -cum to sort by cumulative time")
+ fmt.Println(" - Use -list <function> to see source code")
+ fmt.Println(" - Use -web to open interactive flame graph")
+ }
+
+ return nil
+}
+
// showTopFunctions runs "go tool pprof -top -nodecount=<count>" on
// profilePath and prints the resulting table to standard output, preceded by
// a banner and the exact command that was executed. For memory profiles
// (isMemProfile) the -alloc_space sample index is selected.
//
// At most count data rows are printed after the table header. pprof's own
// preamble lines (File:, Type:, Time:, Duration: and anything else before the
// "flat ... cum" header row) are suppressed.
//
// It returns an error if pprof cannot be run or exits non-zero; pprof's
// stderr text is appended to the error when available. It also returns an
// error if pprof's output cannot be scanned.
func showTopFunctions(profilePath string, count int, isMemProfile bool) error {
	args := []string{"tool", "pprof", "-top", fmt.Sprintf("-nodecount=%d", count)}
	if isMemProfile {
		args = append(args, "-alloc_space")
	}
	args = append(args, profilePath)

	output, err := exec.Command("go", args...).Output()
	if err != nil {
		// Output collects the child's stderr into ExitError.Stderr when
		// Cmd.Stderr is unset; surface it so the failure is diagnosable.
		if exitErr, ok := err.(*exec.ExitError); ok {
			if msg := strings.TrimSpace(string(exitErr.Stderr)); msg != "" {
				return fmt.Errorf("pprof failed: %w: %s", err, msg)
			}
		}
		return fmt.Errorf("pprof failed: %w", err)
	}

	fmt.Printf("Top %d functions (sorted by flat):\n", count)
	fmt.Println("================================================================")

	scanner := bufio.NewScanner(strings.NewReader(string(output)))
	inTable := false
	rows := 0

	for scanner.Scan() {
		line := scanner.Text()

		// Skip pprof's preamble lines. They are checked first because a
		// binary name on the "File:" line could contain "flat" and "cum".
		if strings.HasPrefix(line, "File:") || strings.HasPrefix(line, "Type:") ||
			strings.HasPrefix(line, "Time:") || strings.HasPrefix(line, "Duration:") {
			continue
		}

		if !inTable {
			// The table starts at the column header row. Only look for it
			// once, so that a symbol name containing "flat" and "cum"
			// cannot re-trigger the banner.
			if strings.Contains(line, "flat") && strings.Contains(line, "cum") {
				inTable = true
				fmt.Println("# Command: go " + strings.Join(args, " "))
				fmt.Println(line)
			}
			continue
		}

		if line != "" {
			if rows >= count {
				break
			}
			rows++
		}
		fmt.Println(line)
	}

	if err := scanner.Err(); err != nil {
		return fmt.Errorf("reading pprof output: %w", err)
	}

	return nil
}
+
// openWebProfile launches "go tool pprof -http=:8080" for profilePath with
// the child's output attached to this process's stdout/stderr, and blocks
// until pprof exits. The returned error is whatever running pprof yields.
func openWebProfile(profilePath string) error {
	fmt.Printf("Starting pprof web server for %s...\n", profilePath)
	fmt.Println("Opening http://localhost:8080 in your browser")
	fmt.Println("Press Ctrl+C to stop")

	server := exec.Command("go", "tool", "pprof", "-http=:8080", profilePath)
	server.Stdout, server.Stderr = os.Stdout, os.Stderr

	return server.Run()
}
diff --git a/internal/tools/profile/profile.go b/internal/tools/profile/profile.go
new file mode 100644
index 0000000..34dfc7e
--- /dev/null
+++ b/internal/tools/profile/profile.go
@@ -0,0 +1,358 @@
+package profile
+
+import (
+ "flag"
+ "fmt"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "sort"
+ "strings"
+ "time"
+
+ "github.com/mimecast/dtail/internal/tools/common"
+)
+
// Config carries the settings of one dtail-tools profile invocation. All
// fields are populated from command-line flags by parseFlags.
type Config struct {
	// Mode selects the action: quick, full, dmap, analyze or list (-mode).
	Mode string
	// ProfileDir is the directory profiles are written to and read from (-dir).
	ProfileDir string
	// TestDataDir is the directory generated test input is stored in (-testdata).
	TestDataDir string
	// Runs is the number of profiling runs per scenario (-runs).
	Runs int
	// NoColor is set by -nocolor (disable colored output).
	NoColor bool
	// Commands lists the tool names whose newest profiles are summarised
	// after a run; defaults to dcat, dgrep and dmap (-commands).
	Commands []string
	// Timeout bounds a single profiled command execution (-timeout).
	Timeout time.Duration
}
+
+// Run executes the profiling command
+func Run() error {
+ cfg := parseFlags()
+
+ // Create directories
+ if err := common.EnsureDirectory(cfg.ProfileDir); err != nil {
+ return fmt.Errorf("failed to create profile directory: %w", err)
+ }
+ if err := common.EnsureDirectory(cfg.TestDataDir); err != nil {
+ return fmt.Errorf("failed to create test data directory: %w", err)
+ }
+
+ switch cfg.Mode {
+ case "quick":
+ return runQuickProfile(cfg)
+ case "full":
+ return runFullProfile(cfg)
+ case "dmap":
+ return runDMapProfile(cfg)
+ case "analyze":
+ return runAnalyze(cfg)
+ case "list":
+ return listProfiles(cfg)
+ default:
+ return fmt.Errorf("unknown profile mode: %s", cfg.Mode)
+ }
+}
+
+func parseFlags() *Config {
+ cfg := &Config{
+ Commands: []string{"dcat", "dgrep", "dmap"},
+ Timeout: 30 * time.Second,
+ }
+
+ flag.StringVar(&cfg.Mode, "mode", "quick", "Profile mode: quick, full, dmap, analyze, list")
+ flag.StringVar(&cfg.ProfileDir, "dir", "profiles", "Profile output directory")
+ flag.StringVar(&cfg.TestDataDir, "testdata", "testdata", "Test data directory")
+ flag.IntVar(&cfg.Runs, "runs", 1, "Number of profiling runs")
+ flag.BoolVar(&cfg.NoColor, "nocolor", false, "Disable colored output")
+ flag.DurationVar(&cfg.Timeout, "timeout", cfg.Timeout, "Timeout for profiling runs")
+
+ // Custom command list
+ var cmdList string
+ flag.StringVar(&cmdList, "commands", "", "Comma-separated list of commands to profile")
+
+ flag.Parse()
+
+ if cmdList != "" {
+ cfg.Commands = strings.Split(cmdList, ",")
+ }
+
+ return cfg
+}
+
+func runQuickProfile(cfg *Config) error {
+ common.PrintSection("DTail Quick Profiling")
+
+ // Generate test data
+ gen := common.NewDataGenerator()
+
+ logFile := filepath.Join(cfg.TestDataDir, "quick_test.log")
+ csvFile := filepath.Join(cfg.TestDataDir, "quick_test.csv")
+
+ common.PrintInfo("Generating test data...\n")
+ if err := gen.GenerateFile(logFile, "10MB", common.FormatLog); err != nil {
+ return fmt.Errorf("failed to generate log file: %w", err)
+ }
+ if err := gen.GenerateFile(csvFile, "10MB", common.FormatCSV); err != nil {
+ return fmt.Errorf("failed to generate CSV file: %w", err)
+ }
+
+ // Build commands
+ common.PrintInfo("Building commands...\n")
+ if err := common.BuildCommands("dcat", "dgrep", "dmap"); err != nil {
+ return err
+ }
+
+ // Profile each command
+ common.PrintSection("Running quick profiles...")
+
+ // Profile dcat
+ if err := profileCommand("dcat", "dcat",
+ []string{"-profile", "-profiledir", cfg.ProfileDir, "-plain", "-cfg", "none", logFile},
+ cfg.Timeout); err != nil {
+ return err
+ }
+
+ // Profile dgrep
+ if err := profileCommand("dgrep", "dgrep",
+ []string{"-profile", "-profiledir", cfg.ProfileDir, "-plain", "-cfg", "none",
+ "-regex", "user[0-9]+", logFile},
+ cfg.Timeout); err != nil {
+ return err
+ }
+
+ // Profile dmap
+ query := `select count($line),avg($duration) group by $user logformat csv`
+ if err := profileCommand("dmap", "dmap",
+ []string{"-profile", "-profiledir", cfg.ProfileDir, "-plain", "-cfg", "none",
+ "-query", query, "-files", csvFile},
+ cfg.Timeout); err != nil {
+ return err
+ }
+
+ // Analyze results
+ return analyzeLatestProfiles(cfg)
+}
+
+func runFullProfile(cfg *Config) error {
+ common.PrintSection("DTail Full Profiling")
+
+ // Generate test data
+ gen := common.NewDataGenerator()
+
+ testFiles := map[string]string{
+ "small.log": "10MB",
+ "medium.log": "100MB",
+ "test.csv": "50MB",
+ "dtail_format.log": "100000", // lines
+ }
+
+ common.PrintInfo("Generating test data...\n")
+ for filename, size := range testFiles {
+ fullPath := filepath.Join(cfg.TestDataDir, filename)
+ if filename == "dtail_format.log" {
+ lines := 100000
+ if err := gen.GenerateLogFileWithLines(fullPath, lines, common.FormatDTail); err != nil {
+ return fmt.Errorf("failed to generate %s: %w", filename, err)
+ }
+ } else if strings.HasSuffix(filename, ".csv") {
+ if err := gen.GenerateFile(fullPath, size, common.FormatCSV); err != nil {
+ return fmt.Errorf("failed to generate %s: %w", filename, err)
+ }
+ } else {
+ if err := gen.GenerateFile(fullPath, size, common.FormatLog); err != nil {
+ return fmt.Errorf("failed to generate %s: %w", filename, err)
+ }
+ }
+ }
+
+ // Build commands
+ common.PrintInfo("Building commands...\n")
+ if err := common.BuildCommands("dcat", "dgrep", "dmap"); err != nil {
+ return err
+ }
+
+ // Run profiling
+ common.PrintSection("Running full profiling suite...")
+
+ // Profile configurations
+ profiles := []struct {
+ cmd string
+ name string
+ args []string
+ }{
+ // dcat profiles
+ {"dcat", "small_file", []string{"-profile", "-profiledir", cfg.ProfileDir, "-plain", "-cfg", "none",
+ filepath.Join(cfg.TestDataDir, "small.log")}},
+ {"dcat", "medium_file", []string{"-profile", "-profiledir", cfg.ProfileDir, "-plain", "-cfg", "none",
+ filepath.Join(cfg.TestDataDir, "medium.log")}},
+
+ // dgrep profiles
+ {"dgrep", "simple_pattern", []string{"-profile", "-profiledir", cfg.ProfileDir, "-plain", "-cfg", "none",
+ "-regex", "ERROR", filepath.Join(cfg.TestDataDir, "medium.log")}},
+ {"dgrep", "complex_pattern", []string{"-profile", "-profiledir", cfg.ProfileDir, "-plain", "-cfg", "none",
+ "-regex", "(ERROR|WARN).*user[0-9]+", filepath.Join(cfg.TestDataDir, "medium.log")}},
+
+ // dmap profiles
+ {"dmap", "simple_count", []string{"-profile", "-profiledir", cfg.ProfileDir, "-plain", "-cfg", "none",
+ "-query", "from STATS select count(*)", "-files", filepath.Join(cfg.TestDataDir, "dtail_format.log")}},
+ {"dmap", "aggregations", []string{"-profile", "-profiledir", cfg.ProfileDir, "-plain", "-cfg", "none",
+ "-query", "from STATS select sum($goroutines),avg($cgocalls),max(lifetimeConnections)",
+ "-files", filepath.Join(cfg.TestDataDir, "dtail_format.log")}},
+ {"dmap", "csv_query", []string{"-profile", "-profiledir", cfg.ProfileDir, "-plain", "-cfg", "none",
+ "-query", `select user,action,count(*) where status="success" group by user,action logformat csv`,
+ "-files", filepath.Join(cfg.TestDataDir, "test.csv")}},
+ }
+
+ for _, p := range profiles {
+ common.PrintInfo("\nProfiling %s - %s\n", p.cmd, p.name)
+ for i := 1; i <= cfg.Runs; i++ {
+ if cfg.Runs > 1 {
+ fmt.Printf(" Run %d/%d...\n", i, cfg.Runs)
+ }
+ if err := profileCommand(p.cmd, p.cmd, p.args, cfg.Timeout); err != nil {
+ return fmt.Errorf("failed to profile %s-%s: %w", p.cmd, p.name, err)
+ }
+ if i < cfg.Runs {
+ time.Sleep(1 * time.Second) // Small delay between runs
+ }
+ }
+ }
+
+ return analyzeLatestProfiles(cfg)
+}
+
+func runDMapProfile(cfg *Config) error {
+ common.PrintSection("DTail dmap Profiling")
+
+ // Generate MapReduce test data
+ gen := common.NewDataGenerator()
+
+ smallFile := filepath.Join(cfg.TestDataDir, "stats_small.log")
+ mediumFile := filepath.Join(cfg.TestDataDir, "stats_medium.log")
+
+ common.PrintInfo("Preparing MapReduce test data...\n")
+ if err := gen.GenerateLogFileWithLines(smallFile, 1000, common.FormatDTail); err != nil {
+ return fmt.Errorf("failed to generate small file: %w", err)
+ }
+ if err := gen.GenerateLogFileWithLines(mediumFile, 1000000, common.FormatDTail); err != nil {
+ return fmt.Errorf("failed to generate medium file: %w", err)
+ }
+
+ // Build dmap
+ common.PrintInfo("Building dmap...\n")
+ if err := common.BuildCommand("dmap"); err != nil {
+ return err
+ }
+
+ // Profile different queries
+ common.PrintSection("Profiling dmap queries...")
+
+ queries := []struct {
+ name string
+ query string
+ file string
+ }{
+ {"Count by hostname", "from STATS select count($line) group by hostname", smallFile},
+ {"Sum and average", "from STATS select sum($goroutines),avg($goroutines) group by hostname", smallFile},
+ {"Min and max", "from STATS select min(currentConnections),max(lifetimeConnections) group by hostname", smallFile},
+ {"Large file processing", "from STATS select count($line),avg($goroutines) group by hostname", mediumFile},
+ }
+
+ for _, q := range queries {
+ common.PrintInfo("\nQuery: %s\n", q.name)
+ args := []string{"-profile", "-profiledir", cfg.ProfileDir, "-plain", "-cfg", "none",
+ "-query", q.query, "-files", q.file}
+ if err := profileCommand("dmap", "dmap", args, cfg.Timeout); err != nil {
+ return fmt.Errorf("failed to profile query %s: %w", q.name, err)
+ }
+ }
+
+ return analyzeLatestProfiles(cfg)
+}
+
// profileCommand runs the binary "./<cmd>" from the current directory with
// args, discarding its stdout and forwarding its stderr, and waits for it to
// finish or for timeout to elapse.
//
// On timeout the process is killed and reaped, and an error is returned. A
// non-nil exit error is returned unless it reports termination by interrupt
// signal, which is treated as a normal end of a profiling run.
//
// After a successful run the newest "<name>_cpu_*.prof" file is reported on
// stdout. It is looked up in the directory given by the -profiledir option
// in args, falling back to "profiles" when that option is absent.
func profileCommand(name, cmd string, args []string, timeout time.Duration) error {
	fmt.Printf("Command: %s %s\n", cmd, strings.Join(args, " "))

	command := exec.Command("./"+cmd, args...)
	command.Stdout = nil // nil discards the child's output during profiling
	command.Stderr = os.Stderr

	if err := command.Start(); err != nil {
		return err
	}

	// Buffered so the waiter goroutine can always deliver and exit.
	done := make(chan error, 1)
	go func() {
		done <- command.Wait()
	}()

	timer := time.NewTimer(timeout)
	defer timer.Stop()

	select {
	case <-timer.C:
		// Kill only fails if the process is already gone; in either case
		// Wait returns promptly, so reap the child before reporting.
		_ = command.Process.Kill()
		<-done
		return fmt.Errorf("command timed out after %v", timeout)
	case err := <-done:
		if err != nil && !strings.Contains(err.Error(), "signal: interrupt") {
			return err
		}
	}

	// Report the newest generated CPU profile. Glob only fails on a
	// malformed pattern; reporting is best effort, so that is ignored.
	pattern := filepath.Join(profileDirFromArgs(args), fmt.Sprintf("%s_cpu_*.prof", name))
	matches, _ := filepath.Glob(pattern)
	if latest, ok := newestByModTime(matches); ok {
		fmt.Printf(" Generated: %s\n", filepath.Base(latest))
	}

	return nil
}

// profileDirFromArgs returns the value of the -profiledir option in args
// (space- or '='-separated, one or two leading dashes), or "profiles" if the
// option is not present.
func profileDirFromArgs(args []string) string {
	for i, arg := range args {
		switch {
		case arg == "-profiledir" || arg == "--profiledir":
			if i+1 < len(args) {
				return args[i+1]
			}
		case strings.HasPrefix(arg, "--profiledir="):
			return strings.TrimPrefix(arg, "--profiledir=")
		case strings.HasPrefix(arg, "-profiledir="):
			return strings.TrimPrefix(arg, "-profiledir=")
		}
	}
	return "profiles"
}

// newestByModTime returns the most recently modified of paths. Paths that
// cannot be stat'ed are ignored; ok is false when none remain.
func newestByModTime(paths []string) (newest string, ok bool) {
	type candidate struct {
		path string
		mod  time.Time
	}

	candidates := make([]candidate, 0, len(paths))
	for _, p := range paths {
		info, err := os.Stat(p)
		if err != nil {
			continue // vanished or unreadable; not a candidate
		}
		candidates = append(candidates, candidate{path: p, mod: info.ModTime()})
	}
	if len(candidates) == 0 {
		return "", false
	}

	sort.Slice(candidates, func(i, j int) bool {
		return candidates[i].mod.After(candidates[j].mod)
	})
	return candidates[0].path, true
}
+
+func analyzeLatestProfiles(cfg *Config) error {
+ common.PrintSection("Profile Analysis")
+
+ // Find latest profiles for each command
+ for _, cmd := range cfg.Commands {
+ cpuPattern := filepath.Join(cfg.ProfileDir, fmt.Sprintf("%s_cpu_*.prof", cmd))
+ memPattern := filepath.Join(cfg.ProfileDir, fmt.Sprintf("%s_mem_*.prof", cmd))
+
+ cpuProfiles, _ := filepath.Glob(cpuPattern)
+ memProfiles, _ := filepath.Glob(memPattern)
+
+ if len(cpuProfiles) > 0 {
+ sort.Slice(cpuProfiles, func(i, j int) bool {
+ fi, _ := os.Stat(cpuProfiles[i])
+ fj, _ := os.Stat(cpuProfiles[j])
+ return fi.ModTime().After(fj.ModTime())
+ })
+
+ fmt.Printf("\n%s CPU Profile: %s\n", cmd, filepath.Base(cpuProfiles[0]))
+ if err := showTopFunctions(cpuProfiles[0], 5, false); err != nil {
+ fmt.Printf(" Analysis failed: %v\n", err)
+ }
+ }
+
+ if len(memProfiles) > 0 {
+ sort.Slice(memProfiles, func(i, j int) bool {
+ fi, _ := os.Stat(memProfiles[i])
+ fj, _ := os.Stat(memProfiles[j])
+ return fi.ModTime().After(fj.ModTime())
+ })
+
+ fmt.Printf("\n%s Memory Profile: %s\n", cmd, filepath.Base(memProfiles[0]))
+ if err := showTopFunctions(memProfiles[0], 5, true); err != nil {
+ fmt.Printf(" Analysis failed: %v\n", err)
+ }
+ }
+ }
+
+ common.PrintSuccess("\nProfiling complete!\n")
+ fmt.Println("\nTo analyze profiles in detail:")
+ fmt.Printf(" go tool pprof %s/<profile_file>\n", cfg.ProfileDir)
+ fmt.Printf(" dtail-tools profile -mode analyze <profile_file>\n")
+
+ return nil
+} \ No newline at end of file