diff options
| author | Paul Buetow <paul@buetow.org> | 2025-06-26 22:19:22 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2025-06-26 22:19:22 +0300 |
| commit | 947e08e4f9e3c9c44b346adff4eb6d68fa79a726 (patch) | |
| tree | cfa94aba72f91d26657de09b7a5b6a23eff10fd7 /internal/tools/profile | |
| parent | 1e643ac66765fc0ab4224335191731d8b77fece2 (diff) | |
Refactor profiling and benchmarking tools from bash to Go
This major refactoring replaces all bash-based profiling and benchmarking
scripts with a unified Go tool (dtail-tools) that provides:
- Better cross-platform compatibility
- Improved error handling and reliability
- Structured data generation for test files
- Consistent command-line interface
- Easier maintenance and extensibility
Key changes:
- Created dtail-tools command with profile and benchmark subcommands
- Implemented common utilities for data generation and file operations
- Updated Makefile to use the new Go-based tools
- Maintained backward compatibility with existing make targets
- Fixed ParseSize to handle single-letter suffixes (10M, 1G, etc.)
The new tool supports all previous functionality:
- profile-quick, profile-all, profile-dmap
- benchmark creation, comparison, and management
- Test data generation with multiple formats
- Profile analysis and listing
🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
Diffstat (limited to 'internal/tools/profile')
| -rw-r--r-- | internal/tools/profile/analyze.go | 221 | ||||
| -rw-r--r-- | internal/tools/profile/profile.go | 358 |
2 files changed, 579 insertions, 0 deletions
diff --git a/internal/tools/profile/analyze.go b/internal/tools/profile/analyze.go new file mode 100644 index 0000000..f27841a --- /dev/null +++ b/internal/tools/profile/analyze.go @@ -0,0 +1,221 @@ +package profile + +import ( + "bufio" + "flag" + "fmt" + "os" + "os/exec" + "path/filepath" + "sort" + "strings" + + "github.com/mimecast/dtail/internal/tools/common" +) + +// ProfileInfo holds information about a profile file +type ProfileInfo struct { + Path string + Tool string + Type string // cpu, mem, alloc + ModTime string + Size int64 +} + +func runAnalyze(cfg *Config) error { + args := flag.Args() + if len(args) == 0 { + return fmt.Errorf("no profile file specified") + } + + profilePath := args[0] + if !common.FileExists(profilePath) { + return fmt.Errorf("profile file not found: %s", profilePath) + } + + // Determine if web mode requested + for _, arg := range args[1:] { + if arg == "-web" || arg == "--web" { + return openWebProfile(profilePath) + } + } + + // Default to text analysis + return analyzeProfile(profilePath, args[1:]...) +} + +func listProfiles(cfg *Config) error { + common.PrintSection("Available Profiles") + + profiles, err := findProfiles(cfg.ProfileDir) + if err != nil { + return err + } + + if len(profiles) == 0 { + fmt.Printf("No profiles found in %s\n", cfg.ProfileDir) + return nil + } + + // Group by tool + byTool := make(map[string][]ProfileInfo) + for _, p := range profiles { + byTool[p.Tool] = append(byTool[p.Tool], p) + } + + // Sort tools + var tools []string + for tool := range byTool { + tools = append(tools, tool) + } + sort.Strings(tools) + + // Display profiles + for _, tool := range tools { + fmt.Printf("\n%s profiles:\n", tool) + toolProfiles := byTool[tool] + + // Sort by modification time (newest first) + sort.Slice(toolProfiles, func(i, j int) bool { + return toolProfiles[i].ModTime > toolProfiles[j].ModTime + }) + + for _, p := range toolProfiles { + fmt.Printf(" %-8s %s %8s %s\n", + p.Type, p.ModTime, common.FormatSize(p.Size), filepath.Base(p.Path)) + } + } + + fmt.Printf("\nTotal: %d profiles\n", len(profiles)) + fmt.Printf("\nUsage: dtail-tools profile -mode analyze <profile_file>\n") + + return nil +} + +func findProfiles(dir string) ([]ProfileInfo, error) { + var profiles []ProfileInfo + + pattern := filepath.Join(dir, "*.prof") + matches, err := filepath.Glob(pattern) + if err != nil { + return nil, err + } + + for _, path := range matches { + info, err := os.Stat(path) + if err != nil { + continue + } + + // Parse filename to extract tool and type + base := filepath.Base(path) + parts := strings.Split(base, "_") + if len(parts) < 3 { + continue + } + + tool := parts[0] + profType := parts[1] + + profiles = append(profiles, ProfileInfo{ + Path: path, + Tool: tool, + Type: profType, + ModTime: info.ModTime().Format("2006-01-02 15:04:05"), + Size: info.Size(), + }) + } + + return profiles, nil +} + +func analyzeProfile(profilePath string, args ...string) error { + // Detect profile type + isMemProfile := strings.Contains(profilePath, "_mem_") || strings.Contains(profilePath, "_alloc_") + + fmt.Printf("Analyzing %s\n", profilePath) + fmt.Println(strings.Repeat("-", 60)) + + // Default analysis + if err := showTopFunctions(profilePath, 10, isMemProfile); err != nil { + return err + } + + // Show tips + fmt.Println("\nAnalysis tips:") + if isMemProfile { + fmt.Println(" - Use -alloc_space to see total allocations") + fmt.Println(" - Use -alloc_objects to see allocation counts") + fmt.Println(" - Use -inuse_space to see current memory usage") + } else { + fmt.Println(" - Use -cum to sort by cumulative time") + fmt.Println(" - Use -list <function> to see source code") + fmt.Println(" - Use -web to open interactive flame graph") + } + + return nil +} + +func showTopFunctions(profilePath string, count int, isMemProfile bool) error { + args := []string{"tool", "pprof", "-top", fmt.Sprintf("-nodecount=%d", count)} + + if isMemProfile { + args = append(args, "-alloc_space") + } + + args = append(args, profilePath) + + cmd := exec.Command("go", args...) + output, err := cmd.Output() + if err != nil { + return fmt.Errorf("pprof failed: %w", err) + } + + // Parse and display output + scanner := bufio.NewScanner(strings.NewReader(string(output))) + lineCount := 0 + inTop := false + + fmt.Printf("Top %d functions (sorted by flat):\n", count) + fmt.Println("================================================================") + + for scanner.Scan() { + line := scanner.Text() + + // Skip header lines + if strings.HasPrefix(line, "File:") || strings.HasPrefix(line, "Type:") || + strings.HasPrefix(line, "Time:") || strings.HasPrefix(line, "Duration:") { + continue + } + + // Start printing from the table header + if strings.Contains(line, "flat") && strings.Contains(line, "cum") { + inTop = true + fmt.Println("# Command: go " + strings.Join(args[1:], " ")) + } + + if inTop { + fmt.Println(line) + if line != "" { + lineCount++ + } + if lineCount > count+2 { // +2 for header and separator + break + } + } + } + + return nil +} + +func openWebProfile(profilePath string) error { + fmt.Printf("Starting pprof web server for %s...\n", profilePath) + fmt.Println("Opening http://localhost:8080 in your browser") + fmt.Println("Press Ctrl+C to stop") + + cmd := exec.Command("go", "tool", "pprof", "-http=:8080", profilePath) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + + return cmd.Run() +}
\ No newline at end of file diff --git a/internal/tools/profile/profile.go b/internal/tools/profile/profile.go new file mode 100644 index 0000000..34dfc7e --- /dev/null +++ b/internal/tools/profile/profile.go @@ -0,0 +1,358 @@ +package profile + +import ( + "flag" + "fmt" + "os" + "os/exec" + "path/filepath" + "sort" + "strings" + "time" + + "github.com/mimecast/dtail/internal/tools/common" +) + +// Config holds profiling configuration +type Config struct { + Mode string + ProfileDir string + TestDataDir string + Runs int + NoColor bool + Commands []string + Timeout time.Duration +} + +// Run executes the profiling command +func Run() error { + cfg := parseFlags() + + // Create directories + if err := common.EnsureDirectory(cfg.ProfileDir); err != nil { + return fmt.Errorf("failed to create profile directory: %w", err) + } + if err := common.EnsureDirectory(cfg.TestDataDir); err != nil { + return fmt.Errorf("failed to create test data directory: %w", err) + } + + switch cfg.Mode { + case "quick": + return runQuickProfile(cfg) + case "full": + return runFullProfile(cfg) + case "dmap": + return runDMapProfile(cfg) + case "analyze": + return runAnalyze(cfg) + case "list": + return listProfiles(cfg) + default: + return fmt.Errorf("unknown profile mode: %s", cfg.Mode) + } +} + +func parseFlags() *Config { + cfg := &Config{ + Commands: []string{"dcat", "dgrep", "dmap"}, + Timeout: 30 * time.Second, + } + + flag.StringVar(&cfg.Mode, "mode", "quick", "Profile mode: quick, full, dmap, analyze, list") + flag.StringVar(&cfg.ProfileDir, "dir", "profiles", "Profile output directory") + flag.StringVar(&cfg.TestDataDir, "testdata", "testdata", "Test data directory") + flag.IntVar(&cfg.Runs, "runs", 1, "Number of profiling runs") + flag.BoolVar(&cfg.NoColor, "nocolor", false, "Disable colored output") + flag.DurationVar(&cfg.Timeout, "timeout", cfg.Timeout, "Timeout for profiling runs") + + // Custom command list + var cmdList string + flag.StringVar(&cmdList, "commands", "", "Comma-separated list of commands to profile") + + flag.Parse() + + if cmdList != "" { + cfg.Commands = strings.Split(cmdList, ",") + } + + return cfg +} + +func runQuickProfile(cfg *Config) error { + common.PrintSection("DTail Quick Profiling") + + // Generate test data + gen := common.NewDataGenerator() + + logFile := filepath.Join(cfg.TestDataDir, "quick_test.log") + csvFile := filepath.Join(cfg.TestDataDir, "quick_test.csv") + + common.PrintInfo("Generating test data...\n") + if err := gen.GenerateFile(logFile, "10MB", common.FormatLog); err != nil { + return fmt.Errorf("failed to generate log file: %w", err) + } + if err := gen.GenerateFile(csvFile, "10MB", common.FormatCSV); err != nil { + return fmt.Errorf("failed to generate CSV file: %w", err) + } + + // Build commands + common.PrintInfo("Building commands...\n") + if err := common.BuildCommands("dcat", "dgrep", "dmap"); err != nil { + return err + } + + // Profile each command + common.PrintSection("Running quick profiles...") + + // Profile dcat + if err := profileCommand("dcat", "dcat", + []string{"-profile", "-profiledir", cfg.ProfileDir, "-plain", "-cfg", "none", logFile}, + cfg.Timeout); err != nil { + return err + } + + // Profile dgrep + if err := profileCommand("dgrep", "dgrep", + []string{"-profile", "-profiledir", cfg.ProfileDir, "-plain", "-cfg", "none", + "-regex", "user[0-9]+", logFile}, + cfg.Timeout); err != nil { + return err + } + + // Profile dmap + query := `select count($line),avg($duration) group by $user logformat csv` + if err := profileCommand("dmap", "dmap", + []string{"-profile", "-profiledir", cfg.ProfileDir, "-plain", "-cfg", "none", + "-query", query, "-files", csvFile}, + cfg.Timeout); err != nil { + return err + } + + // Analyze results + return analyzeLatestProfiles(cfg) +} + +func runFullProfile(cfg *Config) error { + common.PrintSection("DTail Full Profiling") + + // Generate test data + gen := common.NewDataGenerator() + + testFiles := map[string]string{ + "small.log": "10MB", + "medium.log": "100MB", + "test.csv": "50MB", + "dtail_format.log": "100000", // lines + } + + common.PrintInfo("Generating test data...\n") + for filename, size := range testFiles { + fullPath := filepath.Join(cfg.TestDataDir, filename) + if filename == "dtail_format.log" { + lines := 100000 + if err := gen.GenerateLogFileWithLines(fullPath, lines, common.FormatDTail); err != nil { + return fmt.Errorf("failed to generate %s: %w", filename, err) + } + } else if strings.HasSuffix(filename, ".csv") { + if err := gen.GenerateFile(fullPath, size, common.FormatCSV); err != nil { + return fmt.Errorf("failed to generate %s: %w", filename, err) + } + } else { + if err := gen.GenerateFile(fullPath, size, common.FormatLog); err != nil { + return fmt.Errorf("failed to generate %s: %w", filename, err) + } + } + } + + // Build commands + common.PrintInfo("Building commands...\n") + if err := common.BuildCommands("dcat", "dgrep", "dmap"); err != nil { + return err + } + + // Run profiling + common.PrintSection("Running full profiling suite...") + + // Profile configurations + profiles := []struct { + cmd string + name string + args []string + }{ + // dcat profiles + {"dcat", "small_file", []string{"-profile", "-profiledir", cfg.ProfileDir, "-plain", "-cfg", "none", + filepath.Join(cfg.TestDataDir, "small.log")}}, + {"dcat", "medium_file", []string{"-profile", "-profiledir", cfg.ProfileDir, "-plain", "-cfg", "none", + filepath.Join(cfg.TestDataDir, "medium.log")}}, + + // dgrep profiles + {"dgrep", "simple_pattern", []string{"-profile", "-profiledir", cfg.ProfileDir, "-plain", "-cfg", "none", + "-regex", "ERROR", filepath.Join(cfg.TestDataDir, "medium.log")}}, + {"dgrep", "complex_pattern", []string{"-profile", "-profiledir", cfg.ProfileDir, "-plain", "-cfg", "none", + "-regex", "(ERROR|WARN).*user[0-9]+", filepath.Join(cfg.TestDataDir, "medium.log")}}, + + // dmap profiles + {"dmap", "simple_count", []string{"-profile", "-profiledir", cfg.ProfileDir, "-plain", "-cfg", "none", + "-query", "from STATS select count(*)", "-files", filepath.Join(cfg.TestDataDir, "dtail_format.log")}}, + {"dmap", "aggregations", []string{"-profile", "-profiledir", cfg.ProfileDir, "-plain", "-cfg", "none", + "-query", "from STATS select sum($goroutines),avg($cgocalls),max(lifetimeConnections)", + "-files", filepath.Join(cfg.TestDataDir, "dtail_format.log")}}, + {"dmap", "csv_query", []string{"-profile", "-profiledir", cfg.ProfileDir, "-plain", "-cfg", "none", + "-query", `select user,action,count(*) where status="success" group by user,action logformat csv`, + "-files", filepath.Join(cfg.TestDataDir, "test.csv")}}, + } + + for _, p := range profiles { + common.PrintInfo("\nProfiling %s - %s\n", p.cmd, p.name) + for i := 1; i <= cfg.Runs; i++ { + if cfg.Runs > 1 { + fmt.Printf(" Run %d/%d...\n", i, cfg.Runs) + } + if err := profileCommand(p.cmd, p.cmd, p.args, cfg.Timeout); err != nil { + return fmt.Errorf("failed to profile %s-%s: %w", p.cmd, p.name, err) + } + if i < cfg.Runs { + time.Sleep(1 * time.Second) // Small delay between runs + } + } + } + + return analyzeLatestProfiles(cfg) +} + +func runDMapProfile(cfg *Config) error { + common.PrintSection("DTail dmap Profiling") + + // Generate MapReduce test data + gen := common.NewDataGenerator() + + smallFile := filepath.Join(cfg.TestDataDir, "stats_small.log") + mediumFile := filepath.Join(cfg.TestDataDir, "stats_medium.log") + + common.PrintInfo("Preparing MapReduce test data...\n") + if err := gen.GenerateLogFileWithLines(smallFile, 1000, common.FormatDTail); err != nil { + return fmt.Errorf("failed to generate small file: %w", err) + } + if err := gen.GenerateLogFileWithLines(mediumFile, 1000000, common.FormatDTail); err != nil { + return fmt.Errorf("failed to generate medium file: %w", err) + } + + // Build dmap + common.PrintInfo("Building dmap...\n") + if err := common.BuildCommand("dmap"); err != nil { + return err + } + + // Profile different queries + common.PrintSection("Profiling dmap queries...") + + queries := []struct { + name string + query string + file string + }{ + {"Count by hostname", "from STATS select count($line) group by hostname", smallFile}, + {"Sum and average", "from STATS select sum($goroutines),avg($goroutines) group by hostname", smallFile}, + {"Min and max", "from STATS select min(currentConnections),max(lifetimeConnections) group by hostname", smallFile}, + {"Large file processing", "from STATS select count($line),avg($goroutines) group by hostname", mediumFile}, + } + + for _, q := range queries { + common.PrintInfo("\nQuery: %s\n", q.name) + args := []string{"-profile", "-profiledir", cfg.ProfileDir, "-plain", "-cfg", "none", + "-query", q.query, "-files", q.file} + if err := profileCommand("dmap", "dmap", args, cfg.Timeout); err != nil { + return fmt.Errorf("failed to profile query %s: %w", q.name, err) + } + } + + return analyzeLatestProfiles(cfg) +} + +func profileCommand(name, cmd string, args []string, timeout time.Duration) error { + fmt.Printf("Command: %s %s\n", cmd, strings.Join(args, " ")) + + command := exec.Command("./"+cmd, args...) + command.Stdout = nil // Suppress output during profiling + command.Stderr = os.Stderr + + if err := command.Start(); err != nil { + return err + } + + done := make(chan error, 1) + go func() { + done <- command.Wait() + }() + + select { + case <-time.After(timeout): + command.Process.Kill() + return fmt.Errorf("command timed out after %v", timeout) + case err := <-done: + if err != nil && !strings.Contains(err.Error(), "signal: interrupt") { + return err + } + } + + // Find generated profile + pattern := filepath.Join("profiles", fmt.Sprintf("%s_cpu_*.prof", name)) + matches, _ := filepath.Glob(pattern) + if len(matches) > 0 { + // Sort by modification time and get the latest + sort.Slice(matches, func(i, j int) bool { + fi, _ := os.Stat(matches[i]) + fj, _ := os.Stat(matches[j]) + return fi.ModTime().After(fj.ModTime()) + }) + fmt.Printf(" Generated: %s\n", filepath.Base(matches[0])) + } + + return nil +} + +func analyzeLatestProfiles(cfg *Config) error { + common.PrintSection("Profile Analysis") + + // Find latest profiles for each command + for _, cmd := range cfg.Commands { + cpuPattern := filepath.Join(cfg.ProfileDir, fmt.Sprintf("%s_cpu_*.prof", cmd)) + memPattern := filepath.Join(cfg.ProfileDir, fmt.Sprintf("%s_mem_*.prof", cmd)) + + cpuProfiles, _ := filepath.Glob(cpuPattern) + memProfiles, _ := filepath.Glob(memPattern) + + if len(cpuProfiles) > 0 { + sort.Slice(cpuProfiles, func(i, j int) bool { + fi, _ := os.Stat(cpuProfiles[i]) + fj, _ := os.Stat(cpuProfiles[j]) + return fi.ModTime().After(fj.ModTime()) + }) + + fmt.Printf("\n%s CPU Profile: %s\n", cmd, filepath.Base(cpuProfiles[0])) + if err := showTopFunctions(cpuProfiles[0], 5, false); err != nil { + fmt.Printf(" Analysis failed: %v\n", err) + } + } + + if len(memProfiles) > 0 { + sort.Slice(memProfiles, func(i, j int) bool { + fi, _ := os.Stat(memProfiles[i]) + fj, _ := os.Stat(memProfiles[j]) + return fi.ModTime().After(fj.ModTime()) + }) + + fmt.Printf("\n%s Memory Profile: %s\n", cmd, filepath.Base(memProfiles[0])) + if err := showTopFunctions(memProfiles[0], 5, true); err != nil { + fmt.Printf(" Analysis failed: %v\n", err) + } + } + } + + common.PrintSuccess("\nProfiling complete!\n") + fmt.Println("\nTo analyze profiles in detail:") + fmt.Printf(" go tool pprof %s/<profile_file>\n", cfg.ProfileDir) + fmt.Printf(" dtail-tools profile -mode analyze <profile_file>\n") + + return nil +}
\ No newline at end of file |
