summaryrefslogtreecommitdiff
path: root/internal/tools/common/data_generator.go
diff options
context:
space:
mode:
authorPaul Buetow <paul@buetow.org>2025-06-26 22:19:22 +0300
committerPaul Buetow <paul@buetow.org>2025-06-26 22:19:22 +0300
commit947e08e4f9e3c9c44b346adff4eb6d68fa79a726 (patch)
treecfa94aba72f91d26657de09b7a5b6a23eff10fd7 /internal/tools/common/data_generator.go
parent1e643ac66765fc0ab4224335191731d8b77fece2 (diff)
Refactor profiling and benchmarking tools from bash to Go
This major refactoring replaces all bash-based profiling and benchmarking scripts with a unified Go tool (dtail-tools) that provides: - Better cross-platform compatibility - Improved error handling and reliability - Structured data generation for test files - Consistent command-line interface - Easier maintenance and extensibility Key changes: - Created dtail-tools command with profile and benchmark subcommands - Implemented common utilities for data generation and file operations - Updated Makefile to use the new Go-based tools - Maintained backward compatibility with existing make targets - Fixed ParseSize to handle single-letter suffixes (10M, 1G, etc.) The new tool supports all previous functionality: - profile-quick, profile-all, profile-dmap - benchmark creation, comparison, and management - Test data generation with multiple formats - Profile analysis and listing 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
Diffstat (limited to 'internal/tools/common/data_generator.go')
-rw-r--r--internal/tools/common/data_generator.go248
1 files changed, 248 insertions, 0 deletions
diff --git a/internal/tools/common/data_generator.go b/internal/tools/common/data_generator.go
new file mode 100644
index 0000000..f9c4e5e
--- /dev/null
+++ b/internal/tools/common/data_generator.go
@@ -0,0 +1,248 @@
+package common
+
+import (
+ "bufio"
+ "fmt"
+ "math/rand"
+ "os"
+ "path/filepath"
+ "time"
+)
+
+// DataFormat represents the format of generated data
+type DataFormat string
+
+const (
+ FormatLog DataFormat = "log"
+ FormatCSV DataFormat = "csv"
+ FormatDTail DataFormat = "dtail"
+ FormatMapReduce DataFormat = "mapreduce"
+)
+
+// DataGenerator generates test data for profiling and benchmarking
+type DataGenerator struct {
+ rand *rand.Rand
+}
+
+// NewDataGenerator creates a new data generator
+func NewDataGenerator() *DataGenerator {
+ return &DataGenerator{
+ rand: rand.New(rand.NewSource(time.Now().UnixNano())),
+ }
+}
+
+// GenerateFile generates a test data file of the specified size and format
+func (g *DataGenerator) GenerateFile(filename string, sizeStr string, format DataFormat) error {
+ size, err := ParseSize(sizeStr)
+ if err != nil {
+ return fmt.Errorf("invalid size: %w", err)
+ }
+
+ // Create directory if needed
+ dir := filepath.Dir(filename)
+ if err := os.MkdirAll(dir, 0755); err != nil {
+ return fmt.Errorf("failed to create directory: %w", err)
+ }
+
+ // Check if file already exists
+ if _, err := os.Stat(filename); err == nil {
+ return nil // File exists, skip generation
+ }
+
+ switch format {
+ case FormatLog:
+ return g.generateLogFile(filename, size)
+ case FormatCSV:
+ return g.generateCSVFile(filename, size)
+ case FormatDTail, FormatMapReduce:
+ return g.generateDTailFormatFile(filename, size)
+ default:
+ return fmt.Errorf("unsupported format: %s", format)
+ }
+}
+
+// GenerateLogFileWithLines generates a log file with specific number of lines
+func (g *DataGenerator) GenerateLogFileWithLines(filename string, lines int, format DataFormat) error {
+ // Create directory if needed
+ dir := filepath.Dir(filename)
+ if err := os.MkdirAll(dir, 0755); err != nil {
+ return fmt.Errorf("failed to create directory: %w", err)
+ }
+
+ // Check if file already exists
+ if _, err := os.Stat(filename); err == nil {
+ return nil // File exists, skip generation
+ }
+
+ switch format {
+ case FormatDTail, FormatMapReduce:
+ return g.generateDTailFormatFileWithLines(filename, lines)
+ default:
+ return fmt.Errorf("line-based generation only supported for dtail/mapreduce format")
+ }
+}
+
+func (g *DataGenerator) generateLogFile(filename string, targetSize int64) error {
+ file, err := os.Create(filename)
+ if err != nil {
+ return err
+ }
+ defer file.Close()
+
+ writer := bufio.NewWriter(file)
+ defer writer.Flush()
+
+ var currentSize int64
+ lineNum := 0
+ levels := []string{"INFO", "DEBUG", "WARN", "ERROR"}
+ users := []string{"user1", "user2", "user3", "user4", "user5", "admin", "guest", "service", "monitor", "test"}
+ actions := []string{"login", "logout", "query", "update", "delete", "create", "read", "write", "sync", "backup"}
+
+ for currentSize < targetSize {
+ lineNum++
+ timestamp := time.Now().Add(time.Duration(-lineNum) * time.Second).Format("2006-01-02 15:04:05")
+ level := levels[g.rand.Intn(len(levels))]
+ user := users[g.rand.Intn(len(users))]
+ action := actions[g.rand.Intn(len(actions))]
+ duration := g.rand.Intn(5000) + 100
+ status := "success"
+ if g.rand.Float32() < 0.1 {
+ status = "failure"
+ }
+
+ line := fmt.Sprintf("[%s] %s - User %s performed %s action (duration: %dms, status: %s)\n",
+ timestamp, level, user, action, duration, status)
+
+ n, err := writer.WriteString(line)
+ if err != nil {
+ return err
+ }
+ currentSize += int64(n)
+ }
+
+ return nil
+}
+
+func (g *DataGenerator) generateCSVFile(filename string, targetSize int64) error {
+ file, err := os.Create(filename)
+ if err != nil {
+ return err
+ }
+ defer file.Close()
+
+ writer := bufio.NewWriter(file)
+ defer writer.Flush()
+
+ // Write header
+ header := "timestamp,user,action,duration,status\n"
+ n, err := writer.WriteString(header)
+ if err != nil {
+ return err
+ }
+ currentSize := int64(n)
+
+ lineNum := 0
+ users := []string{"user1", "user2", "user3", "user4", "user5", "admin", "guest", "service", "monitor", "test"}
+ actions := []string{"login", "logout", "query", "update", "delete", "create", "read", "write", "sync", "backup"}
+
+ for currentSize < targetSize {
+ lineNum++
+ timestamp := time.Now().Add(time.Duration(-lineNum) * time.Second).Format("2006-01-02 15:04:05")
+ user := users[g.rand.Intn(len(users))]
+ action := actions[g.rand.Intn(len(actions))]
+ duration := g.rand.Intn(5000) + 100
+ status := "success"
+ if g.rand.Float32() < 0.1 {
+ status = "failure"
+ }
+
+ line := fmt.Sprintf("%s,%s,%s,%d,%s\n", timestamp, user, action, duration, status)
+
+ n, err := writer.WriteString(line)
+ if err != nil {
+ return err
+ }
+ currentSize += int64(n)
+ }
+
+ return nil
+}
+
+func (g *DataGenerator) generateDTailFormatFile(filename string, targetSize int64) error {
+ file, err := os.Create(filename)
+ if err != nil {
+ return err
+ }
+ defer file.Close()
+
+ writer := bufio.NewWriter(file)
+ defer writer.Flush()
+
+ var currentSize int64
+ lineNum := 0
+ hostnames := []string{"server01", "server02", "server03", "server04", "server05",
+ "server06", "server07", "server08", "server09", "server10"}
+
+ for currentSize < targetSize {
+ lineNum++
+ hostname := hostnames[lineNum%len(hostnames)]
+ timestamp := fmt.Sprintf("%02d%02d-%02d%02d%02d",
+ 10+(lineNum/86400)%12, (lineNum/3600)%30+1,
+ (lineNum/3600)%24, (lineNum/60)%60, lineNum%60)
+ goroutines := 10 + (lineNum % 50)
+ cgocalls := lineNum % 100
+ cpus := 1 + (lineNum % 8)
+ loadavg := float64(lineNum%100) / 100.0
+ uptime := fmt.Sprintf("%dh%dm%ds", lineNum/3600, (lineNum/60)%60, lineNum%60)
+ currentConnections := lineNum % 20
+ lifetimeConnections := 1000 + lineNum
+
+ line := fmt.Sprintf("INFO|%s|1|stats.go:56|%d|%d|%d|%.2f|%s|MAPREDUCE:STATS|hostname=%s|currentConnections=%d|lifetimeConnections=%d\n",
+ timestamp, cpus, goroutines, cgocalls, loadavg, uptime, hostname, currentConnections, lifetimeConnections)
+
+ n, err := writer.WriteString(line)
+ if err != nil {
+ return err
+ }
+ currentSize += int64(n)
+ }
+
+ return nil
+}
+
+func (g *DataGenerator) generateDTailFormatFileWithLines(filename string, lines int) error {
+ file, err := os.Create(filename)
+ if err != nil {
+ return err
+ }
+ defer file.Close()
+
+ writer := bufio.NewWriter(file)
+ defer writer.Flush()
+
+ hostnames := []string{"server01", "server02", "server03", "server04", "server05",
+ "server06", "server07", "server08", "server09", "server10"}
+
+ for i := 1; i <= lines; i++ {
+ hostname := hostnames[i%len(hostnames)]
+ timestamp := fmt.Sprintf("%02d%02d-%02d%02d%02d",
+ 10+(i/86400)%12, (i/3600)%30+1,
+ (i/3600)%24, (i/60)%60, i%60)
+ goroutines := 10 + (i % 50)
+ cgocalls := i % 100
+ cpus := 1 + (i % 8)
+ loadavg := float64(i%100) / 100.0
+ uptime := fmt.Sprintf("%dh%dm%ds", i/3600, (i/60)%60, i%60)
+ currentConnections := i % 20
+ lifetimeConnections := 1000 + i
+
+ line := fmt.Sprintf("INFO|%s|1|stats.go:56|%d|%d|%d|%.2f|%s|MAPREDUCE:STATS|hostname=%s|currentConnections=%d|lifetimeConnections=%d\n",
+ timestamp, cpus, goroutines, cgocalls, loadavg, uptime, hostname, currentConnections, lifetimeConnections)
+
+ if _, err := writer.WriteString(line); err != nil {
+ return err
+ }
+ }
+
+ return nil
+} \ No newline at end of file