// Package benchmarks contains helpers that generate synthetic log files used
// as input data for benchmark runs.
package benchmarks
import (
"bufio"
"compress/gzip"
"fmt"
"io"
"math/rand"
"os"
"path/filepath"
"strings"
"testing"
"time"
"github.com/DataDog/zstd"
)
// FileSize is the target size, in bytes, of a generated benchmark file.
type FileSize int

// Predefined benchmark file sizes.
const (
	// Small is a 10 * 1024 * 1024 byte target.
	Small FileSize = 10 << 20
	// Medium is a 100 * 1024 * 1024 byte target.
	Medium FileSize = 100 << 20
	// Large is a 1024 * 1024 * 1024 byte target.
	Large FileSize = 1 << 30
)

// String returns a short label for the predefined sizes ("10MB", "100MB",
// "1GB"). Any other value is rendered as its byte count followed by "B".
func (fs FileSize) String() string {
	if fs == Small {
		return "10MB"
	}
	if fs == Medium {
		return "100MB"
	}
	if fs == Large {
		return "1GB"
	}
	return fmt.Sprintf("%dB", fs)
}
// LogFormat selects the layout of the generated log lines.
type LogFormat int

// Available log layouts.
const (
	// SimpleLogFormat produces LEVEL|TIMESTAMP|THREAD|FILE:LINE|MESSAGE lines.
	SimpleLogFormat = LogFormat(iota)
	// MapReduceLogFormat produces MAPREDUCE:STATS lines.
	MapReduceLogFormat
	// MixedLogFormat picks one of the two layouts above at random for each
	// template line.
	MixedLogFormat
)
// CompressionType selects how a generated test file is compressed.
type CompressionType int

// Available compression modes.
const (
	// NoCompression leaves the generated file as plain text.
	NoCompression = CompressionType(iota)
	// GzipCompression produces a gzip-compressed file with a ".gz" suffix.
	GzipCompression
	// ZstdCompression produces a zstd-compressed file with a ".zst" suffix.
	ZstdCompression
)
// TestDataConfig describes the synthetic log file that GenerateTestFile should
// produce.
type TestDataConfig struct {
	// Size is the target file size in bytes. writeLogLines stops after
	// Size/150 lines or Size bytes, whichever comes first, so the resulting
	// file can be smaller than Size when lines average under 150 bytes.
	Size FileSize
	// Format selects the log line layout.
	Format LogFormat
	// Compression selects whether and how the generated file is compressed.
	Compression CompressionType
	// LineVariation is passed to generateTemplateLines as the number of
	// distinct template lines to pre-generate; values below 10 are raised to 10.
	// NOTE(review): previously described as "percentage of unique lines
	// (0-100)" — confirm the intended semantics.
	LineVariation int
	// Pattern is a string appended to the message of selected template lines,
	// for grep testing. An empty Pattern disables pattern injection.
	Pattern string
	// PatternRate is the percent chance (0-100) that each template line
	// includes Pattern.
	PatternRate int
}
// GenerateTestFile creates a test log file in the system temp directory as
// described by config and returns its path. Compressed variants get a ".gz" or
// ".zst" suffix appended to the temp file name. Any failure aborts the test
// via tb.Fatalf after removing the partially written files. The caller owns
// the returned file and is responsible for deleting it (for example with
// CleanupBenchmarkFiles).
func GenerateTestFile(tb testing.TB, config TestDataConfig) string {
	tb.Helper()

	// Reserve a unique name; the file itself is rewritten by the generators.
	tmpFile, err := os.CreateTemp("", "dtail_bench_*.log.tmp")
	if err != nil {
		tb.Fatalf("Failed to create temp file: %v", err)
	}
	filename := tmpFile.Name()
	if err := tmpFile.Close(); err != nil {
		_ = os.Remove(filename) // best effort: do not leak the placeholder
		tb.Fatalf("Failed to close temp file: %v", err)
	}

	var (
		suffix       string
		label        string
		createWriter compressionWriter
	)
	switch config.Compression {
	case GzipCompression:
		suffix, label, createWriter = ".gz", "gzip", gzipWriter
	case ZstdCompression:
		suffix, label, createWriter = ".zst", "zstd", zstdWriter
	default:
		if err := generateUncompressedFile(filename, config); err != nil {
			_ = os.Remove(filename) // best effort: drop the partial file
			tb.Fatalf("Failed to generate file: %v", err)
		}
		return filename
	}

	finalFilename := filename + suffix
	err = generateCompressedFile(filename, finalFilename, config, createWriter)

	// The uncompressed intermediate is not needed whether or not compression
	// succeeded.
	if rmErr := os.Remove(filename); rmErr != nil && !os.IsNotExist(rmErr) {
		tb.Logf("Failed to remove intermediate file %s: %v", filename, rmErr)
	}
	if err != nil {
		_ = os.Remove(finalFilename) // best effort: drop the partial output
		tb.Fatalf("Failed to generate %s file: %v", label, err)
	}
	return finalFilename
}
// generateUncompressedFile creates (or truncates) filename and fills it with
// log lines produced by writeLogLines. It returns the first error encountered
// while creating, writing, flushing or closing the file, so a short write is
// never reported as success.
func generateUncompressedFile(filename string, config TestDataConfig) (err error) {
	file, err := os.Create(filename)
	if err != nil {
		return err
	}
	defer func() {
		// Surface the close error unless an earlier error is already set.
		if closeErr := file.Close(); err == nil {
			err = closeErr
		}
	}()

	writer := bufio.NewWriter(file)
	if err := writeLogLines(writer, config); err != nil {
		return err
	}
	// Flush explicitly: buffered data that fails to reach the file is an error.
	return writer.Flush()
}
// compressionWriter is a factory that wraps dst in a compressing writer.
// Closing the returned writer finishes the compressed stream.
type compressionWriter func(dst io.Writer) (io.WriteCloser, error)
// gzipWriter wraps w in a gzip writer using the default compression level.
// The returned error is always nil.
func gzipWriter(w io.Writer) (io.WriteCloser, error) {
	gz := gzip.NewWriter(w)
	return gz, nil
}
// zstdWriter wraps w in a zstd writer at zstd.DefaultCompression level.
// The returned error is always nil.
func zstdWriter(w io.Writer) (io.WriteCloser, error) {
	var compressor io.WriteCloser = zstd.NewWriterLevel(w, zstd.DefaultCompression)
	return compressor, nil
}
// generateCompressedFile writes uncompressed log data to tmpFile and then
// streams it through the writer returned by createWriter into finalFile.
// tmpFile is left in place; removing it is the caller's job. The first error
// from generating, opening, copying, or closing the compressor or output file
// is returned, so a truncated archive is never reported as success.
func generateCompressedFile(tmpFile, finalFile string, config TestDataConfig, createWriter compressionWriter) (err error) {
	// First generate the uncompressed data.
	if err := generateUncompressedFile(tmpFile, config); err != nil {
		return err
	}

	input, err := os.Open(tmpFile)
	if err != nil {
		return err
	}
	defer input.Close() // read-only handle; a close error carries no data loss

	output, err := os.Create(finalFile)
	if err != nil {
		return err
	}
	defer func() {
		// Surface the close error unless an earlier error is already set.
		if closeErr := output.Close(); err == nil {
			err = closeErr
		}
	}()

	compressor, err := createWriter(output)
	if err != nil {
		return err
	}
	if _, err := io.Copy(compressor, input); err != nil {
		_ = compressor.Close() // release resources; the copy error wins
		return err
	}
	// Close flushes the remaining compressed data, so its error matters.
	return compressor.Close()
}
// writeLogLines writes synthetic log lines to w according to config.
//
// The number of lines is estimated as config.Size / 150. Writing stops once
// that many lines or config.Size bytes have been written, whichever comes
// first, so the output can be smaller than config.Size when lines average
// under 150 bytes. Lines are drawn at random from a pre-generated template
// set; the random source is seeded from the current time, so output differs
// between runs.
//
// It returns an error if a write fails, or if lines are due to be written but
// no templates could be generated for config.Format.
func writeLogLines(w io.Writer, config TestDataConfig) error {
	rng := rand.New(rand.NewSource(time.Now().UnixNano()))

	const avgLineSize = 150 // assumed average line length in bytes
	targetBytes := int(config.Size)
	totalLines := targetBytes / avgLineSize

	// Pre-generate the template lines that the loop samples from.
	templateLines := generateTemplateLines(config.Format, config.LineVariation, config.Pattern, config.PatternRate, rng)
	if totalLines > 0 && len(templateLines) == 0 {
		// rng.Intn(0) below would panic; report the bad configuration instead.
		return fmt.Errorf("no template lines generated for log format %d", config.Format)
	}

	bytesWritten := 0
	for i := 0; i < totalLines && bytesWritten < targetBytes; i++ {
		line := templateLines[rng.Intn(len(templateLines))]

		// Fill in the per-line placeholders. The template generators visible
		// in this file emit only {TIMESTAMP}; {COUNTER} is substituted when
		// present.
		line = strings.Replace(line, "{TIMESTAMP}", generateTimestamp(i), 1)
		line = strings.Replace(line, "{COUNTER}", fmt.Sprintf("%d", i), 1)

		n, err := fmt.Fprintln(w, line)
		if err != nil {
			return err
		}
		bytesWritten += n
	}
	return nil
}
// generateTemplateLines builds the pool of template lines that log generation
// samples from. The pool holds max(10, variation) entries. When pattern is
// non-empty, each entry includes it with a patternRate percent chance. An
// unrecognised format yields an empty pool.
func generateTemplateLines(format LogFormat, variation int, pattern string, patternRate int, rng *rand.Rand) []string {
	count := max(10, variation) // never fewer than 10 templates
	pool := make([]string, 0, count)

	for idx := 0; idx < count; idx++ {
		// The random draw only happens when there is a pattern to inject.
		withPattern := false
		if pattern != "" {
			withPattern = rng.Intn(100) < patternRate
		}

		useMapReduce := false
		switch format {
		case SimpleLogFormat:
			// keep the simple layout
		case MapReduceLogFormat:
			useMapReduce = true
		case MixedLogFormat:
			useMapReduce = rng.Intn(2) != 0
		default:
			continue
		}

		if useMapReduce {
			pool = append(pool, generateMapReduceLogLine(idx, withPattern, pattern, rng))
		} else {
			pool = append(pool, generateSimpleLogLine(idx, withPattern, pattern, rng))
		}
	}
	return pool
}
// generateSimpleLogLine creates a simple log line template
func generateSimpleLogLine(id int, includePattern bool, pattern string, rng *rand.Rand) string {
levels := []string{"INFO", "WARN", "ERROR", "DEBUG"}
level := levels[rng.Intn(len(levels))]
message := fmt.Sprintf("Processing request %d", id)
if includePattern && pattern != "" {
message = fmt.Sprintf("%s %s", message, pattern)
}
// Format: LEVEL|TIMESTAMP|THREAD|FILE:LINE|MESSAGE
return fmt.Sprintf("%s|{TIMESTAMP}|thread-%d|app.go:%d|%s",
level, rng.Intn(10)+1, rng.Intn(1000)+1, message)
}
// generateMapReduceLogLine creates a MapReduce format log line template
func generateMapReduceLogLine(id int, includePattern bool, pattern string, rng *rand.Rand) string {
goroutines := rng.Intn(50) + 10
connections := rng.Intn(100)
lifetime := rng.Intn(1000) + 100
message := "MAPREDUCE:STATS"
if includePattern && pattern != "" {
message = fmt.Sprintf("%s|%s", message, pattern)
}
// Format matching the integration test data
return fmt.Sprintf("INFO|{TIMESTAMP}|1|stats.go:56|8|%d|7|0.%02d|471h%dm%ds|%s|currentConnections=%d|lifetimeConnections=%d",
goroutines, rng.Intn(100), rng.Intn(60), rng.Intn(60), message, connections, lifetime)
}
// generateTimestamp returns the MMDD-HHMMSS timestamp for line lineNum.
// Timestamps start at 2024-10-02 07:10:00 UTC and advance by one second every
// ten lines.
func generateTimestamp(lineNum int) string {
	elapsed := time.Duration(lineNum/10) * time.Second
	start := time.Date(2024, time.October, 2, 7, 10, 0, 0, time.UTC)
	return start.Add(elapsed).Format("0102-150405")
}
// CleanupBenchmarkFiles deletes every file in the system temp directory whose
// name matches the glob pattern. An empty pattern defaults to
// "dtail_bench_*.tmp*". It returns an error for a malformed pattern, or the
// first removal error other than "file does not exist"; remaining matches are
// not processed after such an error.
func CleanupBenchmarkFiles(pattern string) error {
	glob := pattern
	if glob == "" {
		glob = "dtail_bench_*.tmp*"
	}

	paths, err := filepath.Glob(filepath.Join(os.TempDir(), glob))
	if err != nil {
		return err
	}

	for i := range paths {
		rmErr := os.Remove(paths[i])
		if rmErr == nil || os.IsNotExist(rmErr) {
			continue // removed, or already gone
		}
		return rmErr
	}
	return nil
}
// max returns the larger of a and b.
func max(a, b int) int {
	if b >= a {
		return b
	}
	return a
}
|