summaryrefslogtreecommitdiff
path: root/internal/regex/bench_test.go
blob: 16fd98e040fbce3bf1714d2ae55e59070a325545 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
package regex

import (
	"bytes"
	"testing"
)

// BenchmarkLiteralVsRegex compares three ways of searching the same set of
// log lines for "ERROR":
//
//   - Literal_ERROR: the plain pattern, which must be flagged as literal by New.
//   - Regex_ERROR: the same text wrapped in a non-capturing group, which must
//     not be flagged as literal.
//   - BytesContains_ERROR: a direct bytes.Contains call, as a reference point.
//
// Each sub-benchmark scans every line in testLines once per iteration.
func BenchmarkLiteralVsRegex(b *testing.B) {
	// Test data - typical log lines
	testLines := [][]byte{
		[]byte("2024-01-01 10:00:00 INFO Starting application"),
		[]byte("2024-01-01 10:00:01 DEBUG Loading configuration"),
		[]byte("2024-01-01 10:00:02 ERROR Failed to connect to database"),
		[]byte("2024-01-01 10:00:03 WARN Retrying connection"),
		[]byte("2024-01-01 10:00:04 INFO Connection established"),
		[]byte("2024-01-01 10:00:05 ERROR Timeout while processing request"),
		[]byte("2024-01-01 10:00:06 DEBUG Processing request ID: 12345"),
		[]byte("2024-01-01 10:00:07 INFO Request processed successfully"),
		[]byte("2024-01-01 10:00:08 ERROR Invalid input parameters"),
		[]byte("2024-01-01 10:00:09 WARN High memory usage detected"),
	}

	// Benchmark literal pattern matching (our optimization)
	b.Run("Literal_ERROR", func(b *testing.B) {
		r, err := New("ERROR", Default)
		if err != nil {
			// Report the construction failure itself rather than tripping
			// over the result on the next line.
			b.Fatal(err)
		}
		if !r.isLiteral {
			b.Fatal("Pattern should be detected as literal")
		}

		// Exclude pattern construction from the measurement.
		b.ResetTimer()
		matches := 0
		for i := 0; i < b.N; i++ {
			for _, line := range testLines {
				if r.Match(line) {
					matches++
				}
			}
		}
		// Reference the counter so the accumulated result is not left unused.
		_ = matches
	})

	// Force regex pattern matching for comparison
	b.Run("Regex_ERROR", func(b *testing.B) {
		// Add a harmless regex operator to force regex compilation
		r, err := New("(?:ERROR)", Default)
		if err != nil {
			b.Fatal(err)
		}
		if r.isLiteral {
			b.Fatal("Pattern should not be detected as literal")
		}

		// Exclude pattern construction from the measurement.
		b.ResetTimer()
		matches := 0
		for i := 0; i < b.N; i++ {
			for _, line := range testLines {
				if r.Match(line) {
					matches++
				}
			}
		}
		_ = matches
	})

	// Direct bytes.Contains for reference
	b.Run("BytesContains_ERROR", func(b *testing.B) {
		pattern := []byte("ERROR")

		b.ResetTimer()
		matches := 0
		for i := 0; i < b.N; i++ {
			for _, line := range testLines {
				if bytes.Contains(line, pattern) {
					matches++
				}
			}
		}
		_ = matches
	})
}

// BenchmarkComplexPatterns times Match against one fixed log line for a table
// of patterns: two plain substrings, a wildcard join, a dotted-digit sequence
// and start/end anchored patterns. Each table entry runs as its own
// sub-benchmark; a pattern that New rejects aborts that sub-benchmark.
func BenchmarkComplexPatterns(b *testing.B) {
	input := []byte("2024-01-01 10:00:00 ERROR Failed to connect to database server at 192.168.1.100:5432")

	cases := []struct {
		name    string
		pattern string
	}{
		{name: "Simple_ERROR", pattern: "ERROR"},
		{name: "Simple_database", pattern: "database"},
		{name: "Regex_ERROR.*database", pattern: "ERROR.*database"},
		// IP address pattern
		{name: "Regex_\\d+\\.\\d+\\.\\d+\\.\\d+", pattern: `\d+\.\d+\.\d+\.\d+`},
		{name: "Regex_^2024", pattern: "^2024"},
		{name: "Regex_5432$", pattern: "5432$"},
	}

	for idx := range cases {
		tc := cases[idx]
		b.Run(tc.name, func(b *testing.B) {
			r, err := New(tc.pattern, Default)
			if err != nil {
				b.Fatal(err)
			}

			// Only the matching loop below is measured.
			b.ResetTimer()
			hits := 0
			for n := 0; n < b.N; n++ {
				if !r.Match(input) {
					continue
				}
				hits++
			}
			// Reference the counter so the accumulated result is not left unused.
			_ = hits
		})
	}
}