summary refs log tree commit diff
path: root/internal/parser/tabular_csv_test.go
diff options
context:
space:
mode:
Diffstat (limited to 'internal/parser/tabular_csv_test.go')
-rw-r--r-- internal/parser/tabular_csv_test.go 469
1 files changed, 469 insertions, 0 deletions
diff --git a/internal/parser/tabular_csv_test.go b/internal/parser/tabular_csv_test.go
new file mode 100644
index 0000000..07818e8
--- /dev/null
+++ b/internal/parser/tabular_csv_test.go
@@ -0,0 +1,469 @@
+package parser
+
+import (
+ "context"
+ "fmt"
+ "strings"
+ "testing"
+ "time"
+
+ "epimetheus/internal/metrics"
+)
+
+func TestTabularCSVParser_Parse(t *testing.T) {
+ tests := []struct {
+ name string
+ input string
+ metricName string
+ expectedCount int
+ wantErr bool
+ }{
+ {
+ name: "simple tabular CSV with numeric and text columns",
+ input: `responsecode,httpmethod,user,totaltime
+200,GET,alice,50.5
+404,POST,bob,100.2
+500,GET,charlie,75.0`,
+ metricName: "test_metric",
+ expectedCount: 6, // 3 rows * 2 numeric columns (responsecode, totaltime)
+ wantErr: false,
+ },
+ {
+ name: "CSV with mixed data types",
+ input: `col1,col2,col3
+1,text,3.14
+2,more,2.71
+3,data,1.41`,
+ metricName: "mixed_metric",
+ expectedCount: 6, // 3 rows * 2 numeric columns (col1, col3)
+ wantErr: false,
+ },
+ {
+ name: "CSV with whitespace",
+ input: ` col1 , col2 , col3
+ 1 , value2 , 3
+ 4 , value5 , 6 `,
+ metricName: "whitespace_metric",
+ expectedCount: 4, // 2 rows * 2 numeric columns (col1, col3)
+ wantErr: false,
+ },
+ {
+ name: "CSV with comments",
+ input: `# This is a comment
+col1,col2,col3
+# Another comment
+1,value2,3`,
+ metricName: "comment_metric",
+ expectedCount: 2, // 1 row * 2 numeric columns (col1, col3)
+ wantErr: false,
+ },
+ {
+ name: "empty CSV",
+ input: "",
+ metricName: "empty_metric",
+ expectedCount: 0,
+ wantErr: true, // No header row
+ },
+ {
+ name: "header only",
+ input: `col1,col2,col3
+`,
+ metricName: "header_only",
+ expectedCount: 0,
+ wantErr: false,
+ },
+ {
+ name: "mismatched columns - skipped",
+ input: `col1,col2,col3
+1,value2,3
+value4,value5
+6,value7,8`,
+ metricName: "mismatched_metric",
+ expectedCount: 4, // 2 matching rows * 2 numeric columns
+ wantErr: false,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ ctx := context.Background()
+ timestamp := time.Now()
+ parser := NewTabularCSVParser(tt.metricName, timestamp, []string{}) // No DNS resolution
+
+ reader := strings.NewReader(tt.input)
+ samples, err := parser.Parse(ctx, reader)
+
+ if (err != nil) != tt.wantErr {
+ t.Errorf("Parse() error = %v, wantErr %v", err, tt.wantErr)
+ return
+ }
+
+ if err == nil && len(samples) != tt.expectedCount {
+ t.Errorf("Parse() got %d samples, want %d", len(samples), tt.expectedCount)
+ }
+
+ // Verify all samples have the correct base metric name and timestamp
+ for _, sample := range samples {
+ if !strings.HasPrefix(sample.MetricName, tt.metricName+"_") {
+ t.Errorf("Sample metric name = %s, want prefix %s_", sample.MetricName, tt.metricName)
+ }
+ if !sample.Timestamp.Equal(timestamp) {
+ t.Errorf("Sample timestamp = %v, want %v", sample.Timestamp, timestamp)
+ }
+ }
+ })
+ }
+}
+
+// TestTabularCSVParser_Labels checks that a row's text columns are attached as
+// labels to each numeric-column sample and that sample values match the cells.
+func TestTabularCSVParser_Labels(t *testing.T) {
+	const csvText = `responsecode,httpmethod,user,totaltime
+200,GET,alice,50.5
+404,POST,bob,100.2`
+	p := NewTabularCSVParser("test_metric", time.Now(), []string{})
+	samples, err := p.Parse(context.Background(), strings.NewReader(csvText))
+	if err != nil {
+		t.Fatalf("Parse() error = %v", err)
+	}
+
+	if n := len(samples); n != 4 { // 2 rows * 2 numeric columns (responsecode, totaltime)
+		t.Fatalf("Expected 4 samples, got %d", n)
+	}
+
+	// Locate the two samples produced by the first row (GET / alice).
+	var codeSample, timeSample *metrics.Sample
+	for i := range samples {
+		s := &samples[i]
+		if s.Labels["httpmethod"] != "GET" || s.Labels["user"] != "alice" {
+			continue
+		}
+		switch {
+		case strings.HasSuffix(s.MetricName, "_responsecode"):
+			codeSample = s
+		case strings.HasSuffix(s.MetricName, "_totaltime"):
+			timeSample = s
+		}
+	}
+	if codeSample == nil || timeSample == nil {
+		t.Fatalf("Could not find expected metrics")
+	}
+	wantLabels := map[string]string{
+		"httpmethod": "GET",
+		"user":       "alice",
+	}
+
+	// responsecode sample: value first, then labels.
+	if codeSample.Value != 200 {
+		t.Errorf("responsecode value = %f, want 200", codeSample.Value)
+	}
+	for k, want := range wantLabels {
+		switch got, ok := codeSample.Labels[k]; {
+		case !ok:
+			t.Errorf("responsecode metric missing label %s", k)
+		case got != want:
+			t.Errorf("responsecode metric label %s = %s, want %s", k, got, want)
+		}
+	}
+
+	// totaltime sample: value first, then labels.
+	if timeSample.Value != 50.5 {
+		t.Errorf("totaltime value = %f, want 50.5", timeSample.Value)
+	}
+	for k, want := range wantLabels {
+		switch got, ok := timeSample.Labels[k]; {
+		case !ok:
+			t.Errorf("totaltime metric missing label %s", k)
+		case got != want:
+			t.Errorf("totaltime metric label %s = %s, want %s", k, got, want)
+		}
+	}
+}
+
+func TestTabularCSVParser_ContextCancellation(t *testing.T) {
+ ctx, cancel := context.WithCancel(context.Background())
+ cancel() // Cancel immediately
+
+ timestamp := time.Now()
+ input := `col1,col2,col3
+value1,value2,value3
+value4,value5,value6`
+
+ parser := NewTabularCSVParser("test_metric", timestamp, []string{})
+ reader := strings.NewReader(input)
+ _, err := parser.Parse(ctx, reader)
+
+ if err != context.Canceled {
+ t.Errorf("Expected context.Canceled error, got %v", err)
+ }
+}
+
+// TestTabularCSVParser_LargeFile parses a generated CSV in which only col1 is
+// numeric and expects exactly one sample per data row.
+func TestTabularCSVParser_LargeFile(t *testing.T) {
+	const rows = 1000 // data rows generated below; also the expected sample count
+
+	var builder strings.Builder
+	builder.WriteString("col1,col2,col3\n")
+	for i := 0; i < rows; i++ {
+		// Fprintf formats straight into the builder, avoiding the temporary
+		// string that WriteString(fmt.Sprintf(...)) allocated for every row.
+		fmt.Fprintf(&builder, "%d,value2,value3\n", i)
+	}
+
+	parser := NewTabularCSVParser("large_metric", time.Now(), []string{})
+	samples, err := parser.Parse(context.Background(), strings.NewReader(builder.String()))
+	if err != nil {
+		t.Fatalf("Parse() error = %v", err)
+	}
+
+	// One numeric column per row means one sample per row.
+	if len(samples) != rows {
+		t.Errorf("Expected %d samples, got %d", rows, len(samples))
+	}
+}
+
+// TestSanitizeLabelName runs sanitizeLabelName over a table of raw header
+// names, one subtest per input, and compares against the expected label name.
+func TestSanitizeLabelName(t *testing.T) {
+	cases := []struct{ raw, want string }{
+		{raw: "avg(totaltime)", want: "avg_totaltime"},
+		{raw: "sum(rcv)", want: "sum_rcv"},
+		{raw: "response-code", want: "response_code"},
+		{raw: "valid_label", want: "valid_label"},
+		{raw: "ValidLabel123", want: "ValidLabel123"},
+		// A leading digit is expected to gain a "label_" prefix.
+		{raw: "123invalid", want: "label_123invalid"},
+		{raw: "label__with___underscores", want: "label_with_underscores"},
+		{raw: "_leading", want: "leading"},
+		{raw: "trailing_", want: "trailing"},
+		{raw: "special!@#$chars", want: "special_chars"},
+		// Inputs with nothing usable left are expected to map to "unknown".
+		{raw: "", want: "unknown"},
+		{raw: "___", want: "unknown"},
+		{raw: "http.method", want: "http_method"},
+		{raw: "status_code", want: "status_code"},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.raw, func(t *testing.T) {
+			if got := sanitizeLabelName(tc.raw); got != tc.want {
+				t.Errorf("sanitizeLabelName(%q) = %q, want %q", tc.raw, got, tc.want)
+			}
+		})
+	}
+}
+
+func TestTabularCSVParser_WithSpecialCharacterHeaders(t *testing.T) {
+ ctx := context.Background()
+ timestamp := time.Now()
+ input := `avg(totaltime),sum(rcv),response-code,http.method
+50.5,1102,200,GET
+100.2,2204,404,POST`
+
+ parser := NewTabularCSVParser("special_metric", timestamp, []string{})
+ reader := strings.NewReader(input)
+ samples, err := parser.Parse(ctx, reader)
+
+ if err != nil {
+ t.Fatalf("Parse() error = %v", err)
+ }
+
+ // 2 rows * 3 numeric columns (avg(totaltime), sum(rcv), response-code) = 6 samples
+ if len(samples) != 6 {
+ t.Fatalf("Expected 6 samples, got %d", len(samples))
+ }
+
+ // Check that metric names are sanitized
+ expectedMetrics := []string{"special_metric_avg_totaltime", "special_metric_sum_rcv", "special_metric_response_code"}
+ foundMetrics := make(map[string]bool)
+ for _, sample := range samples {
+ foundMetrics[sample.MetricName] = true
+ }
+ for _, expected := range expectedMetrics {
+ if !foundMetrics[expected] {
+ t.Errorf("Missing expected metric %s", expected)
+ }
+ }
+
+ // Check that string column becomes label (http.method -> http_method)
+ for _, sample := range samples {
+ if _, ok := sample.Labels["http_method"]; !ok {
+ t.Errorf("Sample missing sanitized label http_method")
+ }
+ }
+
+ // Check values are correct for first row
+ for _, sample := range samples {
+ if sample.Labels["http_method"] == "GET" {
+ if strings.HasSuffix(sample.MetricName, "_avg_totaltime") {
+ if sample.Value != 50.5 {
+ t.Errorf("avg_totaltime value = %f, want 50.5", sample.Value)
+ }
+ } else if strings.HasSuffix(sample.MetricName, "_sum_rcv") {
+ if sample.Value != 1102 {
+ t.Errorf("sum_rcv value = %f, want 1102", sample.Value)
+ }
+ } else if strings.HasSuffix(sample.MetricName, "_response_code") {
+ if sample.Value != 200 {
+ t.Errorf("response_code value = %f, want 200", sample.Value)
+ }
+ }
+ }
+ }
+}
+
+func TestTabularCSVParser_DNSResolution(t *testing.T) {
+ ctx := context.Background()
+ timestamp := time.Now()
+
+ // Test with localhost IP which should resolve on most systems
+ input := `ip,responsecode,count
+127.0.0.1,200,100`
+
+ parser := NewTabularCSVParser("test_metric", timestamp, []string{"ip"})
+ reader := strings.NewReader(input)
+ samples, err := parser.Parse(ctx, reader)
+
+ if err != nil {
+ t.Fatalf("Parse() error = %v", err)
+ }
+
+ // Should have 1 sample (count metric)
+ if len(samples) != 1 {
+ t.Fatalf("Expected 1 sample, got %d", len(samples))
+ }
+
+ // The ip label should either be resolved to a hostname or remain as IP if DNS failed
+ ipLabel := samples[0].Labels["ip"]
+ if ipLabel == "" {
+ t.Error("ip label is empty")
+ }
+
+ // If resolution succeeded, it should not be the original IP
+ // If it failed, it should still be the IP
+ t.Logf("IP label value: %s (original: 127.0.0.1)", ipLabel)
+}
+
+func TestTabularCSVParser_DNSResolutionDisabled(t *testing.T) {
+ ctx := context.Background()
+ timestamp := time.Now()
+
+ input := `ip,responsecode,count
+192.168.1.1,200,100`
+
+ // Pass empty slice - no DNS resolution
+ parser := NewTabularCSVParser("test_metric", timestamp, []string{})
+ reader := strings.NewReader(input)
+ samples, err := parser.Parse(ctx, reader)
+
+ if err != nil {
+ t.Fatalf("Parse() error = %v", err)
+ }
+
+ if len(samples) != 1 {
+ t.Fatalf("Expected 1 sample, got %d", len(samples))
+ }
+
+ // IP should remain unchanged
+ ipLabel := samples[0].Labels["ip"]
+ if ipLabel != "192.168.1.1" {
+ t.Errorf("IP label = %s, want 192.168.1.1 (DNS resolution should be disabled)", ipLabel)
+ }
+}
+
+func TestTabularCSVParser_DNSResolutionMultipleLabels(t *testing.T) {
+ ctx := context.Background()
+ timestamp := time.Now()
+
+ input := `source_ip,dest_ip,count
+127.0.0.1,192.168.1.1,100`
+
+ // Resolve both source_ip and dest_ip
+ parser := NewTabularCSVParser("test_metric", timestamp, []string{"source_ip", "dest_ip"})
+ reader := strings.NewReader(input)
+ samples, err := parser.Parse(ctx, reader)
+
+ if err != nil {
+ t.Fatalf("Parse() error = %v", err)
+ }
+
+ if len(samples) != 1 {
+ t.Fatalf("Expected 1 sample, got %d", len(samples))
+ }
+
+ // Both labels should be present
+ if _, ok := samples[0].Labels["source_ip"]; !ok {
+ t.Error("source_ip label missing")
+ }
+ if _, ok := samples[0].Labels["dest_ip"]; !ok {
+ t.Error("dest_ip label missing")
+ }
+
+ t.Logf("source_ip: %s", samples[0].Labels["source_ip"])
+ t.Logf("dest_ip: %s", samples[0].Labels["dest_ip"])
+}
+
+func TestTabularCSVParser_DNSResolutionNonIPValue(t *testing.T) {
+ ctx := context.Background()
+ timestamp := time.Now()
+
+ // ip column contains non-IP value
+ input := `ip,responsecode,count
+not-an-ip,200,100`
+
+ parser := NewTabularCSVParser("test_metric", timestamp, []string{"ip"})
+ reader := strings.NewReader(input)
+ samples, err := parser.Parse(ctx, reader)
+
+ if err != nil {
+ t.Fatalf("Parse() error = %v", err)
+ }
+
+ if len(samples) != 1 {
+ t.Fatalf("Expected 1 sample, got %d", len(samples))
+ }
+
+ // Non-IP value should remain unchanged
+ ipLabel := samples[0].Labels["ip"]
+ if ipLabel != "not-an-ip" {
+ t.Errorf("IP label = %s, want not-an-ip (should not resolve non-IP values)", ipLabel)
+ }
+}
+
+func TestTabularCSVParser_DNSResolutionCaching(t *testing.T) {
+ ctx := context.Background()
+ timestamp := time.Now()
+
+ // Multiple rows with same IP
+ input := `ip,responsecode,count
+127.0.0.1,200,100
+127.0.0.1,404,50
+127.0.0.1,500,5`
+
+ parser := NewTabularCSVParser("test_metric", timestamp, []string{"ip"})
+ reader := strings.NewReader(input)
+ samples, err := parser.Parse(ctx, reader)
+
+ if err != nil {
+ t.Fatalf("Parse() error = %v", err)
+ }
+
+ // Should have 3 samples
+ if len(samples) != 3 {
+ t.Fatalf("Expected 3 samples, got %d", len(samples))
+ }
+
+ // All should have same resolved IP (cached result)
+ firstIP := samples[0].Labels["ip"]
+ for i, sample := range samples {
+ if sample.Labels["ip"] != firstIP {
+ t.Errorf("Sample %d has different IP label: %s vs %s (caching issue)", i, sample.Labels["ip"], firstIP)
+ }
+ }
+
+ // Check cache size
+ if parser.resolver.GetCacheSize() != 1 {
+ t.Errorf("Expected cache size 1 (one unique IP), got %d", parser.resolver.GetCacheSize())
+ }
+}