diff options
Diffstat (limited to 'internal/parser/tabular_csv_test.go')
| -rw-r--r-- | internal/parser/tabular_csv_test.go | 469 |
1 files changed, 469 insertions, 0 deletions
diff --git a/internal/parser/tabular_csv_test.go b/internal/parser/tabular_csv_test.go new file mode 100644 index 0000000..07818e8 --- /dev/null +++ b/internal/parser/tabular_csv_test.go @@ -0,0 +1,469 @@ +package parser + +import ( + "context" + "fmt" + "strings" + "testing" + "time" + + "epimetheus/internal/metrics" +) + +func TestTabularCSVParser_Parse(t *testing.T) { + tests := []struct { + name string + input string + metricName string + expectedCount int + wantErr bool + }{ + { + name: "simple tabular CSV with numeric and text columns", + input: `responsecode,httpmethod,user,totaltime +200,GET,alice,50.5 +404,POST,bob,100.2 +500,GET,charlie,75.0`, + metricName: "test_metric", + expectedCount: 6, // 3 rows * 2 numeric columns (responsecode, totaltime) + wantErr: false, + }, + { + name: "CSV with mixed data types", + input: `col1,col2,col3 +1,text,3.14 +2,more,2.71 +3,data,1.41`, + metricName: "mixed_metric", + expectedCount: 6, // 3 rows * 2 numeric columns (col1, col3) + wantErr: false, + }, + { + name: "CSV with whitespace", + input: ` col1 , col2 , col3 + 1 , value2 , 3 + 4 , value5 , 6 `, + metricName: "whitespace_metric", + expectedCount: 4, // 2 rows * 2 numeric columns (col1, col3) + wantErr: false, + }, + { + name: "CSV with comments", + input: `# This is a comment +col1,col2,col3 +# Another comment +1,value2,3`, + metricName: "comment_metric", + expectedCount: 2, // 1 row * 2 numeric columns (col1, col3) + wantErr: false, + }, + { + name: "empty CSV", + input: "", + metricName: "empty_metric", + expectedCount: 0, + wantErr: true, // No header row + }, + { + name: "header only", + input: `col1,col2,col3 +`, + metricName: "header_only", + expectedCount: 0, + wantErr: false, + }, + { + name: "mismatched columns - skipped", + input: `col1,col2,col3 +1,value2,3 +value4,value5 +6,value7,8`, + metricName: "mismatched_metric", + expectedCount: 4, // 2 matching rows * 2 numeric columns + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctx := context.Background() + timestamp := time.Now() + parser := NewTabularCSVParser(tt.metricName, timestamp, []string{}) // No DNS resolution + + reader := strings.NewReader(tt.input) + samples, err := parser.Parse(ctx, reader) + + if (err != nil) != tt.wantErr { + t.Errorf("Parse() error = %v, wantErr %v", err, tt.wantErr) + return + } + + if err == nil && len(samples) != tt.expectedCount { + t.Errorf("Parse() got %d samples, want %d", len(samples), tt.expectedCount) + } + + // Verify all samples have the correct base metric name and timestamp + for _, sample := range samples { + if !strings.HasPrefix(sample.MetricName, tt.metricName+"_") { + t.Errorf("Sample metric name = %s, want prefix %s_", sample.MetricName, tt.metricName) + } + if !sample.Timestamp.Equal(timestamp) { + t.Errorf("Sample timestamp = %v, want %v", sample.Timestamp, timestamp) + } + } + }) + } +} + +func TestTabularCSVParser_Labels(t *testing.T) { + ctx := context.Background() + timestamp := time.Now() + input := `responsecode,httpmethod,user,totaltime +200,GET,alice,50.5 +404,POST,bob,100.2` + + parser := NewTabularCSVParser("test_metric", timestamp, []string{}) + reader := strings.NewReader(input) + samples, err := parser.Parse(ctx, reader) + + if err != nil { + t.Fatalf("Parse() error = %v", err) + } + + // 2 rows * 2 numeric columns (responsecode, totaltime) = 4 samples + if len(samples) != 4 { + t.Fatalf("Expected 4 samples, got %d", len(samples)) + } + + // Find the responsecode and totaltime metrics for first row + var responsecodeMetric, totaltimeMetric *metrics.Sample + for i := range samples { + if samples[i].Labels["httpmethod"] == "GET" && samples[i].Labels["user"] == "alice" { + if strings.HasSuffix(samples[i].MetricName, "_responsecode") { + responsecodeMetric = &samples[i] + } else if strings.HasSuffix(samples[i].MetricName, "_totaltime") { + totaltimeMetric = &samples[i] + } + } + } + + if responsecodeMetric == nil || totaltimeMetric == nil { + t.Fatalf("Could not find expected metrics") + } + + // Check responsecode metric + if responsecodeMetric.Value != 200 { + t.Errorf("responsecode value = %f, want 200", responsecodeMetric.Value) + } + expectedLabels := map[string]string{ + "httpmethod": "GET", + "user": "alice", + } + for key, expectedValue := range expectedLabels { + if actualValue, ok := responsecodeMetric.Labels[key]; !ok { + t.Errorf("responsecode metric missing label %s", key) + } else if actualValue != expectedValue { + t.Errorf("responsecode metric label %s = %s, want %s", key, actualValue, expectedValue) + } + } + + // Check totaltime metric + if totaltimeMetric.Value != 50.5 { + t.Errorf("totaltime value = %f, want 50.5", totaltimeMetric.Value) + } + for key, expectedValue := range expectedLabels { + if actualValue, ok := totaltimeMetric.Labels[key]; !ok { + t.Errorf("totaltime metric missing label %s", key) + } else if actualValue != expectedValue { + t.Errorf("totaltime metric label %s = %s, want %s", key, actualValue, expectedValue) + } + } +} + +func TestTabularCSVParser_ContextCancellation(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + cancel() // Cancel immediately + + timestamp := time.Now() + input := `col1,col2,col3 +value1,value2,value3 +value4,value5,value6` + + parser := NewTabularCSVParser("test_metric", timestamp, []string{}) + reader := strings.NewReader(input) + _, err := parser.Parse(ctx, reader) + + if err != context.Canceled { + t.Errorf("Expected context.Canceled error, got %v", err) + } +} + +func TestTabularCSVParser_LargeFile(t *testing.T) { + ctx := context.Background() + timestamp := time.Now() + + // Generate a CSV with 1000 rows, 1 numeric column + var builder strings.Builder + builder.WriteString("col1,col2,col3\n") + for i := 0; i < 1000; i++ { + builder.WriteString(fmt.Sprintf("%d,value2,value3\n", i)) + } + + parser := NewTabularCSVParser("large_metric", timestamp, []string{}) + reader := strings.NewReader(builder.String()) + samples, err := parser.Parse(ctx, reader) + + if err != nil { + t.Fatalf("Parse() error = %v", err) + } + + // 1000 rows * 1 numeric column = 1000 samples + if len(samples) != 1000 { + t.Errorf("Expected 1000 samples, got %d", len(samples)) + } +} + +func TestSanitizeLabelName(t *testing.T) { + tests := []struct { + input string + expected string + }{ + {"avg(totaltime)", "avg_totaltime"}, + {"sum(rcv)", "sum_rcv"}, + {"response-code", "response_code"}, + {"valid_label", "valid_label"}, + {"ValidLabel123", "ValidLabel123"}, + {"123invalid", "label_123invalid"}, + {"label__with___underscores", "label_with_underscores"}, + {"_leading", "leading"}, + {"trailing_", "trailing"}, + {"special!@#$chars", "special_chars"}, + {"", "unknown"}, + {"___", "unknown"}, + {"http.method", "http_method"}, + {"status_code", "status_code"}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + result := sanitizeLabelName(tt.input) + if result != tt.expected { + t.Errorf("sanitizeLabelName(%q) = %q, want %q", tt.input, result, tt.expected) + } + }) + } +} + +func TestTabularCSVParser_WithSpecialCharacterHeaders(t *testing.T) { + ctx := context.Background() + timestamp := time.Now() + input := `avg(totaltime),sum(rcv),response-code,http.method +50.5,1102,200,GET +100.2,2204,404,POST` + + parser := NewTabularCSVParser("special_metric", timestamp, []string{}) + reader := strings.NewReader(input) + samples, err := parser.Parse(ctx, reader) + + if err != nil { + t.Fatalf("Parse() error = %v", err) + } + + // 2 rows * 3 numeric columns (avg(totaltime), sum(rcv), response-code) = 6 samples + if len(samples) != 6 { + t.Fatalf("Expected 6 samples, got %d", len(samples)) + } + + // Check that metric names are sanitized + expectedMetrics := []string{"special_metric_avg_totaltime", "special_metric_sum_rcv", "special_metric_response_code"} + foundMetrics := make(map[string]bool) + for _, sample := range samples { + foundMetrics[sample.MetricName] = true + } + for _, expected := range expectedMetrics { + if !foundMetrics[expected] { + t.Errorf("Missing expected metric %s", expected) + } + } + + // Check that string column becomes label (http.method -> http_method) + for _, sample := range samples { + if _, ok := sample.Labels["http_method"]; !ok { + t.Errorf("Sample missing sanitized label http_method") + } + } + + // Check values are correct for first row + for _, sample := range samples { + if sample.Labels["http_method"] == "GET" { + if strings.HasSuffix(sample.MetricName, "_avg_totaltime") { + if sample.Value != 50.5 { + t.Errorf("avg_totaltime value = %f, want 50.5", sample.Value) + } + } else if strings.HasSuffix(sample.MetricName, "_sum_rcv") { + if sample.Value != 1102 { + t.Errorf("sum_rcv value = %f, want 1102", sample.Value) + } + } else if strings.HasSuffix(sample.MetricName, "_response_code") { + if sample.Value != 200 { + t.Errorf("response_code value = %f, want 200", sample.Value) + } + } + } + } +} + +func TestTabularCSVParser_DNSResolution(t *testing.T) { + ctx := context.Background() + timestamp := time.Now() + + // Test with localhost IP which should resolve on most systems + input := `ip,responsecode,count +127.0.0.1,200,100` + + parser := NewTabularCSVParser("test_metric", timestamp, []string{"ip"}) + reader := strings.NewReader(input) + samples, err := parser.Parse(ctx, reader) + + if err != nil { + t.Fatalf("Parse() error = %v", err) + } + + // Should have 1 sample (count metric) + if len(samples) != 1 { + t.Fatalf("Expected 1 sample, got %d", len(samples)) + } + + // The ip label should either be resolved to a hostname or remain as IP if DNS failed + ipLabel := samples[0].Labels["ip"] + if ipLabel == "" { + t.Error("ip label is empty") + } + + // If resolution succeeded, it should not be the original IP + // If it failed, it should still be the IP + t.Logf("IP label value: %s (original: 127.0.0.1)", ipLabel) +} + +func TestTabularCSVParser_DNSResolutionDisabled(t *testing.T) { + ctx := context.Background() + timestamp := time.Now() + + input := `ip,responsecode,count +192.168.1.1,200,100` + + // Pass empty slice - no DNS resolution + parser := NewTabularCSVParser("test_metric", timestamp, []string{}) + reader := strings.NewReader(input) + samples, err := parser.Parse(ctx, reader) + + if err != nil { + t.Fatalf("Parse() error = %v", err) + } + + if len(samples) != 1 { + t.Fatalf("Expected 1 sample, got %d", len(samples)) + } + + // IP should remain unchanged + ipLabel := samples[0].Labels["ip"] + if ipLabel != "192.168.1.1" { + t.Errorf("IP label = %s, want 192.168.1.1 (DNS resolution should be disabled)", ipLabel) + } +} + +func TestTabularCSVParser_DNSResolutionMultipleLabels(t *testing.T) { + ctx := context.Background() + timestamp := time.Now() + + input := `source_ip,dest_ip,count +127.0.0.1,192.168.1.1,100` + + // Resolve both source_ip and dest_ip + parser := NewTabularCSVParser("test_metric", timestamp, []string{"source_ip", "dest_ip"}) + reader := strings.NewReader(input) + samples, err := parser.Parse(ctx, reader) + + if err != nil { + t.Fatalf("Parse() error = %v", err) + } + + if len(samples) != 1 { + t.Fatalf("Expected 1 sample, got %d", len(samples)) + } + + // Both labels should be present + if _, ok := samples[0].Labels["source_ip"]; !ok { + t.Error("source_ip label missing") + } + if _, ok := samples[0].Labels["dest_ip"]; !ok { + t.Error("dest_ip label missing") + } + + t.Logf("source_ip: %s", samples[0].Labels["source_ip"]) + t.Logf("dest_ip: %s", samples[0].Labels["dest_ip"]) +} + +func TestTabularCSVParser_DNSResolutionNonIPValue(t *testing.T) { + ctx := context.Background() + timestamp := time.Now() + + // ip column contains non-IP value + input := `ip,responsecode,count +not-an-ip,200,100` + + parser := NewTabularCSVParser("test_metric", timestamp, []string{"ip"}) + reader := strings.NewReader(input) + samples, err := parser.Parse(ctx, reader) + + if err != nil { + t.Fatalf("Parse() error = %v", err) + } + + if len(samples) != 1 { + t.Fatalf("Expected 1 sample, got %d", len(samples)) + } + + // Non-IP value should remain unchanged + ipLabel := samples[0].Labels["ip"] + if ipLabel != "not-an-ip" { + t.Errorf("IP label = %s, want not-an-ip (should not resolve non-IP values)", ipLabel) + } +} + +func TestTabularCSVParser_DNSResolutionCaching(t *testing.T) { + ctx := context.Background() + timestamp := time.Now() + + // Multiple rows with same IP + input := `ip,responsecode,count +127.0.0.1,200,100 +127.0.0.1,404,50 +127.0.0.1,500,5` + + parser := NewTabularCSVParser("test_metric", timestamp, []string{"ip"}) + reader := strings.NewReader(input) + samples, err := parser.Parse(ctx, reader) + + if err != nil { + t.Fatalf("Parse() error = %v", err) + } + + // Should have 3 samples + if len(samples) != 3 { + t.Fatalf("Expected 3 samples, got %d", len(samples)) + } + + // All should have same resolved IP (cached result) + firstIP := samples[0].Labels["ip"] + for i, sample := range samples { + if sample.Labels["ip"] != firstIP { + t.Errorf("Sample %d has different IP label: %s vs %s (caching issue)", i, sample.Labels["ip"], firstIP) + } + } + + // Check cache size + if parser.resolver.GetCacheSize() != 1 { + t.Errorf("Expected cache size 1 (one unique IP), got %d", parser.resolver.GetCacheSize()) + } +} |
