package parser

import (
	"context"
	"errors"
	"fmt"
	"strings"
	"testing"
	"time"

	"epimetheus/internal/metrics"
)

// TestTabularCSVParser_Parse feeds a table of CSV inputs through Parse and
// checks the error outcome, the number of returned samples, and that every
// sample carries the base metric name as a prefix plus the timestamp that was
// handed to the constructor.
func TestTabularCSVParser_Parse(t *testing.T) {
	tests := []struct {
		name          string
		input         string
		metricName    string
		expectedCount int
		wantErr       bool
	}{
		{
			name: "simple tabular CSV with numeric and text columns",
			input: `responsecode,httpmethod,user,totaltime
200,GET,alice,50.5
404,POST,bob,100.2
500,GET,charlie,75.0`,
			metricName:    "test_metric",
			expectedCount: 6, // 3 rows * 2 numeric columns (responsecode, totaltime)
			wantErr:       false,
		},
		{
			name: "CSV with mixed data types",
			input: `col1,col2,col3
1,text,3.14
2,more,2.71
3,data,1.41`,
			metricName:    "mixed_metric",
			expectedCount: 6, // 3 rows * 2 numeric columns (col1, col3)
			wantErr:       false,
		},
		{
			name: "CSV with whitespace",
			input: ` col1 , col2 , col3
 1 , value2 , 3
 4 , value5 , 6 `,
			metricName:    "whitespace_metric",
			expectedCount: 4, // 2 rows * 2 numeric columns (col1, col3)
			wantErr:       false,
		},
		{
			name: "CSV with comments",
			input: `# This is a comment
col1,col2,col3
# Another comment
1,value2,3`,
			metricName:    "comment_metric",
			expectedCount: 2, // 1 row * 2 numeric columns (col1, col3)
			wantErr:       false,
		},
		{
			name:          "empty CSV",
			input:         "",
			metricName:    "empty_metric",
			expectedCount: 0,
			wantErr:       true, // No header row
		},
		{
			name: "header only",
			input: `col1,col2,col3
`,
			metricName:    "header_only",
			expectedCount: 0,
			wantErr:       false,
		},
		{
			name: "mismatched columns - skipped",
			input: `col1,col2,col3
1,value2,3
value4,value5
6,value7,8`,
			metricName:    "mismatched_metric",
			expectedCount: 4, // 2 matching rows * 2 numeric columns
			wantErr:       false,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			ctx := context.Background()
			timestamp := time.Now()
			parser := NewTabularCSVParser(tt.metricName, timestamp, []string{}) // No DNS resolution

			samples, err := parser.Parse(ctx, strings.NewReader(tt.input))
			if (err != nil) != tt.wantErr {
				t.Fatalf("Parse() error = %v, wantErr %v", err, tt.wantErr)
			}
			if err == nil && len(samples) != tt.expectedCount {
				t.Errorf("Parse() got %d samples, want %d", len(samples), tt.expectedCount)
			}

			// Verify all samples have the correct base metric name and timestamp.
			wantPrefix := tt.metricName + "_"
			for _, sample := range samples {
				if !strings.HasPrefix(sample.MetricName, wantPrefix) {
					t.Errorf("Sample metric name = %s, want prefix %s_", sample.MetricName, tt.metricName)
				}
				if !sample.Timestamp.Equal(timestamp) {
					t.Errorf("Sample timestamp = %v, want %v", sample.Timestamp, timestamp)
				}
			}
		})
	}
}

// TestTabularCSVParser_Labels checks that the text columns of a row are
// attached as labels to the samples produced from that row's numeric columns,
// and that the sample values match the numeric cells.
func TestTabularCSVParser_Labels(t *testing.T) {
	ctx := context.Background()
	timestamp := time.Now()
	input := `responsecode,httpmethod,user,totaltime
200,GET,alice,50.5
404,POST,bob,100.2`

	parser := NewTabularCSVParser("test_metric", timestamp, []string{})
	samples, err := parser.Parse(ctx, strings.NewReader(input))
	if err != nil {
		t.Fatalf("Parse() error = %v", err)
	}

	// 2 rows * 2 numeric columns (responsecode, totaltime) = 4 samples
	if len(samples) != 4 {
		t.Fatalf("Expected 4 samples, got %d", len(samples))
	}

	// Find the responsecode and totaltime metrics for the first row. Pointers
	// are taken by index so they refer to the slice elements, not loop copies.
	var responsecodeMetric, totaltimeMetric *metrics.Sample
	for i := range samples {
		s := &samples[i]
		if s.Labels["httpmethod"] != "GET" || s.Labels["user"] != "alice" {
			continue
		}
		switch {
		case strings.HasSuffix(s.MetricName, "_responsecode"):
			responsecodeMetric = s
		case strings.HasSuffix(s.MetricName, "_totaltime"):
			totaltimeMetric = s
		}
	}
	if responsecodeMetric == nil || totaltimeMetric == nil {
		t.Fatalf("Could not find expected metrics")
	}

	expectedLabels := map[string]string{
		"httpmethod": "GET",
		"user":       "alice",
	}
	// checkLabels reports every expected label that is missing from, or has
	// the wrong value in, the given label set.
	checkLabels := func(metric string, got map[string]string) {
		t.Helper()
		for key, expectedValue := range expectedLabels {
			actualValue, ok := got[key]
			switch {
			case !ok:
				t.Errorf("%s metric missing label %s", metric, key)
			case actualValue != expectedValue:
				t.Errorf("%s metric label %s = %s, want %s", metric, key, actualValue, expectedValue)
			}
		}
	}

	// Check responsecode metric
	if responsecodeMetric.Value != 200 {
		t.Errorf("responsecode value = %f, want 200", responsecodeMetric.Value)
	}
	checkLabels("responsecode", responsecodeMetric.Labels)

	// Check totaltime metric
	if totaltimeMetric.Value != 50.5 {
		t.Errorf("totaltime value = %f, want 50.5", totaltimeMetric.Value)
	}
	checkLabels("totaltime", totaltimeMetric.Labels)
}

// TestTabularCSVParser_ContextCancellation checks that Parse reports
// context.Canceled when it is handed an already-cancelled context.
func TestTabularCSVParser_ContextCancellation(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	cancel() // Cancel immediately

	timestamp := time.Now()
	input := `col1,col2,col3
value1,value2,value3
value4,value5,value6`

	parser := NewTabularCSVParser("test_metric", timestamp, []string{})
	_, err := parser.Parse(ctx, strings.NewReader(input))

	// errors.Is also matches a wrapped cancellation error, so the test keeps
	// passing if Parse starts adding context to the error it returns.
	if !errors.Is(err, context.Canceled) {
		t.Errorf("Expected context.Canceled error, got %v", err)
	}
}

// TestTabularCSVParser_LargeFile parses a generated 1000-row CSV and checks
// that one sample is produced per row.
func TestTabularCSVParser_LargeFile(t *testing.T) {
	ctx := context.Background()
	timestamp := time.Now()

	// Generate a CSV with 1000 rows, 1 numeric column
	var builder strings.Builder
	builder.WriteString("col1,col2,col3\n")
	for i := 0; i < 1000; i++ {
		// Write straight into the builder instead of formatting a temporary string.
		fmt.Fprintf(&builder, "%d,value2,value3\n", i)
	}

	parser := NewTabularCSVParser("large_metric", timestamp, []string{})
	samples, err := parser.Parse(ctx, strings.NewReader(builder.String()))
	if err != nil {
		t.Fatalf("Parse() error = %v", err)
	}

	// 1000 rows * 1 numeric column = 1000 samples
	if len(samples) != 1000 {
		t.Errorf("Expected 1000 samples, got %d", len(samples))
	}
}

// TestSanitizeLabelName checks sanitizeLabelName against a table of raw
// header names and the sanitized names expected for them.
func TestSanitizeLabelName(t *testing.T) {
	tests := []struct {
		input    string
		expected string
	}{
		{"avg(totaltime)", "avg_totaltime"},
		{"sum(rcv)", "sum_rcv"},
		{"response-code", "response_code"},
		{"valid_label", "valid_label"},
		{"ValidLabel123", "ValidLabel123"},
		{"123invalid", "label_123invalid"},
		{"label__with___underscores", "label_with_underscores"},
		{"_leading", "leading"},
		{"trailing_", "trailing"},
		{"special!@#$chars", "special_chars"},
		{"", "unknown"},
		{"___", "unknown"},
		{"http.method", "http_method"},
		{"status_code", "status_code"},
	}

	for _, tt := range tests {
		t.Run(tt.input, func(t *testing.T) {
			if result := sanitizeLabelName(tt.input); result != tt.expected {
				t.Errorf("sanitizeLabelName(%q) = %q, want %q", tt.input, result, tt.expected)
			}
		})
	}
}

// TestTabularCSVParser_WithSpecialCharacterHeaders checks that header names
// containing characters such as parentheses, dashes and dots are sanitized
// both in the metric names and in the label keys of the parsed samples.
func TestTabularCSVParser_WithSpecialCharacterHeaders(t *testing.T) {
	ctx := context.Background()
	timestamp := time.Now()
	input := `avg(totaltime),sum(rcv),response-code,http.method
50.5,1102,200,GET
100.2,2204,404,POST`

	parser := NewTabularCSVParser("special_metric", timestamp, []string{})
	samples, err := parser.Parse(ctx, strings.NewReader(input))
	if err != nil {
		t.Fatalf("Parse() error = %v", err)
	}

	// 2 rows * 3 numeric columns (avg(totaltime), sum(rcv), response-code) = 6 samples
	if len(samples) != 6 {
		t.Fatalf("Expected 6 samples, got %d", len(samples))
	}

	// Check that metric names are sanitized
	expectedMetrics := []string{
		"special_metric_avg_totaltime",
		"special_metric_sum_rcv",
		"special_metric_response_code",
	}
	foundMetrics := make(map[string]bool, len(samples))
	for _, sample := range samples {
		foundMetrics[sample.MetricName] = true
	}
	for _, expected := range expectedMetrics {
		if !foundMetrics[expected] {
			t.Errorf("Missing expected metric %s", expected)
		}
	}

	// Check that string column becomes label (http.method -> http_method)
	for _, sample := range samples {
		if _, ok := sample.Labels["http_method"]; !ok {
			t.Errorf("Sample missing sanitized label http_method")
		}
	}

	// Check values are correct for first row
	for _, sample := range samples {
		if sample.Labels["http_method"] != "GET" {
			continue
		}
		switch {
		case strings.HasSuffix(sample.MetricName, "_avg_totaltime"):
			if sample.Value != 50.5 {
				t.Errorf("avg_totaltime value = %f, want 50.5", sample.Value)
			}
		case strings.HasSuffix(sample.MetricName, "_sum_rcv"):
			if sample.Value != 1102 {
				t.Errorf("sum_rcv value = %f, want 1102", sample.Value)
			}
		case strings.HasSuffix(sample.MetricName, "_response_code"):
			if sample.Value != 200 {
				t.Errorf("response_code value = %f, want 200", sample.Value)
			}
		}
	}
}

// TestTabularCSVParser_DNSResolution parses a row whose "ip" column is listed
// for DNS resolution and checks only that the resulting label is non-empty,
// because the resolved value depends on the host running the test.
func TestTabularCSVParser_DNSResolution(t *testing.T) {
	ctx := context.Background()
	timestamp := time.Now()

	// Test with localhost IP which should resolve on most systems
	input := `ip,responsecode,count
127.0.0.1,200,100`

	parser := NewTabularCSVParser("test_metric", timestamp, []string{"ip"})
	samples, err := parser.Parse(ctx, strings.NewReader(input))
	if err != nil {
		t.Fatalf("Parse() error = %v", err)
	}

	// Should have 1 sample (count metric)
	// NOTE(review): responsecode is numeric in this row as well, and
	// TestTabularCSVParser_Parse counts it as a metric column; confirm against
	// the parser why only one sample is expected here.
	if len(samples) != 1 {
		t.Fatalf("Expected 1 sample, got %d", len(samples))
	}

	// The ip label should either be resolved to a hostname or remain as IP if DNS failed
	ipLabel := samples[0].Labels["ip"]
	if ipLabel == "" {
		t.Error("ip label is empty")
	}

	// If resolution succeeded, it should not be the original IP
	// If it failed, it should still be the IP
	t.Logf("IP label value: %s (original: 127.0.0.1)", ipLabel)
}

// TestTabularCSVParser_DNSResolutionDisabled checks that an IP-valued label is
// passed through untouched when no labels are listed for DNS resolution.
func TestTabularCSVParser_DNSResolutionDisabled(t *testing.T) {
	ctx := context.Background()
	timestamp := time.Now()
	input := `ip,responsecode,count
192.168.1.1,200,100`

	// Pass empty slice - no DNS resolution
	parser := NewTabularCSVParser("test_metric", timestamp, []string{})
	samples, err := parser.Parse(ctx, strings.NewReader(input))
	if err != nil {
		t.Fatalf("Parse() error = %v", err)
	}
	if len(samples) != 1 {
		t.Fatalf("Expected 1 sample, got %d", len(samples))
	}

	// IP should remain unchanged
	if ipLabel := samples[0].Labels["ip"]; ipLabel != "192.168.1.1" {
		t.Errorf("IP label = %s, want 192.168.1.1 (DNS resolution should be disabled)", ipLabel)
	}
}

// TestTabularCSVParser_DNSResolutionMultipleLabels checks that both labels
// listed for DNS resolution are still present on the parsed sample; their
// values are only logged because they depend on the host's resolver.
func TestTabularCSVParser_DNSResolutionMultipleLabels(t *testing.T) {
	ctx := context.Background()
	timestamp := time.Now()
	input := `source_ip,dest_ip,count
127.0.0.1,192.168.1.1,100`

	// Resolve both source_ip and dest_ip
	parser := NewTabularCSVParser("test_metric", timestamp, []string{"source_ip", "dest_ip"})
	samples, err := parser.Parse(ctx, strings.NewReader(input))
	if err != nil {
		t.Fatalf("Parse() error = %v", err)
	}
	if len(samples) != 1 {
		t.Fatalf("Expected 1 sample, got %d", len(samples))
	}

	// Both labels should be present
	labels := samples[0].Labels
	if _, ok := labels["source_ip"]; !ok {
		t.Error("source_ip label missing")
	}
	if _, ok := labels["dest_ip"]; !ok {
		t.Error("dest_ip label missing")
	}

	t.Logf("source_ip: %s", labels["source_ip"])
	t.Logf("dest_ip: %s", labels["dest_ip"])
}

// TestTabularCSVParser_DNSResolutionNonIPValue checks that a label listed for
// DNS resolution keeps its original value when that value is not an IP
// address.
func TestTabularCSVParser_DNSResolutionNonIPValue(t *testing.T) {
	ctx := context.Background()
	timestamp := time.Now()

	// ip column contains non-IP value
	input := `ip,responsecode,count
not-an-ip,200,100`

	parser := NewTabularCSVParser("test_metric", timestamp, []string{"ip"})
	samples, err := parser.Parse(ctx, strings.NewReader(input))
	if err != nil {
		t.Fatalf("Parse() error = %v", err)
	}
	if len(samples) != 1 {
		t.Fatalf("Expected 1 sample, got %d", len(samples))
	}

	// Non-IP value should remain unchanged
	if ipLabel := samples[0].Labels["ip"]; ipLabel != "not-an-ip" {
		t.Errorf("IP label = %s, want not-an-ip (should not resolve non-IP values)", ipLabel)
	}
}

// TestTabularCSVParser_DNSResolutionCaching parses several rows that share one
// IP and checks that every sample ends up with the same "ip" label and that
// the parser's resolver reports a cache size of one.
func TestTabularCSVParser_DNSResolutionCaching(t *testing.T) {
	ctx := context.Background()
	timestamp := time.Now()

	// Multiple rows with same IP
	input := `ip,responsecode,count
127.0.0.1,200,100
127.0.0.1,404,50
127.0.0.1,500,5`

	parser := NewTabularCSVParser("test_metric", timestamp, []string{"ip"})
	samples, err := parser.Parse(ctx, strings.NewReader(input))
	if err != nil {
		t.Fatalf("Parse() error = %v", err)
	}

	// Should have 3 samples
	// NOTE(review): assumes one sample per row even though responsecode is
	// numeric too; confirm against the parser.
	if len(samples) != 3 {
		t.Fatalf("Expected 3 samples, got %d", len(samples))
	}

	// All should have same resolved IP (cached result)
	firstIP := samples[0].Labels["ip"]
	for i, sample := range samples {
		if got := sample.Labels["ip"]; got != firstIP {
			t.Errorf("Sample %d has different IP label: %s vs %s (caching issue)", i, got, firstIP)
		}
	}

	// Check cache size
	if size := parser.resolver.GetCacheSize(); size != 1 {
		t.Errorf("Expected cache size 1 (one unique IP), got %d", size)
	}
}