1 files changed, 469 insertions, 0 deletions
diff --git a/internal/parser/tabular_csv_test.go b/internal/parser/tabular_csv_test.go
new file mode 100644
index 0000000..07818e8
--- /dev/null
+++ b/internal/parser/tabular_csv_test.go
@@ -0,0 +1,469 @@
+package parser
+
+import (
+	"context"
+	"fmt"
+	"strings"
+	"testing"
+	"time"
+
+	"epimetheus/internal/metrics"
+)
+
+func TestTabularCSVParser_Parse(t *testing.T) {
+	tests := []struct {
+		name          string
+		input         string
+		metricName    string
+		expectedCount int
+		wantErr       bool
+	}{
+		{
+			name: "simple tabular CSV with numeric and text columns",
+			input: `responsecode,httpmethod,user,totaltime
+200,GET,alice,50.5
+404,POST,bob,100.2
+500,GET,charlie,75.0`,
+			metricName:    "test_metric",
+			expectedCount: 6, // 3 rows * 2 numeric columns (responsecode, totaltime)
+			wantErr:       false,
+		},
+		{
+			name: "CSV with mixed data types",
+			input: `col1,col2,col3
+1,text,3.14
+2,more,2.71
+3,data,1.41`,
+			metricName:    "mixed_metric",
+			expectedCount: 6, // 3 rows * 2 numeric columns (col1, col3)
+			wantErr:       false,
+		},
+		{
+			name: "CSV with whitespace",
+			input: `  col1  ,  col2  ,  col3  
+  1  ,  value2  ,  3  
+  4  ,  value5  ,  6  `,
+			metricName:    "whitespace_metric",
+			expectedCount: 4, // 2 rows * 2 numeric columns (col1, col3)
+			wantErr:       false,
+		},
+		{
+			name: "CSV with comments",
+			input: `# This is a comment
+col1,col2,col3
+# Another comment
+1,value2,3`,
+			metricName:    "comment_metric",
+			expectedCount: 2, // 1 row * 2 numeric columns (col1, col3)
+			wantErr:       false,
+		},
+		{
+			name:          "empty CSV",
+			input:         "",
+			metricName:    "empty_metric",
+			expectedCount: 0,
+			wantErr:       true, // No header row
+		},
+		{
+			name: "header only",
+			input: `col1,col2,col3
+`,
+			metricName:    "header_only",
+			expectedCount: 0,
+			wantErr:       false,
+		},
+		{
+			name: "mismatched columns - skipped",
+			input: `col1,col2,col3
+1,value2,3
+value4,value5
+6,value7,8`,
+			metricName:    "mismatched_metric",
+			expectedCount: 4, // 2 matching rows * 2 numeric columns
+			wantErr:       false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			ctx := context.Background()
+			timestamp := time.Now()
+			parser := NewTabularCSVParser(tt.metricName, timestamp, []string{}) // No DNS resolution
+
+			reader := strings.NewReader(tt.input)
+			samples, err := parser.Parse(ctx, reader)
+
+			if (err != nil) != tt.wantErr {
+				t.Errorf("Parse() error = %v, wantErr %v", err, tt.wantErr)
+				return
+			}
+
+			if err == nil && len(samples) != tt.expectedCount {
+				t.Errorf("Parse() got %d samples, want %d", len(samples), tt.expectedCount)
+			}
+
+			// Verify all samples have the correct base metric name and timestamp
+			for _, sample := range samples {
+				if !strings.HasPrefix(sample.MetricName, tt.metricName+"_") {
+					t.Errorf("Sample metric name = %s, want prefix %s_", sample.MetricName, tt.metricName)
+				}
+				if !sample.Timestamp.Equal(timestamp) {
+					t.Errorf("Sample timestamp = %v, want %v", sample.Timestamp, timestamp)
+				}
+			}
+		})
+	}
+}
+
+// TestTabularCSVParser_Labels checks that the text columns of a row are
+// attached as labels to the samples built from that row's numeric columns.
+func TestTabularCSVParser_Labels(t *testing.T) {
+	const input = `responsecode,httpmethod,user,totaltime
+200,GET,alice,50.5
+404,POST,bob,100.2`
+
+	p := NewTabularCSVParser("test_metric", time.Now(), []string{})
+	samples, err := p.Parse(context.Background(), strings.NewReader(input))
+	if err != nil {
+		t.Fatalf("Parse() error = %v", err)
+	}
+
+	// Two rows, each with two numeric columns (responsecode, totaltime).
+	if len(samples) != 4 {
+		t.Fatalf("Expected 4 samples, got %d", len(samples))
+	}
+
+	// Pick out the two samples that belong to the first row (GET / alice).
+	var codeSample, timeSample *metrics.Sample
+	for i := range samples {
+		s := &samples[i]
+		if s.Labels["httpmethod"] != "GET" || s.Labels["user"] != "alice" {
+			continue
+		}
+		switch {
+		case strings.HasSuffix(s.MetricName, "_responsecode"):
+			codeSample = s
+		case strings.HasSuffix(s.MetricName, "_totaltime"):
+			timeSample = s
+		}
+	}
+	if codeSample == nil || timeSample == nil {
+		t.Fatalf("Could not find expected metrics")
+	}
+
+	wantLabels := map[string]string{"httpmethod": "GET", "user": "alice"}
+
+	// responsecode sample: value first, then each expected label.
+	if codeSample.Value != 200 {
+		t.Errorf("responsecode value = %f, want 200", codeSample.Value)
+	}
+	for name, want := range wantLabels {
+		switch got, ok := codeSample.Labels[name]; {
+		case !ok:
+			t.Errorf("responsecode metric missing label %s", name)
+		case got != want:
+			t.Errorf("responsecode metric label %s = %s, want %s", name, got, want)
+		}
+	}
+
+	// totaltime sample: same checks against the same label set.
+	if timeSample.Value != 50.5 {
+		t.Errorf("totaltime value = %f, want 50.5", timeSample.Value)
+	}
+	for name, want := range wantLabels {
+		switch got, ok := timeSample.Labels[name]; {
+		case !ok:
+			t.Errorf("totaltime metric missing label %s", name)
+		case got != want:
+			t.Errorf("totaltime metric label %s = %s, want %s", name, got, want)
+		}
+	}
+}
+
+func TestTabularCSVParser_ContextCancellation(t *testing.T) {
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel() // Cancel immediately
+
+	timestamp := time.Now()
+	input := `col1,col2,col3
+value1,value2,value3
+value4,value5,value6`
+
+	parser := NewTabularCSVParser("test_metric", timestamp, []string{})
+	reader := strings.NewReader(input)
+	_, err := parser.Parse(ctx, reader)
+
+	if err != context.Canceled {
+		t.Errorf("Expected context.Canceled error, got %v", err)
+	}
+}
+
+// TestTabularCSVParser_LargeFile feeds Parse a generated 1000-row document in
+// which only the first column is numeric and expects one sample per row.
+func TestTabularCSVParser_LargeFile(t *testing.T) {
+	const rows = 1000
+
+	// Build the CSV in memory: a header followed by rows of "<i>,value2,value3".
+	var csv strings.Builder
+	csv.WriteString("col1,col2,col3\n")
+	for i := 0; i < rows; i++ {
+		// Fprintf writes straight into the builder; no per-row temporary string.
+		fmt.Fprintf(&csv, "%d,value2,value3\n", i)
+	}
+
+	p := NewTabularCSVParser("large_metric", time.Now(), []string{})
+	samples, err := p.Parse(context.Background(), strings.NewReader(csv.String()))
+	if err != nil {
+		t.Fatalf("Parse() error = %v", err)
+	}
+
+	// One numeric column, so the sample count must equal the row count.
+	if len(samples) != rows {
+		t.Errorf("Expected %d samples, got %d", rows, len(samples))
+	}
+}
+
+// TestSanitizeLabelName pins the rewriting done by sanitizeLabelName with a
+// table of raw header names and the label name expected for each of them.
+func TestSanitizeLabelName(t *testing.T) {
+	cases := []struct{ input, expected string }{
+		{input: "avg(totaltime)", expected: "avg_totaltime"},
+		{input: "sum(rcv)", expected: "sum_rcv"},
+		{input: "response-code", expected: "response_code"},
+		{input: "valid_label", expected: "valid_label"},
+		{input: "ValidLabel123", expected: "ValidLabel123"},
+		// A name that starts with a digit is expected to gain a "label_" prefix.
+		{input: "123invalid", expected: "label_123invalid"},
+		{input: "label__with___underscores", expected: "label_with_underscores"},
+		{input: "_leading", expected: "leading"},
+		{input: "trailing_", expected: "trailing"},
+		{input: "special!@#$chars", expected: "special_chars"},
+		// When nothing usable remains, the expected result is "unknown".
+		{input: "", expected: "unknown"},
+		{input: "___", expected: "unknown"},
+		{input: "http.method", expected: "http_method"},
+		{input: "status_code", expected: "status_code"},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.input, func(t *testing.T) {
+			if got := sanitizeLabelName(tc.input); got != tc.expected {
+				t.Errorf("sanitizeLabelName(%q) = %q, want %q", tc.input, got, tc.expected)
+			}
+		})
+	}
+}
+
+func TestTabularCSVParser_WithSpecialCharacterHeaders(t *testing.T) {
+	ctx := context.Background()
+	timestamp := time.Now()
+	input := `avg(totaltime),sum(rcv),response-code,http.method
+50.5,1102,200,GET
+100.2,2204,404,POST`
+
+	parser := NewTabularCSVParser("special_metric", timestamp, []string{})
+	reader := strings.NewReader(input)
+	samples, err := parser.Parse(ctx, reader)
+
+	if err != nil {
+		t.Fatalf("Parse() error = %v", err)
+	}
+
+	// 2 rows * 3 numeric columns (avg(totaltime), sum(rcv), response-code) = 6 samples
+	if len(samples) != 6 {
+		t.Fatalf("Expected 6 samples, got %d", len(samples))
+	}
+
+	// Check that metric names are sanitized
+	expectedMetrics := []string{"special_metric_avg_totaltime", "special_metric_sum_rcv", "special_metric_response_code"}
+	foundMetrics := make(map[string]bool)
+	for _, sample := range samples {
+		foundMetrics[sample.MetricName] = true
+	}
+	for _, expected := range expectedMetrics {
+		if !foundMetrics[expected] {
+			t.Errorf("Missing expected metric %s", expected)
+		}
+	}
+
+	// Check that string column becomes label (http.method -> http_method)
+	for _, sample := range samples {
+		if _, ok := sample.Labels["http_method"]; !ok {
+			t.Errorf("Sample missing sanitized label http_method")
+		}
+	}
+
+	// Check values are correct for first row
+	for _, sample := range samples {
+		if sample.Labels["http_method"] == "GET" {
+			if strings.HasSuffix(sample.MetricName, "_avg_totaltime") {
+				if sample.Value != 50.5 {
+					t.Errorf("avg_totaltime value = %f, want 50.5", sample.Value)
+				}
+			} else if strings.HasSuffix(sample.MetricName, "_sum_rcv") {
+				if sample.Value != 1102 {
+					t.Errorf("sum_rcv value = %f, want 1102", sample.Value)
+				}
+			} else if strings.HasSuffix(sample.MetricName, "_response_code") {
+				if sample.Value != 200 {
+					t.Errorf("response_code value = %f, want 200", sample.Value)
+				}
+			}
+		}
+	}
+}
+
+func TestTabularCSVParser_DNSResolution(t *testing.T) {
+	ctx := context.Background()
+	timestamp := time.Now()
+	
+	// Test with localhost IP which should resolve on most systems
+	input := `ip,responsecode,count
+127.0.0.1,200,100`
+
+	parser := NewTabularCSVParser("test_metric", timestamp, []string{"ip"})
+	reader := strings.NewReader(input)
+	samples, err := parser.Parse(ctx, reader)
+
+	if err != nil {
+		t.Fatalf("Parse() error = %v", err)
+	}
+
+	// Should have 1 sample (count metric)
+	if len(samples) != 1 {
+		t.Fatalf("Expected 1 sample, got %d", len(samples))
+	}
+
+	// The ip label should either be resolved to a hostname or remain as IP if DNS failed
+	ipLabel := samples[0].Labels["ip"]
+	if ipLabel == "" {
+		t.Error("ip label is empty")
+	}
+	
+	// If resolution succeeded, it should not be the original IP
+	// If it failed, it should still be the IP
+	t.Logf("IP label value: %s (original: 127.0.0.1)", ipLabel)
+}
+
+func TestTabularCSVParser_DNSResolutionDisabled(t *testing.T) {
+	ctx := context.Background()
+	timestamp := time.Now()
+	
+	input := `ip,responsecode,count
+192.168.1.1,200,100`
+
+	// Pass empty slice - no DNS resolution
+	parser := NewTabularCSVParser("test_metric", timestamp, []string{})
+	reader := strings.NewReader(input)
+	samples, err := parser.Parse(ctx, reader)
+
+	if err != nil {
+		t.Fatalf("Parse() error = %v", err)
+	}
+
+	if len(samples) != 1 {
+		t.Fatalf("Expected 1 sample, got %d", len(samples))
+	}
+
+	// IP should remain unchanged
+	ipLabel := samples[0].Labels["ip"]
+	if ipLabel != "192.168.1.1" {
+		t.Errorf("IP label = %s, want 192.168.1.1 (DNS resolution should be disabled)", ipLabel)
+	}
+}
+
+func TestTabularCSVParser_DNSResolutionMultipleLabels(t *testing.T) {
+	ctx := context.Background()
+	timestamp := time.Now()
+	
+	input := `source_ip,dest_ip,count
+127.0.0.1,192.168.1.1,100`
+
+	// Resolve both source_ip and dest_ip
+	parser := NewTabularCSVParser("test_metric", timestamp, []string{"source_ip", "dest_ip"})
+	reader := strings.NewReader(input)
+	samples, err := parser.Parse(ctx, reader)
+
+	if err != nil {
+		t.Fatalf("Parse() error = %v", err)
+	}
+
+	if len(samples) != 1 {
+		t.Fatalf("Expected 1 sample, got %d", len(samples))
+	}
+
+	// Both labels should be present
+	if _, ok := samples[0].Labels["source_ip"]; !ok {
+		t.Error("source_ip label missing")
+	}
+	if _, ok := samples[0].Labels["dest_ip"]; !ok {
+		t.Error("dest_ip label missing")
+	}
+	
+	t.Logf("source_ip: %s", samples[0].Labels["source_ip"])
+	t.Logf("dest_ip: %s", samples[0].Labels["dest_ip"])
+}
+
+func TestTabularCSVParser_DNSResolutionNonIPValue(t *testing.T) {
+	ctx := context.Background()
+	timestamp := time.Now()
+	
+	// ip column contains non-IP value
+	input := `ip,responsecode,count
+not-an-ip,200,100`
+
+	parser := NewTabularCSVParser("test_metric", timestamp, []string{"ip"})
+	reader := strings.NewReader(input)
+	samples, err := parser.Parse(ctx, reader)
+
+	if err != nil {
+		t.Fatalf("Parse() error = %v", err)
+	}
+
+	if len(samples) != 1 {
+		t.Fatalf("Expected 1 sample, got %d", len(samples))
+	}
+
+	// Non-IP value should remain unchanged
+	ipLabel := samples[0].Labels["ip"]
+	if ipLabel != "not-an-ip" {
+		t.Errorf("IP label = %s, want not-an-ip (should not resolve non-IP values)", ipLabel)
+	}
+}
+
+func TestTabularCSVParser_DNSResolutionCaching(t *testing.T) {
+	ctx := context.Background()
+	timestamp := time.Now()
+	
+	// Multiple rows with same IP
+	input := `ip,responsecode,count
+127.0.0.1,200,100
+127.0.0.1,404,50
+127.0.0.1,500,5`
+
+	parser := NewTabularCSVParser("test_metric", timestamp, []string{"ip"})
+	reader := strings.NewReader(input)
+	samples, err := parser.Parse(ctx, reader)
+
+	if err != nil {
+		t.Fatalf("Parse() error = %v", err)
+	}
+
+	// Should have 3 samples
+	if len(samples) != 3 {
+		t.Fatalf("Expected 3 samples, got %d", len(samples))
+	}
+
+	// All should have same resolved IP (cached result)
+	firstIP := samples[0].Labels["ip"]
+	for i, sample := range samples {
+		if sample.Labels["ip"] != firstIP {
+			t.Errorf("Sample %d has different IP label: %s vs %s (caching issue)", i, sample.Labels["ip"], firstIP)
+		}
+	}
+	
+	// Check cache size
+	if parser.resolver.GetCacheSize() != 1 {
+		t.Errorf("Expected cache size 1 (one unique IP), got %d", parser.resolver.GetCacheSize())
+	}
+}