diff options
| author | Paul Buetow <paul@buetow.org> | 2026-04-11 09:30:28 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2026-04-11 09:30:28 +0300 |
| commit | a5b9ca23e2e0e08a5908a723e93fe89685afb28b (patch) | |
| tree | 785f402a163705fcff7151363ce57dcdeb8c0ba8 | |
| parent | 3f867dc6a821f85dc54f230b5f6a6c7e933f54a8 (diff) | |
b0: add tests and docs for percentage/percentile aggregations
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | doc/querylanguage.md | 2 |
| -rw-r--r-- | internal/mapr/groupset_percentage_test.go | 80 |
2 files changed, 77 insertions, 5 deletions
diff --git a/doc/querylanguage.md b/doc/querylanguage.md index fab387b..3d134e3 100644 --- a/doc/querylanguage.md +++ b/doc/querylanguage.md @@ -61,3 +61,5 @@ FUNCTION := md5sum|maskdigits * `rorder` stands for reverse order. * `lacks` is an alias for `ncontains` (not contains). * Available fields (variables and barewords) vary from the log format used. Check out the [log format](./logformats.md) documentation for more information. +* `percentage(field)` returns the selected group's share of the total for that field across all groups, expressed as a value between 0 and 100. +* `percentile(field)` returns the percentile rank of the selected group's value among all grouped values for that field, also expressed as a value between 0 and 100. Equal values share the same rank. diff --git a/internal/mapr/groupset_percentage_test.go b/internal/mapr/groupset_percentage_test.go index 94a9fc8..1273859 100644 --- a/internal/mapr/groupset_percentage_test.go +++ b/internal/mapr/groupset_percentage_test.go @@ -29,12 +29,20 @@ func TestGroupSetResultPercentageAndPercentile(t *testing.T) { t.Fatalf("Unable to aggregate percentile for host-b: %v", err) } + setC := groupSet.GetSet("host-c") + if err := setC.Aggregate("percentage(value)", Percentage, "20", false); err != nil { + t.Fatalf("Unable to aggregate percentage for host-c: %v", err) + } + if err := setC.Aggregate("percentile(value)", Percentile, "20", false); err != nil { + t.Fatalf("Unable to aggregate percentile for host-c: %v", err) + } + rows, _, err := groupSet.result(query, false) if err != nil { t.Fatalf("Unable to build result rows: %v", err) } - if len(rows) != 2 { - t.Fatalf("Expected 2 result rows, got %d", len(rows)) + if len(rows) != 3 { + t.Fatalf("Expected 3 result rows, got %d", len(rows)) } if rows[0].groupKey != "host-b" { @@ -54,10 +62,72 @@ func TestGroupSetResultPercentageAndPercentile(t *testing.T) { valuesByGroup[row.groupKey] = parsedValues } - assertAlmostEqual(t, valuesByGroup["host-a"][0], 25.0, 
0.0001, "host-a percentage") - assertAlmostEqual(t, valuesByGroup["host-a"][1], 50.0, 0.0001, "host-a percentile") - assertAlmostEqual(t, valuesByGroup["host-b"][0], 75.0, 0.0001, "host-b percentage") + assertAlmostEqual(t, valuesByGroup["host-a"][0], 16.6666666667, 0.0001, "host-a percentage") + assertAlmostEqual(t, valuesByGroup["host-a"][1], 33.3333333333, 0.0001, "host-a percentile") + assertAlmostEqual(t, valuesByGroup["host-b"][0], 50.0, 0.0001, "host-b percentage") assertAlmostEqual(t, valuesByGroup["host-b"][1], 100.0, 0.0001, "host-b percentile") + assertAlmostEqual(t, valuesByGroup["host-c"][0], 33.3333333333, 0.0001, "host-c percentage") + assertAlmostEqual(t, valuesByGroup["host-c"][1], 66.6666666667, 0.0001, "host-c percentile") +} + +func TestGroupSetPercentageReturnsZeroWhenTotalIsZero(t *testing.T) { + query, err := NewQuery("select percentage(value) from stats group by host") + if err != nil { + t.Fatalf("Unable to parse query: %v", err) + } + + groupSet := NewGroupSet() + for _, host := range []string{"host-a", "host-b"} { + set := groupSet.GetSet(host) + if err := set.Aggregate("percentage(value)", Percentage, "0", false); err != nil { + t.Fatalf("Unable to aggregate percentage for %s: %v", host, err) + } + } + + rows, _, err := groupSet.result(query, false) + if err != nil { + t.Fatalf("Unable to build result rows: %v", err) + } + if len(rows) != 2 { + t.Fatalf("Expected 2 result rows, got %d", len(rows)) + } + for _, row := range rows { + if len(row.values) != 1 { + t.Fatalf("Expected one result value, got %d for %s", len(row.values), row.groupKey) + } + value, err := strconv.ParseFloat(row.values[0], 64) + if err != nil { + t.Fatalf("Unable to parse percentage result %q: %v", row.values[0], err) + } + assertAlmostEqual(t, value, 0.0, 0.0001, row.groupKey+" percentage") + } +} + +func TestPercentileRank(t *testing.T) { + sortedValues := []float64{10, 20, 30} + + tests := []struct { + name string + value float64 + expected float64 + }{ + {name: 
"below minimum", value: 5, expected: 0}, + {name: "first bucket", value: 10, expected: 33.3333333333}, + {name: "middle bucket", value: 20, expected: 66.6666666667}, + {name: "maximum", value: 30, expected: 100}, + {name: "above maximum", value: 40, expected: 100}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assertAlmostEqual(t, percentileRank(tt.value, sortedValues), tt.expected, 0.0001, tt.name) + }) + } + + assertAlmostEqual(t, percentileRank(10, []float64{10, 10, 30}), 66.6666666667, 0.0001, "duplicate percentile rank") + if got := percentileRank(10, nil); got != 0 { + t.Fatalf("Expected empty percentile input to return 0, got %f", got) + } } func assertAlmostEqual(t *testing.T, got, expected, tolerance float64, label string) { |
