summaryrefslogtreecommitdiff
path: root/internal/prometheus.go
diff options
context:
space:
mode:
author Paul Buetow <paul@buetow.org> 2026-01-08 21:41:29 +0200
committer Paul Buetow <paul@buetow.org> 2026-01-08 21:41:29 +0200
commit a10cbd4e27d944464cec88aaf49d8b8c354d26e1 (patch)
tree 0bdd0a23fbb8939c15544b857c74101cb5721a6c /internal/prometheus.go
parent f5cffe240c44045684d4f74981235b060828550e (diff)
Add Prometheus alert scraping with configurable timeout and host failover
Diffstat (limited to 'internal/prometheus.go')
-rw-r--r--internal/prometheus.go131
1 files changed, 131 insertions, 0 deletions
diff --git a/internal/prometheus.go b/internal/prometheus.go
new file mode 100644
index 0000000..1c06aaf
--- /dev/null
+++ b/internal/prometheus.go
@@ -0,0 +1,131 @@
+package internal
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "io"
+ "log"
+ "net/http"
+ "time"
+)
+
+// prometheusResponse is the JSON envelope that fetchFromHost decodes the body
+// of a GET /api/v1/alerts response into.
+type prometheusResponse struct {
+ // Status is the API-level result; fetchFromHost treats anything other than
+ // "success" as an error.
+ Status string `json:"status"`
+ // Data wraps the payload; only its list of alerts is decoded.
+ Data struct {
+ Alerts []prometheusAlert `json:"alerts"`
+ } `json:"data"`
+}
+
+// prometheusAlert is a single entry of the "alerts" array in a
+// prometheusResponse.
+type prometheusAlert struct {
+ // Labels holds the alert's labels; mergePrometheusAlerts reads "alertname"
+ // and "severity" from it.
+ Labels map[string]string `json:"labels"`
+ // Annotations holds the alert's annotations; mergePrometheusAlerts reads
+ // "summary" and, as a fallback, "description" from it.
+ Annotations map[string]string `json:"annotations"`
+ // State is the alert state; mergePrometheusAlerts only processes alerts
+ // whose state is "firing".
+ State string `json:"state"`
+}
+
+// mergePrometheusAlerts scrapes alerts from the configured Prometheus hosts
+// and records them in state as check results.
+//
+// With no hosts configured, state is returned untouched. If no host can be
+// scraped, a single critical "Prometheus alerts" result carrying the error is
+// recorded. Otherwise every alert whose state is "firing" becomes a result
+// named "Prometheus: <alertname>": critical when its "severity" label is
+// "critical", warning for any other (or missing) severity. Alerts in any other
+// state are skipped.
+//
+// The (possibly updated) state is returned.
+func mergePrometheusAlerts(ctx context.Context, state state, conf config) state {
+	if len(conf.PrometheusHosts) == 0 {
+		return state
+	}
+
+	timeout := time.Duration(conf.PrometheusTimeoutS) * time.Second
+	alerts, host, err := fetchPrometheusAlerts(ctx, conf.PrometheusHosts, timeout)
+	if err != nil {
+		log.Printf("Failed to fetch Prometheus alerts from any host: %v", err)
+		state.update(checkResult{
+			name:   "Prometheus alerts",
+			output: fmt.Sprintf("CRITICAL: %v", err),
+			epoch:  time.Now().Unix(),
+			status: nagiosCritical,
+		})
+		return state
+	}
+
+	// All results of one scrape share the same timestamp.
+	epoch := time.Now().Unix()
+
+	// NOTE(review): alerts sharing an alertname map to the same check name, so
+	// presumably a later one replaces an earlier one in state — confirm against
+	// state.update.
+	firing := 0
+	for _, alert := range alerts {
+		if alert.State != "firing" {
+			continue
+		}
+		firing++
+
+		alertName := alert.Labels["alertname"]
+		severity := alert.Labels["severity"]
+
+		// Prefer the short summary, fall back to the longer description.
+		description := alert.Annotations["summary"]
+		if description == "" {
+			description = alert.Annotations["description"]
+		}
+		if description == "" {
+			description = "no description"
+		}
+
+		status := nagiosWarning
+		if severity == "critical" {
+			status = nagiosCritical
+		}
+
+		state.update(checkResult{
+			name:   fmt.Sprintf("Prometheus: %s", alertName),
+			output: fmt.Sprintf("%s [%s]: %s", alertName, severity, description),
+			epoch:  epoch,
+			status: status,
+		})
+	}
+
+	// Report only the alerts that were actually recorded; len(alerts) would
+	// also count the non-firing ones skipped above.
+	log.Printf("Fetched %d firing alerts from Prometheus host %s", firing, host)
+
+	return state
+}
+
+// fetchPrometheusAlerts tries each host in order and returns the alerts of the
+// first one that answers successfully, together with that host.
+//
+// A failing host is logged and the next one is tried. An error is returned
+// when hosts is empty, when ctx is done before a host could be tried, or when
+// every host failed (wrapping the error of the last host tried).
+func fetchPrometheusAlerts(ctx context.Context, hosts []string, timeout time.Duration) ([]prometheusAlert, string, error) {
+	// Without this guard the final error would wrap a nil error and render as
+	// "%!w(<nil>)".
+	if len(hosts) == 0 {
+		return nil, "", fmt.Errorf("no Prometheus hosts configured")
+	}
+
+	var lastErr error
+
+	for _, host := range hosts {
+		// Once the caller's context is done every further attempt would fail
+		// immediately, so stop instead of logging one failure per remaining host.
+		if err := ctx.Err(); err != nil {
+			return nil, "", fmt.Errorf("fetching Prometheus alerts aborted: %w", err)
+		}
+
+		alerts, err := fetchFromHost(ctx, host, timeout)
+		if err != nil {
+			log.Printf("Failed to fetch from Prometheus host %s: %v", host, err)
+			lastErr = err
+			continue
+		}
+		return alerts, host, nil
+	}
+
+	return nil, "", fmt.Errorf("all Prometheus hosts failed, last error: %w", lastErr)
+}
+
+// fetchFromHost requests http://<host>/api/v1/alerts and returns the alerts
+// contained in the response.
+//
+// The request is bounded by timeout in addition to any deadline ctx already
+// carries. A non-positive timeout falls back to a built-in default, because a
+// zero deadline would expire before the request is sent. An error is returned
+// when the request cannot be built or fails, the HTTP status is not 200, the
+// body cannot be read, exceeds the size cap or is not valid JSON, or the
+// API-level status is not "success".
+func fetchFromHost(ctx context.Context, host string, timeout time.Duration) ([]prometheusAlert, error) {
+	const (
+		defaultTimeout = 10 * time.Second
+		// maxBodyBytes caps how much of a response is buffered in memory.
+		maxBodyBytes = 16 << 20
+	)
+
+	if timeout <= 0 {
+		timeout = defaultTimeout
+	}
+
+	endpoint := fmt.Sprintf("http://%s/api/v1/alerts", host)
+
+	ctx, cancel := context.WithTimeout(ctx, timeout)
+	defer cancel()
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create request: %w", err)
+	}
+
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("request failed: %w", err)
+	}
+	// A close error on a body that is only read carries nothing actionable.
+	defer func() { _ = resp.Body.Close() }()
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
+	}
+
+	// Read one byte past the cap so an oversized body is detected and reported
+	// explicitly rather than surfacing as a confusing JSON parse error.
+	body, err := io.ReadAll(io.LimitReader(resp.Body, maxBodyBytes+1))
+	if err != nil {
+		return nil, fmt.Errorf("failed to read response: %w", err)
+	}
+	if len(body) > maxBodyBytes {
+		return nil, fmt.Errorf("response larger than %d bytes", maxBodyBytes)
+	}
+
+	var promResp prometheusResponse
+	if err := json.Unmarshal(body, &promResp); err != nil {
+		return nil, fmt.Errorf("failed to parse response: %w", err)
+	}
+
+	if promResp.Status != "success" {
+		return nil, fmt.Errorf("prometheus returned status: %s", promResp.Status)
+	}
+
+	return promResp.Data.Alerts, nil
+}