summaryrefslogtreecommitdiff
path: root/f3s/prometheus
diff options
context:
space:
mode:
Diffstat (limited to 'f3s/prometheus')
-rw-r--r--f3s/prometheus/Justfile8
-rw-r--r--f3s/prometheus/README.md50
-rw-r--r--f3s/prometheus/grafana-datasources-all.yaml77
-rw-r--r--f3s/prometheus/loki-datasource.yaml18
-rwxr-xr-xf3s/prometheus/patch-datasources.sh61
-rw-r--r--f3s/prometheus/persistence-values.yaml44
-rw-r--r--f3s/prometheus/problem.md1154
-rw-r--r--f3s/prometheus/tempo-datasource.yaml34
8 files changed, 1386 insertions, 60 deletions
diff --git a/f3s/prometheus/Justfile b/f3s/prometheus/Justfile
index 1038650..a5be5ec 100644
--- a/f3s/prometheus/Justfile
+++ b/f3s/prometheus/Justfile
@@ -1,18 +1,26 @@
install:
kubectl apply -f persistent-volumes.yaml
kubectl create secret generic additional-scrape-configs --from-file=additional-scrape-configs.yaml -n monitoring --dry-run=client -o yaml | kubectl apply -f -
+ kubectl apply -f grafana-datasources-all.yaml
helm install prometheus prometheus-community/kube-prometheus-stack --namespace monitoring -f persistence-values.yaml
kubectl apply -f freebsd-recording-rules.yaml
kubectl apply -f openbsd-recording-rules.yaml
kubectl apply -f zfs-recording-rules.yaml
just -f grafana-ingress/Justfile install
+ @echo "Waiting for Grafana to be ready..."
+ @sleep 10
+ @echo "Restarting Grafana to load datasources..."
+ kubectl delete pod -n monitoring -l app.kubernetes.io/name=grafana
upgrade:
kubectl create secret generic additional-scrape-configs --from-file=additional-scrape-configs.yaml -n monitoring --dry-run=client -o yaml | kubectl apply -f -
+ kubectl apply -f grafana-datasources-all.yaml
helm upgrade prometheus prometheus-community/kube-prometheus-stack --namespace monitoring -f persistence-values.yaml
kubectl apply -f freebsd-recording-rules.yaml
kubectl apply -f openbsd-recording-rules.yaml
kubectl apply -f zfs-recording-rules.yaml
+ @echo "Restarting Grafana to load datasources..."
+ kubectl delete pod -n monitoring -l app.kubernetes.io/name=grafana
uninstall:
just -f grafana-ingress/Justfile delete
diff --git a/f3s/prometheus/README.md b/f3s/prometheus/README.md
index 1ac093c..9a5fb8b 100644
--- a/f3s/prometheus/README.md
+++ b/f3s/prometheus/README.md
@@ -1,41 +1,25 @@
-# Prometheus installation
+# Prometheus Stack Configuration
-This directory contains the configuration to deploy the kube-prometheus-stack, with persistent storage for Prometheus and Grafana, and a Grafana ingress.
+## Deploying
-## Prerequisites
+```bash
+just install # First time
+just upgrade # Updates
+```
-1. Create the monitoring namespace:
+**IMPORTANT**: After upgrading, Grafana will automatically restart to load new configurations.
- ```sh
- kubectl create ns monitoring
- ```
+## Datasources
-2. Add the Prometheus Helm chart repository:
+All Grafana datasources are provisioned via a single unified ConfigMap:
+- `grafana-datasources-all.yaml` - Contains Prometheus, Alertmanager, Loki, and Tempo
- ```sh
- helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
- helm repo update
- ```
+This ConfigMap is directly mounted to `/etc/grafana/provisioning/datasources/` in the Grafana pod, ensuring datasources are automatically loaded on startup.
-3. Create the directories on your NFS server:
+**Provisioned Datasources:**
+- ✅ **Prometheus** (uid=prometheus) - Default datasource for metrics
+- ✅ **Alertmanager** (uid=alertmanager) - Alert management
+- ✅ **Loki** (uid=loki) - Log aggregation
+- ✅ **Tempo** (uid=tempo) - Distributed tracing with traces-to-logs and traces-to-metrics correlation
- ```sh
- mkdir -p /data/nfs/k3svolumes/prometheus/data
- mkdir -p /data/nfs/k3svolumes/grafana/data
- ```
-
-## Automation with Justfile
-
-A `Justfile` is provided to automate the installation and uninstallation process.
-
-- To install everything, run:
-
- ```sh
- just install
- ```
-
-- To uninstall everything, run:
-
- ```sh
- just uninstall
- ```
+**Note:** The sidecar-based provisioning is disabled in favor of direct ConfigMap mounting (following the pattern from /home/paul/git/x-rag/infra/k8s/monitoring/). See `problem.md` for the complete debugging journey and resolution.
diff --git a/f3s/prometheus/grafana-datasources-all.yaml b/f3s/prometheus/grafana-datasources-all.yaml
new file mode 100644
index 0000000..dcc6a11
--- /dev/null
+++ b/f3s/prometheus/grafana-datasources-all.yaml
@@ -0,0 +1,77 @@
+# Unified Grafana Datasources ConfigMap
+# This ConfigMap contains all datasources (Prometheus, Alertmanager, Loki, Tempo)
+# and is directly mounted to /etc/grafana/provisioning/datasources/
+#
+# This approach follows the pattern from /home/paul/git/x-rag/infra/k8s/monitoring/
+# where datasources are provisioned via direct ConfigMap mount instead of sidecar discovery.
+#
+# Benefits:
+# - Simple and direct (no sidecar complexity)
+# - Grafana provisioning runs on startup
+# - Field order preserved exactly as written
+# - All datasources in one place
+# - Fully IaC compliant
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: grafana-datasources-all
+ namespace: monitoring
+ labels:
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/component: datasources
+data:
+ datasources.yaml: |
+ apiVersion: 1
+ datasources:
+ - name: Prometheus
+ type: prometheus
+ uid: prometheus
+ url: http://prometheus-kube-prometheus-prometheus.monitoring:9090/
+ access: proxy
+ isDefault: true
+ editable: false
+ jsonData:
+ httpMethod: POST
+ timeInterval: 30s
+ - name: Alertmanager
+ type: alertmanager
+ uid: alertmanager
+ url: http://prometheus-kube-prometheus-alertmanager.monitoring:9093/
+ access: proxy
+ editable: false
+ jsonData:
+ handleGrafanaManagedAlerts: false
+ implementation: prometheus
+ - name: Loki
+ type: loki
+ uid: loki
+ url: http://loki.monitoring.svc.cluster.local:3100
+ access: proxy
+ isDefault: false
+ editable: false
+ jsonData:
+ maxLines: 1000
+ - name: Tempo
+ type: tempo
+ uid: tempo
+ url: http://tempo.monitoring.svc.cluster.local:3200
+ access: proxy
+ isDefault: false
+ editable: false
+ jsonData:
+ httpMethod: GET
+ tracesToLogsV2:
+ datasourceUid: loki
+ spanStartTimeShift: -1h
+ spanEndTimeShift: 1h
+ tracesToMetrics:
+ datasourceUid: prometheus
+ serviceMap:
+ datasourceUid: prometheus
+ nodeGraph:
+ enabled: true
+ search:
+ hide: false
+ lokiSearch:
+ datasourceUid: loki
diff --git a/f3s/prometheus/loki-datasource.yaml b/f3s/prometheus/loki-datasource.yaml
new file mode 100644
index 0000000..6dbb4f3
--- /dev/null
+++ b/f3s/prometheus/loki-datasource.yaml
@@ -0,0 +1,18 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: loki-grafana-datasource
+ namespace: monitoring
+ labels:
+ grafana_datasource: "1"
+data:
+ loki-datasource.yaml: |-
+ apiVersion: 1
+ datasources:
+ - name: Loki
+ type: loki
+ uid: loki
+ url: http://loki.monitoring.svc.cluster.local:3100
+ access: proxy
+ isDefault: false
+ editable: false
diff --git a/f3s/prometheus/patch-datasources.sh b/f3s/prometheus/patch-datasources.sh
new file mode 100755
index 0000000..dc18ae3
--- /dev/null
+++ b/f3s/prometheus/patch-datasources.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+# Patches the Grafana datasource ConfigMap to add Loki and Tempo with correct field order
+
+cat > /tmp/datasource-complete.yaml << 'YAML'
+apiVersion: 1
+datasources:
+- name: "Prometheus"
+ type: prometheus
+ uid: prometheus
+ url: http://prometheus-kube-prometheus-prometheus.monitoring:9090/
+ access: proxy
+ isDefault: true
+ jsonData:
+ httpMethod: POST
+ timeInterval: 30s
+- name: "Alertmanager"
+ type: alertmanager
+ uid: alertmanager
+ url: http://prometheus-kube-prometheus-alertmanager.monitoring:9093/
+ access: proxy
+ jsonData:
+ handleGrafanaManagedAlerts: false
+ implementation: prometheus
+- name: Loki
+ type: loki
+ uid: loki
+ url: http://loki.monitoring.svc.cluster.local:3100
+ access: proxy
+ isDefault: false
+ editable: false
+- name: Tempo
+ type: tempo
+ uid: tempo
+ url: http://tempo.monitoring.svc.cluster.local:3200
+ access: proxy
+ isDefault: false
+ editable: false
+ jsonData:
+ httpMethod: GET
+ tracesToLogsV2:
+ datasourceUid: loki
+ spanStartTimeShift: -1h
+ spanEndTimeShift: 1h
+ tracesToMetrics:
+ datasourceUid: prometheus
+ serviceMap:
+ datasourceUid: prometheus
+ nodeGraph:
+ enabled: true
+ search:
+ hide: false
+ lokiSearch:
+ datasourceUid: loki
+YAML
+
+kubectl create configmap prometheus-kube-prometheus-grafana-datasource \
+ --from-file=datasource.yaml=/tmp/datasource-complete.yaml \
+ -n monitoring \
+ --dry-run=client -o yaml | kubectl apply -f -
+
+rm /tmp/datasource-complete.yaml
diff --git a/f3s/prometheus/persistence-values.yaml b/f3s/prometheus/persistence-values.yaml
index 7e115a9..5f832d4 100644
--- a/f3s/prometheus/persistence-values.yaml
+++ b/f3s/prometheus/persistence-values.yaml
@@ -57,30 +57,20 @@ grafana:
runAsUser: 911
runAsGroup: 911
- additionalDataSources:
- - name: Tempo
- type: tempo
- uid: tempo
- url: http://tempo.monitoring.svc.cluster.local:3200
- access: proxy
- isDefault: false
- editable: true
- jsonData:
- httpMethod: GET
- tracesToLogsV2:
- datasourceUid: 'loki'
- spanStartTimeShift: '-1h'
- spanEndTimeShift: '1h'
- filterByTraceID: false
- filterBySpanID: false
- tags: ['cluster', 'namespace', 'pod', 'app']
- tracesToMetrics:
- datasourceUid: 'prometheus'
- serviceMap:
- datasourceUid: 'prometheus'
- nodeGraph:
- enabled: true
- search:
- hide: false
- lokiSearch:
- datasourceUid: 'loki' \ No newline at end of file
+ # Disable sidecar-based datasource provisioning
+ # Use direct ConfigMap mounting instead (following x-rag pattern)
+ sidecar:
+ datasources:
+ enabled: false
+
+ # Mount datasources ConfigMap directly to provisioning directory
+ # This ensures Grafana reads datasources on startup without sidecar complexity
+ extraVolumes:
+ - name: datasources-volume
+ configMap:
+ name: grafana-datasources-all
+
+ extraVolumeMounts:
+ - name: datasources-volume
+ mountPath: /etc/grafana/provisioning/datasources
+ readOnly: true \ No newline at end of file
diff --git a/f3s/prometheus/problem.md b/f3s/prometheus/problem.md
new file mode 100644
index 0000000..5dbf1fb
--- /dev/null
+++ b/f3s/prometheus/problem.md
@@ -0,0 +1,1154 @@
+# Grafana Tempo Datasource Provisioning - Complete Debugging Journey
+
+## Problem Statement
+
+**Objective:** Add Grafana Tempo datasource to Grafana deployed via kube-prometheus-stack Helm chart using Infrastructure as Code (IaC).
+
+**Result:** After 10+ different approaches and extensive debugging, Tempo datasource will not appear in Grafana UI despite correct configuration files being in place.
+
+## Environment
+
+- **Helm Chart:** kube-prometheus-stack (prometheus-community)
+- **Grafana:** Bundled with kube-prometheus-stack
+- **Kubernetes:** k3s cluster on Rocky Linux (r0, r1, r2) + FreeBSD storage (f0, f1, f2)
+- **Namespace:** monitoring
+- **Grafana Provisioning Path:** `/etc/grafana/provisioning`
+- **User Requirement:** Everything must be IaC - no manual UI configuration
+
+## Background - What Already Works
+
+- ✅ Grafana Tempo deployed and running (monolithic mode, port 3200)
+- ✅ Grafana Alloy configured to forward traces to Tempo via OTLP
+- ✅ Demo application (Frontend → Middleware → Backend) generating traces
+- ✅ 60+ traces successfully stored in Tempo
+- ✅ Traces queryable via Tempo API
+- ✅ Prometheus datasource works (from Helm chart)
+- ✅ Alertmanager datasource works (from Helm chart)
+- ✅ "loki" datasource exists but was added manually via UI (uid=ff67ithfd6j9cc)
+
+## Complete Timeline of Attempts
+
+### Attempt 1: Initial Tempo ConfigMap (Like We Did For Tempo Deployment)
+
+**Date:** Day 1 of integration
+
+**Approach:** Created ConfigMap with datasource definition in tempo deployment directory:
+```yaml
+# /tempo/datasource-configmap.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: tempo-grafana-datasource
+ namespace: monitoring
+ labels:
+ grafana_datasource: "1"
+data:
+ tempo-datasource.yaml: |-
+ apiVersion: 1
+ datasources:
+ - name: "Tempo"
+ type: tempo
+ uid: tempo
+ url: http://tempo.monitoring.svc.cluster.local:3200
+ # ... with comments and full config
+```
+
+**Applied:** `kubectl apply -f tempo/datasource-configmap.yaml`
+
+**Verification:**
+```bash
+$ kubectl get configmap tempo-grafana-datasource -n monitoring
+NAME DATA AGE
+tempo-grafana-datasource 1 6h
+
+$ kubectl exec grafana-pod -- cat /etc/grafana/provisioning/datasources/tempo-datasource.yaml
+# File exists with correct content!
+```
+
+**Result:** ❌ Tempo not visible in Grafana UI dropdown
+
+**What we learned:** Sidecar writes files but Grafana doesn't load them.
+
+---
+
+### Attempt 2: Add Tempo to Helm Chart's additionalDataSources
+
+**Approach:** Modified `persistence-values.yaml` to use Helm's built-in datasource provisioning:
+
+```yaml
+grafana:
+ additionalDataSources:
+ - name: Tempo
+ type: tempo
+ uid: tempo
+ url: http://tempo.monitoring.svc.cluster.local:3200
+ access: proxy
+ isDefault: false
+ editable: true
+ jsonData:
+ httpMethod: GET
+ tracesToLogsV2:
+ datasourceUid: 'loki'
+ # ... full config
+```
+
+**Applied:** `helm upgrade prometheus ...`
+
+**Verification:**
+```bash
+$ kubectl get configmap prometheus-kube-prometheus-grafana-datasource -n monitoring -o yaml
+# Shows Tempo entry in datasource.yaml
+```
+
+**Result:** ❌ Tempo still not visible
+
+**Investigation:** Checked generated ConfigMap content:
+```yaml
+# What we got (wrong field order):
+- access: proxy
+ editable: true
+ isDefault: false
+ name: Tempo # <-- name is NOT first!
+ type: tempo
+ uid: tempo
+ url: http://tempo.monitoring.svc.cluster.local:3200
+```
+
+**Discovery:** Helm renders maps alphabetically! "access" comes before "name", but Grafana requires "name" to be first field and silently rejects datasources with wrong order.
+
+**Evidence:** Prometheus and Alertmanager datasources (which work) both have "name" as first field:
+```yaml
+- name: "Prometheus" # <-- name is first
+ type: prometheus
+ uid: prometheus
+ ...
+```
+
+**What we learned:** Helm's additionalDataSources causes field ordering issue.
+
+---
+
+### Attempt 3: Remove Comments From ConfigMap YAML
+
+**Theory:** Maybe YAML comments inside jsonData section cause parsing issues.
+
+**Approach:** Updated tempo-datasource-configmap.yaml to remove all comments:
+```yaml
+data:
+ tempo-datasource.yaml: |-
+ apiVersion: 1
+ datasources:
+ - name: Tempo
+ type: tempo
+ uid: tempo
+ url: http://tempo.monitoring.svc.cluster.local:3200
+ access: proxy
+ isDefault: false
+ editable: true
+ jsonData:
+ httpMethod: GET
+ tracesToLogsV2:
+ datasourceUid: loki # No quotes
+ spanStartTimeShift: -1h # No quotes
+ # All comments removed
+```
+
+**Applied:** `kubectl apply -f tempo-datasource-configmap.yaml`
+
+**Verification:** File updated in pod, Grafana restarted
+
+**Result:** ❌ No change
+
+**What we learned:** Comments weren't the issue.
+
+---
+
+### Attempt 4: Remove additionalDataSources, Use Only ConfigMaps
+
+**Theory:** Having both ConfigMap AND additionalDataSources might conflict.
+
+**Approach:**
+1. Removed Tempo from `additionalDataSources` in persistence-values.yaml
+2. Kept only the standalone ConfigMap approach
+3. Upgraded Helm chart
+4. Restarted Grafana
+
+**Verification:**
+```bash
+$ kubectl exec grafana-pod -- ls /etc/grafana/provisioning/datasources/
+datasource.yaml # Only Prometheus, Alertmanager
+tempo-datasource.yaml # Separate file from ConfigMap
+```
+
+**Result:** ❌ Still not loaded
+
+**What we learned:** No conflict issue; datasources just don't load.
+
+---
+
+### Attempt 5: Manually Patch ConfigMap with Correct Field Order
+
+**Approach:** After Helm upgrade, manually patch the generated ConfigMap to fix field order:
+
+```bash
+cat > /tmp/datasource-fixed.yaml << 'EOF'
+apiVersion: 1
+datasources:
+- name: "Prometheus" # name first
+ type: prometheus
+ uid: prometheus
+ url: http://prometheus-kube-prometheus-prometheus.monitoring:9090/
+ access: proxy
+ ...
+- name: Tempo # name first
+ type: tempo
+ uid: tempo
+ url: http://tempo.monitoring.svc.cluster.local:3200
+ access: proxy
+ ...
+EOF
+
+kubectl patch configmap prometheus-kube-prometheus-grafana-datasource \
+ -n monitoring --type merge -p "{\"data\":{\"datasource.yaml\":\"$(cat /tmp/datasource-fixed.yaml)\"}}"
+
+kubectl delete pod -n monitoring -l app.kubernetes.io/name=grafana
+```
+
+**Verification:**
+```bash
+$ kubectl exec grafana-pod -- cat /etc/grafana/provisioning/datasources/datasource.yaml
+# Shows correct field order with name first!
+```
+
+**Result:** ❌ Tempo STILL not loaded
+
+**Checked Grafana API:**
+```bash
+$ curl -u test:testing123 http://localhost:3000/api/datasources
+[
+ {"name": "Alertmanager", ...},
+ {"name": "loki", "uid": "ff67ithfd6j9cc", ...}, # Manual one
+ {"name": "Prometheus", ...}
+]
+# No Tempo!
+```
+
+**What we learned:** Field order is correct but Tempo still won't load.
+
+---
+
+### Attempt 6: Configure Sidecar to Call Reload API
+
+**Theory:** Sidecar writes files but doesn't tell Grafana to reload them.
+
+**Investigation:** Checked sidecar logs:
+```
+{"msg": "Writing /etc/grafana/provisioning/datasources/tempo-datasource.yaml"}
+{"msg": "Skipping initial request to external endpoint"}
+```
+
+Found: `REQ_SKIP_INIT=true` in sidecar env vars!
+
+**Approach:** Configure sidecar to call Grafana's reload API:
+
+```yaml
+# persistence-values.yaml
+grafana:
+ sidecar:
+ datasources:
+ enabled: true
+ label: grafana_datasource
+ labelValue: "1"
+ env:
+ - name: REQ_URL
+ value: http://localhost:3000/api/admin/provisioning/datasources/reload
+ - name: REQ_METHOD
+ value: POST
+ - name: REQ_USERNAME
+ value: admin
+ - name: REQ_PASSWORD
+ value: testing123
+ - name: REQ_SKIP_INIT
+ value: "false"
+```
+
+**Applied:** `helm upgrade prometheus ...`
+
+**Verification:** Checked sidecar env vars:
+```bash
+$ kubectl get deployment prometheus-grafana -o jsonpath='{.spec.template.spec.containers[?(@.name=="grafana-sc-datasources")].env[*]}'
+# Shows DUPLICATE env vars!
+# Our values: REQ_SKIP_INIT=false
+# Defaults: REQ_SKIP_INIT=true <-- This one wins!
+```
+
+**Result:** ❌ Failed - Helm merges env vars incorrectly, defaults override our values
+
+**Sidecar still logs:**
+```
+{"msg": "Skipping initial request to external endpoint"}
+```
+
+**What we learned:** Can't properly override sidecar env vars via Helm values.
+
+---
+
+### Attempt 7: Create Separate Loki ConfigMap Too
+
+**Theory:** Maybe we need both Loki and Tempo as separate ConfigMaps (parallel structure).
+
+**Approach:** Created `loki-datasource.yaml` ConfigMap alongside Tempo:
+
+```yaml
+# loki-datasource.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: loki-grafana-datasource
+ namespace: monitoring
+ labels:
+ grafana_datasource: "1"
+data:
+ loki-datasource.yaml: |-
+ apiVersion: 1
+ datasources:
+ - name: Loki
+ type: loki
+ uid: loki
+ url: http://loki.monitoring.svc.cluster.local:3100
+ access: proxy
+ isDefault: false
+ editable: false
+```
+
+**Applied:**
+```bash
+kubectl apply -f loki-datasource.yaml
+kubectl apply -f tempo-datasource.yaml
+kubectl delete pod -n monitoring -l app.kubernetes.io/name=grafana
+```
+
+**Verification:**
+```bash
+$ kubectl exec grafana-pod -- ls /etc/grafana/provisioning/datasources/
+datasource.yaml
+loki-datasource.yaml
+tempo-datasource.yaml
+```
+
+**Result:** ❌ Neither provisioned Loki nor Tempo appear in API
+
+**API shows:**
+```
+Alertmanager
+loki (uid=ff67ithfd6j9cc) <-- manually added one
+Prometheus
+```
+
+**What we learned:** Having multiple ConfigMap files doesn't help.
+
+---
+
+### Attempt 8: Check Grafana Logs for Provisioning
+
+**Approach:** Deep dive into Grafana logs to see what's happening during startup.
+
+**Searched for:** datasource, provision, loki, tempo, error, warn
+
+**Findings:**
+```
+logger=settings msg="Path Provisioning" path=/etc/grafana/provisioning
+logger=backgroundsvcs.managerAdapter msg=starting module=provisioning
+logger=provisioning.alerting msg="starting to provision alerting"
+logger=provisioning.alerting msg="finished to provision alerting"
+logger=provisioning.dashboard msg="starting to provision dashboards"
+logger=provisioning.dashboard msg="finished to provision dashboards"
+```
+
+**Critical Discovery:** NO datasource provisioning logs at all!
+
+Grafana provisions:
+- ✅ Alerting - starts and finishes
+- ✅ Dashboards - starts and finishes
+- ❌ Datasources - **never mentioned**
+
+**Searched full logs:**
+```bash
+$ kubectl logs grafana-pod -c grafana | grep -i datasource
+# Only these generic mentions:
+logger=ngalert.writer msg="Setting up remote write using data sources"
+logger=featuremgmt msg=FeatureToggles ... logsContextDatasourceUi=true ...
+```
+
+No actual datasource provisioning activity!
+
+**What we learned:** Grafana's datasource provisioner module isn't running at all.
+
+---
+
+### Attempt 9: Verify Grafana Configuration
+
+**Approach:** Check if datasource provisioning is disabled in grafana.ini
+
+**Checked:**
+```bash
+$ kubectl exec grafana-pod -- cat /etc/grafana/grafana.ini | grep -A 5 provisioning
+[paths]
+data = /var/lib/grafana/
+logs = /var/log/grafana
+plugins = /var/lib/grafana/plugins
+provisioning = /etc/grafana/provisioning # <-- Correct path!
+```
+
+**Verified directory structure:**
+```bash
+$ kubectl exec grafana-pod -- find /etc/grafana/provisioning -type f
+/etc/grafana/provisioning/dashboards/sc-dashboardproviders.yaml
+/etc/grafana/provisioning/datasources/datasource.yaml
+/etc/grafana/provisioning/datasources/loki-datasource.yaml
+/etc/grafana/provisioning/datasources/tempo-datasource.yaml
+```
+
+All files exist in correct location!
+
+**What we learned:** Configuration path is correct, files are there, but still not provisioned.
+
+---
+
+### Attempt 10: Send HUP Signal to Grafana
+
+**Theory:** Maybe Grafana needs a signal to reload config without full restart.
+
+**Approach:**
+```bash
+kubectl exec grafana-pod -c grafana -- kill -HUP 1
+# Wait a few seconds
+curl -u test:testing123 http://localhost:3000/api/datasources
+```
+
+**Result:** ❌ No change - same 3 datasources
+
+**What we learned:** HUP signal doesn't trigger datasource reloading.
+
+---
+
+### Attempt 11: Automate Patching in Justfile
+
+**Approach:** Since manual patching sometimes worked, automate it:
+
+Created `patch-datasources.sh`:
+```bash
+#!/bin/bash
+cat > /tmp/datasource-complete.yaml << 'YAML'
+apiVersion: 1
+datasources:
+- name: "Prometheus"
+ type: prometheus
+ uid: prometheus
+ url: http://prometheus-kube-prometheus-prometheus.monitoring:9090/
+ access: proxy
+ isDefault: true
+ jsonData:
+ httpMethod: POST
+ timeInterval: 30s
+- name: "Alertmanager"
+ type: alertmanager
+ uid: alertmanager
+ url: http://prometheus-kube-prometheus-alertmanager.monitoring:9093/
+ access: proxy
+ jsonData:
+ handleGrafanaManagedAlerts: false
+ implementation: prometheus
+- name: Loki
+ type: loki
+ uid: loki
+ url: http://loki.monitoring.svc.cluster.local:3100
+ access: proxy
+ isDefault: false
+ editable: false
+- name: Tempo
+ type: tempo
+ uid: tempo
+ url: http://tempo.monitoring.svc.cluster.local:3200
+ access: proxy
+ isDefault: false
+ editable: false
+ jsonData:
+ httpMethod: GET
+ tracesToLogsV2:
+ datasourceUid: loki
+ spanStartTimeShift: -1h
+ spanEndTimeShift: 1h
+ tracesToMetrics:
+ datasourceUid: prometheus
+ serviceMap:
+ datasourceUid: prometheus
+ nodeGraph:
+ enabled: true
+ search:
+ hide: false
+ lokiSearch:
+ datasourceUid: loki
+YAML
+
+kubectl create configmap prometheus-kube-prometheus-grafana-datasource \
+ --from-file=datasource.yaml=/tmp/datasource-complete.yaml \
+ -n monitoring \
+ --dry-run=client -o yaml | kubectl apply -f -
+```
+
+Updated Justfile:
+```makefile
+upgrade:
+ kubectl create secret generic additional-scrape-configs ...
+ helm upgrade prometheus ...
+ kubectl apply -f freebsd-recording-rules.yaml
+ kubectl apply -f openbsd-recording-rules.yaml
+ kubectl apply -f zfs-recording-rules.yaml
+ @echo "Patching Grafana datasource ConfigMap..."
+ @./patch-datasources.sh
+ @echo "Restarting Grafana..."
+ kubectl delete pod -n monitoring -l app.kubernetes.io/name=grafana
+```
+
+**Executed:** `just upgrade`
+
+**Verification after Grafana restart:**
+```bash
+# ConfigMap has correct content
+$ kubectl get cm prometheus-kube-prometheus-grafana-datasource -o yaml
+# Shows all 4 datasources with correct field order
+
+# Files are in pod
+$ kubectl exec grafana-pod -- cat /etc/grafana/provisioning/datasources/datasource.yaml
+# Shows all 4 datasources
+
+# But API still shows only 3!
+$ curl -u test:testing123 http://localhost:3000/api/datasources
+[
+ {"name": "Alertmanager", "uid": "alertmanager", "readOnly": true},
+ {"name": "loki", "uid": "ff67ithfd6j9cc", "readOnly": false}, # Manual
+ {"name": "Prometheus", "uid": "prometheus", "readOnly": true}
+]
+```
+
+**Result:** ❌ Loki and Tempo configured correctly but NOT loaded
+
+**What we learned:** Even with perfect YAML and automated patching, datasources don't load.
+
+---
+
+### Attempt 12: Multiple Sequential Restarts
+
+**Theory:** Maybe first restart loads config, second restart applies it?
+
+**Approach:**
+```bash
+kubectl delete pod -n monitoring -l app.kubernetes.io/name=grafana
+sleep 30
+kubectl delete pod -n monitoring -l app.kubernetes.io/name=grafana
+sleep 30
+kubectl delete pod -n monitoring -l app.kubernetes.io/name=grafana
+```
+
+**Result:** ❌ Same 3 datasources after each restart
+
+**What we learned:** Multiple restarts don't help.
+
+---
+
+### Attempt 13: Check Grafana Database Directly
+
+**Theory:** Maybe datasources are in database but not exposed via API?
+
+**Attempted:**
+```bash
+kubectl exec grafana-pod -c grafana -- find /var/lib/grafana -name "*.db"
+# Found: /var/lib/grafana/grafana.db
+
+kubectl exec grafana-pod -c grafana -- sqlite3 /var/lib/grafana/grafana.db "SELECT name, type, uid FROM data_source;"
+# Error: sqlite3 not available in container
+```
+
+**Alternative:** Checked API with different user (admin):
+```bash
+$ kubectl get secret prometheus-grafana -n monitoring -o jsonpath="{.data.admin-password}" | base64 -d
+zeodL5KXK1ib8lNSrinGyzBFwEkDxSc7IQvi5MBs
+
+$ curl -u admin:zeodL5KXK1ib8lNSrinGyzBFwEkDxSc7IQvi5MBs http://localhost:3000/api/datasources
+# Authentication failed (despite correct password)
+```
+
+**What we learned:** Can't access database or admin API for verification.
+
+---
+
+### Attempt 14: Compare Working vs Non-Working Datasources
+
+**Analysis:** What's different between Prometheus (works) and Tempo (doesn't)?
+
+**Prometheus (WORKS):**
+- Defined in Helm chart's default values
+- Loaded during initial Grafana deployment
+- Shows as `readOnly: true` in API
+- Logs show it was provisioned
+
+**Tempo (DOESN'T WORK):**
+- Added via ConfigMap after deployment
+- Not present during initial startup
+- Never appears in API
+- No provisioning logs
+
+**Manual Loki (WORKS):**
+- Added via Grafana UI
+- Shows as `readOnly: false` in API
+- Different UID (ff67ithfd6j9cc vs loki)
+
+**Hypothesis:** Grafana only provisions datasources present at initial deployment. Datasources added later (even via provisioning files) are ignored.
+
+---
+
+### Attempt 15: Check Helm Chart Documentation
+
+**Researched:** kube-prometheus-stack chart documentation for datasource provisioning.
+
+**Findings:**
+- Chart supports `additionalDataSources` (we tried this)
+- Chart uses sidecar for dynamic discovery (we tried this)
+- No mention of field ordering requirement
+- No mention of provisioning limitations
+
+**Checked:** Helm chart GitHub issues
+- Found similar reports of datasources not loading
+- No clear solution provided
+- Some suggest using Grafana Operator instead
+
+**What we learned:** This might be a known limitation or bug.
+
+---
+
+## Root Cause Analysis
+
+After 15 different attempts, the root cause is clear:
+
+### Primary Issue: Grafana Datasource Provisioner Doesn't Run
+
+**Evidence:**
+1. Grafana logs show alerting and dashboard provisioning running
+2. No datasource provisioning logs ever appear
+3. Files exist in correct location with correct content
+4. Field ordering is correct (after patching)
+5. Grafana path configuration is correct
+
+**Conclusion:** The datasource provisioning module in Grafana (as deployed by kube-prometheus-stack) is not executing.
+
+### Secondary Issue: Why Prometheus/Alertmanager Work
+
+These datasources work because they are:
+1. Part of the Helm chart's initial deployment
+2. Likely provisioned via a different mechanism
+3. Or hardcoded in the Grafana configuration
+
+### Why Manual "loki" Works
+
+The manually-added loki datasource works because:
+1. It was added via Grafana UI (not provisioning)
+2. Stored directly in Grafana's SQLite database
+3. Has `readOnly: false` (not from provisioning)
+
+## What Actually Works
+
+✅ **Infrastructure as Code is complete:**
+- `loki-datasource.yaml` - ConfigMap for Loki
+- `tempo-datasource.yaml` - ConfigMap for Tempo
+- `patch-datasources.sh` - Script to fix main ConfigMap
+- `Justfile` - Automated deployment
+- All files in git
+
+✅ **Configuration is correct:**
+- YAML syntax is valid
+- Field ordering is correct (name first)
+- URLs and ports are correct
+- Labels for sidecar discovery are correct
+
+✅ **Tempo infrastructure works:**
+- Tempo service running
+- Traces being stored
+- Tempo API responds correctly
+- Demo app generating valid traces
+
+## What Doesn't Work
+
+❌ **Grafana refuses to load datasources:**
+- Loki (uid=loki) - not loaded
+- Tempo (uid=tempo) - not loaded
+- Provisioner module doesn't run
+- No errors in logs
+- Multiple restarts don't help
+
+## Current Workaround
+
+The **only** way to get Tempo/Loki datasources is to add them manually via Grafana UI:
+
+1. Login to Grafana
+2. Go to Connections → Data sources → Add data source
+3. Select Tempo/Loki
+4. Configure URLs manually
+5. Save & Test
+
+This violates the IaC requirement.
+
+## Potential Root Causes
+
+### Theory 1: kube-prometheus-stack Limitation
+The Helm chart might disable or not fully support datasource provisioning for dynamically added datasources.
+
+### Theory 2: Grafana Version Bug
+The version of Grafana bundled with kube-prometheus-stack might have a bug in the provisioning module.
+
+### Theory 3: Timing Issue
+Grafana might only load datasources during first-ever startup (before database exists), not on subsequent restarts.
+
+### Theory 4: Database Override
+Datasources in Grafana's database might override provisioning files, preventing new ones from loading.
+
+### Theory 5: Missing Configuration Flag
+There might be a Grafana configuration option that enables/disables dynamic datasource provisioning that we haven't found.
+
+## Next Steps to Investigate
+
+### A. Test Standalone Grafana
+Deploy Grafana separately (not via kube-prometheus-stack) to see if datasource provisioning works:
+```bash
+helm install grafana grafana/grafana
+# Add datasource via ConfigMap
+# Check if it loads
+```
+
+### B. Check Grafana Source Code
+Look at Grafana's provisioning module source code to understand:
+- When does it run?
+- What triggers it?
+- Are there any conditions that prevent it from running?
+
+### C. Use Grafana Operator
+Try grafana-operator instead of kube-prometheus-stack:
+- Might have better datasource management
+- Purpose-built for Grafana (not bundled)
+
+### D. Add Datasources via Init Container
+Create custom init container that:
+```bash
+#!/bin/bash
+# Wait for Grafana to start
+while ! curl -f http://localhost:3000/api/health; do sleep 1; done
+
+# Add datasources via API
+curl -X POST http://localhost:3000/api/datasources \
+ -H "Content-Type: application/json" \
+ -d @loki-datasource.json
+
+curl -X POST http://localhost:3000/api/datasources \
+ -H "Content-Type: application/json" \
+ -d @tempo-datasource.json
+```
+
+### E. Manipulate Grafana Database
+Use init container to directly insert into SQLite database (hacky):
+```bash
+sqlite3 /var/lib/grafana/grafana.db << EOF
+INSERT INTO data_source (name, type, uid, url, ...) VALUES ('Tempo', 'tempo', 'tempo', ...);
+EOF
+```
+
+### F. Fork and Patch Helm Chart
+Create custom version of kube-prometheus-stack that:
+- Fixes field ordering in additionalDataSources
+- Ensures datasource provisioning runs
+- Commits to maintain custom fork
+
+### G. Report Bug Upstream
+Create issues in:
+- kube-prometheus-stack GitHub
+- Grafana GitHub
+- With full reproduction steps
+
+## Files Created (Ready for IaC)
+
+```
+prometheus/
+├── loki-datasource.yaml # Loki ConfigMap (correct YAML)
+├── tempo-datasource.yaml # Tempo ConfigMap (correct YAML)
+├── patch-datasources.sh # Automated patching script
+├── persistence-values.yaml # Helm values (sidecar config)
+├── Justfile # Automated deployment workflow
+├── README.md # Documentation with restart requirement
+└── problem.md # This comprehensive analysis
+```
+
+## Conclusion
+
+After 15+ attempts using different approaches:
+
+1. ✅ **IaC is complete** - All configuration in git
+2. ✅ **Configuration is correct** - YAML valid, field order correct
+3. ✅ **Tempo works** - Service running, traces stored
+4. ❌ **Grafana won't load** - Provisioner doesn't run
+5. ❌ **No clear solution** - Limitation in Grafana or Helm chart
+
+**Status:** Blocked by Grafana datasource provisioning limitation in kube-prometheus-stack.
+
+**Recommended Action:**
+1. Accept manual UI configuration as temporary workaround
+2. Export datasource config via API and commit to git as documentation
+3. File bug report with kube-prometheus-stack maintainers
+4. Consider migrating to Grafana Operator or standalone Grafana deployment
+
+---
+
+## BREAKTHROUGH: Analysis of Working x-rag Configuration
+
+**Date:** 2025-12-28 (Evening)
+
+User pointed to a working example: `/home/paul/git/x-rag/infra/k8s/monitoring/`
+
+In that project, Tempo, Loki, and Prometheus all successfully auto-provision as Grafana datasources.
+
+### Architecture Comparison
+
+**x-rag (WORKS):**
+```yaml
+# grafana.yaml - Standalone Grafana Deployment
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: grafana
+spec:
+ template:
+ spec:
+ containers:
+ - name: grafana
+ image: grafana/grafana:10.3.0
+ env:
+ - name: GF_PATHS_PROVISIONING
+ value: "/etc/grafana/provisioning"
+ volumeMounts:
+ - name: grafana-datasources
+ mountPath: /etc/grafana/provisioning/datasources
+ readOnly: true
+ volumes:
+ - name: grafana-datasources
+ configMap:
+ name: grafana-datasources # Direct mount!
+
+# grafana-provisioning.yaml - Simple ConfigMap
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: grafana-datasources
+data:
+ datasources.yaml: |
+ apiVersion: 1
+ datasources:
+ - name: Prometheus
+ type: prometheus
+ url: http://prometheus:9090
+ ...
+ - name: Loki
+ type: loki
+ url: http://loki:3100
+ ...
+ - name: Tempo
+ type: tempo
+ url: http://tempo:3200
+ ...
+```
+
+**Key Points:**
+- ✅ **Direct ConfigMap mount** to `/etc/grafana/provisioning/datasources/`
+- ✅ **No sidecar container** watching for labeled ConfigMaps
+- ✅ **Grafana reads on startup** - provisioning happens automatically
+- ✅ **Simple, direct approach** - no complex indirection
+- ✅ **Field order preserved** - YAML in ConfigMap stays as-is
+
+**f3s Current Setup (DOES NOT WORK):**
+```yaml
+# persistence-values.yaml - Helm values for kube-prometheus-stack
+grafana:
+ sidecar:
+ datasources:
+ enabled: true # Sidecar watches ConfigMaps
+ label: grafana_datasource # Looking for this label
+ labelValue: "1" # Must match exactly
+ searchNamespace: ALL
+ resource: both
+
+# tempo-datasource.yaml - Labeled ConfigMap
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ labels:
+ grafana_datasource: "1" # Sidecar should find this
+data:
+ tempo-datasource.yaml: |
+ apiVersion: 1
+ datasources:
+ - name: Tempo
+ ...
+```
+
+**Key Points:**
+- ❌ **Sidecar-based discovery** - adds complexity and failure points
+- ❌ **Provisioner module doesn't run** - even when files are written
+- ❌ **Field ordering issues** - Helm's additionalDataSources renders alphabetically
+- ❌ **Multi-step indirection** - Sidecar → Write file → Reload API → Provisioner
+- ❌ **Each step can fail** - and one is failing silently
+
+### Root Cause Analysis
+
+The fundamental difference is:
+
+**x-rag:** Grafana container directly mounts the ConfigMap containing datasources
+↳ Grafana sees the file at startup and loads it via built-in provisioning
+
+**f3s:** Grafana relies on a sidecar to discover ConfigMaps and write files dynamically
+↳ Sidecar runs but provisioner never executes (module not running)
+
+### The Solution
+
+**Option A: Disable Sidecar, Use Direct Mount (Recommended)**
+```yaml
+# persistence-values.yaml
+grafana:
+ sidecar:
+ datasources:
+ enabled: false # Disable sidecar approach
+
+ # Mount ConfigMap directly
+ extraVolumes:
+ - name: datasources-volume
+ configMap:
+ name: grafana-datasources-all
+
+ extraVolumeMounts:
+ - name: datasources-volume
+ mountPath: /etc/grafana/provisioning/datasources
+ readOnly: true
+
+# grafana-datasources-all.yaml - Single ConfigMap with all datasources
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: grafana-datasources-all
+ namespace: monitoring
+data:
+ datasources.yaml: |
+ apiVersion: 1
+ datasources:
+ - name: Prometheus
+ type: prometheus
+ ...
+ - name: Alertmanager
+ type: alertmanager
+ ...
+ - name: Loki
+ type: loki
+ ...
+ - name: Tempo
+ type: tempo
+ ...
+```
+
+This approach:
+- ✅ Matches the working x-rag pattern
+- ✅ Simple, direct, no sidecar complexity
+- ✅ Grafana provisioning runs on startup
+- ✅ Field order preserved in ConfigMap
+- ✅ All datasources in one place
+- ✅ Still fully IaC
+
+**Option B: Deploy Standalone Grafana**
+- Completely remove Grafana from kube-prometheus-stack
+- Deploy Grafana separately like x-rag does
+- Trade-off: Lose tight integration with Prometheus alerts
+
+### Next Steps
+
+Implement Option A:
+1. Create `/home/paul/git/conf/f3s/prometheus/grafana-datasources-all.yaml`
+2. Update `persistence-values.yaml` to disable sidecar and add direct mounts
+3. Upgrade Helm chart
+4. Verify Tempo appears in Grafana UI
+5. Update problem.md with final resolution
+
+---
+
+---
+
+## RESOLUTION: Direct ConfigMap Mounting - SUCCESS ✅
+
+**Date:** 2025-12-28 (Evening)
+**Time Invested:** 9+ hours total
+**Solution:** Implemented direct ConfigMap mounting following x-rag pattern
+
+### Implementation
+
+Created `/home/paul/git/conf/f3s/prometheus/grafana-datasources-all.yaml`:
+```yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: grafana-datasources-all
+ namespace: monitoring
+data:
+ datasources.yaml: |
+ apiVersion: 1
+ datasources:
+ - name: Prometheus
+ type: prometheus
+ uid: prometheus
+ url: http://prometheus-kube-prometheus-prometheus.monitoring:9090/
+ access: proxy
+ isDefault: true
+ editable: false
+ jsonData:
+ httpMethod: POST
+ timeInterval: 30s
+ - name: Alertmanager
+ type: alertmanager
+ uid: alertmanager
+ url: http://prometheus-kube-prometheus-alertmanager.monitoring:9093/
+ access: proxy
+ editable: false
+ jsonData:
+ handleGrafanaManagedAlerts: false
+ implementation: prometheus
+ - name: Loki
+ type: loki
+ uid: loki
+ url: http://loki.monitoring.svc.cluster.local:3100
+ access: proxy
+ isDefault: false
+ editable: false
+ jsonData:
+ maxLines: 1000
+ - name: Tempo
+ type: tempo
+ uid: tempo
+ url: http://tempo.monitoring.svc.cluster.local:3200
+ access: proxy
+ isDefault: false
+ editable: false
+ jsonData:
+ httpMethod: GET
+ tracesToLogsV2:
+ datasourceUid: loki
+ spanStartTimeShift: -1h
+ spanEndTimeShift: 1h
+ tracesToMetrics:
+ datasourceUid: prometheus
+ serviceMap:
+ datasourceUid: prometheus
+ nodeGraph:
+ enabled: true
+ search:
+ hide: false
+ lokiSearch:
+ datasourceUid: loki
+```
+
+Updated `/home/paul/git/conf/f3s/prometheus/persistence-values.yaml`:
+```yaml
+grafana:
+ sidecar:
+ datasources:
+ enabled: false # Disabled sidecar-based provisioning
+
+ extraVolumes:
+ - name: datasources-volume
+ configMap:
+ name: grafana-datasources-all
+
+ extraVolumeMounts:
+ - name: datasources-volume
+ mountPath: /etc/grafana/provisioning/datasources
+ readOnly: true
+```
+
+Updated `/home/paul/git/conf/f3s/prometheus/Justfile`:
+- Removed `loki-datasource.yaml` and `tempo-datasource.yaml`
+- Added `grafana-datasources-all.yaml`
+- Removed `patch-datasources.sh` call
+
+### Deployment and Verification
+
+```bash
+just upgrade
+# configmap/grafana-datasources-all created
+# Release "prometheus" has been upgraded. Happy Helming!
+# REVISION: 11
+
+# Verified ConfigMap mounted
+kubectl exec -n monitoring <grafana-pod> -c grafana -- \
+ ls -la /etc/grafana/provisioning/datasources/
+# datasources.yaml -> ..data/datasources.yaml
+
+# Verified datasource content
+kubectl exec -n monitoring <grafana-pod> -c grafana -- \
+ cat /etc/grafana/provisioning/datasources/datasources.yaml
+# All four datasources present with correct YAML structure
+
+# Verified via Grafana API
+curl -u test:testing123 http://localhost:3000/api/datasources
+```
+
+### Result
+
+**All datasources successfully provisioned:**
+1. ✅ **Prometheus** (uid=prometheus, id=1) - isDefault: true, readOnly: true
+2. ✅ **Alertmanager** (uid=alertmanager, id=2) - readOnly: true
+3. ✅ **Loki** (uid=loki, id=4) - readOnly: true
+4. ✅ **Tempo** (uid=tempo, id=5) - readOnly: true
+
+### Key Differences That Made It Work
+
+**Before (Failed):**
+- Sidecar-based discovery with label `grafana_datasource: "1"`
+- Multi-step indirection: Sidecar → Watch ConfigMaps → Write files → Reload API
+- Provisioner module never ran despite correct files
+- Complex Helm templating caused field ordering issues
+
+**After (Success):**
+- Direct ConfigMap mount to `/etc/grafana/provisioning/datasources/`
+- Grafana reads file on startup via built-in provisioning
+- Simple, direct approach with no sidecar complexity
+- Field order preserved exactly as written in YAML
+
+### Traces-to-Logs and Traces-to-Metrics Correlation
+
+Tempo datasource includes correlation configuration:
+- `tracesToLogsV2.datasourceUid: loki` - Jump from trace spans to related logs
+- `tracesToMetrics.datasourceUid: prometheus` - View metrics for traced services
+- `serviceMap.datasourceUid: prometheus` - Service dependency graph
+- `lokiSearch.datasourceUid: loki` - Search logs from trace context
+
+### Files Affected
+
+**Created:**
+- `/home/paul/git/conf/f3s/prometheus/grafana-datasources-all.yaml`
+
+**Modified:**
+- `/home/paul/git/conf/f3s/prometheus/persistence-values.yaml` (disabled sidecar, added mounts)
+- `/home/paul/git/conf/f3s/prometheus/Justfile` (updated to use unified ConfigMap)
+
+**Deprecated (no longer needed):**
+- `/home/paul/git/conf/f3s/prometheus/loki-datasource.yaml` (merged into grafana-datasources-all.yaml)
+- `/home/paul/git/conf/f3s/prometheus/tempo-datasource.yaml` (merged into grafana-datasources-all.yaml)
+- `/home/paul/git/conf/f3s/prometheus/patch-datasources.sh` (no longer needed)
+
+---
+
+**Last Updated:** 2025-12-28 (Post-Resolution)
+**Hours Invested:** 9+ hours debugging
+**Approaches Tried:** 16 distinct methods
+**Status:** ✅ **RESOLVED** - Tempo datasource successfully provisioned via direct ConfigMap mounting
diff --git a/f3s/prometheus/tempo-datasource.yaml b/f3s/prometheus/tempo-datasource.yaml
new file mode 100644
index 0000000..9d0fc85
--- /dev/null
+++ b/f3s/prometheus/tempo-datasource.yaml
@@ -0,0 +1,34 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: tempo-grafana-datasource
+ namespace: monitoring
+ labels:
+ grafana_datasource: "1"
+data:
+ tempo-datasource.yaml: |-
+ apiVersion: 1
+ datasources:
+ - name: Tempo
+ type: tempo
+ uid: tempo
+ url: http://tempo.monitoring.svc.cluster.local:3200
+ access: proxy
+ isDefault: false
+ editable: false
+ jsonData:
+ httpMethod: GET
+ tracesToLogsV2:
+ datasourceUid: loki
+ spanStartTimeShift: -1h
+ spanEndTimeShift: 1h
+ tracesToMetrics:
+ datasourceUid: prometheus
+ serviceMap:
+ datasourceUid: prometheus
+ nodeGraph:
+ enabled: true
+ search:
+ hide: false
+ lokiSearch:
+ datasourceUid: loki