summaryrefslogtreecommitdiff
path: root/f3s
diff options
context:
space:
mode:
authorPaul Buetow <paul@buetow.org>2026-01-15 19:32:46 +0200
committerPaul Buetow <paul@buetow.org>2026-01-15 19:32:46 +0200
commit03eb96f968a6d2cecb2bdf1d23accf51a270188e (patch)
treeec5fea29a139ad6211523f69c12cff1414268497 /f3s
parentcf2f8a9c77adde891a85c6e4ab405ede95e6ec97 (diff)
Update monitoring and gogios configuration
- Add node resources multi-select dashboard for Prometheus - Update gogios cron schedule and add HTML status file output - Update Prometheus scrape configs - Add gogios documentation Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
Diffstat (limited to 'f3s')
-rw-r--r--f3s/prometheus/additional-scrape-configs.yaml30
-rw-r--r--f3s/prometheus/manifests/node-resources-multi-select-dashboard.yaml662
2 files changed, 692 insertions, 0 deletions
diff --git a/f3s/prometheus/additional-scrape-configs.yaml b/f3s/prometheus/additional-scrape-configs.yaml
index be118df..21cbf1f 100644
--- a/f3s/prometheus/additional-scrape-configs.yaml
+++ b/f3s/prometheus/additional-scrape-configs.yaml
@@ -18,4 +18,34 @@
- targets:
- 'pushgateway.monitoring.svc.cluster.local:9091'
+# Static job for the radicale service, which does not expose Prometheus
+# metrics. The single relabel rule has no `regex`, so the default `(.*)`
+# matches every address and each target of this job is dropped before it
+# is scraped.
+# NOTE(review): relabel_configs only apply to targets of the job they are
+# declared in; confirm this entry is not expected to keep other jobs from
+# scraping radicale.
+- job_name: 'radicale-drop'
+  relabel_configs:
+    - action: drop
+      source_labels: [__address__]
+  static_configs:
+    - targets:
+        - 'radicale-service.services.svc.cluster.local:80'
+
+# Service discovery limited to the "services" namespace. Only ports named
+# "metrics" or "prometheus" are kept, and radicale-service is excluded.
+- job_name: 'kubernetes-services-no-radicale'
+  kubernetes_sd_configs:
+    - role: service
+      namespaces:
+        names: [services]
+  relabel_configs:
+    # Rules are applied in the order listed.
+    # 1. Discard every target that belongs to radicale-service.
+    - action: drop
+      source_labels: [__meta_kubernetes_service_name]
+      regex: 'radicale-service'
+    # 2. Retain a target only if its port name is one of the metrics names.
+    - action: keep
+      source_labels: [__meta_kubernetes_service_port_name]
+      regex: 'metrics|prometheus'
+    # 3. Copy the Service name into the `job` label.
+    - action: replace
+      source_labels: [__meta_kubernetes_service_name]
+      target_label: job
+    # 4. Copy the Kubernetes namespace into the `namespace` label.
+    - action: replace
+      source_labels: [__meta_kubernetes_namespace]
+      target_label: namespace
+
diff --git a/f3s/prometheus/manifests/node-resources-multi-select-dashboard.yaml b/f3s/prometheus/manifests/node-resources-multi-select-dashboard.yaml
new file mode 100644
index 0000000..05ec93a
--- /dev/null
+++ b/f3s/prometheus/manifests/node-resources-multi-select-dashboard.yaml
@@ -0,0 +1,662 @@
+# ConfigMap that carries the "Node Resources - Multi Select" Grafana
+# dashboard JSON in its data section.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  namespace: monitoring
+  name: node-resources-multi-select-dashboard
+  labels:
+    app.kubernetes.io/instance: prometheus
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/part-of: kube-prometheus-stack
+    # presumably the label the Grafana dashboard sidecar selects on; verify
+    # against the kube-prometheus-stack values in use
+    grafana_dashboard: "1"
+    release: prometheus
+  annotations:
+    argocd.argoproj.io/sync-wave: "4"
+data:
+ node-resources-multi.json: |-
+ {
+ "title": "Node Resources - Multi Select",
+ "uid": "node-resources-multi",
+ "timezone": "browser",
+ "schemaVersion": 38,
+ "refresh": "30s",
+ "tags": [
+ "node-exporter",
+ "nodes",
+ "resources"
+ ],
+ "editable": true,
+ "templating": {
+ "list": [
+ {
+ "name": "datasource",
+ "type": "datasource",
+ "query": "prometheus",
+ "hide": 0
+ },
+ {
+ "name": "instance",
+ "type": "query",
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${datasource}"
+ },
+ "query": "label_values(node_uname_info, instance)",
+ "refresh": 2,
+ "hide": 0,
+ "label": "Nodes",
+ "includeAll": true,
+ "allValue": ".*",
+ "multi": true,
+ "sort": 1
+ }
+ ]
+ },
+ "panels": [
+ {
+ "type": "row",
+ "collapsed": false,
+ "title": "CPU Usage",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ }
+ },
+ {
+ "type": "timeseries",
+ "title": "CPU Usage % (all cores)",
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 1
+ },
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${datasource}"
+ },
+ "targets": [
+ {
+ "expr": "100 - (avg(rate(node_cpu_seconds_total{mode=\"idle\",instance=~\"$instance\"}[5m])) by (instance) * 100)",
+ "legendFormat": "{{instance}}"
+ }
+ ],
+ "fieldConfig": {
+ "defaults": {
+ "unit": "percent",
+ "min": 0,
+ "max": 100,
+ "custom": {
+ "fillOpacity": 20,
+ "showPoints": "never"
+ }
+ }
+ },
+ "options": {
+ "legend": {
+ "displayMode": "table",
+ "placement": "right",
+ "showLegend": true
+ }
+ }
+ },
+ {
+ "type": "gauge",
+ "title": "CPU Usage % (Current)",
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 1
+ },
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${datasource}"
+ },
+ "targets": [
+ {
+ "expr": "100 - (avg(rate(node_cpu_seconds_total{mode=\"idle\",instance=~\"$instance\"}[5m])) by (instance) * 100)",
+ "legendFormat": "{{instance}}"
+ }
+ ],
+ "fieldConfig": {
+ "defaults": {
+ "unit": "percent",
+ "min": 0,
+ "max": 100,
+ "custom": {
+ "hideFrom": {
+ "tooltip": false,
+ "viz": false,
+ "legend": false
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "yellow",
+ "value": 70
+ },
+ {
+ "color": "red",
+ "value": 90
+ }
+ ]
+ }
+ }
+ },
+ "options": {
+ "orientation": "auto",
+ "textMode": "value_and_name",
+ "colorMode": "background",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "reduceOptions": {
+ "values": false,
+ "calcs": [
+ "lastNotNull"
+ ]
+ }
+ }
+ },
+ {
+ "type": "row",
+ "collapsed": false,
+ "title": "Memory Usage",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 9
+ }
+ },
+ {
+ "type": "timeseries",
+ "title": "Memory Usage %",
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 10
+ },
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${datasource}"
+ },
+ "targets": [
+ {
+ "expr": "100 * (1 - ((node_memory_MemAvailable_bytes{instance=~\"$instance\"} or (node_memory_MemFree_bytes{instance=~\"$instance\"} + node_memory_Buffers_bytes{instance=~\"$instance\"} + node_memory_Cached_bytes{instance=~\"$instance\"})) / node_memory_MemTotal_bytes{instance=~\"$instance\"}))",
+ "legendFormat": "{{instance}}"
+ }
+ ],
+ "fieldConfig": {
+ "defaults": {
+ "unit": "percent",
+ "min": 0,
+ "max": 100,
+ "custom": {
+ "fillOpacity": 20,
+ "showPoints": "never"
+ }
+ }
+ },
+ "options": {
+ "legend": {
+ "displayMode": "table",
+ "placement": "right",
+ "showLegend": true
+ }
+ }
+ },
+ {
+ "type": "gauge",
+ "title": "Memory Usage % (Current)",
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 10
+ },
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${datasource}"
+ },
+ "targets": [
+ {
+ "expr": "100 * (1 - ((node_memory_MemAvailable_bytes{instance=~\"$instance\"} or (node_memory_MemFree_bytes{instance=~\"$instance\"} + node_memory_Buffers_bytes{instance=~\"$instance\"} + node_memory_Cached_bytes{instance=~\"$instance\"})) / node_memory_MemTotal_bytes{instance=~\"$instance\"}))",
+ "legendFormat": "{{instance}}"
+ }
+ ],
+ "fieldConfig": {
+ "defaults": {
+ "unit": "percent",
+ "min": 0,
+ "max": 100,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "yellow",
+ "value": 70
+ },
+ {
+ "color": "red",
+ "value": 90
+ }
+ ]
+ }
+ }
+ },
+ "options": {
+ "orientation": "auto",
+ "textMode": "value_and_name",
+ "colorMode": "background",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "reduceOptions": {
+ "values": false,
+ "calcs": [
+ "lastNotNull"
+ ]
+ }
+ }
+ },
+ {
+ "type": "row",
+ "collapsed": false,
+ "title": "Disk Usage",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 18
+ }
+ },
+ {
+ "type": "timeseries",
+ "title": "Disk Usage % (root /)",
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 19
+ },
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${datasource}"
+ },
+ "targets": [
+ {
+ "expr": "100 * (node_filesystem_size_bytes{instance=~\"$instance\",mountpoint=\"/\",fstype!=\"tmpfs\"} - node_filesystem_avail_bytes{instance=~\"$instance\",mountpoint=\"/\",fstype!=\"tmpfs\"}) / node_filesystem_size_bytes{instance=~\"$instance\",mountpoint=\"/\",fstype!=\"tmpfs\"}",
+ "legendFormat": "{{instance}}"
+ }
+ ],
+ "fieldConfig": {
+ "defaults": {
+ "unit": "percent",
+ "min": 0,
+ "max": 100,
+ "custom": {
+ "fillOpacity": 20,
+ "showPoints": "never"
+ }
+ }
+ },
+ "options": {
+ "legend": {
+ "displayMode": "table",
+ "placement": "right",
+ "showLegend": true
+ }
+ }
+ },
+ {
+ "type": "gauge",
+ "title": "Disk Usage % (root / - Current)",
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 19
+ },
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${datasource}"
+ },
+ "targets": [
+ {
+ "expr": "100 * (node_filesystem_size_bytes{instance=~\"$instance\",mountpoint=\"/\",fstype!=\"tmpfs\"} - node_filesystem_avail_bytes{instance=~\"$instance\",mountpoint=\"/\",fstype!=\"tmpfs\"}) / node_filesystem_size_bytes{instance=~\"$instance\",mountpoint=\"/\",fstype!=\"tmpfs\"}",
+ "legendFormat": "{{instance}}"
+ }
+ ],
+ "fieldConfig": {
+ "defaults": {
+ "unit": "percent",
+ "min": 0,
+ "max": 100,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "yellow",
+ "value": 70
+ },
+ {
+ "color": "red",
+ "value": 85
+ }
+ ]
+ }
+ }
+ },
+ "options": {
+ "orientation": "auto",
+ "textMode": "value_and_name",
+ "colorMode": "background",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "reduceOptions": {
+ "values": false,
+ "calcs": [
+ "lastNotNull"
+ ]
+ }
+ }
+ },
+ {
+ "type": "row",
+ "collapsed": false,
+ "title": "Network I/O",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 27
+ }
+ },
+ {
+ "type": "timeseries",
+ "title": "Network Bytes Received",
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 28
+ },
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${datasource}"
+ },
+ "targets": [
+ {
+ "expr": "rate(node_network_receive_bytes_total{instance=~\"$instance\",device!~\"^(lo|veth.*|docker.*|br.*|virbr.*)$\"}[5m])",
+ "legendFormat": "{{instance}} - {{device}}"
+ }
+ ],
+ "fieldConfig": {
+ "defaults": {
+ "unit": "Bps",
+ "custom": {
+ "fillOpacity": 20,
+ "showPoints": "never"
+ }
+ }
+ },
+ "options": {
+ "legend": {
+ "displayMode": "table",
+ "placement": "right",
+ "showLegend": true
+ }
+ }
+ },
+ {
+ "type": "timeseries",
+ "title": "Network Bytes Transmitted",
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 28
+ },
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${datasource}"
+ },
+ "targets": [
+ {
+ "expr": "rate(node_network_transmit_bytes_total{instance=~\"$instance\",device!~\"^(lo|veth.*|docker.*|br.*|virbr.*)$\"}[5m])",
+ "legendFormat": "{{instance}} - {{device}}"
+ }
+ ],
+ "fieldConfig": {
+ "defaults": {
+ "unit": "Bps",
+ "custom": {
+ "fillOpacity": 20,
+ "showPoints": "never"
+ }
+ }
+ },
+ "options": {
+ "legend": {
+ "displayMode": "table",
+ "placement": "right",
+ "showLegend": true
+ }
+ }
+ },
+ {
+ "type": "row",
+ "collapsed": false,
+ "title": "Disk I/O",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 36
+ }
+ },
+ {
+   "type": "timeseries",
+   "title": "Disk Read Rate (bytes/sec)",
+   "description": "Per-device read throughput (5m rate). Devices whose name starts with loop or sr are excluded; the matcher is written as a full-match pattern because Prometheus anchors label regexes at both ends.",
+   "gridPos": {
+     "h": 8,
+     "w": 12,
+     "x": 0,
+     "y": 37
+   },
+   "datasource": {
+     "type": "prometheus",
+     "uid": "${datasource}"
+   },
+   "targets": [
+     {
+       "expr": "rate(node_disk_read_bytes_total{instance=~\"$instance\",device!~\"(loop|sr).*\"}[5m])",
+       "legendFormat": "{{instance}} - {{device}}"
+     }
+   ],
+   "fieldConfig": {
+     "defaults": {
+       "unit": "Bps",
+       "custom": {
+         "fillOpacity": 20,
+         "showPoints": "never"
+       }
+     }
+   },
+   "options": {
+     "legend": {
+       "displayMode": "table",
+       "placement": "right",
+       "showLegend": true
+     }
+   }
+ },
+ {
+   "type": "timeseries",
+   "title": "Disk Write Rate (bytes/sec)",
+   "description": "Per-device write throughput (5m rate). Devices whose name starts with loop or sr are excluded; the matcher is written as a full-match pattern because Prometheus anchors label regexes at both ends.",
+   "gridPos": {
+     "h": 8,
+     "w": 12,
+     "x": 12,
+     "y": 37
+   },
+   "datasource": {
+     "type": "prometheus",
+     "uid": "${datasource}"
+   },
+   "targets": [
+     {
+       "expr": "rate(node_disk_written_bytes_total{instance=~\"$instance\",device!~\"(loop|sr).*\"}[5m])",
+       "legendFormat": "{{instance}} - {{device}}"
+     }
+   ],
+   "fieldConfig": {
+     "defaults": {
+       "unit": "Bps",
+       "custom": {
+         "fillOpacity": 20,
+         "showPoints": "never"
+       }
+     }
+   },
+   "options": {
+     "legend": {
+       "displayMode": "table",
+       "placement": "right",
+       "showLegend": true
+     }
+   }
+ },
+ {
+ "type": "row",
+ "collapsed": false,
+ "title": "System Load & Processes",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 45
+ }
+ },
+ {
+ "type": "timeseries",
+ "title": "System Load Average",
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 46
+ },
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${datasource}"
+ },
+ "targets": [
+ {
+ "expr": "node_load1{instance=~\"$instance\"}",
+ "legendFormat": "{{instance}} - 1m"
+ },
+ {
+ "expr": "node_load5{instance=~\"$instance\"}",
+ "legendFormat": "{{instance}} - 5m"
+ },
+ {
+ "expr": "node_load15{instance=~\"$instance\"}",
+ "legendFormat": "{{instance}} - 15m"
+ }
+ ],
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "fillOpacity": 10,
+ "showPoints": "never"
+ }
+ }
+ },
+ "options": {
+ "legend": {
+ "displayMode": "table",
+ "placement": "right",
+ "showLegend": true
+ }
+ }
+ },
+ {
+   "type": "stat",
+   "title": "Running Processes",
+   "description": "Latest value of node_procs_running (processes in a runnable state) for each selected node.",
+   "gridPos": {
+     "h": 8,
+     "w": 12,
+     "x": 12,
+     "y": 46
+   },
+   "datasource": {
+     "type": "prometheus",
+     "uid": "${datasource}"
+   },
+   "targets": [
+     {
+       "expr": "node_procs_running{instance=~\"$instance\"}",
+       "legendFormat": "{{instance}}"
+     }
+   ],
+   "fieldConfig": {
+     "defaults": {
+       "custom": {
+         "hideFrom": {
+           "tooltip": false,
+           "viz": false,
+           "legend": false
+         }
+       },
+       "mappings": [],
+       "thresholds": {
+         "mode": "absolute",
+         "steps": [
+           {
+             "color": "green",
+             "value": null
+           }
+         ]
+       }
+     }
+   },
+   "options": {
+     "orientation": "auto",
+     "textMode": "value_and_name",
+     "colorMode": "value",
+     "graphMode": "none",
+     "justifyMode": "auto",
+     "reduceOptions": {
+       "values": false,
+       "calcs": [
+         "lastNotNull"
+       ]
+     }
+   }
+ }
+ ],
+ "time": {
+ "from": "now-6h",
+ "to": "now"
+ }
+ }