summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Paul Buetow <paul@buetow.org> 2025-12-28 12:21:46 +0200
committer Paul Buetow <paul@buetow.org> 2025-12-28 12:21:46 +0200
commit 49086b43aeebfd3fdd06cd330cca8130d32e5202 (patch)
tree 5a8e0c3e22486b70c070b54025c395d8c070bab8
parent 598bcd7f6ccf9e884ec1a29e8188947954bc064f (diff)
Add comprehensive ZFS monitoring for FreeBSD servers
Implemented complete ZFS monitoring solution including ARC cache statistics, pool health/capacity metrics, dataset usage, and I/O throughput monitoring. Changes: - Add ZFS recording rules (9 calculated metrics for ARC hit rates, memory usage, etc.) - Add comprehensive Grafana dashboard with 20 panels across 5 rows: * Pool Overview: capacity, health, size, free space, usage trends * I/O Throughput: read/write operations and bytes per second * Dataset Statistics: table showing all datasets with usage details * ARC Cache Statistics: hit rates, size, memory usage * ARC Breakdown: data vs metadata, MRU vs MFU with pie charts - Update Justfile to deploy ZFS recording rules - Add textfile collector script on FreeBSD servers (f0, f1, f2) for pool/dataset metrics Metrics collected: - Pool: size, allocated, free, capacity %, health status - I/O: read/write operations and throughput (via zpool iostat) - Dataset: used, available, referenced space per filesystem - ARC: hit rate, size, memory usage, data/metadata breakdown Fixes: - Pool health panel properly displays ONLINE/DEGRADED/FAULTED status - All stat panels have correct options configuration 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
-rw-r--r-- f3s/prometheus/Justfile 2
-rw-r--r-- f3s/prometheus/zfs-dashboards.yaml 36
-rw-r--r-- f3s/prometheus/zfs-recording-rules.yaml 106
3 files changed, 144 insertions, 0 deletions
diff --git a/f3s/prometheus/Justfile b/f3s/prometheus/Justfile
index 686a6a1..1038650 100644
--- a/f3s/prometheus/Justfile
+++ b/f3s/prometheus/Justfile
@@ -4,6 +4,7 @@ install:
helm install prometheus prometheus-community/kube-prometheus-stack --namespace monitoring -f persistence-values.yaml
kubectl apply -f freebsd-recording-rules.yaml
kubectl apply -f openbsd-recording-rules.yaml
+ kubectl apply -f zfs-recording-rules.yaml
just -f grafana-ingress/Justfile install
upgrade:
@@ -11,6 +12,7 @@ upgrade:
helm upgrade prometheus prometheus-community/kube-prometheus-stack --namespace monitoring -f persistence-values.yaml
kubectl apply -f freebsd-recording-rules.yaml
kubectl apply -f openbsd-recording-rules.yaml
+ kubectl apply -f zfs-recording-rules.yaml
uninstall:
just -f grafana-ingress/Justfile delete
diff --git a/f3s/prometheus/zfs-dashboards.yaml b/f3s/prometheus/zfs-dashboards.yaml
new file mode 100644
index 0000000..208a090
--- /dev/null
+++ b/f3s/prometheus/zfs-dashboards.yaml
@@ -0,0 +1,36 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  namespace: monitoring
+  name: freebsd-zfs-dashboards
+  labels:
+    release: prometheus
+    app.kubernetes.io/part-of: kube-prometheus-stack
+    app.kubernetes.io/managed-by: Helm  # NOTE(review): confirm Helm really owns this ConfigMap
+    app.kubernetes.io/instance: prometheus
+    grafana_dashboard: "1"  # quoted so the label value stays a string, not the integer 1
+data:
+ freebsd-zfs.json: "{\n \"title\": \"FreeBSD ZFS\",\n \"uid\": \"freebsd-zfs\",\n \"timezone\": \"utc\",\n \"schemaVersion\": 38,\n \"refresh\": \"30s\",\n \"tags\": [\n \"zfs\",\n \"freebsd\",\n \"storage\"\n ],\n \"editable\": true,\n \"templating\": {\n \"list\": [\n {\n \"name\": \"datasource\",\n \"type\": \"datasource\",\n \"query\": \"prometheus\",\n \"hide\": 0\n },\n {\n \"name\": \"instance\",\n \"type\": \"query\",\n \"datasource\": {\n \"type\": \"prometheus\",\n \"uid\": \"${datasource}\"\n },\n \"query\": \"label_values(node_zfs_arcstats_size_bytes{os=\\\"freebsd\\\"}, instance)\",\n \"refresh\": 2,\n \"hide\": 0,\n \"label\": \"FreeBSD Server\"\n },\n {\n \"name\": \"pool\",\n \"type\": \"query\",\n \"datasource\": {\n \"type\": \"prometheus\",\n \"uid\": \"${datasource}\"\n },\n \
+ \ \"query\": \"label_values(zfs_pool_size_bytes{instance=\\\"$instance\\\"}, pool)\",\n \"refresh\": 2,\n \"hide\": 0,\n \"label\": \"ZFS Pool\",\n \"includeAll\": true,\n \"multi\": false\n }\n ]\n },\n \"panels\": [\n {\n \"type\": \"row\",\n \"collapsed\": false,\n \"title\": \"Pool Overview\",\n \"gridPos\": {\n \"h\": 1,\n \"w\": 24,\n \"x\": 0,\n \"y\": 0\n }\n },\n {\n \"type\": \"gauge\",\n \"title\": \"Pool Capacity\",\n \"gridPos\": {\n \"h\": 8,\n \"w\": 6,\n \"x\": 0,\n \"y\": 1\n },\n \"datasource\": {\n \"type\": \"prometheus\",\n \"uid\": \"${datasource}\"\n },\n \"targets\": [\n {\n \"expr\": \"zfs_pool_capacity_percent{instance=\\\"$instance\\\",pool=~\\\"$pool\\\"}\",\n \"legendFormat\": \"{{pool}}\"\n }\n ],\n \"fieldConfig\": {\n \
+ \ \"defaults\": {\n \"unit\": \"percent\",\n \"min\": 0,\n \"max\": 100,\n \"thresholds\": {\n \"mode\": \"absolute\",\n \"steps\": [\n {\n \"color\": \"green\",\n \"value\": 0\n },\n {\n \"color\": \"yellow\",\n \"value\": 70\n },\n {\n \"color\": \"red\",\n \"value\": 85\n }\n ]\n }\n }\n }\n },\n {\n \"type\": \"stat\",\n \"title\": \"Pool Health\",\n \"gridPos\": {\n \"h\": 4,\n \"w\": 6,\n \"x\": 6,\n \"y\": 1\n },\n \"datasource\": {\n \"type\": \"prometheus\",\n \"uid\": \"${datasource}\"\n },\n \"targets\": [\n {\n \"expr\": \"zfs_pool_health{instance=\\\"$instance\\\",pool=~\\\"$pool\\\"}\",\n \"legendFormat\": \"\
+ {{pool}}\"\n }\n ],\n \"options\": {\n \"reduceOptions\": {\n \"values\": false,\n \"calcs\": [\n \"lastNotNull\"\n ]\n },\n \"orientation\": \"auto\",\n \"textMode\": \"value_and_name\",\n \"colorMode\": \"background\",\n \"graphMode\": \"none\",\n \"justifyMode\": \"auto\"\n },\n \"fieldConfig\": {\n \"defaults\": {\n \"unit\": \"short\",\n \"mappings\": [\n {\n \"options\": {\n \"0\": {\n \"text\": \"ONLINE\",\n \"color\": \"green\"\n }\n },\n \"type\": \"value\"\n },\n {\n \"options\": {\n \"1\": {\n \"text\": \"DEGRADED\",\n \"color\": \"yellow\"\n }\n },\n \"type\": \"value\"\n },\n {\n\
+ \ \"options\": {\n \"2\": {\n \"text\": \"FAULTED\",\n \"color\": \"red\"\n }\n },\n \"type\": \"value\"\n }\n ],\n \"thresholds\": {\n \"mode\": \"absolute\",\n \"steps\": [\n {\n \"color\": \"green\",\n \"value\": null\n },\n {\n \"color\": \"yellow\",\n \"value\": 1\n },\n {\n \"color\": \"red\",\n \"value\": 2\n }\n ]\n }\n },\n \"overrides\": []\n }\n },\n {\n \"type\": \"stat\",\n \"title\": \"Total Pool Size\",\n \"gridPos\": {\n \"h\": 4,\n \"w\": 6,\n \"x\": 12,\n \"y\": 1\n },\n \"datasource\": {\n \"type\": \"prometheus\",\n \"uid\": \"${datasource}\"\
+ \n },\n \"targets\": [\n {\n \"expr\": \"zfs_pool_size_bytes{instance=\\\"$instance\\\",pool=~\\\"$pool\\\"}\",\n \"legendFormat\": \"{{pool}}\"\n }\n ],\n \"options\": {\n \"reduceOptions\": {\n \"values\": false,\n \"calcs\": [\n \"lastNotNull\"\n ]\n },\n \"orientation\": \"auto\",\n \"textMode\": \"value_and_name\",\n \"colorMode\": \"value\",\n \"graphMode\": \"none\",\n \"justifyMode\": \"auto\"\n },\n \"fieldConfig\": {\n \"defaults\": {\n \"unit\": \"bytes\"\n },\n \"overrides\": []\n }\n },\n {\n \"type\": \"stat\",\n \"title\": \"Free Space\",\n \"gridPos\": {\n \"h\": 4,\n \"w\": 6,\n \"x\": 18,\n \"y\": 1\n },\n \"datasource\": {\n \"type\": \"prometheus\",\n \"uid\": \"${datasource}\"\n },\n \"targets\": [\n\
+ \ {\n \"expr\": \"zfs_pool_free_bytes{instance=\\\"$instance\\\",pool=~\\\"$pool\\\"}\",\n \"legendFormat\": \"{{pool}}\"\n }\n ],\n \"options\": {\n \"reduceOptions\": {\n \"values\": false,\n \"calcs\": [\n \"lastNotNull\"\n ]\n },\n \"orientation\": \"auto\",\n \"textMode\": \"value_and_name\",\n \"colorMode\": \"value\",\n \"graphMode\": \"none\",\n \"justifyMode\": \"auto\"\n },\n \"fieldConfig\": {\n \"defaults\": {\n \"unit\": \"bytes\"\n },\n \"overrides\": []\n }\n },\n {\n \"type\": \"timeseries\",\n \"title\": \"Pool Space Usage Over Time\",\n \"gridPos\": {\n \"h\": 8,\n \"w\": 12,\n \"x\": 6,\n \"y\": 5\n },\n \"datasource\": {\n \"type\": \"prometheus\",\n \"uid\": \"${datasource}\"\n },\n \"targets\": [\n {\n\
+ \ \"expr\": \"zfs_pool_allocated_bytes{instance=\\\"$instance\\\",pool=~\\\"$pool\\\"}\",\n \"legendFormat\": \"{{pool}} - Used\"\n },\n {\n \"expr\": \"zfs_pool_free_bytes{instance=\\\"$instance\\\",pool=~\\\"$pool\\\"}\",\n \"legendFormat\": \"{{pool}} - Free\"\n }\n ],\n \"fieldConfig\": {\n \"defaults\": {\n \"unit\": \"bytes\",\n \"custom\": {\n \"fillOpacity\": 10,\n \"showPoints\": \"never\",\n \"stacking\": {\n \"mode\": \"normal\"\n }\n }\n }\n }\n },\n {\n \"type\": \"timeseries\",\n \"title\": \"Pool Capacity Trend\",\n \"gridPos\": {\n \"h\": 8,\n \"w\": 6,\n \"x\": 18,\n \"y\": 5\n },\n \"datasource\": {\n \"type\": \"prometheus\",\n \"uid\": \"${datasource}\"\n },\n \"targets\": [\n {\n \"expr\": \"zfs_pool_capacity_percent{instance=\\\
+ \"$instance\\\",pool=~\\\"$pool\\\"}\",\n \"legendFormat\": \"{{pool}}\"\n }\n ],\n \"fieldConfig\": {\n \"defaults\": {\n \"unit\": \"percent\",\n \"min\": 0,\n \"max\": 100,\n \"custom\": {\n \"fillOpacity\": 10,\n \"showPoints\": \"never\"\n }\n }\n }\n },\n {\n \"type\": \"row\",\n \"collapsed\": false,\n \"title\": \"I/O Throughput\",\n \"gridPos\": {\n \"h\": 1,\n \"w\": 24,\n \"x\": 0,\n \"y\": 13\n }\n },\n {\n \"type\": \"timeseries\",\n \"title\": \"Read Operations Rate\",\n \"gridPos\": {\n \"h\": 8,\n \"w\": 12,\n \"x\": 0,\n \"y\": 14\n },\n \"datasource\": {\n \"type\": \"prometheus\",\n \"uid\": \"${datasource}\"\n },\n \"targets\": [\n {\n \"expr\": \"rate(zfs_pool_read_operations_total{instance=\\\"$instance\\\
+ \",pool=~\\\"$pool\\\"}[5m])\",\n \"legendFormat\": \"{{pool}}\"\n }\n ],\n \"fieldConfig\": {\n \"defaults\": {\n \"unit\": \"ops\",\n \"custom\": {\n \"fillOpacity\": 10,\n \"showPoints\": \"never\"\n }\n }\n }\n },\n {\n \"type\": \"timeseries\",\n \"title\": \"Write Operations Rate\",\n \"gridPos\": {\n \"h\": 8,\n \"w\": 12,\n \"x\": 12,\n \"y\": 14\n },\n \"datasource\": {\n \"type\": \"prometheus\",\n \"uid\": \"${datasource}\"\n },\n \"targets\": [\n {\n \"expr\": \"rate(zfs_pool_write_operations_total{instance=\\\"$instance\\\",pool=~\\\"$pool\\\"}[5m])\",\n \"legendFormat\": \"{{pool}}\"\n }\n ],\n \"fieldConfig\": {\n \"defaults\": {\n \"unit\": \"ops\",\n \"custom\": {\n \"fillOpacity\": 10,\n \"showPoints\": \"\
+ never\"\n }\n }\n }\n },\n {\n \"type\": \"timeseries\",\n \"title\": \"Read Throughput\",\n \"gridPos\": {\n \"h\": 8,\n \"w\": 12,\n \"x\": 0,\n \"y\": 22\n },\n \"datasource\": {\n \"type\": \"prometheus\",\n \"uid\": \"${datasource}\"\n },\n \"targets\": [\n {\n \"expr\": \"rate(zfs_pool_read_bytes_total{instance=\\\"$instance\\\",pool=~\\\"$pool\\\"}[5m])\",\n \"legendFormat\": \"{{pool}}\"\n }\n ],\n \"fieldConfig\": {\n \"defaults\": {\n \"unit\": \"Bps\",\n \"custom\": {\n \"fillOpacity\": 10,\n \"showPoints\": \"never\"\n }\n }\n }\n },\n {\n \"type\": \"timeseries\",\n \"title\": \"Write Throughput\",\n \"gridPos\": {\n \"h\": 8,\n \"w\": 12,\n \"x\": 12,\n \"y\": 22\n },\n \"datasource\": {\n \"type\"\
+ : \"prometheus\",\n \"uid\": \"${datasource}\"\n },\n \"targets\": [\n {\n \"expr\": \"rate(zfs_pool_write_bytes_total{instance=\\\"$instance\\\",pool=~\\\"$pool\\\"}[5m])\",\n \"legendFormat\": \"{{pool}}\"\n }\n ],\n \"fieldConfig\": {\n \"defaults\": {\n \"unit\": \"Bps\",\n \"custom\": {\n \"fillOpacity\": 10,\n \"showPoints\": \"never\"\n }\n }\n }\n },\n {\n \"type\": \"row\",\n \"collapsed\": false,\n \"title\": \"Dataset Statistics\",\n \"gridPos\": {\n \"h\": 1,\n \"w\": 24,\n \"x\": 0,\n \"y\": 30\n }\n },\n {\n \"type\": \"table\",\n \"title\": \"Datasets by Pool\",\n \"gridPos\": {\n \"h\": 10,\n \"w\": 24,\n \"x\": 0,\n \"y\": 31\n },\n \"datasource\": {\n \"type\": \"prometheus\",\n \"uid\": \"${datasource}\"\n \
+ \ },\n \"targets\": [\n {\n \"expr\": \"zfs_dataset_used_bytes{instance=\\\"$instance\\\",pool=~\\\"$pool\\\"}\",\n \"format\": \"table\",\n \"instant\": true,\n \"refId\": \"A\"\n },\n {\n \"expr\": \"zfs_dataset_available_bytes{instance=\\\"$instance\\\",pool=~\\\"$pool\\\"}\",\n \"format\": \"table\",\n \"instant\": true,\n \"refId\": \"B\"\n },\n {\n \"expr\": \"zfs_dataset_referenced_bytes{instance=\\\"$instance\\\",pool=~\\\"$pool\\\"}\",\n \"format\": \"table\",\n \"instant\": true,\n \"refId\": \"C\"\n }\n ],\n \"transformations\": [\n {\n \"id\": \"merge\",\n \"options\": {}\n },\n {\n \"id\": \"organize\",\n \"options\": {\n \"renameByName\": {\n \"dataset\": \"Dataset\",\n \"pool\": \"Pool\",\n \"Value #A\"\
+ : \"Used\",\n \"Value #B\": \"Available\",\n \"Value #C\": \"Referenced\"\n },\n \"excludeByName\": {\n \"__name__\": true,\n \"instance\": true,\n \"job\": true,\n \"os\": true,\n \"Time\": true\n }\n }\n }\n ],\n \"fieldConfig\": {\n \"overrides\": [\n {\n \"matcher\": {\n \"id\": \"byName\",\n \"options\": \"Used\"\n },\n \"properties\": [\n {\n \"id\": \"unit\",\n \"value\": \"bytes\"\n }\n ]\n },\n {\n \"matcher\": {\n \"id\": \"byName\",\n \"options\": \"Available\"\n },\n \"properties\": [\n {\n \"id\": \"unit\",\n \"value\": \"bytes\"\n }\n ]\n \
+ \ },\n {\n \"matcher\": {\n \"id\": \"byName\",\n \"options\": \"Referenced\"\n },\n \"properties\": [\n {\n \"id\": \"unit\",\n \"value\": \"bytes\"\n }\n ]\n }\n ]\n }\n },\n {\n \"type\": \"row\",\n \"collapsed\": false,\n \"title\": \"ARC Cache Statistics\",\n \"gridPos\": {\n \"h\": 1,\n \"w\": 24,\n \"x\": 0,\n \"y\": 41\n }\n },\n {\n \"type\": \"gauge\",\n \"title\": \"ARC Hit Rate\",\n \"gridPos\": {\n \"h\": 8,\n \"w\": 6,\n \"x\": 0,\n \"y\": 42\n },\n \"datasource\": {\n \"type\": \"prometheus\",\n \"uid\": \"${datasource}\"\n },\n \"targets\": [\n {\n \"expr\": \"node_zfs_arc_hit_rate_percent{instance=\\\"$instance\\\"}\",\n \"legendFormat\": \"Hit\
+ \ Rate\"\n }\n ],\n \"fieldConfig\": {\n \"defaults\": {\n \"unit\": \"percent\",\n \"min\": 0,\n \"max\": 100,\n \"thresholds\": {\n \"mode\": \"absolute\",\n \"steps\": [\n {\n \"color\": \"red\",\n \"value\": 0\n },\n {\n \"color\": \"yellow\",\n \"value\": 70\n },\n {\n \"color\": \"green\",\n \"value\": 90\n }\n ]\n }\n }\n }\n },\n {\n \"type\": \"timeseries\",\n \"title\": \"ARC Size (Current, Target, Max)\",\n \"gridPos\": {\n \"h\": 8,\n \"w\": 12,\n \"x\": 6,\n \"y\": 42\n },\n \"datasource\": {\n \"type\": \"prometheus\",\n \"uid\": \"${datasource}\"\n },\n \"targets\": [\n {\n \"expr\": \"node_zfs_arcstats_c_max_bytes{instance=\\\
+ \"$instance\\\"}\",\n \"legendFormat\": \"Max Size\"\n },\n {\n \"expr\": \"node_zfs_arcstats_c_bytes{instance=\\\"$instance\\\"}\",\n \"legendFormat\": \"Target Size\"\n },\n {\n \"expr\": \"node_zfs_arcstats_size_bytes{instance=\\\"$instance\\\"}\",\n \"legendFormat\": \"Current Size\"\n }\n ],\n \"fieldConfig\": {\n \"defaults\": {\n \"unit\": \"bytes\",\n \"custom\": {\n \"fillOpacity\": 10,\n \"showPoints\": \"never\"\n }\n }\n }\n },\n {\n \"type\": \"gauge\",\n \"title\": \"ARC Memory Usage %\",\n \"gridPos\": {\n \"h\": 8,\n \"w\": 6,\n \"x\": 18,\n \"y\": 42\n },\n \"datasource\": {\n \"type\": \"prometheus\",\n \"uid\": \"${datasource}\"\n },\n \"targets\": [\n {\n \"expr\": \"node_zfs_arc_memory_usage_percent{instance=\\\
+ \"$instance\\\"}\",\n \"legendFormat\": \"Memory Usage\"\n }\n ],\n \"fieldConfig\": {\n \"defaults\": {\n \"unit\": \"percent\",\n \"min\": 0,\n \"max\": 100,\n \"thresholds\": {\n \"mode\": \"absolute\",\n \"steps\": [\n {\n \"color\": \"green\",\n \"value\": 0\n },\n {\n \"color\": \"yellow\",\n \"value\": 80\n },\n {\n \"color\": \"red\",\n \"value\": 95\n }\n ]\n }\n }\n }\n },\n {\n \"type\": \"timeseries\",\n \"title\": \"ARC Hits vs Misses (rate)\",\n \"gridPos\": {\n \"h\": 8,\n \"w\": 12,\n \"x\": 0,\n \"y\": 50\n },\n \"datasource\": {\n \"type\": \"prometheus\",\n \"uid\": \"${datasource}\"\n },\n \"targets\"\
+ : [\n {\n \"expr\": \"rate(node_zfs_arcstats_hits_total{instance=\\\"$instance\\\"}[5m])\",\n \"legendFormat\": \"Hits/sec\"\n },\n {\n \"expr\": \"rate(node_zfs_arcstats_misses_total{instance=\\\"$instance\\\"}[5m])\",\n \"legendFormat\": \"Misses/sec\"\n }\n ],\n \"fieldConfig\": {\n \"defaults\": {\n \"unit\": \"ops\",\n \"custom\": {\n \"fillOpacity\": 10,\n \"showPoints\": \"never\"\n }\n }\n }\n },\n {\n \"type\": \"timeseries\",\n \"title\": \"ARC Hit Rates by Type\",\n \"gridPos\": {\n \"h\": 8,\n \"w\": 12,\n \"x\": 12,\n \"y\": 50\n },\n \"datasource\": {\n \"type\": \"prometheus\",\n \"uid\": \"${datasource}\"\n },\n \"targets\": [\n {\n \"expr\": \"node_zfs_arc_demand_data_hit_rate_percent{instance=\\\"$instance\\\"}\",\n \"legendFormat\"\
+ : \"Data Hit Rate\"\n },\n {\n \"expr\": \"node_zfs_arc_demand_metadata_hit_rate_percent{instance=\\\"$instance\\\"}\",\n \"legendFormat\": \"Metadata Hit Rate\"\n }\n ],\n \"fieldConfig\": {\n \"defaults\": {\n \"unit\": \"percent\",\n \"custom\": {\n \"fillOpacity\": 10,\n \"showPoints\": \"never\"\n }\n }\n }\n },\n {\n \"type\": \"row\",\n \"collapsed\": false,\n \"title\": \"ARC Breakdown\",\n \"gridPos\": {\n \"h\": 1,\n \"w\": 24,\n \"x\": 0,\n \"y\": 58\n }\n },\n {\n \"type\": \"timeseries\",\n \"title\": \"ARC Data vs Metadata\",\n \"gridPos\": {\n \"h\": 8,\n \"w\": 12,\n \"x\": 0,\n \"y\": 59\n },\n \"datasource\": {\n \"type\": \"prometheus\",\n \"uid\": \"${datasource}\"\n },\n \"targets\": [\n {\n \"\
+ expr\": \"node_zfs_arcstats_data_bytes{instance=\\\"$instance\\\"}\",\n \"legendFormat\": \"Data\"\n },\n {\n \"expr\": \"node_zfs_arcstats_meta_bytes{instance=\\\"$instance\\\"}\",\n \"legendFormat\": \"Metadata\"\n }\n ],\n \"fieldConfig\": {\n \"defaults\": {\n \"unit\": \"bytes\",\n \"custom\": {\n \"fillOpacity\": 20,\n \"showPoints\": \"never\",\n \"stacking\": {\n \"mode\": \"normal\"\n }\n }\n }\n }\n },\n {\n \"type\": \"timeseries\",\n \"title\": \"MRU vs MFU Cache Size\",\n \"gridPos\": {\n \"h\": 8,\n \"w\": 12,\n \"x\": 12,\n \"y\": 59\n },\n \"datasource\": {\n \"type\": \"prometheus\",\n \"uid\": \"${datasource}\"\n },\n \"targets\": [\n {\n \"expr\": \"node_zfs_arcstats_mru_bytes{instance=\\\"$instance\\\"}\",\n\
+ \ \"legendFormat\": \"MRU (Most Recently Used)\"\n },\n {\n \"expr\": \"node_zfs_arcstats_mfu_bytes{instance=\\\"$instance\\\"}\",\n \"legendFormat\": \"MFU (Most Frequently Used)\"\n }\n ],\n \"fieldConfig\": {\n \"defaults\": {\n \"unit\": \"bytes\",\n \"custom\": {\n \"fillOpacity\": 20,\n \"showPoints\": \"never\",\n \"stacking\": {\n \"mode\": \"normal\"\n }\n }\n }\n }\n },\n {\n \"type\": \"piechart\",\n \"title\": \"ARC Composition (Data vs Metadata)\",\n \"gridPos\": {\n \"h\": 8,\n \"w\": 12,\n \"x\": 0,\n \"y\": 67\n },\n \"datasource\": {\n \"type\": \"prometheus\",\n \"uid\": \"${datasource}\"\n },\n \"targets\": [\n {\n \"expr\": \"node_zfs_arc_data_percent{instance=\\\"$instance\\\"}\",\n \"legendFormat\"\
+ : \"Data\"\n },\n {\n \"expr\": \"node_zfs_arc_metadata_percent{instance=\\\"$instance\\\"}\",\n \"legendFormat\": \"Metadata\"\n }\n ],\n \"fieldConfig\": {\n \"defaults\": {\n \"unit\": \"percent\"\n }\n },\n \"options\": {\n \"legend\": {\n \"displayMode\": \"table\",\n \"placement\": \"right\",\n \"showLegend\": true,\n \"values\": [\n \"value\"\n ]\n },\n \"pieType\": \"pie\"\n }\n },\n {\n \"type\": \"piechart\",\n \"title\": \"ARC Composition (MRU vs MFU)\",\n \"gridPos\": {\n \"h\": 8,\n \"w\": 12,\n \"x\": 12,\n \"y\": 67\n },\n \"datasource\": {\n \"type\": \"prometheus\",\n \"uid\": \"${datasource}\"\n },\n \"targets\": [\n {\n \"expr\": \"node_zfs_arc_mru_percent{instance=\\\"$instance\\\"}\",\n \"legendFormat\"\
+ : \"MRU (Recently Used)\"\n },\n {\n \"expr\": \"node_zfs_arc_mfu_percent{instance=\\\"$instance\\\"}\",\n \"legendFormat\": \"MFU (Frequently Used)\"\n }\n ],\n \"fieldConfig\": {\n \"defaults\": {\n \"unit\": \"percent\"\n }\n },\n \"options\": {\n \"legend\": {\n \"displayMode\": \"table\",\n \"placement\": \"right\",\n \"showLegend\": true,\n \"values\": [\n \"value\"\n ]\n },\n \"pieType\": \"pie\"\n }\n }\n ],\n \"time\": {\n \"from\": \"now-6h\",\n \"to\": \"now\"\n }\n}"
diff --git a/f3s/prometheus/zfs-recording-rules.yaml b/f3s/prometheus/zfs-recording-rules.yaml
new file mode 100644
index 0000000..c445ea7
--- /dev/null
+++ b/f3s/prometheus/zfs-recording-rules.yaml
@@ -0,0 +1,106 @@
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: freebsd-zfs-rules
+  namespace: monitoring
+  labels:
+    release: prometheus
+spec:
+  groups:
+    # Derived FreeBSD ZFS ARC ratios, precomputed for dashboards; every rule selects os="freebsd".
+    - name: freebsd-zfs-arc
+      interval: 30s
+      rules:
+        # Overall ARC hit rate (%): hits / (hits + misses), using per-second rates over 5m.
+        # The "> 0" filter drops the sample when there were no requests, avoiding 0/0 = NaN.
+        - record: node_zfs_arc_hit_rate_percent
+          expr: |
+            100 * (
+              rate(node_zfs_arcstats_hits_total{os="freebsd"}[5m]) /
+              ((rate(node_zfs_arcstats_hits_total{os="freebsd"}[5m]) +
+                rate(node_zfs_arcstats_misses_total{os="freebsd"}[5m])) > 0)
+            )
+          labels:
+            os: freebsd
+
+        # ARC memory usage (%): current ARC size relative to the configured maximum (c_max).
+        - record: node_zfs_arc_memory_usage_percent
+          expr: |
+            100 * (
+              node_zfs_arcstats_size_bytes{os="freebsd"} /
+              node_zfs_arcstats_c_max_bytes{os="freebsd"}
+            )
+          labels:
+            os: freebsd
+
+        # ARC target (%): target size (c) relative to the maximum (c_max).
+        - record: node_zfs_arc_target_percent
+          expr: |
+            100 * (
+              node_zfs_arcstats_c_bytes{os="freebsd"} /
+              node_zfs_arcstats_c_max_bytes{os="freebsd"}
+            )
+          labels:
+            os: freebsd
+
+        # ARC metadata share (%): metadata bytes relative to the current ARC size.
+        - record: node_zfs_arc_metadata_percent
+          expr: |
+            100 * (
+              node_zfs_arcstats_meta_bytes{os="freebsd"} /
+              node_zfs_arcstats_size_bytes{os="freebsd"}
+            )
+          labels:
+            os: freebsd
+
+        # ARC data share (%): data bytes relative to the current ARC size.
+        - record: node_zfs_arc_data_percent
+          expr: |
+            100 * (
+              node_zfs_arcstats_data_bytes{os="freebsd"} /
+              node_zfs_arcstats_size_bytes{os="freebsd"}
+            )
+          labels:
+            os: freebsd
+
+        # MFU share (%): Most Frequently Used bytes relative to the current ARC size.
+        - record: node_zfs_arc_mfu_percent
+          expr: |
+            100 * (
+              node_zfs_arcstats_mfu_bytes{os="freebsd"} /
+              node_zfs_arcstats_size_bytes{os="freebsd"}
+            )
+          labels:
+            os: freebsd
+
+        # MRU share (%): Most Recently Used bytes relative to the current ARC size.
+        - record: node_zfs_arc_mru_percent
+          expr: |
+            100 * (
+              node_zfs_arcstats_mru_bytes{os="freebsd"} /
+              node_zfs_arcstats_size_bytes{os="freebsd"}
+            )
+          labels:
+            os: freebsd
+
+        # Demand data hit rate (%) over 5m; "> 0" emits no sample (not NaN) when there were no requests.
+        - record: node_zfs_arc_demand_data_hit_rate_percent
+          expr: |
+            100 * (
+              rate(node_zfs_arcstats_demand_data_hits_total{os="freebsd"}[5m]) /
+              ((rate(node_zfs_arcstats_demand_data_hits_total{os="freebsd"}[5m]) +
+                rate(node_zfs_arcstats_demand_data_misses_total{os="freebsd"}[5m])) > 0)
+            )
+          labels:
+            os: freebsd
+
+        # Demand metadata hit rate (%) over 5m; "> 0" emits no sample (not NaN) when there were no requests.
+        - record: node_zfs_arc_demand_metadata_hit_rate_percent
+          expr: |
+            100 * (
+              rate(node_zfs_arcstats_demand_metadata_hits_total{os="freebsd"}[5m]) /
+              ((rate(node_zfs_arcstats_demand_metadata_hits_total{os="freebsd"}[5m]) +
+                rate(node_zfs_arcstats_demand_metadata_misses_total{os="freebsd"}[5m])) > 0)
+            )
+          labels:
+            os: freebsd