summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- f3s/goprecords/docker-image/Justfile | 2
-rw-r--r-- f3s/goprecords/helm-chart/templates/deployment.yaml | 2
-rw-r--r-- f3s/prometheus/additional-scrape-configs.yaml | 1
-rw-r--r-- f3s/prometheus/manifests/additional-scrape-configs-secret.yaml | 1
-rw-r--r-- f3s/prometheus/manifests/freebsd-temperature-alerts.yaml | 48
-rw-r--r-- frontends/Rexfile | 1
-rw-r--r-- frontends/etc/hosts.wg.append | 2
7 files changed, 55 insertions, 2 deletions
diff --git a/f3s/goprecords/docker-image/Justfile b/f3s/goprecords/docker-image/Justfile
index 7457f88..eeb642f 100644
--- a/f3s/goprecords/docker-image/Justfile
+++ b/f3s/goprecords/docker-image/Justfile
@@ -1,6 +1,6 @@
REGISTRY := "r0.lan.buetow.org:30001"
IMAGE := "goprecords"
-TAG := "0.5.1"
+TAG := "0.5.2"
SRC := "/home/paul/git/goprecords"
build:
diff --git a/f3s/goprecords/helm-chart/templates/deployment.yaml b/f3s/goprecords/helm-chart/templates/deployment.yaml
index 1ae6529..b717afc 100644
--- a/f3s/goprecords/helm-chart/templates/deployment.yaml
+++ b/f3s/goprecords/helm-chart/templates/deployment.yaml
@@ -36,7 +36,7 @@ spec:
readOnly: true
containers:
- name: goprecords
- image: registry.lan.buetow.org:30001/goprecords:0.5.1
+ image: registry.lan.buetow.org:30001/goprecords:0.5.2
imagePullPolicy: Always
args:
- -daemon
diff --git a/f3s/prometheus/additional-scrape-configs.yaml b/f3s/prometheus/additional-scrape-configs.yaml
index bb19367..5883e17 100644
--- a/f3s/prometheus/additional-scrape-configs.yaml
+++ b/f3s/prometheus/additional-scrape-configs.yaml
@@ -4,6 +4,7 @@
- '192.168.2.130:9100' # f0 via WireGuard
- '192.168.2.131:9100' # f1 via WireGuard
- '192.168.2.132:9100' # f2 via WireGuard
+ - '192.168.2.133:9100' # f3 via WireGuard
labels:
os: freebsd
- targets:
diff --git a/f3s/prometheus/manifests/additional-scrape-configs-secret.yaml b/f3s/prometheus/manifests/additional-scrape-configs-secret.yaml
index bb89e4c..970a45a 100644
--- a/f3s/prometheus/manifests/additional-scrape-configs-secret.yaml
+++ b/f3s/prometheus/manifests/additional-scrape-configs-secret.yaml
@@ -14,6 +14,7 @@ stringData:
- '192.168.2.130:9100' # f0 via WireGuard
- '192.168.2.131:9100' # f1 via WireGuard
- '192.168.2.132:9100' # f2 via WireGuard
+ - '192.168.2.133:9100' # f3 via WireGuard
labels:
os: freebsd
- targets:
diff --git a/f3s/prometheus/manifests/freebsd-temperature-alerts.yaml b/f3s/prometheus/manifests/freebsd-temperature-alerts.yaml
new file mode 100644
index 0000000..fe91115
--- /dev/null
+++ b/f3s/prometheus/manifests/freebsd-temperature-alerts.yaml
@@ -0,0 +1,48 @@
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: freebsd-temperature-alerts
+  namespace: monitoring
+  labels:
+    release: prometheus
+spec:
+  groups:
+    # CPU temperature alerting for the FreeBSD bhyve hosts (f0, f1, f2, f3).
+    #
+    # Metric: node_cpu_temperature_celsius{os="freebsd", cpu="N"}
+    #   One reading per CPU core, exported by node_exporter (built with the
+    #   FreeBSD temperature collector).
+    #
+    # The expression takes the hottest core per host (max by instance) and
+    # multiplies by node_uname_info (constant 1, so the value is unchanged) to
+    # attach a friendly `nodename` label (f0/f1/f2/f3) for the alert summary;
+    # the raw series is only labelled by the WireGuard instance address.
+    - name: freebsd-temperature
+      interval: 30s
+      rules:
+        # Warning: hottest core at or above 80 C, sustained for 5 minutes.
+        # The Intel N100 throttles near 100 C (TjMax 105 C), so 80 C is a
+        # "running hot" early warning, not an emergency. The 5m `for` avoids
+        # noise from brief load spikes (e.g. Prometheus TSDB compaction).
+        - alert: FreebsdCpuTemperatureHigh
+          expr: |
+            max by (instance) (node_cpu_temperature_celsius{os="freebsd"})
+              * on (instance) group_left(nodename) node_uname_info{os="freebsd"}
+            >= 80
+          for: 5m
+          labels:
+            severity: warning
+            component: thermal
+          annotations:
+            summary: "CPU temperature high on {{ $labels.nodename }} ({{ $value | printf \"%.0f\" }}C)"
+            description: >
+              FreeBSD host {{ $labels.nodename }} ({{ $labels.instance }}) has a
+              CPU core at {{ $value | printf "%.1f" }}C, at or above the 80C
+              warning threshold for more than 5 minutes. The Intel N100 throttles
+              near 100C, so this is an early warning that the host is running hot.
+            action: >
+              Check load: ssh paul@{{ $labels.nodename }} "sh -c 'sysctl dev.cpu | grep temperature; uptime; top -b -d 1 | head -12'"
+              The usual cause is the host's bhyve guest (k3s node) carrying more
+              CPU than its peers. Rebalance heavy pods off the busiest k3s node
+              (cordon the node, delete the heavy pods so they reschedule, then
+              uncordon) to even out load and lower the temperature.
diff --git a/frontends/Rexfile b/frontends/Rexfile
index 2510dd0..9327402 100644
--- a/frontends/Rexfile
+++ b/frontends/Rexfile
@@ -108,6 +108,7 @@ our %wg0_ips = (
'r0' => { '4' => '192.168.2.120', '6' => 'fd42:beef:cafe:2::120' },
'r1' => { '4' => '192.168.2.121', '6' => 'fd42:beef:cafe:2::121' },
'r2' => { '4' => '192.168.2.122', '6' => 'fd42:beef:cafe:2::122' },
+ 'rocky' => { '4' => '192.168.2.123', '6' => 'fd42:beef:cafe:2::123' },
'pi0' => { '4' => '192.168.2.203', '6' => 'fd42:beef:cafe:2::203' },
'pi1' => { '4' => '192.168.2.204', '6' => 'fd42:beef:cafe:2::204' },
);
diff --git a/frontends/etc/hosts.wg.append b/frontends/etc/hosts.wg.append
index 3da6a50..bfda7cd 100644
--- a/frontends/etc/hosts.wg.append
+++ b/frontends/etc/hosts.wg.append
@@ -4,6 +4,7 @@
192.168.2.120 r0.wg0.wan.buetow.org r0.wg0
192.168.2.121 r1.wg0.wan.buetow.org r1.wg0
192.168.2.122 r2.wg0.wan.buetow.org r2.wg0
+192.168.2.123 rocky.wg0.wan.buetow.org rocky.wg0
192.168.2.130 f0.wg0.wan.buetow.org f0.wg0
192.168.2.131 f1.wg0.wan.buetow.org f1.wg0
192.168.2.132 f2.wg0.wan.buetow.org f2.wg0
@@ -17,6 +18,7 @@ fd42:beef:cafe:2::111 fishfinger.wg0.wan.buetow.org fishfinger.wg0
fd42:beef:cafe:2::120 r0.wg0.wan.buetow.org r0.wg0
fd42:beef:cafe:2::121 r1.wg0.wan.buetow.org r1.wg0
fd42:beef:cafe:2::122 r2.wg0.wan.buetow.org r2.wg0
+fd42:beef:cafe:2::123 rocky.wg0.wan.buetow.org rocky.wg0
fd42:beef:cafe:2::130 f0.wg0.wan.buetow.org f0.wg0
fd42:beef:cafe:2::131 f1.wg0.wan.buetow.org f1.wg0
fd42:beef:cafe:2::132 f2.wg0.wan.buetow.org f2.wg0