diff options
| -rw-r--r-- | f3s/goprecords/docker-image/Justfile | 2 | ||||
| -rw-r--r-- | f3s/goprecords/helm-chart/templates/deployment.yaml | 2 | ||||
| -rw-r--r-- | f3s/prometheus/additional-scrape-configs.yaml | 1 | ||||
| -rw-r--r-- | f3s/prometheus/manifests/additional-scrape-configs-secret.yaml | 1 | ||||
| -rw-r--r-- | f3s/prometheus/manifests/freebsd-temperature-alerts.yaml | 48 | ||||
| -rw-r--r-- | frontends/Rexfile | 1 | ||||
| -rw-r--r-- | frontends/etc/hosts.wg.append | 2 |
7 files changed, 55 insertions, 2 deletions
diff --git a/f3s/goprecords/docker-image/Justfile b/f3s/goprecords/docker-image/Justfile index 7457f88..eeb642f 100644 --- a/f3s/goprecords/docker-image/Justfile +++ b/f3s/goprecords/docker-image/Justfile @@ -1,6 +1,6 @@ REGISTRY := "r0.lan.buetow.org:30001" IMAGE := "goprecords" -TAG := "0.5.1" +TAG := "0.5.2" SRC := "/home/paul/git/goprecords" build: diff --git a/f3s/goprecords/helm-chart/templates/deployment.yaml b/f3s/goprecords/helm-chart/templates/deployment.yaml index 1ae6529..b717afc 100644 --- a/f3s/goprecords/helm-chart/templates/deployment.yaml +++ b/f3s/goprecords/helm-chart/templates/deployment.yaml @@ -36,7 +36,7 @@ spec: readOnly: true containers: - name: goprecords - image: registry.lan.buetow.org:30001/goprecords:0.5.1 + image: registry.lan.buetow.org:30001/goprecords:0.5.2 imagePullPolicy: Always args: - -daemon diff --git a/f3s/prometheus/additional-scrape-configs.yaml b/f3s/prometheus/additional-scrape-configs.yaml index bb19367..5883e17 100644 --- a/f3s/prometheus/additional-scrape-configs.yaml +++ b/f3s/prometheus/additional-scrape-configs.yaml @@ -4,6 +4,7 @@ - '192.168.2.130:9100' # f0 via WireGuard - '192.168.2.131:9100' # f1 via WireGuard - '192.168.2.132:9100' # f2 via WireGuard + - '192.168.2.133:9100' # f3 via WireGuard labels: os: freebsd - targets: diff --git a/f3s/prometheus/manifests/additional-scrape-configs-secret.yaml b/f3s/prometheus/manifests/additional-scrape-configs-secret.yaml index bb89e4c..970a45a 100644 --- a/f3s/prometheus/manifests/additional-scrape-configs-secret.yaml +++ b/f3s/prometheus/manifests/additional-scrape-configs-secret.yaml @@ -14,6 +14,7 @@ stringData: - '192.168.2.130:9100' # f0 via WireGuard - '192.168.2.131:9100' # f1 via WireGuard - '192.168.2.132:9100' # f2 via WireGuard + - '192.168.2.133:9100' # f3 via WireGuard labels: os: freebsd - targets: diff --git a/f3s/prometheus/manifests/freebsd-temperature-alerts.yaml b/f3s/prometheus/manifests/freebsd-temperature-alerts.yaml new file mode 100644 index 0000000..fe91115 --- /dev/null +++ b/f3s/prometheus/manifests/freebsd-temperature-alerts.yaml @@ -0,0 +1,48 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: freebsd-temperature-alerts + namespace: monitoring + labels: + release: prometheus +spec: + groups: + # CPU temperature alerting for the FreeBSD bhyve hosts (f0, f1, f2, f3). + # + # Metric: node_cpu_temperature_celsius{os="freebsd", cpu="N"} + # One reading per CPU core, exported by node_exporter (built with the + # FreeBSD temperature collector). + # + # The expression takes the hottest core per host (max by instance) and + # joins node_uname_info to attach a friendly `nodename` label (f0/f1/f2/f3) + # for the alert summary, since the raw series is only labelled by the + # WireGuard instance address. + - name: freebsd-temperature + interval: 30s + rules: + # Warning: hottest core at or above 80 C, sustained for 5 minutes. + # The Intel N100 throttles near ~100 C (TjMax 105 C), so 80 C is a + # "running hot" early warning, not an emergency. The 5m `for` avoids + # noise from brief load spikes (e.g. Prometheus TSDB compaction). + - alert: FreebsdCpuTemperatureHigh + expr: | + max by (instance) (node_cpu_temperature_celsius{os="freebsd"}) + * on (instance) group_left(nodename) node_uname_info{os="freebsd"} + >= 80 + for: 5m + labels: + severity: warning + component: thermal + annotations: + summary: "CPU temperature high on {{ $labels.nodename }} ({{ $value | printf \"%.0f\" }}C)" + description: > + FreeBSD host {{ $labels.nodename }} ({{ $labels.instance }}) has a + CPU core at {{ $value | printf "%.1f" }}C, at or above the 80C + warning threshold for more than 5 minutes. The Intel N100 throttles + near 100C, so this is an early warning that the host is running hot. + action: > + Check load: ssh paul@{{ $labels.nodename }} "sh -c 'sysctl dev.cpu | grep temperature; uptime; top -b -n1 | head -12'" + The usual cause is the host's bhyve guest (k3s node) carrying more + CPU than its peers. Rebalance heavy pods off the busiest k3s node + (cordon the node, delete the heavy pods so they reschedule, then + uncordon) to even out load and lower the temperature. diff --git a/frontends/Rexfile b/frontends/Rexfile index 2510dd0..9327402 100644 --- a/frontends/Rexfile +++ b/frontends/Rexfile @@ -108,6 +108,7 @@ our %wg0_ips = ( 'r0' => { '4' => '192.168.2.120', '6' => 'fd42:beef:cafe:2::120' }, 'r1' => { '4' => '192.168.2.121', '6' => 'fd42:beef:cafe:2::121' }, 'r2' => { '4' => '192.168.2.122', '6' => 'fd42:beef:cafe:2::122' }, + 'rocky' => { '4' => '192.168.2.123', '6' => 'fd42:beef:cafe:2::123' }, 'pi0' => { '4' => '192.168.2.203', '6' => 'fd42:beef:cafe:2::203' }, 'pi1' => { '4' => '192.168.2.204', '6' => 'fd42:beef:cafe:2::204' }, ); diff --git a/frontends/etc/hosts.wg.append b/frontends/etc/hosts.wg.append index 3da6a50..bfda7cd 100644 --- a/frontends/etc/hosts.wg.append +++ b/frontends/etc/hosts.wg.append @@ -4,6 +4,7 @@ 192.168.2.120 r0.wg0.wan.buetow.org r0.wg0 192.168.2.121 r1.wg0.wan.buetow.org r1.wg0 192.168.2.122 r2.wg0.wan.buetow.org r2.wg0 +192.168.2.123 rocky.wg0.wan.buetow.org rocky.wg0 192.168.2.130 f0.wg0.wan.buetow.org f0.wg0 192.168.2.131 f1.wg0.wan.buetow.org f1.wg0 192.168.2.132 f2.wg0.wan.buetow.org f2.wg0 @@ -17,6 +18,7 @@ fd42:beef:cafe:2::111 fishfinger.wg0.wan.buetow.org fishfinger.wg0 fd42:beef:cafe:2::120 r0.wg0.wan.buetow.org r0.wg0 fd42:beef:cafe:2::121 r1.wg0.wan.buetow.org r1.wg0 fd42:beef:cafe:2::122 r2.wg0.wan.buetow.org r2.wg0 +fd42:beef:cafe:2::123 rocky.wg0.wan.buetow.org rocky.wg0 fd42:beef:cafe:2::130 f0.wg0.wan.buetow.org f0.wg0 fd42:beef:cafe:2::131 f1.wg0.wan.buetow.org f1.wg0 fd42:beef:cafe:2::132 f2.wg0.wan.buetow.org f2.wg0 |
