diff options
| author | Paul Buetow <paul@buetow.org> | 2026-05-13 20:15:28 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2026-05-13 20:15:28 +0300 |
| commit | 4b583f7d4ee660b7b2e74603764d5e121c7ca549 (patch) | |
| tree | 981912d7880c565647f334c0b6596a4f4af535bf /f3s | |
| parent | 7783d2b3fef3a49c0115ca5a2b2b0c0664dd2d4d (diff) | |
f3s/beets-art: nightly k3s CronJob to fetch+embed cover art for Navidrome
Adds a beets-based CronJob that runs every night on r1 (where the
Navidrome music PVC lives), fetching external cover.jpg into each album
folder and embedding art into audio files. Idempotent on re-runs:
- import.incremental skips already-known album folders
- fetchart skips albums that already have cover art
- embedart with ifempty:no + compare_threshold:50 only fills missing
embeds and refuses risky overwrites
Navidrome picks new art up via its existing 1h scan; no Navidrome change
required. Reuses navidrome-music-pvc directly (RWO is fine because both
pods pin to r1 via nodeSelector). State (library.db, logs) lives on a
small local-path PVC, regenerable by deleting the PVC.
Files: f3s/beets-art/helm-chart/{Chart.yaml,README.md,templates/*.yaml}
f3s/beets-art/Justfile (status, logs, run-now, suspend, resume, shell)
f3s/argocd-apps/services/beets-art.yaml
Amp-Thread-ID: https://ampcode.com/threads/T-019e223a-d137-705e-879b-84130c0e78ea
Co-authored-by: Amp <amp@ampcode.com>
Diffstat (limited to 'f3s')
| -rw-r--r-- | f3s/argocd-apps/services/beets-art.yaml | 28 | ||||
| -rw-r--r-- | f3s/beets-art/Justfile | 66 | ||||
| -rw-r--r-- | f3s/beets-art/helm-chart/Chart.yaml | 5 | ||||
| -rw-r--r-- | f3s/beets-art/helm-chart/README.md | 108 | ||||
| -rw-r--r-- | f3s/beets-art/helm-chart/templates/configmap.yaml | 59 | ||||
| -rw-r--r-- | f3s/beets-art/helm-chart/templates/cronjob.yaml | 117 | ||||
| -rw-r--r-- | f3s/beets-art/helm-chart/templates/persistent-volume.yaml | 20 |
7 files changed, 403 insertions, 0 deletions
diff --git a/f3s/argocd-apps/services/beets-art.yaml b/f3s/argocd-apps/services/beets-art.yaml new file mode 100644 index 0000000..1d544d3 --- /dev/null +++ b/f3s/argocd-apps/services/beets-art.yaml @@ -0,0 +1,28 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: beets-art + namespace: cicd + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + project: default + source: + repoURL: http://git-server.cicd.svc.cluster.local/conf.git + targetRevision: master + path: f3s/beets-art/helm-chart + destination: + server: https://kubernetes.default.svc + namespace: services + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=false + retry: + limit: 3 + backoff: + duration: 5s + factor: 2 + maxDuration: 1m diff --git a/f3s/beets-art/Justfile b/f3s/beets-art/Justfile new file mode 100644 index 0000000..4313a55 --- /dev/null +++ b/f3s/beets-art/Justfile @@ -0,0 +1,66 @@ +NAMESPACE := "services" +APP_NAME := "beets-art" +CRONJOB := "beets-art" + +# Show CronJob + recent Job status +status: + @echo "=== CronJob ===" + @kubectl get cronjob -n {{NAMESPACE}} {{CRONJOB}} + @echo "" + @echo "=== Recent Jobs ===" + @kubectl get jobs -n {{NAMESPACE}} -l app.kubernetes.io/name={{APP_NAME}} --sort-by=.metadata.creationTimestamp 2>/dev/null || \ + kubectl get jobs -n {{NAMESPACE}} | grep {{CRONJOB}} || true + @echo "" + @echo "=== State PVC ===" + @kubectl get pvc -n {{NAMESPACE}} beets-art-state-pvc + @echo "" + @echo "=== ArgoCD ===" + @kubectl get application {{APP_NAME}} -n cicd -o jsonpath='Sync: {.status.sync.status}, Health: {.status.health.status}' 2>/dev/null && echo "" || echo "Not found" + +# Logs of the most recent Job (follow if still running) +logs: + #!/usr/bin/env bash + set -euo pipefail + job=$(kubectl get jobs -n {{NAMESPACE}} -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.metadata.creationTimestamp}{"\n"}{end}' \ + | grep '^{{CRONJOB}}-' | sort -k2 | tail -1 | cut -f1) + if [ -z "$job" ]; 
then + echo "No {{CRONJOB}} jobs yet." + exit 0 + fi + echo "=== logs for $job ===" + kubectl logs -n {{NAMESPACE}} job/"$job" -f + +# Trigger an ad-hoc run *right now* (does not affect the schedule). +# Useful for the initial backfill (the slow first run). just hands `$` to the shell as-is. +run-now: + @ts=$(date +%s); \ + kubectl create job --from=cronjob/{{CRONJOB}} -n {{NAMESPACE}} {{CRONJOB}}-manual-$ts; \ + echo "Created job {{CRONJOB}}-manual-$ts. Tail with: just logs" + +# Pause the schedule (e.g. while debugging or doing a one-off pass) +suspend: + kubectl patch cronjob -n {{NAMESPACE}} {{CRONJOB}} -p '{"spec":{"suspend":true}}' + +# Resume the schedule +resume: + kubectl patch cronjob -n {{NAMESPACE}} {{CRONJOB}} -p '{"spec":{"suspend":false}}' + +# Trigger ArgoCD sync +sync: + @echo "Triggering ArgoCD sync..." + @kubectl annotate application {{APP_NAME}} -n cicd argocd.argoproj.io/refresh=normal --overwrite + @sleep 2 + @kubectl get application {{APP_NAME}} -n cicd -o jsonpath='Sync: {.status.sync.status}, Health: {.status.health.status}' && echo "" + +# Show ArgoCD application details +argocd-status: + argocd app get {{APP_NAME}} --core + +# Open a transient interactive beets shell in a pod that mounts the same +# music + state volumes. Handy for debugging (e.g. `beet ls`, +# `beet fetchart -- album:Foo`, `beet embedart -f cover.jpg -- album:Foo`). 
+shell: + kubectl run beets-art-shell -n {{NAMESPACE}} \ + --rm -it --restart=Never \ + --image=lscr.io/linuxserver/beets:latest \ + --overrides='{"spec":{"nodeSelector":{"kubernetes.io/hostname":"r1.lan.buetow.org"},"containers":[{"name":"beets","image":"lscr.io/linuxserver/beets:latest","stdin":true,"tty":true,"command":["/bin/sh"],"env":[{"name":"BEETSDIR","value":"/etc/beets"}],"volumeMounts":[{"name":"music","mountPath":"/music"},{"name":"state","mountPath":"/state"},{"name":"config","mountPath":"/etc/beets","readOnly":true}]}],"volumes":[{"name":"music","persistentVolumeClaim":{"claimName":"navidrome-music-pvc"}},{"name":"state","persistentVolumeClaim":{"claimName":"beets-art-state-pvc"}},{"name":"config","configMap":{"name":"beets-art-config"}}]}}' diff --git a/f3s/beets-art/helm-chart/Chart.yaml b/f3s/beets-art/helm-chart/Chart.yaml new file mode 100644 index 0000000..b086f51 --- /dev/null +++ b/f3s/beets-art/helm-chart/Chart.yaml @@ -0,0 +1,5 @@ +apiVersion: v2 +name: beets-art +description: Nightly beets fetchart + embedart sweep for the Navidrome music library. +version: 0.1.0 +appVersion: "latest" diff --git a/f3s/beets-art/helm-chart/README.md b/f3s/beets-art/helm-chart/README.md new file mode 100644 index 0000000..e15c6db --- /dev/null +++ b/f3s/beets-art/helm-chart/README.md @@ -0,0 +1,108 @@ +# beets-art: nightly cover-art sweep for Navidrome + +A Kubernetes CronJob that runs `beets` against the Navidrome music library +to fetch external cover art (`cover.jpg` in each album folder) and embed +art into every audio file. Navidrome's own hourly scan +(`ND_SCANSCHEDULE: "1h"`, see the [navidrome chart](../../navidrome/helm-chart/templates/deployment.yaml)) +then picks the new art up. + +## What it does + +Every night at 03:30 UTC the CronJob runs three idempotent steps inside a +single short-lived pod on r1: + +1. 
`beet import -A -q --quiet-fallback=asis /music` — registers any new + albums in the beets library (`incremental: yes` skips already-known + folders). With `auto: yes` set on both `fetchart` and `embedart`, this + step alone is enough for newly imported music. +2. `beet fetchart` — backfill pass for albums that previously failed art + lookup (rate limit, network blip, missing MBID). Idempotent: skips + albums that already have art. +3. `beet embedart` — embeds art into audio files where the picture frame + is missing. `compare_threshold: 50` and `ifempty: no` make it refuse + to overwrite existing embeds. + +## Why a CronJob (not a host systemd timer) + +* Declarative: lives in git, deployed by ArgoCD, same pattern as every + other f3s service. +* Sandbox: beets, ImageMagick, and ffmpeg dependencies are pinned to the + container image — no host pollution, no Python venv drift on r1. +* Restart safety: if the job dies mid-run, k8s reaps it and the next run + picks up where it left off (`incremental: yes`). + +## Storage + +* `navidrome-music-pvc` (200 GiB hostPath PV at + `/data/nfs/k3svolumes/navidrome/music`) — **shared with Navidrome**. + Mounted at `/music` inside the CronJob pod. RWO is fine because both + pods are pinned to r1 (multi-pod single-node mounts are allowed for + RWO PVCs). +* `beets-art-state-pvc` (2 GiB local-path PVC) — holds `library.db` and + `import.log` between runs. Regenerable: deleting it just forces the + next run to re-import the whole library. +* `beets-art-config` ConfigMap — mounted at `/etc/beets/config.yaml` + (read-only) via `BEETSDIR=/etc/beets`. Single source of truth for + beets settings. + +## First run (initial backfill) + +The first run will be **slow** (potentially hours) because: + +* It has to import every album into the beets DB. +* Every album with no embedded art triggers a Cover Art Archive lookup. +* Every audio file gets rewritten to embed the new picture. 
+ +Trigger it manually instead of waiting for the nightly schedule: + +```bash +just run-now +just logs # tail the job +``` + +Before starting the backfill, ZFS-snapshot the music dataset so the +embed pass is reversible. Subsequent nightly runs touch only new or +previously-failed albums and finish in minutes. + +## Operations + +```bash +just status # CronJob, recent Jobs, PVC, ArgoCD state +just logs # follow logs of the most recent Job +just run-now # ad-hoc Job from the CronJob template +just suspend # pause the schedule +just resume # resume the schedule +just shell # interactive beets shell with the same mounts +just sync # trigger ArgoCD sync +``` + +## Tuning knobs + +In [templates/configmap.yaml](templates/configmap.yaml): + +* `embedart.maxwidth` — embedded image cap (default 1200 px). Lower for + smaller files. +* `embedart.compare_threshold` — 0 = always overwrite, 100 = never. + Default 50 is "overwrite only when the new art is broadly similar". +* `fetchart.sources` — order of art providers. Cover Art Archive first + exploits MusicBrainz IDs already present from beets imports. +* `embedart.remove_art_file: no` — keep `cover.jpg` in album folders too; + Navidrome and cmus both benefit. + +In [templates/cronjob.yaml](templates/cronjob.yaml): + +* `schedule` — change from nightly if you want a different cadence. +* `activeDeadlineSeconds` — current cap is 6 h; raise for the very first + run if your library is huge. + +## Failure handling + +* Job exits non-zero → visible in `just status` and `kubectl get jobs`. + Inspect with `just logs`. Common causes: NFS hiccup, Cover Art Archive + rate limit. The next nightly run usually clears it. +* Wrong art picked for an album → `just shell`, then + `beet mbsync album:"Bad Album"` to refresh the MBID and re-fetch, or + drop a hand-sourced `cover.jpg` into the album folder and run + `beet embedart -f cover.jpg -- album:"Bad Album"`. 
+* Library DB corruption → delete the `beets-art-state-pvc` (state is + regenerable) and let the next run rebuild it. diff --git a/f3s/beets-art/helm-chart/templates/configmap.yaml b/f3s/beets-art/helm-chart/templates/configmap.yaml new file mode 100644 index 0000000..887def5 --- /dev/null +++ b/f3s/beets-art/helm-chart/templates/configmap.yaml @@ -0,0 +1,59 @@ +# Beets configuration injected as /etc/beets/config.yaml inside the CronJob +# container (BEETSDIR=/etc/beets). The library DB lives on the PVC at +# /state/library.db so it survives across runs while the config itself stays +# declarative in git. +# +# Design notes: +# - directory: /music is the same path Navidrome sees (hostPath PV mount). +# - import.copy/move: no -> never relocate Navidrome's tree; only register +# paths and (with auto fetchart/embedart) attach art. +# - import.write: yes -> let beets write tags it learns (mostly a no-op +# here because we pass -A on the CLI to skip autotagging). +# - import.incremental: yes -> beets remembers which directories it has +# already imported, so subsequent runs only touch new albums. +# - fetchart/embedart auto: yes -> the import pass alone covers new music. +# The explicit `beet fetchart`/`beet embedart` invocations in the +# CronJob command are the *backfill* sweep for albums that previously +# failed art lookup. +# - embedart.compare_threshold: 50 -> refuse to overwrite an existing +# embed that diverges wildly from the new candidate (guards against +# accidental "wrong cover stomps right cover"). +# - embedart.remove_art_file: no -> keep cover.jpg in the album folder +# too; Navidrome serves it directly without decoding tags, and cmus / +# file managers benefit. 
+apiVersion: v1 +kind: ConfigMap +metadata: + name: beets-art-config + namespace: services +data: + config.yaml: | + directory: /music + library: /state/library.db + statefile: /state/state.pickle # incremental-import state; BEETSDIR itself is a read-only mount + plugins: fetchart embedart + + import: + copy: no + move: no + write: yes + incremental: yes + quiet_fallback: asis + detail: no + log: /state/import.log + + fetchart: + auto: yes + cautious: yes + cover_names: cover front folder album + sources: coverart itunes amazon albumart + maxwidth: 1200 + enforce_ratio: no + store_source: yes + + embedart: + auto: yes + maxwidth: 1200 + compare_threshold: 50 + ifempty: yes # skip files that already carry embedded art + remove_art_file: no diff --git a/f3s/beets-art/helm-chart/templates/cronjob.yaml b/f3s/beets-art/helm-chart/templates/cronjob.yaml new file mode 100644 index 0000000..6f969e9 --- /dev/null +++ b/f3s/beets-art/helm-chart/templates/cronjob.yaml @@ -0,0 +1,117 @@ +# beets-art: nightly sweep that fetches and embeds cover art for every +# album in the Navidrome music library, then lets Navidrome's own hourly +# scan (ND_SCANSCHEDULE=1h) pick the changes up. +# +# Why this works alongside Navidrome: +# - The music PVC is RWO. Kubernetes RWO permits multi-pod mounts as long +# as all pods land on the *same node*. Both Navidrome and this CronJob +# pin to r1 via nodeSelector, so concurrent mounts are fine. +# - Navidrome and beets only conflict on file *write* if they touch the +# same file at the same instant. Navidrome writes nothing under /music +# (read-only consumer); beets writes art into album folders and embeds +# into audio files. No real contention. +# +# Why it is safe to re-run forever: +# - import.incremental: yes -> already-imported album folders are skipped. +# - fetchart skips albums that already have a cover image. +# - embedart with ifempty: yes skips files that already carry art, and +# compare_threshold: 50 refuses risky overwrites. 
+apiVersion: batch/v1 +kind: CronJob +metadata: + name: beets-art + namespace: services +spec: + # 03:30 UTC nightly: off-peak, well before Navidrome's next scan slot, + # leaves headroom for a long first-run backfill. + schedule: "30 3 * * *" + # Forbid stacking: a long backfill run must not get a second worker on + # top of it (would race on the SQLite library DB). + concurrencyPolicy: Forbid + startingDeadlineSeconds: 300 + successfulJobsHistoryLimit: 3 + failedJobsHistoryLimit: 3 + jobTemplate: + spec: + # Do not auto-retry: art-fetch failures are usually rate-limit or + # network blips that resolve by the next nightly run. Spamming + # retries would just hammer Cover Art Archive. + backoffLimit: 0 + # Cap a runaway run at 6h. The first backfill of a 200 GB library + # may take a few hours; steady state is minutes. + activeDeadlineSeconds: 21600 + template: + spec: + restartPolicy: Never + nodeSelector: + kubernetes.io/hostname: r1.lan.buetow.org + containers: + - name: beets + image: lscr.io/linuxserver/beets:latest + imagePullPolicy: IfNotPresent + env: + # Tell beets to read config from the ConfigMap mount instead + # of its default ~/.config/beets, so the PVC only stores + # mutable state (library.db, logs). + - name: BEETSDIR + value: /etc/beets + # Override the linuxserver s6 entrypoint; we just need the + # `beet` CLI for a one-shot job. Running as root (the image + # default when s6 is bypassed) so we can write into the NFS + # music tree, which Navidrome also writes as root. + command: ["/bin/sh", "-c"] + args: + - | + set -u; failed=0 + echo "=== $(date -u) beets-art sweep starting ===" + + # 1. Register any new albums. -A skips autotag (we trust + # the existing tags); auto fetchart/embedart fire here + # for newly imported albums via config. + echo "--- import (incremental) ---" + beet import -A -q --quiet-fallback=asis /music || \ + { echo "import returned non-zero (continuing)"; failed=1; } + + # 2. Backfill external cover.jpg for albums missing it. 
+ echo "--- fetchart (backfill) ---" + beet fetchart || { echo "fetchart returned non-zero (continuing)"; failed=1; } + + # 3. Embed art into audio files where missing (-y: no TTY to answer the confirmation prompt). + echo "--- embedart (backfill) ---" + beet embedart -y || { echo "embedart returned non-zero (continuing)"; failed=1; } + echo "=== $(date -u) beets-art sweep finished ===" + exit "$failed" # every step still ran; non-zero here marks the Job as Failed + volumeMounts: + - name: music + mountPath: /music + - name: state + mountPath: /state + - name: config + mountPath: /etc/beets + readOnly: true + - name: tmp + mountPath: /tmp + resources: + # Generous because ImageMagick + ffprobe + SQLite scans can + # spike. Tighten after observing real usage. + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: "2" + memory: 1Gi + volumes: + - name: music + persistentVolumeClaim: + # Reuse the existing Navidrome music PVC — single source of + # truth for the library tree. RWO is OK because both pods + # are pinned to r1. + claimName: navidrome-music-pvc + - name: state + persistentVolumeClaim: + claimName: beets-art-state-pvc + - name: config + configMap: + name: beets-art-config + - name: tmp + emptyDir: {} diff --git a/f3s/beets-art/helm-chart/templates/persistent-volume.yaml b/f3s/beets-art/helm-chart/templates/persistent-volume.yaml new file mode 100644 index 0000000..0b6c422 --- /dev/null +++ b/f3s/beets-art/helm-chart/templates/persistent-volume.yaml @@ -0,0 +1,20 @@ +# beets-art-state-pvc: small local-path PVC on r1 for the beets SQLite DB, +# logs, and any temp working files. Same reasoning as navidrome-data-pvc: +# - SQLite over NFS is fragile under concurrent locks. +# - The CronJob is pinned to r1 (see cronjob.yaml nodeSelector), and the +# local-path provisioner pins the PV to whichever node first consumes +# it (WaitForFirstConsumer), so r1 is always the home. +# Contents are essentially regenerable: deleting this PVC just forces the +# next run to re-import the library (slower one-off, then incremental again). 
+apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: beets-art-state-pvc + namespace: services +spec: + storageClassName: local-path # node-local volume: keeps the beets SQLite DB off NFS + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 2Gi |
