nfs-mount-monitor: switch to soft NFS mount + handle stale lockfile

A hard NFS mount that fails enters uninterruptible kernel sleep (D-state) which SIGKILL cannot wake, so the recovery script hangs forever and the lockfile stays — silently disabling all subsequent health checks. Switch the remount to explicit soft,timeo=50,retrans=3 so the kernel gives up after ~15s, and detect/remove lockfiles older than 90s left behind by a SIGKILL'd predecessor.
author: Paul Buetow <paul@buetow.org> 2026-05-16 15:21:54 +0300
committer: Paul Buetow <paul@buetow.org> 2026-05-16 15:21:54 +0300
commit: 98217b5ab29265d2662bebf0a1d946eaead80dbd (patch)
tree: 4c3a63b124e03dd46666c7bf85be2558cfbe4e80
parent: f633a65a3592805f8d458c8d2cbe979e63484c02 (diff)
1 files changed, 26 insertions, 6 deletions
diff --git a/f3s/r-nodes/nfs-mount-monitor/check-nfs-mount.sh b/f3s/r-nodes/nfs-mount-monitor/check-nfs-mount.sh
index 882b982..a2800e8 100644
--- a/f3s/r-nodes/nfs-mount-monitor/check-nfs-mount.sh
+++ b/f3s/r-nodes/nfs-mount-monitor/check-nfs-mount.sh
@@ -23,7 +23,15 @@
 #   3. umount -f            (force unmount)
 #   4. umount -l            (lazy detach VFS node if -f failed)
 #   5. systemctl restart stunnel + 2s sleep (refresh the TLS transport)
-#   6. mount                (fresh mount via stunnel)
+#   6. mount with soft+short-timeo (NFS-specific; avoids infinite kernel D-state)
+#
+# Step 6 and the direct-mount attempt use explicit soft NFS options, not `mount $MOUNT_POINT`
+# (which would read the fstab `hard` flag).  A `hard` NFS mount that fails
+# to reach the server enters uninterruptible kernel sleep (D-state), and
+# SIGKILL cannot wake a D-state process on Linux — the script would hang
+# indefinitely, leaving the lock file stale.  With `soft,timeo=50,retrans=3`
+# the kernel gives up after ~15s and returns ETIMEDOUT, allowing the fail
+# counter to increment and eventually trigger the reboot escalation.
 #
 # A hard 60-second deadline is enforced so the function can never outlast
 # its own timer interval (10s) by more than 6x, preventing timer pile-up.
@@ -61,9 +69,19 @@ NFS_FAIL_THRESHOLD=5
 # shellcheck source=/etc/default/nfs-mount-monitor
 [ -f /etc/default/nfs-mount-monitor ] && . /etc/default/nfs-mount-monitor
 
-# Use a lock file to prevent concurrent runs (timer fires every 10 s)
+# Use a lock file to prevent concurrent runs (timer fires every 10 s).
+# A lock at least MAX_LOCK_AGE_SECS old (default 90; may be overridden in
+# /etc/default/nfs-mount-monitor) was left by a run that was SIGKILL'd before
+# its EXIT trap could clean up (e.g. systemd's own timeout).  Remove it and
+# continue rather than silently skipping all health checks forever.
+MAX_LOCK_AGE_SECS="${MAX_LOCK_AGE_SECS:-90}"
 if [ -f "$LOCK_FILE" ]; then
-    exit 0
+    lock_age=$(( $(date +%s) - $(stat -c %Y "$LOCK_FILE") ))
+    if (( lock_age < MAX_LOCK_AGE_SECS )); then
+        exit 0
+    fi
+    echo "Stale lock file detected (age=${lock_age}s >= ${MAX_LOCK_AGE_SECS}s) — removing and continuing"
+    rm -f "$LOCK_FILE"
 fi
 touch "$LOCK_FILE"
 trap "rm -f $LOCK_FILE" EXIT
@@ -187,7 +205,8 @@ fix_mount () {
         echo "$MOUNT_POINT is still a valid mountpoint after remount; trying fresh mount"
     else
         echo "$MOUNT_POINT is not a valid mountpoint — attempting direct mount"
-        if mount "$MOUNT_POINT" 2>/dev/null; then
+        if mount -t nfs4 -o port=2323,soft,timeo=50,retrans=3 \
+               127.0.0.1:/k3svolumes "$MOUNT_POINT" 2>/dev/null; then
             echo "Successfully mounted $MOUNT_POINT"
             MOUNT_FIXED=1
             return 0
@@ -235,9 +254,10 @@ fix_mount () {
 
     check_deadline || return 1
 
-    # --- Step 6: fresh mount ---
+    # --- Step 6: fresh mount (soft options — see top comment for rationale) ---
     echo "Attempting to mount $MOUNT_POINT"
-    if mount "$MOUNT_POINT" 2>/dev/null; then
+    if mount -t nfs4 -o port=2323,soft,timeo=50,retrans=3 \
+           127.0.0.1:/k3svolumes "$MOUNT_POINT" 2>/dev/null; then
         echo "NFS mount $MOUNT_POINT mounted successfully"
         MOUNT_FIXED=1
         return 0
author	Paul Buetow <paul@buetow.org>	2026-05-16 15:21:54 +0300
committer	Paul Buetow <paul@buetow.org>	2026-05-16 15:21:54 +0300
commit	98217b5ab29265d2662bebf0a1d946eaead80dbd (patch)
tree	4c3a63b124e03dd46666c7bf85be2558cfbe4e80
parent	f633a65a3592805f8d458c8d2cbe979e63484c02 (diff)