summaryrefslogtreecommitdiff
path: root/internal/askcli
diff options
context:
space:
mode:
authorPaul Buetow <paul@buetow.org>2026-05-29 17:45:17 +0300
committerPaul Buetow <paul@buetow.org>2026-05-29 17:45:17 +0300
commit6eccdbfa107d16670e5bd96d31e075493d0ee2d6 (patch)
tree4fbfa771feb08ee4fa1ba5394155f9bef4506143 /internal/askcli
parent5b49734f3e2bba38e689f274d39e5ff3b52f529d (diff)
askcli: prevent stale-lock inode split during contention
Diffstat (limited to 'internal/askcli')
-rw-r--r--internal/askcli/runlock.go31
-rw-r--r--internal/askcli/runlock_test.go89
2 files changed, 96 insertions, 24 deletions
diff --git a/internal/askcli/runlock.go b/internal/askcli/runlock.go
index d0d7be3..9fbe0cd 100644
--- a/internal/askcli/runlock.go
+++ b/internal/askcli/runlock.go
@@ -16,8 +16,6 @@ import (
const askRepoLockFile = "hexai-ask.lock"
-var errAskLockReopen = errors.New("ask lock: reopen after stale file removal")
-
func lockProcessLabel() string {
if exe, err := os.Executable(); err == nil {
if b := filepath.Base(exe); b != "" && b != "." {
@@ -70,11 +68,9 @@ func writeLockMetadata(f *os.File, pid int, comm string) error {
// waitOrAcquireAskLockFD tries to take an exclusive lock on f, or blocks until ctx ends.
// On success it writes lock metadata and returns an unlock function (which closes f).
-// errAskLockReopen means the caller should open the lock path again after stale removal.
func waitOrAcquireAskLockFD(
ctx context.Context,
f *os.File,
- lockPath string,
comm string,
retryTimer *time.Timer,
) (func() error, error) {
@@ -98,12 +94,10 @@ func waitOrAcquireAskLockFD(
}
pid := readLockHolderPID(f)
+ // Keep waiting even if metadata appears stale: removing a contended lock file can
+ // split ownership across different inodes and break serialization guarantees.
if pid > 0 && lockHolderIsStale(pid, comm) {
- _ = f.Close()
- if rerr := os.Remove(lockPath); rerr != nil && !errors.Is(rerr, os.ErrNotExist) {
- return nil, fmt.Errorf("ask lock: remove stale %s: %w", lockPath, rerr)
- }
- return nil, errAskLockReopen
+ // Intentional no-op: contention is resolved only by waiting for flock release.
}
retryTimer.Reset(5 * time.Millisecond)
@@ -128,20 +122,9 @@ func acquireAskRepoLock(ctx context.Context, gitRoot string) (func() error, erro
retryTimer := time.NewTimer(5 * time.Millisecond)
defer retryTimer.Stop()
- for removalAttempts := 0; removalAttempts < 16; removalAttempts++ {
- f, err := os.OpenFile(lockPath, os.O_CREATE|os.O_RDWR, 0o600)
- if err != nil {
- return nil, fmt.Errorf("ask lock: open %s: %w", lockPath, err)
- }
- unlock, err := waitOrAcquireAskLockFD(ctx, f, lockPath, comm, retryTimer)
- if err == nil {
- return unlock, nil
- }
- if errors.Is(err, errAskLockReopen) {
- continue
- }
- return nil, err
+ f, err := os.OpenFile(lockPath, os.O_CREATE|os.O_RDWR, 0o600)
+ if err != nil {
+ return nil, fmt.Errorf("ask lock: open %s: %w", lockPath, err)
}
-
- return nil, fmt.Errorf("ask lock: could not acquire %s after stale recovery attempts", lockPath)
+ return waitOrAcquireAskLockFD(ctx, f, comm, retryTimer)
}
diff --git a/internal/askcli/runlock_test.go b/internal/askcli/runlock_test.go
index f56f214..8ef8f2c 100644
--- a/internal/askcli/runlock_test.go
+++ b/internal/askcli/runlock_test.go
@@ -8,8 +8,15 @@ import (
"sync/atomic"
"testing"
"time"
+
+ "codeberg.org/snonux/hexai/internal/filelock"
)
+type lockResult struct {
+ unlock func() error
+ err error
+}
+
func TestAcquireAskRepoLock_SerializesConcurrentHolders(t *testing.T) {
tmp := t.TempDir()
if err := os.MkdirAll(filepath.Join(tmp, ".git"), 0o755); err != nil {
@@ -44,3 +51,85 @@ func TestAcquireAskRepoLock_SerializesConcurrentHolders(t *testing.T) {
t.Fatalf("max concurrent lock holders = %d, want 1", got)
}
}
+
+func TestAcquireAskRepoLock_StaleMetadataDoesNotRotateContendedLockFile(t *testing.T) {
+ tmp := t.TempDir()
+ holder, lockPath, origInfo := prepareContendedStaleLock(t, tmp)
+
+ ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
+ defer cancel()
+ resultCh := acquireLockAsync(ctx, tmp)
+
+ select {
+ case result := <-resultCh:
+ if result.unlock != nil {
+ _ = result.unlock()
+ }
+ t.Fatalf("lock acquired while holder still held lock: %v", result.err)
+ case <-time.After(40 * time.Millisecond):
+ }
+
+ curInfo, err := os.Stat(lockPath)
+ if err != nil {
+ t.Fatalf("stat contended lock: %v", err)
+ }
+ if !os.SameFile(origInfo, curInfo) {
+ t.Fatal("contended lock file was replaced while locked")
+ }
+
+ releaseContendedLock(t, holder)
+
+ result := <-resultCh
+ if result.err != nil {
+ t.Fatalf("contender lock: %v", result.err)
+ }
+ if result.unlock == nil {
+ t.Fatal("contender returned nil unlock")
+ }
+ if err := result.unlock(); err != nil {
+ t.Fatalf("contender unlock: %v", err)
+ }
+}
+
+func prepareContendedStaleLock(t *testing.T, gitRoot string) (*os.File, string, os.FileInfo) {
+ t.Helper()
+ lockDir := filepath.Join(gitRoot, ".git")
+ if err := os.MkdirAll(lockDir, 0o755); err != nil {
+ t.Fatal(err)
+ }
+ lockPath := filepath.Join(lockDir, askRepoLockFile)
+ holder, err := os.OpenFile(lockPath, os.O_CREATE|os.O_RDWR, 0o600)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if err := filelock.TryExclusive(holder); err != nil {
+ t.Fatalf("holder lock: %v", err)
+ }
+ if err := writeLockMetadata(holder, 999999, "ask"); err != nil {
+ t.Fatalf("write stale metadata: %v", err)
+ }
+ origInfo, err := os.Stat(lockPath)
+ if err != nil {
+ t.Fatalf("stat original lock: %v", err)
+ }
+ return holder, lockPath, origInfo
+}
+
+func acquireLockAsync(ctx context.Context, gitRoot string) <-chan lockResult {
+ resultCh := make(chan lockResult, 1)
+ go func() {
+ unlock, err := acquireAskRepoLock(ctx, gitRoot)
+ resultCh <- lockResult{unlock: unlock, err: err}
+ }()
+ return resultCh
+}
+
+func releaseContendedLock(t *testing.T, holder *os.File) {
+ t.Helper()
+ if err := filelock.UnlockExclusive(holder); err != nil {
+ t.Fatalf("release holder lock: %v", err)
+ }
+ if err := holder.Close(); err != nil {
+ t.Fatalf("close holder file: %v", err)
+ }
+}