From 90feabaf14b67ef61124881164ef3636b1827398 Mon Sep 17 00:00:00 2001 From: Paul Buetow Date: Mon, 4 May 2026 23:20:21 +0300 Subject: askcli: serialize task alias cache writes to fix concurrent rename race Two concurrent 'ask' invocations could race on the alias cache file: both wrote the JSON to a shared '.tmp' filename and then both called os.Rename, so the loser failed with: replace task alias cache: rename .../task-aliases-v2.json.tmp .../task-aliases-v2.json: no such file or directory The shared tempfile also enabled lost updates because each process loaded the file independently before saving its own version on top. Fix: - Take an exclusive flock on a sentinel file (task-aliases-v2.json.lock) in the cache directory around the full load/modify/save cycle in both ensureTaskAliases and resolveTaskSelectorFromCache, using the existing internal/filelock package. - Switch save() to os.CreateTemp so each writer gets a unique tempfile name; the loser's tempfile is removed cleanly on rename failure. - Refactor resolveTaskSelectorFromCache by extracting finalizeResolvedTaskSelector to keep functions under 50 lines. Adds TestEnsureTaskAliases_ConcurrentCallsDoNotRaceOnTempFile, which reproduces the original error reliably on the unfixed code and now passes with -race. Amp-Thread-ID: https://ampcode.com/threads/T-019df49f-52a5-75b1-98d5-371a163ef100 Co-authored-by: Amp --- internal/askcli/task_alias_cache_test.go | 83 ++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) (limited to 'internal/askcli/task_alias_cache_test.go') diff --git a/internal/askcli/task_alias_cache_test.go b/internal/askcli/task_alias_cache_test.go index f93a762..7cfa2a7 100644 --- a/internal/askcli/task_alias_cache_test.go +++ b/internal/askcli/task_alias_cache_test.go @@ -2,8 +2,10 @@ package askcli import ( "encoding/json" + "fmt" "os" "path/filepath" + "sync" "testing" "time" ) @@ -292,6 +294,87 @@ func TestEnsureTaskAliases_RejectsNextIDReuse(t *testing.T) { } } +func TestEnsureTaskAliases_ConcurrentCallsDoNotRaceOnTempFile(t *testing.T) { + dir := t.TempDir() + + oldNow := nowTaskAliasCache + oldRoot := taskAliasCacheRoot + nowTaskAliasCache = func() time.Time { return time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC) } + taskAliasCacheRoot = func() (string, error) { return filepath.Join(dir, "hexai"), nil } + defer func() { + nowTaskAliasCache = oldNow + taskAliasCacheRoot = oldRoot + }() + + const goroutines = 16 + uuids := make([]string, goroutines) + for i := range uuids { + uuids[i] = fmt.Sprintf("uuid-%02d", i) + } + + // Release all goroutines simultaneously to maximise the chance of a race + // before the fix (shared .tmp filename + concurrent rename) and to prove + // the fix's locking serialises load/modify/save correctly. + var start sync.WaitGroup + start.Add(1) + var done sync.WaitGroup + errs := make([]error, goroutines) + for i := 0; i < goroutines; i++ { + done.Add(1) + go func(idx int) { + defer done.Done() + start.Wait() + _, err := ensureTaskAliases([]TaskExport{{UUID: uuids[idx]}}) + errs[idx] = err + }(i) + } + start.Done() + done.Wait() + + for i, err := range errs { + if err != nil { + t.Fatalf("goroutine %d: ensureTaskAliases returned error: %v", i, err) + } + } + + path, err := taskAliasCachePath() + if err != nil { + t.Fatalf("taskAliasCachePath: %v", err) + } + cache := readTaskAliasCacheForTest(t, path) + if got, want := len(cache.Entries), goroutines; got != want { + t.Fatalf("len(Entries) = %d, want %d (cache lost updates under concurrency)", got, want) + } + if got, want := cache.NextID, uint64(goroutines); got != want { + t.Fatalf("NextID = %d, want %d", got, want) + } + for _, uuid := range uuids { + if !hasTaskAliasEntry(cache, uuid) { + t.Fatalf("expected entry for %s after concurrent writes", uuid) + } + } + // Aliases must be unique — if save() is racy, two UUIDs could share an + // alias because both processes saw the same NextID. + seen := make(map[string]string, len(cache.Entries)) + for _, entry := range cache.Entries { + if existing, ok := seen[entry.Alias]; ok { + t.Fatalf("alias %q assigned to both %s and %s", entry.Alias, existing, entry.UUID) + } + seen[entry.Alias] = entry.UUID + } + + // No leftover .tmp files should remain in the cache directory. + entries, err := os.ReadDir(filepath.Dir(path)) + if err != nil { + t.Fatalf("ReadDir: %v", err) + } + for _, e := range entries { + if filepath.Ext(e.Name()) == ".tmp" { + t.Fatalf("leftover temp file: %s", e.Name()) + } + } +} + func readTaskAliasCacheForTest(t *testing.T, path string) taskAliasCache { t.Helper() -- cgit v1.2.3