From cbc74944490a18c2cccbc8a108b3665b2cf3650b Mon Sep 17 00:00:00 2001
From: Paul Buetow
Date: Sun, 24 May 2026 12:26:07 +0300
Subject: fix(cli): avoid false VM2 abort when VM1 fails after WG step succeeded

In run_create_both, VM1's thread rescue unconditionally set
vm1_wg_state[:error], even when the WireGuard step had already signaled
success (vm1_wg_state[:done] = true). If VM2 was waiting on the
condition variable at that moment, it would raise 'VM1 WireGuard setup
failed' and abort needlessly.

Now the rescue only sets :error when :done is still false, so a
downstream VM1 failure (e.g. vLLM install) no longer leaks to VM2.

Resolves agent task ic.
---
 lib/hyperstack/cli.rb | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/hyperstack/cli.rb b/lib/hyperstack/cli.rb
index 479f1c3..b1f0ca6 100644
--- a/lib/hyperstack/cli.rb
+++ b/lib/hyperstack/cli.rb
@@ -300,8 +300,10 @@ module HyperstackVM
       rescue Error => e
         errors_mutex.synchronize { errors[:vm1] = e.message }
         # Unblock VM2 even if VM1 failed so the process doesn't hang.
+        # Only set the error flag if the WireGuard step itself failed.
+        # If WG already succeeded (:done is true), VM2 should proceed.
         wg_mutex.synchronize do
-          vm1_wg_state[:error] = e.message
+          vm1_wg_state[:error] = e.message unless vm1_wg_state[:done]
           wg_cv.broadcast
         end
       end
-- 
cgit v1.2.3