2 files changed, 46 insertions, 19 deletions
diff --git a/lib/hyperstack/cli.rb b/lib/hyperstack/cli.rb
index f4d1cef..d4679b1 100644
--- a/lib/hyperstack/cli.rb
+++ b/lib/hyperstack/cli.rb
@@ -21,12 +21,12 @@ module HyperstackVM
       puts 'Commands:'
       puts '  create [--replace] [--dry-run] [--vllm|--no-vllm] [--ollama|--no-ollama] [--model PRESET]'
       puts '  create-both [--replace] [--dry-run] [--vllm|--no-vllm] [--ollama|--no-ollama]'
-      puts '               Provision hyperstack-vm1-gptoss.toml and hyperstack-vm2.toml concurrently.'
+      puts '               Provision hyperstack-vm1-coder.toml and hyperstack-vm2.toml concurrently.'
       puts '               WireGuard setup is serialized: VM1 writes the base wg1.conf first,'
       puts '               then VM2 adds its peer. Requires both TOML files next to the script.'
       puts '  delete [--vm-id ID] [--dry-run]'
       puts '  delete-both [--dry-run]'
-      puts '               Delete the VMs tracked by hyperstack-vm1-gptoss.toml and hyperstack-vm2.toml.'
+      puts '               Delete the VMs tracked by hyperstack-vm1-coder.toml and hyperstack-vm2.toml.'
       puts '  status'
       puts '  watch'
       puts '               Poll all active VMs for vLLM and GPU stats every 60 s.'
@@ -237,7 +237,7 @@ module HyperstackVM
 
       candidates = [
         @config_path,
-        File.join(REPO_ROOT, 'hyperstack-vm1-gptoss.toml'),
+        File.join(REPO_ROOT, 'hyperstack-vm1-coder.toml'),
         File.join(REPO_ROOT, 'hyperstack-vm2.toml'),
         File.join(REPO_ROOT, 'hyperstack-vm-photo.toml')
       ].uniq.select { |path| File.exist?(path) }
@@ -249,7 +249,7 @@ module HyperstackVM
 
     def pair_config_loaders
       [
-        ConfigLoader.load(File.join(REPO_ROOT, 'hyperstack-vm1-gptoss.toml')),
+        ConfigLoader.load(File.join(REPO_ROOT, 'hyperstack-vm1-coder.toml')),
         ConfigLoader.load(File.join(REPO_ROOT, 'hyperstack-vm2.toml'))
       ]
     end
diff --git a/lib/hyperstack/watcher.rb b/lib/hyperstack/watcher.rb
index de3d71e..1c126c5 100644
--- a/lib/hyperstack/watcher.rb
+++ b/lib/hyperstack/watcher.rb
@@ -22,11 +22,14 @@ module HyperstackVM
 
     # Snapshot of one VM's stats at a point in time.
     # service_type is :vllm or :comfyui — controls which metrics section is rendered.
+    # loading_status holds the last meaningful log line while vLLM is still initialising;
+    # it is nil once the Engine 0 stats line starts appearing.
     VmSnapshot = Struct.new(
       :label, :wg_host, :service_type,
       :vllm_model, :container_name,
       :metrics, :gpus,
       :vllm_error, :gpu_error,
+      :loading_status,
       :fetched_at,
       keyword_init: true
     )
@@ -78,7 +81,7 @@ module HyperstackVM
                               vllm_model: nil, container_name: nil,
                               metrics: nil, gpus: nil,
                               vllm_error: 'no state file', gpu_error: nil,
-                              fetched_at: Time.now)
+                              loading_status: nil, fetched_at: Time.now)
       end
 
       if config.comfyui_install_enabled?
@@ -91,7 +94,7 @@ module HyperstackVM
                      vllm_model: nil, container_name: nil,
                      metrics: nil, gpus: nil,
                      vllm_error: e.message, gpu_error: nil,
-                     fetched_at: Time.now)
+                     loading_status: nil, fetched_at: Time.now)
     end
 
     # Fetches GPU + vLLM container stats for a vLLM VM.
@@ -99,13 +102,13 @@ module HyperstackVM
       vllm_model     = state['vllm_model'] || config.vllm_model
       container_name = state['vllm_container_name'] || config.vllm_container_name
 
-      gpus, metrics, ssh_error = fetch_vm_stats(config, wg_host, container_name)
+      gpus, metrics, loading_status, ssh_error = fetch_vm_stats(config, wg_host, container_name)
 
       VmSnapshot.new(label: label, wg_host: wg_host, service_type: :vllm,
                      vllm_model: vllm_model, container_name: container_name,
                      metrics: metrics, gpus: gpus,
                      vllm_error: ssh_error, gpu_error: ssh_error,
-                     fetched_at: Time.now)
+                     loading_status: loading_status, fetched_at: Time.now)
     end
 
     # Fetches GPU + ComfyUI queue stats for a ComfyUI VM.
@@ -117,7 +120,7 @@ module HyperstackVM
                      vllm_model: nil, container_name: nil,
                      metrics: metrics, gpus: gpus,
                      vllm_error: ssh_error, gpu_error: ssh_error,
-                     fetched_at: Time.now)
+                     loading_status: nil, fetched_at: Time.now)
     end
 
     def load_state(path)
@@ -167,26 +170,37 @@ module HyperstackVM
     end
 
     # Single SSH call that runs nvidia-smi and tails the vLLM container logs.
-    # The two sections are separated by a sentinel line so we can split them.
-    # Returns [gpus, metrics, error_or_nil].
+    # Captures the Engine 0 stats line (present once the model is running) and,
+    # when that line is absent, the last relevant loading-phase log line so the
+    # watch display can show model-download / weight-load progress.
+    # Returns [gpus, metrics, loading_status, error_or_nil].
     def fetch_vm_stats(config, wg_host, container_name)
       gpu_query = 'index,name,temperature.gpu,utilization.gpu,power.draw,memory.used,memory.total'
-      # --tail 200 instead of --since N so we always get the last stats line
+      # Capture logs once into a shell variable to avoid two docker calls.
+      # --tail 300 instead of --since N so we always get the last stats line
       # even when the VM has been idle for longer than the refresh interval.
-      script    = <<~BASH
+      # grep exit 1 (no match) is swallowed by the pipeline tail -1, which
+      # always succeeds, so bash -se does not abort on an empty grep result.
+      script = <<~BASH
         nvidia-smi --query-gpu=#{gpu_query} --format=csv,noheader,nounits
         echo ===VLLM===
-        docker logs --tail 200 #{container_name} 2>&1 | grep 'Engine 0' | tail -1
+        _logs=$(docker logs --tail 300 #{container_name} 2>&1)
+        echo "$_logs" | grep 'Engine 0' | tail -1
+        echo ===LOADING===
+        echo "$_logs" | grep -E 'Starting to load|Loading model|model weight|Downloading|GPU block|Profil|shard|Initializ|quantiz|AWQ' | tail -1
       BASH
 
       ssh = build_ssh_command(config, wg_host)
       stdout, stderr, status = Timeout.timeout(15) { Open3.capture3(*ssh, stdin_data: script) }
-      return [nil, nil, "exit #{status.exitstatus}: #{stderr.strip}"] unless status.success?
+      return [nil, nil, nil, "exit #{status.exitstatus}: #{stderr.strip}"] unless status.success?
 
-      gpu_section, vllm_section = stdout.split("===VLLM===\n", 2)
+      gpu_section, rest       = stdout.split("===VLLM===\n", 2)
+      vllm_section, load_section = rest.to_s.split("===LOADING===\n", 2)
       gpus    = parse_nvidia_smi(gpu_section.to_s)
       metrics = parse_engine_log_line(vllm_section.to_s.strip)
-      [gpus, metrics, nil]
+      # Only surface the loading line while the engine stats aren't available yet.
+      loading_status = metrics.empty? ? clean_log_line(load_section.to_s.strip) : nil
+      [gpus, metrics, loading_status, nil]
     end
 
     # Parse a vLLM "Engine 0" log line into a plain Hash.
@@ -216,6 +230,14 @@ module HyperstackVM
       m ? m[1].to_f : nil
     end
 
+    # Strips the vLLM log prefix "(EngineCore pid=N) INFO YYYY-MM-DD HH:MM:SS [file.py:NN]"
+    # so only the human-readable message is shown in the watch display.
+    def clean_log_line(line)
+      return line if line.empty?
+
+      line.sub(/^\(.*?pid=\d+\)\s+\w+\s+[\d-]+\s+[\d:]+\s+\[[\w.]+:\d+\]\s*/, '').strip
+    end
+
     # Build an SSH command array for the watcher.
     # Uses accept-new rather than yes because the known-hosts file was populated
     # with the VM's public IP during provisioning; the WireGuard hostname
@@ -330,8 +352,13 @@ module HyperstackVM
           lines.concat(render_comfyui_metrics(snap.metrics))
         elsif snap.metrics&.any?
           lines.concat(render_vllm_metrics(snap.metrics))
-        elsif snap.metrics && snap.metrics.empty?
-          lines << "  #{DIM}(no Engine log line yet — container may still be loading)#{RESET}"
+        elsif snap.metrics
+          # Engine stats not yet available — model is still loading.
+          if snap.loading_status && !snap.loading_status.empty?
+            lines << row('loading', "#{YELLOW}#{snap.loading_status}#{RESET}")
+          else
+            lines << "  #{DIM}(container starting…)#{RESET}"
+          end
         end
       end