4 files changed, 914 insertions, 163 deletions
diff --git a/snippets/hyperstack/hyperstack-vm1.toml b/snippets/hyperstack/hyperstack-vm1.toml
new file mode 100644
index 0000000..c5c940a
--- /dev/null
+++ b/snippets/hyperstack/hyperstack-vm1.toml
@@ -0,0 +1,199 @@
+[auth]
+api_key_file = "~/.hyperstack"
+
+[hyperstack]
+base_url = "https://infrahub-api.nexgencloud.com/v1"
+
+[state]
+# Separate state file for VM1 so vm1 and vm2 can be managed independently.
+file = ".hyperstack-vm1-state.json"
+
+[vm]
+name_prefix = "hyperstack1"
+hostname = "hyperstack1"
+environment_name = "snonux-ollama"
+
+# A100-80GB is the cost-first default for nemotron-3-super inference.
+# Switch this to n3-H100x1 if you want safer throughput and compatibility headroom.
+flavor_name = "n3-A100x1"
+image_name = "Ubuntu Server 24.04 LTS R570 CUDA 12.8 with Docker"
+assign_floating_ip = true
+create_bootable_volume = false
+enable_port_randomization = false
+labels = ["nemotron-3-super", "wireguard"]
+
+[ssh]
+username = "ubuntu"
+private_key_path = "~/.ssh/id_rsa"
+hyperstack_key_name = "earth"
+port = 22
+connect_timeout_sec = 10
+
+[network]
+wireguard_udp_port = 56710
+wireguard_subnet = "192.168.3.0/24"
+# VM1 uses the first host address of the subnet (subnet base + 1), the same value the tool derives when this key is unset.
+# earth (client) is 192.168.3.2; VM1 is 192.168.3.1; VM2 is 192.168.3.3.
+wireguard_server_ip = "192.168.3.1"
+# Port 11434 is shared by both Ollama and vLLM for firewall compatibility.
+ollama_port = 11434
+# Port 4000: LiteLLM Anthropic-API proxy (used with vLLM).
+litellm_port = 4000
+# NOTE(review): 0.0.0.0/0 admits SSH and WireGuard traffic from any source address; narrow if the client IP is stable.
+allowed_ssh_cidrs = ["0.0.0.0/0"]
+allowed_wireguard_cidrs = ["0.0.0.0/0"]
+
+[bootstrap]
+enable_guest_bootstrap = true
+install_wireguard = true
+configure_ufw = true
+configure_ollama_host = false
+
+[ollama]
+# Disabled in favour of vLLM; set install = true to switch back to Ollama.
+install = false
+models_dir = "/ephemeral/ollama/models"
+listen_host = "0.0.0.0:11434"
+gpu_overhead_mb = 2000
+num_parallel = 1
+context_length = 32768
+pull_models = ["nemotron-3-super"]
+
+# vLLM serves one model via Docker; LiteLLM translates Anthropic API → OpenAI.
+# VM1 defaults to nemotron-3-super; use 'model switch' to load any other preset.
+[vllm]
+install = true
+model = "cyankiwi/NVIDIA-Nemotron-3-Super-120B-A12B-AWQ-4bit"
+# HuggingFace model cache on ephemeral NVMe (fast; survives reboots on most providers).
+hug_cache_dir = "/ephemeral/hug"
+container_name = "vllm_nemotron_super"
+max_model_len = 262144
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+# Nemotron-3-Super emits tool calls in the same XML format as Qwen3, hence the qwen3_xml parser.
+tool_call_parser = "qwen3_xml"
+trust_remote_code = true
+extra_vllm_args = ["--reasoning-parser", "nemotron_v3"]
+litellm_master_key = "sk-litellm-master"  # NOTE(review): guessable placeholder; confirm port 4000 is only reachable over the tunnel
+# LiteLLM maps each entry to the vLLM model; add new Anthropic model IDs here.
+litellm_claude_model_names = [
+  "claude-sonnet-4-20250514",
+  "claude-opus-4-20250514",
+  "claude-opus-4-6-20260604",
+  "claude-haiku-3-5-20241022"
+]
+
+# Named model presets for 'ruby hyperstack.rb --config hyperstack-vm1.toml model switch <name>'.
+# Each preset overrides the matching [vllm] field; unset fields fall back to [vllm] defaults.
+# That fallback includes trust_remote_code and extra_vllm_args, so every preset pins both
+# explicitly to avoid inheriting the Nemotron-specific values set in [vllm] above.
+
+[vllm.presets.qwen3-coder-next]
+model = "bullpoint/Qwen3-Coder-Next-AWQ-4bit"
+container_name = "vllm_qwen3"
+max_model_len = 262144
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = "qwen3_coder"
+trust_remote_code = false
+extra_vllm_args = []
+
+# NVIDIA Nemotron-3-Super-120B-A12B AWQ 4-bit — hybrid Mamba+MoE (12B active / 120B total).
+# ~60 GB weights on A100 80GB. Uses NoPE so context can be set to 1M; no YaRN needed.
+# Requires trust_remote_code=true for the nemotron_h architecture.
+[vllm.presets.nemotron-super]
+model = "cyankiwi/NVIDIA-Nemotron-3-Super-120B-A12B-AWQ-4bit"
+container_name = "vllm_nemotron_super"
+max_model_len = 262144
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = "qwen3_xml"
+trust_remote_code = true
+extra_vllm_args = ["--reasoning-parser", "nemotron_v3"]
+
+# OpenAI GPT-OSS 20B — ultra-fast MoE (3.6B active / 20B total, MXFP4), ~14 GB on A100.
+[vllm.presets.gpt-oss-20b]
+model = "openai/gpt-oss-20b"
+container_name = "vllm_gpt_oss_20b"
+max_model_len = 65536
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = ""
+trust_remote_code = false
+extra_vllm_args = []
+
+# OpenAI GPT-OSS 120B — powerful MoE (5.1B active / 117B total, MXFP4), ~65 GB on A100.
+# Hard architecture limit: max_position_embeddings=131072 in model config.json.
+[vllm.presets.gpt-oss-120b]
+model = "openai/gpt-oss-120b"
+container_name = "vllm_gpt_oss_120b"
+max_model_len = 131072
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = ""
+trust_remote_code = false
+extra_vllm_args = []
+
+# Qwen2.5-Coder-32B-Instruct AWQ — best-in-class open coding model at 32B, ~18 GB on A100.
+[vllm.presets.qwen25-coder-32b]
+model = "Qwen/Qwen2.5-Coder-32B-Instruct-AWQ"
+container_name = "vllm_qwen25_coder32b"
+max_model_len = 32768
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = "hermes"
+trust_remote_code = false
+extra_vllm_args = []
+
+# Qwen3-Coder-30B-A3B AWQ — Qwen3 generation coding MoE (3B active / 30B total), ~18 GB.
+[vllm.presets.qwen3-coder-30b]
+model = "QuantTrio/Qwen3-Coder-30B-A3B-Instruct-AWQ"
+container_name = "vllm_qwen3_coder30b"
+max_model_len = 65536
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = "qwen3_coder"
+trust_remote_code = false
+extra_vllm_args = []
+
+# DeepSeek-R1-Distill-Qwen-32B AWQ — R1 reasoning distillation of Qwen 32B, ~18 GB on A100.
+[vllm.presets.deepseek-r1-32b]
+model = "casperhansen/deepseek-r1-distill-qwen-32b-awq"
+container_name = "vllm_deepseek_r1_32b"
+max_model_len = 32768
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = ""
+trust_remote_code = false
+extra_vllm_args = ["--reasoning-parser", "deepseek_r1"]
+
+# Qwen3-32B AWQ — dense 32B reasoning model with extended context, ~18 GB on A100.
+[vllm.presets.qwen3-32b]
+model = "Qwen/Qwen3-32B-AWQ"
+container_name = "vllm_qwen3_32b"
+max_model_len = 32768
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = ""
+trust_remote_code = false
+extra_vllm_args = ["--reasoning-parser", "deepseek_r1"]
+
+# Devstral-Small-2507 AWQ — Mistral's coding agent model (~15 GB on A100).
+[vllm.presets.devstral]
+model = "cyankiwi/Devstral-Small-2507-AWQ-4bit"
+container_name = "vllm_devstral"
+max_model_len = 32768
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = "mistral"
+trust_remote_code = false
+extra_vllm_args = ["--tokenizer_mode", "mistral", "--config_format", "mistral"]
+
+[wireguard]
+auto_setup = true
+setup_script = "./wg1-setup.sh"
+
+[local_client]
+check_wg1_service = true
+interface_name = "wg1"
+config_path = "/etc/wireguard/wg1.conf"
diff --git a/snippets/hyperstack/hyperstack-vm2.toml b/snippets/hyperstack/hyperstack-vm2.toml
new file mode 100644
index 0000000..6cc6503
--- /dev/null
+++ b/snippets/hyperstack/hyperstack-vm2.toml
@@ -0,0 +1,180 @@
+[auth]
+api_key_file = "~/.hyperstack"
+
+[hyperstack]
+base_url = "https://infrahub-api.nexgencloud.com/v1"
+
+[state]
+# Separate state file for VM2 so vm1 and vm2 can be managed independently.
+file = ".hyperstack-vm2-state.json"
+
+[vm]
+name_prefix = "hyperstack2"
+hostname = "hyperstack2"
+environment_name = "snonux-ollama"
+
+# A100-80GB is the cost-first default for qwen3-coder-next inference.
+# Switch this to n3-H100x1 if you want safer throughput and compatibility headroom.
+flavor_name = "n3-A100x1"
+image_name = "Ubuntu Server 24.04 LTS R570 CUDA 12.8 with Docker"
+assign_floating_ip = true
+create_bootable_volume = false
+enable_port_randomization = false
+labels = ["qwen3-coder-next", "wireguard"]
+
+[ssh]
+username = "ubuntu"
+private_key_path = "~/.ssh/id_rsa"
+hyperstack_key_name = "earth"
+port = 22
+connect_timeout_sec = 10
+
+[network]
+wireguard_udp_port = 56710
+wireguard_subnet = "192.168.3.0/24"
+# VM2 uses 192.168.3.3, the third host address in the subnet (.2 is skipped because it belongs to the earth client).
+# earth (client) is 192.168.3.2; VM1 is 192.168.3.1; VM2 is 192.168.3.3.
+wireguard_server_ip = "192.168.3.3"
+# Port 11434 is shared by both Ollama and vLLM for firewall compatibility.
+ollama_port = 11434
+# Port 4000: LiteLLM Anthropic-API proxy (used with vLLM).
+litellm_port = 4000
+allowed_ssh_cidrs = ["0.0.0.0/0"]
+allowed_wireguard_cidrs = ["0.0.0.0/0"]
+
+[bootstrap]
+enable_guest_bootstrap = true
+install_wireguard = true
+configure_ufw = true
+configure_ollama_host = false
+
+[ollama]
+# Disabled in favour of vLLM; set install = true to switch back to Ollama.
+install = false
+models_dir = "/ephemeral/ollama/models"
+listen_host = "0.0.0.0:11434"
+gpu_overhead_mb = 2000
+num_parallel = 1
+context_length = 32768
+pull_models = ["qwen3-coder-next"]
+
+# vLLM serves one model via Docker; LiteLLM translates Anthropic API → OpenAI.
+# VM2 defaults to qwen3-coder-next; use 'model switch' to load any other preset.
+[vllm]
+install = true
+model = "bullpoint/Qwen3-Coder-Next-AWQ-4bit"
+# HuggingFace model cache on ephemeral NVMe (fast; survives reboots on most providers).
+hug_cache_dir = "/ephemeral/hug"
+container_name = "vllm_qwen3"
+max_model_len = 262144
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = "qwen3_coder"
+litellm_master_key = "sk-litellm-master"  # NOTE(review): guessable placeholder; confirm port 4000 is only reachable over the tunnel
+# LiteLLM maps each entry to the vLLM model; add new Anthropic model IDs here.
+litellm_claude_model_names = [
+  "claude-sonnet-4-20250514",
+  "claude-opus-4-20250514",
+  "claude-opus-4-6-20260604",
+  "claude-haiku-3-5-20241022"
+]
+
+# Named model presets for 'ruby hyperstack.rb --config hyperstack-vm2.toml model switch <name>'.
+# Each preset overrides the matching [vllm] field; unset fields fall back to [vllm] defaults.
+
+[vllm.presets.qwen3-coder-next]
+model = "bullpoint/Qwen3-Coder-Next-AWQ-4bit"
+container_name = "vllm_qwen3"
+max_model_len = 262144
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = "qwen3_coder"
+
+# NVIDIA Nemotron-3-Super-120B-A12B AWQ 4-bit — hybrid Mamba+MoE (12B active / 120B total).
+# ~60 GB weights on A100 80GB. Uses NoPE so context can be set to 1M; no YaRN needed.
+# Requires trust_remote_code=true for the nemotron_h architecture.
+[vllm.presets.nemotron-super]
+model = "cyankiwi/NVIDIA-Nemotron-3-Super-120B-A12B-AWQ-4bit"
+container_name = "vllm_nemotron_super"
+max_model_len = 262144
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = "qwen3_xml"
+trust_remote_code = true
+extra_vllm_args = ["--reasoning-parser", "nemotron_v3"]
+
+# OpenAI GPT-OSS 20B — ultra-fast MoE (3.6B active / 20B total, MXFP4), ~14 GB on A100.
+[vllm.presets.gpt-oss-20b]
+model = "openai/gpt-oss-20b"
+container_name = "vllm_gpt_oss_20b"
+max_model_len = 65536
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = ""
+
+# OpenAI GPT-OSS 120B — powerful MoE (5.1B active / 117B total, MXFP4), ~65 GB on A100.
+# Hard architecture limit: max_position_embeddings=131072 in model config.json.
+[vllm.presets.gpt-oss-120b]
+model = "openai/gpt-oss-120b"
+container_name = "vllm_gpt_oss_120b"
+max_model_len = 131072
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = ""
+
+# Qwen2.5-Coder-32B-Instruct AWQ — best-in-class open coding model at 32B, ~18 GB on A100.
+[vllm.presets.qwen25-coder-32b]
+model = "Qwen/Qwen2.5-Coder-32B-Instruct-AWQ"
+container_name = "vllm_qwen25_coder32b"
+max_model_len = 32768
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = "hermes"
+
+# Qwen3-Coder-30B-A3B AWQ — Qwen3 generation coding MoE (3B active / 30B total), ~18 GB.
+[vllm.presets.qwen3-coder-30b]
+model = "QuantTrio/Qwen3-Coder-30B-A3B-Instruct-AWQ"
+container_name = "vllm_qwen3_coder30b"
+max_model_len = 65536
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = "qwen3_coder"
+
+# DeepSeek-R1-Distill-Qwen-32B AWQ — R1 reasoning distillation of Qwen 32B, ~18 GB on A100.
+[vllm.presets.deepseek-r1-32b]
+model = "casperhansen/deepseek-r1-distill-qwen-32b-awq"
+container_name = "vllm_deepseek_r1_32b"
+max_model_len = 32768
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = ""
+extra_vllm_args = ["--reasoning-parser", "deepseek_r1"]
+
+# Qwen3-32B AWQ — dense 32B reasoning model with extended context, ~18 GB on A100.
+[vllm.presets.qwen3-32b]
+model = "Qwen/Qwen3-32B-AWQ"
+container_name = "vllm_qwen3_32b"
+max_model_len = 32768
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = ""
+extra_vllm_args = ["--reasoning-parser", "deepseek_r1"]
+
+# Devstral-Small-2507 AWQ — Mistral's coding agent model (~15 GB on A100).
+[vllm.presets.devstral]
+model = "cyankiwi/Devstral-Small-2507-AWQ-4bit"
+container_name = "vllm_devstral"
+max_model_len = 32768
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = "mistral"
+extra_vllm_args = ["--tokenizer_mode", "mistral", "--config_format", "mistral"]
+
+[wireguard]
+auto_setup = true
+setup_script = "./wg1-setup.sh"
+
+[local_client]
+check_wg1_service = true
+interface_name = "wg1"
+config_path = "/etc/wireguard/wg1.conf"
diff --git a/snippets/hyperstack/hyperstack.rb b/snippets/hyperstack/hyperstack.rb
index 69bb6f6..526d7aa 100755
--- a/snippets/hyperstack/hyperstack.rb
+++ b/snippets/hyperstack/hyperstack.rb
@@ -82,6 +82,9 @@ module HyperstackVM
       'network' => {
         'wireguard_udp_port' => 56_710,
         'wireguard_subnet' => '192.168.3.0/24',
+        # Optional: explicit server-side WireGuard IP. When nil, derived as subnet + 1 (i.e. .1).
+        # Set to a different address (e.g. 192.168.3.3) for a second VM sharing the same wg1 tunnel.
+        'wireguard_server_ip' => nil,
         'ollama_port' => 11_434,
         'litellm_port' => 4_000,
         'allowed_ssh_cidrs' => ['0.0.0.0/0'],
@@ -155,6 +158,20 @@ module HyperstackVM
       rescue IPAddr::InvalidAddressError => e
         raise Error, "Invalid CIDR #{cidr.inspect}: #{e.message}"
       end
+
+      server_ip = fetch('network', 'wireguard_server_ip')
+      if server_ip
+        # Validate that the explicit server WireGuard IP is within the configured subnet.
+        begin
+          subnet = IPAddr.new(fetch('network', 'wireguard_subnet'))
+          unless subnet.include?(IPAddr.new(server_ip))
+            raise Error,
+                  "wireguard_server_ip #{server_ip.inspect} is not in wireguard_subnet #{fetch('network', 'wireguard_subnet')}"
+          end
+        rescue IPAddr::InvalidAddressError => e
+          raise Error, "Invalid wireguard_server_ip #{server_ip.inspect}: #{e.message}"
+        end
+      end
     end
 
     def fetch(section, key)
@@ -305,15 +322,25 @@ module HyperstackVM
       Integer(fetch('network', 'litellm_port'))
     end
 
+    # Returns the server-side WireGuard IP for this VM.
+    # Uses the explicitly configured address when set; otherwise derives it as subnet_base + 1.
+    # Example: 192.168.3.0/24 → 192.168.3.1 (default VM1); VM2 sets wireguard_server_ip=192.168.3.3.
     def wireguard_gateway_ip
+      configured = fetch('network', 'wireguard_server_ip')
+      return configured.to_s if configured && !configured.to_s.strip.empty?
+
+      # Fall back to first usable address in the subnet.
       base = IPAddr.new(wireguard_subnet).to_s
       parts = base.split('.').map(&:to_i)
       parts[-1] += 1
       parts.join('.')
     end
 
+    # Returns the WireGuard hostname for this VM: e.g. hyperstack1.wg1 or hyperstack2.wg1.
+    # Used as the DNS name to reach the VM over the tunnel (must be in /etc/hosts on the client).
     def wireguard_gateway_hostname
-      "hyperstack.#{local_interface_name}"
+      host = vm_hostname || 'hyperstack'
+      "#{host}.#{local_interface_name}"
     end
 
     def allowed_ssh_cidrs
@@ -414,6 +441,17 @@ module HyperstackVM
       fetch('vllm', 'litellm_master_key')
     end
 
+    # Whether to pass --trust-remote-code to vLLM for the default model.
+    # Required for architectures not yet in the vLLM upstream registry (e.g. nemotron_h).
+    def vllm_trust_remote_code
+      truthy?(fetch('vllm', 'trust_remote_code'))
+    end
+
+    # Extra vLLM CLI flags for the default model (e.g. reasoning-parser args).
+    def vllm_extra_args
+      Array(fetch('vllm', 'extra_vllm_args')).map(&:to_s)
+    end
+
     def vllm_presets
       Hash(dig('vllm', 'presets')).transform_keys(&:to_s)
     end
@@ -695,10 +733,12 @@ module HyperstackVM
     end
 
     def status
+      endpoints = configured_endpoints
       {
         'service_state' => service_state,
         'config_path' => @config_path,
-        'endpoint' => configured_endpoint,
+        'endpoint' => endpoints.last,
+        'endpoints' => endpoints,
         'config_readable' => !config_contents.nil?
       }
     end
@@ -715,51 +755,90 @@ module HyperstackVM
     end
 
     def configured_endpoint
-      content = config_contents
-      return nil if content.nil?
-
-      parse_wireguard_config(content)['Endpoint']
+      configured_endpoints.last
     end
 
-    def config_contents
-      return @config_contents if defined?(@config_contents)
+    def configured_endpoints
+      content = config_contents
+      return [] if content.nil?
 
-      @config_contents = File.read(@config_path)
-    rescue Errno::EACCES, Errno::ENOENT
-      stdout, _stderr, status = Open3.capture3('sudo', '-n', 'cat', @config_path)
-      @config_contents = status.success? ? stdout : nil
+      parse_wireguard_peers(content).filter_map { |peer| peer['Endpoint'] }.uniq
     end
 
-    def parse_wireguard_config(content)
+    def parse_wireguard_peers(content)
       current_section = nil
-      peer = {}
+      current_peer = nil
+      peers = []
 
       content.each_line do |line|
         stripped = line.strip
         next if stripped.empty? || stripped.start_with?('#')
 
         if stripped.start_with?('[') && stripped.end_with?(']')
+          peers << current_peer if current_section == 'Peer' && current_peer && !current_peer.empty?
           current_section = stripped[1..-2]
+          current_peer = current_section == 'Peer' ? {} : nil
           next
         end
 
         key, value = stripped.split('=', 2).map { |part| part&.strip }
         next unless current_section == 'Peer' && key && value
 
-        peer[key] = value
+        current_peer[key] = value
       end
 
-      peer
+      peers << current_peer if current_section == 'Peer' && current_peer && !current_peer.empty?
+      peers
+    end
+
+    def config_contents
+      return @config_contents if defined?(@config_contents)
+
+      @config_contents = File.read(@config_path)
+    rescue Errno::EACCES, Errno::ENOENT
+      stdout, _stderr, status = Open3.capture3('sudo', '-n', 'cat', @config_path)
+      @config_contents = status.success? ? stdout : nil
+    end
+  end
+
+  # Thread-safe output wrapper that prepends a fixed prefix to each line.
+  # Used by create-both so interleaved output from VM1 and VM2 threads is distinguishable.
+  # #print buffers partial lines until a newline is received, then flushes with the prefix.
+  class PrefixedOutput
+    def initialize(prefix, delegate, mutex)
+      @prefix, @delegate, @mutex = prefix, delegate, mutex
+      @buffer = +'' # partial line held back until #print sees a newline
+    end
+
+    # Drains any partial line buffered by #print ahead of msg so output stays ordered and is never lost.
+    def puts(msg = '')
+      pending = @buffer.slice!(0, @buffer.length)
+      @mutex.synchronize { @delegate.puts("#{@prefix}#{pending}#{msg}") }
+    end
+
+    def print(msg)
+      @buffer << msg.to_s
+      while (idx = @buffer.index("\n"))
+        line = @buffer.slice!(0, idx + 1)
+        @mutex.synchronize { @delegate.print("#{@prefix}#{line}") }
+      end
+    end
+  end
 
   class Manager
-    def initialize(config:, client:, state_store:, local_wireguard:, out: $stdout)
+    # wg_setup_pre:  optional Proc called just before this VM's WireGuard setup step runs.
+    #                Used by create-both to block VM2 until VM1 has written the base wg1.conf.
+    # wg_setup_post: optional Proc called after the WireGuard step completes (or is skipped).
+    #                Used by create-both to signal that VM1's base config is ready for VM2.
+    def initialize(config:, client:, state_store:, local_wireguard:, out: $stdout,
+                   wg_setup_pre: nil, wg_setup_post: nil)
       @config = config
       @client = client
       @state_store = state_store
       @local_wireguard = local_wireguard
       @out = out
+      @wg_setup_pre  = wg_setup_pre
+      @wg_setup_post = wg_setup_post
     end
 
     def create(replace: false, dry_run: false, install_vllm: nil, install_ollama: nil, vllm_preset: nil)
@@ -847,7 +926,7 @@ module HyperstackVM
       raise
     end
 
-    def status
+    def status(include_local_wireguard: true)
       state = @state_store.load
       if state.nil?
         info "No tracked VM state file at #{@state_store.path}."
@@ -868,7 +947,12 @@ module HyperstackVM
         end
       end
 
-      print_local_wireguard_summary(state&.dig('public_ip'))
+      print_local_wireguard_summary(state&.dig('public_ip')) if include_local_wireguard
+      state&.dig('public_ip')
+    end
+
+    def show_local_wireguard(expected_ips = nil)
+      print_local_wireguard_summary(expected_ips)
     end
 
     # Lists configured model presets and marks the one currently running on the VM.
@@ -1004,11 +1088,18 @@ module HyperstackVM
         @state_store.save(state)
       end
 
+      # Call pre-hook before deciding whether WireGuard setup is needed; this allows a concurrent
+      # sibling VM (e.g. VM2 in create-both) to block here until the primary VM (VM1) has
+      # already written the base wg1.conf, which VM2's setup will then extend with its own peer.
+      @wg_setup_pre&.call
       if wireguard_setup_needed?(state)
         run_wireguard_setup(state['public_ip'])
         state['wireguard_setup_at'] = Time.now.utc.iso8601
         @state_store.save(state)
       end
+      # Always signal post-hook so that a waiting sibling VM is unblocked even when
+      # WireGuard setup was not needed (e.g. already done on a resume).
+      @wg_setup_post&.call
 
       # Pull and verify Ollama models after the tunnel is established.
       if ollama_setup_needed?(state)
@@ -1209,7 +1300,7 @@ module HyperstackVM
       return true if public_ip.empty?
 
       expected_endpoint = "#{public_ip}:#{@config.wireguard_udp_port}"
-      @local_wireguard.status['endpoint'] != expected_endpoint
+      !Array(@local_wireguard.status['endpoints']).include?(expected_endpoint)
     end
 
     def run_wireguard_setup(host)
@@ -1232,7 +1323,12 @@ module HyperstackVM
     end
 
     def run_wireguard_script(host)
-      Open3.popen2e('bash', @config.wireguard_setup_script, host) do |stdin, output, wait_thr|
+      # Pass server WireGuard IP and WireGuard hostname as positional args so that
+      # wg1-setup.sh can configure the correct server-side tunnel address and update
+      # /etc/hosts on the client. The Enter keystroke via stdin bypasses the interactive prompt.
+      server_ip = @config.wireguard_gateway_ip
+      wg_hostname = @config.wireguard_gateway_hostname
+      Open3.popen2e('bash', @config.wireguard_setup_script, host, server_ip, wg_hostname) do |stdin, output, wait_thr|
         stdin.sync = true
         stdin.puts
         stdin.close
@@ -1272,6 +1368,17 @@ module HyperstackVM
         mismatches << "network.wireguard_subnet must be '192.168.3.0/24'"
       end
 
+      # Validate that the resolved server IP is actually within the configured subnet.
+      begin
+        subnet    = IPAddr.new(@config.wireguard_subnet)
+        server_ip = IPAddr.new(@config.wireguard_gateway_ip)
+        unless subnet.include?(server_ip)
+          mismatches << "wireguard_server_ip #{@config.wireguard_gateway_ip.inspect} is outside #{@config.wireguard_subnet}"
+        end
+      rescue IPAddr::InvalidAddressError => e
+        mismatches << "Invalid wireguard_server_ip: #{e.message}"
+      end
+
       return if mismatches.empty?
 
       raise Error, "Configured WireGuard settings do not match #{script_path}: #{mismatches.join('; ')}"
@@ -1659,7 +1766,10 @@ module HyperstackVM
       # parser is nil only when preset explicitly omits the key and config has no default;
       # empty string means "disable tool calling" (e.g. gpt-oss reasoning models).
       parser           = @config.vllm_tool_call_parser if parser.nil?
-      trust_remote     = cfg.key?('trust_remote_code') ? cfg['trust_remote_code'] : false
+      # Fall back to the top-level [vllm] config values when no preset is in use.
+      # This allows setting trust_remote_code / extra_vllm_args in the default [vllm] block
+      # (e.g. for nemotron on VM1) without requiring a --model preset flag at create time.
+      trust_remote     = cfg.key?('trust_remote_code') ? cfg['trust_remote_code'] : @config.vllm_trust_remote_code
       port             = @config.ollama_port # vLLM reuses the Ollama port for firewall compat
 
       docker_args = [
@@ -1688,7 +1798,9 @@ module HyperstackVM
       end
       docker_args << '--trust-remote-code' if trust_remote
       # Append any extra flags verbatim (e.g. Mistral loader flags, reasoning parser).
-      (cfg['extra_vllm_args'] || []).each { |arg| docker_args << arg }
+      # Preset extra_vllm_args take precedence; fall back to top-level [vllm].extra_vllm_args.
+      extra_args = cfg.key?('extra_vllm_args') ? Array(cfg['extra_vllm_args']) : @config.vllm_extra_args
+      extra_args.each { |arg| docker_args << arg }
       docker_run = docker_args.join(' ')
 
       script = []
@@ -1902,24 +2014,42 @@ module HyperstackVM
       value.nil? ? nil : Integer(value)
     end
 
-    def print_local_wireguard_summary(expected_ip)
+    def print_local_wireguard_summary(expected_ips)
       return unless @config.local_client_checks_enabled?
 
       wg_status = @local_wireguard.status
-      endpoint = wg_status['endpoint']
+      endpoints = Array(wg_status['endpoints']).compact.uniq
       info "Local WireGuard #{@config.local_interface_name}: #{wg_status['service_state']}"
-      if endpoint
-        info "Local WireGuard endpoint: #{endpoint}"
-        if expected_ip
-          host, = endpoint.split(':', 2)
-          if host == expected_ip
-            info 'Local WireGuard endpoint matches the managed VM IP.'
-          else
-            warn "Local WireGuard endpoint points to #{host}, expected #{expected_ip}."
-          end
+      if endpoints.empty?
+        warn "Unable to read #{@config.local_wg_config_path} for local WireGuard endpoint validation."
+        return
+      end
+
+      label = endpoints.one? ? 'endpoint' : 'endpoints'
+      info "Local WireGuard #{label}: #{endpoints.join(', ')}"
+
+      expected = Array(expected_ips).compact.map(&:to_s).map(&:strip).reject(&:empty?).uniq
+      return if expected.empty?
+
+      expected_endpoints = expected.map { |ip| "#{ip}:#{@config.wireguard_udp_port}" }
+      missing = expected_endpoints.reject { |endpoint| endpoints.include?(endpoint) }
+
+      if expected_endpoints.one?
+        if missing.empty?
+          info 'Local WireGuard endpoint matches the managed VM IP.'
+        else
+          hosts = endpoints.map { |endpoint| endpoint.split(':', 2).first }.uniq
+          warn "Local WireGuard endpoints point to #{hosts.join(', ')}, expected #{expected.first}."
         end
+        return
+      end
+
+      if missing.empty?
+        info 'Local WireGuard has peers for all managed VM IPs.'
       else
-        warn "Unable to read #{@config.local_wg_config_path} for local WireGuard endpoint validation."
+        present = expected_endpoints - missing
+        info "Local WireGuard has peers for: #{present.map { |endpoint| endpoint.split(':', 2).first }.join(', ')}" unless present.empty?
+        warn "Local WireGuard missing peers for: #{missing.map { |endpoint| endpoint.split(':', 2).first }.join(', ')}."
       end
     end
 
@@ -1936,6 +2066,7 @@ module HyperstackVM
     def initialize(argv)
       @argv = argv.dup
       @config_path = File.join(__dir__, 'hyperstack-vm.toml')
+      @config_explicit = false
     end
 
     def show_help
@@ -1943,7 +2074,13 @@ module HyperstackVM
       puts
       puts 'Commands:'
       puts '  create [--replace] [--dry-run] [--vllm|--no-vllm] [--ollama|--no-ollama] [--model PRESET]'
+      puts '  create-both [--replace] [--dry-run] [--vllm|--no-vllm] [--ollama|--no-ollama]'
+      puts '               Provision hyperstack-vm1.toml and hyperstack-vm2.toml concurrently.'
+      puts '               WireGuard setup is serialized: VM1 writes the base wg1.conf first,'
+      puts '               then VM2 adds its peer. Requires both TOML files next to the script.'
       puts '  delete [--vm-id ID] [--dry-run]'
+      puts '  delete-both [--dry-run]'
+      puts '               Delete the VMs tracked by hyperstack-vm1.toml and hyperstack-vm2.toml.'
       puts '  status'
       puts '  test'
       puts '  model list'
@@ -1955,6 +2092,7 @@ module HyperstackVM
         opts.banner = 'Usage: ruby hyperstack.rb [--config path] <create|delete|status> [options]'
         opts.on('--config PATH', "Path to TOML config (default: #{@config_path})") do |value|
           @config_path = value
+          @config_explicit = true
         end
         opts.on('-h', '--help', 'Show help') do
           show_help
@@ -1969,39 +2107,33 @@ module HyperstackVM
         exit 0
       end
 
+      # create-both loads its own config files and does not use the default config path.
+      # Parse it before building the manager so we avoid loading the default config needlessly.
+      if command == 'create-both'
+        opts = parse_create_options(@argv, include_model_preset: false)
+        run_create_both(**opts)
+        return
+      end
+
+      if command == 'delete-both'
+        opts = parse_delete_both_options(@argv)
+        run_delete_both(**opts)
+        return
+      end
+
+      if command == 'status'
+        run_status
+        return
+      end
+
+      # All other commands operate on a single VM defined by the --config path.
       config_loader = ConfigLoader.load(@config_path)
-      state_store = StateStore.new(config_loader.config.state_file)
-      client = HyperstackClient.new(base_url: config_loader.config.api_base_url, api_key: config_loader.config.api_key)
-      local_wireguard = LocalWireGuard.new(
-        interface_name: config_loader.config.local_interface_name,
-        config_path: config_loader.config.local_wg_config_path
-      )
-      manager = Manager.new(
-        config: config_loader.config,
-        client: client,
-        state_store: state_store,
-        local_wireguard: local_wireguard
-      )
+      manager       = build_manager(config_loader.config)
 
       case command
       when 'create'
-        replace = false
-        dry_run = false
-        install_vllm = nil
-        install_ollama = nil
-        vllm_preset = nil
-        parser = OptionParser.new do |opts|
-          opts.on('--replace', 'Delete the tracked VM before creating a new one') { replace = true }
-          opts.on('--dry-run', 'Resolve config and print the create plan without creating a VM') { dry_run = true }
-          opts.on('--vllm', 'Enable vLLM+LiteLLM setup (overrides config)') { install_vllm = true }
-          opts.on('--no-vllm', 'Disable vLLM+LiteLLM setup (overrides config)') { install_vllm = false }
-          opts.on('--ollama', 'Enable Ollama setup (overrides config)') { install_ollama = true }
-          opts.on('--no-ollama', 'Disable Ollama setup (overrides config)') { install_ollama = false }
-          opts.on('--model PRESET', 'Use a named vLLM model preset at create time') { |v| vllm_preset = v }
-        end
-        parser.parse!(@argv)
-        manager.create(replace: replace, dry_run: dry_run, install_vllm: install_vllm,
-                       install_ollama: install_ollama, vllm_preset: vllm_preset)
+        opts = parse_create_options(@argv)
+        manager.create(**opts)
       when 'delete'
         vm_id = nil
         dry_run = false
@@ -2013,8 +2145,6 @@ module HyperstackVM
         end
         parser.parse!(@argv)
         manager.delete(vm_id: vm_id, dry_run: dry_run)
-      when 'status'
-        manager.status
       when 'test'
         manager.test
       when 'model'
@@ -2035,9 +2165,174 @@ module HyperstackVM
           raise Error, "Unknown model subcommand #{sub.inspect}. Use list or switch."
         end
       else
-        raise Error, "Unknown command #{command.inspect}. Use create, delete, status, test, or model."
+        raise Error, "Unknown command #{command.inspect}. Use create, create-both, delete, delete-both, status, test, or model."
       end
     end
+
+    private
+
+    # Parses the shared --replace / --dry-run / --vllm / --ollama / --model flags
+    # used by both 'create' and 'create-both'.  When include_model_preset is false
+    # (create-both), the --model flag is not registered because each VM uses its own
+    # TOML default.  Returns a hash suitable for splatting into Manager#create.
+    def parse_create_options(argv, include_model_preset: true)
+      opts = { replace: false, dry_run: false, install_vllm: nil, install_ollama: nil, vllm_preset: nil }
+      OptionParser.new do |o|
+        o.on('--replace',    'Delete the tracked VM before creating a new one')      { opts[:replace] = true }
+        o.on('--dry-run',    'Print the create plan without creating a VM')          { opts[:dry_run] = true }
+        o.on('--vllm',       'Enable vLLM+LiteLLM setup (overrides config)')         { opts[:install_vllm] = true }
+        o.on('--no-vllm',    'Disable vLLM+LiteLLM setup (overrides config)')        { opts[:install_vllm] = false }
+        o.on('--ollama',     'Enable Ollama setup (overrides config)')               { opts[:install_ollama] = true }
+        o.on('--no-ollama',  'Disable Ollama setup (overrides config)')              { opts[:install_ollama] = false }
+        o.on('--model PRESET', 'Use a named vLLM preset at create time') { |v| opts[:vllm_preset] = v } if include_model_preset
+      end.parse!(argv)
+      opts
+    end
+
+    def parse_delete_both_options(argv)
+      opts = { dry_run: false }
+      OptionParser.new do |o|
+        o.on('--dry-run', 'Show which VMs would be deleted without deleting them') { opts[:dry_run] = true }
+      end.parse!(argv)
+      opts
+    end
+
+    # Constructs a Manager and all its dependencies from a Config object.
+    # Accepts optional output destination and WireGuard concurrency hooks.
+    def build_manager(config, out: $stdout, wg_setup_pre: nil, wg_setup_post: nil)
+      state_store     = StateStore.new(config.state_file)
+      client          = HyperstackClient.new(base_url: config.api_base_url, api_key: config.api_key)
+      local_wireguard = LocalWireGuard.new(
+        interface_name: config.local_interface_name,
+        config_path:    config.local_wg_config_path
+      )
+      Manager.new(
+        config:          config,
+        client:          client,
+        state_store:     state_store,
+        local_wireguard: local_wireguard,
+        out:             out,
+        wg_setup_pre:    wg_setup_pre,
+        wg_setup_post:   wg_setup_post
+      )
+    end
+
+    def run_status
+      loaders = status_config_loaders
+      if loaders.one?
+        build_manager(loaders.first.config).status
+        return
+      end
+
+      expected_ips = []
+      loaders.each_with_index do |loader, index|
+        puts if index.positive?
+        puts "[#{File.basename(loader.path)}]"
+        expected_ip = build_manager(loader.config).status(include_local_wireguard: false)
+        expected_ips << expected_ip if expected_ip
+      end
+
+      puts
+      puts '[local-wireguard]'
+      build_manager(loaders.first.config).show_local_wireguard(expected_ips)
+    end
+
+    def status_config_loaders
+      return [ConfigLoader.load(@config_path)] if @config_explicit
+
+      candidates = [
+        @config_path,
+        File.join(__dir__, 'hyperstack-vm1.toml'),
+        File.join(__dir__, 'hyperstack-vm2.toml')
+      ].uniq.select { |path| File.exist?(path) }
+
+      loaders = candidates.map { |path| ConfigLoader.load(path) }
+      tracked = loaders.select { |loader| File.exist?(loader.config.state_file) }
+      tracked.empty? ? [ConfigLoader.load(@config_path)] : tracked
+    end
+
+    def pair_config_loaders
+      [
+        ConfigLoader.load(File.join(__dir__, 'hyperstack-vm1.toml')),
+        ConfigLoader.load(File.join(__dir__, 'hyperstack-vm2.toml'))
+      ]
+    end
+
+    # Provisions hyperstack-vm1 and hyperstack-vm2 concurrently in separate threads.
+    # WireGuard setup is serialized: VM1 runs first (replacing the base wg1.conf), then
+    # VM2 adds its peer. A Mutex+ConditionVariable acts as a one-shot latch between threads.
+    # If VM1 fails before reaching the WG step the latch is still released so VM2 doesn't hang.
+    # vllm_preset is accepted but ignored — each VM uses its own TOML default preset.
+    def run_create_both(replace:, dry_run:, install_vllm:, install_ollama:, vllm_preset: nil) # rubocop:disable Lint/UnusedMethodArgument
+      vm1_loader, vm2_loader = pair_config_loaders
+      vm1_config = vm1_loader.config
+      vm2_config = vm2_loader.config
+
+      out_mutex    = Mutex.new
+      wg_mutex     = Mutex.new
+      wg_cv        = ConditionVariable.new
+      vm1_wg_state = { done: false, error: nil }
+
+      # VM1 signals the latch after its WG step (whether WG ran or was already done).
+      vm1_wg_post = proc do
+        wg_mutex.synchronize { vm1_wg_state[:done] = true; wg_cv.broadcast }
+      end
+
+      # VM2 blocks here until VM1's WG step resolves, then raises if VM1 failed.
+      vm2_wg_pre = proc do
+        wg_mutex.synchronize { wg_cv.wait(wg_mutex) until vm1_wg_state[:done] || vm1_wg_state[:error] }
+        raise Error, "VM1 WireGuard setup failed; cannot add VM2 peer." if vm1_wg_state[:error]
+      end
+
+      manager1 = build_manager(vm1_config,
+                               out:            PrefixedOutput.new('[vm1] ', $stdout, out_mutex),
+                               wg_setup_post:  vm1_wg_post)
+      manager2 = build_manager(vm2_config,
+                               out:           PrefixedOutput.new('[vm2] ', $stdout, out_mutex),
+                               wg_setup_pre:  vm2_wg_pre)
+
+      errors = {}
+      create_opts = { replace: replace, dry_run: dry_run,
+                      install_vllm: install_vllm, install_ollama: install_ollama }
+
+      vm1_thread = Thread.new do
+        manager1.create(**create_opts)
+      rescue Error => e
+        errors[:vm1] = e.message
+        # Unblock VM2 even if VM1 failed so the process doesn't hang.
+        wg_mutex.synchronize { vm1_wg_state[:error] = e.message; wg_cv.broadcast }
+      end
+
+      vm2_thread = Thread.new do
+        manager2.create(**create_opts)
+      rescue Error => e
+        errors[:vm2] = e.message
+      end
+
+      [vm1_thread, vm2_thread].each(&:join)
+
+      errors.each { |vm, msg| $stderr.puts("ERROR [#{vm}]: #{msg}") }
+      exit 1 unless errors.empty?
+    end
+
+    def run_delete_both(dry_run:)
+      out_mutex = Mutex.new
+      errors = {}
+
+      pair_config_loaders.each_with_index do |loader, index|
+        label = "vm#{index + 1}"
+        manager = build_manager(loader.config, out: PrefixedOutput.new("[#{label}] ", $stdout, out_mutex))
+
+        begin
+          manager.delete(dry_run: dry_run)
+        rescue Error => e
+          errors[label.to_sym] = e.message
+        end
+      end
+
+      errors.each { |vm, msg| $stderr.puts("ERROR [#{vm}]: #{msg}") }
+      exit 1 unless errors.empty?
+    end
   end
 end
 
diff --git a/snippets/hyperstack/wg1-setup.sh b/snippets/hyperstack/wg1-setup.sh
index d057fb8..49d716a 100755
--- a/snippets/hyperstack/wg1-setup.sh
+++ b/snippets/hyperstack/wg1-setup.sh
@@ -1,56 +1,76 @@
 #!/bin/bash
 #
-# wg1-setup.sh - Set up WireGuard wg1 tunnel between earth and hyperstack VM
+# wg1-setup.sh - Set up WireGuard wg1 tunnel between earth and a hyperstack VM
 #
 # USAGE:
-#   ./wg1-setup.sh <VM_PUBLIC_IP>
-#   Example: ./wg1-setup.sh 185.216.20.163
+#   ./wg1-setup.sh <VM_PUBLIC_IP> [SERVER_WG_IP] [WG_HOSTNAME]
+#
+#   VM_PUBLIC_IP  Public IP of the hyperstack VM (required)
+#   SERVER_WG_IP  WireGuard IP to assign to this VM's tunnel interface (default: 192.168.3.1)
+#                 Use 192.168.3.3 for hyperstack2 when hyperstack1 is already set up.
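+#   WG_HOSTNAME   Hostname mapped to SERVER_WG_IP in /etc/hosts
+#                 (default: hyperstack<N>.wg1, where N is the last octet of SERVER_WG_IP;
+#                 pass hyperstack2.wg1 explicitly when using 192.168.3.3 for hyperstack2)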
+#
+# EXAMPLES:
+#   ./wg1-setup.sh 185.216.20.163                            # VM1 (hyperstack1, 192.168.3.1)
+#   ./wg1-setup.sh 185.216.20.200 192.168.3.3 hyperstack2.wg1  # VM2 added to existing tunnel
 #
 # NETWORK DESIGN:
 #   Subnet: 192.168.3.0/24 (separate from wg0's 192.168.2.0/24)
 #   Port: 56710/udp
 #
-#   +----------------+                     +------------------+
-#   | earth (client) |                     | hyperstack (VM)  |
-#   | 192.168.3.2    |<--- WireGuard --->  | 192.168.3.1      |
-#   +----------------+     tunnel          +------------------+
-#                                          | Ollama :11434    |
-#                                          +------------------+
+#   +----------------+                      +------------------+
+#   | earth (client) |                      | hyperstack1 (VM) |
+#   | 192.168.3.2    |<--- WireGuard --->   | 192.168.3.1      |
+#   +----------------+     tunnel           +------------------+
+#        |                                  | vLLM  :11434     |
+#        |                                  +------------------+
+#        |                                  +------------------+
+#        +--------- WireGuard ---------->   | hyperstack2 (VM) |
+#                                           | 192.168.3.3      |
+#                                           +------------------+
+#                                           | vLLM  :11434     |
+#                                           +------------------+
 #
 # WHAT THIS SCRIPT DOES:
-#   On hyperstack VM (via SSH):
+#
+#   For the FIRST VM (SERVER_WG_IP = 192.168.3.1, default):
+#     Generates fresh key-pairs and REPLACES /etc/wireguard/wg1.conf on earth with
+#     a single-peer config pointing to this VM.
+#
+#   For ADDITIONAL VMs (any other SERVER_WG_IP, e.g. 192.168.3.3):
+#     Generates new server-side keys and ADDS or UPDATES just the new [Peer] block
+#     in the existing /etc/wireguard/wg1.conf, preserving the [Interface] section
+#     (client key-pair) and any other peers already present.
+#     The existing client public key from wg1.conf is extracted and used in the new
+#     VM's server config so it can encrypt traffic to earth.
+#
+#   On every hyperstack VM (via SSH):
 #     - Installs WireGuard if not present
-#     - Creates /etc/wireguard/wg1.conf
-#     - Opens UFW ports: 56710/udp (WireGuard), 11434/tcp from 192.168.3.0/24 (Ollama)
-#     - Configures Ollama to listen on 0.0.0.0:11434
+#     - Creates /etc/wireguard/wg1.conf with SERVER_WG_IP as the tunnel address
+#     - Opens UFW ports: 56710/udp (WireGuard), 11434/tcp from 192.168.3.0/24
 #     - Starts wg-quick@wg1
 #
 #   On earth (locally):
 #     - Installs WireGuard if not present (dnf)
-#     - Creates /etc/wireguard/wg1.conf
-#     - Starts wg-quick@wg1
+#     - Creates or updates /etc/wireguard/wg1.conf (see above)
+#     - Adds SERVER_WG_IP <-> WG_HOSTNAME mapping to /etc/hosts
+#     - Restarts wg-quick@wg1
 #
 # PREREQUISITES:
 #   - SSH access to ubuntu@<VM_IP> with key-based auth
 #   - UDP port 56710 open in cloud provider's firewall/security group
 #
 # RE-RUNNING:
-#   When the VM IP changes, simply re-run this script with the new IP.
+#   When a VM IP changes, simply re-run this script with the new IP.
 #   It will regenerate keys and update configs on both sides.
 #
-# USING OLLAMA REMOTELY:
-#   export OLLAMA_HOST=http://192.168.3.1:11434
-#   ollama run qwen2.5-coder:14b-instruct
-#   # Or with aider:
-#   aider --model ollama/qwen2.5-coder:14b-instruct
-#
 
 set -euo pipefail
 
-# Configuration constants
+# Fixed network constants that must match hyperstack-vm*.toml [network] section.
 WG_INTERFACE="wg1"
 WG_PORT="56710"
-SERVER_WG_IP="192.168.3.1"
+DEFAULT_SERVER_WG_IP="192.168.3.1"
 CLIENT_WG_IP="192.168.3.2"
 SUBNET_MASK="24"
 SSH_USER="ubuntu"
@@ -61,22 +81,12 @@ GREEN='\033[0;32m'
 YELLOW='\033[1;33m'
 NC='\033[0m' # No Color
 
-print_warning() {
-    echo -e "${YELLOW}$1${NC}"
-}
-
-print_success() {
-    echo -e "${GREEN}$1${NC}"
-}
-
-print_error() {
-    echo -e "${RED}$1${NC}"
-}
+print_warning() { echo -e "${YELLOW}$1${NC}"; }
+print_success() { echo -e "${GREEN}$1${NC}"; }
+print_error()   { echo -e "${RED}$1${NC}"; }
 
 # Retry wrapper for SSH/SCP commands that may fail due to transient
 # connection resets (e.g. sshd restart from unattended-upgrades).
-# Usage: retry_ssh ssh user@host "command"
-#        retry_ssh scp file user@host:/path
 retry_ssh() {
     local max_attempts=5
     local attempt=1
@@ -96,50 +106,137 @@ retry_ssh() {
     done
 }
 
+# Updates or adds a [Peer] block in the existing /etc/wireguard/wg1.conf.
+# Preserves the [Interface] section and any other peers; only the block for
+# SERVER_WG_IP (matched by its "AllowedIPs = <ip>/32" line) is replaced.
+# Uses python3 for regex-based manipulation of the INI-style WireGuard config.
+#
+# Arguments:
+#   $1  WireGuard IP of the VM (used for AllowedIPs)
+#   $2  public key of the VM
+#   $3  public IP of the VM (used for Endpoint, together with WG_PORT)
+# Returns 0 on success, non-zero if wg1.conf cannot be read, is empty, or the
+# rewritten file cannot be installed.  Temp files are removed on every path.
+update_peer_in_client_config() {
+    local server_ip="$1"
+    local server_pubkey="$2"
+    local vm_ip="$3"
+    local tmpfile conf_copy
+    local rc=0
+    tmpfile=$(mktemp)
+    conf_copy=$(mktemp)
+
+    # /etc/wireguard/wg1.conf is root-owned; read it via sudo into a user-readable temp copy.
+    if ! sudo cat /etc/wireguard/wg1.conf > "$conf_copy" 2>/dev/null; then
+        print_error "Cannot read /etc/wireguard/wg1.conf. Run wg1-setup.sh for VM1 (192.168.3.1) first."
+        rm -f "$tmpfile" "$conf_copy"
+        return 1
+    fi
+
+    # The script runs with `set -e`: without `|| rc=$?` a python failure would abort
+    # the whole script right here and leave conf_copy (which contains the client
+    # private key) and tmpfile behind.
+    python3 - "$server_ip" "$server_pubkey" "$vm_ip" "$WG_PORT" "$conf_copy" "$tmpfile" << 'PYEOF' || rc=$?
+import sys, re
+
+server_ip, server_pubkey, vm_ip, wg_port, conf_copy, tmpfile = sys.argv[1:]
+
+with open(conf_copy) as f:
+    content = f.read()
+
+if not content.strip():
+    print("ERROR: wg1.conf is empty. Run wg1-setup.sh for VM1 (192.168.3.1) first.", file=sys.stderr)
+    sys.exit(1)
+
+# Split into sections: [Interface] block + any [Peer] blocks.
+# Each section starts with a [ header; split on newline-[ boundaries.
+parts = re.split(r'(?=\n\[)', content)
+
+# Remove any existing [Peer] block whose AllowedIPs matches server_ip/32.
+kept = [p for p in parts if not (re.search(r'^\[Peer\]', p.lstrip()) and f'AllowedIPs = {server_ip}/32' in p)]
+
+new_peer = f"""
+[Peer]
+# hyperstack VM ({server_ip})
+PublicKey = {server_pubkey}
+Endpoint = {vm_ip}:{wg_port}
+AllowedIPs = {server_ip}/32
+PersistentKeepalive = 25"""
+
+result = ''.join(kept).rstrip('\n') + '\n' + new_peer + '\n'
+
+with open(tmpfile, 'w') as f:
+    f.write(result)
+print('peer-updated-ok')
+PYEOF
+
+    rm -f "$conf_copy"
+    if [[ $rc -eq 0 ]]; then
+        # Install inside a condition so a failing sudo step cannot trip `set -e`
+        # before the temp file is removed below.
+        if ! { sudo cp "${tmpfile}" /etc/wireguard/wg1.conf && sudo chmod 600 /etc/wireguard/wg1.conf; }; then
+            print_error "Cannot install the updated /etc/wireguard/wg1.conf."
+            rc=1
+        fi
+    fi
+    rm -f "${tmpfile}"
+    return $rc
+}
+
+
 # Validate arguments
-if [[ $# -ne 1 ]]; then
-    echo "Usage: $0 <VM_PUBLIC_IP>"
-    echo "Example: $0 185.216.20.163"
+if [[ $# -lt 1 ]]; then
+    echo "Usage: $0 <VM_PUBLIC_IP> [SERVER_WG_IP] [WG_HOSTNAME]"
+    echo "Example (VM1): $0 185.216.20.163"
+    echo "Example (VM2): $0 185.216.20.200 192.168.3.3 hyperstack2.wg1"
     exit 1
 fi
 
 VM_IP="$1"
+SERVER_WG_IP="${2:-${DEFAULT_SERVER_WG_IP}}"
+# Default WG_HOSTNAME: replace 192.168.3. prefix with 'hyperstack' and append .wg1,
+# or fall back to server IP if the address doesn't match the expected pattern.
+WG_HOSTNAME="${3:-$(echo "$SERVER_WG_IP" | sed 's/^192\.168\.3\.\(.*\)/hyperstack\1.wg1/' || echo "${SERVER_WG_IP}.wg1")}"
+
+# Determine mode: first VM replaces the entire client config; additional VMs add a peer.
+IS_FIRST_VM=false
+[[ "$SERVER_WG_IP" == "$DEFAULT_SERVER_WG_IP" ]] && IS_FIRST_VM=true
 
 echo "=============================================="
 print_warning "IMPORTANT: Ensure UDP port ${WG_PORT} is open on the VM!"
 print_warning "This must be configured in your cloud provider's"
 print_warning "firewall/security group settings."
+if [[ "$IS_FIRST_VM" == "false" ]]; then
+    print_warning "Mode: ADD PEER — ${SERVER_WG_IP} (${WG_HOSTNAME}) will be added to existing wg1.conf."
+    print_warning "Ensure the first VM (192.168.3.1) has already been set up."
+fi
 echo "=============================================="
 echo ""
-read -p "Press Enter to continue (or Ctrl+C to abort)..."
+read -rp "Press Enter to continue (or Ctrl+C to abort)..."
 echo ""
 
 # Create temporary directory for key generation
 TMPDIR=$(mktemp -d)
-trap "rm -rf $TMPDIR" EXIT
+# Single quotes defer expansion until the trap fires; the inner double quotes keep a
+# path containing whitespace or glob characters from being split or expanded by rm.
+trap 'rm -rf "$TMPDIR"' EXIT
 
 echo "=== Generating WireGuard keys locally ==="
 
-# Generate server (hyperstack) keys
+# Generate server (hyperstack VM) keys — always fresh for each VM.
 wg genkey > "$TMPDIR/server-privatekey"
 wg pubkey < "$TMPDIR/server-privatekey" > "$TMPDIR/server-publickey"
 SERVER_PRIVATE_KEY=$(cat "$TMPDIR/server-privatekey")
-SERVER_PUBLIC_KEY=$(cat "$TMPDIR/server-publickey")
-
-# Generate client (earth) keys
-wg genkey > "$TMPDIR/client-privatekey"
-wg pubkey < "$TMPDIR/client-privatekey" > "$TMPDIR/client-publickey"
-CLIENT_PRIVATE_KEY=$(cat "$TMPDIR/client-privatekey")
-CLIENT_PUBLIC_KEY=$(cat "$TMPDIR/client-publickey")
-
-print_success "Keys generated successfully"
+SERVER_PUBLIC_KEY=$(cat  "$TMPDIR/server-publickey")
+
+if [[ "$IS_FIRST_VM" == "true" ]]; then
+    # First VM: generate fresh client keys; the entire wg1.conf will be replaced.
+    wg genkey > "$TMPDIR/client-privatekey"
+    wg pubkey < "$TMPDIR/client-privatekey" > "$TMPDIR/client-publickey"
+    CLIENT_PRIVATE_KEY=$(cat "$TMPDIR/client-privatekey")
+    CLIENT_PUBLIC_KEY=$(cat "$TMPDIR/client-publickey")
+    print_success "Keys generated (first VM — full config will be replaced)"
+else
+    # Additional VM: reuse the existing client keys from /etc/wireguard/wg1.conf so that
+    # the first VM's server config (which already stores the client public key) keeps working.
+    #
+    # awk matches only lines whose first field is exactly "PrivateKey", so commented-out
+    # keys are ignored.  `|| true` stops `set -e` from aborting on a missing or
+    # unreadable file before the explanatory error below can be printed.
+    CLIENT_PRIVATE_KEY=$(sudo awk '$1 == "PrivateKey" { print $3; exit }' /etc/wireguard/wg1.conf 2>/dev/null || true)
+    if [[ -z "$CLIENT_PRIVATE_KEY" ]]; then
+        print_error "Cannot extract client private key from /etc/wireguard/wg1.conf."
+        print_error "Run this script for VM1 (192.168.3.1) first."
+        exit 1
+    fi
+    CLIENT_PUBLIC_KEY=$(wg pubkey <<< "$CLIENT_PRIVATE_KEY")
+    print_success "Keys generated (additional VM — client keys reused from existing wg1.conf)"
+fi
 
 echo ""
-echo "=== Creating server (hyperstack) configuration ==="
+echo "=== Creating server (hyperstack VM ${SERVER_WG_IP}) configuration ==="
 
-# Create server wg1.conf
 cat > "$TMPDIR/server-wg1.conf" << EOF
-# WireGuard wg1 configuration for hyperstack VM
+# WireGuard wg1 configuration for hyperstack VM (${SERVER_WG_IP})
 # Server side of earth <-> hyperstack tunnel
 # Generated by wg1-setup.sh on $(date)
 
@@ -154,13 +251,13 @@ PublicKey = ${CLIENT_PUBLIC_KEY}
 AllowedIPs = ${CLIENT_WG_IP}/32
 EOF
 
-print_success "Server config created"
+print_success "Server config created (server IP: ${SERVER_WG_IP})"
 
-echo ""
-echo "=== Creating client (earth) configuration ==="
+if [[ "$IS_FIRST_VM" == "true" ]]; then
+    echo ""
+    echo "=== Creating client (earth) configuration ==="
 
-# Create client wg1.conf
-cat > "$TMPDIR/client-wg1.conf" << EOF
+    cat > "$TMPDIR/client-wg1.conf" << EOF
 # WireGuard wg1 configuration for earth
 # Client side of earth <-> hyperstack tunnel
 # Generated by wg1-setup.sh on $(date)
@@ -170,49 +267,43 @@ Address = ${CLIENT_WG_IP}/${SUBNET_MASK}
 PrivateKey = ${CLIENT_PRIVATE_KEY}
 
 [Peer]
-# hyperstack VM (server)
+# hyperstack VM (${SERVER_WG_IP})
 PublicKey = ${SERVER_PUBLIC_KEY}
 Endpoint = ${VM_IP}:${WG_PORT}
 AllowedIPs = ${SERVER_WG_IP}/32
 PersistentKeepalive = 25
 EOF
 
-print_success "Client config created"
+    print_success "Client config created"
+fi
 
 echo ""
-echo "=== Setting up hyperstack VM (${VM_IP}) ==="
+echo "=== Setting up hyperstack VM (${VM_IP}, tunnel IP ${SERVER_WG_IP}) ==="
 
-# Wait for SSH to become available (handles transient connection resets
-# from sshd restarts due to unattended-upgrades or package installs)
 echo "Testing SSH connection..."
 retry_ssh ssh -o ConnectTimeout=10 -o BatchMode=yes "${SSH_USER}@${VM_IP}" "echo 'SSH OK'"
 print_success "SSH connection OK"
 
-# Install WireGuard on server if not present
 echo "Installing WireGuard on hyperstack..."
 retry_ssh ssh "${SSH_USER}@${VM_IP}" "which wg >/dev/null 2>&1 || (sudo apt update && sudo apt install -y wireguard)"
 print_success "WireGuard installed"
 
-# Copy server config to hyperstack
 echo "Copying wg1.conf to hyperstack..."
 retry_ssh scp "$TMPDIR/server-wg1.conf" "${SSH_USER}@${VM_IP}:/tmp/wg1.conf"
 retry_ssh ssh "${SSH_USER}@${VM_IP}" "sudo mv /tmp/wg1.conf /etc/wireguard/wg1.conf && sudo chmod 600 /etc/wireguard/wg1.conf"
 print_success "Server config installed"
 
-# Configure firewall on hyperstack
 echo "Configuring firewall (ufw) on hyperstack..."
 retry_ssh ssh "${SSH_USER}@${VM_IP}" bash -s << 'REMOTE_SCRIPT'
 sudo ufw allow ssh comment 'Allow SSH' 2>/dev/null || true
 sudo ufw --force enable >/dev/null 2>&1 || true
 sudo ufw allow 56710/udp comment 'WireGuard wg1' 2>/dev/null || true
-sudo ufw allow from 192.168.3.0/24 to any port 11434 proto tcp comment 'Ollama via wg1' 2>/dev/null || true
+sudo ufw allow from 192.168.3.0/24 to any port 11434 proto tcp comment 'Ollama/vLLM via wg1' 2>/dev/null || true
 echo "Firewall rules added"
 REMOTE_SCRIPT
 print_success "Firewall configured"
 
-# Ensure Ollama listens on all interfaces (only if override not already set
-# by ollama_setup_script, which also configures OLLAMA_MODELS and other env vars)
-echo "Configuring Ollama to listen on 0.0.0.0..."
+echo "Configuring Ollama to listen on 0.0.0.0 (if installed)..."
 retry_ssh ssh "${SSH_USER}@${VM_IP}" bash -s << 'REMOTE_SCRIPT'
 if [ -f /etc/systemd/system/ollama.service.d/override.conf ] && \
    grep -q 'OLLAMA_HOST' /etc/systemd/system/ollama.service.d/override.conf; then
@@ -224,12 +315,11 @@ else
 Environment="OLLAMA_HOST=0.0.0.0:11434"
 OVERRIDE
   sudo systemctl daemon-reload
-  sudo systemctl restart ollama 2>/dev/null || echo "Note: Ollama service not running or not installed"
+  sudo systemctl restart ollama 2>/dev/null || echo "Note: Ollama not running or not installed"
 fi
 REMOTE_SCRIPT
 print_success "Ollama configured"
 
-# Start wg1 on hyperstack
 echo "Starting wg1 on hyperstack..."
 retry_ssh ssh "${SSH_USER}@${VM_IP}" "sudo systemctl start wg-quick@wg1 2>/dev/null || sudo wg-quick up wg1"
 print_success "wg1 started on hyperstack"
@@ -237,35 +327,43 @@ print_success "wg1 started on hyperstack"
 echo ""
 echo "=== Setting up earth (local) ==="
 
-# Check if WireGuard is installed locally
 if ! which wg >/dev/null 2>&1; then
     echo "Installing WireGuard locally..."
     sudo dnf install -y wireguard-tools
 fi
 print_success "WireGuard installed locally"
 
-# Install client config locally
-echo "Installing wg1.conf locally..."
-sudo cp "$TMPDIR/client-wg1.conf" /etc/wireguard/wg1.conf
-sudo chmod 600 /etc/wireguard/wg1.conf
-print_success "Client config installed"
+if [[ "$IS_FIRST_VM" == "true" ]]; then
+    echo "Installing fresh wg1.conf locally (first VM — replaces any existing config)..."
+    sudo cp "$TMPDIR/client-wg1.conf" /etc/wireguard/wg1.conf
+    sudo chmod 600 /etc/wireguard/wg1.conf
+    print_success "Client config installed"
+else
+    echo "Adding peer ${SERVER_WG_IP} to existing wg1.conf (additional VM)..."
+    update_peer_in_client_config "$SERVER_WG_IP" "$SERVER_PUBLIC_KEY" "$VM_IP"
+    print_success "Peer added to client config"
+fi
+
+# Update /etc/hosts so that WG_HOSTNAME resolves to the VM's WireGuard IP.
+# hyperstack.rb uses this hostname in test URLs and informational output.
+echo "Updating /etc/hosts: ${SERVER_WG_IP} ${WG_HOSTNAME}..."
+sudo sed -i "/ ${WG_HOSTNAME}$/d" /etc/hosts   # Remove stale entry if present
+echo "${SERVER_WG_IP} ${WG_HOSTNAME}" | sudo tee -a /etc/hosts > /dev/null
+print_success "/etc/hosts updated"
 
-# Stop existing wg1 if running, then start fresh
-echo "Starting wg1 locally..."
-sudo systemctl stop wg-quick@wg1 2>/dev/null || true
+echo "Restarting wg1 locally..."
+sudo systemctl stop  wg-quick@wg1 2>/dev/null || true
 sudo systemctl start wg-quick@wg1
-print_success "wg1 started locally"
+print_success "wg1 restarted locally"
 
 echo ""
 echo "=============================================="
 print_success "Setup complete!"
 echo "=============================================="
 echo ""
-echo "WireGuard wg1 tunnel is now active."
-echo ""
-echo "Tunnel IPs:"
-echo "  hyperstack (server): ${SERVER_WG_IP}"
-echo "  earth (client):      ${CLIENT_WG_IP}"
+echo "WireGuard wg1 tunnel peer active:"
+echo "  hyperstack VM (server): ${SERVER_WG_IP} (${WG_HOSTNAME})"
+echo "  earth (client):         ${CLIENT_WG_IP}"
 echo ""
 echo "=== Verification commands ==="
 echo ""
@@ -278,8 +376,8 @@ echo ""
 echo "# Verify default route is UNCHANGED:"
 echo "ip route | grep default"
 echo ""
-echo "# Test Ollama access:"
-echo "curl http://${SERVER_WG_IP}:11434/api/tags"
+echo "# Test vLLM access:"
+echo "curl http://${WG_HOSTNAME}:11434/v1/models"
 echo ""
 echo "=== Manual start/stop commands ==="
 echo ""
@@ -291,8 +389,3 @@ echo "sudo systemctl start wg-quick@wg1"
 echo ""
 echo "# Restart on hyperstack (if VM rebooted):"
 echo "ssh ${SSH_USER}@${VM_IP} 'sudo systemctl start wg-quick@wg1'"
-echo ""
-echo "=== Use Ollama remotely ==="
-echo ""
-echo "export OLLAMA_HOST=http://${SERVER_WG_IP}:11434"
-echo "curl http://${SERVER_WG_IP}:11434/v1/models"