summary | refs | log | tree | commit | diff
path: root/snippets/hyperstack
diff options
context:
space:
mode:
Diffstat (limited to 'snippets/hyperstack')
-rw-r--r-- snippets/hyperstack/hyperstack-vm1.toml 183
-rw-r--r-- snippets/hyperstack/hyperstack-vm2.toml 180
-rwxr-xr-x snippets/hyperstack/hyperstack.rb 429
-rwxr-xr-x snippets/hyperstack/wg1-setup.sh 285
4 files changed, 914 insertions, 163 deletions
diff --git a/snippets/hyperstack/hyperstack-vm1.toml b/snippets/hyperstack/hyperstack-vm1.toml
new file mode 100644
index 0000000..c5c940a
--- /dev/null
+++ b/snippets/hyperstack/hyperstack-vm1.toml
@@ -0,0 +1,183 @@
+[auth]
+api_key_file = "~/.hyperstack"
+
+[hyperstack]
+base_url = "https://infrahub-api.nexgencloud.com/v1"
+
+[state]
+# Separate state file for VM1 so vm1 and vm2 can be managed independently.
+file = ".hyperstack-vm1-state.json"
+
+[vm]
+name_prefix = "hyperstack1"
+hostname = "hyperstack1"
+environment_name = "snonux-ollama"
+
+# A100-80GB is the cost-first default for nemotron-3-super inference.
+# Switch this to n3-H100x1 if you want safer throughput and compatibility headroom.
+flavor_name = "n3-A100x1"
+image_name = "Ubuntu Server 24.04 LTS R570 CUDA 12.8 with Docker"
+assign_floating_ip = true
+create_bootable_volume = false
+enable_port_randomization = false
+labels = ["nemotron-3-super", "wireguard"]
+
+[ssh]
+username = "ubuntu"
+private_key_path = "~/.ssh/id_rsa"
+hyperstack_key_name = "earth"
+port = 22
+connect_timeout_sec = 10
+
+[network]
+wireguard_udp_port = 56710
+wireguard_subnet = "192.168.3.0/24"
+# VM1 gets the first server-side WireGuard IP (subnet base address + 1, i.e. .1).
+# earth (client) is 192.168.3.2; VM1 is 192.168.3.1; VM2 is 192.168.3.3.
+wireguard_server_ip = "192.168.3.1"
+# Port 11434 is shared by both Ollama and vLLM for firewall compatibility.
+ollama_port = 11434
+# Port 4000: LiteLLM Anthropic-API proxy (used with vLLM).
+litellm_port = 4000
+allowed_ssh_cidrs = ["0.0.0.0/0"]
+allowed_wireguard_cidrs = ["0.0.0.0/0"]
+
+[bootstrap]
+enable_guest_bootstrap = true
+install_wireguard = true
+configure_ufw = true
+configure_ollama_host = false
+
+[ollama]
+# Disabled in favour of vLLM; set install = true to switch back to Ollama.
+install = false
+models_dir = "/ephemeral/ollama/models"
+listen_host = "0.0.0.0:11434"
+gpu_overhead_mb = 2000
+num_parallel = 1
+context_length = 32768
+pull_models = ["nemotron-3-super"]
+
+# vLLM serves one model via Docker; LiteLLM translates Anthropic API → OpenAI.
+# VM1 defaults to nemotron-3-super; use 'model switch' to load any other preset.
+[vllm]
+install = true
+model = "cyankiwi/NVIDIA-Nemotron-3-Super-120B-A12B-AWQ-4bit"
+# HuggingFace model cache on ephemeral NVMe (fast; survives reboots on most providers).
+hug_cache_dir = "/ephemeral/hug"
+container_name = "vllm_nemotron_super"
+max_model_len = 262144
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+# NVIDIA Nemotron-3-Super uses the same XML tool-call format as Qwen3, hence the qwen3_xml parser.
+tool_call_parser = "qwen3_xml"
+trust_remote_code = true
+extra_vllm_args = ["--reasoning-parser", "nemotron_v3"]
+# LiteLLM maps each entry to the vLLM model; add new Anthropic model IDs here.
+litellm_master_key = "sk-litellm-master"
+litellm_claude_model_names = [
+ "claude-sonnet-4-20250514",
+ "claude-opus-4-20250514",
+ "claude-opus-4-6-20260604",
+ "claude-haiku-3-5-20241022"
+]
+
+# Named model presets for 'ruby hyperstack.rb --config hyperstack-vm1.toml model switch <name>'.
+# Each preset overrides the matching [vllm] field; unset fields fall back to [vllm] defaults.
+
+[vllm.presets.qwen3-coder-next]
+model = "bullpoint/Qwen3-Coder-Next-AWQ-4bit"
+container_name = "vllm_qwen3"
+max_model_len = 262144
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = "qwen3_coder"
+
+# NVIDIA Nemotron-3-Super-120B-A12B AWQ 4-bit — hybrid Mamba+MoE (12B active / 120B total).
+# ~60 GB weights on A100 80GB. Uses NoPE so context can be set to 1M; no YaRN needed.
+# Requires trust_remote_code=true for the nemotron_h architecture.
+[vllm.presets.nemotron-super]
+model = "cyankiwi/NVIDIA-Nemotron-3-Super-120B-A12B-AWQ-4bit"
+container_name = "vllm_nemotron_super"
+max_model_len = 262144
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = "qwen3_xml"
+trust_remote_code = true
+extra_vllm_args = ["--reasoning-parser", "nemotron_v3"]
+
+# OpenAI GPT-OSS 20B — ultra-fast MoE (3.6B active / 20B total, MXFP4), ~14 GB on A100.
+[vllm.presets.gpt-oss-20b]
+model = "openai/gpt-oss-20b"
+container_name = "vllm_gpt_oss_20b"
+max_model_len = 65536
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = ""
+
+# OpenAI GPT-OSS 120B — powerful MoE (5.1B active / 117B total, MXFP4), ~65 GB on A100.
+# Hard architecture limit: max_position_embeddings=131072 in model config.json.
+[vllm.presets.gpt-oss-120b]
+model = "openai/gpt-oss-120b"
+container_name = "vllm_gpt_oss_120b"
+max_model_len = 131072
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = ""
+
+# Qwen2.5-Coder-32B-Instruct AWQ — best-in-class open coding model at 32B, ~18 GB on A100.
+[vllm.presets.qwen25-coder-32b]
+model = "Qwen/Qwen2.5-Coder-32B-Instruct-AWQ"
+container_name = "vllm_qwen25_coder32b"
+max_model_len = 32768
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = "hermes"
+
+# Qwen3-Coder-30B-A3B AWQ — Qwen3 generation coding MoE (3B active / 30B total), ~18 GB.
+[vllm.presets.qwen3-coder-30b]
+model = "QuantTrio/Qwen3-Coder-30B-A3B-Instruct-AWQ"
+container_name = "vllm_qwen3_coder30b"
+max_model_len = 65536
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = "qwen3_coder"
+
+# DeepSeek-R1-Distill-Qwen-32B AWQ — R1 reasoning distillation of Qwen 32B, ~18 GB on A100.
+[vllm.presets.deepseek-r1-32b]
+model = "casperhansen/deepseek-r1-distill-qwen-32b-awq"
+container_name = "vllm_deepseek_r1_32b"
+max_model_len = 32768
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = ""
+extra_vllm_args = ["--reasoning-parser", "deepseek_r1"]
+
+# Qwen3-32B AWQ — dense 32B reasoning model with extended context, ~18 GB on A100.
+[vllm.presets.qwen3-32b]
+model = "Qwen/Qwen3-32B-AWQ"
+container_name = "vllm_qwen3_32b"
+max_model_len = 32768
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = ""
+extra_vllm_args = ["--reasoning-parser", "deepseek_r1"]
+
+# Devstral-Small-2507 AWQ — Mistral's coding agent model (~15 GB on A100).
+[vllm.presets.devstral]
+model = "cyankiwi/Devstral-Small-2507-AWQ-4bit"
+container_name = "vllm_devstral"
+max_model_len = 32768
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = "mistral"
+extra_vllm_args = ["--tokenizer_mode", "mistral", "--config_format", "mistral"]
+
+[wireguard]
+auto_setup = true
+setup_script = "./wg1-setup.sh"
+
+[local_client]
+check_wg1_service = true
+interface_name = "wg1"
+config_path = "/etc/wireguard/wg1.conf"
diff --git a/snippets/hyperstack/hyperstack-vm2.toml b/snippets/hyperstack/hyperstack-vm2.toml
new file mode 100644
index 0000000..6cc6503
--- /dev/null
+++ b/snippets/hyperstack/hyperstack-vm2.toml
@@ -0,0 +1,180 @@
+[auth]
+api_key_file = "~/.hyperstack"
+
+[hyperstack]
+base_url = "https://infrahub-api.nexgencloud.com/v1"
+
+[state]
+# Separate state file for VM2 so vm1 and vm2 can be managed independently.
+file = ".hyperstack-vm2-state.json"
+
+[vm]
+name_prefix = "hyperstack2"
+hostname = "hyperstack2"
+environment_name = "snonux-ollama"
+
+# A100-80GB is the cost-first default for qwen3-coder-next inference.
+# Switch this to n3-H100x1 if you want safer throughput and compatibility headroom.
+flavor_name = "n3-A100x1"
+image_name = "Ubuntu Server 24.04 LTS R570 CUDA 12.8 with Docker"
+assign_floating_ip = true
+create_bootable_volume = false
+enable_port_randomization = false
+labels = ["qwen3-coder-next", "wireguard"]
+
+[ssh]
+username = "ubuntu"
+private_key_path = "~/.ssh/id_rsa"
+hyperstack_key_name = "earth"
+port = 22
+connect_timeout_sec = 10
+
+[network]
+wireguard_udp_port = 56710
+wireguard_subnet = "192.168.3.0/24"
+# VM2 gets the second server-side WireGuard IP, .3 (skipping .2, which is the earth client).
+# earth (client) is 192.168.3.2; VM1 is 192.168.3.1; VM2 is 192.168.3.3.
+wireguard_server_ip = "192.168.3.3"
+# Port 11434 is shared by both Ollama and vLLM for firewall compatibility.
+ollama_port = 11434
+# Port 4000: LiteLLM Anthropic-API proxy (used with vLLM).
+litellm_port = 4000
+allowed_ssh_cidrs = ["0.0.0.0/0"]
+allowed_wireguard_cidrs = ["0.0.0.0/0"]
+
+[bootstrap]
+enable_guest_bootstrap = true
+install_wireguard = true
+configure_ufw = true
+configure_ollama_host = false
+
+[ollama]
+# Disabled in favour of vLLM; set install = true to switch back to Ollama.
+install = false
+models_dir = "/ephemeral/ollama/models"
+listen_host = "0.0.0.0:11434"
+gpu_overhead_mb = 2000
+num_parallel = 1
+context_length = 32768
+pull_models = ["qwen3-coder-next"]
+
+# vLLM serves one model via Docker; LiteLLM translates Anthropic API → OpenAI.
+# VM2 defaults to qwen3-coder-next; use 'model switch' to load any other preset.
+[vllm]
+install = true
+model = "bullpoint/Qwen3-Coder-Next-AWQ-4bit"
+# HuggingFace model cache on ephemeral NVMe (fast; survives reboots on most providers).
+hug_cache_dir = "/ephemeral/hug"
+container_name = "vllm_qwen3"
+max_model_len = 262144
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = "qwen3_coder"
+# LiteLLM maps each entry to the vLLM model; add new Anthropic model IDs here.
+litellm_master_key = "sk-litellm-master"
+litellm_claude_model_names = [
+ "claude-sonnet-4-20250514",
+ "claude-opus-4-20250514",
+ "claude-opus-4-6-20260604",
+ "claude-haiku-3-5-20241022"
+]
+
+# Named model presets for 'ruby hyperstack.rb --config hyperstack-vm2.toml model switch <name>'.
+# Each preset overrides the matching [vllm] field; unset fields fall back to [vllm] defaults.
+
+[vllm.presets.qwen3-coder-next]
+model = "bullpoint/Qwen3-Coder-Next-AWQ-4bit"
+container_name = "vllm_qwen3"
+max_model_len = 262144
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = "qwen3_coder"
+
+# NVIDIA Nemotron-3-Super-120B-A12B AWQ 4-bit — hybrid Mamba+MoE (12B active / 120B total).
+# ~60 GB weights on A100 80GB. Uses NoPE so context can be set to 1M; no YaRN needed.
+# Requires trust_remote_code=true for the nemotron_h architecture.
+[vllm.presets.nemotron-super]
+model = "cyankiwi/NVIDIA-Nemotron-3-Super-120B-A12B-AWQ-4bit"
+container_name = "vllm_nemotron_super"
+max_model_len = 262144
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = "qwen3_xml"
+trust_remote_code = true
+extra_vllm_args = ["--reasoning-parser", "nemotron_v3"]
+
+# OpenAI GPT-OSS 20B — ultra-fast MoE (3.6B active / 20B total, MXFP4), ~14 GB on A100.
+[vllm.presets.gpt-oss-20b]
+model = "openai/gpt-oss-20b"
+container_name = "vllm_gpt_oss_20b"
+max_model_len = 65536
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = ""
+
+# OpenAI GPT-OSS 120B — powerful MoE (5.1B active / 117B total, MXFP4), ~65 GB on A100.
+# Hard architecture limit: max_position_embeddings=131072 in model config.json.
+[vllm.presets.gpt-oss-120b]
+model = "openai/gpt-oss-120b"
+container_name = "vllm_gpt_oss_120b"
+max_model_len = 131072
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = ""
+
+# Qwen2.5-Coder-32B-Instruct AWQ — best-in-class open coding model at 32B, ~18 GB on A100.
+[vllm.presets.qwen25-coder-32b]
+model = "Qwen/Qwen2.5-Coder-32B-Instruct-AWQ"
+container_name = "vllm_qwen25_coder32b"
+max_model_len = 32768
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = "hermes"
+
+# Qwen3-Coder-30B-A3B AWQ — Qwen3 generation coding MoE (3B active / 30B total), ~18 GB.
+[vllm.presets.qwen3-coder-30b]
+model = "QuantTrio/Qwen3-Coder-30B-A3B-Instruct-AWQ"
+container_name = "vllm_qwen3_coder30b"
+max_model_len = 65536
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = "qwen3_coder"
+
+# DeepSeek-R1-Distill-Qwen-32B AWQ — R1 reasoning distillation of Qwen 32B, ~18 GB on A100.
+[vllm.presets.deepseek-r1-32b]
+model = "casperhansen/deepseek-r1-distill-qwen-32b-awq"
+container_name = "vllm_deepseek_r1_32b"
+max_model_len = 32768
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = ""
+extra_vllm_args = ["--reasoning-parser", "deepseek_r1"]
+
+# Qwen3-32B AWQ — dense 32B reasoning model with extended context, ~18 GB on A100.
+[vllm.presets.qwen3-32b]
+model = "Qwen/Qwen3-32B-AWQ"
+container_name = "vllm_qwen3_32b"
+max_model_len = 32768
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = ""
+extra_vllm_args = ["--reasoning-parser", "deepseek_r1"]
+
+# Devstral-Small-2507 AWQ — Mistral's coding agent model (~15 GB on A100).
+[vllm.presets.devstral]
+model = "cyankiwi/Devstral-Small-2507-AWQ-4bit"
+container_name = "vllm_devstral"
+max_model_len = 32768
+gpu_memory_utilization = 0.92
+tensor_parallel_size = 1
+tool_call_parser = "mistral"
+extra_vllm_args = ["--tokenizer_mode", "mistral", "--config_format", "mistral"]
+
+[wireguard]
+auto_setup = true
+setup_script = "./wg1-setup.sh"
+
+[local_client]
+check_wg1_service = true
+interface_name = "wg1"
+config_path = "/etc/wireguard/wg1.conf"
diff --git a/snippets/hyperstack/hyperstack.rb b/snippets/hyperstack/hyperstack.rb
index 69bb6f6..526d7aa 100755
--- a/snippets/hyperstack/hyperstack.rb
+++ b/snippets/hyperstack/hyperstack.rb
@@ -82,6 +82,9 @@ module HyperstackVM
'network' => {
'wireguard_udp_port' => 56_710,
'wireguard_subnet' => '192.168.3.0/24',
+ # Optional: explicit server-side WireGuard IP. When nil, derived as subnet + 1 (i.e. .1).
+ # Set to a different address (e.g. 192.168.3.3) for a second VM sharing the same wg1 tunnel.
+ 'wireguard_server_ip' => nil,
'ollama_port' => 11_434,
'litellm_port' => 4_000,
'allowed_ssh_cidrs' => ['0.0.0.0/0'],
@@ -155,6 +158,20 @@ module HyperstackVM
rescue IPAddr::InvalidAddressError => e
raise Error, "Invalid CIDR #{cidr.inspect}: #{e.message}"
end
+
+ server_ip = fetch('network', 'wireguard_server_ip')
+ if server_ip
+ # Validate that the explicit server WireGuard IP is within the configured subnet.
+ begin
+ subnet = IPAddr.new(fetch('network', 'wireguard_subnet'))
+ unless subnet.include?(IPAddr.new(server_ip))
+ raise Error,
+ "wireguard_server_ip #{server_ip.inspect} is not in wireguard_subnet #{fetch('network', 'wireguard_subnet')}"
+ end
+ rescue IPAddr::InvalidAddressError => e
+ raise Error, "Invalid wireguard_server_ip #{server_ip.inspect}: #{e.message}"
+ end
+ end
end
def fetch(section, key)
@@ -305,15 +322,25 @@ module HyperstackVM
Integer(fetch('network', 'litellm_port'))
end
+ # Returns the server-side WireGuard IP for this VM.
+ # Uses the explicitly configured address when set; otherwise derives it as subnet_base + 1.
+ # Example: 192.168.3.0/24 → 192.168.3.1 (default VM1); VM2 sets wireguard_server_ip=192.168.3.3.
def wireguard_gateway_ip
+ configured = fetch('network', 'wireguard_server_ip')
+ return configured.to_s if configured && !configured.to_s.strip.empty?
+
+ # Fall back to first usable address in the subnet.
base = IPAddr.new(wireguard_subnet).to_s
parts = base.split('.').map(&:to_i)
parts[-1] += 1
parts.join('.')
end
+ # Returns the WireGuard hostname for this VM: e.g. hyperstack1.wg1 or hyperstack2.wg1.
+ # Used as the DNS name to reach the VM over the tunnel (must be in /etc/hosts on the client).
def wireguard_gateway_hostname
- "hyperstack.#{local_interface_name}"
+ host = vm_hostname || 'hyperstack'
+ "#{host}.#{local_interface_name}"
end
def allowed_ssh_cidrs
@@ -414,6 +441,17 @@ module HyperstackVM
fetch('vllm', 'litellm_master_key')
end
+ # Whether to pass --trust-remote-code to vLLM for the default model.
+ # Required for architectures not yet in the vLLM upstream registry (e.g. nemotron_h).
+ def vllm_trust_remote_code
+ truthy?(fetch('vllm', 'trust_remote_code'))
+ end
+
+ # Extra vLLM CLI flags for the default model (e.g. reasoning-parser args).
+ def vllm_extra_args
+ Array(fetch('vllm', 'extra_vllm_args')).map(&:to_s)
+ end
+
def vllm_presets
Hash(dig('vllm', 'presets')).transform_keys(&:to_s)
end
@@ -695,10 +733,12 @@ module HyperstackVM
end
def status
+ endpoints = configured_endpoints
{
'service_state' => service_state,
'config_path' => @config_path,
- 'endpoint' => configured_endpoint,
+ 'endpoint' => endpoints.last,
+ 'endpoints' => endpoints,
'config_readable' => !config_contents.nil?
}
end
@@ -715,51 +755,90 @@ module HyperstackVM
end
def configured_endpoint
- content = config_contents
- return nil if content.nil?
-
- parse_wireguard_config(content)['Endpoint']
+ configured_endpoints.last
end
- def config_contents
- return @config_contents if defined?(@config_contents)
+ def configured_endpoints
+ content = config_contents
+ return [] if content.nil?
- @config_contents = File.read(@config_path)
- rescue Errno::EACCES, Errno::ENOENT
- stdout, _stderr, status = Open3.capture3('sudo', '-n', 'cat', @config_path)
- @config_contents = status.success? ? stdout : nil
+ parse_wireguard_peers(content).filter_map { |peer| peer['Endpoint'] }.uniq
end
- def parse_wireguard_config(content)
+ def parse_wireguard_peers(content)
current_section = nil
- peer = {}
+ current_peer = nil
+ peers = []
content.each_line do |line|
stripped = line.strip
next if stripped.empty? || stripped.start_with?('#')
if stripped.start_with?('[') && stripped.end_with?(']')
+ peers << current_peer if current_section == 'Peer' && current_peer && !current_peer.empty?
current_section = stripped[1..-2]
+ current_peer = current_section == 'Peer' ? {} : nil
next
end
key, value = stripped.split('=', 2).map { |part| part&.strip }
next unless current_section == 'Peer' && key && value
- peer[key] = value
+ current_peer[key] = value
end
- peer
+ peers << current_peer if current_section == 'Peer' && current_peer && !current_peer.empty?
+ peers
+ end
+
+ def config_contents
+ return @config_contents if defined?(@config_contents)
+
+ @config_contents = File.read(@config_path)
+ rescue Errno::EACCES, Errno::ENOENT
+ stdout, _stderr, status = Open3.capture3('sudo', '-n', 'cat', @config_path)
+ @config_contents = status.success? ? stdout : nil
+ end
+ end
+
+ # Thread-safe output wrapper that prepends a fixed prefix to each line.
+ # Used by create-both so interleaved output from VM1 and VM2 threads is distinguishable.
+ # #print buffers partial lines until a newline is received, then flushes with the prefix.
+ class PrefixedOutput
+ def initialize(prefix, delegate, mutex)
+ @prefix = prefix
+ @delegate = delegate
+ @mutex = mutex
+ @buffer = +''
+ end
+
+ def puts(msg = '')
+ @mutex.synchronize { @delegate.puts("#{@prefix}#{msg}") }
+ end
+
+ def print(msg)
+ @buffer << msg.to_s
+ while (idx = @buffer.index("\n"))
+ line = @buffer.slice!(0, idx + 1)
+ @mutex.synchronize { @delegate.print("#{@prefix}#{line}") }
+ end
end
end
class Manager
- def initialize(config:, client:, state_store:, local_wireguard:, out: $stdout)
+ # wg_setup_pre: optional Proc called just before this VM's WireGuard setup step runs.
+ # Used by create-both to block VM2 until VM1 has written the base wg1.conf.
+ # wg_setup_post: optional Proc called after the WireGuard step completes (or is skipped).
+ # Used by create-both to signal that VM1's base config is ready for VM2.
+ def initialize(config:, client:, state_store:, local_wireguard:, out: $stdout,
+ wg_setup_pre: nil, wg_setup_post: nil)
@config = config
@client = client
@state_store = state_store
@local_wireguard = local_wireguard
@out = out
+ @wg_setup_pre = wg_setup_pre
+ @wg_setup_post = wg_setup_post
end
def create(replace: false, dry_run: false, install_vllm: nil, install_ollama: nil, vllm_preset: nil)
@@ -847,7 +926,7 @@ module HyperstackVM
raise
end
- def status
+ def status(include_local_wireguard: true)
state = @state_store.load
if state.nil?
info "No tracked VM state file at #{@state_store.path}."
@@ -868,7 +947,12 @@ module HyperstackVM
end
end
- print_local_wireguard_summary(state&.dig('public_ip'))
+ print_local_wireguard_summary(state&.dig('public_ip')) if include_local_wireguard
+ state&.dig('public_ip')
+ end
+
+ def show_local_wireguard(expected_ips = nil)
+ print_local_wireguard_summary(expected_ips)
end
# Lists configured model presets and marks the one currently running on the VM.
@@ -1004,11 +1088,18 @@ module HyperstackVM
@state_store.save(state)
end
+ # Call pre-hook before deciding whether WireGuard setup is needed; this allows a concurrent
+ # sibling VM (e.g. VM2 in create-both) to block here until the primary VM (VM1) has
+ # already written the base wg1.conf, which VM2's setup will then extend with its own peer.
+ @wg_setup_pre&.call
if wireguard_setup_needed?(state)
run_wireguard_setup(state['public_ip'])
state['wireguard_setup_at'] = Time.now.utc.iso8601
@state_store.save(state)
end
+ # Always signal post-hook so that a waiting sibling VM is unblocked even when
+ # WireGuard setup was not needed (e.g. already done on a resume).
+ @wg_setup_post&.call
# Pull and verify Ollama models after the tunnel is established.
if ollama_setup_needed?(state)
@@ -1209,7 +1300,7 @@ module HyperstackVM
return true if public_ip.empty?
expected_endpoint = "#{public_ip}:#{@config.wireguard_udp_port}"
- @local_wireguard.status['endpoint'] != expected_endpoint
+ !Array(@local_wireguard.status['endpoints']).include?(expected_endpoint)
end
def run_wireguard_setup(host)
@@ -1232,7 +1323,12 @@ module HyperstackVM
end
def run_wireguard_script(host)
- Open3.popen2e('bash', @config.wireguard_setup_script, host) do |stdin, output, wait_thr|
+ # Pass server WireGuard IP and WireGuard hostname as positional args so that
+ # wg1-setup.sh can configure the correct server-side tunnel address and update
+ # /etc/hosts on the client. The Enter keystroke via stdin bypasses the interactive prompt.
+ server_ip = @config.wireguard_gateway_ip
+ wg_hostname = @config.wireguard_gateway_hostname
+ Open3.popen2e('bash', @config.wireguard_setup_script, host, server_ip, wg_hostname) do |stdin, output, wait_thr|
stdin.sync = true
stdin.puts
stdin.close
@@ -1272,6 +1368,17 @@ module HyperstackVM
mismatches << "network.wireguard_subnet must be '192.168.3.0/24'"
end
+ # Validate that the resolved server IP is actually within the configured subnet.
+ begin
+ subnet = IPAddr.new(@config.wireguard_subnet)
+ server_ip = IPAddr.new(@config.wireguard_gateway_ip)
+ unless subnet.include?(server_ip)
+ mismatches << "wireguard_server_ip #{@config.wireguard_gateway_ip.inspect} is outside #{@config.wireguard_subnet}"
+ end
+ rescue IPAddr::InvalidAddressError => e
+ mismatches << "Invalid wireguard_server_ip: #{e.message}"
+ end
+
return if mismatches.empty?
raise Error, "Configured WireGuard settings do not match #{script_path}: #{mismatches.join('; ')}"
@@ -1659,7 +1766,10 @@ module HyperstackVM
# parser is nil only when preset explicitly omits the key and config has no default;
# empty string means "disable tool calling" (e.g. gpt-oss reasoning models).
parser = @config.vllm_tool_call_parser if parser.nil?
- trust_remote = cfg.key?('trust_remote_code') ? cfg['trust_remote_code'] : false
+ # Fall back to the top-level [vllm] config values when no preset is in use.
+ # This allows setting trust_remote_code / extra_vllm_args in the default [vllm] block
+ # (e.g. for nemotron on VM1) without requiring a --model preset flag at create time.
+ trust_remote = cfg.key?('trust_remote_code') ? cfg['trust_remote_code'] : @config.vllm_trust_remote_code
port = @config.ollama_port # vLLM reuses the Ollama port for firewall compat
docker_args = [
@@ -1688,7 +1798,9 @@ module HyperstackVM
end
docker_args << '--trust-remote-code' if trust_remote
# Append any extra flags verbatim (e.g. Mistral loader flags, reasoning parser).
- (cfg['extra_vllm_args'] || []).each { |arg| docker_args << arg }
+ # Preset extra_vllm_args take precedence; fall back to top-level [vllm].extra_vllm_args.
+ extra_args = cfg.key?('extra_vllm_args') ? Array(cfg['extra_vllm_args']) : @config.vllm_extra_args
+ extra_args.each { |arg| docker_args << arg }
docker_run = docker_args.join(' ')
script = []
@@ -1902,24 +2014,42 @@ module HyperstackVM
value.nil? ? nil : Integer(value)
end
- def print_local_wireguard_summary(expected_ip)
+ def print_local_wireguard_summary(expected_ips)
return unless @config.local_client_checks_enabled?
wg_status = @local_wireguard.status
- endpoint = wg_status['endpoint']
+ endpoints = Array(wg_status['endpoints']).compact.uniq
info "Local WireGuard #{@config.local_interface_name}: #{wg_status['service_state']}"
- if endpoint
- info "Local WireGuard endpoint: #{endpoint}"
- if expected_ip
- host, = endpoint.split(':', 2)
- if host == expected_ip
- info 'Local WireGuard endpoint matches the managed VM IP.'
- else
- warn "Local WireGuard endpoint points to #{host}, expected #{expected_ip}."
- end
+ if endpoints.empty?
+ warn "Unable to read #{@config.local_wg_config_path} for local WireGuard endpoint validation."
+ return
+ end
+
+ label = endpoints.one? ? 'endpoint' : 'endpoints'
+ info "Local WireGuard #{label}: #{endpoints.join(', ')}"
+
+ expected = Array(expected_ips).compact.map(&:to_s).map(&:strip).reject(&:empty?).uniq
+ return if expected.empty?
+
+ expected_endpoints = expected.map { |ip| "#{ip}:#{@config.wireguard_udp_port}" }
+ missing = expected_endpoints.reject { |endpoint| endpoints.include?(endpoint) }
+
+ if expected_endpoints.one?
+ if missing.empty?
+ info 'Local WireGuard endpoint matches the managed VM IP.'
+ else
+ hosts = endpoints.map { |endpoint| endpoint.split(':', 2).first }.uniq
+ warn "Local WireGuard endpoints point to #{hosts.join(', ')}, expected #{expected.first}."
end
+ return
+ end
+
+ if missing.empty?
+ info 'Local WireGuard has peers for all managed VM IPs.'
else
- warn "Unable to read #{@config.local_wg_config_path} for local WireGuard endpoint validation."
+ present = expected_endpoints - missing
+ info "Local WireGuard has peers for: #{present.map { |endpoint| endpoint.split(':', 2).first }.join(', ')}" unless present.empty?
+ warn "Local WireGuard missing peers for: #{missing.map { |endpoint| endpoint.split(':', 2).first }.join(', ')}."
end
end
@@ -1936,6 +2066,7 @@ module HyperstackVM
def initialize(argv)
@argv = argv.dup
@config_path = File.join(__dir__, 'hyperstack-vm.toml')
+ @config_explicit = false
end
def show_help
@@ -1943,7 +2074,13 @@ module HyperstackVM
puts
puts 'Commands:'
puts ' create [--replace] [--dry-run] [--vllm|--no-vllm] [--ollama|--no-ollama] [--model PRESET]'
+ puts ' create-both [--replace] [--dry-run] [--vllm|--no-vllm] [--ollama|--no-ollama]'
+ puts ' Provision hyperstack-vm1.toml and hyperstack-vm2.toml concurrently.'
+ puts ' WireGuard setup is serialized: VM1 writes the base wg1.conf first,'
+ puts ' then VM2 adds its peer. Requires both TOML files next to the script.'
puts ' delete [--vm-id ID] [--dry-run]'
+ puts ' delete-both [--dry-run]'
+ puts ' Delete the VMs tracked by hyperstack-vm1.toml and hyperstack-vm2.toml.'
puts ' status'
puts ' test'
puts ' model list'
@@ -1955,6 +2092,7 @@ module HyperstackVM
opts.banner = 'Usage: ruby hyperstack.rb [--config path] <create|delete|status> [options]'
opts.on('--config PATH', "Path to TOML config (default: #{@config_path})") do |value|
@config_path = value
+ @config_explicit = true
end
opts.on('-h', '--help', 'Show help') do
show_help
@@ -1969,39 +2107,33 @@ module HyperstackVM
exit 0
end
+ # create-both loads its own config files and does not use the default config path.
+ # Parse it before building the manager so we avoid loading the default config needlessly.
+ if command == 'create-both'
+ opts = parse_create_options(@argv, include_model_preset: false)
+ run_create_both(**opts)
+ return
+ end
+
+ if command == 'delete-both'
+ opts = parse_delete_both_options(@argv)
+ run_delete_both(**opts)
+ return
+ end
+
+ if command == 'status'
+ run_status
+ return
+ end
+
+ # All other commands operate on a single VM defined by the --config path.
config_loader = ConfigLoader.load(@config_path)
- state_store = StateStore.new(config_loader.config.state_file)
- client = HyperstackClient.new(base_url: config_loader.config.api_base_url, api_key: config_loader.config.api_key)
- local_wireguard = LocalWireGuard.new(
- interface_name: config_loader.config.local_interface_name,
- config_path: config_loader.config.local_wg_config_path
- )
- manager = Manager.new(
- config: config_loader.config,
- client: client,
- state_store: state_store,
- local_wireguard: local_wireguard
- )
+ manager = build_manager(config_loader.config)
case command
when 'create'
- replace = false
- dry_run = false
- install_vllm = nil
- install_ollama = nil
- vllm_preset = nil
- parser = OptionParser.new do |opts|
- opts.on('--replace', 'Delete the tracked VM before creating a new one') { replace = true }
- opts.on('--dry-run', 'Resolve config and print the create plan without creating a VM') { dry_run = true }
- opts.on('--vllm', 'Enable vLLM+LiteLLM setup (overrides config)') { install_vllm = true }
- opts.on('--no-vllm', 'Disable vLLM+LiteLLM setup (overrides config)') { install_vllm = false }
- opts.on('--ollama', 'Enable Ollama setup (overrides config)') { install_ollama = true }
- opts.on('--no-ollama', 'Disable Ollama setup (overrides config)') { install_ollama = false }
- opts.on('--model PRESET', 'Use a named vLLM model preset at create time') { |v| vllm_preset = v }
- end
- parser.parse!(@argv)
- manager.create(replace: replace, dry_run: dry_run, install_vllm: install_vllm,
- install_ollama: install_ollama, vllm_preset: vllm_preset)
+ opts = parse_create_options(@argv)
+ manager.create(**opts)
when 'delete'
vm_id = nil
dry_run = false
@@ -2013,8 +2145,6 @@ module HyperstackVM
end
parser.parse!(@argv)
manager.delete(vm_id: vm_id, dry_run: dry_run)
- when 'status'
- manager.status
when 'test'
manager.test
when 'model'
@@ -2035,9 +2165,174 @@ module HyperstackVM
raise Error, "Unknown model subcommand #{sub.inspect}. Use list or switch."
end
else
- raise Error, "Unknown command #{command.inspect}. Use create, delete, status, test, or model."
+ raise Error, "Unknown command #{command.inspect}. Use create, create-both, delete, delete-both, status, test, or model."
end
end
+
+ private
+
+ # Parses the shared --replace / --dry-run / --vllm / --ollama / --model flags
+ # used by both 'create' and 'create-both'. When include_model_preset is false
+ # (create-both), the --model flag is not registered because each VM uses its own
+ # TOML default. Returns a hash suitable for splatting into Manager#create.
+ def parse_create_options(argv, include_model_preset: true)
+ opts = { replace: false, dry_run: false, install_vllm: nil, install_ollama: nil, vllm_preset: nil }
+ OptionParser.new do |o|
+ o.on('--replace', 'Delete the tracked VM before creating a new one') { opts[:replace] = true }
+ o.on('--dry-run', 'Print the create plan without creating a VM') { opts[:dry_run] = true }
+ o.on('--vllm', 'Enable vLLM+LiteLLM setup (overrides config)') { opts[:install_vllm] = true }
+ o.on('--no-vllm', 'Disable vLLM+LiteLLM setup (overrides config)') { opts[:install_vllm] = false }
+ o.on('--ollama', 'Enable Ollama setup (overrides config)') { opts[:install_ollama] = true }
+ o.on('--no-ollama', 'Disable Ollama setup (overrides config)') { opts[:install_ollama] = false }
+ o.on('--model PRESET', 'Use a named vLLM preset at create time') { |v| opts[:vllm_preset] = v } if include_model_preset
+ end.parse!(argv)
+ opts
+ end
+
+ def parse_delete_both_options(argv)
+ opts = { dry_run: false }
+ OptionParser.new do |o|
+ o.on('--dry-run', 'Show which VMs would be deleted without deleting them') { opts[:dry_run] = true }
+ end.parse!(argv)
+ opts
+ end
+
+ # Constructs a Manager and all its dependencies from a Config object.
+ # Accepts optional output destination and WireGuard concurrency hooks.
+ def build_manager(config, out: $stdout, wg_setup_pre: nil, wg_setup_post: nil)
+ state_store = StateStore.new(config.state_file)
+ client = HyperstackClient.new(base_url: config.api_base_url, api_key: config.api_key)
+ local_wireguard = LocalWireGuard.new(
+ interface_name: config.local_interface_name,
+ config_path: config.local_wg_config_path
+ )
+ Manager.new(
+ config: config,
+ client: client,
+ state_store: state_store,
+ local_wireguard: local_wireguard,
+ out: out,
+ wg_setup_pre: wg_setup_pre,
+ wg_setup_post: wg_setup_post
+ )
+ end
+
+ def run_status
+ loaders = status_config_loaders
+ if loaders.one?
+ build_manager(loaders.first.config).status
+ return
+ end
+
+ expected_ips = []
+ loaders.each_with_index do |loader, index|
+ puts if index.positive?
+ puts "[#{File.basename(loader.path)}]"
+ expected_ip = build_manager(loader.config).status(include_local_wireguard: false)
+ expected_ips << expected_ip if expected_ip
+ end
+
+ puts
+ puts '[local-wireguard]'
+ build_manager(loaders.first.config).show_local_wireguard(expected_ips)
+ end
+
+ def status_config_loaders
+ return [ConfigLoader.load(@config_path)] if @config_explicit
+
+ candidates = [
+ @config_path,
+ File.join(__dir__, 'hyperstack-vm1.toml'),
+ File.join(__dir__, 'hyperstack-vm2.toml')
+ ].uniq.select { |path| File.exist?(path) }
+
+ loaders = candidates.map { |path| ConfigLoader.load(path) }
+ tracked = loaders.select { |loader| File.exist?(loader.config.state_file) }
+ tracked.empty? ? [ConfigLoader.load(@config_path)] : tracked
+ end
+
+ def pair_config_loaders
+ [
+ ConfigLoader.load(File.join(__dir__, 'hyperstack-vm1.toml')),
+ ConfigLoader.load(File.join(__dir__, 'hyperstack-vm2.toml'))
+ ]
+ end
+
+ # Provisions hyperstack-vm1 and hyperstack-vm2 concurrently in separate threads.
+ # WireGuard setup is serialized: VM1 runs first (replacing the base wg1.conf), then
+ # VM2 adds its peer. A Mutex+ConditionVariable acts as a one-shot latch between threads.
+ # If VM1 fails before reaching the WG step the latch is still released so VM2 doesn't hang.
+ # vllm_preset is accepted but ignored — each VM uses its own TOML default preset.
+ def run_create_both(replace:, dry_run:, install_vllm:, install_ollama:, vllm_preset: nil) # rubocop:disable Lint/UnusedMethodArgument
+ vm1_loader, vm2_loader = pair_config_loaders
+ vm1_config = vm1_loader.config
+ vm2_config = vm2_loader.config
+
+ out_mutex = Mutex.new
+ wg_mutex = Mutex.new
+ wg_cv = ConditionVariable.new
+ vm1_wg_state = { done: false, error: nil }
+
+ # VM1 signals the latch after its WG step (whether WG ran or was already done).
+ vm1_wg_post = proc do
+ wg_mutex.synchronize { vm1_wg_state[:done] = true; wg_cv.broadcast }
+ end
+
+ # VM2 blocks here until VM1's WG step resolves, then raises if VM1 failed.
+ vm2_wg_pre = proc do
+ wg_mutex.synchronize { wg_cv.wait(wg_mutex) until vm1_wg_state[:done] || vm1_wg_state[:error] }
+ raise Error, "VM1 WireGuard setup failed; cannot add VM2 peer." if vm1_wg_state[:error]
+ end
+
+ manager1 = build_manager(vm1_config,
+ out: PrefixedOutput.new('[vm1] ', $stdout, out_mutex),
+ wg_setup_post: vm1_wg_post)
+ manager2 = build_manager(vm2_config,
+ out: PrefixedOutput.new('[vm2] ', $stdout, out_mutex),
+ wg_setup_pre: vm2_wg_pre)
+
+ errors = {}
+ create_opts = { replace: replace, dry_run: dry_run,
+ install_vllm: install_vllm, install_ollama: install_ollama }
+
+ vm1_thread = Thread.new do
+ manager1.create(**create_opts)
+ rescue Error => e
+ errors[:vm1] = e.message
+ # Unblock VM2 even if VM1 failed so the process doesn't hang.
+ wg_mutex.synchronize { vm1_wg_state[:error] = e.message; wg_cv.broadcast }
+ end
+
+ vm2_thread = Thread.new do
+ manager2.create(**create_opts)
+ rescue Error => e
+ errors[:vm2] = e.message
+ end
+
+ [vm1_thread, vm2_thread].each(&:join)
+
+ errors.each { |vm, msg| $stderr.puts("ERROR [#{vm}]: #{msg}") }
+ exit 1 unless errors.empty?
+ end
+
+ def run_delete_both(dry_run:)
+ out_mutex = Mutex.new
+ errors = {}
+
+ pair_config_loaders.each_with_index do |loader, index|
+ label = "vm#{index + 1}"
+ manager = build_manager(loader.config, out: PrefixedOutput.new("[#{label}] ", $stdout, out_mutex))
+
+ begin
+ manager.delete(dry_run: dry_run)
+ rescue Error => e
+ errors[label.to_sym] = e.message
+ end
+ end
+
+ errors.each { |vm, msg| $stderr.puts("ERROR [#{vm}]: #{msg}") }
+ exit 1 unless errors.empty?
+ end
end
end
diff --git a/snippets/hyperstack/wg1-setup.sh b/snippets/hyperstack/wg1-setup.sh
index d057fb8..49d716a 100755
--- a/snippets/hyperstack/wg1-setup.sh
+++ b/snippets/hyperstack/wg1-setup.sh
@@ -1,56 +1,76 @@
#!/bin/bash
#
-# wg1-setup.sh - Set up WireGuard wg1 tunnel between earth and hyperstack VM
+# wg1-setup.sh - Set up WireGuard wg1 tunnel between earth and a hyperstack VM
#
# USAGE:
-# ./wg1-setup.sh <VM_PUBLIC_IP>
-# Example: ./wg1-setup.sh 185.216.20.163
+# ./wg1-setup.sh <VM_PUBLIC_IP> [SERVER_WG_IP] [WG_HOSTNAME]
+#
+# VM_PUBLIC_IP Public IP of the hyperstack VM (required)
+# SERVER_WG_IP WireGuard IP to assign to this VM's tunnel interface (default: 192.168.3.1)
+# Use 192.168.3.3 for hyperstack2 when hyperstack1 is already set up.
+# WG_HOSTNAME Hostname mapped to SERVER_WG_IP in /etc/hosts (default: <vmhostname>.wg1)
+#
+# EXAMPLES:
+# ./wg1-setup.sh 185.216.20.163 # VM1 (hyperstack1, 192.168.3.1)
+# ./wg1-setup.sh 185.216.20.200 192.168.3.3 hyperstack2.wg1 # VM2 added to existing tunnel
#
# NETWORK DESIGN:
# Subnet: 192.168.3.0/24 (separate from wg0's 192.168.2.0/24)
# Port: 56710/udp
#
-# +----------------+ +------------------+
-# | earth (client) | | hyperstack (VM) |
-# | 192.168.3.2 |<--- WireGuard ---> | 192.168.3.1 |
-# +----------------+ tunnel +------------------+
-# | Ollama :11434 |
-# +------------------+
+# +----------------+ +------------------+
+# | earth (client) | | hyperstack1 (VM) |
+# | 192.168.3.2 |<--- WireGuard ---> | 192.168.3.1 |
+# +----------------+ tunnel +------------------+
+# | | vLLM :11434 |
+# | +------------------+
+# | +------------------+
+# +--------- WireGuard ----------> | hyperstack2 (VM) |
+# | 192.168.3.3 |
+# +------------------+
+# | vLLM :11434 |
+# +------------------+
#
# WHAT THIS SCRIPT DOES:
-# On hyperstack VM (via SSH):
+#
+# For the FIRST VM (SERVER_WG_IP = 192.168.3.1, default):
+# Generates fresh key-pairs and REPLACES /etc/wireguard/wg1.conf on earth with
+# a single-peer config pointing to this VM.
+#
+# For ADDITIONAL VMs (any other SERVER_WG_IP, e.g. 192.168.3.3):
+# Generates new server-side keys and ADDS or UPDATES just the new [Peer] block
+# in the existing /etc/wireguard/wg1.conf, preserving the [Interface] section
+# (client key-pair) and any other peers already present.
+# The existing client public key from wg1.conf is extracted and used in the new
+# VM's server config so it can encrypt traffic to earth.
+#
+# On every hyperstack VM (via SSH):
# - Installs WireGuard if not present
-# - Creates /etc/wireguard/wg1.conf
-# - Opens UFW ports: 56710/udp (WireGuard), 11434/tcp from 192.168.3.0/24 (Ollama)
-# - Configures Ollama to listen on 0.0.0.0:11434
+# - Creates /etc/wireguard/wg1.conf with SERVER_WG_IP as the tunnel address
+# - Opens UFW ports: 56710/udp (WireGuard), 11434/tcp from 192.168.3.0/24
# - Starts wg-quick@wg1
#
# On earth (locally):
# - Installs WireGuard if not present (dnf)
-# - Creates /etc/wireguard/wg1.conf
-# - Starts wg-quick@wg1
+# - Creates or updates /etc/wireguard/wg1.conf (see above)
+# - Adds SERVER_WG_IP <-> WG_HOSTNAME mapping to /etc/hosts
+# - Restarts wg-quick@wg1
#
# PREREQUISITES:
# - SSH access to ubuntu@<VM_IP> with key-based auth
# - UDP port 56710 open in cloud provider's firewall/security group
#
# RE-RUNNING:
-# When the VM IP changes, simply re-run this script with the new IP.
+# When a VM IP changes, simply re-run this script with the new IP.
# It will regenerate keys and update configs on both sides.
#
-# USING OLLAMA REMOTELY:
-# export OLLAMA_HOST=http://192.168.3.1:11434
-# ollama run qwen2.5-coder:14b-instruct
-# # Or with aider:
-# aider --model ollama/qwen2.5-coder:14b-instruct
-#
set -euo pipefail
-# Configuration constants
+# Fixed network constants that must match hyperstack-vm*.toml [network] section.
WG_INTERFACE="wg1"
WG_PORT="56710"
-SERVER_WG_IP="192.168.3.1"
+DEFAULT_SERVER_WG_IP="192.168.3.1"
CLIENT_WG_IP="192.168.3.2"
SUBNET_MASK="24"
SSH_USER="ubuntu"
@@ -61,22 +81,12 @@ GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
-print_warning() {
- echo -e "${YELLOW}$1${NC}"
-}
-
-print_success() {
- echo -e "${GREEN}$1${NC}"
-}
-
-print_error() {
- echo -e "${RED}$1${NC}"
-}
+print_warning() { echo -e "${YELLOW}$1${NC}"; }
+print_success() { echo -e "${GREEN}$1${NC}"; }
+print_error() { echo -e "${RED}$1${NC}"; }
# Retry wrapper for SSH/SCP commands that may fail due to transient
# connection resets (e.g. sshd restart from unattended-upgrades).
-# Usage: retry_ssh ssh user@host "command"
-# retry_ssh scp file user@host:/path
retry_ssh() {
local max_attempts=5
local attempt=1
@@ -96,50 +106,137 @@ retry_ssh() {
done
}
+# Updates or adds a [Peer] block in the existing /etc/wireguard/wg1.conf.
+# Preserves the [Interface] section and any other peers; only the block for
+# SERVER_WG_IP (matched by its "AllowedIPs = <ip>/32" line) is replaced.
+# Uses python3 to split the INI-style config into sections and rewrite the peer.
+#
+# Arguments:
+#   $1  WireGuard tunnel IP of the VM (e.g. 192.168.3.3)
+#   $2  public key of the VM's WireGuard interface
+#   $3  public IP of the VM (used for the Endpoint line)
+# Returns 0 on success; non-zero if the config cannot be read, rewritten or installed.
+# Both temp files are removed on every return path because the copied config
+# contains the client private key.
+update_peer_in_client_config() {
+    local server_ip="$1"
+    local server_pubkey="$2"
+    local vm_ip="$3"
+    local tmpfile conf_copy
+    local rc=0
+    tmpfile=$(mktemp)
+    conf_copy=$(mktemp)
+
+    # /etc/wireguard/wg1.conf is root-owned; read it via sudo into a user-readable temp copy.
+    if ! sudo cat /etc/wireguard/wg1.conf > "$conf_copy" 2>/dev/null; then
+        print_error "Cannot read /etc/wireguard/wg1.conf. Run wg1-setup.sh for VM1 (192.168.3.1) first."
+        rm -f "$tmpfile" "$conf_copy"
+        return 1
+    fi
+
+    # The script runs under `set -e`: a bare failing command would abort right here and
+    # skip the cleanup below. `|| rc=$?` records the status and lets the function continue.
+    python3 - "$server_ip" "$server_pubkey" "$vm_ip" "$WG_PORT" "$conf_copy" "$tmpfile" << 'PYEOF' || rc=$?
+import sys, re
+
+server_ip, server_pubkey, vm_ip, wg_port, conf_copy, tmpfile = sys.argv[1:]
+
+with open(conf_copy) as f:
+    content = f.read()
+
+if not content.strip():
+    print("ERROR: wg1.conf is empty. Run wg1-setup.sh for VM1 (192.168.3.1) first.", file=sys.stderr)
+    sys.exit(1)
+
+# Split into sections: [Interface] block + any [Peer] blocks.
+# Each section starts with a [ header; split on newline-[ boundaries.
+parts = re.split(r'(?=\n\[)', content)
+
+# Remove any existing [Peer] block whose AllowedIPs matches server_ip/32.
+kept = [p for p in parts if not (re.search(r'^\[Peer\]', p.lstrip()) and f'AllowedIPs = {server_ip}/32' in p)]
+
+new_peer = f"""
+[Peer]
+# hyperstack VM ({server_ip})
+PublicKey = {server_pubkey}
+Endpoint = {vm_ip}:{wg_port}
+AllowedIPs = {server_ip}/32
+PersistentKeepalive = 25"""
+
+result = ''.join(kept).rstrip('\n') + '\n' + new_peer + '\n'
+
+with open(tmpfile, 'w') as f:
+    f.write(result)
+print('peer-updated-ok')
+PYEOF
+
+    rm -f "$conf_copy"
+    if [[ $rc -eq 0 ]]; then
+        # Guarded for the same `set -e` reason: a failed install must still reach the cleanup.
+        if ! { sudo cp "${tmpfile}" /etc/wireguard/wg1.conf && sudo chmod 600 /etc/wireguard/wg1.conf; }; then
+            print_error "Cannot install updated /etc/wireguard/wg1.conf."
+            rc=1
+        fi
+    fi
+    rm -f "${tmpfile}"
+    return $rc
+}
+
# Validate arguments
-if [[ $# -ne 1 ]]; then
- echo "Usage: $0 <VM_PUBLIC_IP>"
- echo "Example: $0 185.216.20.163"
+if [[ $# -lt 1 ]]; then
+ echo "Usage: $0 <VM_PUBLIC_IP> [SERVER_WG_IP] [WG_HOSTNAME]"
+ echo "Example (VM1): $0 185.216.20.163"
+ echo "Example (VM2): $0 185.216.20.200 192.168.3.3 hyperstack2.wg1"
exit 1
fi
VM_IP="$1"
+SERVER_WG_IP="${2:-${DEFAULT_SERVER_WG_IP}}"
+# Default WG_HOSTNAME: replace 192.168.3. prefix with 'hyperstack' and append .wg1,
+# or fall back to server IP if the address doesn't match the expected pattern.
+WG_HOSTNAME="${3:-$(echo "$SERVER_WG_IP" | sed 's/^192\.168\.3\.\(.*\)/hyperstack\1.wg1/' || echo "${SERVER_WG_IP}.wg1")}"
+
+# Determine mode: first VM replaces the entire client config; additional VMs add a peer.
+IS_FIRST_VM=false
+[[ "$SERVER_WG_IP" == "$DEFAULT_SERVER_WG_IP" ]] && IS_FIRST_VM=true
echo "=============================================="
print_warning "IMPORTANT: Ensure UDP port ${WG_PORT} is open on the VM!"
print_warning "This must be configured in your cloud provider's"
print_warning "firewall/security group settings."
+if [[ "$IS_FIRST_VM" == "false" ]]; then
+ print_warning "Mode: ADD PEER — ${SERVER_WG_IP} (${WG_HOSTNAME}) will be added to existing wg1.conf."
+ print_warning "Ensure the first VM (192.168.3.1) has already been set up."
+fi
echo "=============================================="
echo ""
-read -p "Press Enter to continue (or Ctrl+C to abort)..."
+read -rp "Press Enter to continue (or Ctrl+C to abort)..."
echo ""
# Create temporary directory for key generation
TMPDIR=$(mktemp -d)
-trap "rm -rf $TMPDIR" EXIT
+# Single quotes defer expansion until the trap runs; the inner double quotes keep a
+# temp path containing whitespace from being word-split by rm.
+trap 'rm -rf -- "$TMPDIR"' EXIT
echo "=== Generating WireGuard keys locally ==="
-# Generate server (hyperstack) keys
+# Generate server (hyperstack VM) keys — always fresh for each VM.
wg genkey > "$TMPDIR/server-privatekey"
wg pubkey < "$TMPDIR/server-privatekey" > "$TMPDIR/server-publickey"
SERVER_PRIVATE_KEY=$(cat "$TMPDIR/server-privatekey")
-SERVER_PUBLIC_KEY=$(cat "$TMPDIR/server-publickey")
-
-# Generate client (earth) keys
-wg genkey > "$TMPDIR/client-privatekey"
-wg pubkey < "$TMPDIR/client-privatekey" > "$TMPDIR/client-publickey"
-CLIENT_PRIVATE_KEY=$(cat "$TMPDIR/client-privatekey")
-CLIENT_PUBLIC_KEY=$(cat "$TMPDIR/client-publickey")
-
-print_success "Keys generated successfully"
+SERVER_PUBLIC_KEY=$(cat "$TMPDIR/server-publickey")
+
+if [[ "$IS_FIRST_VM" == "true" ]]; then
+    # First VM: generate fresh client keys; the entire wg1.conf will be replaced.
+    wg genkey > "$TMPDIR/client-privatekey"
+    wg pubkey < "$TMPDIR/client-privatekey" > "$TMPDIR/client-publickey"
+    CLIENT_PRIVATE_KEY=$(cat "$TMPDIR/client-privatekey")
+    CLIENT_PUBLIC_KEY=$(cat "$TMPDIR/client-publickey")
+    print_success "Keys generated (first VM — full config will be replaced)"
+else
+    # Additional VM: reuse the existing client keys from /etc/wireguard/wg1.conf so that
+    # the first VM's server config (which already stores the client public key) keeps working.
+    # `|| true`: with `set -e` and `pipefail` a missing file or a grep without a match would
+    # abort the script inside this assignment, before the explicit check below can explain why.
+    CLIENT_PRIVATE_KEY=$(sudo cat /etc/wireguard/wg1.conf | grep -m1 'PrivateKey' | awk '{print $3}' || true)
+    if [[ -z "$CLIENT_PRIVATE_KEY" ]]; then
+        print_error "Cannot extract client private key from /etc/wireguard/wg1.conf."
+        print_error "Run this script for VM1 (192.168.3.1) first."
+        exit 1
+    fi
+    CLIENT_PUBLIC_KEY=$(echo "$CLIENT_PRIVATE_KEY" | wg pubkey)
+    print_success "Keys generated (additional VM — client keys reused from existing wg1.conf)"
+fi
echo ""
-echo "=== Creating server (hyperstack) configuration ==="
+echo "=== Creating server (hyperstack VM ${SERVER_WG_IP}) configuration ==="
-# Create server wg1.conf
cat > "$TMPDIR/server-wg1.conf" << EOF
-# WireGuard wg1 configuration for hyperstack VM
+# WireGuard wg1 configuration for hyperstack VM (${SERVER_WG_IP})
# Server side of earth <-> hyperstack tunnel
# Generated by wg1-setup.sh on $(date)
@@ -154,13 +251,13 @@ PublicKey = ${CLIENT_PUBLIC_KEY}
AllowedIPs = ${CLIENT_WG_IP}/32
EOF
-print_success "Server config created"
+print_success "Server config created (server IP: ${SERVER_WG_IP})"
-echo ""
-echo "=== Creating client (earth) configuration ==="
+if [[ "$IS_FIRST_VM" == "true" ]]; then
+ echo ""
+ echo "=== Creating client (earth) configuration ==="
-# Create client wg1.conf
-cat > "$TMPDIR/client-wg1.conf" << EOF
+ cat > "$TMPDIR/client-wg1.conf" << EOF
# WireGuard wg1 configuration for earth
# Client side of earth <-> hyperstack tunnel
# Generated by wg1-setup.sh on $(date)
@@ -170,49 +267,43 @@ Address = ${CLIENT_WG_IP}/${SUBNET_MASK}
PrivateKey = ${CLIENT_PRIVATE_KEY}
[Peer]
-# hyperstack VM (server)
+# hyperstack VM (${SERVER_WG_IP})
PublicKey = ${SERVER_PUBLIC_KEY}
Endpoint = ${VM_IP}:${WG_PORT}
AllowedIPs = ${SERVER_WG_IP}/32
PersistentKeepalive = 25
EOF
-print_success "Client config created"
+ print_success "Client config created"
+fi
echo ""
-echo "=== Setting up hyperstack VM (${VM_IP}) ==="
+echo "=== Setting up hyperstack VM (${VM_IP}, tunnel IP ${SERVER_WG_IP}) ==="
-# Wait for SSH to become available (handles transient connection resets
-# from sshd restarts due to unattended-upgrades or package installs)
echo "Testing SSH connection..."
retry_ssh ssh -o ConnectTimeout=10 -o BatchMode=yes "${SSH_USER}@${VM_IP}" "echo 'SSH OK'"
print_success "SSH connection OK"
-# Install WireGuard on server if not present
echo "Installing WireGuard on hyperstack..."
retry_ssh ssh "${SSH_USER}@${VM_IP}" "which wg >/dev/null 2>&1 || (sudo apt update && sudo apt install -y wireguard)"
print_success "WireGuard installed"
-# Copy server config to hyperstack
echo "Copying wg1.conf to hyperstack..."
retry_ssh scp "$TMPDIR/server-wg1.conf" "${SSH_USER}@${VM_IP}:/tmp/wg1.conf"
retry_ssh ssh "${SSH_USER}@${VM_IP}" "sudo mv /tmp/wg1.conf /etc/wireguard/wg1.conf && sudo chmod 600 /etc/wireguard/wg1.conf"
print_success "Server config installed"
-# Configure firewall on hyperstack
echo "Configuring firewall (ufw) on hyperstack..."
retry_ssh ssh "${SSH_USER}@${VM_IP}" bash -s << 'REMOTE_SCRIPT'
sudo ufw allow ssh comment 'Allow SSH' 2>/dev/null || true
sudo ufw --force enable >/dev/null 2>&1 || true
sudo ufw allow 56710/udp comment 'WireGuard wg1' 2>/dev/null || true
-sudo ufw allow from 192.168.3.0/24 to any port 11434 proto tcp comment 'Ollama via wg1' 2>/dev/null || true
+sudo ufw allow from 192.168.3.0/24 to any port 11434 proto tcp comment 'Ollama/vLLM via wg1' 2>/dev/null || true
echo "Firewall rules added"
REMOTE_SCRIPT
print_success "Firewall configured"
-# Ensure Ollama listens on all interfaces (only if override not already set
-# by ollama_setup_script, which also configures OLLAMA_MODELS and other env vars)
-echo "Configuring Ollama to listen on 0.0.0.0..."
+echo "Configuring Ollama to listen on 0.0.0.0 (if installed)..."
retry_ssh ssh "${SSH_USER}@${VM_IP}" bash -s << 'REMOTE_SCRIPT'
if [ -f /etc/systemd/system/ollama.service.d/override.conf ] && \
grep -q 'OLLAMA_HOST' /etc/systemd/system/ollama.service.d/override.conf; then
@@ -224,12 +315,11 @@ else
Environment="OLLAMA_HOST=0.0.0.0:11434"
OVERRIDE
sudo systemctl daemon-reload
- sudo systemctl restart ollama 2>/dev/null || echo "Note: Ollama service not running or not installed"
+ sudo systemctl restart ollama 2>/dev/null || echo "Note: Ollama not running or not installed"
fi
REMOTE_SCRIPT
print_success "Ollama configured"
-# Start wg1 on hyperstack
echo "Starting wg1 on hyperstack..."
retry_ssh ssh "${SSH_USER}@${VM_IP}" "sudo systemctl start wg-quick@wg1 2>/dev/null || sudo wg-quick up wg1"
print_success "wg1 started on hyperstack"
@@ -237,35 +327,43 @@ print_success "wg1 started on hyperstack"
echo ""
echo "=== Setting up earth (local) ==="
-# Check if WireGuard is installed locally
if ! which wg >/dev/null 2>&1; then
echo "Installing WireGuard locally..."
sudo dnf install -y wireguard-tools
fi
print_success "WireGuard installed locally"
-# Install client config locally
-echo "Installing wg1.conf locally..."
-sudo cp "$TMPDIR/client-wg1.conf" /etc/wireguard/wg1.conf
-sudo chmod 600 /etc/wireguard/wg1.conf
-print_success "Client config installed"
+if [[ "$IS_FIRST_VM" == "true" ]]; then
+ echo "Installing fresh wg1.conf locally (first VM — replaces any existing config)..."
+ sudo cp "$TMPDIR/client-wg1.conf" /etc/wireguard/wg1.conf
+ sudo chmod 600 /etc/wireguard/wg1.conf
+ print_success "Client config installed"
+else
+ echo "Adding peer ${SERVER_WG_IP} to existing wg1.conf (additional VM)..."
+ update_peer_in_client_config "$SERVER_WG_IP" "$SERVER_PUBLIC_KEY" "$VM_IP"
+ print_success "Peer added to client config"
+fi
+
+# Update /etc/hosts so that WG_HOSTNAME resolves to the VM's WireGuard IP.
+# hyperstack.rb uses this hostname in test URLs and informational output.
+echo "Updating /etc/hosts: ${SERVER_WG_IP} ${WG_HOSTNAME}..."
+sudo sed -i "/ ${WG_HOSTNAME}$/d" /etc/hosts # Remove stale entry if present
+echo "${SERVER_WG_IP} ${WG_HOSTNAME}" | sudo tee -a /etc/hosts > /dev/null
+print_success "/etc/hosts updated"
-# Stop existing wg1 if running, then start fresh
-echo "Starting wg1 locally..."
-sudo systemctl stop wg-quick@wg1 2>/dev/null || true
+echo "Restarting wg1 locally..."
+sudo systemctl stop wg-quick@wg1 2>/dev/null || true
sudo systemctl start wg-quick@wg1
-print_success "wg1 started locally"
+print_success "wg1 restarted locally"
echo ""
echo "=============================================="
print_success "Setup complete!"
echo "=============================================="
echo ""
-echo "WireGuard wg1 tunnel is now active."
-echo ""
-echo "Tunnel IPs:"
-echo " hyperstack (server): ${SERVER_WG_IP}"
-echo " earth (client): ${CLIENT_WG_IP}"
+echo "WireGuard wg1 tunnel peer active:"
+echo " hyperstack VM (server): ${SERVER_WG_IP} (${WG_HOSTNAME})"
+echo " earth (client): ${CLIENT_WG_IP}"
echo ""
echo "=== Verification commands ==="
echo ""
@@ -278,8 +376,8 @@ echo ""
echo "# Verify default route is UNCHANGED:"
echo "ip route | grep default"
echo ""
-echo "# Test Ollama access:"
-echo "curl http://${SERVER_WG_IP}:11434/api/tags"
+echo "# Test vLLM access:"
+echo "curl http://${WG_HOSTNAME}:11434/v1/models"
echo ""
echo "=== Manual start/stop commands ==="
echo ""
@@ -291,8 +389,3 @@ echo "sudo systemctl start wg-quick@wg1"
echo ""
echo "# Restart on hyperstack (if VM rebooted):"
echo "ssh ${SSH_USER}@${VM_IP} 'sudo systemctl start wg-quick@wg1'"
-echo ""
-echo "=== Use Ollama remotely ==="
-echo ""
-echo "export OLLAMA_HOST=http://${SERVER_WG_IP}:11434"
-echo "curl http://${SERVER_WG_IP}:11434/v1/models"