diff options
| field | value | date |
|---|---|---|
| author | Paul Buetow <paul@buetow.org> | 2026-03-20 10:13:34 +0200 |
| committer | Paul Buetow <paul@buetow.org> | 2026-03-20 10:13:34 +0200 |
| commit | ba3f4074e312d51409d82a67cb097c5f555bd3d6 (patch) | |
| tree | 629c51dfc134188b0d54976df699c69f4ff42ac3 /snippets/hyperstack | |
| parent | 1c906b2378c49d28b47889e0db659cb6d9cf5395 (diff) | |
Initial commit: add hyperstack-vm1.toml, hyperstack-vm2.toml, update hyperstack.rb and wg1-setup.sh for multi-VM WireGuard support
Diffstat (limited to 'snippets/hyperstack')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | snippets/hyperstack/hyperstack-vm1.toml | 183 |
| -rw-r--r-- | snippets/hyperstack/hyperstack-vm2.toml | 180 |
| -rwxr-xr-x | snippets/hyperstack/hyperstack.rb | 429 |
| -rwxr-xr-x | snippets/hyperstack/wg1-setup.sh | 285 |
4 files changed, 914 insertions, 163 deletions
diff --git a/snippets/hyperstack/hyperstack-vm1.toml b/snippets/hyperstack/hyperstack-vm1.toml new file mode 100644 index 0000000..c5c940a --- /dev/null +++ b/snippets/hyperstack/hyperstack-vm1.toml @@ -0,0 +1,183 @@ +[auth] +api_key_file = "~/.hyperstack" + +[hyperstack] +base_url = "https://infrahub-api.nexgencloud.com/v1" + +[state] +# Separate state file for VM1 so vm1 and vm2 can be managed independently. +file = ".hyperstack-vm1-state.json" + +[vm] +name_prefix = "hyperstack1" +hostname = "hyperstack1" +environment_name = "snonux-ollama" + +# A100-80GB is the cost-first default for nemotron-3-super inference. +# Switch this to n3-H100x1 if you want safer throughput and compatibility headroom. +flavor_name = "n3-A100x1" +image_name = "Ubuntu Server 24.04 LTS R570 CUDA 12.8 with Docker" +assign_floating_ip = true +create_bootable_volume = false +enable_port_randomization = false +labels = ["nemotron-3-super", "wireguard"] + +[ssh] +username = "ubuntu" +private_key_path = "~/.ssh/id_rsa" +hyperstack_key_name = "earth" +port = 22 +connect_timeout_sec = 10 + +[network] +wireguard_udp_port = 56710 +wireguard_subnet = "192.168.3.0/24" +# VM1 gets the first server-side WireGuard IP, i.e. the default gateway address (subnet base + 1). +# earth (client) is 192.168.3.2; VM1 is 192.168.3.1; VM2 is 192.168.3.3. +wireguard_server_ip = "192.168.3.1" +# Port 11434 is shared by both Ollama and vLLM for firewall compatibility. +ollama_port = 11434 +# Port 4000: LiteLLM Anthropic-API proxy (used with vLLM). +litellm_port = 4000 +allowed_ssh_cidrs = ["0.0.0.0/0"] +allowed_wireguard_cidrs = ["0.0.0.0/0"] + +[bootstrap] +enable_guest_bootstrap = true +install_wireguard = true +configure_ufw = true +configure_ollama_host = false + +[ollama] +# Disabled in favour of vLLM; set install = true to switch back to Ollama. 
+install = false +models_dir = "/ephemeral/ollama/models" +listen_host = "0.0.0.0:11434" +gpu_overhead_mb = 2000 +num_parallel = 1 +context_length = 32768 +pull_models = ["nemotron-3-super"] + +# vLLM serves one model via Docker; LiteLLM translates Anthropic API → OpenAI. +# VM1 defaults to nemotron-3-super; use 'model switch' to load any other preset. +[vllm] +install = true +model = "cyankiwi/NVIDIA-Nemotron-3-Super-120B-A12B-AWQ-4bit" +# HuggingFace model cache on ephemeral NVMe (fast; survives reboots on most providers). +hug_cache_dir = "/ephemeral/hug" +container_name = "vllm_nemotron_super" +max_model_len = 262144 +gpu_memory_utilization = 0.92 +tensor_parallel_size = 1 +# NVIDIA Nemotron-3-Super uses the same XML tool call format as Qwen3 XML. +tool_call_parser = "qwen3_xml" +trust_remote_code = true +extra_vllm_args = ["--reasoning-parser", "nemotron_v3"] +# LiteLLM maps each entry to the vLLM model; add new Anthropic model IDs here. +litellm_master_key = "sk-litellm-master" +litellm_claude_model_names = [ + "claude-sonnet-4-20250514", + "claude-opus-4-20250514", + "claude-opus-4-6-20260604", + "claude-haiku-3-5-20241022" +] + +# Named model presets for 'ruby hyperstack.rb --config hyperstack-vm1.toml model switch <name>'. +# Each preset overrides the matching [vllm] field; unset fields fall back to [vllm] defaults. + +[vllm.presets.qwen3-coder-next] +model = "bullpoint/Qwen3-Coder-Next-AWQ-4bit" +container_name = "vllm_qwen3" +max_model_len = 262144 +gpu_memory_utilization = 0.92 +tensor_parallel_size = 1 +tool_call_parser = "qwen3_coder" + +# NVIDIA Nemotron-3-Super-120B-A12B AWQ 4-bit — hybrid Mamba+MoE (12B active / 120B total). +# ~60 GB weights on A100 80GB. Uses NoPE so context can be set to 1M; no YaRN needed. +# Requires trust_remote_code=true for the nemotron_h architecture. 
+[vllm.presets.nemotron-super] +model = "cyankiwi/NVIDIA-Nemotron-3-Super-120B-A12B-AWQ-4bit" +container_name = "vllm_nemotron_super" +max_model_len = 262144 +gpu_memory_utilization = 0.92 +tensor_parallel_size = 1 +tool_call_parser = "qwen3_xml" +trust_remote_code = true +extra_vllm_args = ["--reasoning-parser", "nemotron_v3"] + +# OpenAI GPT-OSS 20B — ultra-fast MoE (3.6B active / 20B total, MXFP4), ~14 GB on A100. +[vllm.presets.gpt-oss-20b] +model = "openai/gpt-oss-20b" +container_name = "vllm_gpt_oss_20b" +max_model_len = 65536 +gpu_memory_utilization = 0.92 +tensor_parallel_size = 1 +tool_call_parser = "" + +# OpenAI GPT-OSS 120B — powerful MoE (5.1B active / 117B total, MXFP4), ~65 GB on A100. +# Hard architecture limit: max_position_embeddings=131072 in model config.json. +[vllm.presets.gpt-oss-120b] +model = "openai/gpt-oss-120b" +container_name = "vllm_gpt_oss_120b" +max_model_len = 131072 +gpu_memory_utilization = 0.92 +tensor_parallel_size = 1 +tool_call_parser = "" + +# Qwen2.5-Coder-32B-Instruct AWQ — best-in-class open coding model at 32B, ~18 GB on A100. +[vllm.presets.qwen25-coder-32b] +model = "Qwen/Qwen2.5-Coder-32B-Instruct-AWQ" +container_name = "vllm_qwen25_coder32b" +max_model_len = 32768 +gpu_memory_utilization = 0.92 +tensor_parallel_size = 1 +tool_call_parser = "hermes" + +# Qwen3-Coder-30B-A3B AWQ — Qwen3 generation coding MoE (3B active / 30B total), ~18 GB. +[vllm.presets.qwen3-coder-30b] +model = "QuantTrio/Qwen3-Coder-30B-A3B-Instruct-AWQ" +container_name = "vllm_qwen3_coder30b" +max_model_len = 65536 +gpu_memory_utilization = 0.92 +tensor_parallel_size = 1 +tool_call_parser = "qwen3_coder" + +# DeepSeek-R1-Distill-Qwen-32B AWQ — R1 reasoning distillation of Qwen 32B, ~18 GB on A100. 
+[vllm.presets.deepseek-r1-32b] +model = "casperhansen/deepseek-r1-distill-qwen-32b-awq" +container_name = "vllm_deepseek_r1_32b" +max_model_len = 32768 +gpu_memory_utilization = 0.92 +tensor_parallel_size = 1 +tool_call_parser = "" +extra_vllm_args = ["--reasoning-parser", "deepseek_r1"] + +# Qwen3-32B AWQ — dense 32B reasoning model with extended context, ~18 GB on A100. +[vllm.presets.qwen3-32b] +model = "Qwen/Qwen3-32B-AWQ" +container_name = "vllm_qwen3_32b" +max_model_len = 32768 +gpu_memory_utilization = 0.92 +tensor_parallel_size = 1 +tool_call_parser = "" +extra_vllm_args = ["--reasoning-parser", "deepseek_r1"] + +# Devstral-Small-2507 AWQ — Mistral's coding agent model (~15 GB on A100). +[vllm.presets.devstral] +model = "cyankiwi/Devstral-Small-2507-AWQ-4bit" +container_name = "vllm_devstral" +max_model_len = 32768 +gpu_memory_utilization = 0.92 +tensor_parallel_size = 1 +tool_call_parser = "mistral" +extra_vllm_args = ["--tokenizer_mode", "mistral", "--config_format", "mistral"] + +[wireguard] +auto_setup = true +setup_script = "./wg1-setup.sh" + +[local_client] +check_wg1_service = true +interface_name = "wg1" +config_path = "/etc/wireguard/wg1.conf" diff --git a/snippets/hyperstack/hyperstack-vm2.toml b/snippets/hyperstack/hyperstack-vm2.toml new file mode 100644 index 0000000..6cc6503 --- /dev/null +++ b/snippets/hyperstack/hyperstack-vm2.toml @@ -0,0 +1,180 @@ +[auth] +api_key_file = "~/.hyperstack" + +[hyperstack] +base_url = "https://infrahub-api.nexgencloud.com/v1" + +[state] +# Separate state file for VM2 so vm1 and vm2 can be managed independently. +file = ".hyperstack-vm2-state.json" + +[vm] +name_prefix = "hyperstack2" +hostname = "hyperstack2" +environment_name = "snonux-ollama" + +# A100-80GB is the cost-first default for qwen3-coder-next inference. +# Switch this to n3-H100x1 if you want safer throughput and compatibility headroom. 
+flavor_name = "n3-A100x1" +image_name = "Ubuntu Server 24.04 LTS R570 CUDA 12.8 with Docker" +assign_floating_ip = true +create_bootable_volume = false +enable_port_randomization = false +labels = ["qwen3-coder-next", "wireguard"] + +[ssh] +username = "ubuntu" +private_key_path = "~/.ssh/id_rsa" +hyperstack_key_name = "earth" +port = 22 +connect_timeout_sec = 10 + +[network] +wireguard_udp_port = 56710 +wireguard_subnet = "192.168.3.0/24" +# VM2 gets the second server-side WireGuard IP, .3 (skipping .2 which is the earth client). +# earth (client) is 192.168.3.2; VM1 is 192.168.3.1; VM2 is 192.168.3.3. +wireguard_server_ip = "192.168.3.3" +# Port 11434 is shared by both Ollama and vLLM for firewall compatibility. +ollama_port = 11434 +# Port 4000: LiteLLM Anthropic-API proxy (used with vLLM). +litellm_port = 4000 +allowed_ssh_cidrs = ["0.0.0.0/0"] +allowed_wireguard_cidrs = ["0.0.0.0/0"] + +[bootstrap] +enable_guest_bootstrap = true +install_wireguard = true +configure_ufw = true +configure_ollama_host = false + +[ollama] +# Disabled in favour of vLLM; set install = true to switch back to Ollama. +install = false +models_dir = "/ephemeral/ollama/models" +listen_host = "0.0.0.0:11434" +gpu_overhead_mb = 2000 +num_parallel = 1 +context_length = 32768 +pull_models = ["qwen3-coder-next"] + +# vLLM serves one model via Docker; LiteLLM translates Anthropic API → OpenAI. +# VM2 defaults to qwen3-coder-next; use 'model switch' to load any other preset. +[vllm] +install = true +model = "bullpoint/Qwen3-Coder-Next-AWQ-4bit" +# HuggingFace model cache on ephemeral NVMe (fast; survives reboots on most providers). +hug_cache_dir = "/ephemeral/hug" +container_name = "vllm_qwen3" +max_model_len = 262144 +gpu_memory_utilization = 0.92 +tensor_parallel_size = 1 +tool_call_parser = "qwen3_coder" +# LiteLLM maps each entry to the vLLM model; add new Anthropic model IDs here. 
+litellm_master_key = "sk-litellm-master" +litellm_claude_model_names = [ + "claude-sonnet-4-20250514", + "claude-opus-4-20250514", + "claude-opus-4-6-20260604", + "claude-haiku-3-5-20241022" +] + +# Named model presets for 'ruby hyperstack.rb --config hyperstack-vm2.toml model switch <name>'. +# Each preset overrides the matching [vllm] field; unset fields fall back to [vllm] defaults. + +[vllm.presets.qwen3-coder-next] +model = "bullpoint/Qwen3-Coder-Next-AWQ-4bit" +container_name = "vllm_qwen3" +max_model_len = 262144 +gpu_memory_utilization = 0.92 +tensor_parallel_size = 1 +tool_call_parser = "qwen3_coder" + +# NVIDIA Nemotron-3-Super-120B-A12B AWQ 4-bit — hybrid Mamba+MoE (12B active / 120B total). +# ~60 GB weights on A100 80GB. Uses NoPE so context can be set to 1M; no YaRN needed. +# Requires trust_remote_code=true for the nemotron_h architecture. +[vllm.presets.nemotron-super] +model = "cyankiwi/NVIDIA-Nemotron-3-Super-120B-A12B-AWQ-4bit" +container_name = "vllm_nemotron_super" +max_model_len = 262144 +gpu_memory_utilization = 0.92 +tensor_parallel_size = 1 +tool_call_parser = "qwen3_xml" +trust_remote_code = true +extra_vllm_args = ["--reasoning-parser", "nemotron_v3"] + +# OpenAI GPT-OSS 20B — ultra-fast MoE (3.6B active / 20B total, MXFP4), ~14 GB on A100. +[vllm.presets.gpt-oss-20b] +model = "openai/gpt-oss-20b" +container_name = "vllm_gpt_oss_20b" +max_model_len = 65536 +gpu_memory_utilization = 0.92 +tensor_parallel_size = 1 +tool_call_parser = "" + +# OpenAI GPT-OSS 120B — powerful MoE (5.1B active / 117B total, MXFP4), ~65 GB on A100. +# Hard architecture limit: max_position_embeddings=131072 in model config.json. +[vllm.presets.gpt-oss-120b] +model = "openai/gpt-oss-120b" +container_name = "vllm_gpt_oss_120b" +max_model_len = 131072 +gpu_memory_utilization = 0.92 +tensor_parallel_size = 1 +tool_call_parser = "" + +# Qwen2.5-Coder-32B-Instruct AWQ — best-in-class open coding model at 32B, ~18 GB on A100. 
+[vllm.presets.qwen25-coder-32b] +model = "Qwen/Qwen2.5-Coder-32B-Instruct-AWQ" +container_name = "vllm_qwen25_coder32b" +max_model_len = 32768 +gpu_memory_utilization = 0.92 +tensor_parallel_size = 1 +tool_call_parser = "hermes" + +# Qwen3-Coder-30B-A3B AWQ — Qwen3 generation coding MoE (3B active / 30B total), ~18 GB. +[vllm.presets.qwen3-coder-30b] +model = "QuantTrio/Qwen3-Coder-30B-A3B-Instruct-AWQ" +container_name = "vllm_qwen3_coder30b" +max_model_len = 65536 +gpu_memory_utilization = 0.92 +tensor_parallel_size = 1 +tool_call_parser = "qwen3_coder" + +# DeepSeek-R1-Distill-Qwen-32B AWQ — R1 reasoning distillation of Qwen 32B, ~18 GB on A100. +[vllm.presets.deepseek-r1-32b] +model = "casperhansen/deepseek-r1-distill-qwen-32b-awq" +container_name = "vllm_deepseek_r1_32b" +max_model_len = 32768 +gpu_memory_utilization = 0.92 +tensor_parallel_size = 1 +tool_call_parser = "" +extra_vllm_args = ["--reasoning-parser", "deepseek_r1"] + +# Qwen3-32B AWQ — dense 32B reasoning model with extended context, ~18 GB on A100. +[vllm.presets.qwen3-32b] +model = "Qwen/Qwen3-32B-AWQ" +container_name = "vllm_qwen3_32b" +max_model_len = 32768 +gpu_memory_utilization = 0.92 +tensor_parallel_size = 1 +tool_call_parser = "" +extra_vllm_args = ["--reasoning-parser", "deepseek_r1"] + +# Devstral-Small-2507 AWQ — Mistral's coding agent model (~15 GB on A100). 
+[vllm.presets.devstral] +model = "cyankiwi/Devstral-Small-2507-AWQ-4bit" +container_name = "vllm_devstral" +max_model_len = 32768 +gpu_memory_utilization = 0.92 +tensor_parallel_size = 1 +tool_call_parser = "mistral" +extra_vllm_args = ["--tokenizer_mode", "mistral", "--config_format", "mistral"] + +[wireguard] +auto_setup = true +setup_script = "./wg1-setup.sh" + +[local_client] +check_wg1_service = true +interface_name = "wg1" +config_path = "/etc/wireguard/wg1.conf" diff --git a/snippets/hyperstack/hyperstack.rb b/snippets/hyperstack/hyperstack.rb index 69bb6f6..526d7aa 100755 --- a/snippets/hyperstack/hyperstack.rb +++ b/snippets/hyperstack/hyperstack.rb @@ -82,6 +82,9 @@ module HyperstackVM 'network' => { 'wireguard_udp_port' => 56_710, 'wireguard_subnet' => '192.168.3.0/24', + # Optional: explicit server-side WireGuard IP. When nil, derived as subnet + 1 (i.e. .1). + # Set to a different address (e.g. 192.168.3.3) for a second VM sharing the same wg1 tunnel. + 'wireguard_server_ip' => nil, 'ollama_port' => 11_434, 'litellm_port' => 4_000, 'allowed_ssh_cidrs' => ['0.0.0.0/0'], @@ -155,6 +158,20 @@ module HyperstackVM rescue IPAddr::InvalidAddressError => e raise Error, "Invalid CIDR #{cidr.inspect}: #{e.message}" end + + server_ip = fetch('network', 'wireguard_server_ip') + if server_ip + # Validate that the explicit server WireGuard IP is within the configured subnet. + begin + subnet = IPAddr.new(fetch('network', 'wireguard_subnet')) + unless subnet.include?(IPAddr.new(server_ip)) + raise Error, + "wireguard_server_ip #{server_ip.inspect} is not in wireguard_subnet #{fetch('network', 'wireguard_subnet')}" + end + rescue IPAddr::InvalidAddressError => e + raise Error, "Invalid wireguard_server_ip #{server_ip.inspect}: #{e.message}" + end + end end def fetch(section, key) @@ -305,15 +322,25 @@ module HyperstackVM Integer(fetch('network', 'litellm_port')) end + # Returns the server-side WireGuard IP for this VM. 
+ # Uses the explicitly configured address when set; otherwise derives it as subnet_base + 1. + # Example: 192.168.3.0/24 → 192.168.3.1 (default VM1); VM2 sets wireguard_server_ip=192.168.3.3. def wireguard_gateway_ip + configured = fetch('network', 'wireguard_server_ip') + return configured.to_s if configured && !configured.to_s.strip.empty? + + # Fall back to first usable address in the subnet. base = IPAddr.new(wireguard_subnet).to_s parts = base.split('.').map(&:to_i) parts[-1] += 1 parts.join('.') end + # Returns the WireGuard hostname for this VM: e.g. hyperstack1.wg1 or hyperstack2.wg1. + # Used as the DNS name to reach the VM over the tunnel (must be in /etc/hosts on the client). def wireguard_gateway_hostname - "hyperstack.#{local_interface_name}" + host = vm_hostname || 'hyperstack' + "#{host}.#{local_interface_name}" end def allowed_ssh_cidrs @@ -414,6 +441,17 @@ module HyperstackVM fetch('vllm', 'litellm_master_key') end + # Whether to pass --trust-remote-code to vLLM for the default model. + # Required for architectures not yet in the vLLM upstream registry (e.g. nemotron_h). + def vllm_trust_remote_code + truthy?(fetch('vllm', 'trust_remote_code')) + end + + # Extra vLLM CLI flags for the default model (e.g. reasoning-parser args). + def vllm_extra_args + Array(fetch('vllm', 'extra_vllm_args')).map(&:to_s) + end + def vllm_presets Hash(dig('vllm', 'presets')).transform_keys(&:to_s) end @@ -695,10 +733,12 @@ module HyperstackVM end def status + endpoints = configured_endpoints { 'service_state' => service_state, 'config_path' => @config_path, - 'endpoint' => configured_endpoint, + 'endpoint' => endpoints.last, + 'endpoints' => endpoints, 'config_readable' => !config_contents.nil? } end @@ -715,51 +755,90 @@ module HyperstackVM end def configured_endpoint - content = config_contents - return nil if content.nil? 
- - parse_wireguard_config(content)['Endpoint'] + configured_endpoints.last end - def config_contents - return @config_contents if defined?(@config_contents) + def configured_endpoints + content = config_contents + return [] if content.nil? - @config_contents = File.read(@config_path) - rescue Errno::EACCES, Errno::ENOENT - stdout, _stderr, status = Open3.capture3('sudo', '-n', 'cat', @config_path) - @config_contents = status.success? ? stdout : nil + parse_wireguard_peers(content).filter_map { |peer| peer['Endpoint'] }.uniq end - def parse_wireguard_config(content) + def parse_wireguard_peers(content) current_section = nil - peer = {} + current_peer = nil + peers = [] content.each_line do |line| stripped = line.strip next if stripped.empty? || stripped.start_with?('#') if stripped.start_with?('[') && stripped.end_with?(']') + peers << current_peer if current_section == 'Peer' && current_peer && !current_peer.empty? current_section = stripped[1..-2] + current_peer = current_section == 'Peer' ? {} : nil next end key, value = stripped.split('=', 2).map { |part| part&.strip } next unless current_section == 'Peer' && key && value - peer[key] = value + current_peer[key] = value end - peer + peers << current_peer if current_section == 'Peer' && current_peer && !current_peer.empty? + peers + end + + def config_contents + return @config_contents if defined?(@config_contents) + + @config_contents = File.read(@config_path) + rescue Errno::EACCES, Errno::ENOENT + stdout, _stderr, status = Open3.capture3('sudo', '-n', 'cat', @config_path) + @config_contents = status.success? ? stdout : nil + end + end + + # Thread-safe output wrapper that prepends a fixed prefix to each line. + # Used by create-both so interleaved output from VM1 and VM2 threads is distinguishable. + # #print buffers partial lines until a newline is received, then flushes with the prefix. 
+ class PrefixedOutput + def initialize(prefix, delegate, mutex) + @prefix = prefix + @delegate = delegate + @mutex = mutex + @buffer = +'' + end + + def puts(msg = '') + @mutex.synchronize { @delegate.puts("#{@prefix}#{msg}") } + end + + def print(msg) + @buffer << msg.to_s + while (idx = @buffer.index("\n")) + line = @buffer.slice!(0, idx + 1) + @mutex.synchronize { @delegate.print("#{@prefix}#{line}") } + end end end class Manager - def initialize(config:, client:, state_store:, local_wireguard:, out: $stdout) + # wg_setup_pre: optional Proc called just before this VM's WireGuard setup step runs. + # Used by create-both to block VM2 until VM1 has written the base wg1.conf. + # wg_setup_post: optional Proc called after the WireGuard step completes (or is skipped). + # Used by create-both to signal that VM1's base config is ready for VM2. + def initialize(config:, client:, state_store:, local_wireguard:, out: $stdout, + wg_setup_pre: nil, wg_setup_post: nil) @config = config @client = client @state_store = state_store @local_wireguard = local_wireguard @out = out + @wg_setup_pre = wg_setup_pre + @wg_setup_post = wg_setup_post end def create(replace: false, dry_run: false, install_vllm: nil, install_ollama: nil, vllm_preset: nil) @@ -847,7 +926,7 @@ module HyperstackVM raise end - def status + def status(include_local_wireguard: true) state = @state_store.load if state.nil? info "No tracked VM state file at #{@state_store.path}." @@ -868,7 +947,12 @@ module HyperstackVM end end - print_local_wireguard_summary(state&.dig('public_ip')) + print_local_wireguard_summary(state&.dig('public_ip')) if include_local_wireguard + state&.dig('public_ip') + end + + def show_local_wireguard(expected_ips = nil) + print_local_wireguard_summary(expected_ips) end # Lists configured model presets and marks the one currently running on the VM. 
@@ -1004,11 +1088,18 @@ module HyperstackVM @state_store.save(state) end + # Call pre-hook before deciding whether WireGuard setup is needed; this allows a concurrent + # sibling VM (e.g. VM2 in create-both) to block here until the primary VM (VM1) has + # already written the base wg1.conf, which VM2's setup will then extend with its own peer. + @wg_setup_pre&.call if wireguard_setup_needed?(state) run_wireguard_setup(state['public_ip']) state['wireguard_setup_at'] = Time.now.utc.iso8601 @state_store.save(state) end + # Always signal post-hook so that a waiting sibling VM is unblocked even when + # WireGuard setup was not needed (e.g. already done on a resume). + @wg_setup_post&.call # Pull and verify Ollama models after the tunnel is established. if ollama_setup_needed?(state) @@ -1209,7 +1300,7 @@ module HyperstackVM return true if public_ip.empty? expected_endpoint = "#{public_ip}:#{@config.wireguard_udp_port}" - @local_wireguard.status['endpoint'] != expected_endpoint + !Array(@local_wireguard.status['endpoints']).include?(expected_endpoint) end def run_wireguard_setup(host) @@ -1232,7 +1323,12 @@ module HyperstackVM end def run_wireguard_script(host) - Open3.popen2e('bash', @config.wireguard_setup_script, host) do |stdin, output, wait_thr| + # Pass server WireGuard IP and WireGuard hostname as positional args so that + # wg1-setup.sh can configure the correct server-side tunnel address and update + # /etc/hosts on the client. The Enter keystroke via stdin bypasses the interactive prompt. + server_ip = @config.wireguard_gateway_ip + wg_hostname = @config.wireguard_gateway_hostname + Open3.popen2e('bash', @config.wireguard_setup_script, host, server_ip, wg_hostname) do |stdin, output, wait_thr| stdin.sync = true stdin.puts stdin.close @@ -1272,6 +1368,17 @@ module HyperstackVM mismatches << "network.wireguard_subnet must be '192.168.3.0/24'" end + # Validate that the resolved server IP is actually within the configured subnet. 
+ begin + subnet = IPAddr.new(@config.wireguard_subnet) + server_ip = IPAddr.new(@config.wireguard_gateway_ip) + unless subnet.include?(server_ip) + mismatches << "wireguard_server_ip #{@config.wireguard_gateway_ip.inspect} is outside #{@config.wireguard_subnet}" + end + rescue IPAddr::InvalidAddressError => e + mismatches << "Invalid wireguard_server_ip: #{e.message}" + end + return if mismatches.empty? raise Error, "Configured WireGuard settings do not match #{script_path}: #{mismatches.join('; ')}" @@ -1659,7 +1766,10 @@ module HyperstackVM # parser is nil only when preset explicitly omits the key and config has no default; # empty string means "disable tool calling" (e.g. gpt-oss reasoning models). parser = @config.vllm_tool_call_parser if parser.nil? - trust_remote = cfg.key?('trust_remote_code') ? cfg['trust_remote_code'] : false + # Fall back to the top-level [vllm] config values when no preset is in use. + # This allows setting trust_remote_code / extra_vllm_args in the default [vllm] block + # (e.g. for nemotron on VM1) without requiring a --model preset flag at create time. + trust_remote = cfg.key?('trust_remote_code') ? cfg['trust_remote_code'] : @config.vllm_trust_remote_code port = @config.ollama_port # vLLM reuses the Ollama port for firewall compat docker_args = [ @@ -1688,7 +1798,9 @@ module HyperstackVM end docker_args << '--trust-remote-code' if trust_remote # Append any extra flags verbatim (e.g. Mistral loader flags, reasoning parser). - (cfg['extra_vllm_args'] || []).each { |arg| docker_args << arg } + # Preset extra_vllm_args take precedence; fall back to top-level [vllm].extra_vllm_args. + extra_args = cfg.key?('extra_vllm_args') ? Array(cfg['extra_vllm_args']) : @config.vllm_extra_args + extra_args.each { |arg| docker_args << arg } docker_run = docker_args.join(' ') script = [] @@ -1902,24 +2014,42 @@ module HyperstackVM value.nil? ? 
nil : Integer(value) end - def print_local_wireguard_summary(expected_ip) + def print_local_wireguard_summary(expected_ips) return unless @config.local_client_checks_enabled? wg_status = @local_wireguard.status - endpoint = wg_status['endpoint'] + endpoints = Array(wg_status['endpoints']).compact.uniq info "Local WireGuard #{@config.local_interface_name}: #{wg_status['service_state']}" - if endpoint - info "Local WireGuard endpoint: #{endpoint}" - if expected_ip - host, = endpoint.split(':', 2) - if host == expected_ip - info 'Local WireGuard endpoint matches the managed VM IP.' - else - warn "Local WireGuard endpoint points to #{host}, expected #{expected_ip}." - end + if endpoints.empty? + warn "Unable to read #{@config.local_wg_config_path} for local WireGuard endpoint validation." + return + end + + label = endpoints.one? ? 'endpoint' : 'endpoints' + info "Local WireGuard #{label}: #{endpoints.join(', ')}" + + expected = Array(expected_ips).compact.map(&:to_s).map(&:strip).reject(&:empty?).uniq + return if expected.empty? + + expected_endpoints = expected.map { |ip| "#{ip}:#{@config.wireguard_udp_port}" } + missing = expected_endpoints.reject { |endpoint| endpoints.include?(endpoint) } + + if expected_endpoints.one? + if missing.empty? + info 'Local WireGuard endpoint matches the managed VM IP.' + else + hosts = endpoints.map { |endpoint| endpoint.split(':', 2).first }.uniq + warn "Local WireGuard endpoints point to #{hosts.join(', ')}, expected #{expected.first}." end + return + end + + if missing.empty? + info 'Local WireGuard has peers for all managed VM IPs.' else - warn "Unable to read #{@config.local_wg_config_path} for local WireGuard endpoint validation." + present = expected_endpoints - missing + info "Local WireGuard has peers for: #{present.map { |endpoint| endpoint.split(':', 2).first }.join(', ')}" unless present.empty? + warn "Local WireGuard missing peers for: #{missing.map { |endpoint| endpoint.split(':', 2).first }.join(', ')}." 
end end @@ -1936,6 +2066,7 @@ module HyperstackVM def initialize(argv) @argv = argv.dup @config_path = File.join(__dir__, 'hyperstack-vm.toml') + @config_explicit = false end def show_help @@ -1943,7 +2074,13 @@ module HyperstackVM puts puts 'Commands:' puts ' create [--replace] [--dry-run] [--vllm|--no-vllm] [--ollama|--no-ollama] [--model PRESET]' + puts ' create-both [--replace] [--dry-run] [--vllm|--no-vllm] [--ollama|--no-ollama]' + puts ' Provision hyperstack-vm1.toml and hyperstack-vm2.toml concurrently.' + puts ' WireGuard setup is serialized: VM1 writes the base wg1.conf first,' + puts ' then VM2 adds its peer. Requires both TOML files next to the script.' puts ' delete [--vm-id ID] [--dry-run]' + puts ' delete-both [--dry-run]' + puts ' Delete the VMs tracked by hyperstack-vm1.toml and hyperstack-vm2.toml.' puts ' status' puts ' test' puts ' model list' @@ -1955,6 +2092,7 @@ module HyperstackVM opts.banner = 'Usage: ruby hyperstack.rb [--config path] <create|delete|status> [options]' opts.on('--config PATH', "Path to TOML config (default: #{@config_path})") do |value| @config_path = value + @config_explicit = true end opts.on('-h', '--help', 'Show help') do show_help @@ -1969,39 +2107,33 @@ module HyperstackVM exit 0 end + # create-both loads its own config files and does not use the default config path. + # Parse it before building the manager so we avoid loading the default config needlessly. + if command == 'create-both' + opts = parse_create_options(@argv, include_model_preset: false) + run_create_both(**opts) + return + end + + if command == 'delete-both' + opts = parse_delete_both_options(@argv) + run_delete_both(**opts) + return + end + + if command == 'status' + run_status + return + end + + # All other commands operate on a single VM defined by the --config path. 
config_loader = ConfigLoader.load(@config_path) - state_store = StateStore.new(config_loader.config.state_file) - client = HyperstackClient.new(base_url: config_loader.config.api_base_url, api_key: config_loader.config.api_key) - local_wireguard = LocalWireGuard.new( - interface_name: config_loader.config.local_interface_name, - config_path: config_loader.config.local_wg_config_path - ) - manager = Manager.new( - config: config_loader.config, - client: client, - state_store: state_store, - local_wireguard: local_wireguard - ) + manager = build_manager(config_loader.config) case command when 'create' - replace = false - dry_run = false - install_vllm = nil - install_ollama = nil - vllm_preset = nil - parser = OptionParser.new do |opts| - opts.on('--replace', 'Delete the tracked VM before creating a new one') { replace = true } - opts.on('--dry-run', 'Resolve config and print the create plan without creating a VM') { dry_run = true } - opts.on('--vllm', 'Enable vLLM+LiteLLM setup (overrides config)') { install_vllm = true } - opts.on('--no-vllm', 'Disable vLLM+LiteLLM setup (overrides config)') { install_vllm = false } - opts.on('--ollama', 'Enable Ollama setup (overrides config)') { install_ollama = true } - opts.on('--no-ollama', 'Disable Ollama setup (overrides config)') { install_ollama = false } - opts.on('--model PRESET', 'Use a named vLLM model preset at create time') { |v| vllm_preset = v } - end - parser.parse!(@argv) - manager.create(replace: replace, dry_run: dry_run, install_vllm: install_vllm, - install_ollama: install_ollama, vllm_preset: vllm_preset) + opts = parse_create_options(@argv) + manager.create(**opts) when 'delete' vm_id = nil dry_run = false @@ -2013,8 +2145,6 @@ module HyperstackVM end parser.parse!(@argv) manager.delete(vm_id: vm_id, dry_run: dry_run) - when 'status' - manager.status when 'test' manager.test when 'model' @@ -2035,9 +2165,174 @@ module HyperstackVM raise Error, "Unknown model subcommand #{sub.inspect}. Use list or switch." 
end else - raise Error, "Unknown command #{command.inspect}. Use create, delete, status, test, or model." + raise Error, "Unknown command #{command.inspect}. Use create, create-both, delete, delete-both, status, test, or model." end end + + private + + # Parses the shared --replace / --dry-run / --vllm / --ollama / --model flags + # used by both 'create' and 'create-both'. When include_model_preset is false + # (create-both), the --model flag is not registered because each VM uses its own + # TOML default. Returns a hash suitable for splatting into Manager#create. + def parse_create_options(argv, include_model_preset: true) + opts = { replace: false, dry_run: false, install_vllm: nil, install_ollama: nil, vllm_preset: nil } + OptionParser.new do |o| + o.on('--replace', 'Delete the tracked VM before creating a new one') { opts[:replace] = true } + o.on('--dry-run', 'Print the create plan without creating a VM') { opts[:dry_run] = true } + o.on('--vllm', 'Enable vLLM+LiteLLM setup (overrides config)') { opts[:install_vllm] = true } + o.on('--no-vllm', 'Disable vLLM+LiteLLM setup (overrides config)') { opts[:install_vllm] = false } + o.on('--ollama', 'Enable Ollama setup (overrides config)') { opts[:install_ollama] = true } + o.on('--no-ollama', 'Disable Ollama setup (overrides config)') { opts[:install_ollama] = false } + o.on('--model PRESET', 'Use a named vLLM preset at create time') { |v| opts[:vllm_preset] = v } if include_model_preset + end.parse!(argv) + opts + end + + def parse_delete_both_options(argv) + opts = { dry_run: false } + OptionParser.new do |o| + o.on('--dry-run', 'Show which VMs would be deleted without deleting them') { opts[:dry_run] = true } + end.parse!(argv) + opts + end + + # Constructs a Manager and all its dependencies from a Config object. + # Accepts optional output destination and WireGuard concurrency hooks. 
+ def build_manager(config, out: $stdout, wg_setup_pre: nil, wg_setup_post: nil) + state_store = StateStore.new(config.state_file) + client = HyperstackClient.new(base_url: config.api_base_url, api_key: config.api_key) + local_wireguard = LocalWireGuard.new( + interface_name: config.local_interface_name, + config_path: config.local_wg_config_path + ) + Manager.new( + config: config, + client: client, + state_store: state_store, + local_wireguard: local_wireguard, + out: out, + wg_setup_pre: wg_setup_pre, + wg_setup_post: wg_setup_post + ) + end + + def run_status + loaders = status_config_loaders + if loaders.one? + build_manager(loaders.first.config).status + return + end + + expected_ips = [] + loaders.each_with_index do |loader, index| + puts if index.positive? + puts "[#{File.basename(loader.path)}]" + expected_ip = build_manager(loader.config).status(include_local_wireguard: false) + expected_ips << expected_ip if expected_ip + end + + puts + puts '[local-wireguard]' + build_manager(loaders.first.config).show_local_wireguard(expected_ips) + end + + def status_config_loaders + return [ConfigLoader.load(@config_path)] if @config_explicit + + candidates = [ + @config_path, + File.join(__dir__, 'hyperstack-vm1.toml'), + File.join(__dir__, 'hyperstack-vm2.toml') + ].uniq.select { |path| File.exist?(path) } + + loaders = candidates.map { |path| ConfigLoader.load(path) } + tracked = loaders.select { |loader| File.exist?(loader.config.state_file) } + tracked.empty? ? [ConfigLoader.load(@config_path)] : tracked + end + + def pair_config_loaders + [ + ConfigLoader.load(File.join(__dir__, 'hyperstack-vm1.toml')), + ConfigLoader.load(File.join(__dir__, 'hyperstack-vm2.toml')) + ] + end + + # Provisions hyperstack-vm1 and hyperstack-vm2 concurrently in separate threads. + # WireGuard setup is serialized: VM1 runs first (replacing the base wg1.conf), then + # VM2 adds its peer. A Mutex+ConditionVariable acts as a one-shot latch between threads. 
+ # If VM1 fails before reaching the WG step the latch is still released so VM2 doesn't hang. + # vllm_preset is accepted but ignored — each VM uses its own TOML default preset. + def run_create_both(replace:, dry_run:, install_vllm:, install_ollama:, vllm_preset: nil) # rubocop:disable Lint/UnusedMethodArgument + vm1_loader, vm2_loader = pair_config_loaders + vm1_config = vm1_loader.config + vm2_config = vm2_loader.config + + out_mutex = Mutex.new + wg_mutex = Mutex.new + wg_cv = ConditionVariable.new + vm1_wg_state = { done: false, error: nil } + + # VM1 signals the latch after its WG step (whether WG ran or was already done). + vm1_wg_post = proc do + wg_mutex.synchronize { vm1_wg_state[:done] = true; wg_cv.broadcast } + end + + # VM2 blocks here until VM1's WG step resolves, then raises if VM1 failed. + vm2_wg_pre = proc do + wg_mutex.synchronize { wg_cv.wait(wg_mutex) until vm1_wg_state[:done] || vm1_wg_state[:error] } + raise Error, "VM1 WireGuard setup failed; cannot add VM2 peer." if vm1_wg_state[:error] + end + + manager1 = build_manager(vm1_config, + out: PrefixedOutput.new('[vm1] ', $stdout, out_mutex), + wg_setup_post: vm1_wg_post) + manager2 = build_manager(vm2_config, + out: PrefixedOutput.new('[vm2] ', $stdout, out_mutex), + wg_setup_pre: vm2_wg_pre) + + errors = {} + create_opts = { replace: replace, dry_run: dry_run, + install_vllm: install_vllm, install_ollama: install_ollama } + + vm1_thread = Thread.new do + manager1.create(**create_opts) + rescue Error => e + errors[:vm1] = e.message + # Unblock VM2 even if VM1 failed so the process doesn't hang. + wg_mutex.synchronize { vm1_wg_state[:error] = e.message; wg_cv.broadcast } + end + + vm2_thread = Thread.new do + manager2.create(**create_opts) + rescue Error => e + errors[:vm2] = e.message + end + + [vm1_thread, vm2_thread].each(&:join) + + errors.each { |vm, msg| $stderr.puts("ERROR [#{vm}]: #{msg}") } + exit 1 unless errors.empty? 
+ end + + def run_delete_both(dry_run:) + out_mutex = Mutex.new + errors = {} + + pair_config_loaders.each_with_index do |loader, index| + label = "vm#{index + 1}" + manager = build_manager(loader.config, out: PrefixedOutput.new("[#{label}] ", $stdout, out_mutex)) + + begin + manager.delete(dry_run: dry_run) + rescue Error => e + errors[label.to_sym] = e.message + end + end + + errors.each { |vm, msg| $stderr.puts("ERROR [#{vm}]: #{msg}") } + exit 1 unless errors.empty? + end end end diff --git a/snippets/hyperstack/wg1-setup.sh b/snippets/hyperstack/wg1-setup.sh index d057fb8..49d716a 100755 --- a/snippets/hyperstack/wg1-setup.sh +++ b/snippets/hyperstack/wg1-setup.sh @@ -1,56 +1,76 @@ #!/bin/bash # -# wg1-setup.sh - Set up WireGuard wg1 tunnel between earth and hyperstack VM +# wg1-setup.sh - Set up WireGuard wg1 tunnel between earth and a hyperstack VM # # USAGE: -# ./wg1-setup.sh <VM_PUBLIC_IP> -# Example: ./wg1-setup.sh 185.216.20.163 +# ./wg1-setup.sh <VM_PUBLIC_IP> [SERVER_WG_IP] [WG_HOSTNAME] +# +# VM_PUBLIC_IP Public IP of the hyperstack VM (required) +# SERVER_WG_IP WireGuard IP to assign to this VM's tunnel interface (default: 192.168.3.1) +# Use 192.168.3.3 for hyperstack2 when hyperstack1 is already set up. 
+# WG_HOSTNAME Hostname mapped to SERVER_WG_IP in /etc/hosts (default: <vmhostname>.wg1) +# +# EXAMPLES: +# ./wg1-setup.sh 185.216.20.163 # VM1 (hyperstack1, 192.168.3.1) +# ./wg1-setup.sh 185.216.20.200 192.168.3.3 hyperstack2.wg1 # VM2 added to existing tunnel # # NETWORK DESIGN: # Subnet: 192.168.3.0/24 (separate from wg0's 192.168.2.0/24) # Port: 56710/udp # -# +----------------+ +------------------+ -# | earth (client) | | hyperstack (VM) | -# | 192.168.3.2 |<--- WireGuard ---> | 192.168.3.1 | -# +----------------+ tunnel +------------------+ -# | Ollama :11434 | -# +------------------+ +# +----------------+ +------------------+ +# | earth (client) | | hyperstack1 (VM) | +# | 192.168.3.2 |<--- WireGuard ---> | 192.168.3.1 | +# +----------------+ tunnel +------------------+ +# | | vLLM :11434 | +# | +------------------+ +# | +------------------+ +# +--------- WireGuard ----------> | hyperstack2 (VM) | +# | 192.168.3.3 | +# +------------------+ +# | vLLM :11434 | +# +------------------+ # # WHAT THIS SCRIPT DOES: -# On hyperstack VM (via SSH): +# +# For the FIRST VM (SERVER_WG_IP = 192.168.3.1, default): +# Generates fresh key-pairs and REPLACES /etc/wireguard/wg1.conf on earth with +# a single-peer config pointing to this VM. +# +# For ADDITIONAL VMs (any other SERVER_WG_IP, e.g. 192.168.3.3): +# Generates new server-side keys and ADDS or UPDATES just the new [Peer] block +# in the existing /etc/wireguard/wg1.conf, preserving the [Interface] section +# (client key-pair) and any other peers already present. +# The existing client public key from wg1.conf is extracted and used in the new +# VM's server config so it can encrypt traffic to earth. 
+# +# On every hyperstack VM (via SSH): # - Installs WireGuard if not present -# - Creates /etc/wireguard/wg1.conf -# - Opens UFW ports: 56710/udp (WireGuard), 11434/tcp from 192.168.3.0/24 (Ollama) -# - Configures Ollama to listen on 0.0.0.0:11434 +# - Creates /etc/wireguard/wg1.conf with SERVER_WG_IP as the tunnel address +# - Opens UFW ports: 56710/udp (WireGuard), 11434/tcp from 192.168.3.0/24 # - Starts wg-quick@wg1 # # On earth (locally): # - Installs WireGuard if not present (dnf) -# - Creates /etc/wireguard/wg1.conf -# - Starts wg-quick@wg1 +# - Creates or updates /etc/wireguard/wg1.conf (see above) +# - Adds SERVER_WG_IP <-> WG_HOSTNAME mapping to /etc/hosts +# - Restarts wg-quick@wg1 # # PREREQUISITES: # - SSH access to ubuntu@<VM_IP> with key-based auth # - UDP port 56710 open in cloud provider's firewall/security group # # RE-RUNNING: -# When the VM IP changes, simply re-run this script with the new IP. +# When a VM IP changes, simply re-run this script with the new IP. # It will regenerate keys and update configs on both sides. # -# USING OLLAMA REMOTELY: -# export OLLAMA_HOST=http://192.168.3.1:11434 -# ollama run qwen2.5-coder:14b-instruct -# # Or with aider: -# aider --model ollama/qwen2.5-coder:14b-instruct -# set -euo pipefail -# Configuration constants +# Fixed network constants that must match hyperstack-vm*.toml [network] section. 
WG_INTERFACE="wg1" WG_PORT="56710" -SERVER_WG_IP="192.168.3.1" +DEFAULT_SERVER_WG_IP="192.168.3.1" CLIENT_WG_IP="192.168.3.2" SUBNET_MASK="24" SSH_USER="ubuntu" @@ -61,22 +81,12 @@ GREEN='\033[0;32m' YELLOW='\033[1;33m' NC='\033[0m' # No Color -print_warning() { - echo -e "${YELLOW}$1${NC}" -} - -print_success() { - echo -e "${GREEN}$1${NC}" -} - -print_error() { - echo -e "${RED}$1${NC}" -} +print_warning() { echo -e "${YELLOW}$1${NC}"; } +print_success() { echo -e "${GREEN}$1${NC}"; } +print_error() { echo -e "${RED}$1${NC}"; } # Retry wrapper for SSH/SCP commands that may fail due to transient # connection resets (e.g. sshd restart from unattended-upgrades). -# Usage: retry_ssh ssh user@host "command" -# retry_ssh scp file user@host:/path retry_ssh() { local max_attempts=5 local attempt=1 @@ -96,50 +106,137 @@ retry_ssh() { done } +# Updates or adds a [Peer] block in the existing /etc/wireguard/wg1.conf. +# Preserves the [Interface] section and any other peers; only the block for +# SERVER_WG_IP (matched by AllowedIPs) is replaced. +# Uses python3 for safe regex-based TOML-like block manipulation. +update_peer_in_client_config() { + local server_ip="$1" + local server_pubkey="$2" + local vm_ip="$3" + local tmpfile conf_copy + tmpfile=$(mktemp) + conf_copy=$(mktemp) + + # /etc/wireguard/wg1.conf is root-owned; read it via sudo into a user-readable temp copy. + if ! sudo cat /etc/wireguard/wg1.conf > "$conf_copy" 2>/dev/null; then + print_error "Cannot read /etc/wireguard/wg1.conf. Run wg1-setup.sh for VM1 (192.168.3.1) first." + rm -f "$tmpfile" "$conf_copy" + return 1 + fi + + python3 - "$server_ip" "$server_pubkey" "$vm_ip" "$WG_PORT" "$conf_copy" "$tmpfile" << 'PYEOF' +import sys, re + +server_ip, server_pubkey, vm_ip, wg_port, conf_copy, tmpfile = sys.argv[1:] + +with open(conf_copy) as f: + content = f.read() + +if not content.strip(): + print("ERROR: wg1.conf is empty. 
Run wg1-setup.sh for VM1 (192.168.3.1) first.", file=sys.stderr) + sys.exit(1) + +# Split into sections: [Interface] block + any [Peer] blocks. +# Each section starts with a [ header; split on newline-[ boundaries. +parts = re.split(r'(?=\n\[)', content) + +# Remove any existing [Peer] block whose AllowedIPs matches server_ip/32. +kept = [p for p in parts if not (re.search(r'^\[Peer\]', p.lstrip()) and f'AllowedIPs = {server_ip}/32' in p)] + +new_peer = f""" +[Peer] +# hyperstack VM ({server_ip}) +PublicKey = {server_pubkey} +Endpoint = {vm_ip}:{wg_port} +AllowedIPs = {server_ip}/32 +PersistentKeepalive = 25""" + +result = ''.join(kept).rstrip('\n') + '\n' + new_peer + '\n' + +with open(tmpfile, 'w') as f: + f.write(result) +print('peer-updated-ok') +PYEOF + + local rc=$? + rm -f "$conf_copy" + if [[ $rc -eq 0 ]]; then + sudo cp "${tmpfile}" /etc/wireguard/wg1.conf + sudo chmod 600 /etc/wireguard/wg1.conf + fi + rm -f "${tmpfile}" + return $rc +} + # Validate arguments -if [[ $# -ne 1 ]]; then - echo "Usage: $0 <VM_PUBLIC_IP>" - echo "Example: $0 185.216.20.163" +if [[ $# -lt 1 ]]; then + echo "Usage: $0 <VM_PUBLIC_IP> [SERVER_WG_IP] [WG_HOSTNAME]" + echo "Example (VM1): $0 185.216.20.163" + echo "Example (VM2): $0 185.216.20.200 192.168.3.3 hyperstack2.wg1" exit 1 fi VM_IP="$1" +SERVER_WG_IP="${2:-${DEFAULT_SERVER_WG_IP}}" +# Default WG_HOSTNAME: replace 192.168.3. prefix with 'hyperstack' and append .wg1, +# or fall back to server IP if the address doesn't match the expected pattern. +WG_HOSTNAME="${3:-$(echo "$SERVER_WG_IP" | sed 's/^192\.168\.3\.\(.*\)/hyperstack\1.wg1/' || echo "${SERVER_WG_IP}.wg1")}" + +# Determine mode: first VM replaces the entire client config; additional VMs add a peer. +IS_FIRST_VM=false +[[ "$SERVER_WG_IP" == "$DEFAULT_SERVER_WG_IP" ]] && IS_FIRST_VM=true echo "==============================================" print_warning "IMPORTANT: Ensure UDP port ${WG_PORT} is open on the VM!" 
print_warning "This must be configured in your cloud provider's" print_warning "firewall/security group settings." +if [[ "$IS_FIRST_VM" == "false" ]]; then + print_warning "Mode: ADD PEER — ${SERVER_WG_IP} (${WG_HOSTNAME}) will be added to existing wg1.conf." + print_warning "Ensure the first VM (192.168.3.1) has already been set up." +fi echo "==============================================" echo "" -read -p "Press Enter to continue (or Ctrl+C to abort)..." +read -rp "Press Enter to continue (or Ctrl+C to abort)..." echo "" # Create temporary directory for key generation TMPDIR=$(mktemp -d) -trap "rm -rf $TMPDIR" EXIT +trap 'rm -rf $TMPDIR' EXIT echo "=== Generating WireGuard keys locally ===" -# Generate server (hyperstack) keys +# Generate server (hyperstack VM) keys — always fresh for each VM. wg genkey > "$TMPDIR/server-privatekey" wg pubkey < "$TMPDIR/server-privatekey" > "$TMPDIR/server-publickey" SERVER_PRIVATE_KEY=$(cat "$TMPDIR/server-privatekey") -SERVER_PUBLIC_KEY=$(cat "$TMPDIR/server-publickey") - -# Generate client (earth) keys -wg genkey > "$TMPDIR/client-privatekey" -wg pubkey < "$TMPDIR/client-privatekey" > "$TMPDIR/client-publickey" -CLIENT_PRIVATE_KEY=$(cat "$TMPDIR/client-privatekey") -CLIENT_PUBLIC_KEY=$(cat "$TMPDIR/client-publickey") - -print_success "Keys generated successfully" +SERVER_PUBLIC_KEY=$(cat "$TMPDIR/server-publickey") + +if [[ "$IS_FIRST_VM" == "true" ]]; then + # First VM: generate fresh client keys; the entire wg1.conf will be replaced. + wg genkey > "$TMPDIR/client-privatekey" + wg pubkey < "$TMPDIR/client-privatekey" > "$TMPDIR/client-publickey" + CLIENT_PRIVATE_KEY=$(cat "$TMPDIR/client-privatekey") + CLIENT_PUBLIC_KEY=$(cat "$TMPDIR/client-publickey") + print_success "Keys generated (first VM — full config will be replaced)" +else + # Additional VM: reuse the existing client keys from /etc/wireguard/wg1.conf so that + # the first VM's server config (which already stores the client public key) keeps working. 
+ CLIENT_PRIVATE_KEY=$(sudo cat /etc/wireguard/wg1.conf | grep -m1 'PrivateKey' | awk '{print $3}') + if [[ -z "$CLIENT_PRIVATE_KEY" ]]; then + print_error "Cannot extract client private key from /etc/wireguard/wg1.conf." + print_error "Run this script for VM1 (192.168.3.1) first." + exit 1 + fi + CLIENT_PUBLIC_KEY=$(echo "$CLIENT_PRIVATE_KEY" | wg pubkey) + print_success "Keys generated (additional VM — client keys reused from existing wg1.conf)" +fi echo "" -echo "=== Creating server (hyperstack) configuration ===" +echo "=== Creating server (hyperstack VM ${SERVER_WG_IP}) configuration ===" -# Create server wg1.conf cat > "$TMPDIR/server-wg1.conf" << EOF -# WireGuard wg1 configuration for hyperstack VM +# WireGuard wg1 configuration for hyperstack VM (${SERVER_WG_IP}) # Server side of earth <-> hyperstack tunnel # Generated by wg1-setup.sh on $(date) @@ -154,13 +251,13 @@ PublicKey = ${CLIENT_PUBLIC_KEY} AllowedIPs = ${CLIENT_WG_IP}/32 EOF -print_success "Server config created" +print_success "Server config created (server IP: ${SERVER_WG_IP})" -echo "" -echo "=== Creating client (earth) configuration ===" +if [[ "$IS_FIRST_VM" == "true" ]]; then + echo "" + echo "=== Creating client (earth) configuration ===" -# Create client wg1.conf -cat > "$TMPDIR/client-wg1.conf" << EOF + cat > "$TMPDIR/client-wg1.conf" << EOF # WireGuard wg1 configuration for earth # Client side of earth <-> hyperstack tunnel # Generated by wg1-setup.sh on $(date) @@ -170,49 +267,43 @@ Address = ${CLIENT_WG_IP}/${SUBNET_MASK} PrivateKey = ${CLIENT_PRIVATE_KEY} [Peer] -# hyperstack VM (server) +# hyperstack VM (${SERVER_WG_IP}) PublicKey = ${SERVER_PUBLIC_KEY} Endpoint = ${VM_IP}:${WG_PORT} AllowedIPs = ${SERVER_WG_IP}/32 PersistentKeepalive = 25 EOF -print_success "Client config created" + print_success "Client config created" +fi echo "" -echo "=== Setting up hyperstack VM (${VM_IP}) ===" +echo "=== Setting up hyperstack VM (${VM_IP}, tunnel IP ${SERVER_WG_IP}) ===" -# Wait for SSH to 
become available (handles transient connection resets -# from sshd restarts due to unattended-upgrades or package installs) echo "Testing SSH connection..." retry_ssh ssh -o ConnectTimeout=10 -o BatchMode=yes "${SSH_USER}@${VM_IP}" "echo 'SSH OK'" print_success "SSH connection OK" -# Install WireGuard on server if not present echo "Installing WireGuard on hyperstack..." retry_ssh ssh "${SSH_USER}@${VM_IP}" "which wg >/dev/null 2>&1 || (sudo apt update && sudo apt install -y wireguard)" print_success "WireGuard installed" -# Copy server config to hyperstack echo "Copying wg1.conf to hyperstack..." retry_ssh scp "$TMPDIR/server-wg1.conf" "${SSH_USER}@${VM_IP}:/tmp/wg1.conf" retry_ssh ssh "${SSH_USER}@${VM_IP}" "sudo mv /tmp/wg1.conf /etc/wireguard/wg1.conf && sudo chmod 600 /etc/wireguard/wg1.conf" print_success "Server config installed" -# Configure firewall on hyperstack echo "Configuring firewall (ufw) on hyperstack..." retry_ssh ssh "${SSH_USER}@${VM_IP}" bash -s << 'REMOTE_SCRIPT' sudo ufw allow ssh comment 'Allow SSH' 2>/dev/null || true sudo ufw --force enable >/dev/null 2>&1 || true sudo ufw allow 56710/udp comment 'WireGuard wg1' 2>/dev/null || true -sudo ufw allow from 192.168.3.0/24 to any port 11434 proto tcp comment 'Ollama via wg1' 2>/dev/null || true +sudo ufw allow from 192.168.3.0/24 to any port 11434 proto tcp comment 'Ollama/vLLM via wg1' 2>/dev/null || true echo "Firewall rules added" REMOTE_SCRIPT print_success "Firewall configured" -# Ensure Ollama listens on all interfaces (only if override not already set -# by ollama_setup_script, which also configures OLLAMA_MODELS and other env vars) -echo "Configuring Ollama to listen on 0.0.0.0..." +echo "Configuring Ollama to listen on 0.0.0.0 (if installed)..." 
retry_ssh ssh "${SSH_USER}@${VM_IP}" bash -s << 'REMOTE_SCRIPT' if [ -f /etc/systemd/system/ollama.service.d/override.conf ] && \ grep -q 'OLLAMA_HOST' /etc/systemd/system/ollama.service.d/override.conf; then @@ -224,12 +315,11 @@ else Environment="OLLAMA_HOST=0.0.0.0:11434" OVERRIDE sudo systemctl daemon-reload - sudo systemctl restart ollama 2>/dev/null || echo "Note: Ollama service not running or not installed" + sudo systemctl restart ollama 2>/dev/null || echo "Note: Ollama not running or not installed" fi REMOTE_SCRIPT print_success "Ollama configured" -# Start wg1 on hyperstack echo "Starting wg1 on hyperstack..." retry_ssh ssh "${SSH_USER}@${VM_IP}" "sudo systemctl start wg-quick@wg1 2>/dev/null || sudo wg-quick up wg1" print_success "wg1 started on hyperstack" @@ -237,35 +327,43 @@ print_success "wg1 started on hyperstack" echo "" echo "=== Setting up earth (local) ===" -# Check if WireGuard is installed locally if ! which wg >/dev/null 2>&1; then echo "Installing WireGuard locally..." sudo dnf install -y wireguard-tools fi print_success "WireGuard installed locally" -# Install client config locally -echo "Installing wg1.conf locally..." -sudo cp "$TMPDIR/client-wg1.conf" /etc/wireguard/wg1.conf -sudo chmod 600 /etc/wireguard/wg1.conf -print_success "Client config installed" +if [[ "$IS_FIRST_VM" == "true" ]]; then + echo "Installing fresh wg1.conf locally (first VM — replaces any existing config)..." + sudo cp "$TMPDIR/client-wg1.conf" /etc/wireguard/wg1.conf + sudo chmod 600 /etc/wireguard/wg1.conf + print_success "Client config installed" +else + echo "Adding peer ${SERVER_WG_IP} to existing wg1.conf (additional VM)..." + update_peer_in_client_config "$SERVER_WG_IP" "$SERVER_PUBLIC_KEY" "$VM_IP" + print_success "Peer added to client config" +fi + +# Update /etc/hosts so that WG_HOSTNAME resolves to the VM's WireGuard IP. +# hyperstack.rb uses this hostname in test URLs and informational output. 
+echo "Updating /etc/hosts: ${SERVER_WG_IP} ${WG_HOSTNAME}..." +sudo sed -i "/ ${WG_HOSTNAME}$/d" /etc/hosts # Remove stale entry if present +echo "${SERVER_WG_IP} ${WG_HOSTNAME}" | sudo tee -a /etc/hosts > /dev/null +print_success "/etc/hosts updated" -# Stop existing wg1 if running, then start fresh -echo "Starting wg1 locally..." -sudo systemctl stop wg-quick@wg1 2>/dev/null || true +echo "Restarting wg1 locally..." +sudo systemctl stop wg-quick@wg1 2>/dev/null || true sudo systemctl start wg-quick@wg1 -print_success "wg1 started locally" +print_success "wg1 restarted locally" echo "" echo "==============================================" print_success "Setup complete!" echo "==============================================" echo "" -echo "WireGuard wg1 tunnel is now active." -echo "" -echo "Tunnel IPs:" -echo " hyperstack (server): ${SERVER_WG_IP}" -echo " earth (client): ${CLIENT_WG_IP}" +echo "WireGuard wg1 tunnel peer active:" +echo " hyperstack VM (server): ${SERVER_WG_IP} (${WG_HOSTNAME})" +echo " earth (client): ${CLIENT_WG_IP}" echo "" echo "=== Verification commands ===" echo "" @@ -278,8 +376,8 @@ echo "" echo "# Verify default route is UNCHANGED:" echo "ip route | grep default" echo "" -echo "# Test Ollama access:" -echo "curl http://${SERVER_WG_IP}:11434/api/tags" +echo "# Test vLLM access:" +echo "curl http://${WG_HOSTNAME}:11434/v1/models" echo "" echo "=== Manual start/stop commands ===" echo "" @@ -291,8 +389,3 @@ echo "sudo systemctl start wg-quick@wg1" echo "" echo "# Restart on hyperstack (if VM rebooted):" echo "ssh ${SSH_USER}@${VM_IP} 'sudo systemctl start wg-quick@wg1'" -echo "" -echo "=== Use Ollama remotely ===" -echo "" -echo "export OLLAMA_HOST=http://${SERVER_WG_IP}:11434" -echo "curl http://${SERVER_WG_IP}:11434/v1/models" |
