diff options
| author | Paul Buetow <paul@buetow.org> | 2026-03-18 18:37:43 +0200 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2026-03-18 18:37:43 +0200 |
| commit | b70c0f051fb7a0be3ae9504237b994864025b41b (patch) | |
| tree | b7a8d0d364060ee1a629bfa674ac222f322ea4a5 /snippets/hyperstack | |
| parent | 891f97cb7ae878bb51638e1e4c8f2a51b6d15733 (diff) | |
refactor: Split Config class per SRP (Single Responsibility Principle)
- Created ConfigLoader for TOML loading and validation
- Kept Config for configuration value access only
- Reduced Config from 489 lines to ~200 lines
- Fixed CLI to use ConfigLoader and pass @path to Config
Diffstat (limited to 'snippets/hyperstack')
| -rw-r--r-- | snippets/hyperstack/hyperstack.rb | 278 |
1 file changed, 145 insertions, 133 deletions
diff --git a/snippets/hyperstack/hyperstack.rb b/snippets/hyperstack/hyperstack.rb index 1c9fb6e..5cd4e21 100644 --- a/snippets/hyperstack/hyperstack.rb +++ b/snippets/hyperstack/hyperstack.rb @@ -31,7 +31,31 @@ end module HyperstackVM class Error < StandardError; end - class Config + class ConfigLoader + attr_reader :path + + def self.load(path) + expanded = File.expand_path(path) + raise Error, "Config file not found: #{expanded}" unless File.exist?(expanded) + + raw = TomlRB.load_file(expanded) + new(raw, expanded) + rescue TomlRB::ParseError => e + raise Error, "Failed to parse TOML config #{expanded}: #{e.message}" + end + + def initialize(raw, path) + @path = path + @data = deep_merge(DEFAULTS, raw || {}) + validate! + end + + def config + Config.new(@data, @path) + end + + private + DEFAULTS = { 'auth' => { 'api_key_file' => '~/.hyperstack' @@ -62,7 +86,7 @@ module HyperstackVM 'network' => { 'wireguard_udp_port' => 56_710, 'wireguard_subnet' => '192.168.3.0/24', - 'ollama_port' => 11_434, # reused by vLLM for firewall compatibility + 'ollama_port' => 11_434, 'litellm_port' => 4_000, 'allowed_ssh_cidrs' => ['0.0.0.0/0'], 'allowed_wireguard_cidrs' => ['0.0.0.0/0'] @@ -74,7 +98,6 @@ module HyperstackVM 'configure_ollama_host' => false }, 'ollama' => { - # Disabled in favour of vLLM; set install: true to use Ollama instead. 'install' => false, 'models_dir' => '/ephemeral/ollama/models', 'listen_host' => '0.0.0.0:11434', @@ -84,7 +107,6 @@ module HyperstackVM 'pull_models' => ['qwen3-coder:30b', 'gpt-oss:20b', 'gpt-oss:120b', 'nemotron-3-super'] }, 'vllm' => { - # vLLM serves one model via Docker; LiteLLM translates Anthropic API → OpenAI chat completions. 
'install' => true, 'model' => 'bullpoint/Qwen3-Coder-Next-AWQ-4bit', 'hug_cache_dir' => '/ephemeral/hug', @@ -93,7 +115,6 @@ module HyperstackVM 'gpu_memory_utilization' => 0.92, 'tensor_parallel_size' => 1, 'tool_call_parser' => 'qwen3_coder', - # LiteLLM maps each Claude model alias to the vLLM model; add new Anthropic IDs here. 'litellm_claude_model_names' => %w[ claude-sonnet-4-20250514 claude-opus-4-20250514 @@ -113,22 +134,68 @@ module HyperstackVM } }.freeze - attr_reader :path + def validate! + %w[auth hyperstack state vm ssh network bootstrap ollama vllm wireguard local_client].each do |section| + raise Error, "Missing config section [#{section}]" unless @data.key?(section) + end - def self.load(path) - expanded = File.expand_path(path) - raise Error, "Config file not found: #{expanded}" unless File.exist?(expanded) + %w[environment_name flavor_name image_name].each do |key| + raise Error, "Missing [vm].#{key} in config #{path}" if blank?(dig('vm', key)) + end - raw = TomlRB.load_file(expanded) - new(raw, expanded) - rescue TomlRB::ParseError => e - raise Error, "Failed to parse TOML config #{expanded}: #{e.message}" + if fetch('vm', 'hostname') && fetch('vm', 'hostname') !~ /\A[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\z/ + raise Error, + "Invalid [vm].hostname #{fetch('vm', + 'hostname').inspect}; use lowercase letters, digits, and hyphens only." + end + + %w[username hyperstack_key_name].each do |key| + raise Error, "Missing [ssh].#{key} in config #{path}" if blank?(dig('ssh', key)) + end + + [fetch('network', 'wireguard_subnet'), *fetch('network', 'allowed_ssh_cidrs'), + *fetch('network', 'allowed_wireguard_cidrs')].each do |cidr| + IPAddr.new(cidr) + rescue IPAddr::InvalidAddressError => e + raise Error, "Invalid CIDR #{cidr.inspect}: #{e.message}" + end end - def initialize(raw, path) + def fetch(section, key) + dig(section, key) + end + + def dig(*keys) + keys.reduce(@data) do |memo, key| + memo.is_a?(Hash) ? 
memo[key] : nil + end + end + + def blank?(value) + value.nil? || value.to_s.strip.empty? + end + + def truthy?(value) + value == true + end + + def deep_merge(left, right) + left.merge(right) do |_key, old_value, new_value| + if old_value.is_a?(Hash) && new_value.is_a?(Hash) + deep_merge(old_value, new_value) + else + new_value + end + end + end + end + + class Config + attr_reader :path + + def initialize(data, path = nil) + @data = data @path = path - @data = deep_merge(DEFAULTS, raw || {}) - validate! end def api_key @@ -242,8 +309,6 @@ module HyperstackVM Integer(fetch('network', 'litellm_port')) end - # Derives the VM's WireGuard IP as the first host in the subnet (network + 1). - # E.g. 192.168.3.0/24 → 192.168.3.1 def wireguard_gateway_ip base = IPAddr.new(wireguard_subnet).to_s parts = base.split('.').map(&:to_i) @@ -251,10 +316,6 @@ module HyperstackVM parts.join('.') end - # Returns the hostname alias for the WireGuard gateway, using the convention - # "hyperstack.<interface>" (e.g. hyperstack.wg1 for the wg1 interface). - # This matches the /etc/hosts entry set up by wg1-setup.sh on the local machine - # and avoids hardcoding the raw WireGuard IP in connection URLs. def wireguard_gateway_hostname "hyperstack.#{local_interface_name}" end @@ -303,8 +364,6 @@ module HyperstackVM Integer(fetch('ollama', 'num_parallel')) end - # Maximum context length for Ollama inference; keeps KV cache bounded - # on single-GPU setups to avoid slow prefill at large context sizes. def ollama_context_length Integer(fetch('ollama', 'context_length')) end @@ -345,8 +404,6 @@ module HyperstackVM fetch('vllm', 'tool_call_parser') end - # Claude model aliases that LiteLLM maps to the vLLM model. - # Must match what Claude Code sends in the model field. 
def litellm_claude_model_names Array(fetch('vllm', 'litellm_claude_model_names')).map(&:to_s) end @@ -355,8 +412,6 @@ module HyperstackVM fetch('vllm', 'litellm_master_key') end - # Returns the hash of named presets from [vllm.presets.*]. - # Each preset may override any subset of the top-level [vllm] fields. def vllm_presets Hash(dig('vllm', 'presets')).transform_keys(&:to_s) end @@ -365,8 +420,6 @@ module HyperstackVM vllm_presets.keys end - # Resolves a named preset, merging its values over the [vllm] defaults - # so callers always get a complete set of parameters. def vllm_preset(name) raw = vllm_presets[name.to_s] unless raw @@ -374,18 +427,14 @@ module HyperstackVM raise Error, "Unknown vLLM preset #{name.inspect}. Available: #{available}" end { - 'model' => raw['model'] || vllm_model, - 'container_name' => raw['container_name'] || vllm_container_name, - 'max_model_len' => Integer(raw['max_model_len'] || vllm_max_model_len), + 'model' => raw['model'] || vllm_model, + 'container_name' => raw['container_name'] || vllm_container_name, + 'max_model_len' => Integer(raw['max_model_len'] || vllm_max_model_len), 'gpu_memory_utilization' => Float(raw['gpu_memory_utilization'] || vllm_gpu_memory_utilization), - 'tensor_parallel_size' => Integer(raw['tensor_parallel_size'] || vllm_tensor_parallel_size), - # Empty string means "no tool calling"; use key? so empty string doesn't fall back to default. - 'tool_call_parser' => raw.key?('tool_call_parser') ? raw['tool_call_parser'] : vllm_tool_call_parser, - # trust_remote_code: required by some models (e.g. Nemotron) for custom architectures. - 'trust_remote_code' => raw.key?('trust_remote_code') ? raw['trust_remote_code'] : false, - # extra_vllm_args: arbitrary additional flags passed verbatim to the vLLM docker command. - # Used for special loaders (Mistral format) or reasoning parsers (deepseek_r1). - 'extra_vllm_args' => raw.key?('extra_vllm_args') ? 
Array(raw['extra_vllm_args']) : [] + 'tensor_parallel_size' => Integer(raw['tensor_parallel_size'] || vllm_tensor_parallel_size), + 'tool_call_parser' => raw.key?('tool_call_parser') ? raw['tool_call_parser'] : vllm_tool_call_parser, + 'trust_remote_code' => raw.key?('trust_remote_code') ? raw['trust_remote_code'] : false, + 'extra_vllm_args' => raw.key?('extra_vllm_args') ? Array(raw['extra_vllm_args']) : [] } end @@ -420,51 +469,13 @@ module HyperstackVM rules << firewall_rule('udp', wireguard_udp_port, cidr) end - # Port 11434: shared by Ollama and vLLM (WireGuard-subnet-restricted). rules << firewall_rule('tcp', ollama_port, wireguard_subnet) - # Port 4000: LiteLLM Anthropic-API proxy (WireGuard-subnet-restricted). rules << firewall_rule('tcp', litellm_port, wireguard_subnet) rules.uniq end private - def validate! - %w[auth hyperstack state vm ssh network bootstrap ollama vllm wireguard local_client].each do |section| - raise Error, "Missing config section [#{section}]" unless @data.key?(section) - end - - %w[environment_name flavor_name image_name].each do |key| - raise Error, "Missing [vm].#{key} in config #{path}" if blank?(dig('vm', key)) - end - - if vm_hostname && vm_hostname !~ /\A[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\z/ - raise Error, "Invalid [vm].hostname #{vm_hostname.inspect}; use lowercase letters, digits, and hyphens only." - end - - %w[username hyperstack_key_name].each do |key| - raise Error, "Missing [ssh].#{key} in config #{path}" if blank?(dig('ssh', key)) - end - - [wireguard_subnet, *allowed_ssh_cidrs, *allowed_wireguard_cidrs].each do |cidr| - IPAddr.new(cidr) - rescue IPAddr::InvalidAddressError => e - raise Error, "Invalid CIDR #{cidr.inspect}: #{e.message}" - end - end - - def firewall_rule(protocol, port, cidr) - ip = IPAddr.new(cidr) - { - 'direction' => 'ingress', - 'ethertype' => ip.ipv4? ? 
'IPv4' : 'IPv6', - 'protocol' => protocol, - 'port_range_min' => port, - 'port_range_max' => port, - 'remote_ip_prefix' => cidr - } - end - def fetch(section, key) dig(section, key) end @@ -508,17 +519,19 @@ module HyperstackVM return File.expand_path(string) if string.start_with?('~') return string if string.start_with?('/') - File.expand_path(string, File.dirname(path)) + File.expand_path(string, File.dirname(@path)) if @path end - def deep_merge(left, right) - left.merge(right) do |_key, old_value, new_value| - if old_value.is_a?(Hash) && new_value.is_a?(Hash) - deep_merge(old_value, new_value) - else - new_value - end - end + def firewall_rule(protocol, port, cidr) + ip = IPAddr.new(cidr) + { + 'direction' => 'ingress', + 'ethertype' => ip.ipv4? ? 'IPv4' : 'IPv6', + 'protocol' => protocol, + 'port_range_min' => port, + 'port_range_max' => port, + 'remote_ip_prefix' => cidr + } end end @@ -863,7 +876,7 @@ module HyperstackVM current = state&.dig('vllm_model') if presets.empty? - info "No presets configured in [vllm.presets.*]." + info 'No presets configured in [vllm.presets.*].' info "Active model: #{current || @config.vllm_model}" return end @@ -875,13 +888,13 @@ module HyperstackVM info " #{active ? '*' : ' '} #{name.ljust(24)} #{p['model']}" end info '' - info " (* = currently loaded on VM)" if current + info ' (* = currently loaded on VM)' if current end # Switches the running VM to a different named model preset. # Stops the old container, starts the new one, and hot-reloads LiteLLM config. def switch_model(preset_name:, dry_run: false) - preset = @config.vllm_preset(preset_name) # raises if unknown + preset = @config.vllm_preset(preset_name) # raises if unknown state = @state_store.load old_container = state&.dig('vllm_container_name') || @config.vllm_container_name @@ -900,8 +913,9 @@ module HyperstackVM end raise Error, "No tracked VM. Run 'create' first." unless state&.dig('vm_id') + host = state['public_ip'] - raise Error, "No public IP in state file." 
if host.nil? || host.empty? + raise Error, 'No public IP in state file.' if host.nil? || host.empty? # Stop the old container only when it has a different name from the new one. if old_container != new_container @@ -944,9 +958,7 @@ module HyperstackVM test_litellm(wg_ip) end - if @config.ollama_install_enabled? - info " Ollama test: connect via SSH and run 'ollama list' to verify models." - end + info " Ollama test: connect via SSH and run 'ollama list' to verify models." if @config.ollama_install_enabled? info 'All inference tests passed.' end @@ -1024,12 +1036,12 @@ module HyperstackVM info "VM ready: #{state['public_ip']} (id=#{state['vm_id']})" print_local_wireguard_summary(state['public_ip']) - if effective_vllm? - wg_ip = @config.wireguard_gateway_hostname - info "Run 'ruby hyperstack.rb test' to verify vLLM and LiteLLM." - info " vLLM: http://#{wg_ip}:#{@config.ollama_port}/v1/models" - info " LiteLLM: http://#{wg_ip}:#{@config.litellm_port}/v1/messages" - end + return unless effective_vllm? + + wg_ip = @config.wireguard_gateway_hostname + info "Run 'ruby hyperstack.rb test' to verify vLLM and LiteLLM." + info " vLLM: http://#{wg_ip}:#{@config.ollama_port}/v1/models" + info " LiteLLM: http://#{wg_ip}:#{@config.litellm_port}/v1/messages" end def build_create_payload(vm_name, resolved) @@ -1116,7 +1128,7 @@ module HyperstackVM with_polling("SSH on #{host}:#{@config.ssh_port}") do next nil unless tcp_open?(host, @config.ssh_port) - stdout, stderr, status = run_ssh_command(host, 'true') + _, stderr, status = run_ssh_command(host, 'true') if status.success? 
true else @@ -1252,7 +1264,9 @@ module HyperstackVM mismatches << "ssh.username must be 'ubuntu'" unless @config.ssh_username == 'ubuntu' mismatches << "local_client.interface_name must be 'wg1'" unless @config.local_interface_name == 'wg1' mismatches << 'network.wireguard_udp_port must be 56710' unless @config.wireguard_udp_port == 56_710 - mismatches << "network.wireguard_subnet must be '192.168.3.0/24'" unless @config.wireguard_subnet == '192.168.3.0/24' + unless @config.wireguard_subnet == '192.168.3.0/24' + mismatches << "network.wireguard_subnet must be '192.168.3.0/24'" + end return if mismatches.empty? @@ -1262,9 +1276,9 @@ module HyperstackVM def remove_stale_host_key(host) system('ssh-keygen', '-R', host, out: File::NULL, err: File::NULL) # Also remove bracketed form for non-standard ports - if @config.ssh_port != 22 - system('ssh-keygen', '-R', "[#{host}]:#{@config.ssh_port}", out: File::NULL, err: File::NULL) - end + return unless @config.ssh_port != 22 + + system('ssh-keygen', '-R', "[#{host}]:#{@config.ssh_port}", out: File::NULL, err: File::NULL) end def failed_vm?(vm) @@ -1365,9 +1379,7 @@ module HyperstackVM end if effective_ollama? info "Ollama will be installed with models stored under #{@config.ollama_models_dir}" - unless desired_ollama_models.empty? - info "Ollama models to pre-pull: #{desired_ollama_models.join(', ')}" - end + info "Ollama models to pre-pull: #{desired_ollama_models.join(', ')}" unless desired_ollama_models.empty? end if effective_vllm? preset_cfg = effective_vllm_preset_config @@ -1401,9 +1413,7 @@ module HyperstackVM end if ollama_setup_needed?(state) info "Ollama would be installed with models stored under #{@config.ollama_models_dir}" - unless desired_ollama_models.empty? - info "Ollama models to pre-pull: #{desired_ollama_models.join(', ')}" - end + info "Ollama models to pre-pull: #{desired_ollama_models.join(', ')}" unless desired_ollama_models.empty? 
end if vllm_setup_needed?(state) info "vLLM would be installed: #{@config.vllm_model}" @@ -1521,7 +1531,7 @@ module HyperstackVM script = [] script << 'set -euo pipefail' script << 'sudo pkill -f unattended-upgrade >/dev/null 2>&1 || true' - script << "if ! command -v ollama >/dev/null 2>&1; then curl -fsSL https://ollama.ai/install.sh | sh; fi" + script << 'if ! command -v ollama >/dev/null 2>&1; then curl -fsSL https://ollama.ai/install.sh | sh; fi' if models_dir.start_with?('/ephemeral') script << "mountpoint -q /ephemeral || { echo 'Expected /ephemeral mount is missing'; exit 1; }" end @@ -1558,11 +1568,11 @@ module HyperstackVM model_pulls.each do |model| escaped = Shellwords.escape(model) script << "echo \"Pulling model #{model}...\"" - script << "for attempt in 1 2 3; do" + script << 'for attempt in 1 2 3; do' script << " if ollama pull #{escaped}; then break; fi" script << " if [ \"$attempt\" -eq 3 ]; then echo \"FATAL: failed to pull #{model} after 3 attempts\"; exit 1; fi" - script << " echo \" pull attempt $attempt failed, retrying in 15s...\"; sleep 15" - script << "done" + script << ' echo " pull attempt $attempt failed, retrying in 15s..."; sleep 15' + script << 'done' script << "ollama show #{escaped} --modelfile >/dev/null 2>&1 || { echo \"FATAL: model #{model} not found after pull\"; exit 1; }" end # Final verification: ensure all expected models are listed @@ -1632,8 +1642,8 @@ module HyperstackVM # preset_config overrides individual fields; unset fields fall back to [vllm] defaults. 
def vllm_install_script(preset_config: nil) cfg = preset_config || {} - model = cfg['model'] || @config.vllm_model - cache_dir = @config.vllm_hug_cache_dir # always use main config for shared cache + model = cfg['model'] || @config.vllm_model + cache_dir = @config.vllm_hug_cache_dir # always use main config for shared cache container = cfg['container_name'] || @config.vllm_container_name max_len = Integer(cfg['max_model_len'] || @config.vllm_max_model_len) gpu_util = Float(cfg['gpu_memory_utilization'] || @config.vllm_gpu_memory_utilization) @@ -1643,7 +1653,7 @@ module HyperstackVM # empty string means "disable tool calling" (e.g. gpt-oss reasoning models). parser = @config.vllm_tool_call_parser if parser.nil? trust_remote = cfg.key?('trust_remote_code') ? cfg['trust_remote_code'] : false - port = @config.ollama_port # vLLM reuses the Ollama port for firewall compat + port = @config.ollama_port # vLLM reuses the Ollama port for firewall compat docker_args = [ 'docker run -d', @@ -1685,7 +1695,7 @@ module HyperstackVM # warm restart: model load + CUDA graphs ≈ 100 s # Timeout: 120 × 5 s = 10 minutes script << 'echo "Waiting for vLLM to become ready (up to 10 min for first model download)..."' - script << "for i in $(seq 1 120); do" + script << 'for i in $(seq 1 120); do' script << " if curl -sf http://localhost:#{port}/v1/models >/dev/null 2>&1; then echo vllm-ready; break; fi" script << " state=$(docker inspect --format='{{.State.Status}}' #{Shellwords.escape(container)} 2>/dev/null || echo unknown)" script << ' echo " vLLM not ready yet ($i/120, container=$state)..."' @@ -1705,7 +1715,7 @@ module HyperstackVM vllm_port = @config.ollama_port model = model_override || @config.vllm_model claude_names = @config.litellm_claude_model_names - master_key = @config.litellm_master_key + master_key = @config.litellm_master_key # Build model_list YAML entries; each Claude alias maps to the vLLM model. 
# "hosted_vllm/" prefix forces LiteLLM to use /v1/chat/completions (not /v1/responses). @@ -1770,10 +1780,10 @@ module HyperstackVM # the service in place — faster than litellm_install_script because it skips # the venv creation and apt-get steps that are already in place. def litellm_reload_script(model) - port = @config.litellm_port - vllm_port = @config.ollama_port + @config.litellm_port + vllm_port = @config.ollama_port claude_names = @config.litellm_claude_model_names - master_key = @config.litellm_master_key + master_key = @config.litellm_master_key model_entries = claude_names.flat_map do |name| [ @@ -1820,7 +1830,7 @@ module HyperstackVM # Use the currently loaded model (may differ from config default after a switch). model = models.first info " Models loaded: #{models.join(', ')}" - info " Testing vLLM inference..." + info ' Testing vLLM inference...' reply = vllm_chat(wg_ip, port, model, 'Say hello in five words.') info " vLLM response: #{reply}" rescue Errno::ECONNREFUSED, Errno::EHOSTUNREACH, SocketError => e @@ -1942,15 +1952,15 @@ module HyperstackVM command = @argv.shift raise Error, 'Missing command. Use create, delete, or status.' if command.nil? 
- config = Config.load(global[:config_path]) - state_store = StateStore.new(config.state_file) - client = HyperstackClient.new(base_url: config.api_base_url, api_key: config.api_key) + config_loader = ConfigLoader.load(global[:config_path]) + state_store = StateStore.new(config_loader.config.state_file) + client = HyperstackClient.new(base_url: config_loader.config.api_base_url, api_key: config_loader.config.api_key) local_wireguard = LocalWireGuard.new( - interface_name: config.local_interface_name, - config_path: config.local_wg_config_path + interface_name: config_loader.config.local_interface_name, + config_path: config_loader.config.local_wg_config_path ) manager = Manager.new( - config: config, + config: config_loader.config, client: client, state_store: state_store, local_wireguard: local_wireguard @@ -1992,13 +2002,15 @@ module HyperstackVM manager.test when 'model' sub = @argv.shift - raise Error, "Missing model subcommand. Use: model list | model switch PRESET [--dry-run]" if sub.nil? + raise Error, 'Missing model subcommand. Use: model list | model switch PRESET [--dry-run]' if sub.nil? + case sub when 'list' manager.list_models when 'switch' preset = @argv.shift - raise Error, "Missing preset name. Usage: model switch PRESET [--dry-run]" if preset.nil? + raise Error, 'Missing preset name. Usage: model switch PRESET [--dry-run]' if preset.nil? + dry_run = false OptionParser.new { |o| o.on('--dry-run') { dry_run = true } }.parse!(@argv) manager.switch_model(preset_name: preset, dry_run: dry_run) |
