From 0664ffcc62b2fb240286fde463635e510a41df84 Mon Sep 17 00:00:00 2001 From: Paul Buetow Date: Mon, 6 Apr 2026 11:02:43 +0300 Subject: hyperstack: switch to Gemma 4 31B on VM2, Qwen3-Coder-Next on VM1 VM1 (hyperstack-vm1-coder.toml, renamed from hyperstack-vm1-gptoss.toml): - Default model switched from gpt-oss-120b to qwen3-coder-next - Config file renamed to reflect actual default model VM2 (hyperstack-vm2.toml): - Default model switched from qwen3-coder-next to Gemma 4 31B AWQ - Uses vLLM nightly image + transformers==5.5.0 workaround: Gemma 4 architecture is registered in transformers 5.x but vLLM stable pins <5 - max_model_len=131072 (128K context); KV cache fills ~95% of H100-80GB VRAM - Added gemma4-31b preset watcher.rb: - Add loading_status field to VmSnapshot to show live model-load progress (last relevant log line during startup instead of generic "loading" message) - fetch_vm_stats now captures both Engine 0 stats and loading-phase log lines in a single SSH call using a shell variable to avoid two docker log invocations - clean_log_line() strips vLLM PID/timestamp prefix for readable display cli.rb: update all hardcoded hyperstack-vm1-gptoss.toml references to hyperstack-vm1-coder.toml hypr.fish: replace pi-hyperstack-nemotron with pi-hyperstack-coder (VM1), add pi-hyperstack-gemma4 (VM2) Co-Authored-By: Claude Sonnet 4.6 --- lib/hyperstack/cli.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib/hyperstack/cli.rb') diff --git a/lib/hyperstack/cli.rb b/lib/hyperstack/cli.rb index f4d1cef..d4679b1 100644 --- a/lib/hyperstack/cli.rb +++ b/lib/hyperstack/cli.rb @@ -21,12 +21,12 @@ module HyperstackVM puts 'Commands:' puts ' create [--replace] [--dry-run] [--vllm|--no-vllm] [--ollama|--no-ollama] [--model PRESET]' puts ' create-both [--replace] [--dry-run] [--vllm|--no-vllm] [--ollama|--no-ollama]' - puts ' Provision hyperstack-vm1-gptoss.toml and hyperstack-vm2.toml concurrently.' + puts ' Provision hyperstack-vm1-coder.toml and hyperstack-vm2.toml concurrently.' puts ' WireGuard setup is serialized: VM1 writes the base wg1.conf first,' puts ' then VM2 adds its peer. Requires both TOML files next to the script.' puts ' delete [--vm-id ID] [--dry-run]' puts ' delete-both [--dry-run]' - puts ' Delete the VMs tracked by hyperstack-vm1-gptoss.toml and hyperstack-vm2.toml.' + puts ' Delete the VMs tracked by hyperstack-vm1-coder.toml and hyperstack-vm2.toml.' puts ' status' puts ' watch' puts ' Poll all active VMs for vLLM and GPU stats every 60 s.' @@ -237,7 +237,7 @@ module HyperstackVM candidates = [ @config_path, - File.join(REPO_ROOT, 'hyperstack-vm1-gptoss.toml'), + File.join(REPO_ROOT, 'hyperstack-vm1-coder.toml'), File.join(REPO_ROOT, 'hyperstack-vm2.toml'), File.join(REPO_ROOT, 'hyperstack-vm-photo.toml') ].uniq.select { |path| File.exist?(path) } @@ -249,7 +249,7 @@ module HyperstackVM def pair_config_loaders [ - ConfigLoader.load(File.join(REPO_ROOT, 'hyperstack-vm1-gptoss.toml')), + ConfigLoader.load(File.join(REPO_ROOT, 'hyperstack-vm1-coder.toml')), ConfigLoader.load(File.join(REPO_ROOT, 'hyperstack-vm2.toml')) ] end -- cgit v1.2.3