From d3821c76ecd18bf6256d7493596c304fff784d29 Mon Sep 17 00:00:00 2001
From: Paul Buetow <paul@buetow.org>
Date: Wed, 18 Mar 2026 16:50:38 +0200
Subject: Add extra_vllm_args support; fix nemotron-super to real 120B; add
 deepseek-r1-32b, qwen3-32b, devstral presets

- hyperstack.rb: add extra_vllm_args array field to preset resolver and
  vllm_install_script; flags are appended verbatim to the docker run command,
  enabling per-preset vLLM flags (reasoning parsers, Mistral loader)
- hyperstack.rb: show extra_args in dry-run model switch output
- hyperstack-vm.toml: fix nemotron-super to use actual NVIDIA Nemotron-3-Super-120B-A12B
  AWQ (cyankiwi) with trust_remote_code=true; previous preset incorrectly used llama-3.3-70b
- hyperstack-vm.toml: add deepseek-r1-32b (--reasoning-parser deepseek_r1, ~18 GB)
- hyperstack-vm.toml: add qwen3-32b (--reasoning-parser deepseek_r1, ~18 GB)
- hyperstack-vm.toml: add devstral (Mistral tokenizer+config format, ~15 GB); --load_format
  mistral omitted because AWQ weights are in standard HF safetensors format

All 6 new/updated presets were tested end-to-end on an A100 80GB (vLLM 0.17.1).

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
---
 snippets/hyperstack/hyperstack.rb | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'snippets/hyperstack/hyperstack.rb')

diff --git a/snippets/hyperstack/hyperstack.rb b/snippets/hyperstack/hyperstack.rb
index cf2ee0d..2ab53c9 100644
--- a/snippets/hyperstack/hyperstack.rb
+++ b/snippets/hyperstack/hyperstack.rb
@@ -382,7 +382,10 @@ module HyperstackVM
         # Empty string means "no tool calling"; use key? so empty string doesn't fall back to default.
         'tool_call_parser'       => raw.key?('tool_call_parser') ? raw['tool_call_parser'] : vllm_tool_call_parser,
         # trust_remote_code: required by some models (e.g. Nemotron) for custom architectures.
-        'trust_remote_code'      => raw.key?('trust_remote_code') ? raw['trust_remote_code'] : false
+        'trust_remote_code'      => raw.key?('trust_remote_code') ? raw['trust_remote_code'] : false,
+        # extra_vllm_args: arbitrary additional flags passed verbatim to the vLLM docker command.
+        # Used for special loaders (Mistral format) or reasoning parsers (deepseek_r1).
+        'extra_vllm_args'        => raw.key?('extra_vllm_args') ? Array(raw['extra_vllm_args']) : []
       }
     end
 
@@ -890,7 +893,8 @@ module HyperstackVM
         info "  container: #{old_container} → #{new_container}"
         trust_note  = preset['trust_remote_code'] ? ', trust_remote_code: true' : ''
         parser_note = preset['tool_call_parser'].to_s.empty? ? 'none' : preset['tool_call_parser']
-        info "  max_model_len: #{preset['max_model_len']}, tool_call_parser: #{parser_note}#{trust_note}"
+        extra_note  = preset['extra_vllm_args']&.any? ? ", extra_args: #{preset['extra_vllm_args'].join(' ')}" : ''
+        info "  max_model_len: #{preset['max_model_len']}, tool_call_parser: #{parser_note}#{trust_note}#{extra_note}"
         return
       end
 
@@ -1660,6 +1664,8 @@ module HyperstackVM
         docker_args << "--tool-call-parser #{Shellwords.escape(parser)}"
       end
       docker_args << '--trust-remote-code' if trust_remote
+      # Append any extra flags verbatim (e.g. Mistral loader flags, reasoning parser).
+      (cfg['extra_vllm_args'] || []).each { |arg| docker_args << arg }
       docker_run = docker_args.join(' ')
 
       script = []
-- 
cgit v1.2.3