From 5ec7da198ec4010bddca997699e02274b7b85613 Mon Sep 17 00:00:00 2001
From: Paul Buetow
Date: Tue, 24 Mar 2026 23:49:42 +0200
Subject: gpt-oss-120b: enable reasoning via openai_gptoss parser

- Add --reasoning-parser openai_gptoss to gpt-oss-120b vLLM config in all three toml files; extracts <|channel|>analysis thinking blocks into reasoning_content in API responses
- Mark gpt-oss-120b as reasoning: true in pi/agent/models.json for all three providers (hyperstack, hyperstack1, hyperstack2)
- Update vm1 state file

Co-Authored-By: Claude Sonnet 4.6
---
 hyperstack-vm1.toml | 1 +
 1 file changed, 1 insertion(+)

(limited to 'hyperstack-vm1.toml')

diff --git a/hyperstack-vm1.toml b/hyperstack-vm1.toml
index 35a330c..8df93f5 100644
--- a/hyperstack-vm1.toml
+++ b/hyperstack-vm1.toml
@@ -134,6 +134,7 @@ max_model_len = 131072
 gpu_memory_utilization = 0.92
 tensor_parallel_size = 1
 tool_call_parser = ""
+extra_vllm_args = ["--reasoning-parser", "openai_gptoss"]
 
 # Qwen2.5-Coder-32B-Instruct AWQ — best-in-class open coding model at 32B, ~18 GB on A100.
 [vllm.presets.qwen25-coder-32b]
-- 
cgit v1.2.3