gpt-oss-120b: enable reasoning via openai_gptoss parser

- Add --reasoning-parser openai_gptoss to the gpt-oss-120b vLLM config in all
  three toml files; the parser extracts <|channel|>analysis thinking blocks
  into reasoning_content in API responses.
- Mark gpt-oss-120b as reasoning: true in pi/agent/models.json for all three
  providers (hyperstack, hyperstack1, hyperstack2).
- Update the vm1 state file.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
author: Paul Buetow <paul@buetow.org> 2026-03-24 23:49:42 +0200
committer: Paul Buetow <paul@buetow.org> 2026-03-24 23:49:42 +0200
commit: 5ec7da198ec4010bddca997699e02274b7b85613 (patch)
tree: f1b13713ecf9f23274be15336cf44dade1c55eaa /hyperstack-vm1.toml
parent: 6eb999e2fafc7a9731155f69c696993dbf67da94 (diff)
1 files changed, 1 insertions, 0 deletions
diff --git a/hyperstack-vm1.toml b/hyperstack-vm1.toml
index 35a330c..8df93f5 100644
--- a/hyperstack-vm1.toml
+++ b/hyperstack-vm1.toml
@@ -134,6 +134,7 @@ max_model_len = 131072
 gpu_memory_utilization = 0.92
 tensor_parallel_size = 1
 tool_call_parser = ""
+extra_vllm_args = ["--reasoning-parser", "openai_gptoss"]
 
 # Qwen2.5-Coder-32B-Instruct AWQ — best-in-class open coding model at 32B, ~18 GB on A100.
 [vllm.presets.qwen25-coder-32b]
author	Paul Buetow <paul@buetow.org>	2026-03-24 23:49:42 +0200
committer	Paul Buetow <paul@buetow.org>	2026-03-24 23:49:42 +0200
commit	5ec7da198ec4010bddca997699e02274b7b85613 (patch)
tree	f1b13713ecf9f23274be15336cf44dade1c55eaa /hyperstack-vm1.toml
parent	6eb999e2fafc7a9731155f69c696993dbf67da94 (diff)