gpt-oss-120b: revert to 131072 — hard architecture limit

max_position_embeddings=131072 in model config.json; exceeding it causes NaN/CUDA OOB. 163840 was rejected by vLLM at startup. The 135K error requires starting a fresh opencode conversation instead.

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
author: Paul Buetow <paul@buetow.org> 2026-03-18 18:58:41 +0200
committer: Paul Buetow <paul@buetow.org> 2026-03-18 18:58:41 +0200
commit: 3b01d5cb2c8932207127e7dd72848cea91c6347d (patch)
tree: 8f62c3609ed86db29d1792fe380d222825fb3a9f /snippets/hyperstack/hyperstack-vm.toml
parent: 0fff87a18044cdbf7af72157451ebdd801e3ff69 (diff)
1 files changed, 4 insertions, 3 deletions
diff --git a/snippets/hyperstack/hyperstack-vm.toml b/snippets/hyperstack/hyperstack-vm.toml
index cd16615..e23294f 100644
--- a/snippets/hyperstack/hyperstack-vm.toml
+++ b/snippets/hyperstack/hyperstack-vm.toml
@@ -123,13 +123,14 @@ tensor_parallel_size = 1
 tool_call_parser = ""
 
 # OpenAI GPT-OSS 120B — powerful MoE (5.1B active / 117B total, MXFP4), ~65 GB on A100.
-# MXFP4 KV cache is very compact: vLLM allocates 168K token blocks (10560×16) at 0.92 util.
-# 163840 (160K) stays within physical KV capacity; handles long Claude Code sessions (135K+).
+# Hard architecture limit: max_position_embeddings=131072 in model config.json.
+# 131072 is the absolute ceiling — exceeding it causes NaN or CUDA OOB errors.
+# For sessions approaching this limit, start a fresh opencode conversation.
 # tool_call_parser = "" disables --enable-auto-tool-choice (same reason as gpt-oss-20b).
 [vllm.presets.gpt-oss-120b]
 model = "openai/gpt-oss-120b"
 container_name = "vllm_gpt_oss_120b"
-max_model_len = 163840
+max_model_len = 131072
 gpu_memory_utilization = 0.92
 tensor_parallel_size = 1
 tool_call_parser = ""
author	Paul Buetow <paul@buetow.org>	2026-03-18 18:58:41 +0200
committer	Paul Buetow <paul@buetow.org>	2026-03-18 18:58:41 +0200
commit	3b01d5cb2c8932207127e7dd72848cea91c6347d (patch)
tree	8f62c3609ed86db29d1792fe380d222825fb3a9f /snippets/hyperstack/hyperstack-vm.toml
parent	0fff87a18044cdbf7af72157451ebdd801e3ff69 (diff)