8 files changed, 10 insertions, 147 deletions
diff --git a/README.md b/README.md
index cdb4df4..39c669e 100644
--- a/README.md
+++ b/README.md
@@ -166,7 +166,7 @@ definitions are available without any manual config editing.
 Source `hyperstack.fish` or copy the abbreviations into your Fish config:
 
 ```fish
-abbr pi-hyperstack         pi --model hyperstack/openai/gpt-oss-120b
+abbr pi-hyperstack         pi --model hyperstack/Qwen/Qwen3.6-27B-FP8
 abbr pi-hyperstack-coder   pi --model hyperstack1/Qwen/Qwen3.6-27B-FP8
 abbr pi-hyperstack-qwen36  pi --model hyperstack2/Qwen/Qwen3.6-27B-FP8
 abbr pi-hyperstack-gemma4  pi --model hyperstack2/cyankiwi/gemma-4-31B-it-AWQ-4bit
@@ -175,7 +175,7 @@ abbr pi-hyperstack-gemma4  pi --model hyperstack2/cyankiwi/gemma-4-31B-it-AWQ-4b
 Then launch a session after the VM(s) are up:
 
 ```fish
-pi-hyperstack            # GPT-OSS 120B on VM1
+pi-hyperstack            # Qwen3.6 27B FP8 on the single-VM setup
 pi-hyperstack-coder      # Qwen3.6 27B FP8 on VM1
 pi-hyperstack-qwen36     # Qwen3.6 27B FP8 on VM2
 pi-hyperstack-gemma4     # Gemma 4 31B on VM2
@@ -187,7 +187,7 @@ Three providers are defined, one per setup, each pointing at its vLLM endpoint o
 
 | Provider | Base URL | Primary model |
 |----------|----------|---------------|
-| `hyperstack` | `http://hyperstack.wg1:11434/v1` | GPT-OSS 120B (single-VM) |
+| `hyperstack` | `http://hyperstack.wg1:11434/v1` | Qwen3.6 27B FP8 (single-VM) |
 | `hyperstack1` | `http://hyperstack1.wg1:11434/v1` | Qwen3.6 27B FP8 (default; presets in TOML) |
 | `hyperstack2` | `http://hyperstack2.wg1:11434/v1` | Gemma 4 31B (default; presets in TOML) |
 
@@ -212,7 +212,7 @@ After loading a different model on a VM with `model switch` (see [Switching mode
 tell Pi to use it without restarting the session:
 
 ```
-model switch hyperstack1/openai/gpt-oss-120b
+model switch hyperstack1/Qwen/Qwen2.5-Coder-32B-Instruct-AWQ
 ```
 
 Pi sends subsequent requests to the new model ID immediately; the provider base URL stays the same.
@@ -281,8 +281,6 @@ Available presets (both VMs share the same set):
 | `gemma4-31b` | Gemma 4 31B IT (AWQ-4bit) | ~19 GB | 32K–128K (see TOML) |
 | `nemotron-super` | Nemotron-3-Super 120B (Mamba+MoE, 12B active) | ~60 GB | 131K |
 | `qwen36-27b` | Qwen3.6 27B FP8 | ~45 GB | 262K |
-| `gpt-oss-120b` | GPT-OSS 120B (MoE, MXFP4) | ~65 GB | 131K |
-| `gpt-oss-20b` | GPT-OSS 20B (MoE, MXFP4) | ~14 GB | 65K |
 | `qwen25-coder-32b` | Qwen2.5-Coder-32B-Instruct (AWQ) | ~18 GB | 32K |
 | `qwen3-coder-30b` | Qwen3-Coder-30B-A3B (MoE, AWQ) | ~18 GB | 65K |
 | `deepseek-r1-32b` | DeepSeek-R1-Distill-Qwen-32B (AWQ) | ~18 GB | 32K |
diff --git a/hyperstack-vm1.toml b/hyperstack-vm1.toml
index 75c313c..d28dbb8 100644
--- a/hyperstack-vm1.toml
+++ b/hyperstack-vm1.toml
@@ -55,7 +55,7 @@ listen_host = "0.0.0.0:11434"
 gpu_overhead_mb = 2000
 num_parallel = 1
 context_length = 32768
-pull_models = ["qwen36-27b", "qwen3-coder:30b", "gpt-oss:20b", "gpt-oss:120b", "nemotron-3-super"]
+pull_models = ["qwen36-27b", "qwen3-coder:30b", "nemotron-3-super"]
 
 # vLLM serves one model via Docker on the OpenAI-compatible API.
 # VM1 defaults to Qwen3.6 27B; use 'model switch' to load any other preset.
@@ -102,26 +102,6 @@ enable_prefix_caching = false
 extra_docker_env = ["VLLM_ALLOW_LONG_MAX_MODEL_LEN=1", "PYTORCH_ALLOC_CONF=expandable_segments:True"]
 extra_vllm_args = ["--reasoning-parser", "nemotron_v3", "--enforce-eager"]
 
-# OpenAI GPT-OSS 20B — ultra-fast MoE (3.6B active / 20B total, MXFP4), ~14 GB on A100.
-[vllm.presets.gpt-oss-20b]
-model = "openai/gpt-oss-20b"
-container_name = "vllm_gpt_oss_20b"
-max_model_len = 65536
-gpu_memory_utilization = 0.92
-tensor_parallel_size = 1
-tool_call_parser = ""
-
-# OpenAI GPT-OSS 120B — powerful MoE (5.1B active / 117B total, MXFP4), ~65 GB on A100.
-# Hard architecture limit: max_position_embeddings=131072 in model config.json.
-[vllm.presets.gpt-oss-120b]
-model = "openai/gpt-oss-120b"
-container_name = "vllm_gpt_oss_120b"
-max_model_len = 131072
-gpu_memory_utilization = 0.92
-tensor_parallel_size = 1
-tool_call_parser = ""
-extra_vllm_args = ["--reasoning-parser", "openai_gptoss"]
-
 # Qwen2.5-Coder-32B-Instruct AWQ — best-in-class open coding model at 32B, ~18 GB on A100.
 [vllm.presets.qwen25-coder-32b]
 model = "Qwen/Qwen2.5-Coder-32B-Instruct-AWQ"
diff --git a/hyperstack-vm2.toml b/hyperstack-vm2.toml
index faa8054..3e74aae 100644
--- a/hyperstack-vm2.toml
+++ b/hyperstack-vm2.toml
@@ -118,26 +118,6 @@ trust_remote_code = true
 # otherwise needs alongside the ~60 GB weights. Trades some throughput for stability.
 extra_vllm_args = ["--reasoning-parser", "nemotron_v3", "--enforce-eager"]
 
-# OpenAI GPT-OSS 20B — ultra-fast MoE (3.6B active / 20B total, MXFP4), ~14 GB on A100.
-[vllm.presets.gpt-oss-20b]
-model = "openai/gpt-oss-20b"
-container_name = "vllm_gpt_oss_20b"
-max_model_len = 65536
-gpu_memory_utilization = 0.92
-tensor_parallel_size = 1
-tool_call_parser = ""
-
-# OpenAI GPT-OSS 120B — powerful MoE (5.1B active / 117B total, MXFP4), ~65 GB on A100.
-# Hard architecture limit: max_position_embeddings=131072 in model config.json.
-[vllm.presets.gpt-oss-120b]
-model = "openai/gpt-oss-120b"
-container_name = "vllm_gpt_oss_120b"
-max_model_len = 131072
-gpu_memory_utilization = 0.92
-tensor_parallel_size = 1
-tool_call_parser = ""
-extra_vllm_args = ["--reasoning-parser", "openai_gptoss"]
-
 # Qwen2.5-Coder-32B-Instruct AWQ — best-in-class open coding model at 32B, ~18 GB on A100.
 [vllm.presets.qwen25-coder-32b]
 model = "Qwen/Qwen2.5-Coder-32B-Instruct-AWQ"
diff --git a/hypr.fish b/hypr.fish
index 60f8356..f243255 100644
--- a/hypr.fish
+++ b/hypr.fish
@@ -1,4 +1,5 @@
 # Dual-VM setup (hyperstack-vm1/vm2.toml -> hyperstack1/2.wg1)
+abbr pi-hyperstack         pi --model hyperstack1/Qwen/Qwen3.6-27B-FP8
 abbr pi-hyperstack-coder   pi --model hyperstack1/Qwen/Qwen3.6-27B-FP8
 abbr pi-hyperstack-qwen36  pi --model hyperstack2/Qwen/Qwen3.6-27B-FP8
 abbr pi-hyperstack-gemma4  pi --model hyperstack2/cyankiwi/gemma-4-31B-it-AWQ-4bit
diff --git a/lib/hyperstack/config.rb b/lib/hyperstack/config.rb
index 7057b4f..e41d1dd 100644
--- a/lib/hyperstack/config.rb
+++ b/lib/hyperstack/config.rb
@@ -49,7 +49,7 @@ module HyperstackVM
         'assign_floating_ip' => true,
         'create_bootable_volume' => false,
         'enable_port_randomization' => false,
-        'labels' => %w[gpt-oss-120b wireguard]
+        'labels' => %w[qwen36-27b wireguard]
       },
       'ssh' => {
         'username' => 'ubuntu',
@@ -81,7 +81,7 @@ module HyperstackVM
         'gpu_overhead_mb' => 2000,
         'num_parallel' => 1,
         'context_length' => 32_768,
-        'pull_models' => ['qwen3-coder:30b', 'gpt-oss:20b', 'gpt-oss:120b', 'nemotron-3-super']
+        'pull_models' => ['qwen3-coder:30b', 'qwen36-27b', 'nemotron-3-super']
       },
       'vllm' => {
         'install' => true,
diff --git a/lib/hyperstack/manager.rb b/lib/hyperstack/manager.rb
index 7a68199..e8382bb 100644
--- a/lib/hyperstack/manager.rb
+++ b/lib/hyperstack/manager.rb
@@ -938,7 +938,7 @@ module HyperstackVM
       req.body = JSON.generate(
         'model' => model,
         'messages' => [{ 'role' => 'user', 'content' => prompt }],
-        # 500 tokens: reasoning models (e.g. gpt-oss) use tokens for chain-of-thought
+        # 500 tokens: reasoning models use tokens for chain-of-thought
         # before content; 50 is too small and yields an empty content field.
         'max_tokens' => 500
       )
diff --git a/lib/hyperstack/provisioning.rb b/lib/hyperstack/provisioning.rb
index 0b56559..948cd2a 100644
--- a/lib/hyperstack/provisioning.rb
+++ b/lib/hyperstack/provisioning.rb
@@ -143,7 +143,7 @@ module HyperstackVM
       tp_size = Integer(cfg['tensor_parallel_size'] || @config.vllm_tensor_parallel_size)
       parser = cfg['tool_call_parser']
       # parser is nil only when preset explicitly omits the key and config has no default;
-      # empty string means "disable tool calling" (e.g. gpt-oss reasoning models).
+      # empty string means "disable tool calling" (preset sets tool_call_parser = "").
       parser = @config.vllm_tool_call_parser if parser.nil?
       # Fall back to the top-level [vllm] config values when no preset is in use.
       # This allows setting trust_remote_code / extra_vllm_args in the default [vllm] block
diff --git a/pi/agent/models.json b/pi/agent/models.json
index a5e8200..3636503 100644
--- a/pi/agent/models.json
+++ b/pi/agent/models.json
@@ -11,38 +11,6 @@
       },
       "models": [
         {
-          "id": "openai/gpt-oss-120b",
-          "name": "GPT-OSS 120B [vm]",
-          "reasoning": true,
-          "input": [
-            "text"
-          ],
-          "cost": {
-            "input": 0,
-            "output": 0,
-            "cacheRead": 0,
-            "cacheWrite": 0
-          },
-          "contextWindow": 131072,
-          "maxTokens": 8192
-        },
-        {
-          "id": "openai/gpt-oss-20b",
-          "name": "GPT-OSS 20B [vm]",
-          "reasoning": false,
-          "input": [
-            "text"
-          ],
-          "cost": {
-            "input": 0,
-            "output": 0,
-            "cacheRead": 0,
-            "cacheWrite": 0
-          },
-          "contextWindow": 65536,
-          "maxTokens": 8192
-        },
-        {
           "id": "Qwen/Qwen3.6-27B-FP8",
           "name": "Qwen3.6 27B FP8 [vm]",
           "reasoning": true,
@@ -255,38 +223,6 @@
           }
         },
         {
-          "id": "openai/gpt-oss-20b",
-          "name": "GPT-OSS 20B [vm1]",
-          "reasoning": false,
-          "input": [
-            "text"
-          ],
-          "cost": {
-            "input": 0,
-            "output": 0,
-            "cacheRead": 0,
-            "cacheWrite": 0
-          },
-          "contextWindow": 65536,
-          "maxTokens": 8192
-        },
-        {
-          "id": "openai/gpt-oss-120b",
-          "name": "GPT-OSS 120B [vm1]",
-          "reasoning": true,
-          "input": [
-            "text"
-          ],
-          "cost": {
-            "input": 0,
-            "output": 0,
-            "cacheRead": 0,
-            "cacheWrite": 0
-          },
-          "contextWindow": 131072,
-          "maxTokens": 8192
-        },
-        {
           "id": "Qwen/Qwen2.5-Coder-32B-Instruct-AWQ",
           "name": "Qwen2.5 Coder 32B [vm1]",
           "reasoning": false,
@@ -467,38 +403,6 @@
           "maxTokens": 8192
         },
         {
-          "id": "openai/gpt-oss-20b",
-          "name": "GPT-OSS 20B [vm2]",
-          "reasoning": false,
-          "input": [
-            "text"
-          ],
-          "cost": {
-            "input": 0,
-            "output": 0,
-            "cacheRead": 0,
-            "cacheWrite": 0
-          },
-          "contextWindow": 65536,
-          "maxTokens": 8192
-        },
-        {
-          "id": "openai/gpt-oss-120b",
-          "name": "GPT-OSS 120B [vm2]",
-          "reasoning": true,
-          "input": [
-            "text"
-          ],
-          "cost": {
-            "input": 0,
-            "output": 0,
-            "cacheRead": 0,
-            "cacheWrite": 0
-          },
-          "contextWindow": 131072,
-          "maxTokens": 8192
-        },
-        {
           "id": "Qwen/Qwen2.5-Coder-32B-Instruct-AWQ",
           "name": "Qwen2.5 Coder 32B [vm2]",
           "reasoning": false,