nemotron-super: use qwen3_xml tool call parser — same XML format, works

Both Nemotron and Qwen3-XML use the identical <tool_call><function=name><parameter=p>value</parameter></function></tool_call> format. qwen3_xml correctly parses Nemotron's output, so tool calling now works with opencode and other API clients.

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
author: Paul Buetow <paul@buetow.org> 2026-03-18 17:47:45 +0200
committer: Paul Buetow <paul@buetow.org> 2026-03-18 17:47:45 +0200
commit: 1122c9373cadb90d28b8d588e73f84b86237fd15 (patch)
tree: 757cdf425cedd54eb4fa82b211b4c67f4996f2d2 /snippets/hyperstack/hyperstack-vm.toml
parent: a7d3d2d4339815cf4a39b58873069b07a0ac1d47 (diff)
1 files changed, 4 insertions, 4 deletions
diff --git a/snippets/hyperstack/hyperstack-vm.toml b/snippets/hyperstack/hyperstack-vm.toml
index e739d5f..4e47a76 100644
--- a/snippets/hyperstack/hyperstack-vm.toml
+++ b/snippets/hyperstack/hyperstack-vm.toml
@@ -94,16 +94,16 @@ tool_call_parser = "qwen3_coder"
 # Requires trust_remote_code=true for the nemotron_h architecture.
 # Note: cyankiwi AWQ has model_type="nemotron_nas" (underscore); vLLM keys on "nemotron-nas"
 # (hyphen), so vLLM may not recognise it without trust_remote_code and latest vLLM.
-# Tool calling: Nemotron uses a custom XML format (<tool_call><function=...><parameter=...>)
-# not supported by any vLLM 0.17.1 built-in parser. tool_call_parser="" disables tool calling.
-# Use for long-context analysis and reasoning; switch to qwen3-coder-next for agentic work.
+# NVIDIA Nemotron-3-Super uses the same XML tool call format as Qwen3 XML:
+# <tool_call><function=name><parameter=p>value</parameter></function></tool_call>
+# qwen3_xml handles this format and is compatible with Nemotron's chat template.
 [vllm.presets.nemotron-super]
 model = "cyankiwi/NVIDIA-Nemotron-3-Super-120B-A12B-AWQ-4bit"
 container_name = "vllm_nemotron_super"
 max_model_len = 65536
 gpu_memory_utilization = 0.92
 tensor_parallel_size = 1
-tool_call_parser = ""
+tool_call_parser = "qwen3_xml"
 trust_remote_code = true
 # nemotron_v3 reasoning parser exposes <think> tokens as reasoning_content in the API.
 extra_vllm_args = ["--reasoning-parser", "nemotron_v3"]
author	Paul Buetow <paul@buetow.org>	2026-03-18 17:47:45 +0200
committer	Paul Buetow <paul@buetow.org>	2026-03-18 17:47:45 +0200
commit	1122c9373cadb90d28b8d588e73f84b86237fd15 (patch)
tree	757cdf425cedd54eb4fa82b211b4c67f4996f2d2 /snippets/hyperstack/hyperstack-vm.toml
parent	a7d3d2d4339815cf4a39b58873069b07a0ac1d47 (diff)