summaryrefslogtreecommitdiff
path: root/lib/hyperstack/watcher.rb
diff options
context:
space:
mode:
authorPaul Buetow <paul@buetow.org>2026-05-24 12:38:05 +0300
committerPaul Buetow <paul@buetow.org>2026-05-24 12:38:05 +0300
commit4d6230254d6ee6b677ef225ca9aec64904486a9b (patch)
tree292413b4fe16630a952bd0c1568300d51741c1db /lib/hyperstack/watcher.rb
parent008c30bca834447cad4f0aed427178cd155d4703 (diff)
cleanup: remove ComfyUI and photo-related code from lib/hyperstack
Diffstat (limited to 'lib/hyperstack/watcher.rb')
-rw-r--r--lib/hyperstack/watcher.rb89
1 files changed, 5 insertions, 84 deletions
diff --git a/lib/hyperstack/watcher.rb b/lib/hyperstack/watcher.rb
index c3eee0c..50c032f 100644
--- a/lib/hyperstack/watcher.rb
+++ b/lib/hyperstack/watcher.rb
@@ -20,7 +20,6 @@ module HyperstackVM
CLEAR = "\033[2J\033[H"
# Snapshot of one VM's stats at a point in time.
- # service_type is :vllm or :comfyui — controls which metrics section is rendered.
# loading_status holds the last meaningful log line while vLLM is still initialising;
# it is nil once the Engine 0 stats line starts appearing.
VmSnapshot = Struct.new(
@@ -66,8 +65,7 @@ module HyperstackVM
threads.map(&:value)
end
- # Fetches GPU stats and service stats for a single VM via one SSH session.
- # Routes to fetch_comfyui_vm or fetch_vllm_vm based on config.
+ # Fetches GPU stats and vLLM container stats for a single VM via one SSH session.
def fetch_vm(loader)
config = loader.config
label = File.basename(loader.path, '.toml')
@@ -75,19 +73,14 @@ module HyperstackVM
state = load_state(config.state_file)
unless state
- svc = config.comfyui_install_enabled? ? :comfyui : :vllm
- return VmSnapshot.new(label: label, wg_host: wg_host, service_type: svc,
+ return VmSnapshot.new(label: label, wg_host: wg_host, service_type: :vllm,
vllm_model: nil, container_name: nil,
metrics: nil, gpus: nil,
vllm_error: 'no state file', gpu_error: nil,
loading_status: nil, fetched_at: Time.now)
end
- if config.comfyui_install_enabled?
- fetch_comfyui_vm(config, label, wg_host)
- else
- fetch_vllm_vm(config, label, wg_host, state)
- end
+ fetch_vllm_vm(config, label, wg_host, state)
rescue StandardError => e
VmSnapshot.new(label: label || '?', wg_host: wg_host || '?', service_type: :vllm,
vllm_model: nil, container_name: nil,
@@ -110,64 +103,12 @@ module HyperstackVM
loading_status: loading_status, fetched_at: Time.now)
end
- # Fetches GPU + ComfyUI queue stats for a ComfyUI VM.
- # Returns queue running/pending counts and total outputs produced so far.
- def fetch_comfyui_vm(config, label, wg_host)
- gpus, metrics, ssh_error = fetch_comfyui_stats(config, wg_host, config.comfyui_port)
-
- VmSnapshot.new(label: label, wg_host: wg_host, service_type: :comfyui,
- vllm_model: nil, container_name: nil,
- metrics: metrics, gpus: gpus,
- vllm_error: ssh_error, gpu_error: ssh_error,
- loading_status: nil, fetched_at: Time.now)
- end
-
def load_state(path)
JSON.parse(File.read(path))
rescue Errno::ENOENT, JSON::ParserError
nil
end
- # Single SSH call: nvidia-smi + ComfyUI queue + output file count.
- # Sections separated by sentinel lines so we can split the output cleanly.
- # Returns [gpus, metrics_hash, error_or_nil].
- def fetch_comfyui_stats(config, wg_host, port)
- gpu_query = 'index,name,temperature.gpu,utilization.gpu,power.draw,memory.used,memory.total'
- script = <<~BASH
- nvidia-smi --query-gpu=#{gpu_query} --format=csv,noheader,nounits
- echo ===COMFYUI===
- curl -s http://localhost:#{port}/queue 2>/dev/null
- echo
- echo ===HISTORY===
- curl -s http://localhost:#{port}/history 2>/dev/null | python3 -c \
- "import json,sys; h=json.load(sys.stdin); print(len(h))" 2>/dev/null || echo 0
- BASH
-
- ssh = build_ssh_command(config, wg_host)
- stdout, stderr, status = Open3.capture3(*ssh, stdin_data: script)
- return [nil, nil, "exit #{status.exitstatus}: #{stderr.strip}"] unless status.success?
-
- gpu_section, rest = stdout.split("===COMFYUI===\n", 2)
- queue_section, hist_section = rest.to_s.split("===HISTORY===\n", 2)
- gpus = parse_nvidia_smi(gpu_section.to_s)
- metrics = parse_comfyui_queue(queue_section.to_s.strip, hist_section.to_s.strip)
- [gpus, metrics, nil]
- end
-
- # Parse ComfyUI /queue JSON into a plain Hash.
- def parse_comfyui_queue(queue_json, history_count_str)
- q = begin
- JSON.parse(queue_json)
- rescue StandardError
- {}
- end
- {
- 'queue_running' => Array(q['queue_running']).size,
- 'queue_pending' => Array(q['queue_pending']).size,
- 'history_count' => history_count_str.to_i
- }
- end
-
# Single SSH call that runs nvidia-smi and tails the vLLM container logs.
# Captures the Engine 0 stats line (present once the model is running) and,
# when that line is absent, the last relevant loading-phase log line so the
@@ -330,12 +271,10 @@ module HyperstackVM
LABEL_W = 10
# Renders a single VM panel as an array of strings (one per display line).
- # Routes the service-specific metrics section based on service_type.
def render_vm(snap)
lines = []
- svc_label = snap.service_type == :comfyui ? "#{DIM}ComfyUI#{RESET}" : ''
- model_label = snap.vllm_model ? DIM + snap.vllm_model.split('/').last + RESET : svc_label
+ model_label = snap.vllm_model ? DIM + snap.vllm_model.split('/').last + RESET : ''
lines << "#{BOLD}#{snap.label}#{RESET} #{DIM}#{snap.wg_host}#{RESET} #{model_label}"
# Both GPU and service stats come from the same SSH call; show one error if it failed.
@@ -349,9 +288,7 @@ module HyperstackVM
lines << bar_row('util', gpu.util_pct)
lines << bar_row('VRAM', mem_pct)
end
- if snap.service_type == :comfyui
- lines.concat(render_comfyui_metrics(snap.metrics))
- elsif snap.metrics&.any?
+ if snap.metrics&.any?
lines.concat(render_vllm_metrics(snap.metrics))
elsif snap.metrics
# Engine stats not yet available — model is still loading.
@@ -366,22 +303,6 @@ module HyperstackVM
lines
end
- # Formats ComfyUI queue stats into display lines.
- def render_comfyui_metrics(m)
- return [" #{DIM}(no ComfyUI stats)#{RESET}"] unless m&.any?
-
- running = m['queue_running'].to_i
- pending = m['queue_pending'].to_i
- history = m['history_count'].to_i
-
- q_str = running > 0 ? "#{GREEN}#{running} running#{RESET}" : "#{DIM}idle#{RESET}"
- q_str += " #{pending} queued" if pending > 0
- [
- row('queue', q_str),
- row('completed', "#{history} jobs total")
- ]
- end
-
# Formats the vLLM engine log stats into display lines.
# All values come directly from the "Engine 0" log line that vLLM emits
# every few seconds, so tok/s figures are the rolling averages vLLM computes