diff options
| author | Paul Buetow <paul@buetow.org> | 2026-05-24 12:38:05 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2026-05-24 12:38:05 +0300 |
| commit | 4d6230254d6ee6b677ef225ca9aec64904486a9b (patch) | |
| tree | 292413b4fe16630a952bd0c1568300d51741c1db /lib/hyperstack | |
| parent | 008c30bca834447cad4f0aed427178cd155d4703 (diff) | |
cleanup: remove ComfyUI and photo-related code from lib/hyperstack
Diffstat (limited to 'lib/hyperstack')
| -rw-r--r-- | lib/hyperstack/cli.rb | 11 | ||||
| -rw-r--r-- | lib/hyperstack/config.rb | 45 | ||||
| -rw-r--r-- | lib/hyperstack/manager.rb | 59 | ||||
| -rw-r--r-- | lib/hyperstack/provisioning.rb | 129 | ||||
| -rw-r--r-- | lib/hyperstack/watcher.rb | 89 |
5 files changed, 18 insertions, 315 deletions
diff --git a/lib/hyperstack/cli.rb b/lib/hyperstack/cli.rb index b1f0ca6..9be78b0 100644 --- a/lib/hyperstack/cli.rb +++ b/lib/hyperstack/cli.rb @@ -130,7 +130,7 @@ module HyperstackVM # (create-both), the --model flag is not registered because each VM uses its own # TOML default. Returns a hash suitable for splatting into Manager#create. def parse_create_options(argv, include_model_preset: true) - opts = { replace: false, dry_run: false, install_vllm: nil, install_ollama: nil, install_comfyui: nil, + opts = { replace: false, dry_run: false, install_vllm: nil, install_ollama: nil, vllm_preset: nil } OptionParser.new do |o| o.on('--replace', 'Delete the tracked VM before creating a new one') { opts[:replace] = true } @@ -139,8 +139,6 @@ module HyperstackVM o.on('--no-vllm', 'Disable vLLM setup (overrides config)') { opts[:install_vllm] = false } o.on('--ollama', 'Enable Ollama setup (overrides config)') { opts[:install_ollama] = true } o.on('--no-ollama', 'Disable Ollama setup (overrides config)') { opts[:install_ollama] = false } - o.on('--comfyui', 'Enable ComfyUI setup (overrides config)') { opts[:install_comfyui] = true } - o.on('--no-comfyui', 'Disable ComfyUI setup (overrides config)') { opts[:install_comfyui] = false } if include_model_preset o.on('--model PRESET', 'Use a named vLLM preset at create time') do |v| opts[:vllm_preset] = v @@ -238,8 +236,7 @@ module HyperstackVM candidates = [ @config_path, File.join(REPO_ROOT, 'hyperstack-vm1.toml'), - File.join(REPO_ROOT, 'hyperstack-vm2.toml'), - File.join(REPO_ROOT, 'hyperstack-vm-photo.toml') + File.join(REPO_ROOT, 'hyperstack-vm2.toml') ].uniq.select { |path| File.exist?(path) } loaders = candidates.map { |path| ConfigLoader.load(path) } @@ -259,7 +256,7 @@ module HyperstackVM # VM2 adds its peer. A Mutex+ConditionVariable acts as a one-shot latch between threads. # If VM1 fails before reaching the WG step the latch is still released so VM2 doesn't hang. # vllm_preset is accepted but ignored — each VM uses its own TOML default preset. - def run_create_both(replace:, dry_run:, install_vllm:, install_ollama:, install_comfyui: nil, vllm_preset: nil) # rubocop:disable Lint/UnusedMethodArgument + def run_create_both(replace:, dry_run:, install_vllm:, install_ollama:, vllm_preset: nil) # rubocop:disable Lint/UnusedMethodArgument vm1_loader, vm2_loader = pair_config_loaders vm1_config = vm1_loader.config vm2_config = vm2_loader.config @@ -293,7 +290,7 @@ module HyperstackVM errors = {} errors_mutex = Mutex.new create_opts = { replace: replace, dry_run: dry_run, - install_vllm: install_vllm, install_ollama: install_ollama, install_comfyui: install_comfyui } + install_vllm: install_vllm, install_ollama: install_ollama } vm1_thread = Thread.new do manager1.create(**create_opts) diff --git a/lib/hyperstack/config.rb b/lib/hyperstack/config.rb index 076cbed..ba143e7 100644 --- a/lib/hyperstack/config.rb +++ b/lib/hyperstack/config.rb @@ -93,19 +93,6 @@ module HyperstackVM 'tensor_parallel_size' => 1, 'tool_call_parser' => 'qwen3_coder' }, - 'comfyui' => { - 'install' => false, - 'port' => 8188, - 'models_dir' => '/ephemeral/comfyui/models', - 'output_dir' => '/ephemeral/comfyui/output', - 'container_name' => 'comfyui', - # Models to pre-download: Real-ESRGAN for fast upscaling, SUPIR for deep restoration. - 'models' => [] - }, - 'wireguard' => { - 'auto_setup' => true, - 'setup_script' => './wg1-setup.sh' - }, 'local_client' => { 'check_wg1_service' => true, 'interface_name' => 'wg1', @@ -114,7 +101,7 @@ module HyperstackVM }.freeze def validate! - %w[auth hyperstack state vm ssh network bootstrap ollama vllm comfyui wireguard local_client].each do |section| + %w[auth hyperstack state vm ssh network bootstrap ollama vllm wireguard local_client].each do |section| raise Error, "Missing config section [#{section}]" unless @data.key?(section) end @@ -498,31 +485,6 @@ module HyperstackVM } end - def comfyui_install_enabled? - truthy?(fetch('comfyui', 'install')) - end - - def comfyui_port - Integer(fetch('comfyui', 'port')) - end - - def comfyui_models_dir - fetch('comfyui', 'models_dir') - end - - def comfyui_output_dir - fetch('comfyui', 'output_dir') - end - - def comfyui_container_name - fetch('comfyui', 'container_name') - end - - # Models to pre-download during provisioning (e.g. RealESRGAN_x4plus, SUPIR-v0Q). - def comfyui_models - Array(fetch('comfyui', 'models')).map(&:to_s) - end - def local_client_checks_enabled? truthy?(fetch('local_client', 'check_wg1_service')) end @@ -543,8 +505,7 @@ module HyperstackVM expand_path(fetch('wireguard', 'setup_script')) end - def desired_security_rules(include_ollama: ollama_install_enabled?, include_vllm: vllm_install_enabled?, - include_comfyui: comfyui_install_enabled?) + def desired_security_rules(include_ollama: ollama_install_enabled?, include_vllm: vllm_install_enabled?) rules = [] allowed_ssh_cidrs.each do |cidr| @@ -556,8 +517,6 @@ module HyperstackVM end rules << firewall_rule('tcp', ollama_port, wireguard_subnet) if include_ollama || include_vllm - # ComfyUI REST API on its own port, restricted to the WireGuard subnet. - rules << firewall_rule('tcp', comfyui_port, wireguard_subnet) if include_comfyui rules.uniq end diff --git a/lib/hyperstack/manager.rb b/lib/hyperstack/manager.rb index 39b8767..7a68199 100644 --- a/lib/hyperstack/manager.rb +++ b/lib/hyperstack/manager.rb @@ -28,12 +28,11 @@ module HyperstackVM @wg_setup_post = wg_setup_post end - def create(replace: false, dry_run: false, install_vllm: nil, install_ollama: nil, install_comfyui: nil, + def create(replace: false, dry_run: false, install_vllm: nil, install_ollama: nil, vllm_preset: nil) # CLI flags override config; nil means "use config default". @effective_vllm = install_vllm.nil? ? @config.vllm_install_enabled? : install_vllm @effective_ollama = install_ollama.nil? ? @config.ollama_install_enabled? : install_ollama - @effective_comfyui = install_comfyui.nil? ? @config.comfyui_install_enabled? : install_comfyui # Validate preset name early so we fail before touching any remote state. @effective_vllm_preset = vllm_preset @config.vllm_preset(vllm_preset) if vllm_preset @@ -140,18 +139,12 @@ module HyperstackVM missing_rules = desired - current vllm_enabled = state_vllm_enabled?(state) ollama_enabled = state_ollama_enabled?(state) - comfyui_enabled = state_comfyui_enabled?(state) info "Tracked VM: #{state['vm_id']} #{vm['name']}" info "Status: #{vm['status']} / #{vm['vm_state']}" info "Public IP: #{connect_host_for(vm) || 'none'}" - info "Service mode: #{service_mode_summary(vllm_enabled: vllm_enabled, ollama_enabled: ollama_enabled, - comfyui_enabled: comfyui_enabled)}" + info "Service mode: #{service_mode_summary(vllm_enabled: vllm_enabled, ollama_enabled: ollama_enabled)}" info "Active model: #{state['vllm_model'] || @config.vllm_model}" if vllm_enabled - if comfyui_enabled - wg_ip = @config.wireguard_gateway_hostname - info "ComfyUI: http://#{wg_ip}:#{@config.comfyui_port}" - end info "Missing firewall rules: #{missing_rules.empty? ? 'none' : missing_rules.size}" rescue Error => e warn "Unable to load VM #{state['vm_id']}: #{e.message}" @@ -262,7 +255,6 @@ module HyperstackVM state['bootstrapped_at'].nil? || ollama_setup_needed?(state) || vllm_setup_needed?(state) || - comfyui_setup_needed?(state) || wireguard_setup_needed?(state) ) end @@ -329,15 +321,6 @@ module HyperstackVM @state_store.save(state) end - # Set up ComfyUI after the tunnel is up so model downloads are visible locally. - if comfyui_setup_needed?(state) - @provisioner.install_comfyui(state['public_ip']) - state['comfyui_setup_at'] = Time.now.utc.iso8601 - state['comfyui_container_name'] = @config.comfyui_container_name - state['comfyui_models'] = @config.comfyui_models - @state_store.save(state) - end - vm = @client.get_vm(vm_id) state['security_rules'] = Array(vm['security_rules']).map { |rule| normalize_rule(rule) } state['status'] = vm['status'] @@ -349,7 +332,6 @@ module HyperstackVM print_local_wireguard_summary(state['public_ip']) # Run end-to-end tests automatically so the human doesn't need a manual step. test - info " Enhance: ruby photo-enhance.rb --config #{File.basename(@config.path)} --indir ~/Pictures --outdir ~/Pictures/enhanced" end def build_create_payload(vm_name, resolved) @@ -718,21 +700,17 @@ module HyperstackVM def sync_service_mode_state(state) state['services'] = { 'vllm_enabled' => effective_vllm?, - 'ollama_enabled' => effective_ollama?, - 'comfyui_enabled' => effective_comfyui? + 'ollama_enabled' => effective_ollama? } end - def desired_security_rules(include_vllm: effective_vllm?, include_ollama: effective_ollama?, - include_comfyui: effective_comfyui?) - @config.desired_security_rules(include_vllm: include_vllm, include_ollama: include_ollama, - include_comfyui: include_comfyui) + def desired_security_rules(include_vllm: effective_vllm?, include_ollama: effective_ollama?) + @config.desired_security_rules(include_vllm: include_vllm, include_ollama: include_ollama) end def desired_security_rules_for_state(state) desired_security_rules(include_vllm: state_vllm_enabled?(state), - include_ollama: state_ollama_enabled?(state), - include_comfyui: state_comfyui_enabled?(state)) + include_ollama: state_ollama_enabled?(state)) end def legacy_litellm_rules(rules) @@ -763,20 +741,10 @@ module HyperstackVM @config.ollama_install_enabled? end - def state_comfyui_enabled?(state) - recorded = state&.dig('services', 'comfyui_enabled') - return recorded unless recorded.nil? - - return true if state&.key?('comfyui_setup_at') - - @config.comfyui_install_enabled? - end - - def service_mode_summary(vllm_enabled:, ollama_enabled:, comfyui_enabled: false) + def service_mode_summary(vllm_enabled:, ollama_enabled:) parts = [] parts << 'vLLM' if vllm_enabled parts << 'Ollama' if ollama_enabled - parts << 'ComfyUI' if comfyui_enabled return 'All inference services disabled' if parts.empty? "#{parts.join(', ')} enabled" @@ -938,19 +906,6 @@ module HyperstackVM state['vllm_model'] != desired end - # Returns the effective ComfyUI flag: CLI override if set, else config default. - def effective_comfyui? - defined?(@effective_comfyui) ? @effective_comfyui : @config.comfyui_install_enabled? - end - - def comfyui_setup_needed?(state) - return false unless effective_comfyui? - return true if state['comfyui_setup_at'].nil? - - # Re-run if the desired model list changed since last provision. - (@config.comfyui_models.sort != Array(state['comfyui_models']).sort) - end - # Tests the vLLM OpenAI-compatible API: lists loaded models and runs a # short inference request to confirm the model accepts requests. def test_vllm(wg_ip) diff --git a/lib/hyperstack/provisioning.rb b/lib/hyperstack/provisioning.rb index 4d33731..0b56559 100644 --- a/lib/hyperstack/provisioning.rb +++ b/lib/hyperstack/provisioning.rb @@ -34,10 +34,6 @@ module HyperstackVM script << "sudo ufw allow #{@config.wireguard_udp_port}/udp comment 'WireGuard #{@config.local_interface_name}' >/dev/null 2>&1 || true" # Port 11434 is shared by Ollama and vLLM; open for both regardless of which is installed. script << "sudo ufw allow from #{Shellwords.escape(@config.wireguard_subnet)} to any port #{@config.ollama_port} proto tcp comment 'Inference API (Ollama/vLLM) via #{@config.local_interface_name}' >/dev/null 2>&1 || true" - # ComfyUI REST API on port 8188; only open when ComfyUI is enabled. - if @config.comfyui_install_enabled? - script << "sudo ufw allow from #{Shellwords.escape(@config.wireguard_subnet)} to any port #{@config.comfyui_port} proto tcp comment 'ComfyUI API via #{@config.local_interface_name}' >/dev/null 2>&1 || true" - end end if @config.configure_ollama_host? @@ -249,125 +245,6 @@ module HyperstackVM script.join("\n") end - def comfyui_install_script - models_dir = @config.comfyui_models_dir - output_dir = @config.comfyui_output_dir - port = @config.comfyui_port - model_names = @config.comfyui_models - # Use ubuntu home dir to avoid /opt permission issues when running as the SSH user. - install_dir = '/home/ubuntu/ComfyUI' - venv_dir = '/home/ubuntu/comfyui-venv' - service = 'comfyui' - - script = [] - script << 'set -euo pipefail' - - # Wait for apt locks released by unattended-upgrades before touching packages. - script << 'for i in $(seq 1 30); do' - script << ' if ! fuser /var/lib/dpkg/lock-frontend /var/lib/apt/lists/lock /var/cache/apt/archives/lock >/dev/null 2>&1; then break; fi' - script << ' echo " apt lock held, waiting ($i/30)..."; sleep 10' - script << 'done' - script << 'sudo pkill -f unattended-upgrade >/dev/null 2>&1 || true' - - # Install system deps: git, python venv, wget. - script << 'sudo apt-get update -qq' - script << 'sudo apt-get install -y -qq git python3-venv python3-pip wget' - - # Ephemeral NVMe dirs for models and output. - script << "sudo mkdir -p #{Shellwords.escape(models_dir)} #{Shellwords.escape(output_dir)}" - script << "sudo chmod -R 0777 #{Shellwords.escape(models_dir)} #{Shellwords.escape(output_dir)}" - - # Clone or update ComfyUI from the official repo (no sudo needed in ubuntu home). - script << "if [ ! -d #{Shellwords.escape(install_dir)} ]; then" - script << " git clone --depth 1 https://github.com/comfyanonymous/ComfyUI #{Shellwords.escape(install_dir)}" - script << 'else' - script << " git -C #{Shellwords.escape(install_dir)} pull --ff-only" - script << 'fi' - - # Create Python venv and install PyTorch + ComfyUI deps. - # CUDA 12.8 is installed on the VM; cu128 wheel index covers it. - script << "[ -d #{Shellwords.escape(venv_dir)} ] || python3 -m venv #{Shellwords.escape(venv_dir)}" - script << "#{venv_dir}/bin/pip install --quiet --upgrade pip" - script << "#{venv_dir}/bin/pip install --quiet torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128" - script << "#{venv_dir}/bin/pip install --quiet -r #{Shellwords.escape("#{install_dir}/requirements.txt")}" - - # Symlink ephemeral model/output dirs into the ComfyUI directory tree. - script << "rm -rf #{Shellwords.escape("#{install_dir}/models")} && ln -sfn #{Shellwords.escape(models_dir)} #{Shellwords.escape("#{install_dir}/models")}" - script << "rm -rf #{Shellwords.escape("#{install_dir}/output")} && ln -sfn #{Shellwords.escape(output_dir)} #{Shellwords.escape("#{install_dir}/output")}" - - # Systemd service so ComfyUI starts on reboot. - script << "cat <<'UNIT' | sudo tee /etc/systemd/system/#{Shellwords.escape(service)}.service >/dev/null" - script << '[Unit]' - script << 'Description=ComfyUI photo enhancement server' - script << 'After=network.target' - script << '[Service]' - script << "ExecStart=#{venv_dir}/bin/python #{install_dir}/main.py --listen 0.0.0.0 --port #{port} --output-directory #{output_dir}" - script << 'Restart=on-failure' - script << 'RestartSec=5' - script << "WorkingDirectory=#{install_dir}" - script << 'Environment=HOME=/root' - script << '[Install]' - script << 'WantedBy=multi-user.target' - script << 'UNIT' - script << 'sudo systemctl daemon-reload' - script << "sudo systemctl enable --now #{Shellwords.escape(service)}" - script << "sudo systemctl restart #{Shellwords.escape(service)}" - - # Wait for ComfyUI API to respond (model loading and CUDA init can take ~60s). - script << 'echo "Waiting for ComfyUI to become ready (up to 5 min)..."' - script << 'for i in $(seq 1 60); do' - script << " if curl -sf http://localhost:#{port}/system_stats >/dev/null 2>&1; then echo comfyui-ready; break; fi" - script << ' echo " ComfyUI not ready yet ($i/60)..."; sleep 5' - script << 'done' - script << "curl -sf http://localhost:#{port}/system_stats >/dev/null || { echo 'FATAL: ComfyUI did not become ready within 5 minutes'; exit 1; }" - - # Install ComfyUI-SUPIR custom node (provides SUPIR_Upscale and related nodes). - supir_node_dir = "#{install_dir}/custom_nodes/ComfyUI-SUPIR" - script << "if [ ! -d #{Shellwords.escape(supir_node_dir)} ]; then" - script << " git clone --depth 1 https://github.com/kijai/ComfyUI-SUPIR #{Shellwords.escape(supir_node_dir)}" - script << " #{venv_dir}/bin/pip install --quiet -r #{Shellwords.escape("#{supir_node_dir}/requirements.txt")}" - script << 'fi' - - # Download model weights into the ComfyUI subdirectories. - # Real-ESRGAN → upscale_models/; SUPIR → checkpoints/; SDXL base → checkpoints/. - model_names.each do |model_name| - case model_name - when /RealESRGAN/i - dest_dir = "#{models_dir}/upscale_models" - url = if model_name =~ /anime/i - 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth' - else - 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth' - end - dest_file = "#{dest_dir}/#{model_name}.pth" - script << "mkdir -p #{Shellwords.escape(dest_dir)}" - script << "[ -f #{Shellwords.escape(dest_file)} ] || wget -q --show-progress -O #{Shellwords.escape(dest_file)} #{Shellwords.escape(url)}" - when /SUPIR/i - # SUPIR-v0Q (~5 GB): AI photo restoration backbone (denoising + detail recovery). - # SDXL base (~7 GB): provides CLIP encoders that SUPIR uses for text conditioning. - # Both must live in checkpoints/ so SUPIR_Upscale can find them by filename. - dest_dir = "#{models_dir}/checkpoints" - hf_file = model_name.end_with?('F') ? 'SUPIR-v0F.ckpt' : 'SUPIR-v0Q.ckpt' - supir_url = "https://huggingface.co/camenduru/SUPIR/resolve/main/#{hf_file}" - sdxl_url = 'https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors' - script << "mkdir -p #{Shellwords.escape(dest_dir)}" - script << "[ -f #{Shellwords.escape("#{dest_dir}/#{hf_file}")} ] || wget -q --show-progress -O #{Shellwords.escape("#{dest_dir}/#{hf_file}")} #{Shellwords.escape(supir_url)}" - script << "[ -f #{Shellwords.escape("#{dest_dir}/sd_xl_base_1.0.safetensors")} ] || wget -q --show-progress -O #{Shellwords.escape("#{dest_dir}/sd_xl_base_1.0.safetensors")} #{Shellwords.escape(sdxl_url)}" - end - end - - # Restart ComfyUI so it picks up the new custom nodes and model files. - script << "sudo systemctl restart #{Shellwords.escape(service)}" - script << 'echo "Waiting for ComfyUI restart..."' - script << 'for i in $(seq 1 60); do' - script << " if curl -sf http://localhost:#{port}/system_stats >/dev/null 2>&1; then echo comfyui-ready; break; fi" - script << ' echo " ComfyUI not ready yet ($i/60)..."; sleep 5' - script << 'done' - - script << 'echo comfyui-install-ok' - script.join("\n") - end - def litellm_decommission_script script = [] script << 'set -euo pipefail' @@ -457,12 +334,6 @@ module HyperstackVM install_vllm(host, preset_config: preset_config) end - def install_comfyui(host) - info "Setting up ComfyUI Docker container on #{host}..." - output, status = @ssh_stream_runner.call(host, @scripts.comfyui_install_script) - raise Error, "ComfyUI install failed: #{output.strip}" unless status.success? - end - private def verify_remote_models(host) diff --git a/lib/hyperstack/watcher.rb b/lib/hyperstack/watcher.rb index c3eee0c..50c032f 100644 --- a/lib/hyperstack/watcher.rb +++ b/lib/hyperstack/watcher.rb @@ -20,7 +20,6 @@ module HyperstackVM CLEAR = "\033[2J\033[H" # Snapshot of one VM's stats at a point in time. - # service_type is :vllm or :comfyui — controls which metrics section is rendered. # loading_status holds the last meaningful log line while vLLM is still initialising; # it is nil once the Engine 0 stats line starts appearing. VmSnapshot = Struct.new( @@ -66,8 +65,7 @@ module HyperstackVM threads.map(&:value) end - # Fetches GPU stats and service stats for a single VM via one SSH session. - # Routes to fetch_comfyui_vm or fetch_vllm_vm based on config. + # Fetches GPU stats and vLLM container stats for a single VM via one SSH session. def fetch_vm(loader) config = loader.config label = File.basename(loader.path, '.toml') @@ -75,19 +73,14 @@ module HyperstackVM state = load_state(config.state_file) unless state - svc = config.comfyui_install_enabled? ? :comfyui : :vllm - return VmSnapshot.new(label: label, wg_host: wg_host, service_type: svc, + return VmSnapshot.new(label: label, wg_host: wg_host, service_type: :vllm, vllm_model: nil, container_name: nil, metrics: nil, gpus: nil, vllm_error: 'no state file', gpu_error: nil, loading_status: nil, fetched_at: Time.now) end - if config.comfyui_install_enabled? - fetch_comfyui_vm(config, label, wg_host) - else - fetch_vllm_vm(config, label, wg_host, state) - end + fetch_vllm_vm(config, label, wg_host, state) rescue StandardError => e VmSnapshot.new(label: label || '?', wg_host: wg_host || '?', service_type: :vllm, vllm_model: nil, container_name: nil, @@ -110,64 +103,12 @@ module HyperstackVM loading_status: loading_status, fetched_at: Time.now) end - # Fetches GPU + ComfyUI queue stats for a ComfyUI VM. - # Returns queue running/pending counts and total outputs produced so far. - def fetch_comfyui_vm(config, label, wg_host) - gpus, metrics, ssh_error = fetch_comfyui_stats(config, wg_host, config.comfyui_port) - - VmSnapshot.new(label: label, wg_host: wg_host, service_type: :comfyui, - vllm_model: nil, container_name: nil, - metrics: metrics, gpus: gpus, - vllm_error: ssh_error, gpu_error: ssh_error, - loading_status: nil, fetched_at: Time.now) - end - def load_state(path) JSON.parse(File.read(path)) rescue Errno::ENOENT, JSON::ParserError nil end - # Single SSH call: nvidia-smi + ComfyUI queue + output file count. - # Sections separated by sentinel lines so we can split the output cleanly. - # Returns [gpus, metrics_hash, error_or_nil]. - def fetch_comfyui_stats(config, wg_host, port) - gpu_query = 'index,name,temperature.gpu,utilization.gpu,power.draw,memory.used,memory.total' - script = <<~BASH - nvidia-smi --query-gpu=#{gpu_query} --format=csv,noheader,nounits - echo ===COMFYUI=== - curl -s http://localhost:#{port}/queue 2>/dev/null - echo - echo ===HISTORY=== - curl -s http://localhost:#{port}/history 2>/dev/null | python3 -c \ - "import json,sys; h=json.load(sys.stdin); print(len(h))" 2>/dev/null || echo 0 - BASH - - ssh = build_ssh_command(config, wg_host) - stdout, stderr, status = Open3.capture3(*ssh, stdin_data: script) - return [nil, nil, "exit #{status.exitstatus}: #{stderr.strip}"] unless status.success? - - gpu_section, rest = stdout.split("===COMFYUI===\n", 2) - queue_section, hist_section = rest.to_s.split("===HISTORY===\n", 2) - gpus = parse_nvidia_smi(gpu_section.to_s) - metrics = parse_comfyui_queue(queue_section.to_s.strip, hist_section.to_s.strip) - [gpus, metrics, nil] - end - - # Parse ComfyUI /queue JSON into a plain Hash. - def parse_comfyui_queue(queue_json, history_count_str) - q = begin - JSON.parse(queue_json) - rescue StandardError - {} - end - { - 'queue_running' => Array(q['queue_running']).size, - 'queue_pending' => Array(q['queue_pending']).size, - 'history_count' => history_count_str.to_i - } - end - # Single SSH call that runs nvidia-smi and tails the vLLM container logs. # Captures the Engine 0 stats line (present once the model is running) and, # when that line is absent, the last relevant loading-phase log line so the @@ -330,12 +271,10 @@ module HyperstackVM LABEL_W = 10 # Renders a single VM panel as an array of strings (one per display line). - # Routes the service-specific metrics section based on service_type. def render_vm(snap) lines = [] - svc_label = snap.service_type == :comfyui ? "#{DIM}ComfyUI#{RESET}" : '' - model_label = snap.vllm_model ? DIM + snap.vllm_model.split('/').last + RESET : svc_label + model_label = snap.vllm_model ? DIM + snap.vllm_model.split('/').last + RESET : '' lines << "#{BOLD}#{snap.label}#{RESET} #{DIM}#{snap.wg_host}#{RESET} #{model_label}" # Both GPU and service stats come from the same SSH call; show one error if it failed. @@ -349,9 +288,7 @@ module HyperstackVM lines << bar_row('util', gpu.util_pct) lines << bar_row('VRAM', mem_pct) end - if snap.service_type == :comfyui - lines.concat(render_comfyui_metrics(snap.metrics)) - elsif snap.metrics&.any? + if snap.metrics&.any? lines.concat(render_vllm_metrics(snap.metrics)) elsif snap.metrics # Engine stats not yet available — model is still loading. @@ -366,22 +303,6 @@ module HyperstackVM lines end - # Formats ComfyUI queue stats into display lines. - def render_comfyui_metrics(m) - return [" #{DIM}(no ComfyUI stats)#{RESET}"] unless m&.any? - - running = m['queue_running'].to_i - pending = m['queue_pending'].to_i - history = m['history_count'].to_i - - q_str = running > 0 ? "#{GREEN}#{running} running#{RESET}" : "#{DIM}idle#{RESET}" - q_str += " #{pending} queued" if pending > 0 - [ - row('queue', q_str), - row('completed', "#{history} jobs total") - ] - end - # Formats the vLLM engine log stats into display lines. # All values come directly from the "Engine 0" log line that vLLM emits # every few seconds, so tok/s figures are the rolling averages vLLM computes |
