# ai-stack — combined deployment example: caddy + comfyui + ollama +
# open-webui (+ anubis later). One GPU host, one bridge network, one TLS
# entry point.
#
# This is the "full meal" deployment. The minimal "just ComfyUI" example
# lives in the repo root (../../docker-compose.yml).

name: ai-stack

volumes:
  comfyui-models:
  comfyui-custom-nodes:
  comfyui-input:
  comfyui-output:
  comfyui-user:
  ollama-data:
  open-webui-data:
  caddy-data:
  caddy-config:

services:
  # ---------------------------------------------------------------------------
  # Caddy — the only service exposed on 80/443. Terminates TLS (auto Let's
  # Encrypt), reverse-proxies to the in-compose services by name. A sketch of
  # what the Caddyfile might look like is at the bottom of this file.
  # ---------------------------------------------------------------------------
  caddy:
    image: caddy:${CADDY_TAG:-2-alpine}
    container_name: caddy
    restart: unless-stopped
    ports:
      - "80:80"
      - "443:443"
      - "443:443/udp" # HTTP/3
    volumes:
      - ./Caddyfile:/etc/caddy/Caddyfile:ro
      - caddy-data:/data
      - caddy-config:/config
    depends_on:
      - open-webui
      - comfyui
    healthcheck:
      test: ["CMD", "wget", "-qO-", "http://127.0.0.1:2019/config/"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 10s

  # ---------------------------------------------------------------------------
  # Ollama — LLM daemon, GPU-backed.
  # ---------------------------------------------------------------------------
  ollama:
    image: ollama/ollama:${OLLAMA_TAG:-latest}
    container_name: ollama
    restart: unless-stopped
    # 11434 is only published so you can reach the daemon directly from the
    # VM host. Services inside the stack reach it via http://ollama:11434.
    ports:
      - "11434:11434"
    volumes:
      - ollama-data:/root/.ollama
    environment:
      - OLLAMA_HOST=0.0.0.0:11434
      # KEEP_ALIVE=-1 holds loaded models in VRAM until evicted by another
      # load (vs. the default 5m, or our previous 30m, both of which force a
      # reload penalty on every cold use). Pair with MAX_LOADED_MODELS sized
      # to whatever fits in your GPU's VRAM — see README "VRAM sizing".
      - OLLAMA_KEEP_ALIVE=-1
      - OLLAMA_MAX_LOADED_MODELS=3
      - OLLAMA_FLASH_ATTENTION=1
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    healthcheck:
      test: ["CMD-SHELL", "ollama list >/dev/null 2>&1 || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s

  # One-shot model puller. Runs after ollama is healthy, pulls whatever
  # init-models.sh lists, then exits. `restart: "no"` keeps it from looping.
  # A sketch of what the script might contain follows this service.
  #
  # Models can come from registry.ollama.ai (default) or your own S3
  # mirror (set S3_OLLAMA_BASE in .env; create tarballs with
  # mirror-ollama-model.sh).
  model-init:
    image: ollama/ollama:${OLLAMA_TAG:-latest}
    container_name: ollama-model-init
    depends_on:
      ollama:
        condition: service_healthy
    volumes:
      - ollama-data:/root/.ollama
      - ./init-models.sh:/init-models.sh:ro
    environment:
      - OLLAMA_HOST=ollama:11434
      - S3_OLLAMA_BASE=${S3_OLLAMA_BASE:-}
    entrypoint: ["/bin/sh", "/init-models.sh"]
    restart: "no"
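  # What init-models.sh might contain: a minimal sketch, assuming plain
  # registry pulls only (the real script lives in this repo and also handles
  # the S3_OLLAMA_BASE tarball path; the model names are placeholders):
  #
  #   #!/bin/sh
  #   set -eu
  #   for model in llama3.1:8b qwen2.5-coder:7b; do
  #     ollama pull "$model"   # the CLI reaches the daemon via OLLAMA_HOST
  #   done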
  # ---------------------------------------------------------------------------
  # ComfyUI — image generation (txt2img + img2img), GPU-backed.
  # No host port published — Caddy fronts it. Uncomment 8188 only for
  # VM-local debugging or to use ComfyUI's native web UI directly.
  #
  # Replaces the figment + segment + Forge trio. Open WebUI talks to
  # /prompt directly; SAM2/Grounding-DINO masking lives in custom_nodes
  # (install via ComfyUI-Manager) instead of as a separate sidecar.
  # ---------------------------------------------------------------------------
  comfyui:
    image: git.anomalous.dev/alphacentri/comfyui-nvidia:${COMFYUI_IMAGE_TAG:-0.2.1}
    pull_policy: always
    container_name: comfyui
    restart: unless-stopped
    # ports:
    #   - "8188:8188"
    volumes:
      - comfyui-models:/opt/comfyui/models
      - comfyui-custom-nodes:/opt/comfyui/custom_nodes
      - comfyui-input:/opt/comfyui/input
      - comfyui-output:/opt/comfyui/output
      - comfyui-user:/opt/comfyui/user
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    healthcheck:
      test: ["CMD", "curl", "-sf", "http://127.0.0.1:8188/system_stats"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 120s

  # One-shot model puller for ComfyUI. Mounts the same models volume,
  # downloads whatever comfyui-init-models.sh lists, then exits. ComfyUI
  # doesn't need to be running for this — files just land on the volume,
  # and ComfyUI picks them up the next time it scans (or on a restart).
  # A sketch of what the script might contain is at the bottom of this file.
  comfyui-model-init:
    image: alpine:${ALPINE_TAG:-3.20}
    container_name: comfyui-model-init
    volumes:
      - comfyui-models:/models
      - ./comfyui-init-models.sh:/init.sh:ro
    environment:
      # Optional — set in .env to download from gated HuggingFace repos
      # (Flux-dev, SD3, etc.). Leave empty for public-only.
      HF_TOKEN: "${HF_TOKEN:-}"
    entrypoint: ["/bin/sh", "/init.sh"]
    restart: "no"

  # ---------------------------------------------------------------------------
  # Open WebUI — multi-user chat.
  # ---------------------------------------------------------------------------
  open-webui:
    image: ghcr.io/open-webui/open-webui:${OPEN_WEBUI_TAG:-main}
    container_name: open-webui
    restart: unless-stopped
    # ports: not published; Caddy fronts it
    environment:
      ENABLE_OPENAI_API: "false"
      WEBUI_AUTH: "true"
      ENABLE_SIGNUP: "false"
      DEFAULT_USER_ROLE: "pending"
      WEBUI_URL: "${WEBUI_URL}"
      WEBUI_SECRET_KEY: "${WEBUI_SECRET_KEY}"
      OLLAMA_BASE_URL: "http://ollama:11434"
      ENABLE_IMAGE_GENERATION: "true"
      IMAGE_GENERATION_ENGINE: "comfyui"
      COMFYUI_BASE_URL: "http://comfyui:8188"
      IMAGE_SIZE: "1024x1024"
      IMAGE_STEPS: "45"
      # Workflow JSON + node mappings are pasted into the admin panel
      # (Settings → Images). Source files live in this repo under
      # workflows/ — txt2img.json, txt2img.nodes.json, img2img.json,
      # img2img.nodes.json.
    volumes:
      - open-webui-data:/app/backend/data
    depends_on:
      ollama:
        condition: service_healthy
      comfyui:
        condition: service_started
    healthcheck:
      test: ["CMD", "curl", "-sf", "http://127.0.0.1:8080/health"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 60s

  # ---------------------------------------------------------------------------
  # Anubis — PoW anti-bot sidecar(s). One per protected hostname. The service
  # is defined below but sits idle until Caddy routes traffic through it. To
  # enable: generate a key with `openssl rand -hex 32` (store it as
  # ANUBIS_OWUI_KEY in .env), then flip the corresponding `reverse_proxy`
  # target in Caddyfile from `open-webui:8080` → `anubis-owui:8923` (see the
  # Caddyfile sketch at the bottom of this file).
  # ---------------------------------------------------------------------------
  anubis-owui:
    image: ghcr.io/techarohq/anubis:${ANUBIS_TAG:-latest}
    container_name: anubis-owui
    restart: unless-stopped
    environment:
      BIND: ":8923"
      TARGET: "http://open-webui:8080"
      DIFFICULTY: "4" # SHA-256 leading zeros; 4 = ~1s of client work
      COOKIE_DOMAIN: "${LLM_URL}"
      METRICS_BIND: ":9090"
      ED25519_PRIVATE_KEY_HEX: "${ANUBIS_OWUI_KEY}"
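
# ---------------------------------------------------------------------------
# Reference sketches for files this compose expects alongside it. These are
# hedged illustrations only; the real files live in this repo, and the paths,
# model names, and hostnames below are placeholders.
# ---------------------------------------------------------------------------
# comfyui-init-models.sh: a minimal sketch, assuming busybox wget (alpine)
# and one public SDXL checkpoint. HF_TOKEN is only attached when set, so
# public-only pulls work with an empty token:
#
#   #!/bin/sh
#   set -eu
#   fetch() {  # fetch <dest> <url>
#     if [ -n "${HF_TOKEN:-}" ]; then
#       wget --header="Authorization: Bearer $HF_TOKEN" -O "$1" "$2"
#     else
#       wget -O "$1" "$2"
#     fi
#   }
#   mkdir -p /models/checkpoints
#   fetch /models/checkpoints/sd_xl_base_1.0.safetensors \
#     "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors"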
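
# Caddyfile: a minimal sketch, assuming one hostname per fronted service
# (chat.example.com / comfy.example.com stand in for whatever WEBUI_URL
# points at). The Anubis flip described above is the one-line swap shown
# commented out:
#
#   chat.example.com {
#     reverse_proxy open-webui:8080
#     # reverse_proxy anubis-owui:8923   # route through the Anubis PoW check
#   }
#
#   comfy.example.com {
#     reverse_proxy comfyui:8188
#   }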