# ai-stack — combined deployment example: caddy + comfyui + ollama +
# open-webui (+ anubis later). One GPU host, one bridge network, one TLS
# entry point.
#
# This is the "full meal" deployment. The minimal "just ComfyUI" example
# lives in the repo root (../../docker-compose.yml).

name: ai-stack

volumes:
  comfyui-models:
  comfyui-custom-nodes:
  comfyui-input:
  comfyui-output:
  comfyui-user:
  ollama-data:
  open-webui-data:
  caddy-data:
  caddy-config:

services:
  # ---------------------------------------------------------------------------
  # Caddy — the only service exposed on 80/443. Terminates TLS (auto Let's
  # Encrypt), reverse-proxies to the in-compose services by name. A sketch of
  # what the Caddyfile might look like is at the bottom of this file.
  # ---------------------------------------------------------------------------
  caddy:
    image: caddy:${CADDY_TAG:-2-alpine}
    container_name: caddy
    restart: unless-stopped
    ports:
      - "80:80"
      - "443:443"
      - "443:443/udp" # HTTP/3
    volumes:
      - ./Caddyfile:/etc/caddy/Caddyfile:ro
      - caddy-data:/data
      - caddy-config:/config
    depends_on:
      - open-webui
      - comfyui
    healthcheck:
      test: ["CMD", "wget", "-qO-", "http://127.0.0.1:2019/config/"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 10s

  # ---------------------------------------------------------------------------
  # Ollama — LLM daemon, GPU-backed.
  # ---------------------------------------------------------------------------
  ollama:
    image: ollama/ollama:${OLLAMA_TAG:-latest}
    container_name: ollama
    restart: unless-stopped
    # 11434 is only published so you can reach the daemon directly from the
    # VM host. Services inside the stack reach it via http://ollama:11434.
    ports:
      - "11434:11434"
    volumes:
      - ollama-data:/root/.ollama
    environment:
      - OLLAMA_HOST=0.0.0.0:11434
      # KEEP_ALIVE=-1 holds loaded models in VRAM until evicted by another
      # load (vs. the default 5m, or our previous 30m, both of which force a
      # reload penalty on every cold use). Pair with MAX_LOADED_MODELS sized
      # to whatever fits in your GPU's VRAM — see README "VRAM sizing".
      - OLLAMA_KEEP_ALIVE=-1
      - OLLAMA_MAX_LOADED_MODELS=3
      - OLLAMA_FLASH_ATTENTION=1
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    healthcheck:
      test: ["CMD-SHELL", "ollama list >/dev/null 2>&1 || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s

  # One-shot model puller. Runs after ollama is healthy, pulls whatever
  # init-models.sh lists, then exits. `restart: "no"` keeps it from looping.
  # A sketch of what the script might contain follows this service.
  #
  # Models can come from registry.ollama.ai (default) or your own S3
  # mirror (set S3_OLLAMA_BASE in .env; create tarballs with
  # mirror-ollama-model.sh).
  model-init:
    image: ollama/ollama:${OLLAMA_TAG:-latest}
    container_name: ollama-model-init
    depends_on:
      ollama:
        condition: service_healthy
    volumes:
      - ollama-data:/root/.ollama
      - ./init-models.sh:/init-models.sh:ro
    environment:
      - OLLAMA_HOST=ollama:11434
      - S3_OLLAMA_BASE=${S3_OLLAMA_BASE:-}
    entrypoint: ["/bin/sh", "/init-models.sh"]
    restart: "no"
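  # What init-models.sh might contain: a minimal sketch, assuming plain
  # registry pulls only (the real script lives in this repo and also handles
  # the S3_OLLAMA_BASE tarball path; the model names are placeholders):
  #
  #   #!/bin/sh
  #   set -eu
  #   for model in llama3.1:8b qwen2.5-coder:7b; do
  #     ollama pull "$model"   # the CLI reaches the daemon via OLLAMA_HOST
  #   done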
  # ---------------------------------------------------------------------------
  # ComfyUI — image generation (txt2img + img2img), GPU-backed.
  # No host port published — Caddy fronts it. Uncomment 8188 only for
  # VM-local debugging or to use ComfyUI's native web UI directly.
  #
  # Replaces the figment + segment + Forge trio. Open WebUI talks to
  # /prompt directly; SAM2/Grounding-DINO masking lives in custom_nodes
  # (install via ComfyUI-Manager) instead of as a separate sidecar.
  # ---------------------------------------------------------------------------
  comfyui:
    image: git.anomalous.dev/alphacentri/comfyui-nvidia:${COMFYUI_IMAGE_TAG:-0.2.1}
    pull_policy: always
    container_name: comfyui
    restart: unless-stopped
    # ports:
    #   - "8188:8188"
    volumes:
      - comfyui-models:/opt/comfyui/models
      - comfyui-custom-nodes:/opt/comfyui/custom_nodes
      - comfyui-input:/opt/comfyui/input
      - comfyui-output:/opt/comfyui/output
      - comfyui-user:/opt/comfyui/user
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    healthcheck:
      test: ["CMD", "curl", "-sf", "http://127.0.0.1:8188/system_stats"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 120s

  # One-shot model puller for ComfyUI. Mounts the same models volume,
  # downloads whatever comfyui-init-models.sh lists, then exits. ComfyUI
  # doesn't need to be running for this — files just land on the volume,
  # and ComfyUI picks them up the next time it scans (or on a restart).
  # A sketch of what the script might contain is at the bottom of this file.
  comfyui-model-init:
    image: alpine:${ALPINE_TAG:-3.20}
    container_name: comfyui-model-init
    volumes:
      - comfyui-models:/models
      - ./comfyui-init-models.sh:/init.sh:ro
    environment:
      # Optional — set in .env to download from gated HuggingFace repos
      # (Flux-dev, SD3, etc.). Leave empty for public-only.
      HF_TOKEN: "${HF_TOKEN:-}"
    entrypoint: ["/bin/sh", "/init.sh"]
    restart: "no"

  # ---------------------------------------------------------------------------
  # Open WebUI — multi-user chat.
  # ---------------------------------------------------------------------------
  open-webui:
    image: ghcr.io/open-webui/open-webui:${OPEN_WEBUI_TAG:-main}
    container_name: open-webui
    restart: unless-stopped
    # ports: not published; Caddy fronts it
    environment:
      ENABLE_OPENAI_API: "false"
      WEBUI_AUTH: "true"
      ENABLE_SIGNUP: "false"
      DEFAULT_USER_ROLE: "pending"
      WEBUI_URL: "${WEBUI_URL}"
      WEBUI_SECRET_KEY: "${WEBUI_SECRET_KEY}"
      OLLAMA_BASE_URL: "http://ollama:11434"
      ENABLE_IMAGE_GENERATION: "true"
      IMAGE_GENERATION_ENGINE: "comfyui"
      COMFYUI_BASE_URL: "http://comfyui:8188"
      IMAGE_SIZE: "1024x1024"
      IMAGE_STEPS: "45"
      # Workflow JSON + node mappings are pasted into the admin panel
      # (Settings → Images). Source files live in this repo under
      # workflows/ — txt2img.json, txt2img.nodes.json, img2img.json,
      # img2img.nodes.json.
    volumes:
      - open-webui-data:/app/backend/data
    depends_on:
      ollama:
        condition: service_healthy
      comfyui:
        condition: service_started
    healthcheck:
      test: ["CMD", "curl", "-sf", "http://127.0.0.1:8080/health"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 60s

  # ---------------------------------------------------------------------------
  # Anubis — PoW anti-bot sidecar(s). One per protected hostname. The service
  # is defined below but sits idle until Caddy routes traffic through it. To
  # enable: generate a key with `openssl rand -hex 32` (store it as
  # ANUBIS_OWUI_KEY in .env), then flip the corresponding `reverse_proxy`
  # target in Caddyfile from `open-webui:8080` → `anubis-owui:8923` (see the
  # Caddyfile sketch at the bottom of this file).
  # ---------------------------------------------------------------------------
  anubis-owui:
    image: ghcr.io/techarohq/anubis:${ANUBIS_TAG:-latest}
    container_name: anubis-owui
    restart: unless-stopped
    environment:
      BIND: ":8923"
      TARGET: "http://open-webui:8080"
      DIFFICULTY: "4" # SHA-256 leading zeros; 4 = ~1s of client work
      COOKIE_DOMAIN: "${LLM_URL}"
      METRICS_BIND: ":9090"
      ED25519_PRIVATE_KEY_HEX: "${ANUBIS_OWUI_KEY}"
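
# ---------------------------------------------------------------------------
# Reference sketches for files this compose expects alongside it. These are
# hedged illustrations only; the real files live in this repo, and the paths,
# model names, and hostnames below are placeholders.
# ---------------------------------------------------------------------------
# comfyui-init-models.sh: a minimal sketch, assuming busybox wget (alpine)
# and one public SDXL checkpoint. HF_TOKEN is only attached when set, so
# public-only pulls work with an empty token:
#
#   #!/bin/sh
#   set -eu
#   fetch() {  # fetch <dest> <url>
#     if [ -n "${HF_TOKEN:-}" ]; then
#       wget --header="Authorization: Bearer $HF_TOKEN" -O "$1" "$2"
#     else
#       wget -O "$1" "$2"
#     fi
#   }
#   mkdir -p /models/checkpoints
#   fetch /models/checkpoints/sd_xl_base_1.0.safetensors \
#     "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors"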
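
# Caddyfile: a minimal sketch, assuming one hostname per fronted service
# (chat.example.com / comfy.example.com stand in for whatever WEBUI_URL
# points at). The Anubis flip described above is the one-line swap shown
# commented out:
#
#   chat.example.com {
#     reverse_proxy open-webui:8080
#     # reverse_proxy anubis-owui:8923   # route through the Anubis PoW check
#   }
#
#   comfy.example.com {
#     reverse_proxy comfyui:8188
#   }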