Open WebUI 0.9.0 made every model-class accessor (Users.get_user_by_id,
Chats.get_chat_by_id, Files.get_file_by_id, …) a coroutine. Both tools
were still calling them synchronously, so the calls returned coroutines
instead of model objects; the first downstream attribute access threw,
the bare `except Exception: return False` swallowed it, and uploads
silently fell through to the data-URI fallback. The data-URI markdown
rendered during streaming but didn't survive post-stream commit, which
looked like "image flashes in, then disappears."
Add await to the six call sites; promote `_read_file_dict` to async
since it now contains an await; restore `_push_image_to_chat` to the
canonical `files` event so the file-attachment chrome (thumbnail +
download) comes back.
This supersedes commit d034700, which mis-diagnosed the symptom as a
virtualization regression and switched to a `message`-event markdown
workaround. The workaround didn't help (same flash-and-vanish) because
the upload pre-check still failed for the same async-migration reason
and the data-URI fallback path still ran.
smart_image_gen.py 0.7.9 -> 0.7.10
smart_image_pipe.py 0.1.1 -> 0.1.2
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
613 lines
25 KiB
Python
613 lines
25 KiB
Python
"""
|
|
title: Smart Image Studio (Pipe)
|
|
author: ai-stack
|
|
version: 0.1.2
|
|
description: Deterministic image-gen / edit / inpaint pipe — no LLM in the
|
|
loop for the routing decision. Registers as a model in the chat-model
|
|
dropdown ('Image Studio (Pipe)'). Reads the user's message + attached
|
|
image (if any), routes via regex, calls ComfyUI directly, returns the
|
|
image. Use when LLM-with-Tool tool-calling is leaking the call as text
|
|
(the abliterated Qwen 3.5 / Open WebUI parser interop bug).
|
|
required_open_webui_version: 0.5.0
|
|
"""
|
|
|
|
import asyncio
import base64
import inspect
import io
import json
import logging
import re
import time
import uuid
from typing import Awaitable, Callable, Literal, Optional

import aiohttp
from pydantic import BaseModel, Field
|
|
|
|
# Open WebUI runtime imports — same defensive guard as the sibling Tool.
# UploadFile, Chats, Files, Users and upload_file_handler are only bound when
# the imports succeed; the helpers in this file check _OPENWEBUI_RUNTIME
# before touching any of them.
try:
    from fastapi import UploadFile
    from open_webui.models.chats import Chats
    from open_webui.models.files import Files
    from open_webui.models.users import Users
    from open_webui.routers.files import upload_file_handler

    _OPENWEBUI_RUNTIME = True
except ImportError:
    # Imports unavailable: flag it so DB lookups and chat uploads are skipped.
    _OPENWEBUI_RUNTIME = False
|
|
|
|
|
|
# ─────────────────────────────────────────────────────────────────────────────
# Per-style settings — kept in sync with smart_image_gen.py. If you change
# checkpoint filenames in comfyui-init-models.sh, update both files.
# ─────────────────────────────────────────────────────────────────────────────
#
# Each entry is read by the workflow builders and by Pipe.pipe:
#   ckpt       -> CheckpointLoaderSimple "ckpt_name"
#   sampler    -> KSampler "sampler_name"
#   scheduler  -> KSampler "scheduler"
#   cfg, steps -> KSampler "cfg" / "steps"
#   clip_skip  -> negated and passed as CLIPSetLastLayer "stop_at_clip_layer"
#   prefix     -> prepended verbatim to the user's text to form the positive prompt
#   negative   -> used as the negative prompt as-is

STYLES = {
    "photo": {
        "ckpt": "CyberRealisticXLPlay_V8.0_FP16.safetensors",
        "sampler": "dpmpp_2m_sde",
        "scheduler": "karras",
        "cfg": 4.0, "steps": 28, "clip_skip": 1,
        "prefix": "",
        "negative": (
            "cartoon, drawing, illustration, anime, manga, painting, sketch, "
            "render, 3d, cgi, plastic skin, oversaturated, "
            "lowres, blurry, jpeg artifacts, low quality, worst quality, "
            "bad anatomy, deformed, extra fingers, missing fingers, "
            "watermark, signature, text, logo"
        ),
    },
    "juggernaut": {
        "ckpt": "Juggernaut-XL_v9_RunDiffusionPhoto_v2.safetensors",
        "sampler": "dpmpp_2m_sde",
        "scheduler": "karras",
        "cfg": 4.5, "steps": 35, "clip_skip": 1,
        "prefix": "",
        "negative": (
            "cartoon, drawing, illustration, anime, painting, sketch, render, "
            "3d, cgi, plastic skin, washed out, "
            "lowres, blurry, jpeg artifacts, low quality, worst quality, "
            "bad anatomy, deformed, extra fingers, missing fingers, "
            "watermark, signature, text, logo"
        ),
    },
    "pony": {
        "ckpt": "ponyDiffusionV6XL_v6StartWithThisOne.safetensors",
        "sampler": "euler_ancestral",
        "scheduler": "normal",
        "cfg": 7.5, "steps": 25, "clip_skip": 2,
        # Score tags go in front of the user's prompt for this checkpoint.
        "prefix": "score_9, score_8_up, score_7_up, score_6_up, score_5_up, score_4_up, ",
        "negative": (
            "score_6, score_5, score_4, "
            "worst quality, low quality, lowres, blurry, jpeg artifacts, "
            "bad anatomy, bad hands, extra digit, fewer digits, "
            "deformed, ugly, censored, monochrome, "
            "watermark, signature, text, artist name"
        ),
    },
    "general": {
        "ckpt": "talmendoxlSDXL_v11Beta.safetensors",
        "sampler": "dpmpp_2m",
        "scheduler": "karras",
        "cfg": 8.0, "steps": 30, "clip_skip": 2,
        "prefix": "",
        "negative": (
            "lowres, blurry, jpeg artifacts, low quality, worst quality, "
            "bad anatomy, deformed, ugly, watermark, signature, text"
        ),
    },
    "furry-nai": {
        "ckpt": "reedFURRYMixSDXL_v23nai.safetensors",
        "sampler": "euler_ancestral",
        "scheduler": "normal",
        "cfg": 5.0, "steps": 30, "clip_skip": 2,
        "prefix": (
            "masterpiece, best quality, high quality, detailed eyes, "
            "highres, absurdres, furry, "
        ),
        "negative": (
            "human, realistic, photorealistic, 3d, cgi, "
            "worst quality, low quality, lowres, blurry, jpeg artifacts, "
            "bad anatomy, extra digit, fewer digits, deformed, ugly, "
            "watermark, signature, text"
        ),
    },
    "furry-noob": {
        "ckpt": "indigoVoidFurryFusedXL_noobaiV32.safetensors",
        "sampler": "euler_ancestral",
        "scheduler": "normal",
        "cfg": 4.5, "steps": 20, "clip_skip": 2,
        "prefix": (
            "masterpiece, best quality, perfect quality, absurdres, newest, "
            "very aesthetic, vibrant colors, "
        ),
        "negative": (
            "human, realistic, photorealistic, 3d, cgi, shiny skin, "
            "worst quality, low quality, lowres, blurry, jpeg artifacts, "
            "bad anatomy, bad hands, mutated hands, "
            "watermark, signature, text"
        ),
    },
    # Also the value of DEFAULT_STYLE.
    "furry-il": {
        "ckpt": "novaFurryXL_ilV170.safetensors",
        "sampler": "euler_ancestral",
        "scheduler": "normal",
        "cfg": 4.0, "steps": 30, "clip_skip": 2,
        "prefix": (
            "masterpiece, best quality, amazing quality, very aesthetic, "
            "ultra-detailed, absurdres, newest, furry, anthro, "
        ),
        "negative": (
            "human, multiple tails, modern, recent, old, oldest, graphic, "
            "cartoon, painting, deformed, mutated, ugly, lowres, "
            "bad anatomy, bad hands, missing fingers, extra digits, "
            "worst quality, bad quality, sketch, jpeg artifacts, "
            "signature, watermark, text, simple background"
        ),
    },
}
|
|
|
|
# Style key used when no routing rule matches the prompt, and when the
# resolved style name is not a key of STYLES.
DEFAULT_STYLE = "furry-il"
|
|
|
|
# Keyword → style routing table. Rules are tried top to bottom and the first
# pattern found anywhere in the prompt decides the style, so order matters.
# All patterns are case-insensitive.
ROUTING_RULES = [
    (re.compile(expression, re.I), style_key)
    for expression, style_key in (
        (r"\bscore_\d", "pony"),
        (r"\bpony\b", "pony"),
        (r"\b(noobai|noob)\b", "furry-noob"),
        (r"\b(illustrious|ilxl)\b", "furry-il"),
        (r"\b(furry|anthro|feral|kemono|fursona|species)\b", "furry-il"),
        (r"\b(juggernaut)\b", "juggernaut"),
        (r"\b(photo|photograph|realistic|portrait|selfie|cinematic)\b", "photo"),
        (r"\b(anime|manga|2d|illustration)\b", "pony"),
    )
]
|
|
|
|
# Phrases that imply local-only editing → triggers inpaint mode and
# pulls out a noun phrase as the mask text.
#
# Group 1 of every pattern is the object to mask. The capture is lazy, so it
# ends at the first terminator that follows it:
#   * a stop-word ("to", "into", "with", ...) — anchored with \b so that
#     "so" does not cut "sofa" short,
#   * a comma or period, with or without whitespace in front of it, so that
#     "remove the hat." and "change the hat, please" are recognised,
#   * or the end of the prompt.
INPAINT_PATTERNS = [
    # "<verb> the <object> [to|into|with|...]"
    re.compile(
        r"\b(?:change|recolor|edit|modify|replace|remove|delete|add)\s+(?:the|that|her|his|its)\s+"
        r"([\w\s'-]{2,30}?)(?:\s+(?:to|into|with|so|that|and)\b|\s*[,.]|$)",
        re.I,
    ),
    # "make/turn the <object> <size or colour word>"
    re.compile(
        r"\b(?:make|turn)\s+(?:the|that|her|his|its)\s+([\w\s'-]{2,30}?)\s+"
        r"(?:bigger|smaller|larger|wider|taller|shorter|longer|brighter|darker|"
        r"red|blue|green|yellow|orange|purple|pink|black|white|gold)",
        re.I,
    ),
    # "only/just the <object>"
    re.compile(
        r"\b(?:only|just)\s+(?:the|change the|edit the)\s+([\w\s'-]{2,30}?)(?:\s+|[,.]|$)",
        re.I,
    ),
]
|
|
|
|
|
|
def _route_style(prompt: str) -> str:
    """Return the style key for *prompt*.

    The style of the first ROUTING_RULES entry whose pattern is found in the
    prompt is returned; DEFAULT_STYLE is returned when none match.
    """
    matching_styles = (
        style for pattern, style in ROUTING_RULES if pattern.search(prompt)
    )
    return next(matching_styles, DEFAULT_STYLE)
|
|
|
|
|
|
def _detect_mask_text(prompt: str) -> Optional[str]:
    """Pull a noun phrase out of edit-style instructions for inpaint.

    Each INPAINT_PATTERNS entry is tried in order. The first one whose
    captured phrase is non-empty after trimming whitespace and trailing
    commas/periods is returned as "the <phrase>". Returns None when no
    pattern yields a usable phrase.
    """
    for hit in (rx.search(prompt) for rx in INPAINT_PATTERNS):
        if hit is None:
            continue
        target = hit.group(1).strip().rstrip(",.").strip()
        if target:
            return f"the {target}"
    return None
|
|
|
|
|
|
def _inherited_style(messages) -> Optional[str]:
    """Best-effort: recover the style named in an earlier assistant reply.

    Walks the messages newest-first and looks at assistant messages whose
    content is a plain string. The first "style: X" / "style=X" marker whose
    X is a key of STYLES is returned. Returns None if nothing qualifies.
    """
    for entry in reversed(messages or []):
        from_assistant = isinstance(entry, dict) and entry.get("role") == "assistant"
        text = entry.get("content") if from_assistant else None
        if not isinstance(text, str):
            continue
        # Look for a "style: X" marker in the assistant's previous text
        found = re.search(r"\bstyle[:=]\s*([\w\-]+)", text)
        if found is not None and found.group(1) in STYLES:
            return found.group(1)
    return None
|
|
|
|
|
|
def _seed_value(seed: int) -> int:
    """Return *seed* when it is positive, otherwise a time-derived seed.

    The fallback is the current time in milliseconds reduced modulo 2**31.
    """
    if seed > 0:
        return seed
    millis_now = int(time.time() * 1000)
    return millis_now % (2**31)
|
|
|
|
|
|
def _build_txt2img(positive: str, negative: str, settings: dict,
                   width: int, height: int, seed: int) -> dict:
    """Build the ComfyUI workflow dict for plain text-to-image.

    The graph samples from an empty latent of width x height at full denoise
    (1.0), using the checkpoint, sampler, scheduler, cfg, steps and clip_skip
    taken from *settings*. Node "9" is the SaveImage node.

    NOTE(review): values such as ["4", 0] look like ComfyUI API-format links
    (node id, output slot) — confirm against the ComfyUI docs.
    """
    graph = {}
    graph["3"] = {
        "class_type": "KSampler",
        "inputs": {
            "seed": _seed_value(seed),
            "steps": settings["steps"],
            "cfg": settings["cfg"],
            "sampler_name": settings["sampler"],
            "scheduler": settings["scheduler"],
            "denoise": 1.0,
            "model": ["4", 0],
            "positive": ["6", 0],
            "negative": ["7", 0],
            "latent_image": ["5", 0],
        },
    }
    graph["4"] = {
        "class_type": "CheckpointLoaderSimple",
        "inputs": {"ckpt_name": settings["ckpt"]},
    }
    graph["5"] = {
        "class_type": "EmptyLatentImage",
        "inputs": {"width": width, "height": height, "batch_size": 1},
    }
    graph["6"] = {
        "class_type": "CLIPTextEncode",
        "inputs": {"text": positive, "clip": ["10", 0]},
    }
    graph["7"] = {
        "class_type": "CLIPTextEncode",
        "inputs": {"text": negative, "clip": ["10", 0]},
    }
    graph["8"] = {
        "class_type": "VAEDecode",
        "inputs": {"samples": ["3", 0], "vae": ["4", 2]},
    }
    graph["9"] = {
        "class_type": "SaveImage",
        "inputs": {"filename_prefix": "smartpipe", "images": ["8", 0]},
    }
    # clip_skip is stored as a positive count and negated for the node input.
    graph["10"] = {
        "class_type": "CLIPSetLastLayer",
        "inputs": {"stop_at_clip_layer": -settings["clip_skip"], "clip": ["4", 1]},
    }
    return graph
|
|
|
|
|
|
def _build_img2img(positive: str, negative: str, settings: dict,
                   image_filename: str, denoise: float, seed: int) -> dict:
    """Build the ComfyUI workflow dict for whole-image img2img.

    *image_filename* is loaded (node "12"), VAE-encoded (node "11") and used
    as the sampler's starting latent; *denoise* is passed to the KSampler
    unchanged. Node "9" is the SaveImage node.
    """
    sampler_inputs = {
        "seed": _seed_value(seed),
        "steps": settings["steps"],
        "cfg": settings["cfg"],
        "sampler_name": settings["sampler"],
        "scheduler": settings["scheduler"],
        "denoise": denoise,
        "model": ["4", 0],
        "positive": ["6", 0],
        "negative": ["7", 0],
        "latent_image": ["11", 0],
    }
    checkpoint = {
        "class_type": "CheckpointLoaderSimple",
        "inputs": {"ckpt_name": settings["ckpt"]},
    }
    positive_cond = {
        "class_type": "CLIPTextEncode",
        "inputs": {"text": positive, "clip": ["10", 0]},
    }
    negative_cond = {
        "class_type": "CLIPTextEncode",
        "inputs": {"text": negative, "clip": ["10", 0]},
    }
    decode = {
        "class_type": "VAEDecode",
        "inputs": {"samples": ["3", 0], "vae": ["4", 2]},
    }
    save = {
        "class_type": "SaveImage",
        "inputs": {"filename_prefix": "smartpipe", "images": ["8", 0]},
    }
    # clip_skip is stored as a positive count and negated for the node input.
    clip_layer = {
        "class_type": "CLIPSetLastLayer",
        "inputs": {"stop_at_clip_layer": -settings["clip_skip"], "clip": ["4", 1]},
    }
    encode = {
        "class_type": "VAEEncode",
        "inputs": {"pixels": ["12", 0], "vae": ["4", 2]},
    }
    load = {"class_type": "LoadImage", "inputs": {"image": image_filename}}
    return {
        "3": {"class_type": "KSampler", "inputs": sampler_inputs},
        "4": checkpoint,
        "6": positive_cond,
        "7": negative_cond,
        "8": decode,
        "9": save,
        "10": clip_layer,
        "11": encode,
        "12": load,
    }
|
|
|
|
|
|
def _build_inpaint(positive: str, negative: str, settings: dict,
                   image_filename: str, mask_text: str,
                   denoise: float, seed: int) -> dict:
    """Build the ComfyUI workflow dict for text-masked inpainting.

    Same core as img2img, plus a masking branch: *mask_text* is fed to the
    GroundingDinoSAMSegment node (threshold 0.3), its mask output is grown by
    12 px (GrowMask), and that mask is applied to the encoded latent via
    SetLatentNoiseMask before sampling. Node "9" is the SaveImage node.
    """
    workflow = {
        "3": {
            "class_type": "KSampler",
            "inputs": {
                "seed": _seed_value(seed),
                "steps": settings["steps"],
                "cfg": settings["cfg"],
                "sampler_name": settings["sampler"],
                "scheduler": settings["scheduler"],
                "denoise": denoise,
                "model": ["4", 0],
                "positive": ["6", 0],
                "negative": ["7", 0],
                "latent_image": ["13", 0],
            },
        },
        "4": {
            "class_type": "CheckpointLoaderSimple",
            "inputs": {"ckpt_name": settings["ckpt"]},
        },
        "6": {
            "class_type": "CLIPTextEncode",
            "inputs": {"text": positive, "clip": ["10", 0]},
        },
        "7": {
            "class_type": "CLIPTextEncode",
            "inputs": {"text": negative, "clip": ["10", 0]},
        },
        "8": {
            "class_type": "VAEDecode",
            "inputs": {"samples": ["3", 0], "vae": ["4", 2]},
        },
        "9": {
            "class_type": "SaveImage",
            "inputs": {"filename_prefix": "smartpipe", "images": ["8", 0]},
        },
        # clip_skip is stored as a positive count and negated for the node input.
        "10": {
            "class_type": "CLIPSetLastLayer",
            "inputs": {"stop_at_clip_layer": -settings["clip_skip"], "clip": ["4", 1]},
        },
        "11": {
            "class_type": "VAEEncode",
            "inputs": {"pixels": ["12", 0], "vae": ["4", 2]},
        },
        "12": {"class_type": "LoadImage", "inputs": {"image": image_filename}},
    }
    # Masking branch: segment by text prompt, grow the mask, apply to the latent.
    workflow["13"] = {
        "class_type": "SetLatentNoiseMask",
        "inputs": {"samples": ["11", 0], "mask": ["17", 0]},
    }
    workflow["14"] = {
        "class_type": "SAMModelLoader (segment anything)",
        "inputs": {"model_name": "sam_hq_vit_h (2.57GB)"},
    }
    workflow["15"] = {
        "class_type": "GroundingDinoModelLoader (segment anything)",
        "inputs": {"model_name": "GroundingDINO_SwinT_OGC (694MB)"},
    }
    workflow["16"] = {
        "class_type": "GroundingDinoSAMSegment (segment anything)",
        "inputs": {
            "sam_model": ["14", 0],
            "grounding_dino_model": ["15", 0],
            "image": ["12", 0],
            "prompt": mask_text,
            "threshold": 0.3,
        },
    }
    workflow["17"] = {
        "class_type": "GrowMask",
        "inputs": {"mask": ["16", 1], "expand": 12, "tapered_corners": True},
    }
    return workflow
|
|
|
|
|
|
# Extracts a file id from a URL path of the form "/files/<id>" or
# "/api/v1/files/<id>", optionally followed by "/content". The id (group 1)
# is a run of at least 8 hex digits and dashes.
_FILE_URL_ID_RE = re.compile(r"/(?:api/v1/)?files/([0-9a-fA-F-]{8,})(?:/content)?")
|
|
|
|
|
|
def _file_dict_is_image(f: dict) -> bool:
    """Return True when the file dict looks like an image.

    A dict qualifies if its "type" contains "image" (case-insensitive) or if
    its "name" (falling back to "filename") ends in .png, .jpg, .jpeg or .webp.
    """
    declared_type = (f.get("type") or "").lower()
    file_name = (f.get("name") or f.get("filename") or "").lower()
    if "image" in declared_type:
        return True
    return file_name.endswith((".png", ".jpg", ".jpeg", ".webp"))
|
|
|
|
|
|
async def _read_file_dict(f: dict) -> Optional[bytes]:
    """Return the raw bytes behind an Open WebUI file dict, or None.

    Lookup order:
      1. The "path", "filepath" and "file_path" keys of *f*, read from disk.
      2. When the Open WebUI runtime is importable: a Files lookup by
         ``f["id"]`` and by the id embedded in ``f["url"]``, reading the
         path stored on the model (``.path`` or ``.meta["path"]``).

    Every step is best-effort: unreadable paths and failed lookups are
    skipped, and None is returned when nothing could be read.
    """

    def _slurp(path) -> Optional[bytes]:
        # Only string paths are opened; anything else (including ints, which
        # open() would treat as file descriptors) is ignored.
        if not (isinstance(path, str) and path):
            return None
        try:
            with open(path, "rb") as fh:
                return fh.read()
        except (OSError, ValueError):
            # ValueError covers paths with embedded NUL bytes.
            return None

    # NOTE(review): these paths come straight from the file dict, which may
    # originate from the request body — confirm callers only pass trusted dicts.
    for path_key in ("path", "filepath", "file_path"):
        data = _slurp(f.get(path_key))
        if data is not None:
            return data

    candidate_ids = []
    if f.get("id"):
        candidate_ids.append(f["id"])
    url = f.get("url")
    if isinstance(url, str):
        m = _FILE_URL_ID_RE.search(url)
        # Skip the URL id when it is the same file we already queued.
        if m and m.group(1) not in candidate_ids:
            candidate_ids.append(m.group(1))

    if not _OPENWEBUI_RUNTIME:
        return None

    for fid in candidate_ids:
        try:
            # The accessor is a coroutine on newer Open WebUI releases and a
            # plain function on older ones; accept both.
            looked_up = Files.get_file_by_id(fid)
            file_model = await looked_up if inspect.isawaitable(looked_up) else looked_up
        except Exception:
            # Best-effort, but leave a trace instead of failing silently.
            logging.getLogger(__name__).warning(
                "File lookup failed for id %r", fid, exc_info=True
            )
            continue
        if file_model is None:
            continue
        path = getattr(file_model, "path", None)
        if not path:
            meta = getattr(file_model, "meta", None) or {}
            path = meta.get("path") if isinstance(meta, dict) else getattr(meta, "path", None)
        data = _slurp(path)
        if data is not None:
            return data
    return None
|
|
|
|
|
|
async def _extract_attached_image(files, messages, metadata, session) -> Optional[bytes]:
    """Find an attached image and return its raw bytes, or None.

    Sources are tried in this order and the first hit wins:
      1. Inline ``data:image`` URIs in ``image_url`` content blocks
         (messages scanned newest-first).
      2. Image entries in each message's ``files`` list (newest-first).
      3. Image entries in *files*.
      4. The stored chat looked up by ``metadata["chat_id"]``, scanning its
         messages' ``files`` newest-first (only when the Open WebUI runtime
         is importable).

    *session* is accepted for signature compatibility and is not used.
    """

    async def _first_image(candidates) -> Optional[bytes]:
        # Return the bytes of the first readable image dict in *candidates*.
        for f in candidates or []:
            if isinstance(f, dict) and _file_dict_is_image(f):
                data = await _read_file_dict(f)
                if data is not None:
                    return data
        return None

    history = [msg for msg in (messages or []) if isinstance(msg, dict)]

    # 1. Inline data URIs
    for msg in reversed(history):
        content = msg.get("content")
        if not isinstance(content, list):
            continue
        for block in content:
            if not isinstance(block, dict) or block.get("type") != "image_url":
                continue
            image_url = block.get("image_url")
            url = image_url.get("url") if isinstance(image_url, dict) else image_url
            # A missing/None url must not abort the scan.
            if not (isinstance(url, str) and url.startswith("data:image")):
                continue
            try:
                return base64.b64decode(url.split(",", 1)[1])
            except (IndexError, ValueError):
                # No payload after the comma, or invalid base64: keep looking.
                continue

    # 2. messages[].files
    for msg in reversed(history):
        data = await _first_image(msg.get("files"))
        if data is not None:
            return data

    # 3. __files__
    data = await _first_image(files)
    if data is not None:
        return data

    # 4. DB lookup (assistant-emitted files often only land here)
    chat_id = (metadata or {}).get("chat_id")
    if not (chat_id and _OPENWEBUI_RUNTIME):
        return None
    try:
        # Coroutine on newer Open WebUI releases, plain call on older ones.
        looked_up = Chats.get_chat_by_id(chat_id)
        chat = await looked_up if inspect.isawaitable(looked_up) else looked_up
        chat_data = getattr(chat, "chat", None) if chat else None
        chat_messages = chat_data.get("messages") if isinstance(chat_data, dict) else None
        for msg in reversed(chat_messages or []):
            if isinstance(msg, dict):
                data = await _first_image(msg.get("files"))
                if data is not None:
                    return data
    except Exception:
        # Best-effort, but leave a trace instead of failing silently.
        logging.getLogger(__name__).warning(
            "Chat lookup failed for chat_id %r", chat_id, exc_info=True
        )
    return None
|
|
|
|
|
|
async def _upload_to_comfyui(session, base, raw) -> Optional[str]:
    """POST *raw* to ``{base}/upload/image`` and return the stored filename.

    The image is sent as multipart form data under a random
    ``smartpipe_<hex>.png`` name with ``overwrite=true``. Returns the "name"
    field of the JSON response (or the generated name if the field is
    absent), and None when the server does not answer with HTTP 200.
    """
    upload_name = "smartpipe_" + uuid.uuid4().hex[:12] + ".png"
    payload = aiohttp.FormData()
    payload.add_field("image", raw, filename=upload_name, content_type="image/png")
    payload.add_field("overwrite", "true")
    async with session.post(f"{base}/upload/image", data=payload) as resp:
        if resp.status == 200:
            reply = await resp.json()
            return reply.get("name", upload_name)
    return None
|
|
|
|
|
|
async def _push_image_to_chat(raw, prefix, request, user_dict, metadata, event_emitter) -> bool:
    """Store *raw* as an Open WebUI file and attach it to the current message.

    The PNG bytes are uploaded through ``upload_file_handler`` (tagged with
    the chat_id / message_id from *metadata*, processing disabled) and then
    announced to the client with a ``files`` event carrying the file's
    content URL.

    Returns True when the event was emitted. Returns False when the Open
    WebUI runtime, request, user or emitter is missing, when the user cannot
    be found, or when any step raises — in which case the exception is
    logged rather than discarded.
    """
    if not (_OPENWEBUI_RUNTIME and request and user_dict and event_emitter):
        return False
    try:
        # Users.get_user_by_id is a coroutine on newer Open WebUI releases
        # and a plain function on older ones; accept both, the same way the
        # upload handler result is treated below.
        looked_up = Users.get_user_by_id(user_dict.get("id"))
        user = await looked_up if inspect.isawaitable(looked_up) else looked_up
        if not user:
            return False

        upload = UploadFile(
            file=io.BytesIO(raw),
            filename=f"{prefix}_{uuid.uuid4().hex[:8]}.png",
            headers={"content-type": "image/png"},
        )
        meta = metadata or {}
        result = upload_file_handler(
            request=request,
            file=upload,
            metadata={
                "chat_id": meta.get("chat_id"),
                "message_id": meta.get("message_id"),
            },
            process=False,
            user=user,
        )
        file_item = await result if inspect.isawaitable(result) else result

        url = request.app.url_path_for("get_file_content_by_id", id=file_item.id)
        await event_emitter({
            "type": "files",
            "data": {"files": [{"type": "image", "url": url}]},
        })
        return True
    except Exception:
        # Still best-effort for the caller, but a silent False here is what
        # previously hid a breaking API change — always record the cause.
        logging.getLogger(__name__).exception("Could not attach generated image to chat")
        return False
|
|
|
|
|
|
async def _submit_and_fetch(session, base, workflow, timeout_seconds, emit, settings,
                            poll_interval: float = 1.5):
    """Queue *workflow* on ComfyUI, wait for it, and download the first image.

    Args:
        session: HTTP client session exposing ``post`` / ``get`` async context managers.
        base: ComfyUI base URL without a trailing slash.
        workflow: Prompt graph posted to ``/prompt``.
        timeout_seconds: Maximum time to wait for the history entry to show images.
        emit: ``async emit(msg)`` status callback.
        settings: Style settings; only used for the status line.
        poll_interval: Seconds to sleep before each ``/history`` poll.

    Returns:
        ``(image_bytes, None)`` on success, ``(None, error_message)`` otherwise.
    """
    SAVE_NODE_ID = "9"
    client_id = str(uuid.uuid4())
    async with session.post(
        f"{base}/prompt", json={"prompt": workflow, "client_id": client_id}
    ) as resp:
        if resp.status != 200:
            return None, f"ComfyUI rejected the prompt: {resp.status} {await resp.text()}"
        prompt_id = (await resp.json()).get("prompt_id")
    if not prompt_id:
        return None, "ComfyUI didn't return a prompt_id."

    await emit(
        f"Sampling — {settings['sampler']}/{settings['scheduler']}, "
        f"CFG {settings['cfg']}, {settings['steps']} steps"
    )

    # Monotonic clock: the deadline must not move if the wall clock is adjusted.
    deadline = time.monotonic() + timeout_seconds
    output_images: list = []
    while time.monotonic() < deadline:
        await asyncio.sleep(poll_interval)
        async with session.get(f"{base}/history/{prompt_id}") as resp:
            if resp.status != 200:
                continue
            history = await resp.json()

        entry = history.get(prompt_id) if isinstance(history, dict) else None
        if not entry:
            continue

        outputs = entry.get("outputs") or {}
        # Prefer the SaveImage node; fall back to images from any node.
        output_images = list((outputs.get(SAVE_NODE_ID) or {}).get("images") or [])
        if not output_images:
            for node_out in outputs.values():
                if isinstance(node_out, dict):
                    output_images.extend(node_out.get("images") or [])
        if output_images:
            break

        # No images in the entry. If it carries a status block saying the run
        # failed or finished, stop now instead of polling until the timeout.
        # Entries without a status block keep the old poll-until-deadline behaviour.
        status = entry.get("status") or {}
        if status.get("status_str") == "error":
            return None, "ComfyUI reported an execution error for this prompt."
        if status.get("completed"):
            return None, "ComfyUI finished the prompt but produced no images."

    if not output_images:
        return None, f"Timed out after {timeout_seconds}s waiting for image."

    img = output_images[0]
    params = {
        "filename": img["filename"],
        "subfolder": img.get("subfolder", ""),
        "type": img.get("type", "output"),
    }
    async with session.get(f"{base}/view", params=params) as resp:
        if resp.status != 200:
            return None, f"Failed to fetch image: {resp.status}"
        return await resp.read(), None
|
|
|
|
|
|
def _extract_user_text(body: dict) -> str:
    """Pull the latest user message's text content.

    Scans ``body["messages"]`` newest-first for a user message whose content
    is either a string (returned stripped) or a list of blocks (the "text"
    blocks are joined with spaces and stripped). User messages with any other
    content type are skipped. Returns "" when nothing is found.
    """
    for entry in reversed(body.get("messages", [])):
        if not (isinstance(entry, dict) and entry.get("role") == "user"):
            continue
        payload = entry.get("content")
        if isinstance(payload, str):
            return payload.strip()
        if isinstance(payload, list):
            text_pieces = [
                piece.get("text", "")
                for piece in payload
                if isinstance(piece, dict) and piece.get("type") == "text"
            ]
            return " ".join(text_pieces).strip()
    return ""
|
|
|
|
|
|
class Pipe:
    """Open WebUI pipe that turns a chat message into a ComfyUI image job.

    The routing is deterministic: the style comes from the FORCE_STYLE valve,
    a prior assistant reply, or keyword rules; the mode (txt2img, img2img,
    inpaint) depends on whether an image is attached and whether the text
    names an object to edit.
    """

    class Valves(BaseModel):
        # Where to reach ComfyUI from the Open WebUI process.
        COMFYUI_BASE_URL: str = Field(
            default="http://comfyui:8188",
            description="ComfyUI server URL reachable from the open-webui container.",
        )
        # Upper bound, in seconds, on waiting for a queued prompt to produce images.
        TIMEOUT_SECONDS: int = Field(default=600)
        # Latent size used for txt2img.
        DEFAULT_WIDTH: int = Field(default=1024)
        DEFAULT_HEIGHT: int = Field(default=1024)
        # KSampler denoise for whole-image edits and for masked edits.
        DEFAULT_DENOISE_IMG2IMG: float = Field(default=0.7)
        DEFAULT_DENOISE_INPAINT: float = Field(default=1.0)
        FORCE_STYLE: str = Field(
            default="",
            description="Override style routing. Empty = auto-route. Set to "
                        "one of: photo, juggernaut, pony, general, "
                        "furry-nai, furry-noob, furry-il.",
        )

    def __init__(self):
        self.valves = self.Valves()
        self.id = "image-studio-pipe"
        self.name = "Image Studio (Pipe)"

    async def pipe(
        self,
        body: dict,
        __user__: Optional[dict] = None,
        __request__=None,
        __metadata__: Optional[dict] = None,
        __event_emitter__: Optional[Callable[[dict], Awaitable[None]]] = None,
    ) -> str:
        """Generate or edit an image for the latest user message.

        Returns the assistant text for the turn: a short confirmation that
        embeds "style: <name>" (so the next turn can inherit the style), or a
        plain-English error message. The image itself is delivered through
        the event emitter as a file attachment.
        """
        user_text = _extract_user_text(body)
        if not user_text:
            return "Type a message describing the image you want."

        async def emit(msg: str, done: bool = False):
            if __event_emitter__:
                await __event_emitter__({
                    "type": "status",
                    "data": {"description": msg, "done": done},
                })

        async def fail(message: str) -> str:
            # Close the status indicator on every error path so it does not
            # stay in its "in progress" state, then hand back the message.
            await emit("Image request failed", done=True)
            return message

        # Style: explicit valve override > inherited from prior assistant
        # message > keyword detection on user text > default.
        chosen = (
            self.valves.FORCE_STYLE.strip()
            or _inherited_style(body.get("messages"))
            or _route_style(user_text)
        )
        if chosen not in STYLES:
            chosen = DEFAULT_STYLE
        settings = STYLES[chosen]

        base = self.valves.COMFYUI_BASE_URL.rstrip("/")
        positive = f"{settings['prefix']}{user_text}"
        negative = settings["negative"]

        try:
            async with aiohttp.ClientSession() as session:
                await emit("Looking for attached image…")
                source_bytes = await _extract_attached_image(
                    None, body.get("messages"), __metadata__, session,
                )

                if source_bytes is None:
                    # No image → txt2img
                    await emit(f"Generating ({chosen})")
                    workflow = _build_txt2img(
                        positive, negative, settings,
                        self.valves.DEFAULT_WIDTH, self.valves.DEFAULT_HEIGHT, 0,
                    )
                    tag = "gen"
                else:
                    # Image present → upload, then inpaint or img2img
                    uploaded = await _upload_to_comfyui(session, base, source_bytes)
                    if not uploaded:
                        return await fail("Failed to upload source image to ComfyUI.")

                    mask_text = _detect_mask_text(user_text)
                    if mask_text:
                        await emit(
                            f"Inpainting ({chosen}, mask='{mask_text}', "
                            f"denoise={self.valves.DEFAULT_DENOISE_INPAINT})"
                        )
                        workflow = _build_inpaint(
                            positive, negative, settings, uploaded, mask_text,
                            self.valves.DEFAULT_DENOISE_INPAINT, 0,
                        )
                        tag = f"edit (inpaint: {mask_text})"
                    else:
                        await emit(
                            f"Editing ({chosen}, "
                            f"denoise={self.valves.DEFAULT_DENOISE_IMG2IMG})"
                        )
                        workflow = _build_img2img(
                            positive, negative, settings, uploaded,
                            self.valves.DEFAULT_DENOISE_IMG2IMG, 0,
                        )
                        tag = "edit (img2img)"

                raw, err = await _submit_and_fetch(
                    session, base, workflow, self.valves.TIMEOUT_SECONDS, emit, settings,
                )
                if err:
                    return await fail(err)

                attached = await _push_image_to_chat(
                    raw, "smartpipe", __request__, __user__, __metadata__, __event_emitter__,
                )
        except (aiohttp.ClientError, asyncio.TimeoutError) as exc:
            # ComfyUI unreachable or the connection dropped mid-request.
            return await fail(f"Could not reach ComfyUI at {base}: {exc}")

        if not attached:
            # Don't claim success when the user has nothing to look at.
            return await fail(
                f"The image was generated (style: {chosen}, {tag}) but could not "
                "be attached to this chat."
            )

        await emit(f"Done — {chosen}", done=True)

        # Single-line plain-English follow-up. Emit the style as
        # "style: <name>" so the inheritance helper can find it next turn.
        return f"Done — style: {chosen}, {tag}."
|