From 45d5541be0ddcf6e65a5d5d0d79f2bdec76aff9d Mon Sep 17 00:00:00 2001 From: William Gill Date: Sun, 19 Apr 2026 12:45:34 -0500 Subject: [PATCH] smart_image_gen v0.2: per-style sampler/CFG/steps/CLIP-skip + prompt prefixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Researched each of the seven SDXL checkpoints on Civitai and encoded the creator-recommended generation defaults per style instead of one global set. Material differences: - photo (CyberRealistic): dpmpp_2m_sde / karras / CFG 4 / 28 steps / CLIP 1 - juggernaut: dpmpp_2m_sde / karras / CFG 4.5 / 35 steps / CLIP 1 - pony: euler_a / normal / CFG 7.5 / 25 steps / CLIP 2 - general (Talmendo): dpmpp_2m / karras / CFG 8 / 30 steps / CLIP 2 - furry-nai (Reed): euler_a / normal / CFG 5 / 30 steps / CLIP 2 - furry-noob (IndigoVoid): euler_a-only / normal / CFG 4.5 / 20 steps / CLIP 2 - furry-il (NovaFurry): euler_a / normal / CFG 4 / 30 steps / CLIP 2 Three prompt-prefix dialects auto-prepended (NEVER cross-contaminated): photoreal models get nothing, Pony gets the full score_9..score_4_up chain (mandatory), and the NoobAI/Illustrious furry models get their booru quality + year-tag prefixes (masterpiece/best quality/absurdres/newest/etc). Workflow now includes a CLIPSetLastLayer node so per-style CLIP skip works. Routing default for generic "furry" flipped from Reed (NAI) to NovaFurry (Illustrious) — current sweet-spot consensus. Removed global DEFAULT_STEPS/DEFAULT_CFG valves; per-style values are canonical. Sources: each model's Civitai page (CyberRealisticXL, Juggernaut, Pony V6 XL, TalmendoXL, Reed FurryMix, IndigoVoid FurryFused, NovaFurryXL) and Pony/Illustrious prompting guides. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openwebui-tools/smart_image_gen.py | 325 ++++++++++++------ 1 file changed, 220 insertions(+), 105 deletions(-) diff --git a/deployments/ai-stack/openwebui-tools/smart_image_gen.py b/deployments/ai-stack/openwebui-tools/smart_image_gen.py index 94985c7..19ef9b8 100644 --- a/deployments/ai-stack/openwebui-tools/smart_image_gen.py +++ b/deployments/ai-stack/openwebui-tools/smart_image_gen.py @@ -1,10 +1,13 @@ """ title: Smart Image Generator (ComfyUI) author: ai-stack -version: 0.1.0 +version: 0.2.0 description: Generate images via ComfyUI with automatic SDXL checkpoint routing. The LLM picks (or auto-detects) the right model — photoreal, - anime/score-tag, furry-IL, etc. — based on the user's request. + Pony score-tag, NoobAI/Illustrious furry, etc. — based on the user's + request. Each style ships with the creator-recommended sampler, + scheduler, CFG, steps, CLIP skip, prompt-prefix dialect, and + negatives. required_open_webui_version: 0.5.0 """ @@ -19,94 +22,171 @@ import aiohttp from pydantic import BaseModel, Field -# Filename → use case. Edit alongside `comfyui-init-models.sh` so the -# files actually exist in /opt/comfyui/models/checkpoints/. -CHECKPOINTS = { - "photo": "CyberRealisticXLPlay_V8.0_FP16.safetensors", - "juggernaut": "Juggernaut-XL_v9_RunDiffusionPhoto_v2.safetensors", - "pony": "ponyDiffusionV6XL_v6StartWithThisOne.safetensors", - "general": "talmendoxlSDXL_v11Beta.safetensors", - "furry-nai": "reedFURRYMixSDXL_v23nai.safetensors", - "furry-noob": "indigoVoidFurryFusedXL_noobaiV32.safetensors", - "furry-il": "novaFurryXL_ilV170.safetensors", +# ───────────────────────────────────────────────────────────────────────────── +# Per-style settings — sampler/scheduler/cfg/steps/clip_skip/prefix/negatives +# come from each model's creator page on Civitai. 
Three prefix dialects in +# play: photoreal (no prefix, natural language), Pony score chain (REQUIRED +# for any Pony-derived checkpoint), and Booru quality tags (NoobAI / +# Illustrious lineage). Never cross-contaminate. +# ───────────────────────────────────────────────────────────────────────────── + +STYLES = { + "photo": { + "ckpt": "CyberRealisticXLPlay_V8.0_FP16.safetensors", + "sampler": "dpmpp_2m_sde", + "scheduler": "karras", + "cfg": 4.0, + "steps": 28, + "clip_skip": 1, + "prefix": "", # natural language only — no quality tags + "negative": ( + "cartoon, drawing, illustration, anime, manga, painting, sketch, " + "render, 3d, cgi, watercolor, plastic skin, doll-like, oversaturated, " + "lowres, blurry, jpeg artifacts, noisy, grainy, low quality, worst quality, " + "bad anatomy, deformed, mutated, extra limbs, extra fingers, missing fingers, " + "fused fingers, malformed hands, asymmetric face, " + "watermark, signature, text, logo, label, username" + ), + }, + "juggernaut": { + "ckpt": "Juggernaut-XL_v9_RunDiffusionPhoto_v2.safetensors", + "sampler": "dpmpp_2m_sde", + "scheduler": "karras", + "cfg": 4.5, + "steps": 35, + "clip_skip": 1, + "prefix": "", # natural language only + "negative": ( + "cartoon, drawing, illustration, anime, manga, painting, sketch, " + "render, 3d, cgi, plastic skin, washed out, oversaturated, " + "lowres, blurry, jpeg artifacts, low quality, worst quality, " + "bad anatomy, deformed, mutated, extra limbs, extra fingers, missing fingers, " + "fused fingers, malformed hands, " + "watermark, signature, text, logo, username" + ), + }, + "pony": { + "ckpt": "ponyDiffusionV6XL_v6StartWithThisOne.safetensors", + "sampler": "euler_ancestral", + "scheduler": "normal", + "cfg": 7.5, + "steps": 25, + "clip_skip": 2, + # REQUIRED — the full chain. Just `score_9` alone is much weaker. 
+ "prefix": "score_9, score_8_up, score_7_up, score_6_up, score_5_up, score_4_up, ", + # Pony's creator notes negatives are usually unnecessary; conservative + # baseline only. Source-toggle tags (source_pony/furry/anime/cartoon) + # are intentionally omitted — they exclude entire content domains. + "negative": ( + "score_6, score_5, score_4, " + "worst quality, low quality, lowres, blurry, jpeg artifacts, noisy, " + "bad anatomy, bad proportions, bad hands, extra digit, fewer digits, " + "fused fingers, malformed limbs, deformed, ugly, " + "censored, monochrome, " + "watermark, signature, text, logo, artist name, patreon username, twitter username" + ), + }, + "general": { + "ckpt": "talmendoxlSDXL_v11Beta.safetensors", + "sampler": "dpmpp_2m", + "scheduler": "karras", + "cfg": 8.0, # Talmendo wants notably higher CFG than the others + "steps": 30, + "clip_skip": 2, + "prefix": "", # creator says don't push "masterpiece" — fights the amateur aesthetic + "negative": ( + "lowres, blurry, jpeg artifacts, noisy, grainy, low quality, worst quality, " + "bad anatomy, deformed, mutated, extra limbs, missing fingers, fused fingers, " + "malformed hands, ugly, " + "watermark, signature, text, logo" + ), + }, + "furry-nai": { + "ckpt": "reedFURRYMixSDXL_v23nai.safetensors", + "sampler": "euler_ancestral", + "scheduler": "normal", + "cfg": 5.0, + "steps": 30, + "clip_skip": 2, + "prefix": ( + "masterpiece, best quality, high quality, good quality, " + "detailed eyes, highres, absurdres, furry, " + ), + "negative": ( + "human, realistic, photorealistic, 3d, cgi, " + "worst quality, bad_quality, normal quality, lowres, " + "anatomical nonsense, bad anatomy, interlocked fingers, extra fingers, " + "bad_feet, bad_hands, deformed anatomy, bad proportions, " + "censored, simple background, transparent, face backlighting, " + "watermark, signature, text, logo, username, jpeg artifacts" + ), + }, + "furry-noob": { + "ckpt": "indigoVoidFurryFusedXL_noobaiV32.safetensors", + 
"sampler": "euler_ancestral", # creator: other samplers won't work + "scheduler": "normal", + "cfg": 4.5, + "steps": 20, + "clip_skip": 2, + "prefix": ( + "masterpiece, best quality, perfect quality, absurdres, newest, " + "very aesthetic, vibrant colors, " + ), + "negative": ( + "human, realistic, photorealistic, 3d, cgi, " + "shiny skin, shiny clothing, " + "worst quality, low quality, lowres, blurry, jpeg artifacts, noisy, " + "bad anatomy, bad hands, mutated hands, bad proportions, " + "extra digit, fewer digits, fused fingers, malformed limbs, deformed, ugly, " + "watermark, signature, text, logo, username, artist signature" + ), + }, + "furry-il": { + "ckpt": "novaFurryXL_ilV170.safetensors", + "sampler": "euler_ancestral", + "scheduler": "normal", + "cfg": 4.0, + "steps": 30, + "clip_skip": 2, + # Illustrious wants `newest` in positive and `old`/`oldest` in negative + # — these are year-bucket tags from the training set. `furry` and + # `anthro` are universally helpful here. + "prefix": ( + "masterpiece, best quality, amazing quality, very aesthetic, " + "high resolution, ultra-detailed, absurdres, newest, furry, anthro, " + ), + "negative": ( + "human, multiple tails, modern, recent, old, oldest, " + "graphic, cartoon, painting, crayon, graphite, abstract, glitch, " + "deformed, mutated, ugly, disfigured, long body, conjoined, " + "lowres, bad anatomy, bad hands, missing fingers, extra digits, fewer digits, " + "cropped, very displeasing, worst quality, bad quality, sketch, " + "jpeg artifacts, signature, watermark, username, text, simple background, " + "bad ai-generated" + ), + }, } -# Style-specific negative prompts. Always applied as the baseline; whatever -# the caller supplies via `negative_prompt` is appended on top. -# -# Quality-focused only — no NSFW or content filtering by default. 
If you -# want SFW-by-default, add an explicit safe-mode flag rather than baking -# content terms in here (some checkpoints in this set are commonly used -# for adult work and would fight the negative). -NEGATIVES = { - "photo": ( - "cartoon, drawing, illustration, anime, manga, painting, sketch, " - "render, 3d, cgi, watercolor, plastic skin, doll-like, oversaturated, " - "lowres, blurry, jpeg artifacts, noisy, grainy, low quality, worst quality, " - "bad anatomy, deformed, mutated, extra limbs, extra fingers, missing fingers, " - "fused fingers, malformed hands, asymmetric face, " - "watermark, signature, text, logo, username" - ), - "juggernaut": ( - "cartoon, drawing, illustration, anime, manga, painting, sketch, " - "render, 3d, cgi, plastic skin, washed out, oversaturated, " - "lowres, blurry, jpeg artifacts, low quality, worst quality, " - "bad anatomy, deformed, mutated, extra limbs, extra fingers, missing fingers, " - "fused fingers, malformed hands, " - "watermark, signature, text, logo, username" - ), - "pony": ( - "score_6, score_5, score_4, " - "worst quality, low quality, lowres, blurry, jpeg artifacts, noisy, " - "bad anatomy, bad proportions, bad hands, extra digit, fewer digits, " - "fused fingers, malformed limbs, deformed, ugly, " - "censored, monochrome, " - "watermark, signature, text, logo, artist name, patreon username, twitter username" - ), - "general": ( - "lowres, blurry, jpeg artifacts, noisy, grainy, low quality, worst quality, " - "bad anatomy, deformed, mutated, extra limbs, missing fingers, fused fingers, " - "malformed hands, ugly, " - "watermark, signature, text, logo" - ), - "furry-nai": ( - "human, realistic, photorealistic, 3d, cgi, " - "worst quality, low quality, lowres, blurry, jpeg artifacts, noisy, " - "bad anatomy, bad proportions, extra digit, fewer digits, fused fingers, " - "malformed limbs, deformed, ugly, " - "watermark, signature, text, logo, artist signature, patreon username" - ), - "furry-noob": ( - "human, 
realistic, photorealistic, 3d, cgi, " - "worst quality, low quality, lowres, blurry, jpeg artifacts, noisy, " - "bad anatomy, bad proportions, extra digit, fewer digits, fused fingers, " - "malformed limbs, deformed, ugly, " - "watermark, signature, text, logo, artist signature, patreon username" - ), - "furry-il": ( - "human, realistic, photorealistic, 3d, cgi, " - "worst quality, low quality, lowres, blurry, jpeg artifacts, noisy, " - "bad anatomy, bad proportions, extra digit, fewer digits, fused fingers, " - "malformed limbs, deformed, ugly, " - "watermark, signature, text, logo, artist signature, patreon username" - ), -} +DEFAULT_STYLE = "general" # First-match-wins keyword router used when the caller didn't pick a style. # Order matters — narrower patterns above broader ones. ROUTING_RULES = [ + # Pony score chain is the single strongest signal — Pony only (re.compile(r"\bscore_\d", re.I), "pony"), (re.compile(r"\bpony\b", re.I), "pony"), + # NoobAI / Illustrious explicit mentions (re.compile(r"\b(noobai|noob)\b", re.I), "furry-noob"), (re.compile(r"\b(illustrious|ilxl)\b", re.I), "furry-il"), - (re.compile(r"\b(furry|anthro|feral|kemono|fursona|species)\b", re.I), "furry-nai"), + # Generic furry — defaults to NovaFurry (Illustrious lineage, current sweet spot) + (re.compile(r"\b(furry|anthro|feral|kemono|fursona|species)\b", re.I), "furry-il"), + # Photo / photoreal (re.compile(r"\b(juggernaut)\b", re.I), "juggernaut"), (re.compile(r"\b(photo|photograph|realistic|portrait|selfie|cinematic)\b", re.I), "photo"), + # Generic anime / illustration → Pony covers anime well (re.compile(r"\b(anime|manga|2d|illustration)\b", re.I), "pony"), ] -DEFAULT_STYLE = "general" - def _route_style(prompt: str) -> str: for pattern, style in ROUTING_RULES: @@ -115,24 +195,41 @@ def _route_style(prompt: str) -> str: return DEFAULT_STYLE -def _build_workflow(prompt, negative, ckpt, width, height, steps, cfg, seed): +def _build_workflow(positive: str, negative: str, settings: 
dict, + width: int, height: int, seed: int) -> dict: + """ + Construct an SDXL txt2img workflow. CLIP skip is implemented via a + CLIPSetLastLayer node so the same graph works for skip 1 (-1) and + skip 2 (-2). + """ return { "3": {"class_type": "KSampler", "inputs": { "seed": seed if seed > 0 else int(time.time() * 1000) % (2**31), - "steps": steps, "cfg": cfg, - "sampler_name": "dpmpp_2m", "scheduler": "karras", + "steps": settings["steps"], + "cfg": settings["cfg"], + "sampler_name": settings["sampler"], + "scheduler": settings["scheduler"], "denoise": 1.0, - "model": ["4", 0], "positive": ["6", 0], - "negative": ["7", 0], "latent_image": ["5", 0], + "model": ["4", 0], + "positive": ["6", 0], + "negative": ["7", 0], + "latent_image": ["5", 0], }}, - "4": {"class_type": "CheckpointLoaderSimple", "inputs": {"ckpt_name": ckpt}}, + "4": {"class_type": "CheckpointLoaderSimple", + "inputs": {"ckpt_name": settings["ckpt"]}}, "5": {"class_type": "EmptyLatentImage", "inputs": {"width": width, "height": height, "batch_size": 1}}, - "6": {"class_type": "CLIPTextEncode", "inputs": {"text": prompt, "clip": ["4", 1]}}, - "7": {"class_type": "CLIPTextEncode", "inputs": {"text": negative, "clip": ["4", 1]}}, - "8": {"class_type": "VAEDecode", "inputs": {"samples": ["3", 0], "vae": ["4", 2]}}, + "6": {"class_type": "CLIPTextEncode", + "inputs": {"text": positive, "clip": ["10", 0]}}, + "7": {"class_type": "CLIPTextEncode", + "inputs": {"text": negative, "clip": ["10", 0]}}, + "8": {"class_type": "VAEDecode", + "inputs": {"samples": ["3", 0], "vae": ["4", 2]}}, "9": {"class_type": "SaveImage", "inputs": {"filename_prefix": "smartgen", "images": ["8", 0]}}, + "10": {"class_type": "CLIPSetLastLayer", + "inputs": {"stop_at_clip_layer": -settings["clip_skip"], + "clip": ["4", 1]}}, } @@ -142,10 +239,8 @@ class Tools: default="http://comfyui:8188", description="ComfyUI server URL reachable from the open-webui container.", ) - DEFAULT_STEPS: int = Field(default=25, 
description="KSampler steps.") - DEFAULT_CFG: float = Field(default=7.0, description="CFG scale.") TIMEOUT_SECONDS: int = Field( - default=180, + default=240, description="Maximum wait for a single generation to complete.", ) @@ -163,33 +258,51 @@ class Tools: __event_emitter__: Optional[Callable[[dict], Awaitable[None]]] = None, ) -> str: """ - Generate an image with the right SDXL checkpoint for the request. + Generate an image with the right SDXL checkpoint and creator- + recommended sampler/CFG/steps/prompt-format for the request. Pick `style` based on what the user wants: - "photo": photorealistic photographs, portraits, cinematic shots. + Uses CyberRealisticXL — natural-language prompts, no quality tags. - "juggernaut": versatile photoreal alternative — sharper, more saturated. - - "pony": anime / illustration with score tags (score_9, score_8_up, ...). - - "general": catch-all SDXL when no specific style applies. + Uses Juggernaut-XL — natural-language prompts, no quality tags. + - "pony": anime / illustration with Pony's score-tag prompt format. + Uses Pony Diffusion V6 XL — score_9..score_4_up chain auto-prepended. + Best for anime, cartoon, and stylised art. + - "general": amateur-photo aesthetic, catch-all SDXL. + Uses TalmendoXL — natural-language prompts, higher CFG. - "furry-nai": anthropomorphic characters, NAI-trained mix. + Uses reedFURRYMix — booru quality tags auto-prepended. - "furry-noob": anthropomorphic characters, NoobAI base. + Uses IndigoVoid FurryFused — booru quality tags auto-prepended. - "furry-il": anthropomorphic characters, Illustrious base. + Uses NovaFurryXL — booru quality + year tags auto-prepended. + Default for unspecified furry/anthro requests. If `style` is omitted, the tool auto-detects from `prompt` keywords. + Each style has its own creator-recommended sampler, CFG, steps, and + CLIP skip — you don't need to override any of these. - :param prompt: The image description. + :param prompt: The image description. 
Style-appropriate quality + tags (Pony score chain, Booru masterpiece chain, etc.) are + prepended automatically — don't include them in `prompt`. :param style: One of the keys above. Omit to auto-route. - :param negative_prompt: Extra negatives appended to the style default. - :param width: Output width in pixels (default 1024, SDXL native). - :param height: Output height in pixels (default 1024, SDXL native). + :param negative_prompt: Extra negatives appended to the per-style + baseline. Usually unneeded — each style ships with a tuned + negative. + :param width: Output width in pixels (default 1024 — SDXL native). + Use 832 for portraits with height 1216, or 1216 with height 832 + for landscapes. + :param height: Output height in pixels (default 1024). :param seed: Specific seed, or 0 to randomize. :return: Markdown embedding the generated image. """ chosen = style or _route_style(prompt) - ckpt = CHECKPOINTS.get(chosen) - if not ckpt: + settings = STYLES.get(chosen) + if not settings: return ( f"Unknown style '{chosen}'. 
" - f"Available: {', '.join(CHECKPOINTS.keys())}" + f"Available: {', '.join(STYLES.keys())}" ) async def emit(msg: str, done: bool = False): @@ -199,20 +312,19 @@ class Tools: "data": {"description": msg, "done": done}, }) - await emit(f"Routing to {chosen} ({ckpt})") + await emit(f"Routing to {chosen} ({settings['ckpt']})") - negative = NEGATIVES.get(chosen, "") + positive = f"{settings['prefix']}{prompt}" + negative = settings["negative"] if negative_prompt: - negative = f"{negative}, {negative_prompt}" if negative else negative_prompt + negative = f"{negative}, {negative_prompt}" workflow = _build_workflow( - prompt=prompt, + positive=positive, negative=negative, - ckpt=ckpt, + settings=settings, width=width, height=height, - steps=self.valves.DEFAULT_STEPS, - cfg=self.valves.DEFAULT_CFG, seed=seed, ) @@ -231,7 +343,10 @@ class Tools: if not prompt_id: return f"ComfyUI didn't return a prompt_id: {submit}" - await emit("Queued, sampling…") + await emit( + f"Sampling — {settings['sampler']}/{settings['scheduler']}, " + f"CFG {settings['cfg']}, {settings['steps']} steps" + ) deadline = time.time() + self.valves.TIMEOUT_SECONDS output_images = []