From f6f5690fcd4eb3e24f2cf8e9ca4a296bfcb9dd6d Mon Sep 17 00:00:00 2001 From: William Gill Date: Sun, 19 Apr 2026 14:46:10 -0500 Subject: [PATCH] smart_image_gen v0.7: edit_image finds previously-emitted images MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bug: after generate_image surfaced an image via the files event, the next edit_image call returned 'No image found in the chat'. The image was attached to the assistant's message, but _extract_attached_image only scanned the user's __files__ param and image_url content blocks on user messages — it never looked at messages[].files for any role. Fix: rewrite extraction to scan messages[].files in reverse for ALL roles, so an assistant-emitted image from a prior tool call is found the same way as a user-attached upload. Use Open WebUI's internal Files.get_file_by_id when the file dict has an id, so we get raw bytes from disk without going through the auth-protected /api/v1/files/{id}/content endpoint. The old path-key read is still tried first, before the Files lookup, and the unauthenticated URL fetch is kept as a last-resort fallback. Refactored shared helpers _file_dict_is_image and _read_file_dict out of the loop to keep the search logic readable. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openwebui-tools/smart_image_gen.py | 120 ++++++++++++++---- 1 file changed, 94 insertions(+), 26 deletions(-) diff --git a/deployments/ai-stack/openwebui-tools/smart_image_gen.py b/deployments/ai-stack/openwebui-tools/smart_image_gen.py index 0924bbc..ca7c45d 100644 --- a/deployments/ai-stack/openwebui-tools/smart_image_gen.py +++ b/deployments/ai-stack/openwebui-tools/smart_image_gen.py @@ -1,7 +1,7 @@ """ title: Smart Image Generator & Editor (ComfyUI) author: ai-stack -version: 0.6.0 +version: 0.7.0 description: Generate or edit images via ComfyUI with automatic SDXL checkpoint routing. Two methods — generate_image (txt2img) and edit_image (img2img on the user's most recently attached image). 
The @@ -34,6 +34,7 @@ from pydantic import BaseModel, Field # falls back to emitting a markdown data-URI message. try: from fastapi import UploadFile + from open_webui.models.files import Files from open_webui.models.users import Users from open_webui.routers.files import upload_file_handler @@ -338,20 +339,76 @@ def _build_img2img(positive: str, negative: str, settings: dict, } +def _file_dict_is_image(f: dict) -> bool: + ftype = (f.get("type") or "").lower() + fname = (f.get("name") or f.get("filename") or "").lower() + return "image" in ftype or fname.endswith((".png", ".jpg", ".jpeg", ".webp")) + + +def _read_file_dict(f: dict) -> Optional[bytes]: + """ + Try to read raw bytes for one file dict. Path keys first (covers local + uploads), then Open WebUI's Files model lookup by id (covers assistant- + emitted images that only have an id + relative URL). Returns None if + no method worked. + """ + for path_key in ("path", "filepath", "file_path"): + path = f.get(path_key) + if path: + try: + with open(path, "rb") as fh: + return fh.read() + except OSError: + pass + + fid = f.get("id") + if _OPENWEBUI_RUNTIME and fid: + try: + file_model = Files.get_file_by_id(fid) + if file_model is not None: + # FileModel may expose path directly or under .meta + path = getattr(file_model, "path", None) + if not path: + meta = getattr(file_model, "meta", None) or {} + if isinstance(meta, dict): + path = meta.get("path") + else: + path = getattr(meta, "path", None) + if path: + try: + with open(path, "rb") as fh: + return fh.read() + except OSError: + pass + except Exception: + pass + + return None + + async def _extract_attached_image( files: Optional[list], messages: Optional[list], session: aiohttp.ClientSession, ) -> Optional[bytes]: """ - Find the most recent image the user attached to the chat. 
Tries three - sources in order: (1) base64 data URIs in `image_url` content blocks - of the recent messages (works for vision-capable models), (2) a local - filesystem path on the file dict (open-webui stores uploads under - /app/backend/data/uploads/), (3) the file's url field, fetched over - HTTP. Returns raw image bytes, or None if nothing matched. + Find the most recent image in the chat — including images previously + emitted by this tool itself. Search order (most recent first): + + 1. Inline base64 data URIs in `image_url` content blocks of recent + messages (vision-model uploads, paste-from-clipboard). + 2. Files attached to messages in the conversation, scanned in + REVERSE so the newest image wins. This covers two cases: + a. Files the user just attached (current user message). + b. Files the assistant emitted via prior `generate_image` / + `edit_image` calls (attached to assistant messages by the + `files` event in _push_image_to_chat). + 3. The __files__ tool param as a final fallback (some Open WebUI + versions pass user uploads here instead of on the message). + 4. Best-effort URL fetch on any leftover file dict (likely fails + on auth-protected endpoints — last resort). """ - # Messages: standard OpenAI image_url content blocks. + # 1. Inline data URIs on recent messages. for msg in reversed(messages or []): content = msg.get("content") if isinstance(msg, dict) else None if isinstance(content, list): @@ -365,27 +422,38 @@ async def _extract_attached_image( except Exception: pass - # Files: try local path, then URL. + # 2. Files on messages, newest first. + for msg in reversed(messages or []): + if not isinstance(msg, dict): + continue + msg_files = msg.get("files") + if not isinstance(msg_files, list): + continue + for f in msg_files: + if not isinstance(f, dict) or not _file_dict_is_image(f): + continue + data = _read_file_dict(f) + if data is not None: + return data + + # 3. __files__ param (current user upload, sometimes only here). 
for f in files or []: - if not isinstance(f, dict): - continue - ftype = (f.get("type") or "").lower() - fname = (f.get("name") or f.get("filename") or "").lower() - is_image = "image" in ftype or fname.endswith((".png", ".jpg", ".jpeg", ".webp")) - if not is_image: + if not isinstance(f, dict) or not _file_dict_is_image(f): continue + data = _read_file_dict(f) + if data is not None: + return data - for path_key in ("path", "filepath", "file_path"): - path = f.get(path_key) - if path: - try: - with open(path, "rb") as fh: - return fh.read() - except OSError: - pass - - url = f.get("url") - if url: + # 4. Last-resort URL fetch (no auth — only works for public endpoints). + for source in [files or []] + [ + (msg.get("files") or []) for msg in reversed(messages or []) if isinstance(msg, dict) + ]: + for f in source: + if not isinstance(f, dict) or not _file_dict_is_image(f): + continue + url = f.get("url") + if not url: + continue full = url if url.startswith("http") else f"http://localhost:8080{url}" try: async with session.get(full) as resp: