smart_image_gen v0.7: edit_image finds previously-emitted images

Bug: after generate_image surfaced an image via the files event, the next edit_image call returned 'No image found in the chat'. The image was attached to the assistant's message, but _extract_attached_image only scanned the user's __files__ param and image_url content blocks on user messages — it never looked at messages.files for any role. Fix: rewrite extraction to scan messages[].files in reverse for ALL roles, so an assistant-emitted image from a prior tool call is found the same way as a user-attached upload. Use Open WebUI's internal Files.get_file_by_id when the file dict has an id, so we get raw bytes from disk without going through the auth-protected /api/v1/files/{id}/content endpoint. Old path-key and URL-fetch paths kept as fallbacks. Refactored shared helpers _file_dict_is_image and _read_file_dict out of the loop to keep the search logic readable. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-19 14:46:10 -05:00
parent d935e24624
commit f6f5690fcd
1 changed files with 94 additions and 26 deletions
--- a/deployments/ai-stack/openwebui-tools/smart_image_gen.py
+++ b/deployments/ai-stack/openwebui-tools/smart_image_gen.py
@@ -1,7 +1,7 @@
 """
 title: Smart Image Generator & Editor (ComfyUI)
 author: ai-stack
-version: 0.6.0
+version: 0.7.0
 description: Generate or edit images via ComfyUI with automatic SDXL
    checkpoint routing. Two methods — generate_image (txt2img) and
    edit_image (img2img on the user's most recently attached image). The
@@ -34,6 +34,7 @@ from pydantic import BaseModel, Field
 # falls back to emitting a markdown data-URI message.
 try:
    from fastapi import UploadFile
+    from open_webui.models.files import Files
    from open_webui.models.users import Users
    from open_webui.routers.files import upload_file_handler

@@ -338,20 +339,76 @@ def _build_img2img(positive: str, negative: str, settings: dict,
    }


+def _file_dict_is_image(f: dict) -> bool:  # True when the type contains "image" or the name ends in a listed image extension
+    ftype = (f.get("type") or "").lower()  # `or ""` turns a missing/None type into an empty string
+    fname = (f.get("name") or f.get("filename") or "").lower()  # "name" is tried first, "filename" is the fallback key
+    return "image" in ftype or fname.endswith((".png", ".jpg", ".jpeg", ".webp"))
+
+
+def _read_file_dict(f: dict) -> Optional[bytes]:
+    """
+    Try to read raw bytes for one file dict. Path keys first (covers local
+    uploads), then Open WebUI's Files model lookup by id (covers assistant-
+    emitted images that only have an id + relative URL). Returns None if
+    no method worked.
+    """
+    for path_key in ("path", "filepath", "file_path"):  # the first key whose path opens successfully wins
+        path = f.get(path_key)
+        if path:
+            try:
+                with open(path, "rb") as fh:  # NOTE(review): blocking read, and this helper is called from the async _extract_attached_image
+                    return fh.read()
+            except OSError:  # unreadable path: move on to the next key, then to the id lookup
+                pass

+    fid = f.get("id")
+    if _OPENWEBUI_RUNTIME and fid:  # NOTE(review): _OPENWEBUI_RUNTIME is defined outside this hunk — presumably set by the guarded open_webui import; confirm
+        try:
+            file_model = Files.get_file_by_id(fid)
+            if file_model is not None:
+                # FileModel may expose path directly or under .meta
+                path = getattr(file_model, "path", None)
+                if not path:
+                    meta = getattr(file_model, "meta", None) or {}  # a missing/None meta becomes an empty dict
+                    if isinstance(meta, dict):
+                        path = meta.get("path")
+                    else:
+                        path = getattr(meta, "path", None)
+                if path:
+                    try:
+                        with open(path, "rb") as fh:
+                            return fh.read()
+                    except OSError:  # stored path not readable: fall through to `return None`
+                        pass
+        except Exception:  # best-effort: any failure in the lookup falls through to `return None`
+            pass

+    return None
+
+
 async def _extract_attached_image(
    files: Optional[list],
    messages: Optional[list],
    session: aiohttp.ClientSession,
 ) -> Optional[bytes]:
    """
-    Find the most recent image the user attached to the chat. Tries three
-    sources in order: (1) base64 data URIs in `image_url` content blocks
-    of the recent messages (works for vision-capable models), (2) a local
-    filesystem path on the file dict (open-webui stores uploads under
-    /app/backend/data/uploads/), (3) the file's url field, fetched over
-    HTTP. Returns raw image bytes, or None if nothing matched.
+    Find an image in the chat — including images previously emitted by this
+    tool itself. Sources are tried in this order (messages scanned newest first):
+
+      1. Inline base64 data URIs in `image_url` content blocks of recent
+         messages (vision-model uploads, paste-from-clipboard).
+      2. Files attached to messages in the conversation, scanned in
+         REVERSE so the newest image wins. This covers two cases:
+           a. Files the user just attached (current user message).
+           b. Files the assistant emitted via prior `generate_image` /
+              `edit_image` calls (attached to assistant messages by the
+              `files` event in _push_image_to_chat).
+      3. The __files__ tool param as a final fallback (some Open WebUI
+         versions pass user uploads here instead of on the message).
+      4. Best-effort URL fetch on any leftover file dict (likely fails
+         on auth-protected endpoints — last resort).
    """
-    # Messages: standard OpenAI image_url content blocks.
+    # 1. Inline data URIs on recent messages.
    for msg in reversed(messages or []):
        content = msg.get("content") if isinstance(msg, dict) else None
        if isinstance(content, list):
@@ -365,27 +422,38 @@ async def _extract_attached_image(
                    except Exception:
                        pass

-    # Files: try local path, then URL.
+    # 2. Files on messages, newest first.
+    for msg in reversed(messages or []):
+        if not isinstance(msg, dict):
+            continue
+        msg_files = msg.get("files")
+        if not isinstance(msg_files, list):
+            continue
+        for f in msg_files:
+            if not isinstance(f, dict) or not _file_dict_is_image(f):
+                continue
+            data = _read_file_dict(f)
+            if data is not None:
+                return data
+
+    # 3. __files__ param (current user upload, sometimes only here).
    for f in files or []:
-        if not isinstance(f, dict):
-            continue
-        ftype = (f.get("type") or "").lower()
-        fname = (f.get("name") or f.get("filename") or "").lower()
-        is_image = "image" in ftype or fname.endswith((".png", ".jpg", ".jpeg", ".webp"))
-        if not is_image:
+        if not isinstance(f, dict) or not _file_dict_is_image(f):
            continue
+        data = _read_file_dict(f)
+        if data is not None:
+            return data

-        for path_key in ("path", "filepath", "file_path"):
-            path = f.get(path_key)
-            if path:
-                try:
-                    with open(path, "rb") as fh:
-                        return fh.read()
-                except OSError:
-                    pass
-
-        url = f.get("url")
-        if url:
+    # 4. Last-resort URL fetch (no auth — only works for public endpoints).
+    for source in [files or []] + [
+        (msg.get("files") or []) for msg in reversed(messages or []) if isinstance(msg, dict)
+    ]:
+        for f in source:
+            if not isinstance(f, dict) or not _file_dict_is_image(f):
+                continue
+            url = f.get("url")
+            if not url:
+                continue
            full = url if url.startswith("http") else f"http://localhost:8080{url}"
            try:
                async with session.get(full) as resp: