From f6f5690fcd4eb3e24f2cf8e9ca4a296bfcb9dd6d Mon Sep 17 00:00:00 2001
From: William Gill <william.gill@anomalous.dev>
Date: Sun, 19 Apr 2026 14:46:10 -0500
Subject: [PATCH] smart_image_gen v0.7: edit_image finds previously-emitted
 images
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bug: after generate_image surfaced an image via the files event, the
next edit_image call returned 'No image found in the chat'. The image
was attached to the assistant's message, but _extract_attached_image
only scanned the user's __files__ param and image_url content blocks
on user messages — it never looked at messages[].files for any role.

Fix: rewrite extraction to scan messages[].files in reverse for ALL
roles, so an assistant-emitted image from a prior tool call is found
the same way as a user-attached upload. Use Open WebUI's internal
Files.get_file_by_id when the file dict has an id, so we get raw
bytes from disk without going through the auth-protected
/api/v1/files/{id}/content endpoint. The old path-key lookup still
runs first; the unauthenticated URL fetch is kept as a last resort.

Refactored shared helpers _file_dict_is_image and _read_file_dict
out of the loop to keep the search logic readable.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../openwebui-tools/smart_image_gen.py        | 120 ++++++++++++++----
 1 file changed, 94 insertions(+), 26 deletions(-)

diff --git a/deployments/ai-stack/openwebui-tools/smart_image_gen.py b/deployments/ai-stack/openwebui-tools/smart_image_gen.py
index 0924bbc..ca7c45d 100644
--- a/deployments/ai-stack/openwebui-tools/smart_image_gen.py
+++ b/deployments/ai-stack/openwebui-tools/smart_image_gen.py
@@ -1,7 +1,7 @@
 """
 title: Smart Image Generator & Editor (ComfyUI)
 author: ai-stack
-version: 0.6.0
+version: 0.7.0
 description: Generate or edit images via ComfyUI with automatic SDXL
     checkpoint routing. Two methods — generate_image (txt2img) and
     edit_image (img2img on the user's most recently attached image). The
@@ -34,6 +34,7 @@ from pydantic import BaseModel, Field
 # falls back to emitting a markdown data-URI message.
 try:
     from fastapi import UploadFile
+    from open_webui.models.files import Files
     from open_webui.models.users import Users
     from open_webui.routers.files import upload_file_handler
 
@@ -338,20 +339,76 @@ def _build_img2img(positive: str, negative: str, settings: dict,
     }
 
 
+def _file_dict_is_image(f: dict) -> bool:
+    ftype = (f.get("type") or "").lower()
+    fname = (f.get("name") or f.get("filename") or "").lower()
+    return "image" in ftype or fname.endswith((".png", ".jpg", ".jpeg", ".webp"))
+
+
+def _read_file_dict(f: dict) -> Optional[bytes]:
+    """
+    Try to read raw bytes for one file dict. Path keys first (covers local
+    uploads), then Open WebUI's Files model lookup by id (covers assistant-
+    emitted images that only have an id + relative URL). Returns None if
+    no method worked.
+    """
+    for path_key in ("path", "filepath", "file_path"):
+        path = f.get(path_key)
+        if path:
+            try:
+                with open(path, "rb") as fh:
+                    return fh.read()
+            except OSError:
+                pass
+
+    fid = f.get("id")
+    if _OPENWEBUI_RUNTIME and fid:
+        try:
+            file_model = Files.get_file_by_id(fid)
+            if file_model is not None:
+                # FileModel may expose path directly or under .meta
+                path = getattr(file_model, "path", None)
+                if not path:
+                    meta = getattr(file_model, "meta", None) or {}
+                    if isinstance(meta, dict):
+                        path = meta.get("path")
+                    else:
+                        path = getattr(meta, "path", None)
+                if path:
+                    try:
+                        with open(path, "rb") as fh:
+                            return fh.read()
+                    except OSError:
+                        pass
+        except Exception:
+            pass
+
+    return None
+
+
 async def _extract_attached_image(
     files: Optional[list],
     messages: Optional[list],
     session: aiohttp.ClientSession,
 ) -> Optional[bytes]:
     """
-    Find the most recent image the user attached to the chat. Tries three
-    sources in order: (1) base64 data URIs in `image_url` content blocks
-    of the recent messages (works for vision-capable models), (2) a local
-    filesystem path on the file dict (open-webui stores uploads under
-    /app/backend/data/uploads/), (3) the file's url field, fetched over
-    HTTP. Returns raw image bytes, or None if nothing matched.
+    Find the most recent image in the chat — including images this tool
+    emitted earlier. Sources in the order tried (messages newest first):
+
+      1. Inline base64 data URIs in `image_url` content blocks of recent
+         messages (vision-model uploads, paste-from-clipboard).
+      2. Files attached to messages in the conversation, scanned in
+         REVERSE so the newest image wins. This covers two cases:
+           a. Files the user just attached (current user message).
+           b. Files the assistant emitted via prior `generate_image` /
+              `edit_image` calls (attached to assistant messages by the
+              `files` event in _push_image_to_chat).
+      3. The __files__ tool param as a final fallback (some Open WebUI
+         versions pass user uploads here instead of on the message).
+      4. Best-effort URL fetch on any leftover file dict (likely fails
+         on auth-protected endpoints — last resort).
     """
-    # Messages: standard OpenAI image_url content blocks.
+    # 1. Inline data URIs on recent messages.
     for msg in reversed(messages or []):
         content = msg.get("content") if isinstance(msg, dict) else None
         if isinstance(content, list):
@@ -365,27 +422,38 @@ async def _extract_attached_image(
                     except Exception:
                         pass
 
-    # Files: try local path, then URL.
+    # 2. Files on messages, newest first.
+    for msg in reversed(messages or []):
+        if not isinstance(msg, dict):
+            continue
+        msg_files = msg.get("files")
+        if not isinstance(msg_files, list):
+            continue
+        for f in msg_files:
+            if not isinstance(f, dict) or not _file_dict_is_image(f):
+                continue
+            data = _read_file_dict(f)
+            if data is not None:
+                return data
+
+    # 3. __files__ param (current user upload, sometimes only here).
     for f in files or []:
-        if not isinstance(f, dict):
-            continue
-        ftype = (f.get("type") or "").lower()
-        fname = (f.get("name") or f.get("filename") or "").lower()
-        is_image = "image" in ftype or fname.endswith((".png", ".jpg", ".jpeg", ".webp"))
-        if not is_image:
+        if not isinstance(f, dict) or not _file_dict_is_image(f):
             continue
+        data = _read_file_dict(f)
+        if data is not None:
+            return data
 
-        for path_key in ("path", "filepath", "file_path"):
-            path = f.get(path_key)
-            if path:
-                try:
-                    with open(path, "rb") as fh:
-                        return fh.read()
-                except OSError:
-                    pass
-
-        url = f.get("url")
-        if url:
+    # 4. Last-resort URL fetch (no auth — only works for public endpoints).
+    for source in [files or []] + [
+        (msg.get("files") or []) for msg in reversed(messages or []) if isinstance(msg, dict)
+    ]:
+        for f in source:
+            if not isinstance(f, dict) or not _file_dict_is_image(f):
+                continue
+            url = f.get("url")
+            if not url:
+                continue
             full = url if url.startswith("http") else f"http://localhost:8080{url}"
             try:
                 async with session.get(full) as resp: