smart_image_gen v0.7: edit_image finds previously-emitted images

Bug: after generate_image surfaced an image via the files event, the
next edit_image call returned 'No image found in the chat'. The image
was attached to the assistant's message, but _extract_attached_image
only scanned the user's __files__ param and image_url content blocks
on user messages — it never looked at messages.files for any role.

Fix: rewrite extraction to scan messages[].files in reverse for ALL
roles, so an assistant-emitted image from a prior tool call is found
the same way as a user-attached upload. Use Open WebUI's internal
Files.get_file_by_id when the file dict has an id, so we get raw
bytes from disk without going through the auth-protected
/api/v1/files/{id}/content endpoint. The old path-key read is still
tried first, and the old URL fetch is kept as a last-resort fallback.

Refactored shared helpers _file_dict_is_image and _read_file_dict
out of the loop to keep the search logic readable.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-19 14:46:10 -05:00
parent d935e24624
commit f6f5690fcd

View File

@@ -1,7 +1,7 @@
"""
title: Smart Image Generator & Editor (ComfyUI)
author: ai-stack
version: 0.6.0
version: 0.7.0
description: Generate or edit images via ComfyUI with automatic SDXL
checkpoint routing. Two methods — generate_image (txt2img) and
edit_image (img2img on the user's most recently attached image). The
@@ -34,6 +34,7 @@ from pydantic import BaseModel, Field
# falls back to emitting a markdown data-URI message.
try:
from fastapi import UploadFile
from open_webui.models.files import Files
from open_webui.models.users import Users
from open_webui.routers.files import upload_file_handler
@@ -338,20 +339,76 @@ def _build_img2img(positive: str, negative: str, settings: dict,
}
def _file_dict_is_image(f: dict) -> bool:
ftype = (f.get("type") or "").lower()
fname = (f.get("name") or f.get("filename") or "").lower()
return "image" in ftype or fname.endswith((".png", ".jpg", ".jpeg", ".webp"))
def _read_file_dict(f: dict) -> Optional[bytes]:
"""
Try to read raw bytes for one file dict. Path keys first (covers local
uploads), then Open WebUI's Files model lookup by id (covers assistant-
emitted images that only have an id + relative URL). Returns None if
no method worked.
"""
for path_key in ("path", "filepath", "file_path"):
path = f.get(path_key)
if path:
try:
with open(path, "rb") as fh:
return fh.read()
except OSError:
pass
fid = f.get("id")
if _OPENWEBUI_RUNTIME and fid:
try:
file_model = Files.get_file_by_id(fid)
if file_model is not None:
# FileModel may expose path directly or under .meta
path = getattr(file_model, "path", None)
if not path:
meta = getattr(file_model, "meta", None) or {}
if isinstance(meta, dict):
path = meta.get("path")
else:
path = getattr(meta, "path", None)
if path:
try:
with open(path, "rb") as fh:
return fh.read()
except OSError:
pass
except Exception:
pass
return None
async def _extract_attached_image(
files: Optional[list],
messages: Optional[list],
session: aiohttp.ClientSession,
) -> Optional[bytes]:
"""
Find the most recent image the user attached to the chat. Tries three
sources in order: (1) base64 data URIs in `image_url` content blocks
of the recent messages (works for vision-capable models), (2) a local
filesystem path on the file dict (open-webui stores uploads under
/app/backend/data/uploads/), (3) the file's url field, fetched over
HTTP. Returns raw image bytes, or None if nothing matched.
Find the most recent image in the chat — including images previously
emitted by this tool itself. Search order (most recent first):
1. Inline base64 data URIs in `image_url` content blocks of recent
messages (vision-model uploads, paste-from-clipboard).
2. Files attached to messages in the conversation, scanned in
REVERSE so the newest image wins. This covers two cases:
a. Files the user just attached (current user message).
b. Files the assistant emitted via prior `generate_image` /
`edit_image` calls (attached to assistant messages by the
`files` event in _push_image_to_chat).
3. The __files__ tool param as a final fallback (some Open WebUI
versions pass user uploads here instead of on the message).
4. Best-effort URL fetch on any leftover file dict (likely fails
on auth-protected endpoints — last resort).
"""
# Messages: standard OpenAI image_url content blocks.
# 1. Inline data URIs on recent messages.
for msg in reversed(messages or []):
content = msg.get("content") if isinstance(msg, dict) else None
if isinstance(content, list):
@@ -365,27 +422,38 @@ async def _extract_attached_image(
except Exception:
pass
# Files: try local path, then URL.
# 2. Files on messages, newest first.
for msg in reversed(messages or []):
if not isinstance(msg, dict):
continue
msg_files = msg.get("files")
if not isinstance(msg_files, list):
continue
for f in msg_files:
if not isinstance(f, dict) or not _file_dict_is_image(f):
continue
data = _read_file_dict(f)
if data is not None:
return data
# 3. __files__ param (current user upload, sometimes only here).
for f in files or []:
if not isinstance(f, dict):
continue
ftype = (f.get("type") or "").lower()
fname = (f.get("name") or f.get("filename") or "").lower()
is_image = "image" in ftype or fname.endswith((".png", ".jpg", ".jpeg", ".webp"))
if not is_image:
if not isinstance(f, dict) or not _file_dict_is_image(f):
continue
data = _read_file_dict(f)
if data is not None:
return data
for path_key in ("path", "filepath", "file_path"):
path = f.get(path_key)
if path:
try:
with open(path, "rb") as fh:
return fh.read()
except OSError:
pass
url = f.get("url")
if url:
# 4. Last-resort URL fetch (no auth — only works for public endpoints).
for source in [files or []] + [
(msg.get("files") or []) for msg in reversed(messages or []) if isinstance(msg, dict)
]:
for f in source:
if not isinstance(f, dict) or not _file_dict_is_image(f):
continue
url = f.get("url")
if not url:
continue
full = url if url.startswith("http") else f"http://localhost:8080{url}"
try:
async with session.get(full) as resp: