smart_image_gen v0.7: edit_image finds previously-emitted images
Bug: after generate_image surfaced an image via the files event, the
next edit_image call returned 'No image found in the chat'. The image
was attached to the assistant's message, but _extract_attached_image
only scanned the user's __files__ param and image_url content blocks
on user messages — it never looked at messages.files for any role.
Fix: rewrite extraction to scan messages[].files in reverse for ALL
roles, so an assistant-emitted image from a prior tool call is found
the same way as a user-attached upload. Use Open WebUI's internal
Files.get_file_by_id when the file dict has an id, so we get raw
bytes from disk without going through the auth-protected
/api/v1/files/{id}/content endpoint. The old path-key read is still
tried first; the URL fetch is kept as a last-resort fallback.
Refactored shared helpers _file_dict_is_image and _read_file_dict
out of the loop to keep the search logic readable.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
"""
|
||||
title: Smart Image Generator & Editor (ComfyUI)
|
||||
author: ai-stack
|
||||
version: 0.6.0
|
||||
version: 0.7.0
|
||||
description: Generate or edit images via ComfyUI with automatic SDXL
|
||||
checkpoint routing. Two methods — generate_image (txt2img) and
|
||||
edit_image (img2img on the user's most recently attached image). The
|
||||
@@ -34,6 +34,7 @@ from pydantic import BaseModel, Field
|
||||
# falls back to emitting a markdown data-URI message.
|
||||
try:
|
||||
from fastapi import UploadFile
|
||||
from open_webui.models.files import Files
|
||||
from open_webui.models.users import Users
|
||||
from open_webui.routers.files import upload_file_handler
|
||||
|
||||
@@ -338,20 +339,76 @@ def _build_img2img(positive: str, negative: str, settings: dict,
|
||||
}
|
||||
|
||||
|
||||
def _file_dict_is_image(f: dict) -> bool:
    """
    Report whether a chat file dict looks like an image.

    A dict qualifies when its ``type`` value contains "image", or when its
    ``name`` (falling back to ``filename``) ends in .png, .jpg, .jpeg or
    .webp. Both checks are case-insensitive.

    Values that are missing or are not strings are treated as absent, so a
    malformed dict yields False instead of raising AttributeError on
    ``.lower()``.
    """
    raw_type = f.get("type")
    ftype = raw_type.lower() if isinstance(raw_type, str) else ""

    # First usable string wins; a non-string "name" must not hide a valid
    # "filename".
    fname = ""
    for key in ("name", "filename"):
        candidate = f.get(key)
        if isinstance(candidate, str) and candidate:
            fname = candidate.lower()
            break

    return "image" in ftype or fname.endswith((".png", ".jpg", ".jpeg", ".webp"))
|
||||
|
||||
|
||||
def _read_file_dict(f: dict) -> Optional[bytes]:
    """
    Try to read raw bytes for one file dict.

    Path keys on the dict are tried first (covers local uploads), then
    Open WebUI's Files model lookup by id (covers assistant-emitted images
    that only have an id + relative URL).

    Args:
        f: One file dict, e.g. an entry of a message's ``files`` list.

    Returns:
        The file's raw bytes (possibly empty), or None if no method worked.
    """
    # NOTE(review): path values are used exactly as found on the dict. If
    # these dicts can be client-supplied, this reads arbitrary local files —
    # confirm callers only pass server-built dicts.
    for path_key in ("path", "filepath", "file_path"):
        data = _read_path_bytes(f.get(path_key))
        if data is not None:
            return data

    fid = f.get("id")
    if not fid or not _OPENWEBUI_RUNTIME:
        return None

    try:
        file_model = Files.get_file_by_id(fid)
        if file_model is None:
            return None
        # FileModel may expose path directly or under .meta
        path = getattr(file_model, "path", None)
        if not path:
            meta = getattr(file_model, "meta", None) or {}
            if isinstance(meta, dict):
                path = meta.get("path")
            else:
                path = getattr(meta, "path", None)
    except Exception:
        # Deliberate best-effort: a failed lookup must not abort the image
        # search, but leave a trace instead of swallowing it silently.
        import logging

        logging.getLogger(__name__).debug(
            "Files lookup failed for file id %r", fid, exc_info=True
        )
        return None

    return _read_path_bytes(path)


def _read_path_bytes(path) -> Optional[bytes]:
    """Return the bytes stored at `path`, or None if it cannot be read."""
    import os

    # open() treats an int as a file descriptor (and closes it on exit), so
    # only genuine path types are accepted.
    if not path or not isinstance(path, (str, bytes, os.PathLike)):
        return None
    try:
        with open(path, "rb") as fh:
            return fh.read()
    except (OSError, ValueError):
        # ValueError: e.g. an embedded NUL byte in the path.
        return None
|
||||
|
||||
|
||||
async def _extract_attached_image(
|
||||
files: Optional[list],
|
||||
messages: Optional[list],
|
||||
session: aiohttp.ClientSession,
|
||||
) -> Optional[bytes]:
|
||||
"""
|
||||
Find the most recent image the user attached to the chat. Tries three
|
||||
sources in order: (1) base64 data URIs in `image_url` content blocks
|
||||
of the recent messages (works for vision-capable models), (2) a local
|
||||
filesystem path on the file dict (open-webui stores uploads under
|
||||
/app/backend/data/uploads/), (3) the file's url field, fetched over
|
||||
HTTP. Returns raw image bytes, or None if nothing matched.
|
||||
Find the most recent image in the chat — including images previously
|
||||
emitted by this tool itself. Search order (most recent first):
|
||||
|
||||
1. Inline base64 data URIs in `image_url` content blocks of recent
|
||||
messages (vision-model uploads, paste-from-clipboard).
|
||||
2. Files attached to messages in the conversation, scanned in
|
||||
REVERSE so the newest image wins. This covers two cases:
|
||||
a. Files the user just attached (current user message).
|
||||
b. Files the assistant emitted via prior `generate_image` /
|
||||
`edit_image` calls (attached to assistant messages by the
|
||||
`files` event in _push_image_to_chat).
|
||||
3. The __files__ tool param as a final fallback (some Open WebUI
|
||||
versions pass user uploads here instead of on the message).
|
||||
4. Best-effort URL fetch on any leftover file dict (likely fails
|
||||
on auth-protected endpoints — last resort).
|
||||
"""
|
||||
# Messages: standard OpenAI image_url content blocks.
|
||||
# 1. Inline data URIs on recent messages.
|
||||
for msg in reversed(messages or []):
|
||||
content = msg.get("content") if isinstance(msg, dict) else None
|
||||
if isinstance(content, list):
|
||||
@@ -365,27 +422,38 @@ async def _extract_attached_image(
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Files: try local path, then URL.
|
||||
# 2. Files on messages, newest first.
|
||||
for msg in reversed(messages or []):
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
msg_files = msg.get("files")
|
||||
if not isinstance(msg_files, list):
|
||||
continue
|
||||
for f in msg_files:
|
||||
if not isinstance(f, dict) or not _file_dict_is_image(f):
|
||||
continue
|
||||
data = _read_file_dict(f)
|
||||
if data is not None:
|
||||
return data
|
||||
|
||||
# 3. __files__ param (current user upload, sometimes only here).
|
||||
for f in files or []:
|
||||
if not isinstance(f, dict):
|
||||
continue
|
||||
ftype = (f.get("type") or "").lower()
|
||||
fname = (f.get("name") or f.get("filename") or "").lower()
|
||||
is_image = "image" in ftype or fname.endswith((".png", ".jpg", ".jpeg", ".webp"))
|
||||
if not is_image:
|
||||
if not isinstance(f, dict) or not _file_dict_is_image(f):
|
||||
continue
|
||||
data = _read_file_dict(f)
|
||||
if data is not None:
|
||||
return data
|
||||
|
||||
for path_key in ("path", "filepath", "file_path"):
|
||||
path = f.get(path_key)
|
||||
if path:
|
||||
try:
|
||||
with open(path, "rb") as fh:
|
||||
return fh.read()
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
url = f.get("url")
|
||||
if url:
|
||||
# 4. Last-resort URL fetch (no auth — only works for public endpoints).
|
||||
for source in [files or []] + [
|
||||
(msg.get("files") or []) for msg in reversed(messages or []) if isinstance(msg, dict)
|
||||
]:
|
||||
for f in source:
|
||||
if not isinstance(f, dict) or not _file_dict_is_image(f):
|
||||
continue
|
||||
url = f.get("url")
|
||||
if not url:
|
||||
continue
|
||||
full = url if url.startswith("http") else f"http://localhost:8080{url}"
|
||||
try:
|
||||
async with session.get(full) as resp:
|
||||
|
||||
Reference in New Issue
Block a user