fix: wrap image bytes in BytesIO for gpt-image edit endpoint
When raw bytes are passed, the OpenAI SDK routes the request through its legacy multipart path, which only accepts dall-e-2. Wrapping the bytes in io.BytesIO with a name attribute routes the request through the newer path, which supports gpt-image-* models. Also removes output_format from the edit call, as that endpoint does not support it.
This commit is contained in:
parent
770f408dad
commit
d90db2933e
2 changed files with 31 additions and 14 deletions
|
|
@@ -3,6 +3,7 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import io
|
||||
from pathlib import Path
|
||||
from typing import Literal, override
|
||||
|
||||
|
|
@@ -128,7 +129,6 @@ class OpenAIImageProvider(Provider):
|
|||
target_config.reference_images,
|
||||
project_dir,
|
||||
size,
|
||||
output_format,
|
||||
)
|
||||
else:
|
||||
response = await _generate_new(
|
||||
|
|
@@ -183,17 +183,30 @@ async def _generate_edit(
|
|||
reference_images: list[str],
|
||||
project_dir: Path,
|
||||
size: _SIZE | None,
|
||||
output_format: str | None = None,
|
||||
) -> ImagesResponse:
|
||||
"""Generate an image using reference images via the edits endpoint.
|
||||
|
||||
gpt-image-* models accept up to 16 images and return b64 by default
|
||||
(they reject ``response_format``). DALL-E 2 accepts only one image.
|
||||
(they reject ``response_format`` and ``output_format``).
|
||||
DALL-E 2 accepts only one image.
|
||||
"""
|
||||
images = [(project_dir / name).read_bytes() for name in reference_images]
|
||||
image: bytes | list[bytes] = images[0] if len(images) == 1 else images
|
||||
raw_images = [(project_dir / name).read_bytes() for name in reference_images]
|
||||
|
||||
if model.startswith("gpt-image-"):
|
||||
# gpt-image-* models require file-like objects with a name attribute;
|
||||
# raw bytes trigger the legacy multipart path that only accepts dall-e-2.
|
||||
def _to_named_buf(data: bytes, name: str) -> io.BytesIO:
|
||||
buf = io.BytesIO(data)
|
||||
buf.name = name
|
||||
return buf
|
||||
|
||||
file_images = [
|
||||
_to_named_buf(data, name)
|
||||
for data, name in zip(raw_images, reference_images, strict=True)
|
||||
]
|
||||
image: io.BytesIO | list[io.BytesIO] = (
|
||||
file_images[0] if len(file_images) == 1 else file_images
|
||||
)
|
||||
kwargs: dict[str, object] = {
|
||||
"image": image,
|
||||
"prompt": prompt,
|
||||
|
|
@@ -202,12 +215,13 @@ async def _generate_edit(
|
|||
}
|
||||
if size is not None:
|
||||
kwargs["size"] = size
|
||||
if output_format is not None:
|
||||
kwargs["output_format"] = output_format
|
||||
return await client.images.edit(**kwargs) # pyright: ignore[reportCallIssue,reportArgumentType,reportUnknownVariableType]
|
||||
|
||||
dalle_image: bytes | list[bytes] = (
|
||||
raw_images[0] if len(raw_images) == 1 else raw_images
|
||||
)
|
||||
kwargs = {
|
||||
"image": image,
|
||||
"image": dalle_image,
|
||||
"prompt": prompt,
|
||||
"model": model,
|
||||
"n": 1,
|
||||
|
|
|
|||
|
|
@@ -7,6 +7,7 @@ Mock-heavy tests produce many Any-typed expressions from MagicMock.
|
|||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import io
|
||||
from pathlib import Path
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
|
|
@@ -480,8 +481,10 @@ class TestOpenAIImageProvider:
|
|||
)
|
||||
|
||||
call_args = mock_client.images.edit.call_args
|
||||
# Single reference image should be passed as raw bytes
|
||||
assert call_args.kwargs["image"] == b"reference data"
|
||||
# gpt-image-* models pass a BytesIO with a name attribute
|
||||
img_arg = call_args.kwargs["image"]
|
||||
assert img_arg.read() == b"reference data"
|
||||
assert hasattr(img_arg, "name")
|
||||
|
||||
output = project_dir / "out.png"
|
||||
assert output.exists()
|
||||
|
|
@@ -514,9 +517,9 @@ class TestOpenAIImageProvider:
|
|||
)
|
||||
|
||||
call_args = mock_client.images.edit.call_args
|
||||
# Multiple reference images should be passed as a list of bytes
|
||||
image_arg: list[bytes] = call_args.kwargs["image"]
|
||||
# gpt-image-* models pass a list of BytesIO with name attributes
|
||||
image_arg: list[io.BytesIO] = call_args.kwargs["image"]
|
||||
assert isinstance(image_arg, list)
|
||||
assert len(image_arg) == 2
|
||||
assert image_arg[0] == b"ref1 data"
|
||||
assert image_arg[1] == b"ref2 data"
|
||||
assert image_arg[0].read() == b"ref1 data"
|
||||
assert image_arg[1].read() == b"ref2 data"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue