fix: wrap image bytes in BytesIO for gpt-image edit endpoint

The OpenAI SDK's legacy multipart path only accepts dall-e-2 when
raw bytes are passed. Wrapping in io.BytesIO with a name attribute
routes through the newer path that supports gpt-image-* models.

Also removes output_format from the edit call as that endpoint
does not support it.
This commit is contained in:
Konstantin Fickel 2026-03-05 21:21:26 +01:00
parent 770f408dad
commit d90db2933e
Signed by: kfickel
GPG key ID: A793722F9933C1A5
2 changed files with 31 additions and 14 deletions

View file

@@ -3,6 +3,7 @@
from __future__ import annotations from __future__ import annotations
import base64 import base64
import io
from pathlib import Path from pathlib import Path
from typing import Literal, override from typing import Literal, override
@@ -128,7 +129,6 @@ class OpenAIImageProvider(Provider):
target_config.reference_images, target_config.reference_images,
project_dir, project_dir,
size, size,
output_format,
) )
else: else:
response = await _generate_new( response = await _generate_new(
@@ -183,17 +183,30 @@ async def _generate_edit(
reference_images: list[str], reference_images: list[str],
project_dir: Path, project_dir: Path,
size: _SIZE | None, size: _SIZE | None,
output_format: str | None = None,
) -> ImagesResponse: ) -> ImagesResponse:
"""Generate an image using reference images via the edits endpoint. """Generate an image using reference images via the edits endpoint.
gpt-image-* models accept up to 16 images and return b64 by default gpt-image-* models accept up to 16 images and return b64 by default
(they reject ``response_format``). DALL-E 2 accepts only one image. (they reject ``response_format`` and ``output_format``).
DALL-E 2 accepts only one image.
""" """
images = [(project_dir / name).read_bytes() for name in reference_images] raw_images = [(project_dir / name).read_bytes() for name in reference_images]
image: bytes | list[bytes] = images[0] if len(images) == 1 else images
if model.startswith("gpt-image-"): if model.startswith("gpt-image-"):
# gpt-image-* models require file-like objects with a name attribute;
# raw bytes trigger the legacy multipart path that only accepts dall-e-2.
def _to_named_buf(data: bytes, name: str) -> io.BytesIO:
buf = io.BytesIO(data)
buf.name = name
return buf
file_images = [
_to_named_buf(data, name)
for data, name in zip(raw_images, reference_images, strict=True)
]
image: io.BytesIO | list[io.BytesIO] = (
file_images[0] if len(file_images) == 1 else file_images
)
kwargs: dict[str, object] = { kwargs: dict[str, object] = {
"image": image, "image": image,
"prompt": prompt, "prompt": prompt,
@@ -202,12 +215,13 @@ async def _generate_edit(
} }
if size is not None: if size is not None:
kwargs["size"] = size kwargs["size"] = size
if output_format is not None:
kwargs["output_format"] = output_format
return await client.images.edit(**kwargs) # pyright: ignore[reportCallIssue,reportArgumentType,reportUnknownVariableType] return await client.images.edit(**kwargs) # pyright: ignore[reportCallIssue,reportArgumentType,reportUnknownVariableType]
dalle_image: bytes | list[bytes] = (
raw_images[0] if len(raw_images) == 1 else raw_images
)
kwargs = { kwargs = {
"image": image, "image": dalle_image,
"prompt": prompt, "prompt": prompt,
"model": model, "model": model,
"n": 1, "n": 1,

View file

@@ -7,6 +7,7 @@ Mock-heavy tests produce many Any-typed expressions from MagicMock.
from __future__ import annotations from __future__ import annotations
import base64 import base64
import io
from pathlib import Path from pathlib import Path
from unittest.mock import AsyncMock, MagicMock, patch from unittest.mock import AsyncMock, MagicMock, patch
@@ -480,8 +481,10 @@ class TestOpenAIImageProvider:
) )
call_args = mock_client.images.edit.call_args call_args = mock_client.images.edit.call_args
# Single reference image should be passed as raw bytes # gpt-image-* models pass a BytesIO with a name attribute
assert call_args.kwargs["image"] == b"reference data" img_arg = call_args.kwargs["image"]
assert img_arg.read() == b"reference data"
assert hasattr(img_arg, "name")
output = project_dir / "out.png" output = project_dir / "out.png"
assert output.exists() assert output.exists()
@@ -514,9 +517,9 @@ class TestOpenAIImageProvider:
) )
call_args = mock_client.images.edit.call_args call_args = mock_client.images.edit.call_args
# Multiple reference images should be passed as a list of bytes # gpt-image-* models pass a list of BytesIO with name attributes
image_arg: list[bytes] = call_args.kwargs["image"] image_arg: list[io.BytesIO] = call_args.kwargs["image"]
assert isinstance(image_arg, list) assert isinstance(image_arg, list)
assert len(image_arg) == 2 assert len(image_arg) == 2
assert image_arg[0] == b"ref1 data" assert image_arg[0].read() == b"ref1 data"
assert image_arg[1] == b"ref2 data" assert image_arg[1].read() == b"ref2 data"