From d90db2933e25114dac0f1bf303a0c6e6002f03b9 Mon Sep 17 00:00:00 2001 From: Konstantin Fickel Date: Thu, 5 Mar 2026 21:21:26 +0100 Subject: [PATCH] fix: wrap image bytes in BytesIO for gpt-image edit endpoint The OpenAI SDK's legacy multipart path only accepts dall-e-2 when raw bytes are passed. Wrapping in io.BytesIO with a name attribute routes through the newer path that supports gpt-image-* models. Also removes output_format from the edit call as that endpoint does not support it. --- hokusai/providers/openai_image.py | 30 ++++++++++++++++++++++-------- tests/test_providers.py | 15 +++++++++------ 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/hokusai/providers/openai_image.py b/hokusai/providers/openai_image.py index 3cedb0b..2bbb2d9 100644 --- a/hokusai/providers/openai_image.py +++ b/hokusai/providers/openai_image.py @@ -3,6 +3,7 @@ from __future__ import annotations import base64 +import io from pathlib import Path from typing import Literal, override @@ -128,7 +129,6 @@ class OpenAIImageProvider(Provider): target_config.reference_images, project_dir, size, - output_format, ) else: response = await _generate_new( @@ -183,17 +183,30 @@ async def _generate_edit( reference_images: list[str], project_dir: Path, size: _SIZE | None, - output_format: str | None = None, ) -> ImagesResponse: """Generate an image using reference images via the edits endpoint. gpt-image-* models accept up to 16 images and return b64 by default - (they reject ``response_format``). DALL-E 2 accepts only one image. + (they reject ``response_format`` and ``output_format``). + DALL-E 2 accepts only one image. """ - images = [(project_dir / name).read_bytes() for name in reference_images] - image: bytes | list[bytes] = images[0] if len(images) == 1 else images + raw_images = [(project_dir / name).read_bytes() for name in reference_images] if model.startswith("gpt-image-"): + # gpt-image-* models require file-like objects with a name attribute; + # raw bytes trigger the legacy multipart path that only accepts dall-e-2. + def _to_named_buf(data: bytes, name: str) -> io.BytesIO: + buf = io.BytesIO(data) + buf.name = name + return buf + + file_images = [ + _to_named_buf(data, name) + for data, name in zip(raw_images, reference_images, strict=True) + ] + image: io.BytesIO | list[io.BytesIO] = ( + file_images[0] if len(file_images) == 1 else file_images + ) kwargs: dict[str, object] = { "image": image, "prompt": prompt, @@ -202,12 +215,13 @@ async def _generate_edit( } if size is not None: kwargs["size"] = size - if output_format is not None: - kwargs["output_format"] = output_format return await client.images.edit(**kwargs) # pyright: ignore[reportCallIssue,reportArgumentType,reportUnknownVariableType] + dalle_image: bytes | list[bytes] = ( + raw_images[0] if len(raw_images) == 1 else raw_images + ) kwargs = { - "image": image, + "image": dalle_image, "prompt": prompt, "model": model, "n": 1, diff --git a/tests/test_providers.py b/tests/test_providers.py index 1d63e01..0e0659f 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -7,6 +7,7 @@ Mock-heavy tests produce many Any-typed expressions from MagicMock. from __future__ import annotations import base64 +import io from pathlib import Path from unittest.mock import AsyncMock, MagicMock, patch @@ -480,8 +481,10 @@ class TestOpenAIImageProvider: ) call_args = mock_client.images.edit.call_args - # Single reference image should be passed as raw bytes - assert call_args.kwargs["image"] == b"reference data" + # gpt-image-* models pass a BytesIO with a name attribute + img_arg = call_args.kwargs["image"] + assert img_arg.read() == b"reference data" + assert hasattr(img_arg, "name") output = project_dir / "out.png" assert output.exists() @@ -514,9 +517,9 @@ class TestOpenAIImageProvider: ) call_args = mock_client.images.edit.call_args - # Multiple reference images should be passed as a list of bytes - image_arg: list[bytes] = call_args.kwargs["image"] + # gpt-image-* models pass a list of BytesIO with name attributes + image_arg: list[io.BytesIO] = call_args.kwargs["image"] assert isinstance(image_arg, list) assert len(image_arg) == 2 - assert image_arg[0] == b"ref1 data" - assert image_arg[1] == b"ref2 data" + assert image_arg[0].read() == b"ref1 data" + assert image_arg[1].read() == b"ref2 data"