From d90db2933e25114dac0f1bf303a0c6e6002f03b9 Mon Sep 17 00:00:00 2001
From: Konstantin Fickel <mail@konstantinfickel.de>
Date: Thu, 5 Mar 2026 21:21:26 +0100
Subject: [PATCH] fix: wrap image bytes in BytesIO for gpt-image edit endpoint

The OpenAI SDK's legacy multipart path only accepts dall-e-2 when
raw bytes are passed. Wrapping the image bytes in an io.BytesIO that
has a name attribute routes the request through the newer path,
which supports gpt-image-* models.

Also remove output_format from the edit call, as that endpoint
does not support it.
---
 hokusai/providers/openai_image.py | 30 ++++++++++++++++++++++--------
 tests/test_providers.py           | 15 +++++++++------
 2 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/hokusai/providers/openai_image.py b/hokusai/providers/openai_image.py
index 3cedb0b..2bbb2d9 100644
--- a/hokusai/providers/openai_image.py
+++ b/hokusai/providers/openai_image.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import base64
+import io
 from pathlib import Path
 from typing import Literal, override
 
@@ -128,7 +129,6 @@ class OpenAIImageProvider(Provider):
                     target_config.reference_images,
                     project_dir,
                     size,
-                    output_format,
                 )
             else:
                 response = await _generate_new(
@@ -183,17 +183,30 @@ async def _generate_edit(
     reference_images: list[str],
     project_dir: Path,
     size: _SIZE | None,
-    output_format: str | None = None,
 ) -> ImagesResponse:
     """Generate an image using reference images via the edits endpoint.
 
     gpt-image-* models accept up to 16 images and return b64 by default
-    (they reject ``response_format``).  DALL-E 2 accepts only one image.
+    (they reject ``response_format`` and ``output_format``).
+    DALL-E 2 accepts only one image.
     """
-    images = [(project_dir / name).read_bytes() for name in reference_images]
-    image: bytes | list[bytes] = images[0] if len(images) == 1 else images
+    raw_images = [(project_dir / name).read_bytes() for name in reference_images]
 
     if model.startswith("gpt-image-"):
+        # gpt-image-* models require file-like objects with a name attribute;
+        # raw bytes trigger the legacy multipart path that only accepts dall-e-2.
+        def _to_named_buf(data: bytes, name: str) -> io.BytesIO:
+            buf = io.BytesIO(data)
+            buf.name = name
+            return buf
+
+        file_images = [
+            _to_named_buf(data, name)
+            for data, name in zip(raw_images, reference_images, strict=True)
+        ]
+        image: io.BytesIO | list[io.BytesIO] = (
+            file_images[0] if len(file_images) == 1 else file_images
+        )
         kwargs: dict[str, object] = {
             "image": image,
             "prompt": prompt,
@@ -202,12 +215,13 @@ async def _generate_edit(
         }
         if size is not None:
             kwargs["size"] = size
-        if output_format is not None:
-            kwargs["output_format"] = output_format
         return await client.images.edit(**kwargs)  # pyright: ignore[reportCallIssue,reportArgumentType,reportUnknownVariableType]
 
+    dalle_image: bytes | list[bytes] = (
+        raw_images[0] if len(raw_images) == 1 else raw_images
+    )
     kwargs = {
-        "image": image,
+        "image": dalle_image,
         "prompt": prompt,
         "model": model,
         "n": 1,
diff --git a/tests/test_providers.py b/tests/test_providers.py
index 1d63e01..0e0659f 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -7,6 +7,7 @@ Mock-heavy tests produce many Any-typed expressions from MagicMock.
 from __future__ import annotations
 
 import base64
+import io
 from pathlib import Path
 from unittest.mock import AsyncMock, MagicMock, patch
 
@@ -480,8 +481,10 @@ class TestOpenAIImageProvider:
             )
 
             call_args = mock_client.images.edit.call_args
-            # Single reference image should be passed as raw bytes
-            assert call_args.kwargs["image"] == b"reference data"
+            # gpt-image-* models pass a BytesIO with a name attribute
+            img_arg = call_args.kwargs["image"]
+            assert img_arg.read() == b"reference data"
+            assert hasattr(img_arg, "name")
 
         output = project_dir / "out.png"
         assert output.exists()
@@ -514,9 +517,9 @@ class TestOpenAIImageProvider:
             )
 
             call_args = mock_client.images.edit.call_args
-            # Multiple reference images should be passed as a list of bytes
-            image_arg: list[bytes] = call_args.kwargs["image"]
+            # gpt-image-* models pass a list of BytesIO with name attributes
+            image_arg: list[io.BytesIO] = call_args.kwargs["image"]
             assert isinstance(image_arg, list)
             assert len(image_arg) == 2
-            assert image_arg[0] == b"ref1 data"
-            assert image_arg[1] == b"ref2 data"
+            assert image_arg[0].read() == b"ref1 data"
+            assert image_arg[1].read() == b"ref2 data"