feat: add OpenAI as provider for text and image generation

- Add openai_text.py: text generation via OpenAI chat completions API
  (gpt-4o, gpt-4o-mini, gpt-4.1, gpt-4.1-mini, gpt-4.1-nano, o3-mini)
- Add openai_image.py: image generation via OpenAI images API
  (gpt-image-1 with reference image support, dall-e-3, dall-e-2)
- Refactor builder provider dispatch from TargetType to model-name index
  to support multiple providers per target type
- Fix circular import between config.py and providers/__init__.py
  using TYPE_CHECKING guard
- Fix stale default model assertions in tests
- Add openai>=1.0.0 dependency
This commit is contained in:
Konstantin Fickel 2026-02-15 13:48:06 +01:00
parent d0dac5b1bf
commit 870023865d
Signed by: kfickel
GPG key ID: A793722F9933C1A5
9 changed files with 571 additions and 58 deletions

View file

@ -4,10 +4,13 @@ from __future__ import annotations
import abc
from pathlib import Path
from typing import TYPE_CHECKING
from bulkgen.config import TargetConfig
from bulkgen.providers.models import ModelInfo
if TYPE_CHECKING:
from bulkgen.config import TargetConfig
class Provider(abc.ABC):
"""Abstract base for generation providers."""

View file

@ -0,0 +1,194 @@
"""OpenAI image generation provider."""
from __future__ import annotations
import base64
from pathlib import Path
from typing import Literal, override
import httpx
from openai import AsyncOpenAI
from openai.types.images_response import ImagesResponse
from bulkgen.config import TargetConfig
from bulkgen.providers import Provider
from bulkgen.providers.models import Capability, ModelInfo
# Static type of the ``size`` value handed to the OpenAI image helpers below.
_SIZE = Literal[
    "auto",
    "1024x1024", "1024x1536", "1536x1024",
    "1024x1792", "1792x1024",
    "256x256", "512x512",
]

# Runtime mirror of ``_SIZE``; ``_build_size`` validates against this set.
_VALID_SIZES: frozenset[str] = frozenset(
    (
        "auto",
        "1024x1024", "1024x1536", "1536x1024",
        "1024x1792", "1792x1024",
        "256x256", "512x512",
    )
)
def _build_size(width: int | None, height: int | None) -> _SIZE | None:
    """Translate optional pixel dimensions into an OpenAI ``size`` string.

    Returns *None* when neither dimension is given, so callers can leave the
    size unset. When only one dimension is given (or a dimension is ``0``),
    the other falls back to 1024.

    Raises:
        ValueError: If the resulting ``"<width>x<height>"`` string is not one
            of ``_VALID_SIZES``.
    """
    if (width, height) == (None, None):
        return None

    candidate = f"{width or 1024}x{height or 1024}"
    if candidate in _VALID_SIZES:
        # Membership in _VALID_SIZES is what makes the plain str a valid _SIZE.
        return candidate  # pyright: ignore[reportReturnType]

    msg = f"Unsupported OpenAI image size '{candidate}'. Valid sizes: {', '.join(sorted(_VALID_SIZES))}"
    raise ValueError(msg)
class OpenAIImageProvider(Provider):
    """Provider that renders image targets through the OpenAI images API."""

    _api_key: str

    def __init__(self, api_key: str) -> None:
        # The key is only stored; a client is created per ``generate`` call.
        self._api_key = api_key

    @staticmethod
    @override
    def get_provided_models() -> list[ModelInfo]:
        """List the OpenAI image models this provider can serve."""
        # (model name, capabilities) pairs; every entry gets its own list.
        catalogue = (
            (
                "gpt-image-1",
                [Capability.TEXT_TO_IMAGE, Capability.REFERENCE_IMAGES],
            ),
            ("dall-e-3", [Capability.TEXT_TO_IMAGE]),
            ("dall-e-2", [Capability.TEXT_TO_IMAGE]),
        )
        return [
            ModelInfo(
                name=model_name,
                provider="OpenAI",
                type="image",
                capabilities=capabilities,
            )
            for model_name, capabilities in catalogue
        ]

    @override
    async def generate(
        self,
        target_name: str,
        target_config: TargetConfig,
        resolved_prompt: str,
        resolved_model: ModelInfo,
        project_dir: Path,
    ) -> None:
        """Generate one image and write it to ``project_dir / target_name``.

        Targets with reference images are routed to the edit helper; all
        others go through plain text-to-image generation.
        """
        size = _build_size(target_config.width, target_config.height)
        references = target_config.reference_images
        model_name = resolved_model.name

        async with AsyncOpenAI(api_key=self._api_key) as client:
            if references:
                response = await _generate_edit(
                    client,
                    resolved_prompt,
                    model_name,
                    references,
                    project_dir,
                    size,
                )
            else:
                response = await _generate_new(
                    client,
                    resolved_prompt,
                    model_name,
                    size,
                )

        image_data = _extract_image_bytes(response, model_name)
        _ = (project_dir / target_name).write_bytes(image_data)
async def _generate_new(
    client: AsyncOpenAI,
    prompt: str,
    model: str,
    size: _SIZE | None,
) -> ImagesResponse:
    """Generate a new image from a text prompt.

    Args:
        client: Open OpenAI client used to issue the request.
        prompt: Text prompt describing the image.
        model: OpenAI image model name (e.g. ``gpt-image-1``, ``dall-e-3``).
        size: Size string to request, or *None* to leave the size unset.

    Returns:
        The raw images response; exactly one image is requested (``n=1``).
    """
    # GPT image models always return base64 data and do not accept the
    # ``response_format`` parameter, so it must be left out for them. The
    # DALL-E models default to a short-lived URL and therefore still need an
    # explicit ``b64_json`` request.
    if model.startswith("gpt-image"):
        if size is not None:
            return await client.images.generate(
                prompt=prompt,
                model=model,
                n=1,
                size=size,
            )
        return await client.images.generate(
            prompt=prompt,
            model=model,
            n=1,
        )

    if size is not None:
        return await client.images.generate(
            prompt=prompt,
            model=model,
            n=1,
            response_format="b64_json",
            size=size,
        )
    return await client.images.generate(
        prompt=prompt,
        model=model,
        n=1,
        response_format="b64_json",
    )
async def _generate_edit(
    client: AsyncOpenAI,
    prompt: str,
    model: str,
    reference_images: list[str],
    project_dir: Path,
    size: _SIZE | None,
) -> ImagesResponse:
    """Generate an image using a reference image via the edits endpoint.

    Args:
        client: Open OpenAI client used to issue the request.
        prompt: Text prompt describing the desired result.
        model: OpenAI image model name.
        reference_images: Paths relative to ``project_dir``. Must be
            non-empty; only the first entry is uploaded.
        project_dir: Directory the reference paths are resolved against.
        size: Size string to request, or *None* to leave the size unset.

    Returns:
        The raw images response; exactly one image is requested (``n=1``).
    """
    ref_path = project_dir / reference_images[0]
    # Upload as (filename, content) rather than bare bytes: without a file
    # name the multipart part is sent as "upload" with an
    # application/octet-stream content type, while a real name lets the HTTP
    # layer infer the image MIME type from the extension.
    image = (ref_path.name, ref_path.read_bytes())

    # GPT image models always return base64 data and do not accept the
    # ``response_format`` parameter; only other models get ``b64_json``.
    if model.startswith("gpt-image"):
        if size is not None:
            return await client.images.edit(
                image=image,
                prompt=prompt,
                model=model,
                n=1,
                size=size,  # pyright: ignore[reportArgumentType]
            )
        return await client.images.edit(
            image=image,
            prompt=prompt,
            model=model,
            n=1,
        )

    if size is not None:
        return await client.images.edit(
            image=image,
            prompt=prompt,
            model=model,
            n=1,
            response_format="b64_json",
            size=size,  # pyright: ignore[reportArgumentType]
        )
    return await client.images.edit(
        image=image,
        prompt=prompt,
        model=model,
        n=1,
        response_format="b64_json",
    )
def _extract_image_bytes(response: ImagesResponse, model: str) -> bytes:
    """Return the raw bytes of the first image in *response*.

    Inline base64 data is preferred; if only a URL is present the image is
    downloaded with a synchronous HTTP GET.

    Raises:
        RuntimeError: If the response holds no images, or the first image
            carries neither base64 data nor a URL.
    """
    images = response.data
    if not images:
        msg = f"OpenAI {model} returned no images"
        raise RuntimeError(msg)

    first = images[0]
    encoded = first.b64_json
    if encoded is not None:
        return base64.b64decode(encoded)

    url = first.url
    if url is None:
        msg = f"OpenAI {model} returned neither b64_json nor url"
        raise RuntimeError(msg)

    # NOTE: blocking download; this helper is synchronous by design.
    download = httpx.get(url)
    _ = download.raise_for_status()
    return download.content

View file

@ -0,0 +1,169 @@
"""OpenAI text generation provider."""
from __future__ import annotations
import base64
import mimetypes
from pathlib import Path
from typing import override
from openai import AsyncOpenAI
from openai.types.chat import (
ChatCompletionContentPartImageParam,
ChatCompletionContentPartParam,
ChatCompletionContentPartTextParam,
ChatCompletionUserMessageParam,
)
from bulkgen.config import IMAGE_EXTENSIONS, TargetConfig
from bulkgen.providers import Provider
from bulkgen.providers.models import Capability, ModelInfo
def _image_to_data_url(path: Path) -> str:
    """Encode the file at *path* as a base64 ``data:`` URL.

    The MIME type is guessed from the file name; ``image/png`` is used when
    the guess fails.
    """
    guessed_type, _encoding = mimetypes.guess_type(path.name)
    payload = base64.b64encode(path.read_bytes()).decode("ascii")
    return f"data:{guessed_type or 'image/png'};base64,{payload}"
class OpenAITextProvider(Provider):
    """Provider that produces text targets through OpenAI chat completions."""

    _api_key: str

    def __init__(self, api_key: str) -> None:
        # The key is only stored; a client is created per ``generate`` call.
        self._api_key = api_key

    @staticmethod
    @override
    def get_provided_models() -> list[ModelInfo]:
        """List the OpenAI chat models this provider can serve."""
        vision_capable = (
            "gpt-4o",
            "gpt-4o-mini",
            "gpt-4.1",
            "gpt-4.1-mini",
            "gpt-4.1-nano",
        )
        # Each entry gets its own capabilities list.
        models = [
            ModelInfo(
                name=model_name,
                provider="OpenAI",
                type="text",
                capabilities=[Capability.TEXT_GENERATION, Capability.VISION],
            )
            for model_name in vision_capable
        ]
        # o3-mini is registered as text-only (no vision capability).
        models.append(
            ModelInfo(
                name="o3-mini",
                provider="OpenAI",
                type="text",
                capabilities=[Capability.TEXT_GENERATION],
            )
        )
        return models

    @override
    async def generate(
        self,
        target_name: str,
        target_config: TargetConfig,
        resolved_prompt: str,
        resolved_model: ModelInfo,
        project_dir: Path,
    ) -> None:
        """Run one chat completion and write it to ``project_dir / target_name``.

        Inputs and reference images are attached to a single user message;
        a multimodal message is built as soon as any of them is an image.

        Raises:
            RuntimeError: If the API returns no choices or empty content.
        """
        input_names = [*target_config.inputs, *target_config.reference_images]

        contains_image = any(
            (project_dir / candidate).suffix.lower() in IMAGE_EXTENSIONS
            for candidate in input_names
        )
        build_message = (
            _build_multimodal_message if contains_image else _build_text_message
        )
        message = build_message(resolved_prompt, input_names, project_dir)

        async with AsyncOpenAI(api_key=self._api_key) as client:
            response = await client.chat.completions.create(
                model=resolved_model.name,
                messages=[message],
            )

        choices = response.choices
        if not choices:
            msg = f"OpenAI API returned no choices for target '{target_name}'"
            raise RuntimeError(msg)

        text = choices[0].message.content
        if text is None:
            msg = f"OpenAI API returned empty content for target '{target_name}'"
            raise RuntimeError(msg)

        _ = (project_dir / target_name).write_text(text)
def _build_text_message(
    prompt: str,
    input_names: list[str],
    project_dir: Path,
) -> ChatCompletionUserMessageParam:
    """Build a text-only user message from the prompt and input files.

    Each input file (resolved against *project_dir*) is read as text and
    appended below the prompt under a ``--- Contents of <name> ---`` header.
    """
    sections: list[str] = [prompt]
    sections.extend(
        f"\n--- Contents of {name} ---\n{(project_dir / name).read_text()}"
        for name in input_names
    )
    return {"role": "user", "content": "\n".join(sections)}
def _build_multimodal_message(
    prompt: str,
    input_names: list[str],
    project_dir: Path,
) -> ChatCompletionUserMessageParam:
    """Build a user message that mixes text parts and inline image parts.

    The prompt is the first text part. Every input whose suffix is in
    ``IMAGE_EXTENSIONS`` becomes an ``image_url`` part carrying a data URL;
    every other input is read as text and added under a
    ``--- Contents of <name> ---`` header.
    """
    content: list[ChatCompletionContentPartParam] = [
        ChatCompletionContentPartTextParam(type="text", text=prompt),
    ]

    for name in input_names:
        path = project_dir / name

        if path.suffix.lower() not in IMAGE_EXTENSIONS:
            # Non-image inputs are inlined as labelled text.
            body = path.read_text()
            content.append(
                ChatCompletionContentPartTextParam(
                    type="text",
                    text=f"\n--- Contents of {name} ---\n{body}",
                )
            )
            continue

        content.append(
            ChatCompletionContentPartImageParam(
                type="image_url",
                image_url={"url": _image_to_data_url(path)},
            )
        )

    return {"role": "user", "content": content}

View file

@ -9,8 +9,12 @@ def get_all_models() -> list[ModelInfo]:
"""Return the merged list of models from all providers."""
from bulkgen.providers.blackforest import BlackForestProvider
from bulkgen.providers.mistral import MistralProvider
from bulkgen.providers.openai_image import OpenAIImageProvider
from bulkgen.providers.openai_text import OpenAITextProvider
return (
MistralProvider.get_provided_models()
+ BlackForestProvider.get_provided_models()
+ OpenAITextProvider.get_provided_models()
+ OpenAIImageProvider.get_provided_models()
)