feat: add OpenAI as provider for text and image generation
- Add openai_text.py: text generation via the OpenAI chat completions API (gpt-4o, gpt-4o-mini, gpt-4.1, gpt-4.1-mini, gpt-4.1-nano, o3-mini)
- Add openai_image.py: image generation via the OpenAI images API (gpt-image-1 with reference image support, dall-e-3, dall-e-2)
- Refactor builder provider dispatch from TargetType to a model-name index, so multiple providers can serve the same target type (see the sketch after this list)
- Fix circular import between config.py and providers/__init__.py using a TYPE_CHECKING guard (see the sketch after this list)
- Fix stale default-model assertions in tests
- Add openai>=1.0.0 dependency
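Two of these changes are easiest to see in miniature. The model-name index replaces per-TargetType dispatch with a lookup built from each provider's get_provided_models(); the sketch below shows the assumed shape only (the _PROVIDERS list and MODEL_INDEX name are illustrative, not taken from this diff):

    # Hypothetical sketch: build one index mapping model name -> provider class,
    # so several providers can serve the same target type.
    from bulkgen.providers.openai_text import OpenAITextProvider

    _PROVIDERS = [OpenAITextProvider]  # illustrative; the real list holds all providers

    MODEL_INDEX = {
        info.name: (provider_cls, info)
        for provider_cls in _PROVIDERS
        for info in provider_cls.get_provided_models()
    }

    provider_cls, info = MODEL_INDEX["gpt-4o-mini"]

The circular-import fix uses the standard TYPE_CHECKING pattern. Which module hosts the guard is not visible in this file's diff, so the excerpt below is a generic sketch under that assumption:

    # Hypothetical excerpt: import Provider for type annotations only, so the
    # module no longer imports bulkgen.providers at runtime.
    from __future__ import annotations

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        from bulkgen.providers import Provider  # seen by type checkers only

    def resolve_provider(name: str) -> Provider:  # annotation is lazy, no cycle
        ...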
parent d0dac5b1bf
commit 870023865d
9 changed files with 571 additions and 58 deletions
169  bulkgen/providers/openai_text.py  Normal file
@@ -0,0 +1,169 @@
"""OpenAI text generation provider."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import mimetypes
|
||||
from pathlib import Path
|
||||
from typing import override
|
||||
|
||||
from openai import AsyncOpenAI
|
||||
from openai.types.chat import (
|
||||
ChatCompletionContentPartImageParam,
|
||||
ChatCompletionContentPartParam,
|
||||
ChatCompletionContentPartTextParam,
|
||||
ChatCompletionUserMessageParam,
|
||||
)
|
||||
|
||||
from bulkgen.config import IMAGE_EXTENSIONS, TargetConfig
|
||||
from bulkgen.providers import Provider
|
||||
from bulkgen.providers.models import Capability, ModelInfo
|
||||
|
||||
|
||||
def _image_to_data_url(path: Path) -> str:
|
||||
"""Read an image file and return a ``data:`` URL with base64-encoded content."""
|
||||
mime = mimetypes.guess_type(path.name)[0] or "image/png"
|
||||
b64 = base64.b64encode(path.read_bytes()).decode("ascii")
|
||||
return f"data:{mime};base64,{b64}"
|
||||
|
||||
|
||||
class OpenAITextProvider(Provider):
|
||||
"""Generates text via the OpenAI API."""
|
||||
|
||||
_api_key: str
|
||||
|
||||
def __init__(self, api_key: str) -> None:
|
||||
self._api_key = api_key
|
||||
|
||||
@staticmethod
|
||||
@override
|
||||
def get_provided_models() -> list[ModelInfo]:
|
||||
return [
|
||||
ModelInfo(
|
||||
name="gpt-4o",
|
||||
provider="OpenAI",
|
||||
type="text",
|
||||
capabilities=[Capability.TEXT_GENERATION, Capability.VISION],
|
||||
),
|
||||
ModelInfo(
|
||||
name="gpt-4o-mini",
|
||||
provider="OpenAI",
|
||||
type="text",
|
||||
capabilities=[Capability.TEXT_GENERATION, Capability.VISION],
|
||||
),
|
||||
ModelInfo(
|
||||
name="gpt-4.1",
|
||||
provider="OpenAI",
|
||||
type="text",
|
||||
capabilities=[Capability.TEXT_GENERATION, Capability.VISION],
|
||||
),
|
||||
ModelInfo(
|
||||
name="gpt-4.1-mini",
|
||||
provider="OpenAI",
|
||||
type="text",
|
||||
capabilities=[Capability.TEXT_GENERATION, Capability.VISION],
|
||||
),
|
||||
ModelInfo(
|
||||
name="gpt-4.1-nano",
|
||||
provider="OpenAI",
|
||||
type="text",
|
||||
capabilities=[Capability.TEXT_GENERATION, Capability.VISION],
|
||||
),
|
||||
ModelInfo(
|
||||
name="o3-mini",
|
||||
provider="OpenAI",
|
||||
type="text",
|
||||
capabilities=[Capability.TEXT_GENERATION],
|
||||
),
|
||||
]
|
||||
|
||||
@override
|
||||
async def generate(
|
||||
self,
|
||||
target_name: str,
|
||||
target_config: TargetConfig,
|
||||
resolved_prompt: str,
|
||||
resolved_model: ModelInfo,
|
||||
project_dir: Path,
|
||||
) -> None:
|
||||
output_path = project_dir / target_name
|
||||
|
||||
all_input_names = list(target_config.inputs) + list(
|
||||
target_config.reference_images
|
||||
)
|
||||
|
||||
has_images = any(
|
||||
(project_dir / name).suffix.lower() in IMAGE_EXTENSIONS
|
||||
for name in all_input_names
|
||||
)
|
||||
|
||||
if has_images:
|
||||
message = _build_multimodal_message(
|
||||
resolved_prompt, all_input_names, project_dir
|
||||
)
|
||||
else:
|
||||
message = _build_text_message(resolved_prompt, all_input_names, project_dir)
|
||||
|
||||
async with AsyncOpenAI(api_key=self._api_key) as client:
|
||||
response = await client.chat.completions.create(
|
||||
model=resolved_model.name,
|
||||
messages=[message],
|
||||
)
|
||||
|
||||
if not response.choices:
|
||||
msg = f"OpenAI API returned no choices for target '{target_name}'"
|
||||
raise RuntimeError(msg)
|
||||
|
||||
content = response.choices[0].message.content
|
||||
if content is None:
|
||||
msg = f"OpenAI API returned empty content for target '{target_name}'"
|
||||
raise RuntimeError(msg)
|
||||
|
||||
_ = output_path.write_text(content)
|
||||
|
||||
|
||||
def _build_text_message(
|
||||
prompt: str,
|
||||
input_names: list[str],
|
||||
project_dir: Path,
|
||||
) -> ChatCompletionUserMessageParam:
|
||||
"""Build a plain-text message (no images)."""
|
||||
parts: list[str] = [prompt]
|
||||
for name in input_names:
|
||||
file_content = (project_dir / name).read_text()
|
||||
parts.append(f"\n--- Contents of {name} ---\n{file_content}")
|
||||
return {"role": "user", "content": "\n".join(parts)}
|
||||
|
||||
|
||||
def _build_multimodal_message(
|
||||
prompt: str,
|
||||
input_names: list[str],
|
||||
project_dir: Path,
|
||||
) -> ChatCompletionUserMessageParam:
|
||||
"""Build a multimodal message with text and image parts."""
|
||||
parts: list[ChatCompletionContentPartParam] = [
|
||||
ChatCompletionContentPartTextParam(type="text", text=prompt),
|
||||
]
|
||||
|
||||
for name in input_names:
|
||||
input_path = project_dir / name
|
||||
suffix = input_path.suffix.lower()
|
||||
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
data_url = _image_to_data_url(input_path)
|
||||
parts.append(
|
||||
ChatCompletionContentPartImageParam(
|
||||
type="image_url",
|
||||
image_url={"url": data_url},
|
||||
)
|
||||
)
|
||||
else:
|
||||
file_content = input_path.read_text()
|
||||
parts.append(
|
||||
ChatCompletionContentPartTextParam(
|
||||
type="text",
|
||||
text=f"\n--- Contents of {name} ---\n{file_content}",
|
||||
)
|
||||
)
|
||||
|
||||
return {"role": "user", "content": parts}
|
||||
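For orientation, a minimal driver for the provider above. The TargetConfig constructor arguments and file names are assumptions made for the example, not part of this commit:

    # Hypothetical smoke test: generate() writes its output to project_dir / target_name.
    import asyncio
    import os
    from pathlib import Path

    from bulkgen.config import TargetConfig
    from bulkgen.providers.openai_text import OpenAITextProvider

    async def main() -> None:
        provider = OpenAITextProvider(api_key=os.environ["OPENAI_API_KEY"])
        model = next(
            m for m in OpenAITextProvider.get_provided_models()
            if m.name == "gpt-4o-mini"
        )
        await provider.generate(
            target_name="summary.md",
            # Assumed constructor shape; "notes.txt" must exist in project_dir.
            target_config=TargetConfig(inputs=["notes.txt"], reference_images=[]),
            resolved_prompt="Summarize the attached notes.",
            resolved_model=model,
            project_dir=Path("."),
        )

    asyncio.run(main())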