"""OpenAI text generation provider."""

from __future__ import annotations

import base64
import mimetypes
from pathlib import Path
from typing import override

from openai import AsyncOpenAI
from openai.types.chat import (
    ChatCompletionContentPartImageParam,
    ChatCompletionContentPartParam,
    ChatCompletionContentPartTextParam,
    ChatCompletionUserMessageParam,
)

from bulkgen.config import IMAGE_EXTENSIONS, TargetConfig
from bulkgen.providers import Provider
from bulkgen.providers.models import Capability, ModelInfo

# Text inputs and the generated output are always read/written as UTF-8 so the
# result does not depend on the platform's locale encoding (model output
# routinely contains characters that legacy code pages cannot represent).
_TEXT_ENCODING = "utf-8"


def _image_to_data_url(path: Path) -> str:
    """Read an image file and return a ``data:`` URL with base64-encoded content.

    The MIME type is guessed from the file name; ``image/png`` is used when it
    cannot be determined.
    """
    mime = mimetypes.guess_type(path.name)[0] or "image/png"
    b64 = base64.b64encode(path.read_bytes()).decode("ascii")
    return f"data:{mime};base64,{b64}"


def _is_image(path: Path) -> bool:
    """Return whether *path* has one of the known image extensions."""
    return path.suffix.lower() in IMAGE_EXTENSIONS


def _file_section(name: str, path: Path) -> str:
    """Return the text of *path* preceded by a ``--- Contents of <name> ---`` header."""
    file_content = path.read_text(encoding=_TEXT_ENCODING)
    return f"\n--- Contents of {name} ---\n{file_content}"


def _text_model(name: str, *, vision: bool = True) -> ModelInfo:
    """Build the ``ModelInfo`` entry for one OpenAI text model.

    A fresh capability list is created on every call so entries never share
    mutable state.
    """
    capabilities = [Capability.TEXT_GENERATION]
    if vision:
        capabilities.append(Capability.VISION)
    return ModelInfo(
        name=name,
        provider="OpenAI",
        type="text",
        capabilities=capabilities,
    )


class OpenAITextProvider(Provider):
    """Generates text via the OpenAI API."""

    _api_key: str

    def __init__(self, api_key: str) -> None:
        """Store the API key used to create the OpenAI client."""
        self._api_key = api_key

    @staticmethod
    @override
    def get_provided_models() -> list[ModelInfo]:
        """Return the text models offered by this provider."""
        return [
            # GPT-5 family
            _text_model("gpt-5"),
            _text_model("gpt-5-mini"),
            _text_model("gpt-5-nano"),
            # Reasoning models
            _text_model("o3"),
            _text_model("o4-mini"),
            _text_model("o3-pro", vision=False),
            # GPT-4 family
            _text_model("gpt-4o"),
            _text_model("gpt-4o-mini"),
            _text_model("gpt-4.1"),
            _text_model("gpt-4.1-mini"),
            _text_model("gpt-4.1-nano"),
            _text_model("o3-mini", vision=False),
        ]

    @override
    async def generate(
        self,
        target_name: str,
        target_config: TargetConfig,
        resolved_prompt: str,
        resolved_model: ModelInfo,
        project_dir: Path,
    ) -> None:
        """Generate text for a target and write it to ``project_dir / target_name``.

        The prompt is sent as a single user message together with every file
        named in ``target_config.inputs`` and ``target_config.reference_images``.
        If any of those files has an image extension, a multimodal message is
        built (images as data URLs, other files inlined as text); otherwise a
        plain-text message is used.

        Raises:
            RuntimeError: If the API response has no choices or the first
                choice has no content.
        """
        output_path = project_dir / target_name

        all_input_names = [*target_config.inputs, *target_config.reference_images]
        has_images = any(_is_image(project_dir / name) for name in all_input_names)

        if has_images:
            message = _build_multimodal_message(
                resolved_prompt, all_input_names, project_dir
            )
        else:
            message = _build_text_message(resolved_prompt, all_input_names, project_dir)

        async with AsyncOpenAI(api_key=self._api_key) as client:
            response = await client.chat.completions.create(
                model=resolved_model.name,
                messages=[message],
            )

        if not response.choices:
            msg = f"OpenAI API returned no choices for target '{target_name}'"
            raise RuntimeError(msg)

        content = response.choices[0].message.content
        if content is None:
            msg = f"OpenAI API returned empty content for target '{target_name}'"
            raise RuntimeError(msg)

        # write_text returns the number of characters written; it is not needed.
        _ = output_path.write_text(content, encoding=_TEXT_ENCODING)


def _build_text_message(
    prompt: str,
    input_names: list[str],
    project_dir: Path,
) -> ChatCompletionUserMessageParam:
    """Build a plain-text message (no images).

    The prompt comes first, followed by one labelled section per input file,
    all joined with newlines into a single string.
    """
    parts: list[str] = [prompt]
    parts.extend(_file_section(name, project_dir / name) for name in input_names)
    return {"role": "user", "content": "\n".join(parts)}


def _build_multimodal_message(
    prompt: str,
    input_names: list[str],
    project_dir: Path,
) -> ChatCompletionUserMessageParam:
    """Build a multimodal message with text and image parts.

    The prompt is the first text part. Each input with an image extension
    becomes an ``image_url`` part carrying a base64 data URL; every other
    input is inlined as a labelled text part. Input order is preserved.
    """
    parts: list[ChatCompletionContentPartParam] = [
        ChatCompletionContentPartTextParam(type="text", text=prompt),
    ]

    for name in input_names:
        input_path = project_dir / name
        if _is_image(input_path):
            parts.append(
                ChatCompletionContentPartImageParam(
                    type="image_url",
                    image_url={"url": _image_to_data_url(input_path)},
                )
            )
        else:
            parts.append(
                ChatCompletionContentPartTextParam(
                    type="text",
                    text=_file_section(name, input_path),
                )
            )

    return {"role": "user", "content": parts}