Text: gpt-5, gpt-5-mini, gpt-5-nano (all with vision), o3, o4-mini (with vision), o3-pro (text only) Image: gpt-image-1.5, gpt-image-1-mini (both with reference images)
208 lines
6.5 KiB
Python
208 lines
6.5 KiB
Python
"""OpenAI text generation provider."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import base64
|
|
import mimetypes
|
|
from pathlib import Path
|
|
from typing import override
|
|
|
|
from openai import AsyncOpenAI
|
|
from openai.types.chat import (
|
|
ChatCompletionContentPartImageParam,
|
|
ChatCompletionContentPartParam,
|
|
ChatCompletionContentPartTextParam,
|
|
ChatCompletionUserMessageParam,
|
|
)
|
|
|
|
from bulkgen.config import IMAGE_EXTENSIONS, TargetConfig
|
|
from bulkgen.providers import Provider
|
|
from bulkgen.providers.models import Capability, ModelInfo
|
|
|
|
|
|
def _image_to_data_url(path: Path) -> str:
    """Encode the image file at *path* as a base64 ``data:`` URL.

    The MIME type is guessed from the file name; ``image/png`` is used
    when no type can be guessed.
    """
    guessed_type, _ = mimetypes.guess_type(path.name)
    media_type = guessed_type or "image/png"
    raw_bytes = path.read_bytes()
    payload = base64.b64encode(raw_bytes).decode("ascii")
    return f"data:{media_type};base64,{payload}"
|
|
|
|
|
|
class OpenAITextProvider(Provider):
    """Generates text via the OpenAI API."""

    # API key handed to ``AsyncOpenAI`` for every request.
    _api_key: str

    def __init__(self, api_key: str) -> None:
        """Store the API key used to authenticate requests."""
        self._api_key = api_key

    @staticmethod
    @override
    def get_provided_models() -> list[ModelInfo]:
        """Return the text models offered by this provider.

        Every model has ``TEXT_GENERATION``; models flagged for vision in
        the table below additionally have ``VISION``.
        """
        # (model name, has vision capability), in catalogue order.
        catalogue: tuple[tuple[str, bool], ...] = (
            # GPT-5 family
            ("gpt-5", True),
            ("gpt-5-mini", True),
            ("gpt-5-nano", True),
            # Reasoning models
            ("o3", True),
            ("o4-mini", True),
            ("o3-pro", False),
            # GPT-4 family
            ("gpt-4o", True),
            ("gpt-4o-mini", True),
            ("gpt-4.1", True),
            ("gpt-4.1-mini", True),
            ("gpt-4.1-nano", True),
            ("o3-mini", False),
        )

        models: list[ModelInfo] = []
        for model_name, has_vision in catalogue:
            # Build a fresh list per model so entries never share state.
            capabilities = [Capability.TEXT_GENERATION]
            if has_vision:
                capabilities.append(Capability.VISION)
            models.append(
                ModelInfo(
                    name=model_name,
                    provider="OpenAI",
                    type="text",
                    capabilities=capabilities,
                )
            )
        return models

    @override
    async def generate(
        self,
        target_name: str,
        target_config: TargetConfig,
        resolved_prompt: str,
        resolved_model: ModelInfo,
        project_dir: Path,
    ) -> None:
        """Request a chat completion and write its text to the target file.

        The prompt is sent as a single user message together with the
        contents of ``target_config.inputs`` and
        ``target_config.reference_images``. If any of those names has an
        image extension the message is multimodal; otherwise it is plain
        text. The reply is written to ``project_dir / target_name``.

        Args:
            target_name: Output file name, relative to ``project_dir``.
            target_config: Supplies the input and reference-image names.
            resolved_prompt: Prompt text to send.
            resolved_model: Model whose ``name`` is passed to the API.
            project_dir: Directory for resolving inputs and the output.

        Raises:
            RuntimeError: If the response has no choices, or the first
                choice has no content.
        """
        output_path = project_dir / target_name

        all_input_names = [*target_config.inputs, *target_config.reference_images]

        has_images = any(
            (project_dir / name).suffix.lower() in IMAGE_EXTENSIONS
            for name in all_input_names
        )

        if has_images:
            message = _build_multimodal_message(
                resolved_prompt, all_input_names, project_dir
            )
        else:
            message = _build_text_message(resolved_prompt, all_input_names, project_dir)

        async with AsyncOpenAI(api_key=self._api_key) as client:
            response = await client.chat.completions.create(
                model=resolved_model.name,
                messages=[message],
            )

        if not response.choices:
            msg = f"OpenAI API returned no choices for target '{target_name}'"
            raise RuntimeError(msg)

        content = response.choices[0].message.content
        if content is None:
            msg = f"OpenAI API returned empty content for target '{target_name}'"
            raise RuntimeError(msg)

        # Model output routinely contains characters outside legacy code
        # pages; an explicit UTF-8 encoding keeps the write from failing
        # (or varying) with the platform's locale default.
        _ = output_path.write_text(content, encoding="utf-8")
|
|
|
|
|
|
def _build_text_message(
    prompt: str,
    input_names: list[str],
    project_dir: Path,
) -> ChatCompletionUserMessageParam:
    """Build a plain-text user message (no images).

    The content is *prompt* followed by one labelled section per input
    file, all joined with newlines.

    Args:
        prompt: Text placed at the start of the message.
        input_names: File names, relative to ``project_dir``, whose
            contents are appended in order.
        project_dir: Directory the input names are resolved against.

    Returns:
        A ``user`` message whose ``content`` is a single string.

    Raises:
        OSError: If an input file cannot be read.
        UnicodeDecodeError: If an input file is not valid UTF-8.
    """
    sections: list[str] = [prompt]
    for name in input_names:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's locale default encoding.
        file_content = (project_dir / name).read_text(encoding="utf-8")
        sections.append(f"\n--- Contents of {name} ---\n{file_content}")
    return {"role": "user", "content": "\n".join(sections)}
|
|
|
|
|
|
def _build_multimodal_message(
    prompt: str,
    input_names: list[str],
    project_dir: Path,
) -> ChatCompletionUserMessageParam:
    """Build a multimodal user message with text and image parts.

    The first part is *prompt*. Each input then adds one part, in order:
    files whose lower-cased suffix is in ``IMAGE_EXTENSIONS`` become
    ``image_url`` parts carrying a base64 ``data:`` URL, and all other
    files become labelled text parts.

    Args:
        prompt: Text of the first content part.
        input_names: File names, relative to ``project_dir``.
        project_dir: Directory the input names are resolved against.

    Returns:
        A ``user`` message whose ``content`` is a list of parts.

    Raises:
        OSError: If an input file cannot be read.
        UnicodeDecodeError: If a non-image input is not valid UTF-8.
    """
    parts: list[ChatCompletionContentPartParam] = [
        ChatCompletionContentPartTextParam(type="text", text=prompt),
    ]

    for name in input_names:
        input_path = project_dir / name

        if input_path.suffix.lower() in IMAGE_EXTENSIONS:
            parts.append(
                ChatCompletionContentPartImageParam(
                    type="image_url",
                    image_url={"url": _image_to_data_url(input_path)},
                )
            )
            continue

        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's locale default encoding.
        file_content = input_path.read_text(encoding="utf-8")
        parts.append(
            ChatCompletionContentPartTextParam(
                type="text",
                text=f"\n--- Contents of {name} ---\n{file_content}",
            )
        )

    return {"role": "user", "content": parts}
|