fix: send images to Mistral as base64 vision chunks instead of placeholders
The text provider now includes reference_images alongside inputs when building prompts. Image files are sent as base64 data URLs via ImageURLChunk for actual multimodal vision support, replacing the previous [Attached image: ...] placeholder text.
This commit is contained in:
parent
d565329e16
commit
6a9d7efd5d
2 changed files with 102 additions and 23 deletions
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
Mock-heavy tests produce many Any-typed expressions from MagicMock.
|
||||
"""
|
||||
# pyright: reportAny=false
|
||||
# pyright: reportAny=false, reportUnknownMemberType=false
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
|
@ -312,8 +312,10 @@ class TestTextProvider:
|
|||
|
||||
call_args = mock_client.chat.complete_async.call_args
|
||||
messages = call_args.kwargs["messages"]
|
||||
prompt_text = messages[0].content
|
||||
assert "[Attached image: photo.png]" in prompt_text
|
||||
chunks = messages[0].content
|
||||
assert isinstance(chunks, list)
|
||||
assert chunks[0].text == "Describe this image"
|
||||
assert chunks[1].image_url.url.startswith("data:image/png;base64,")
|
||||
|
||||
async def test_text_no_choices_raises(self, project_dir: Path) -> None:
|
||||
target_config = TargetConfig(prompt="x")
|
||||
|
|
@ -374,9 +376,38 @@ class TestTextProvider:
|
|||
)
|
||||
|
||||
call_args = mock_client.chat.complete_async.call_args
|
||||
prompt_text = call_args.kwargs["messages"][0].content
|
||||
assert "--- Contents of a.txt ---" in prompt_text
|
||||
assert "content A" in prompt_text
|
||||
assert "--- Contents of b.txt ---" in prompt_text
|
||||
assert "content B" in prompt_text
|
||||
assert "[Attached image: c.png]" in prompt_text
|
||||
chunks = call_args.kwargs["messages"][0].content
|
||||
assert isinstance(chunks, list)
|
||||
# TextChunk for prompt, TextChunk for a.txt, TextChunk for b.txt,
|
||||
# ImageURLChunk for c.png
|
||||
assert chunks[0].text == "Combine all"
|
||||
assert "content A" in chunks[1].text
|
||||
assert "content B" in chunks[2].text
|
||||
assert chunks[3].image_url.url.startswith("data:image/png;base64,")
|
||||
|
||||
async def test_text_with_reference_images(self, project_dir: Path) -> None:
    """Reference images reach the chat call as base64 image chunks.

    Writes a ``ref.png`` file into the project directory, lists it under
    ``reference_images`` in the target config, and then inspects the first
    message handed to the mocked ``chat.complete_async``: its content must
    be a list whose first item carries the prompt text and whose second
    item carries an image URL beginning with a PNG base64 ``data:`` prefix.
    """
    prompt = "Describe the style"
    image_path = project_dir / "ref.png"
    _ = image_path.write_bytes(b"\x89PNG")

    config = TargetConfig(prompt=prompt, reference_images=["ref.png"])
    fake_response = _make_text_response("A stylized image")

    # Replace the Mistral class looked up by the text provider module so
    # that constructing the provider yields our mock client.
    with patch("bulkgen.providers.text.Mistral") as mistral_cls:
        client = _make_mistral_mock(fake_response)
        mistral_cls.return_value = client

        provider = TextProvider(api_key="test-key")
        await provider.generate(
            target_name="desc.txt",
            target_config=config,
            resolved_prompt=prompt,
            resolved_model="mistral-large-latest",
            project_dir=project_dir,
        )

    # Pull the content of the first message from the recorded keyword args.
    recorded_call = client.chat.complete_async.call_args
    first_message = recorded_call.kwargs["messages"][0]
    content_parts = first_message.content

    assert isinstance(content_parts, list)
    assert content_parts[0].text == prompt
    assert content_parts[1].image_url.url.startswith("data:image/png;base64,")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue