hokusai/tests/test_state.py
Konstantin Fickel 4def49350e
All checks were successful
Continuous Integration / Build Package (push) Successful in 35s
Continuous Integration / Lint, Check & Test (push) Successful in 57s
chore: rename bulkgen to hokusai
2026-02-20 17:08:12 +01:00

312 lines
9.5 KiB
Python

"""Integration tests for hokusai.state."""
from __future__ import annotations
from pathlib import Path
import yaml
from hokusai.state import (
BuildState,
TargetState,
hash_file,
is_target_dirty,
load_state,
record_target_state,
save_state,
state_filename,
)
# Project name shared by every test that derives, saves, or loads a state file.
_PROJECT = "test"
class TestHashFunctions:
    """Exercise the file-hashing helper against real files on disk."""

    def test_hash_file_deterministic(self, project_dir: Path) -> None:
        """Hashing the same unchanged file twice gives equal results."""
        target = project_dir / "data.txt"
        _ = target.write_text("hello world")

        first = hash_file(target)
        second = hash_file(target)

        assert first == second

    def test_hash_file_changes_with_content(self, project_dir: Path) -> None:
        """Rewriting the file with different text changes its hash."""
        target = project_dir / "data.txt"

        _ = target.write_text("version 1")
        before = hash_file(target)

        _ = target.write_text("version 2")
        after = hash_file(target)

        assert before != after
class TestStateFilename:
    """Check how the state file's name is built from a project name."""

    def test_state_filename(self) -> None:
        """The project name is wrapped in a leading dot and a fixed suffix."""
        expected = ".cards.hokusai-state.yaml"
        assert state_filename("cards") == expected

    def test_state_filename_simple(self) -> None:
        """A second project name follows the same pattern."""
        expected = ".project.hokusai-state.yaml"
        assert state_filename("project") == expected
class TestStatePersistence:
    """Verify that build state written to disk can be read back."""

    def test_load_missing_file_returns_empty(self, project_dir: Path) -> None:
        """Loading without saving anything first yields no targets."""
        loaded = load_state(project_dir, _PROJECT)
        assert loaded.targets == {}

    def test_save_and_load_round_trip(self, project_dir: Path) -> None:
        """A saved target comes back with its model, hashes and prompt."""
        recorded = TargetState(
            input_hashes={"dep.txt": "abc123"},
            prompt="Generate something",
            model="mistral-large-latest",
            extra_params={},
        )
        save_state(BuildState(targets={"out.txt": recorded}), project_dir, _PROJECT)

        restored = load_state(project_dir, _PROJECT).targets["out.txt"]

        assert restored.model == "mistral-large-latest"
        assert restored.input_hashes == {"dep.txt": "abc123"}
        assert restored.prompt == "Generate something"

    def test_load_empty_yaml(self, project_dir: Path) -> None:
        """An empty state file loads as a state with no targets."""
        state_path = project_dir / state_filename(_PROJECT)
        _ = state_path.write_text("")

        loaded = load_state(project_dir, _PROJECT)

        assert loaded.targets == {}

    def test_save_overwrites_existing(self, project_dir: Path) -> None:
        """A second save replaces the first instead of merging with it."""
        # Save two single-target states back to back; only the last should remain.
        for name, prompt, model in (("a.txt", "p1", "m1"), ("b.txt", "p2", "m2")):
            snapshot = BuildState(
                targets={name: TargetState(input_hashes={}, prompt=prompt, model=model)}
            )
            save_state(snapshot, project_dir, _PROJECT)

        remaining = load_state(project_dir, _PROJECT).targets

        assert "b.txt" in remaining
        assert "a.txt" not in remaining

    def test_state_file_is_valid_yaml(self, project_dir: Path) -> None:
        """The file on disk parses as a YAML mapping with a "targets" key."""
        target = TargetState(
            input_hashes={"f.txt": "hash"},
            prompt="do something",
            model="m",
            extra_params={"width": 512},
        )
        save_state(BuildState(targets={"out.txt": target}), project_dir, _PROJECT)

        # Parse the file directly with PyYAML rather than through load_state.
        state_path = project_dir / state_filename(_PROJECT)
        text = state_path.read_text()
        parsed: object = yaml.safe_load(text)  # pyright: ignore[reportAny]

        assert isinstance(parsed, dict)
        assert "targets" in parsed
class TestIsDirty:
    """Check the dirty-detection rules using real files on disk."""

    def _setup_target(
        self, project_dir: Path, *, dep_content: str = "dep data"
    ) -> tuple[BuildState, list[Path]]:
        """Write dep.txt and out.txt, record out.txt as built, return the result.

        ``dep_content`` is the text written to dep.txt. The returned tuple is
        the fresh state that now holds the record for "out.txt" and the list
        of dependency paths that was passed when recording it.
        """
        dependency = project_dir / "dep.txt"
        _ = dependency.write_text(dep_content)

        built_output = project_dir / "out.txt"
        _ = built_output.write_text("generated output")

        fresh_state = BuildState()
        deps = [dependency]
        record_target_state(
            "out.txt",
            resolved_prompt="prompt",
            model="model-v1",
            dep_files=deps,
            extra_params={},
            state=fresh_state,
            project_dir=project_dir,
        )
        return fresh_state, deps

    def test_clean_target_not_dirty(self, project_dir: Path) -> None:
        """Checking with exactly the recorded inputs reports not dirty."""
        state, deps = self._setup_target(project_dir)

        dirty = is_target_dirty(
            "out.txt",
            resolved_prompt="prompt",
            model="model-v1",
            dep_files=deps,
            extra_params={},
            state=state,
            project_dir=project_dir,
        )

        assert not dirty

    def test_missing_output_is_dirty(self, project_dir: Path) -> None:
        """Deleting the output file makes the target dirty."""
        state, deps = self._setup_target(project_dir)
        (project_dir / "out.txt").unlink()

        dirty = is_target_dirty(
            "out.txt",
            resolved_prompt="prompt",
            model="model-v1",
            dep_files=deps,
            extra_params={},
            state=state,
            project_dir=project_dir,
        )

        assert dirty

    def test_changed_dep_is_dirty(self, project_dir: Path) -> None:
        """Rewriting the dependency's content makes the target dirty."""
        state, deps = self._setup_target(project_dir)
        _ = (project_dir / "dep.txt").write_text("MODIFIED content")

        dirty = is_target_dirty(
            "out.txt",
            resolved_prompt="prompt",
            model="model-v1",
            dep_files=deps,
            extra_params={},
            state=state,
            project_dir=project_dir,
        )

        assert dirty

    def test_changed_prompt_is_dirty(self, project_dir: Path) -> None:
        """A prompt different from the recorded one makes the target dirty."""
        state, deps = self._setup_target(project_dir)

        dirty = is_target_dirty(
            "out.txt",
            resolved_prompt="DIFFERENT prompt",
            model="model-v1",
            dep_files=deps,
            extra_params={},
            state=state,
            project_dir=project_dir,
        )

        assert dirty

    def test_changed_model_is_dirty(self, project_dir: Path) -> None:
        """A model different from the recorded one makes the target dirty."""
        state, deps = self._setup_target(project_dir)

        dirty = is_target_dirty(
            "out.txt",
            resolved_prompt="prompt",
            model="model-v2",
            dep_files=deps,
            extra_params={},
            state=state,
            project_dir=project_dir,
        )

        assert dirty

    def test_changed_extra_params_is_dirty(self, project_dir: Path) -> None:
        """Extra params different from the recorded ones make it dirty."""
        state, deps = self._setup_target(project_dir)

        dirty = is_target_dirty(
            "out.txt",
            resolved_prompt="prompt",
            model="model-v1",
            dep_files=deps,
            extra_params={"width": 512},
            state=state,
            project_dir=project_dir,
        )

        assert dirty

    def test_never_built_target_is_dirty(self, project_dir: Path) -> None:
        """An existing output with no recorded state counts as dirty."""
        _ = (project_dir / "out.txt").write_text("exists but never recorded")

        dirty = is_target_dirty(
            "out.txt",
            resolved_prompt="prompt",
            model="model-v1",
            dep_files=[],
            extra_params={},
            state=BuildState(),
            project_dir=project_dir,
        )

        assert dirty

    def test_new_dep_added_is_dirty(self, project_dir: Path) -> None:
        """Adding a dependency that was not recorded makes the target dirty."""
        state, deps = self._setup_target(project_dir)

        added = project_dir / "extra.txt"
        _ = added.write_text("extra dep")
        deps.append(added)

        dirty = is_target_dirty(
            "out.txt",
            resolved_prompt="prompt",
            model="model-v1",
            dep_files=deps,
            extra_params={},
            state=state,
            project_dir=project_dir,
        )

        assert dirty
class TestRecordAndDirtyRoundTrip:
    """Confirm that a freshly recorded target is then reported as clean."""

    def test_record_then_check_not_dirty(self, project_dir: Path) -> None:
        """Recording and checking with identical arguments reports not dirty."""
        source = project_dir / "input.txt"
        _ = source.write_text("data")
        result = project_dir / "result.md"
        _ = result.write_text("result")

        in_memory = BuildState()
        deps = [source]
        record_target_state(
            "result.md",
            resolved_prompt="do the thing",
            model="mistral-large-latest",
            dep_files=deps,
            extra_params={"width": 100},
            state=in_memory,
            project_dir=project_dir,
        )

        dirty = is_target_dirty(
            "result.md",
            resolved_prompt="do the thing",
            model="mistral-large-latest",
            dep_files=deps,
            extra_params={"width": 100},
            state=in_memory,
            project_dir=project_dir,
        )

        assert not dirty

    def test_state_survives_save_load_cycle(self, project_dir: Path) -> None:
        """A recorded target is still clean after the state is saved and reloaded."""
        source = project_dir / "input.txt"
        _ = source.write_text("data")
        result = project_dir / "result.md"
        _ = result.write_text("result")

        in_memory = BuildState()
        deps = [source]
        record_target_state(
            "result.md",
            resolved_prompt="do the thing",
            model="mistral-large-latest",
            dep_files=deps,
            extra_params={},
            state=in_memory,
            project_dir=project_dir,
        )

        # Go through disk so the check below runs against the reloaded state.
        save_state(in_memory, project_dir, _PROJECT)
        reloaded = load_state(project_dir, _PROJECT)

        dirty = is_target_dirty(
            "result.md",
            resolved_prompt="do the thing",
            model="mistral-large-latest",
            dep_files=deps,
            extra_params={},
            state=reloaded,
            project_dir=project_dir,
        )

        assert not dirty