hokusai/bulkgen/state.py
Konstantin Fickel 0ecf1f0f9e
All checks were successful
Continuous Integration / Build Package (push) Successful in 48s
Continuous Integration / Lint, Check & Test (push) Successful in 1m1s
refactor: use project-named state file and store prompt/params directly
- State filename now derives from config: cards.bulkgen.yaml produces
  .cards.bulkgen-state.yaml instead of .bulkgen.state.yaml
- Store resolved prompt text and extra params directly in state file
  instead of hashing them, making state files human-readable
- Only file input contents remain hashed (SHA-256)
- Thread project_name through builder and CLI
- Remove hash_string() and _extra_hash() helpers
- Update .gitignore pattern to .*.bulkgen-state.yaml
2026-02-15 13:56:12 +01:00

132 lines
3.6 KiB
Python

"""Incremental build state tracking via ``.<project>.bulkgen-state.yaml``."""
from __future__ import annotations
import hashlib
from pathlib import Path
import yaml
from pydantic import BaseModel
def state_filename(project_name: str) -> str:
    """Build the name of the hidden state file belonging to a project.

    The project name is wrapped between a leading dot and the
    ``.bulkgen-state.yaml`` suffix, so the project ``cards`` (from a config
    file called ``cards.bulkgen.yaml``) maps to ``.cards.bulkgen-state.yaml``.
    """
    return ".{}.bulkgen-state.yaml".format(project_name)
class TargetState(BaseModel):
    """Recorded state of a single target from its last successful build.

    Instances are created by ``record_target_state`` and compared against the
    current inputs by ``is_target_dirty``.
    """

    # Maps each dependency's path (relative to the project directory, as a
    # string) to the SHA-256 hex digest of that file's contents.
    input_hashes: dict[str, str]
    # The resolved prompt text, stored verbatim rather than hashed.
    prompt: str
    # The model identifier the target was built with.
    model: str
    # Additional build parameters (e.g. width, height), stored verbatim.
    extra_params: dict[str, object] = {}
class BuildState(BaseModel):
    """Full build state persisted to disk.

    This is the top-level object that ``load_state`` validates from the YAML
    state file and that ``save_state`` dumps back to it.
    """

    # Per-target records, keyed by target name. ``is_target_dirty`` also uses
    # the target name as the output path relative to the project directory.
    targets: dict[str, TargetState] = {}
def hash_file(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the bytes stored at *path*.

    The file is read in binary mode in blocks of 8192 bytes, so the whole
    content never has to be held in memory at once.
    """
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        # ``read`` yields an empty bytes object only at end of file.
        while block := stream.read(8192):
            digest.update(block)
    return digest.hexdigest()
def load_state(project_dir: Path, project_name: str) -> BuildState:
    """Load build state from disk, returning empty state if the file is missing.

    Args:
        project_dir: Directory that contains the state file.
        project_name: Project name used to derive the state filename.

    Returns:
        The validated ``BuildState``. An empty ``BuildState`` is returned when
        the state file does not exist or contains no YAML document.

    Raises:
        yaml.YAMLError: If the file is not valid YAML.
        pydantic.ValidationError: If the YAML does not match ``BuildState``.
    """
    state_path = project_dir / state_filename(project_name)
    try:
        # Open directly instead of checking ``exists()`` first: the file could
        # vanish between the check and the open. UTF-8 is given explicitly so
        # the result does not depend on the platform's locale encoding.
        with state_path.open(encoding="utf-8") as f:
            raw = yaml.safe_load(f)  # pyright: ignore[reportAny]
    except FileNotFoundError:
        return BuildState()
    # ``safe_load`` returns None for an empty file.
    if raw is None:
        return BuildState()
    return BuildState.model_validate(raw)
def save_state(state: BuildState, project_dir: Path, project_name: str) -> None:
    """Persist build state to disk.

    The YAML is written to a sibling temporary file which is then renamed
    over the real state file, so an interrupted write never leaves a
    truncated state file behind.

    Args:
        state: The build state to serialize.
        project_dir: Directory in which the state file is stored.
        project_name: Project name used to derive the state filename.

    Raises:
        yaml.representer.RepresenterError: If the state contains a value that
            cannot be expressed with standard YAML tags. The existing state
            file is left untouched in that case.
        OSError: If the file cannot be written or renamed.
    """
    state_path = project_dir / state_filename(project_name)
    tmp_path = state_path.with_name(state_path.name + ".tmp")
    try:
        with tmp_path.open("w", encoding="utf-8") as f:
            # ``safe_dump`` mirrors the ``safe_load`` used when reading: it
            # never emits ``!!python/...`` tags that the loader would reject.
            yaml.safe_dump(
                state.model_dump(),
                f,
                default_flow_style=False,
                sort_keys=False,
            )
        tmp_path.replace(state_path)
    finally:
        # After a successful rename the temp file no longer exists and this is
        # a no-op; after a failure it removes the partial file.
        tmp_path.unlink(missing_ok=True)
def is_target_dirty(
    target_name: str,
    *,
    resolved_prompt: str,
    model: str,
    dep_files: list[Path],
    extra_params: dict[str, object],
    state: BuildState,
    project_dir: Path,
) -> bool:
    """Determine whether a target needs rebuilding.

    A target is dirty if:
    - Its output file does not exist
    - It has never been built (not recorded in state)
    - The model has changed
    - The resolved prompt text has changed
    - Extra parameters (width, height, etc.) have changed
    - The set of dependency files differs from the recorded one (a
      dependency was added or removed)
    - Any dependency file hash has changed

    Args:
        target_name: Target name; also the output path relative to
            ``project_dir``.
        resolved_prompt: The prompt text to compare with the recorded one.
        model: The model identifier to compare with the recorded one.
        dep_files: Current dependency files; each must be located under
            ``project_dir``.
        extra_params: Extra parameters to compare with the recorded ones.
        state: The previously recorded build state.
        project_dir: Project root used to resolve the output path and to
            derive the dependency keys.

    Returns:
        ``True`` if the target must be rebuilt, ``False`` if it is up to date.

    Raises:
        ValueError: If a dependency path is not located under ``project_dir``.
        OSError: If a dependency file cannot be read for hashing.
    """
    if not (project_dir / target_name).exists():
        return True

    prev = state.targets.get(target_name)
    if prev is None:
        return True

    # Cheap in-memory comparisons first; files are only hashed if these match.
    if (
        prev.model != model
        or prev.prompt != resolved_prompt
        or prev.extra_params != extra_params
    ):
        return True

    deps_by_key = {str(dep.relative_to(project_dir)): dep for dep in dep_files}

    # Comparing the key sets catches a dependency that was recorded at the
    # last build but has since been dropped; checking only the current
    # dependencies would silently miss that.
    if set(deps_by_key) != set(prev.input_hashes):
        return True

    return any(
        hash_file(dep) != prev.input_hashes[key]
        for key, dep in deps_by_key.items()
    )
def record_target_state(
    target_name: str,
    *,
    resolved_prompt: str,
    model: str,
    dep_files: list[Path],
    extra_params: dict[str, object],
    state: BuildState,
    project_dir: Path,
) -> None:
    """Store a snapshot of a freshly built target in *state*.

    Every dependency file is hashed and keyed by its path relative to
    ``project_dir``; the prompt, model and extra parameters are stored as
    given. Any earlier entry for ``target_name`` is overwritten. ``state`` is
    modified in place and nothing is written to disk here.
    """
    hashes_by_key: dict[str, str] = {
        str(dep.relative_to(project_dir)): hash_file(dep) for dep in dep_files
    }
    snapshot = TargetState(
        input_hashes=hashes_by_key,
        prompt=resolved_prompt,
        model=model,
        extra_params=extra_params,
    )
    state.targets[target_name] = snapshot