hokusai/bulkgen/state.py
Konstantin Fickel 7ab25d49cb
refactor: switch to basedpyright, remove pydantic-settings
- Replace pyright with basedpyright in devenv.nix (custom hook)
- Add basedpyright to devenv packages
- Fix all basedpyright warnings: add DiGraph[str] type args, annotate
  class attributes, narrow SyncResponse, handle unused call results,
  suppress unavoidable Any from yaml.safe_load and untyped blackforest
- Replace pydantic-settings[yaml] with direct pyyaml dependency
- Update CLAUDE.md to reflect basedpyright and dependency changes
2026-02-13 20:25:28 +01:00

137 lines
3.7 KiB
Python

"""Incremental build state tracking via ``.bulkgen.state.yaml``."""
from __future__ import annotations
import hashlib
from pathlib import Path
import yaml
from pydantic import BaseModel
# File name of the persisted build state; load_state() and save_state() join
# it onto the project directory to locate the file.
STATE_FILENAME = ".bulkgen.state.yaml"
class TargetState(BaseModel):
    """Recorded state of a single target from its last successful build.

    Instances are created by ``record_target_state`` and compared against
    freshly computed values by ``is_target_dirty``.
    """

    # SHA-256 hex digest of each dependency file, keyed by the file's path
    # relative to the project directory.
    input_hashes: dict[str, str]
    # SHA-256 hex digest of the resolved prompt text.
    prompt_hash: str
    # Model identifier used for the build; compared with plain ``!=``.
    model: str
    # Digest of the extra target parameters. The default "" is also what
    # ``_extra_hash`` returns when there are no extra parameters.
    extra_hash: str = ""
class BuildState(BaseModel):
    """Full build state persisted to disk."""

    # Per-target records keyed by target name; ``is_target_dirty`` also uses
    # that name as the output path relative to the project directory.
    # NOTE(review): relies on pydantic copying mutable field defaults per
    # instance so this ``{}`` is not shared between models — confirm.
    targets: dict[str, TargetState] = {}
def hash_file(path: Path) -> str:
    """Return the SHA-256 hex digest of the bytes stored at *path*.

    The file is consumed in 8 KiB reads so its full contents are never held
    in memory at once.
    """
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        # An empty read signals end-of-file and terminates the loop.
        while block := stream.read(8192):
            digest.update(block)
    return digest.hexdigest()
def hash_string(value: str) -> str:
    """Return the SHA-256 hex digest of *value* encoded as UTF-8."""
    encoded = value.encode("utf-8")
    digest = hashlib.sha256()
    digest.update(encoded)
    return digest.hexdigest()
def load_state(project_dir: Path) -> BuildState:
    """Load build state from disk, returning empty state if the file is missing.

    The state file is ``project_dir / STATE_FILENAME``. It is opened directly
    instead of being probed with ``exists()`` first, so a file that disappears
    between the check and the open cannot cause a spurious failure. The file
    is decoded as UTF-8 regardless of the platform's locale encoding.

    Args:
        project_dir: Directory that contains (or will contain) the state file.

    Returns:
        The validated ``BuildState``; an empty ``BuildState`` when the file
        does not exist or contains no YAML document.

    Raises:
        Errors from YAML parsing or from ``BuildState.model_validate`` are not
        caught here and propagate to the caller.
    """
    state_path = project_dir / STATE_FILENAME
    try:
        f = state_path.open(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError):
        # No state recorded yet: every target will be treated as never built.
        return BuildState()
    with f:
        raw = yaml.safe_load(f)  # pyright: ignore[reportAny]
    if raw is None:
        # An empty file parses to None rather than to a mapping.
        return BuildState()
    return BuildState.model_validate(raw)
def save_state(state: BuildState, project_dir: Path) -> None:
    """Persist build state to disk.

    The YAML is first written to a sibling temporary file and then moved over
    ``project_dir / STATE_FILENAME`` with ``Path.replace``. Writing in place
    would truncate the existing file first, so an interrupted run could leave
    a half-written state file that the next ``load_state`` cannot parse.

    Args:
        state: Build state to serialize (via ``model_dump``).
        project_dir: Directory in which the state file is stored.

    Raises:
        OSError: If the temporary file cannot be written or moved into place.
    """
    state_path = project_dir / STATE_FILENAME
    # Same directory as the final file so the replace stays on one filesystem.
    tmp_path = state_path.with_name(state_path.name + ".tmp")
    try:
        with tmp_path.open("w", encoding="utf-8") as f:
            yaml.dump(state.model_dump(), f, default_flow_style=False, sort_keys=False)
        _ = tmp_path.replace(state_path)
    finally:
        # After a successful replace the temp file is gone and this is a
        # no-op; after a failure it removes the partial file.
        tmp_path.unlink(missing_ok=True)
def _extra_hash(params: dict[str, object]) -> str:
    """Return a digest of the extra target parameters for change detection.

    Items are sorted before being stringified so the digest does not depend
    on the dict's insertion order. An empty mapping yields ``""``.
    """
    if params:
        ordered_items = sorted(params.items())
        return hash_string(str(ordered_items))
    return ""
def is_target_dirty(
    target_name: str,
    *,
    resolved_prompt: str,
    model: str,
    dep_files: list[Path],
    extra_params: dict[str, object],
    state: BuildState,
    project_dir: Path,
) -> bool:
    """Determine whether a target needs rebuilding.

    A target is dirty if:
    - Its output file does not exist
    - It has never been built (not recorded in state)
    - The model has changed
    - The set of dependency files differs from the recorded one (a dependency
      was added or removed)
    - The resolved prompt text has changed
    - Extra parameters (width, height, etc.) have changed
    - Any dependency file hash has changed

    Checks run from cheapest to most expensive: plain comparisons first, then
    string hashing, and dependency files are read only when everything else
    matches.

    Args:
        target_name: Target name, also its output path relative to
            ``project_dir``.
        resolved_prompt: Prompt text to compare (by hash) with the recorded one.
        model: Model identifier to compare with the recorded one.
        dep_files: Dependency files; each must be located under ``project_dir``.
        extra_params: Extra target parameters, compared by hash.
        state: Previously recorded build state.
        project_dir: Project root used to resolve the output and to key
            dependencies.

    Returns:
        ``True`` if the target must be rebuilt, ``False`` if it is up to date.

    Raises:
        ValueError: If a dependency path is not relative to ``project_dir``.
        OSError: If a dependency file cannot be read while hashing it.
    """
    if not (project_dir / target_name).exists():
        return True

    prev = state.targets.get(target_name)
    if prev is None:
        return True

    if prev.model != model:
        return True

    # Key dependencies exactly like record_target_state does, then compare
    # the key sets: looking up only the current dependencies would miss a
    # dependency that was recorded earlier but has since been removed.
    current_deps = {str(dep.relative_to(project_dir)): dep for dep in dep_files}
    if current_deps.keys() != prev.input_hashes.keys():
        return True

    if prev.prompt_hash != hash_string(resolved_prompt):
        return True
    if prev.extra_hash != _extra_hash(extra_params):
        return True

    # Key sets are equal here, so every lookup succeeds; any() stops reading
    # files at the first changed dependency.
    return any(
        prev.input_hashes[dep_key] != hash_file(dep_path)
        for dep_key, dep_path in current_deps.items()
    )
def record_target_state(
    target_name: str,
    *,
    resolved_prompt: str,
    model: str,
    dep_files: list[Path],
    extra_params: dict[str, object],
    state: BuildState,
    project_dir: Path,
) -> None:
    """Store a fresh ``TargetState`` for a target that was just built.

    Each dependency is hashed and keyed by its path relative to
    ``project_dir``; the prompt and extra parameters are stored as digests.
    ``state`` is mutated in place and any earlier record for ``target_name``
    is overwritten.
    """
    hashes_by_dep: dict[str, str] = {
        str(dep.relative_to(project_dir)): hash_file(dep) for dep in dep_files
    }
    snapshot = TargetState(
        input_hashes=hashes_by_dep,
        prompt_hash=hash_string(resolved_prompt),
        model=model,
        extra_hash=_extra_hash(extra_params),
    )
    state.targets[target_name] = snapshot