# Provenance: new file bulkgen/state.py, introduced by commit
# ce2160bd6c5d4c30ed7cadd88b01da50924f705a (Konstantin Fickel,
# Fri, 13 Feb 2026 20:08:16 +0100): "feat: add incremental build state tracking"
#   - TargetState/BuildState pydantic models for .bulkgen.state.yaml
#   - SHA-256 hashing for files and strings
#   - is_target_dirty() checks output existence, input hashes, prompt, model,
#     and extra params
#   - record_target_state() persists hashes after successful builds
#   - load_state()/save_state() for YAML serialization
"""Incremental build state tracking via ``.bulkgen.state.yaml``."""

from __future__ import annotations

import hashlib
from pathlib import Path

import yaml
from pydantic import BaseModel

STATE_FILENAME = ".bulkgen.state.yaml"

# Number of bytes read per iteration when streaming a file into the hasher.
_HASH_CHUNK_SIZE = 8192


class TargetState(BaseModel):
    """Recorded state of a single target from its last successful build."""

    # SHA-256 hex digest of every dependency file, keyed by the file's path
    # relative to the project directory.
    input_hashes: dict[str, str]
    # SHA-256 hex digest of the resolved prompt text.
    prompt_hash: str
    # Model identifier the target was built with.
    model: str
    # Digest of the extra parameters; "" when there were none.
    extra_hash: str = ""


class BuildState(BaseModel):
    """Full build state persisted to disk."""

    # Per-target records keyed by target name. Pydantic copies mutable
    # defaults per instance, so the literal ``{}`` is not shared.
    targets: dict[str, TargetState] = {}


def hash_file(path: Path) -> str:
    """Compute the SHA-256 hex digest of a file's contents.

    The file is read in fixed-size chunks so large files are never loaded
    into memory in one piece.

    Raises:
        OSError: If *path* cannot be opened or read.
    """
    digest = hashlib.sha256()
    with path.open("rb") as f:
        while chunk := f.read(_HASH_CHUNK_SIZE):
            digest.update(chunk)
    return digest.hexdigest()


def hash_string(value: str) -> str:
    """Compute the SHA-256 hex digest of a string (encoded as UTF-8)."""
    return hashlib.sha256(value.encode("utf-8")).hexdigest()


def load_state(project_dir: Path) -> BuildState:
    """Load build state from disk.

    Returns an empty :class:`BuildState` when the state file is missing or
    contains no YAML document.

    Raises:
        yaml.YAMLError: If the state file is not valid YAML.
        pydantic.ValidationError: If the YAML does not match the state schema.
    """
    state_path = project_dir / STATE_FILENAME
    # EAFP: opening directly avoids the window between an exists() check and
    # the open() in which the file could disappear.
    try:
        with state_path.open(encoding="utf-8") as f:
            raw = yaml.safe_load(f)
    except FileNotFoundError:
        return BuildState()
    if raw is None:
        # An empty file parses to None rather than to a mapping.
        return BuildState()
    return BuildState.model_validate(raw)


def save_state(state: BuildState, project_dir: Path) -> None:
    """Persist build state to ``STATE_FILENAME`` inside *project_dir*."""
    state_path = project_dir / STATE_FILENAME
    # Explicit encoding keeps the state file independent of the locale.
    with state_path.open("w", encoding="utf-8") as f:
        yaml.dump(state.model_dump(), f, default_flow_style=False, sort_keys=False)


def _extra_hash(params: dict[str, object]) -> str:
    """Hash extra target parameters (width, height, etc.) for change detection.

    Returns ``""`` for an empty mapping, matching the default of
    ``TargetState.extra_hash``. Items are sorted by key first so the digest
    does not depend on insertion order.
    """
    if not params:
        return ""
    return hash_string(str(sorted(params.items())))


def _dep_key(dep_path: Path, project_dir: Path) -> str:
    """Return the state-file key for a dependency: its project-relative path.

    Raises:
        ValueError: If *dep_path* is not located under *project_dir*.
    """
    return str(dep_path.relative_to(project_dir))


def is_target_dirty(
    target_name: str,
    *,
    resolved_prompt: str,
    model: str,
    dep_files: list[Path],
    extra_params: dict[str, object],
    state: BuildState,
    project_dir: Path,
) -> bool:
    """Determine whether a target needs rebuilding.

    A target is dirty if:
    - Its output file does not exist
    - It has never been built (not recorded in state)
    - The model has changed
    - The resolved prompt text has changed
    - Extra parameters (width, height, etc.) have changed
    - Any dependency file is new or its hash has changed
    - A dependency recorded at the last build is no longer in *dep_files*

    Raises:
        ValueError: If a dependency path is not under *project_dir*.
        OSError: If a dependency file cannot be read.
    """
    if not (project_dir / target_name).exists():
        return True

    prev = state.targets.get(target_name)
    if prev is None:
        return True

    # Cheap comparisons first; file hashing is left for last.
    if prev.model != model:
        return True
    if prev.prompt_hash != hash_string(resolved_prompt):
        return True
    if prev.extra_hash != _extra_hash(extra_params):
        return True

    seen_keys: set[str] = set()
    for dep_path in dep_files:
        key = _dep_key(dep_path, project_dir)
        seen_keys.add(key)
        # .get() yields None for a dependency that was not recorded before,
        # which never equals a digest, so new inputs count as changes.
        if prev.input_hashes.get(key) != hash_file(dep_path):
            return True

    # Every current dependency matched its record. The target is still dirty
    # if the previous build used inputs that have since been removed.
    return prev.input_hashes.keys() != seen_keys


def record_target_state(
    target_name: str,
    *,
    resolved_prompt: str,
    model: str,
    dep_files: list[Path],
    extra_params: dict[str, object],
    state: BuildState,
    project_dir: Path,
) -> None:
    """Record the state of a successfully built target.

    Mutates *state* in place, replacing any earlier record for
    *target_name*; nothing is written to disk here.

    Raises:
        ValueError: If a dependency path is not under *project_dir*.
        OSError: If a dependency file cannot be read.
    """
    input_hashes = {
        _dep_key(dep_path, project_dir): hash_file(dep_path)
        for dep_path in dep_files
    }
    state.targets[target_name] = TargetState(
        input_hashes=input_hashes,
        prompt_hash=hash_string(resolved_prompt),
        model=model,
        extra_hash=_extra_hash(extra_params),
    )