- TargetState/BuildState pydantic models for .bulkgen.state.yaml - SHA-256 hashing for files and strings - is_target_dirty() checks output existence, input hashes, prompt, model, and extra params - record_target_state() persists hashes after successful builds - load_state()/save_state() for YAML serialization
137 lines
3.6 KiB
Python
137 lines
3.6 KiB
Python
"""Incremental build state tracking via ``.bulkgen.state.yaml``."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import hashlib
|
|
from pathlib import Path
|
|
|
|
import yaml
|
|
from pydantic import BaseModel
|
|
|
|
STATE_FILENAME = ".bulkgen.state.yaml"
|
|
|
|
|
|
class TargetState(BaseModel):
    """Recorded state of a single target from its last successful build.

    One instance is stored per target name in ``BuildState.targets``.  The
    fields hold digests of everything that influences a target's output so a
    later run can detect changes by comparing digests.
    """

    # Dependency path (relative to the project directory, as a string)
    # mapped to the SHA-256 hex digest of that file's contents.
    input_hashes: dict[str, str]
    # SHA-256 hex digest of the resolved prompt text.
    prompt_hash: str
    # Model name used for the build; compared verbatim, not hashed.
    model: str
    # Digest of the extra parameters; the empty string means "no extras".
    extra_hash: str = ""
|
|
|
|
|
|
class BuildState(BaseModel):
    """Full build state persisted to disk.

    This is the root object serialized to and parsed from the YAML state file.
    """

    # Target name -> state recorded at that target's last successful build.
    # NOTE(review): a literal ``{}`` default relies on pydantic copying field
    # defaults per instance rather than sharing one dict — confirm against the
    # pydantic version in use.
    targets: dict[str, TargetState] = {}
|
|
|
|
|
|
def hash_file(path: Path) -> str:
    """Return the SHA-256 hex digest of the bytes stored at *path*.

    The file is opened in binary mode and consumed in 8192-byte reads, so the
    whole file is never held in memory at once.

    Args:
        path: File to hash.

    Returns:
        The lowercase hexadecimal SHA-256 digest of the file contents.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        # An empty read signals end-of-file and ends the loop.
        while block := stream.read(8192):
            digest.update(block)
    return digest.hexdigest()
|
|
|
|
|
|
def hash_string(value: str) -> str:
    """Return the SHA-256 hex digest of *value* encoded as UTF-8.

    Args:
        value: Text to hash.

    Returns:
        The lowercase hexadecimal SHA-256 digest of the encoded text.
    """
    encoded = value.encode("utf-8")
    return hashlib.sha256(encoded).hexdigest()
|
|
|
|
|
|
def load_state(project_dir: Path) -> BuildState:
    """Load build state from disk, returning empty state if the file is missing.

    Args:
        project_dir: Directory expected to contain the ``STATE_FILENAME`` file.

    Returns:
        The parsed ``BuildState``, or a fresh empty ``BuildState`` when the
        state file does not exist or contains no YAML document.

    Raises:
        yaml.YAMLError: If the file is not valid YAML.
        pydantic.ValidationError: If the YAML does not match ``BuildState``.
    """
    state_path = project_dir / STATE_FILENAME
    try:
        # Open directly instead of checking ``exists()`` first: the file may
        # disappear between the check and the open.  The encoding is pinned
        # so reading does not depend on the platform's locale default.
        with state_path.open(encoding="utf-8") as f:
            raw = yaml.safe_load(f)
    except (FileNotFoundError, NotADirectoryError):
        return BuildState()
    # An empty file parses to ``None``; treat it as "no recorded state".
    if raw is None:
        return BuildState()
    return BuildState.model_validate(raw)
|
|
|
|
|
|
def save_state(state: BuildState, project_dir: Path) -> None:
    """Persist build state to disk.

    The YAML is written to a temporary sibling file which then replaces the
    real state file.  An interrupted write therefore cannot leave a truncated
    state file behind for the next load.

    Args:
        state: Build state to serialize.
        project_dir: Directory in which the ``STATE_FILENAME`` file is written.

    Raises:
        OSError: If the temporary file cannot be written or moved into place.
    """
    state_path = project_dir / STATE_FILENAME
    tmp_path = state_path.with_name(state_path.name + ".tmp")
    try:
        # Pin the encoding so the file does not depend on the platform locale.
        with tmp_path.open("w", encoding="utf-8") as f:
            yaml.dump(state.model_dump(), f, default_flow_style=False, sort_keys=False)
        # Same-directory rename: the old state stays intact until this succeeds.
        tmp_path.replace(state_path)
    finally:
        # No-op after a successful replace; removes a partial file on failure.
        tmp_path.unlink(missing_ok=True)
|
|
|
|
|
|
def _extra_hash(params: dict[str, object]) -> str:
    """Hash extra target parameters (width, height, etc.) for change detection.

    The digest is taken over the string form of the item list sorted by key,
    so the insertion order of *params* does not affect the result.

    Args:
        params: Extra parameters of a target; may be empty.

    Returns:
        The SHA-256 hex digest of the sorted items, or ``""`` when *params*
        is empty (the same value ``TargetState.extra_hash`` defaults to).
    """
    if params:
        ordered_items = sorted(params.items())
        return hash_string(str(ordered_items))
    return ""
|
|
|
|
|
|
def is_target_dirty(
    target_name: str,
    *,
    resolved_prompt: str,
    model: str,
    dep_files: list[Path],
    extra_params: dict[str, object],
    state: BuildState,
    project_dir: Path,
) -> bool:
    """Determine whether a target needs rebuilding.

    A target is dirty if:

    - Its output file does not exist
    - It has never been built (not recorded in state)
    - The model has changed
    - The resolved prompt text has changed
    - Extra parameters (width, height, etc.) have changed
    - The set of dependency files differs from the recorded one
      (a dependency was added or removed)
    - Any dependency file hash has changed

    Args:
        target_name: Target name; also its output path relative to
            *project_dir*.
        resolved_prompt: Prompt text to compare against the recorded digest.
        model: Model name to compare against the recorded one.
        dep_files: Dependency files of the target, located under
            *project_dir*.
        extra_params: Extra parameters to compare against the recorded digest.
        state: Build state holding the records of previous builds.
        project_dir: Project root used to resolve the output path and to
            derive the relative dependency keys.

    Returns:
        ``True`` if the target must be rebuilt, ``False`` if it is up to date.

    Raises:
        ValueError: If a dependency path is not located under *project_dir*.
        OSError: If a dependency file cannot be read for hashing.
    """
    if not (project_dir / target_name).exists():
        return True

    prev = state.targets.get(target_name)
    if prev is None:
        return True

    # Cheap comparisons first; file hashing is deferred to the very end.
    if prev.model != model:
        return True
    if prev.prompt_hash != hash_string(resolved_prompt):
        return True
    if prev.extra_hash != _extra_hash(extra_params):
        return True

    deps_by_key = {str(dep.relative_to(project_dir)): dep for dep in dep_files}

    # Comparing the key sets catches dependencies that were dropped since the
    # last build; checking only the current files would miss those entirely.
    if deps_by_key.keys() != prev.input_hashes.keys():
        return True

    # Lazily hash each file and stop at the first mismatch.
    return any(
        prev.input_hashes[key] != hash_file(dep) for key, dep in deps_by_key.items()
    )
|
|
|
|
|
|
def record_target_state(
    target_name: str,
    *,
    resolved_prompt: str,
    model: str,
    dep_files: list[Path],
    extra_params: dict[str, object],
    state: BuildState,
    project_dir: Path,
) -> None:
    """Record the state of a successfully built target.

    Builds a ``TargetState`` snapshot from the current inputs and stores it in
    ``state.targets`` under *target_name*, replacing any previous entry.  The
    state is only mutated in memory; nothing is written to disk here.

    Args:
        target_name: Key under which the snapshot is stored.
        resolved_prompt: Prompt text whose digest is recorded.
        model: Model name recorded verbatim.
        dep_files: Dependency files to hash, located under *project_dir*.
        extra_params: Extra parameters whose digest is recorded.
        state: Build state object updated in place.
        project_dir: Project root used to derive the relative dependency keys.

    Raises:
        ValueError: If a dependency path is not located under *project_dir*.
        OSError: If a dependency file cannot be read for hashing.
    """
    snapshot = TargetState(
        # Keys are project-relative paths, values the file content digests.
        input_hashes={
            str(dep.relative_to(project_dir)): hash_file(dep) for dep in dep_files
        },
        prompt_hash=hash_string(resolved_prompt),
        model=model,
        extra_hash=_extra_hash(extra_params),
    )
    state.targets[target_name] = snapshot
|