feat: add incremental build state tracking
- TargetState/BuildState pydantic models for .bulkgen.state.yaml
- SHA-256 hashing for files and strings
- is_target_dirty() checks output existence, input hashes, prompt, model, and extra params
- record_target_state() persists hashes after successful builds
- load_state()/save_state() for YAML serialization
This commit is contained in:
parent
bda2b8c8e7
commit
ce2160bd6c
1 changed files with 137 additions and 0 deletions
137
bulkgen/state.py
Normal file
137
bulkgen/state.py
Normal file
|
|
@ -0,0 +1,137 @@
|
|||
"""Incremental build state tracking via ``.bulkgen.state.yaml``."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
from pydantic import BaseModel
|
||||
|
||||
STATE_FILENAME = ".bulkgen.state.yaml"
|
||||
|
||||
|
||||
class TargetState(BaseModel):
    """Snapshot of one target's inputs, captured after its last successful build.

    ``record_target_state()`` fills these fields in and ``is_target_dirty()``
    compares them against the current inputs.
    """

    # Dependency path (relative to the project directory, as a string)
    # mapped to the SHA-256 hex digest of that file's contents.
    input_hashes: dict[str, str]
    # SHA-256 hex digest of the resolved prompt text.
    prompt_hash: str
    # Model name the target was built with; compared verbatim.
    model: str
    # Digest of the extra parameters; empty string when there were none.
    extra_hash: str = ""
|
||||
|
||||
|
||||
class BuildState(BaseModel):
    """Root document of the state file: every recorded target, keyed by name."""

    # Target name -> state from that target's last successful build.
    # Empty by default, which is also what a missing state file loads as.
    targets: dict[str, TargetState] = {}
|
||||
|
||||
|
||||
def hash_file(path: Path) -> str:
    """Return the SHA-256 hex digest of the bytes stored at *path*.

    The file is consumed in 8 KiB reads, so memory use does not grow with
    the size of the file.
    """
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        # An empty read signals end-of-file and ends the loop.
        while block := stream.read(8192):
            digest.update(block)
    return digest.hexdigest()
|
||||
|
||||
|
||||
def hash_string(value: str) -> str:
    """Return the SHA-256 hex digest of *value* encoded as UTF-8."""
    encoded = value.encode("utf-8")
    digest = hashlib.sha256(encoded)
    return digest.hexdigest()
|
||||
|
||||
|
||||
def load_state(project_dir: Path) -> BuildState:
    """Load build state from disk, returning empty state if the file is missing.

    Args:
        project_dir: Directory expected to contain ``STATE_FILENAME``.

    Returns:
        The validated state, or an empty ``BuildState`` when the state file
        does not exist or holds no YAML document.

    Raises:
        yaml.YAMLError: If the file is not parseable YAML.
        pydantic.ValidationError: If the parsed document does not match
            the ``BuildState`` schema.
    """
    state_path = project_dir / STATE_FILENAME
    try:
        # Decode as UTF-8 explicitly rather than with the platform's locale
        # encoding, so the same state file reads identically everywhere.
        with state_path.open(encoding="utf-8") as f:
            raw = yaml.safe_load(f)
    except (FileNotFoundError, NotADirectoryError):
        # Opening directly (instead of checking exists() first) avoids a
        # race with the file disappearing between the check and the open.
        return BuildState()
    if raw is None:
        # An empty file parses to None rather than to a mapping.
        return BuildState()
    return BuildState.model_validate(raw)
|
||||
|
||||
|
||||
def save_state(state: BuildState, project_dir: Path) -> None:
    """Persist build state to disk.

    The YAML is first written to a temporary sibling file and then renamed
    over the real state file, so an interrupted save leaves the previous
    state in place instead of a truncated document.

    Args:
        state: Build state to serialize.
        project_dir: Directory in which ``STATE_FILENAME`` is written.

    Raises:
        OSError: If the temporary file cannot be written or renamed.
    """
    state_path = project_dir / STATE_FILENAME
    tmp_path = state_path.with_name(state_path.name + ".tmp")
    try:
        # Explicit UTF-8 keeps the file independent of the platform locale.
        with tmp_path.open("w", encoding="utf-8") as f:
            yaml.dump(state.model_dump(), f, default_flow_style=False, sort_keys=False)
        tmp_path.replace(state_path)
    except BaseException:
        # Do not leave a half-written temporary file behind; re-raise so the
        # caller still sees the original failure (including interrupts).
        tmp_path.unlink(missing_ok=True)
        raise
|
||||
|
||||
|
||||
def _extra_hash(params: dict[str, object]) -> str:
    """Digest the extra target parameters (width, height, etc.) for change detection.

    An empty mapping yields ``""``. Otherwise the items are sorted before
    being stringified and hashed, so insertion order does not affect the
    result.
    """
    if params:
        ordered_items = sorted(params.items())
        return hash_string(str(ordered_items))
    return ""
|
||||
|
||||
|
||||
def is_target_dirty(
    target_name: str,
    *,
    resolved_prompt: str,
    model: str,
    dep_files: list[Path],
    extra_params: dict[str, object],
    state: BuildState,
    project_dir: Path,
) -> bool:
    """Determine whether a target needs rebuilding.

    A target is dirty if:
    - Its output file does not exist
    - It has never been built (not recorded in state)
    - The model has changed
    - The resolved prompt text has changed
    - Extra parameters (width, height, etc.) have changed
    - The set of dependency files differs from the recorded one
      (a dependency was added or removed)
    - Any dependency file hash has changed

    Args:
        target_name: Output path of the target, relative to *project_dir*;
            also its key in ``state.targets``.
        resolved_prompt: Prompt text to compare against the recorded hash.
        model: Model name to compare against the recorded one.
        dep_files: Dependency files; each must be located under *project_dir*.
        extra_params: Extra parameters to compare against the recorded hash.
        state: Previously recorded build state.
        project_dir: Project root used to resolve the output and to key
            dependency paths.

    Returns:
        ``True`` if the target must be rebuilt, ``False`` if it is up to date.

    Raises:
        ValueError: If a dependency path is not under *project_dir*.
        OSError: If a dependency file cannot be read for hashing.
    """
    if not (project_dir / target_name).exists():
        return True

    prev = state.targets.get(target_name)
    if prev is None:
        return True

    # Cheap scalar comparisons come first so files are hashed only when needed.
    if prev.model != model:
        return True
    if prev.prompt_hash != hash_string(resolved_prompt):
        return True
    if prev.extra_hash != _extra_hash(extra_params):
        return True

    deps_by_key = {str(dep.relative_to(project_dir)): dep for dep in dep_files}

    # Comparing the key sets catches a dependency that was dropped from the
    # target: its stale entry is still in the recorded hashes, but checking
    # only the current dependencies would never look at it.
    if deps_by_key.keys() != prev.input_hashes.keys():
        return True

    return any(
        prev.input_hashes[key] != hash_file(dep) for key, dep in deps_by_key.items()
    )
|
||||
|
||||
|
||||
def record_target_state(
    target_name: str,
    *,
    resolved_prompt: str,
    model: str,
    dep_files: list[Path],
    extra_params: dict[str, object],
    state: BuildState,
    project_dir: Path,
) -> None:
    """Store the current input fingerprint of a target that was just built.

    Any existing entry for *target_name* in ``state.targets`` is replaced.
    Only the in-memory *state* object is mutated; nothing is written to disk
    here.
    """
    # Key each dependency by its path relative to the project directory.
    fingerprints: dict[str, str] = {
        str(dep.relative_to(project_dir)): hash_file(dep) for dep in dep_files
    }
    snapshot = TargetState(
        input_hashes=fingerprints,
        prompt_hash=hash_string(resolved_prompt),
        model=model,
        extra_hash=_extra_hash(extra_params),
    )
    state.targets[target_name] = snapshot
|
||||
Loading…
Add table
Add a link
Reference in a new issue