- State filename now derives from config: cards.bulkgen.yaml produces .cards.bulkgen-state.yaml instead of .bulkgen.state.yaml
- Store resolved prompt text and extra params directly in state file instead of hashing them, making state files human-readable
- Only file input contents remain hashed (SHA-256)
- Thread project_name through builder and CLI
- Remove hash_string() and _extra_hash() helpers
- Update .gitignore pattern to .*.bulkgen-state.yaml
132 lines
3.6 KiB
Python
132 lines
3.6 KiB
Python
"""Incremental build state tracking via ``.<project>.bulkgen-state.yaml``."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import hashlib
|
|
from pathlib import Path
|
|
|
|
import yaml
|
|
from pydantic import BaseModel
|
|
|
|
|
|
def state_filename(project_name: str) -> str:
    """Build the name of the hidden state file that belongs to a project.

    Example: the config file ``cards.bulkgen.yaml`` has the project name
    ``cards``, which maps to the state file ``.cards.bulkgen-state.yaml``.
    """
    return ".{}.bulkgen-state.yaml".format(project_name)
|
|
|
|
|
|
class TargetState(BaseModel):
    """Snapshot of what went into a target's most recent successful build."""

    # Content digest of each input file, keyed by a path string.
    input_hashes: dict[str, str]
    # Resolved prompt text, kept verbatim rather than hashed.
    prompt: str
    # Name of the model the target was built with.
    model: str
    # Any further build parameters; empty when none were recorded.
    extra_params: dict[str, object] = {}
|
|
|
|
|
|
class BuildState(BaseModel):
    """Root document of the state file: the recorded state of every target."""

    # One ``TargetState`` per target name; a fresh state has no entries.
    targets: dict[str, TargetState] = {}
|
|
|
|
|
|
def hash_file(path: Path) -> str:
    """Return the hex-encoded SHA-256 digest of the bytes stored at *path*.

    The file is read in 8192-byte pieces, so its whole content is never
    held in memory at once.
    """
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        # An empty read marks end of file and ends the loop.
        while piece := stream.read(8192):
            digest.update(piece)
    return digest.hexdigest()
|
|
|
|
|
|
def load_state(project_dir: Path, project_name: str) -> BuildState:
    """Load build state from disk, returning empty state if the file is missing.

    The state file lives in *project_dir* and its name is derived from
    *project_name* via ``state_filename``. A file that exists but holds no
    YAML document (``safe_load`` yields ``None``) is also treated as empty
    state. Any other content is handed to ``BuildState.model_validate``,
    which rejects data that does not fit the model.
    """
    state_path = project_dir / state_filename(project_name)
    # Open directly instead of checking ``exists()`` first: the file may
    # vanish between the check and the open, and a missing file is simply
    # "no previous build". The encoding is pinned so reading does not depend
    # on the platform's locale default.
    try:
        with state_path.open(encoding="utf-8") as f:
            raw = yaml.safe_load(f)  # pyright: ignore[reportAny]
    except FileNotFoundError:
        return BuildState()
    if raw is None:
        return BuildState()
    return BuildState.model_validate(raw)
|
|
|
|
|
|
def save_state(state: BuildState, project_dir: Path, project_name: str) -> None:
    """Persist build state to disk.

    The state is written as block-style YAML, with keys in model order, to
    the file named by ``state_filename(project_name)`` inside *project_dir*.
    Any existing file is overwritten.
    """
    state_path = project_dir / state_filename(project_name)
    # Serialize completely before touching the file. Opening in "w" mode
    # truncates immediately, so a failure during dumping would otherwise
    # destroy the previously saved state.
    document = yaml.dump(state.model_dump(), default_flow_style=False, sort_keys=False)
    # Pin the encoding so the file is written the same way on every platform.
    state_path.write_text(document, encoding="utf-8")
|
|
|
|
|
|
def is_target_dirty(
    target_name: str,
    *,
    resolved_prompt: str,
    model: str,
    dep_files: list[Path],
    extra_params: dict[str, object],
    state: BuildState,
    project_dir: Path,
) -> bool:
    """Determine whether a target needs rebuilding.

    A target is dirty if:
    - Its output file does not exist
    - It has never been built (not recorded in state)
    - The model has changed
    - The resolved prompt text has changed
    - Extra parameters (width, height, etc.) have changed
    - A dependency file was added or removed since the last build
    - Any dependency file hash has changed

    Dependencies are keyed by their path relative to *project_dir*, so every
    entry of *dep_files* must be located under it (``Path.relative_to``
    raises ``ValueError`` otherwise). The checks run from cheapest to most
    expensive; files are only hashed once everything else matches.
    """
    if not (project_dir / target_name).exists():
        return True

    prev = state.targets.get(target_name)
    if prev is None:
        return True

    if (
        prev.model != model
        or prev.prompt != resolved_prompt
        or prev.extra_params != extra_params
    ):
        return True

    deps_by_key = {
        str(dep_path.relative_to(project_dir)): dep_path for dep_path in dep_files
    }
    # Compare the dependency *sets*, not just the hashes of the current
    # files: a dependency dropped since the last build leaves nothing to
    # hash, yet the output was still produced from it.
    if deps_by_key.keys() != prev.input_hashes.keys():
        return True

    # Key sets are equal here, so every lookup succeeds; stop at the first
    # file whose content differs.
    return any(
        hash_file(dep_path) != prev.input_hashes[dep_key]
        for dep_key, dep_path in deps_by_key.items()
    )
|
|
|
|
|
|
def record_target_state(
    target_name: str,
    *,
    resolved_prompt: str,
    model: str,
    dep_files: list[Path],
    extra_params: dict[str, object],
    state: BuildState,
    project_dir: Path,
) -> None:
    """Store a fresh snapshot for *target_name* after a successful build.

    Every file in *dep_files* is hashed and stored under its path relative
    to *project_dir*. Together with the prompt, model and extra parameters,
    this replaces any earlier entry for the target in ``state.targets``.
    *state* is modified in place; nothing is written to disk here.
    """
    fingerprints = {
        str(source.relative_to(project_dir)): hash_file(source)
        for source in dep_files
    }
    snapshot = TargetState(
        input_hashes=fingerprints,
        prompt=resolved_prompt,
        model=model,
        extra_params=extra_params,
    )
    state.targets[target_name] = snapshot
|