From ce2160bd6c5d4c30ed7cadd88b01da50924f705a Mon Sep 17 00:00:00 2001
From: Konstantin Fickel <mail@konstantinfickel.de>
Date: Fri, 13 Feb 2026 20:08:16 +0100
Subject: [PATCH] feat: add incremental build state tracking

- TargetState/BuildState pydantic models for .bulkgen.state.yaml
- SHA-256 hashing for files and strings
- is_target_dirty() checks output existence, input hashes, prompt, model, and extra params
- record_target_state() persists hashes after successful builds
- load_state()/save_state() for YAML serialization
---
 bulkgen/state.py | 137 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 137 insertions(+)
 create mode 100644 bulkgen/state.py

diff --git a/bulkgen/state.py b/bulkgen/state.py
new file mode 100644
index 0000000..0a354b0
--- /dev/null
+++ b/bulkgen/state.py
@@ -0,0 +1,137 @@
+"""Incremental build state tracking via ``.bulkgen.state.yaml``."""
+
+from __future__ import annotations
+
+import hashlib
+from pathlib import Path
+
+import yaml
+from pydantic import BaseModel
+
+STATE_FILENAME = ".bulkgen.state.yaml"  # joined onto the project directory
+
+
+class TargetState(BaseModel):
+    """Fingerprint of one target's inputs, captured after a successful build."""
+
+    input_hashes: dict[str, str]  # project-relative dep path -> SHA-256 hex of file
+    prompt_hash: str  # SHA-256 hex digest of the resolved prompt text
+    model: str  # model value the target was last built with
+    extra_hash: str = ""  # hash of the extra params; "" when there were none
+
+
+class BuildState(BaseModel):
+    """Full build state persisted to disk: one ``TargetState`` per target."""
+
+    targets: dict[str, TargetState] = {}  # keyed by target name
+
+
+def hash_file(path: Path) -> str:
+    """Return the SHA-256 hex digest of the bytes stored at *path*."""
+    digest = hashlib.sha256()
+    with path.open("rb") as stream:
+        while block := stream.read(8192):
+            digest.update(block)
+    return digest.hexdigest()
+
+
+def hash_string(value: str) -> str:
+    """Return the SHA-256 hex digest of *value* encoded as UTF-8."""
+    return hashlib.sha256(bytes(value, "utf-8")).hexdigest()
+
+
+def load_state(project_dir: Path) -> BuildState:
+    """Load build state; a missing or empty file yields an empty ``BuildState``."""
+    try:
+        # Explicit UTF-8 so parsing does not depend on the platform's locale.
+        with (project_dir / STATE_FILENAME).open(encoding="utf-8") as f:
+            raw = yaml.safe_load(f)
+    except (FileNotFoundError, NotADirectoryError):
+        # Opening directly (no exists() pre-check) avoids a check-then-open race.
+        return BuildState()
+    return BuildState() if raw is None else BuildState.model_validate(raw)
+
+
+def save_state(state: BuildState, project_dir: Path) -> None:
+    """Write *state* as block-style YAML to the state file in *project_dir*."""
+    # Explicit UTF-8 keeps the file independent of the platform's locale encoding.
+    with (project_dir / STATE_FILENAME).open("w", encoding="utf-8") as f:
+        yaml.dump(state.model_dump(), f, default_flow_style=False, sort_keys=False)
+
+
+def _extra_hash(params: dict[str, object]) -> str:
+    """Fingerprint the extra target parameters; an empty mapping gives ``""``."""
+    if params:
+        return hash_string(str(sorted(params.items())))
+    return ""
+
+
+def is_target_dirty(
+    target_name: str,
+    *,
+    resolved_prompt: str,
+    model: str,
+    dep_files: list[Path],
+    extra_params: dict[str, object],
+    state: BuildState,
+    project_dir: Path,
+) -> bool:
+    """Determine whether a target needs rebuilding.
+
+    A target is dirty if:
+    - Its output file (``project_dir / target_name``) does not exist
+    - It has never been built (not recorded in ``state``)
+    - The model, resolved prompt text, or extra parameters have changed
+    - The set of dependency files differs from the recorded one
+    - Any dependency file's content hash has changed
+
+    Paths in ``dep_files`` are expected to exist under ``project_dir``;
+    ``Path.relative_to`` and ``hash_file`` can raise otherwise.
+    """
+    if not (project_dir / target_name).exists():
+        return True
+
+    prev = state.targets.get(target_name)
+    if prev is None:
+        return True
+
+    # Cheap comparisons first; file hashing below is the expensive part.
+    if prev.model != model:
+        return True
+    if prev.prompt_hash != hash_string(resolved_prompt):
+        return True
+    if prev.extra_hash != _extra_hash(extra_params):
+        return True
+
+    # A dependency that was added or dropped changes the inputs even when
+    # every file still present hashes the same as before.
+    deps = {str(path.relative_to(project_dir)): path for path in dep_files}
+    if deps.keys() != prev.input_hashes.keys():
+        return True
+
+    # any() stops hashing at the first changed file.
+    return any(prev.input_hashes[key] != hash_file(path) for key, path in deps.items())
+
+
+def record_target_state(
+    target_name: str,
+    *,
+    resolved_prompt: str,
+    model: str,
+    dep_files: list[Path],
+    extra_params: dict[str, object],
+    state: BuildState,
+    project_dir: Path,
+) -> None:
+    """Store a fresh ``TargetState`` for *target_name* in ``state.targets``.
+
+    Only the in-memory ``state`` is mutated; nothing is written to disk here.
+    """
+    # Keys are dependency paths relative to the project directory.
+    hashes = {str(dep.relative_to(project_dir)): hash_file(dep) for dep in dep_files}
+    state.targets[target_name] = TargetState(
+        input_hashes=hashes,
+        prompt_hash=hash_string(resolved_prompt),
+        model=model,
+        extra_hash=_extra_hash(extra_params),
+    )