refactor: use project-named state file and store prompt/params directly

- State filename now derives from config: cards.bulkgen.yaml produces .cards.bulkgen-state.yaml instead of .bulkgen.state.yaml
- Store resolved prompt text and extra params directly in state file instead of hashing them, making state files human-readable
- Only file input contents remain hashed (SHA-256)
- Thread project_name through builder and CLI
- Remove hash_string() and _extra_hash() helpers
- Update .gitignore pattern to .*.bulkgen-state.yaml
2026-02-15 13:56:12 +01:00 · 2026-02-15 13:56:12 +01:00 · 0ecf1f0f9e
commit 0ecf1f0f9e
parent 870023865d
7 changed files with 98 additions and 82 deletions
--- a/.gitignore
+++ b/.gitignore
@ -15,7 +15,7 @@ wheels/
 .pre-commit-config.yaml

 # bulkgen state
-.bulkgen.state.yaml
+.*.bulkgen-state.yaml

 # Nix
 result
--- a/bulkgen/builder.py
+++ b/bulkgen/builder.py
@ -147,6 +147,7 @@ async def _build_single_target(
 async def run_build(
    config: ProjectConfig,
    project_dir: Path,
+    project_name: str,
    target: str | None = None,
    on_progress: ProgressCallback = _noop_callback,
 ) -> BuildResult:
@ -171,7 +172,7 @@ async def run_build(
            raise ValueError(msg)
        graph = get_subgraph_for_target(graph, target)

-    state = load_state(project_dir)
+    state = load_state(project_dir, project_name)
    generations = get_build_order(graph)
    target_names = set(config.targets)

@ -209,7 +210,7 @@ async def run_build(
        )

        _process_outcomes(outcomes, config, project_dir, state, result, on_progress)
-        save_state(state, project_dir)
+        save_state(state, project_dir, project_name)

    return result

--- a/bulkgen/cli.py
+++ b/bulkgen/cli.py
@ -14,9 +14,21 @@ from bulkgen.builder import BuildEvent, BuildResult, run_build
 from bulkgen.config import ProjectConfig, load_config
 from bulkgen.graph import build_graph, get_build_order
 from bulkgen.providers.registry import get_all_models
+from bulkgen.state import state_filename

 app = typer.Typer(name="bulkgen", help="AI artifact build tool.")

+_CONFIG_SUFFIX = ".bulkgen.yaml"
+
+
+def _project_name(config_path: Path) -> str:
+    """Derive the project name from a config path.
+
+    ``cards.bulkgen.yaml`` → ``cards``
+    """
+    name = config_path.name
+    return name.removesuffix(_CONFIG_SUFFIX)
+

 def _find_config(directory: Path) -> Path:
    """Find the single ``*.bulkgen.yaml`` file in *directory*."""
@ -49,7 +61,7 @@ def _format_elapsed(seconds: float) -> str:


 def _run_build(
-    config: ProjectConfig, project_dir: Path, target: str | None
+    config: ProjectConfig, project_dir: Path, project_name: str, target: str | None
 ) -> tuple[BuildResult, float]:
    """Run the async build with click-styled progress output and timing."""

@ -85,7 +97,7 @@ def _run_build(

    start = time.monotonic()
    result = asyncio.run(
-        run_build(config, project_dir, target, on_progress=on_progress)
+        run_build(config, project_dir, project_name, target, on_progress=on_progress)
    )
    elapsed = time.monotonic() - start

@ -102,10 +114,11 @@ def build(
    project_dir = Path.cwd()
    config_path = _find_config(project_dir)
    config = load_config(config_path)
+    name = _project_name(config_path)

    click.echo(click.style("bulkgen", fg="cyan", bold=True) + " building targets...\n")

-    result, elapsed = _run_build(config, project_dir, target)
+    result, elapsed = _run_build(config, project_dir, name, target)

    # Summary
    click.echo("")
@ -131,6 +144,7 @@ def clean() -> None:
    project_dir = Path.cwd()
    config_path = _find_config(project_dir)
    config = load_config(config_path)
+    state_name = state_filename(_project_name(config_path))

    removed = 0
    for target_name in config.targets:
@ -140,10 +154,10 @@ def clean() -> None:
            click.echo(click.style("  rm ", fg="red") + target_name)
            removed += 1

-    state_path = project_dir / ".bulkgen.state.yaml"
+    state_path = project_dir / state_name
    if state_path.exists():
        state_path.unlink()
-        click.echo(click.style("  rm ", fg="red") + ".bulkgen.state.yaml")
+        click.echo(click.style("  rm ", fg="red") + state_name)

    click.echo(click.style(f"\nCleaned {removed} artifact(s)", bold=True))

--- a/bulkgen/state.py
+++ b/bulkgen/state.py
@ -1,4 +1,4 @@
-"""Incremental build state tracking via ``.bulkgen.state.yaml``."""
+"""Incremental build state tracking via ``.<project>.bulkgen-state.yaml``."""

 from __future__ import annotations

@ -8,16 +8,23 @@ from pathlib import Path
 import yaml
 from pydantic import BaseModel

-STATE_FILENAME = ".bulkgen.state.yaml"
+
+def state_filename(project_name: str) -> str:
+    """Return the state filename for a given project name.
+
+    For a config file named ``cards.bulkgen.yaml`` the project name is
+    ``cards`` and the state file is ``.cards.bulkgen-state.yaml``.
+    """
+    return f".{project_name}.bulkgen-state.yaml"


 class TargetState(BaseModel):
    """Recorded state of a single target from its last successful build."""

    input_hashes: dict[str, str]
-    prompt_hash: str
+    prompt: str
    model: str
-    extra_hash: str = ""
+    extra_params: dict[str, object] = {}


 class BuildState(BaseModel):
@ -35,14 +42,9 @@ def hash_file(path: Path) -> str:
    return h.hexdigest()


-def hash_string(value: str) -> str:
-    """Compute the SHA-256 hex digest of a string."""
-    return hashlib.sha256(value.encode("utf-8")).hexdigest()
-
-
-def load_state(project_dir: Path) -> BuildState:
+def load_state(project_dir: Path, project_name: str) -> BuildState:
    """Load build state from disk, returning empty state if the file is missing."""
-    state_path = project_dir / STATE_FILENAME
+    state_path = project_dir / state_filename(project_name)
    if not state_path.exists():
        return BuildState()
    with state_path.open() as f:
@ -52,20 +54,13 @@ def load_state(project_dir: Path) -> BuildState:
    return BuildState.model_validate(raw)


-def save_state(state: BuildState, project_dir: Path) -> None:
+def save_state(state: BuildState, project_dir: Path, project_name: str) -> None:
    """Persist build state to disk."""
-    state_path = project_dir / STATE_FILENAME
+    state_path = project_dir / state_filename(project_name)
    with state_path.open("w") as f:
        yaml.dump(state.model_dump(), f, default_flow_style=False, sort_keys=False)


-def _extra_hash(params: dict[str, object]) -> str:
-    """Hash extra target parameters (width, height, etc.) for change detection."""
-    if not params:
-        return ""
-    return hash_string(str(sorted(params.items())))
-
-
 def is_target_dirty(
    target_name: str,
    *,
@ -98,10 +93,10 @@ def is_target_dirty(
    if prev.model != model:
        return True

-    if prev.prompt_hash != hash_string(resolved_prompt):
+    if prev.prompt != resolved_prompt:
        return True

-    if prev.extra_hash != _extra_hash(extra_params):
+    if prev.extra_params != extra_params:
        return True

    for dep_path in dep_files:
@ -131,7 +126,7 @@ def record_target_state(

    state.targets[target_name] = TargetState(
        input_hashes=input_hashes,
-        prompt_hash=hash_string(resolved_prompt),
+        prompt=resolved_prompt,
        model=model,
-        extra_hash=_extra_hash(extra_params),
+        extra_params=extra_params,
    )
--- a/tests/test_builder.py
+++ b/tests/test_builder.py
@ -23,6 +23,8 @@ from bulkgen.state import load_state

 WriteConfig = Callable[[dict[str, object]], ProjectConfig]

+_PROJECT = "project"
+

 _FAKE_TEXT_MODELS = [
    ModelInfo(
@ -196,7 +198,7 @@ class TestRunBuild:
        self, project_dir: Path, simple_text_config: ProjectConfig
    ) -> None:
        with patch("bulkgen.builder._create_providers", return_value=_fake_providers()):
-            result = await run_build(simple_text_config, project_dir)
+            result = await run_build(simple_text_config, project_dir, _PROJECT)

        assert result.built == ["output.txt"]
        assert result.skipped == []
@ -207,7 +209,7 @@ class TestRunBuild:
        self, project_dir: Path, multi_target_config: ProjectConfig
    ) -> None:
        with patch("bulkgen.builder._create_providers", return_value=_fake_providers()):
-            result = await run_build(multi_target_config, project_dir)
+            result = await run_build(multi_target_config, project_dir, _PROJECT)

        assert "summary.md" in result.built
        assert "final.txt" in result.built
@ -222,10 +224,10 @@ class TestRunBuild:
        self, project_dir: Path, simple_text_config: ProjectConfig
    ) -> None:
        with patch("bulkgen.builder._create_providers", return_value=_fake_providers()):
-            result1 = await run_build(simple_text_config, project_dir)
+            result1 = await run_build(simple_text_config, project_dir, _PROJECT)
            assert result1.built == ["output.txt"]

-            result2 = await run_build(simple_text_config, project_dir)
+            result2 = await run_build(simple_text_config, project_dir, _PROJECT)
            assert result2.skipped == ["output.txt"]
            assert result2.built == []

@ -234,11 +236,11 @@ class TestRunBuild:
    ) -> None:
        config1 = write_config({"targets": {"out.txt": {"prompt": "version 1"}}})
        with patch("bulkgen.builder._create_providers", return_value=_fake_providers()):
-            r1 = await run_build(config1, project_dir)
+            r1 = await run_build(config1, project_dir, _PROJECT)
            assert r1.built == ["out.txt"]

            config2 = write_config({"targets": {"out.txt": {"prompt": "version 2"}}})
-            r2 = await run_build(config2, project_dir)
+            r2 = await run_build(config2, project_dir, _PROJECT)
            assert r2.built == ["out.txt"]

    async def test_rebuild_after_input_change(
@ -249,11 +251,11 @@ class TestRunBuild:
            {"targets": {"out.md": {"prompt": "x", "inputs": ["data.txt"]}}}
        )
        with patch("bulkgen.builder._create_providers", return_value=_fake_providers()):
-            r1 = await run_build(config, project_dir)
+            r1 = await run_build(config, project_dir, _PROJECT)
            assert r1.built == ["out.md"]

            _ = (project_dir / "data.txt").write_text("modified")
-            r2 = await run_build(config, project_dir)
+            r2 = await run_build(config, project_dir, _PROJECT)
            assert r2.built == ["out.md"]

    async def test_selective_build_single_target(
@ -261,7 +263,7 @@ class TestRunBuild:
    ) -> None:
        with patch("bulkgen.builder._create_providers", return_value=_fake_providers()):
            result = await run_build(
-                multi_target_config, project_dir, target="summary.md"
+                multi_target_config, project_dir, _PROJECT, target="summary.md"
            )

        assert "summary.md" in result.built
@ -274,7 +276,7 @@ class TestRunBuild:
        with patch("bulkgen.builder._create_providers", return_value=_fake_providers()):
            with pytest.raises(ValueError, match="Unknown target"):
                _ = await run_build(
-                    simple_text_config, project_dir, target="nonexistent.txt"
+                    simple_text_config, project_dir, _PROJECT, target="nonexistent.txt"
                )

    async def test_failed_target_isolates_independent(
@ -322,7 +324,7 @@ class TestRunBuild:
            "bulkgen.builder._create_providers",
            return_value=[routing_provider, FakeImageProvider()],
        ):
-            result = await run_build(config, project_dir)
+            result = await run_build(config, project_dir, _PROJECT)

        assert "fail.txt" in result.failed
        assert "ok.txt" in result.built
@ -343,7 +345,7 @@ class TestRunBuild:
            "bulkgen.builder._create_providers",
            return_value=[FailingTextProvider(), FakeImageProvider()],
        ):
-            result = await run_build(config, project_dir)
+            result = await run_build(config, project_dir, _PROJECT)

        assert "base.txt" in result.failed
        assert "child.txt" in result.failed
@ -356,7 +358,7 @@ class TestRunBuild:
            "bulkgen.builder._create_providers",
            return_value=[],
        ):
-            result = await run_build(simple_text_config, project_dir)
+            result = await run_build(simple_text_config, project_dir, _PROJECT)

        assert "output.txt" in result.failed
        assert "No provider available" in result.failed["output.txt"]
@ -373,9 +375,9 @@ class TestRunBuild:
            }
        )
        with patch("bulkgen.builder._create_providers", return_value=_fake_providers()):
-            _ = await run_build(config, project_dir)
+            _ = await run_build(config, project_dir, _PROJECT)

-        state = load_state(project_dir)
+        state = load_state(project_dir, _PROJECT)
        assert "a.txt" in state.targets
        assert "b.txt" in state.targets

@ -385,7 +387,7 @@ class TestRunBuild:
        config = write_config({"targets": {"out.txt": {"prompt": prompt_file.name}}})

        with patch("bulkgen.builder._create_providers", return_value=_fake_providers()):
-            result = await run_build(config, project_dir)
+            result = await run_build(config, project_dir, _PROJECT)

        assert result.built == ["out.txt"]
        content = (project_dir / "out.txt").read_text()
@ -395,12 +397,12 @@ class TestRunBuild:
        self, project_dir: Path, simple_text_config: ProjectConfig
    ) -> None:
        with patch("bulkgen.builder._create_providers", return_value=_fake_providers()):
-            r1 = await run_build(simple_text_config, project_dir)
+            r1 = await run_build(simple_text_config, project_dir, _PROJECT)
            assert r1.built == ["output.txt"]

            (project_dir / "output.txt").unlink()

-            r2 = await run_build(simple_text_config, project_dir)
+            r2 = await run_build(simple_text_config, project_dir, _PROJECT)
            assert r2.built == ["output.txt"]

    async def test_diamond_dependency_all_built(
@ -420,7 +422,7 @@ class TestRunBuild:
            }
        )
        with patch("bulkgen.builder._create_providers", return_value=_fake_providers()):
-            result = await run_build(config, project_dir)
+            result = await run_build(config, project_dir, _PROJECT)

        assert set(result.built) == {"left.md", "right.md", "merge.txt"}
        assert result.failed == {}
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@ -132,7 +132,8 @@ class TestBuildCommand:

        assert result.exit_code == 0
        call_args = mock_run.call_args
-        assert call_args[0][2] == "output.txt"
+        # positional args: (config, project_dir, project_name, target)
+        assert call_args[0][3] == "output.txt"


 class TestCleanCommand:
@ -141,7 +142,8 @@ class TestCleanCommand:
    def test_clean_removes_targets(self, cli_project: Path) -> None:
        _ = (cli_project / "output.txt").write_text("generated")
        _ = (cli_project / "image.png").write_bytes(b"\x89PNG")
-        _ = (cli_project / ".bulkgen.state.yaml").write_text("targets: {}")
+        state_file = ".project.bulkgen-state.yaml"
+        _ = (cli_project / state_file).write_text("targets: {}")

        with patch("bulkgen.cli.Path") as mock_path_cls:
            mock_path_cls.cwd.return_value = cli_project
@ -150,7 +152,7 @@ class TestCleanCommand:
        assert result.exit_code == 0
        assert not (cli_project / "output.txt").exists()
        assert not (cli_project / "image.png").exists()
-        assert not (cli_project / ".bulkgen.state.yaml").exists()
+        assert not (cli_project / state_file).exists()
        assert "Cleaned 2 artifact(s)" in result.output

    def test_clean_no_artifacts(self, cli_project: Path) -> None:
--- a/tests/test_state.py
+++ b/tests/test_state.py
@ -10,13 +10,15 @@ from bulkgen.state import (
    BuildState,
    TargetState,
    hash_file,
-    hash_string,
    is_target_dirty,
    load_state,
    record_target_state,
    save_state,
+    state_filename,
 )

+_PROJECT = "test"
+

 class TestHashFunctions:
    """Test hashing helpers."""
@ -34,18 +36,22 @@ class TestHashFunctions:
        h2 = hash_file(f)
        assert h1 != h2

-    def test_hash_string_deterministic(self) -> None:
-        assert hash_string("abc") == hash_string("abc")

-    def test_hash_string_differs(self) -> None:
-        assert hash_string("abc") != hash_string("xyz")
+class TestStateFilename:
+    """Test state filename derivation."""
+
+    def test_state_filename(self) -> None:
+        assert state_filename("cards") == ".cards.bulkgen-state.yaml"
+
+    def test_state_filename_simple(self) -> None:
+        assert state_filename("project") == ".project.bulkgen-state.yaml"


 class TestStatePersistence:
    """Test save/load round-trip of build state."""

    def test_load_missing_file_returns_empty(self, project_dir: Path) -> None:
-        state = load_state(project_dir)
+        state = load_state(project_dir, _PROJECT)
        assert state.targets == {}

    def test_save_and_load_round_trip(self, project_dir: Path) -> None:
@ -53,40 +59,36 @@ class TestStatePersistence:
            targets={
                "out.txt": TargetState(
                    input_hashes={"dep.txt": "abc123"},
-                    prompt_hash="prompt_hash_val",
+                    prompt="Generate something",
                    model="mistral-large-latest",
-                    extra_hash="",
+                    extra_params={},
                )
            }
        )
-        save_state(state, project_dir)
-        loaded = load_state(project_dir)
+        save_state(state, project_dir, _PROJECT)
+        loaded = load_state(project_dir, _PROJECT)

        assert loaded.targets["out.txt"].model == "mistral-large-latest"
        assert loaded.targets["out.txt"].input_hashes == {"dep.txt": "abc123"}
-        assert loaded.targets["out.txt"].prompt_hash == "prompt_hash_val"
+        assert loaded.targets["out.txt"].prompt == "Generate something"

    def test_load_empty_yaml(self, project_dir: Path) -> None:
-        _ = (project_dir / ".bulkgen.state.yaml").write_text("")
-        state = load_state(project_dir)
+        _ = (project_dir / state_filename(_PROJECT)).write_text("")
+        state = load_state(project_dir, _PROJECT)
        assert state.targets == {}

    def test_save_overwrites_existing(self, project_dir: Path) -> None:
        state1 = BuildState(
-            targets={
-                "a.txt": TargetState(input_hashes={}, prompt_hash="h1", model="m1")
-            }
+            targets={"a.txt": TargetState(input_hashes={}, prompt="p1", model="m1")}
        )
-        save_state(state1, project_dir)
+        save_state(state1, project_dir, _PROJECT)

        state2 = BuildState(
-            targets={
-                "b.txt": TargetState(input_hashes={}, prompt_hash="h2", model="m2")
-            }
+            targets={"b.txt": TargetState(input_hashes={}, prompt="p2", model="m2")}
        )
-        save_state(state2, project_dir)
+        save_state(state2, project_dir, _PROJECT)

-        loaded = load_state(project_dir)
+        loaded = load_state(project_dir, _PROJECT)
        assert "b.txt" in loaded.targets
        assert "a.txt" not in loaded.targets

@ -95,16 +97,16 @@ class TestStatePersistence:
            targets={
                "out.txt": TargetState(
                    input_hashes={"f.txt": "hash"},
-                    prompt_hash="ph",
+                    prompt="do something",
                    model="m",
-                    extra_hash="eh",
+                    extra_params={"width": 512},
                )
            }
        )
-        save_state(state, project_dir)
+        save_state(state, project_dir, _PROJECT)

        raw: object = yaml.safe_load(  # pyright: ignore[reportAny]
-            (project_dir / ".bulkgen.state.yaml").read_text()
+            (project_dir / state_filename(_PROJECT)).read_text()
        )
        assert isinstance(raw, dict)
        assert "targets" in raw
@ -296,9 +298,9 @@ class TestRecordAndDirtyRoundTrip:
            state=state,
            project_dir=project_dir,
        )
-        save_state(state, project_dir)
+        save_state(state, project_dir, _PROJECT)

-        loaded_state = load_state(project_dir)
+        loaded_state = load_state(project_dir, _PROJECT)
        assert not is_target_dirty(
            "result.md",
            resolved_prompt="do the thing",