feat: add archive_folder support for preserving previous generations

When archive_folder is set in the project config, artifacts are moved to
numbered archive copies (e.g. x.01.jpg, x.02.jpg) instead of being
overwritten or deleted.

- Build command archives existing artifacts before rebuilding dirty targets
- Clean command moves files to archive instead of deleting them
- Subfolder structure is preserved in the archive directory
- State file is always deleted, never archived
This commit is contained in:
Konstantin Fickel 2026-02-21 11:36:45 +01:00
parent 9ace38c806
commit 24cade558a
Signed by: kfickel
GPG key ID: A793722F9933C1A5
7 changed files with 272 additions and 8 deletions

41
hokusai/archive.py Normal file
View file

@ -0,0 +1,41 @@
"""Archive helper: move files to a numbered archive folder."""
from __future__ import annotations
import shutil
from pathlib import Path
def _next_archive_path(archive_dir: Path, relative: Path) -> Path:
    """Return the first unused numbered archive slot for *relative*.

    The counter is inserted between the stem and the suffix, zero-padded to
    at least two digits and counted up from 1: ``img/x.jpg`` maps to
    ``<archive_dir>/img/x.01.jpg``, then ``x.02.jpg``, and so on.  The
    directory part of *relative* is kept underneath *archive_dir*.

    Nothing is created on disk; the function only probes for existing paths.
    """
    target_dir = archive_dir / relative.parent
    base, ext = relative.stem, relative.suffix

    index = 1
    slot = target_dir / f"{base}.{index:02d}{ext}"
    # Count upward until we hit a name that is not already taken.
    while slot.exists():
        index += 1
        slot = target_dir / f"{base}.{index:02d}{ext}"
    return slot
def archive_file(
    file_path: Path, project_dir: Path, archive_folder: str
) -> Path | None:
    """Relocate *file_path* to the next free numbered slot in the archive.

    The file's location relative to *project_dir* is mirrored underneath
    ``project_dir / archive_folder``; missing archive directories are
    created on demand.

    Returns the path the file was moved to, or ``None`` when *file_path*
    does not exist (in which case nothing is created or moved).
    """
    if file_path.exists():
        rel_path = file_path.relative_to(project_dir)
        archive_root = project_dir / archive_folder
        target = _next_archive_path(archive_root, rel_path)
        # The mirrored sub-directory may not exist in the archive yet.
        target.parent.mkdir(parents=True, exist_ok=True)
        _ = shutil.move(str(file_path), target)
        return target
    return None

View file

@ -11,6 +11,7 @@ from pathlib import Path
import httpx
from hokusai.archive import archive_file
from hokusai.config import DownloadTargetConfig, GenerateTargetConfig, ProjectConfig
from hokusai.graph import build_graph, get_build_order, get_subgraph_for_target
from hokusai.prompt import extract_placeholder_files, resolve_prompt
@ -145,8 +146,14 @@ async def _build_single_target(
provider_index: dict[str, Provider],
) -> None:
"""Build a single target by dispatching to the appropriate provider."""
output_path = project_dir / target_name
# Ensure parent directories exist for targets in subfolders.
(project_dir / target_name).parent.mkdir(parents=True, exist_ok=True)
output_path.parent.mkdir(parents=True, exist_ok=True)
# Archive the existing artifact before overwriting.
if config.archive_folder is not None:
_ = archive_file(output_path, project_dir, config.archive_folder)
target_cfg = config.targets[target_name]

View file

@ -12,6 +12,7 @@ import typer
import yaml
from typer.core import TyperGroup
from hokusai.archive import archive_file
from hokusai.builder import BuildEvent, BuildResult, run_build
from hokusai.config import GenerateTargetConfig, ProjectConfig, load_config
from hokusai.graph import build_graph, get_build_order
@ -193,18 +194,33 @@ def clean() -> None:
config = load_config(config_path)
state_name = state_filename(_project_name(config_path))
archive_folder = config.archive_folder
removed = 0
dirs_to_check: set[Path] = set()
for target_name in config.targets:
target_path = project_dir / target_name
if target_path.exists():
if not target_path.exists():
continue
if archive_folder is not None:
dest = archive_file(target_path, project_dir, archive_folder)
if dest is not None:
rel_dest = dest.relative_to(project_dir)
click.echo(
click.style(" mv ", fg="yellow")
+ click.style(target_name, bold=True)
+ click.style(f" -> {rel_dest}", dim=True)
)
else:
target_path.unlink()
click.echo(click.style(" rm ", fg="red") + target_name)
removed += 1
# Track parent dirs that may now be empty.
parent = target_path.parent
if parent != project_dir:
dirs_to_check.add(parent)
removed += 1
# Track parent dirs that may now be empty.
parent = target_path.parent
if parent != project_dir:
dirs_to_check.add(parent)
# Remove empty subdirectories left behind, bottom-up.
for d in sorted(dirs_to_check, key=lambda p: len(p.parts), reverse=True):
@ -220,7 +236,8 @@ def clean() -> None:
state_path.unlink()
click.echo(click.style(" rm ", fg="red") + state_name)
click.echo(click.style(f"\nCleaned {removed} artifact(s)", bold=True))
verb = "Archived" if archive_folder is not None else "Cleaned"
click.echo(click.style(f"\n{verb} {removed} artifact(s)", bold=True))
@app.command()

View file

@ -65,6 +65,7 @@ class ProjectConfig(BaseModel):
"""Top-level configuration parsed from ``<name>.hokusai.yaml``."""
defaults: Defaults = Defaults()
archive_folder: str | None = None
targets: dict[str, TargetConfig]
@model_validator(mode="after")

75
tests/test_archive.py Normal file
View file

@ -0,0 +1,75 @@
"""Tests for hokusai.archive."""
from __future__ import annotations
from pathlib import Path
from hokusai.archive import archive_file
class TestArchiveFile:
    """Behavioural checks for ``archive_file`` using real files in ``tmp_path``."""

    def test_archives_with_01_suffix(self, tmp_path: Path) -> None:
        """A first archive lands at ``.01`` and the source is moved away."""
        original = tmp_path / "image.jpg"
        _ = original.write_text("v1")

        moved_to = archive_file(original, tmp_path, "archive")

        assert moved_to is not None
        assert moved_to == tmp_path / "archive" / "image.01.jpg"
        assert moved_to.read_text() == "v1"
        assert not original.exists()

    def test_increments_number(self, tmp_path: Path) -> None:
        """An occupied ``.01`` slot pushes the new copy to ``.02``."""
        archive_root = tmp_path / "archive"
        archive_root.mkdir()
        _ = (archive_root / "image.01.jpg").write_text("old")
        original = tmp_path / "image.jpg"
        _ = original.write_text("v2")

        moved_to = archive_file(original, tmp_path, "archive")

        assert moved_to is not None
        assert moved_to == archive_root / "image.02.jpg"
        assert moved_to.read_text() == "v2"

    def test_preserves_subfolder_structure(self, tmp_path: Path) -> None:
        """The source's sub-directory is mirrored inside the archive."""
        subdir = tmp_path / "img"
        subdir.mkdir()
        original = subdir / "photo.png"
        _ = original.write_text("data")

        moved_to = archive_file(original, tmp_path, "archive")

        assert moved_to is not None
        assert moved_to == tmp_path / "archive" / "img" / "photo.01.png"

    def test_returns_none_for_missing_file(self, tmp_path: Path) -> None:
        """A path that does not exist yields ``None``."""
        missing = tmp_path / "nonexistent.txt"
        outcome = archive_file(missing, tmp_path, "archive")
        assert outcome is None

    def test_creates_archive_dir(self, tmp_path: Path) -> None:
        """The archive directory is created when it is not there yet."""
        original = tmp_path / "file.txt"
        _ = original.write_text("content")

        moved_to = archive_file(original, tmp_path, "my_archive")

        assert moved_to is not None
        assert (tmp_path / "my_archive").is_dir()

    def test_skips_existing_numbers(self, tmp_path: Path) -> None:
        """With ``.01``-``.03`` taken, the next copy becomes ``.04``."""
        archive_root = tmp_path / "archive"
        archive_root.mkdir()
        occupied = (("x.01.txt", "a"), ("x.02.txt", "b"), ("x.03.txt", "c"))
        for filename, payload in occupied:
            _ = (archive_root / filename).write_text(payload)
        original = tmp_path / "x.txt"
        _ = original.write_text("d")

        moved_to = archive_file(original, tmp_path, "archive")

        assert moved_to is not None
        assert moved_to == archive_root / "x.04.txt"

View file

@ -412,6 +412,66 @@ class TestRunBuild:
assert result.failed == {}
class TestArchiveOnBuild:
"""Test that build archives existing artifacts when archive_folder is set."""
# Each test calls ``run_build`` with ``hokusai.builder._create_providers``
# patched to return ``_fake_providers()``, then inspects the files left in
# ``project_dir``.
#
# Build the same target twice with different prompts; the output of the first
# build is expected at archive/out.01.txt and a fresh out.txt must exist.
async def test_build_archives_existing_file(
self, project_dir: Path, write_config: WriteConfig
) -> None:
config = write_config(
{
"archive_folder": "archive",
"targets": {"out.txt": {"prompt": "version 1"}},
}
)
with patch("hokusai.builder._create_providers", return_value=_fake_providers()):
r1 = await run_build(config, project_dir, _PROJECT)
assert r1.built == ["out.txt"]
# Remember what the first build wrote so the archived copy can be compared.
v1_content = (project_dir / "out.txt").read_text()
# Same target, different prompt. NOTE(review): presumably the prompt change is
# what makes the target dirty and forces the rebuild -- confirm in the builder.
config2 = write_config(
{
"archive_folder": "archive",
"targets": {"out.txt": {"prompt": "version 2"}},
}
)
r2 = await run_build(config2, project_dir, _PROJECT)
assert r2.built == ["out.txt"]
# v1 should be archived, v2 should be current
archived = project_dir / "archive" / "out.01.txt"
assert archived.exists()
assert archived.read_text() == v1_content
assert (project_dir / "out.txt").exists()
# Without ``archive_folder`` in the config, a build must not create an
# ``archive`` directory in the project.
async def test_build_no_archive_without_setting(
self, project_dir: Path, simple_text_config: ProjectConfig
) -> None:
with patch("hokusai.builder._create_providers", return_value=_fake_providers()):
r1 = await run_build(simple_text_config, project_dir, _PROJECT)
assert r1.built == ["output.txt"]
assert not (project_dir / "archive").exists()
# Three builds with prompts v1..v3 should leave exactly two archived copies
# (.01 and .02). NOTE(review): assumes project_dir starts without out.txt, so
# the first build has nothing to archive -- confirm against the fixture.
async def test_build_archives_increment(
self, project_dir: Path, write_config: WriteConfig
) -> None:
config_raw: dict[str, object] = {
"archive_folder": "archive",
"targets": {"out.txt": {"prompt": "v"}},
}
with patch("hokusai.builder._create_providers", return_value=_fake_providers()):
for i in range(1, 4):
# Override only the prompt per iteration; archive_folder comes from config_raw.
cfg = write_config(
{**config_raw, "targets": {"out.txt": {"prompt": f"v{i}"}}}
)
_ = await run_build(cfg, project_dir, _PROJECT)
assert (project_dir / "archive" / "out.01.txt").exists()
assert (project_dir / "archive" / "out.02.txt").exists()
assert not (project_dir / "archive" / "out.03.txt").exists()
class TestDownloadTarget:
"""Tests for download-type targets that fetch files from URLs."""

View file

@ -174,6 +174,69 @@ class TestCleanCommand:
assert "Cleaned 1 artifact(s)" in result.output
assert not (cli_project / "output.txt").exists()
# With ``archive_folder`` configured, ``clean`` is expected to move both
# artifacts to numbered copies under archive/ instead of deleting them.
def test_clean_archives_when_archive_folder_set(self, tmp_path: Path) -> None:
config = {
"archive_folder": "archive",
"targets": {
"output.txt": {"prompt": "Generate text"},
"image.png": {"prompt": "Generate image"},
},
}
_ = (tmp_path / "project.hokusai.yaml").write_text(
yaml.dump(config, default_flow_style=False)
)
# Create the two artifacts that the config declares as targets.
_ = (tmp_path / "output.txt").write_text("generated text")
_ = (tmp_path / "image.png").write_bytes(b"\x89PNG")
# Replace the ``Path`` name inside hokusai.cli so that ``Path.cwd()`` reports
# tmp_path while the command runs.
with patch("hokusai.cli.Path") as mock_path_cls:
mock_path_cls.cwd.return_value = tmp_path
result = runner.invoke(app, ["clean"])
assert result.exit_code == 0
# Summary line switches from "Cleaned" to "Archived", and "mv" is printed.
assert "Archived 2 artifact(s)" in result.output
assert "mv" in result.output
# Originals are gone; contents are preserved in the .01 archive copies.
assert not (tmp_path / "output.txt").exists()
assert not (tmp_path / "image.png").exists()
assert (tmp_path / "archive" / "output.01.txt").read_text() == "generated text"
assert (tmp_path / "archive" / "image.01.png").read_bytes() == b"\x89PNG"
# A target inside a sub-directory (img/photo.png) should be archived under the
# same sub-directory inside archive/.
def test_clean_archive_preserves_subfolders(self, tmp_path: Path) -> None:
config = {
"archive_folder": "archive",
"targets": {"img/photo.png": {"prompt": "photo"}},
}
_ = (tmp_path / "project.hokusai.yaml").write_text(
yaml.dump(config, default_flow_style=False)
)
(tmp_path / "img").mkdir()
_ = (tmp_path / "img" / "photo.png").write_bytes(b"img")
# Replace the ``Path`` name inside hokusai.cli so that ``Path.cwd()`` reports
# tmp_path while the command runs.
with patch("hokusai.cli.Path") as mock_path_cls:
mock_path_cls.cwd.return_value = tmp_path
result = runner.invoke(app, ["clean"])
assert result.exit_code == 0
# Only the archived location is checked here, not removal of the source file.
assert (tmp_path / "archive" / "img" / "photo.01.png").exists()
# Even with ``archive_folder`` set, ``clean`` must remove the state file from
# the project directory.
def test_clean_archive_still_deletes_state(self, tmp_path: Path) -> None:
config = {
"archive_folder": "archive",
"targets": {"output.txt": {"prompt": "text"}},
}
_ = (tmp_path / "project.hokusai.yaml").write_text(
yaml.dump(config, default_flow_style=False)
)
_ = (tmp_path / "output.txt").write_text("data")
# NOTE(review): the name is hard-coded here; presumably it matches what
# ``state_filename`` yields for a project called "project" -- confirm.
state_file = ".project.hokusai-state.yaml"
_ = (tmp_path / state_file).write_text("targets: {}")
# Replace the ``Path`` name inside hokusai.cli so that ``Path.cwd()`` reports
# tmp_path while the command runs.
with patch("hokusai.cli.Path") as mock_path_cls:
mock_path_cls.cwd.return_value = tmp_path
result = runner.invoke(app, ["clean"])
assert result.exit_code == 0
# Checks only that the state file is gone; it does not assert that no copy of
# it was placed in the archive folder.
assert not (tmp_path / state_file).exists()
class TestGraphCommand:
"""Test the graph CLI command."""