feat: add archive_folder support for preserving previous generations
When archive_folder is set in the project config, artifacts are moved to numbered archive copies (e.g. x.01.jpg, x.02.jpg) instead of being overwritten or deleted.
- Build command archives existing artifacts before rebuilding dirty targets
- Clean command moves files to archive instead of deleting them
- Subfolder structure is preserved in the archive directory
- State file is always deleted, never archived
This commit is contained in:
parent
9ace38c806
commit
24cade558a
7 changed files with 272 additions and 8 deletions
41
hokusai/archive.py
Normal file
41
hokusai/archive.py
Normal file
|
|
@ -0,0 +1,41 @@
|
||||||
|
"""Archive helper: move files to a numbered archive folder."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import shutil
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
def _next_archive_path(archive_dir: Path, relative: Path) -> Path:
    """Compute the next numbered archive path for *relative*.

    ``img/x.jpg`` becomes ``archive/img/x.01.jpg``, then ``x.02.jpg``, etc.

    Numbering continues after the highest number already present in the
    archive subfolder rather than reusing the first free slot.  This keeps
    the numbers chronological: if ``x.01.jpg`` is pruned while ``x.03.jpg``
    still exists, the next copy is ``x.04.jpg``, never ``x.01.jpg``.

    Args:
        archive_dir: Root of the archive folder.
        relative: Path of the artifact relative to the project directory;
            its parent folders are mirrored below *archive_dir*.

    Returns:
        A path below *archive_dir* that does not currently exist.  Nothing
        is created on disk; the target folder itself may not exist yet.
    """
    stem = relative.stem
    suffix = relative.suffix
    parent = archive_dir / relative.parent
    prefix = f"{stem}."

    # Find the highest generation number already used for this artifact.
    highest = 0
    if parent.is_dir():
        for entry in parent.iterdir():
            name = entry.name
            if not (name.startswith(prefix) and name.endswith(suffix)):
                continue
            # The part between "<stem>." and "<suffix>" must be the number.
            number = name[len(prefix):len(name) - len(suffix)]
            if number.isdecimal():
                highest = max(highest, int(number))

    n = highest + 1
    while True:
        candidate = parent / f"{stem}.{n:02d}{suffix}"
        # ``exists()`` follows symlinks, so a dangling link would look free
        # and then be overwritten by the move; treat any entry as taken.
        if not (candidate.exists() or candidate.is_symlink()):
            return candidate
        n += 1
|
||||||
|
|
||||||
|
|
||||||
|
def archive_file(
    file_path: Path, project_dir: Path, archive_folder: str
) -> Path | None:
    """Move *file_path* to a freshly numbered slot in the archive folder.

    The file's location relative to *project_dir* is mirrored below
    ``project_dir / archive_folder``; missing archive folders are created.

    Returns:
        The path the file was moved to, or ``None`` when *file_path* does
        not exist (in which case nothing is created or moved).

    Raises:
        ValueError: If *file_path* is not located inside *project_dir*
            (raised by ``Path.relative_to``).
    """
    if file_path.exists():
        inside_project = file_path.relative_to(project_dir)
        target = _next_archive_path(project_dir / archive_folder, inside_project)
        # The mirrored subfolder may not exist on first use.
        target.parent.mkdir(parents=True, exist_ok=True)
        _ = shutil.move(str(file_path), target)
        return target

    return None
|
||||||
|
|
@ -11,6 +11,7 @@ from pathlib import Path
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
|
from hokusai.archive import archive_file
|
||||||
from hokusai.config import DownloadTargetConfig, GenerateTargetConfig, ProjectConfig
|
from hokusai.config import DownloadTargetConfig, GenerateTargetConfig, ProjectConfig
|
||||||
from hokusai.graph import build_graph, get_build_order, get_subgraph_for_target
|
from hokusai.graph import build_graph, get_build_order, get_subgraph_for_target
|
||||||
from hokusai.prompt import extract_placeholder_files, resolve_prompt
|
from hokusai.prompt import extract_placeholder_files, resolve_prompt
|
||||||
|
|
@ -145,8 +146,14 @@ async def _build_single_target(
|
||||||
provider_index: dict[str, Provider],
|
provider_index: dict[str, Provider],
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Build a single target by dispatching to the appropriate provider."""
|
"""Build a single target by dispatching to the appropriate provider."""
|
||||||
|
output_path = project_dir / target_name
|
||||||
|
|
||||||
# Ensure parent directories exist for targets in subfolders.
|
# Ensure parent directories exist for targets in subfolders.
|
||||||
(project_dir / target_name).parent.mkdir(parents=True, exist_ok=True)
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
# Archive the existing artifact before overwriting.
|
||||||
|
if config.archive_folder is not None:
|
||||||
|
_ = archive_file(output_path, project_dir, config.archive_folder)
|
||||||
|
|
||||||
target_cfg = config.targets[target_name]
|
target_cfg = config.targets[target_name]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,7 @@ import typer
|
||||||
import yaml
|
import yaml
|
||||||
from typer.core import TyperGroup
|
from typer.core import TyperGroup
|
||||||
|
|
||||||
|
from hokusai.archive import archive_file
|
||||||
from hokusai.builder import BuildEvent, BuildResult, run_build
|
from hokusai.builder import BuildEvent, BuildResult, run_build
|
||||||
from hokusai.config import GenerateTargetConfig, ProjectConfig, load_config
|
from hokusai.config import GenerateTargetConfig, ProjectConfig, load_config
|
||||||
from hokusai.graph import build_graph, get_build_order
|
from hokusai.graph import build_graph, get_build_order
|
||||||
|
|
@ -193,18 +194,33 @@ def clean() -> None:
|
||||||
config = load_config(config_path)
|
config = load_config(config_path)
|
||||||
state_name = state_filename(_project_name(config_path))
|
state_name = state_filename(_project_name(config_path))
|
||||||
|
|
||||||
|
archive_folder = config.archive_folder
|
||||||
|
|
||||||
removed = 0
|
removed = 0
|
||||||
dirs_to_check: set[Path] = set()
|
dirs_to_check: set[Path] = set()
|
||||||
for target_name in config.targets:
|
for target_name in config.targets:
|
||||||
target_path = project_dir / target_name
|
target_path = project_dir / target_name
|
||||||
if target_path.exists():
|
if not target_path.exists():
|
||||||
|
continue
|
||||||
|
|
||||||
|
if archive_folder is not None:
|
||||||
|
dest = archive_file(target_path, project_dir, archive_folder)
|
||||||
|
if dest is not None:
|
||||||
|
rel_dest = dest.relative_to(project_dir)
|
||||||
|
click.echo(
|
||||||
|
click.style(" mv ", fg="yellow")
|
||||||
|
+ click.style(target_name, bold=True)
|
||||||
|
+ click.style(f" -> {rel_dest}", dim=True)
|
||||||
|
)
|
||||||
|
else:
|
||||||
target_path.unlink()
|
target_path.unlink()
|
||||||
click.echo(click.style(" rm ", fg="red") + target_name)
|
click.echo(click.style(" rm ", fg="red") + target_name)
|
||||||
removed += 1
|
|
||||||
# Track parent dirs that may now be empty.
|
removed += 1
|
||||||
parent = target_path.parent
|
# Track parent dirs that may now be empty.
|
||||||
if parent != project_dir:
|
parent = target_path.parent
|
||||||
dirs_to_check.add(parent)
|
if parent != project_dir:
|
||||||
|
dirs_to_check.add(parent)
|
||||||
|
|
||||||
# Remove empty subdirectories left behind, bottom-up.
|
# Remove empty subdirectories left behind, bottom-up.
|
||||||
for d in sorted(dirs_to_check, key=lambda p: len(p.parts), reverse=True):
|
for d in sorted(dirs_to_check, key=lambda p: len(p.parts), reverse=True):
|
||||||
|
|
@ -220,7 +236,8 @@ def clean() -> None:
|
||||||
state_path.unlink()
|
state_path.unlink()
|
||||||
click.echo(click.style(" rm ", fg="red") + state_name)
|
click.echo(click.style(" rm ", fg="red") + state_name)
|
||||||
|
|
||||||
click.echo(click.style(f"\nCleaned {removed} artifact(s)", bold=True))
|
verb = "Archived" if archive_folder is not None else "Cleaned"
|
||||||
|
click.echo(click.style(f"\n{verb} {removed} artifact(s)", bold=True))
|
||||||
|
|
||||||
|
|
||||||
@app.command()
|
@app.command()
|
||||||
|
|
|
||||||
|
|
@ -65,6 +65,7 @@ class ProjectConfig(BaseModel):
|
||||||
"""Top-level configuration parsed from ``<name>.hokusai.yaml``."""
|
"""Top-level configuration parsed from ``<name>.hokusai.yaml``."""
|
||||||
|
|
||||||
defaults: Defaults = Defaults()
|
defaults: Defaults = Defaults()
|
||||||
|
archive_folder: str | None = None
|
||||||
targets: dict[str, TargetConfig]
|
targets: dict[str, TargetConfig]
|
||||||
|
|
||||||
@model_validator(mode="after")
|
@model_validator(mode="after")
|
||||||
|
|
|
||||||
75
tests/test_archive.py
Normal file
75
tests/test_archive.py
Normal file
|
|
@ -0,0 +1,75 @@
|
||||||
|
"""Tests for hokusai.archive."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from hokusai.archive import archive_file
|
||||||
|
|
||||||
|
|
||||||
|
class TestArchiveFile:
    """Behavioural checks for ``hokusai.archive.archive_file``."""

    def test_archives_with_01_suffix(self, tmp_path: Path) -> None:
        """The first archived copy gets number ``01`` and the source is gone."""
        original = tmp_path / "image.jpg"
        _ = original.write_text("v1")

        archived = archive_file(original, tmp_path, "archive")

        assert archived is not None
        assert archived == tmp_path / "archive" / "image.01.jpg"
        assert archived.read_text() == "v1"
        assert not original.exists()

    def test_increments_number(self, tmp_path: Path) -> None:
        """An existing ``.01`` copy pushes the new copy to ``.02``."""
        archive_root = tmp_path / "archive"
        archive_root.mkdir()
        _ = (archive_root / "image.01.jpg").write_text("old")

        original = tmp_path / "image.jpg"
        _ = original.write_text("v2")

        archived = archive_file(original, tmp_path, "archive")

        assert archived is not None
        assert archived == archive_root / "image.02.jpg"
        assert archived.read_text() == "v2"

    def test_preserves_subfolder_structure(self, tmp_path: Path) -> None:
        """Files in a subfolder land in the same subfolder of the archive."""
        subfolder = tmp_path / "img"
        subfolder.mkdir()
        original = subfolder / "photo.png"
        _ = original.write_text("data")

        archived = archive_file(original, tmp_path, "archive")

        assert archived is not None
        assert archived == tmp_path / "archive" / "img" / "photo.01.png"

    def test_returns_none_for_missing_file(self, tmp_path: Path) -> None:
        """A source file that does not exist yields ``None``."""
        missing = tmp_path / "nonexistent.txt"
        assert archive_file(missing, tmp_path, "archive") is None

    def test_creates_archive_dir(self, tmp_path: Path) -> None:
        """The archive folder is created on demand."""
        original = tmp_path / "file.txt"
        _ = original.write_text("content")

        archived = archive_file(original, tmp_path, "my_archive")

        assert archived is not None
        assert (tmp_path / "my_archive").is_dir()

    def test_skips_existing_numbers(self, tmp_path: Path) -> None:
        """Numbers 01-03 being taken results in the copy being number 04."""
        archive_root = tmp_path / "archive"
        archive_root.mkdir()
        _ = (archive_root / "x.01.txt").write_text("a")
        _ = (archive_root / "x.02.txt").write_text("b")
        _ = (archive_root / "x.03.txt").write_text("c")

        original = tmp_path / "x.txt"
        _ = original.write_text("d")

        archived = archive_file(original, tmp_path, "archive")

        assert archived is not None
        assert archived == archive_root / "x.04.txt"
|
||||||
|
|
@ -412,6 +412,66 @@ class TestRunBuild:
|
||||||
assert result.failed == {}
|
assert result.failed == {}
|
||||||
|
|
||||||
|
|
||||||
|
class TestArchiveOnBuild:
    """Builds must archive the previous artifact when archive_folder is set."""

    async def test_build_archives_existing_file(
        self, project_dir: Path, write_config: WriteConfig
    ) -> None:
        """Rebuilding a changed target moves the old output to ``.01``."""
        first_config = write_config(
            {
                "archive_folder": "archive",
                "targets": {"out.txt": {"prompt": "version 1"}},
            }
        )
        with patch("hokusai.builder._create_providers", return_value=_fake_providers()):
            first_result = await run_build(first_config, project_dir, _PROJECT)
            assert first_result.built == ["out.txt"]
            v1_content = (project_dir / "out.txt").read_text()

            # A different prompt makes the target dirty, forcing a rebuild.
            second_config = write_config(
                {
                    "archive_folder": "archive",
                    "targets": {"out.txt": {"prompt": "version 2"}},
                }
            )
            second_result = await run_build(second_config, project_dir, _PROJECT)
            assert second_result.built == ["out.txt"]

        # v1 should be archived, v2 should be current
        archived_copy = project_dir / "archive" / "out.01.txt"
        assert archived_copy.exists()
        assert archived_copy.read_text() == v1_content
        assert (project_dir / "out.txt").exists()

    async def test_build_no_archive_without_setting(
        self, project_dir: Path, simple_text_config: ProjectConfig
    ) -> None:
        """Without archive_folder no archive directory is ever created."""
        with patch("hokusai.builder._create_providers", return_value=_fake_providers()):
            result = await run_build(simple_text_config, project_dir, _PROJECT)
            assert result.built == ["output.txt"]

        assert not (project_dir / "archive").exists()

    async def test_build_archives_increment(
        self, project_dir: Path, write_config: WriteConfig
    ) -> None:
        """Three successive builds leave exactly two archived generations."""
        config_raw: dict[str, object] = {
            "archive_folder": "archive",
            "targets": {"out.txt": {"prompt": "v"}},
        }
        with patch("hokusai.builder._create_providers", return_value=_fake_providers()):
            for generation in range(1, 4):
                targets = {"out.txt": {"prompt": f"v{generation}"}}
                cfg = write_config({**config_raw, "targets": targets})
                _ = await run_build(cfg, project_dir, _PROJECT)

        # The first build has nothing to archive, so only 01 and 02 exist.
        archive_root = project_dir / "archive"
        assert (archive_root / "out.01.txt").exists()
        assert (archive_root / "out.02.txt").exists()
        assert not (archive_root / "out.03.txt").exists()
|
||||||
|
|
||||||
|
|
||||||
class TestDownloadTarget:
|
class TestDownloadTarget:
|
||||||
"""Tests for download-type targets that fetch files from URLs."""
|
"""Tests for download-type targets that fetch files from URLs."""
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -174,6 +174,69 @@ class TestCleanCommand:
|
||||||
assert "Cleaned 1 artifact(s)" in result.output
|
assert "Cleaned 1 artifact(s)" in result.output
|
||||||
assert not (cli_project / "output.txt").exists()
|
assert not (cli_project / "output.txt").exists()
|
||||||
|
|
||||||
|
def test_clean_archives_when_archive_folder_set(self, tmp_path: Path) -> None:
    """``clean`` moves every artifact into the archive and reports it."""
    project_yaml = yaml.dump(
        {
            "archive_folder": "archive",
            "targets": {
                "output.txt": {"prompt": "Generate text"},
                "image.png": {"prompt": "Generate image"},
            },
        },
        default_flow_style=False,
    )
    _ = (tmp_path / "project.hokusai.yaml").write_text(project_yaml)
    _ = (tmp_path / "output.txt").write_text("generated text")
    _ = (tmp_path / "image.png").write_bytes(b"\x89PNG")

    # Make the CLI treat tmp_path as the current working directory.
    with patch("hokusai.cli.Path") as mock_path_cls:
        mock_path_cls.cwd.return_value = tmp_path
        result = runner.invoke(app, ["clean"])

    assert result.exit_code == 0
    assert "Archived 2 artifact(s)" in result.output
    assert "mv" in result.output

    # Originals are gone; their contents live on as generation 01.
    assert not (tmp_path / "output.txt").exists()
    assert not (tmp_path / "image.png").exists()
    archive_root = tmp_path / "archive"
    assert (archive_root / "output.01.txt").read_text() == "generated text"
    assert (archive_root / "image.01.png").read_bytes() == b"\x89PNG"
|
||||||
|
|
||||||
|
def test_clean_archive_preserves_subfolders(self, tmp_path: Path) -> None:
    """``clean`` mirrors a target's subfolder inside the archive."""
    project_yaml = yaml.dump(
        {
            "archive_folder": "archive",
            "targets": {"img/photo.png": {"prompt": "photo"}},
        },
        default_flow_style=False,
    )
    _ = (tmp_path / "project.hokusai.yaml").write_text(project_yaml)

    image_dir = tmp_path / "img"
    image_dir.mkdir()
    _ = (image_dir / "photo.png").write_bytes(b"img")

    # Make the CLI treat tmp_path as the current working directory.
    with patch("hokusai.cli.Path") as mock_path_cls:
        mock_path_cls.cwd.return_value = tmp_path
        result = runner.invoke(app, ["clean"])

    assert result.exit_code == 0
    assert (tmp_path / "archive" / "img" / "photo.01.png").exists()
|
||||||
|
|
||||||
|
def test_clean_archive_still_deletes_state(self, tmp_path: Path) -> None:
    """The state file is removed by ``clean`` even in archive mode."""
    project_yaml = yaml.dump(
        {
            "archive_folder": "archive",
            "targets": {"output.txt": {"prompt": "text"}},
        },
        default_flow_style=False,
    )
    _ = (tmp_path / "project.hokusai.yaml").write_text(project_yaml)
    _ = (tmp_path / "output.txt").write_text("data")

    state_file = ".project.hokusai-state.yaml"
    state_path = tmp_path / state_file
    _ = state_path.write_text("targets: {}")

    # Make the CLI treat tmp_path as the current working directory.
    with patch("hokusai.cli.Path") as mock_path_cls:
        mock_path_cls.cwd.return_value = tmp_path
        result = runner.invoke(app, ["clean"])

    assert result.exit_code == 0
    assert not (tmp_path / state_file).exists()
|
||||||
|
|
||||||
|
|
||||||
class TestGraphCommand:
|
class TestGraphCommand:
|
||||||
"""Test the graph CLI command."""
|
"""Test the graph CLI command."""
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue