Rename the misspelled module `repostory_configuration` to
`repository_configuration` and add helpers for merging two repository
configurations.

* src/streamer/localize/__init__.py, src/streamer/localize/localize.py and
  test/test_localize.py now import from the renamed module.
* `MarkerPlacement.if_with` changes from `list[str]` to `set[str]`, so
  `localize_shard` compares it against `set(shard.markers)` without an extra
  conversion.
* `Marker.placements` gains a default of `[]`.
* New `merge_*` functions combine a `base` and a `second` configuration;
  `second` overrides `base` wherever it supplies a value (non-empty
  `display_name`, non-None `comment`, explicitly set `propagate`, placements
  with the same `(if_with, dimension)` identity).
* test/localize/test_repository_configuration_merge.py covers the helpers.

NOTE(review): the indentation of unchanged context lines in the localize.py
and test_localize.py hunks is not recoverable from the patch text alone and
was reconstructed; the first context line of the test_localize.py `@@ -2,7`
hunk is assumed to be blank. Verify against the target files before applying.

diff --git a/src/streamer/localize/__init__.py b/src/streamer/localize/__init__.py
index 31f4f71..97456a2 100644
--- a/src/streamer/localize/__init__.py
+++ b/src/streamer/localize/__init__.py
@@ -1,6 +1,6 @@
 from .localize import localize_stream_file
-from .repostory_configuration import RepositoryConfiguration
 from .localized_shard import LocalizedShard
+from .repository_configuration import RepositoryConfiguration
 
 __all__ = [
     "RepositoryConfiguration",
diff --git a/src/streamer/localize/localize.py b/src/streamer/localize/localize.py
index adb94a2..5875c87 100644
--- a/src/streamer/localize/localize.py
+++ b/src/streamer/localize/localize.py
@@ -7,7 +7,7 @@ from .extract_datetime import (
     extract_datetime_from_marker_list,
 )
 from .localized_shard import LocalizedShard
-from .repostory_configuration import RepositoryConfiguration
+from .repository_configuration import RepositoryConfiguration
 
 
 def localize_shard(
@@ -25,7 +25,7 @@ def localize_shard(
         if marker in config.markers:
             marker_definition = config.markers[marker]
             for placement in marker_definition.placements:
-                if set(placement.if_with) <= set(shard.markers):
+                if placement.if_with <= set(shard.markers):
                     dimension = config.dimensions[placement.dimension]
                     value = placement.value or marker
 
diff --git a/src/streamer/localize/repository_configuration.py b/src/streamer/localize/repository_configuration.py
new file mode 100644
index 0000000..a0c7814
--- /dev/null
+++ b/src/streamer/localize/repository_configuration.py
@@ -0,0 +1,107 @@
+from __future__ import annotations
+
+from typing import Optional
+
+from pydantic import BaseModel
+
+
+class Dimension(BaseModel):
+    display_name: str
+    comment: Optional[str] = None
+    propagate: bool = False
+
+
+class MarkerPlacement(BaseModel):
+    if_with: set[str] = set()
+    dimension: str
+    value: str | None = None
+
+
+class Marker(BaseModel):
+    display_name: str
+    placements: list[MarkerPlacement] = []
+
+
+class RepositoryConfiguration(BaseModel):
+    dimensions: dict[str, Dimension]
+    markers: dict[str, Marker]
+
+
+def merge_single_dimension(base: Dimension, second: Dimension) -> Dimension:
+    second_fields_set = getattr(second, "model_fields_set", set())
+
+    return Dimension(
+        display_name=second.display_name or base.display_name,
+        comment=base.comment if second.comment is None else second.comment,
+        propagate=second.propagate
+        if "propagate" in second_fields_set
+        else base.propagate,
+    )
+
+
+def merge_dimensions(
+    base: dict[str, Dimension], second: dict[str, Dimension]
+) -> dict[str, Dimension]:
+    merged: dict[str, Dimension] = dict(base)
+    for key, second_dimension in second.items():
+        if key in merged:
+            merged[key] = merge_single_dimension(merged[key], second_dimension)
+        else:
+            merged[key] = second_dimension
+    return merged
+
+
+def _placement_identity(p: MarkerPlacement) -> tuple[frozenset[str], str]:
+    return (frozenset(p.if_with), p.dimension)
+
+
+def merge_single_marker(base: Marker, second: Marker) -> Marker:
+    merged_display_name = second.display_name or base.display_name
+
+    merged_placements: list[MarkerPlacement] = []
+    seen: dict[tuple[frozenset[str], str], int] = {}
+
+    for placement in base.placements:
+        ident = _placement_identity(placement)
+        seen[ident] = len(merged_placements)
+        merged_placements.append(placement)
+
+    for placement in second.placements:
+        ident = _placement_identity(placement)
+        if ident in seen:
+            merged_placements[seen[ident]] = placement
+        else:
+            seen[ident] = len(merged_placements)
+            merged_placements.append(placement)
+
+    return Marker(display_name=merged_display_name, placements=merged_placements)
+
+
+def merge_markers(
+    base: dict[str, Marker], second: dict[str, Marker]
+) -> dict[str, Marker]:
+    merged: dict[str, Marker] = dict(base)
+    for key, second_marker in second.items():
+        if key in merged:
+            merged[key] = merge_single_marker(merged[key], second_marker)
+        else:
+            merged[key] = second_marker
+    return merged
+
+
+def merge_repository_configuration(
+    base: RepositoryConfiguration, second: RepositoryConfiguration
+) -> RepositoryConfiguration:
+    return RepositoryConfiguration(
+        dimensions=merge_dimensions(base.dimensions, second.dimensions),
+        markers=merge_markers(base.markers, second.markers),
+    )
+
+
+__all__ = [
+    "Dimension",
+    "Marker",
+    "MarkerPlacement",
+    "RepositoryConfiguration",
+    "merge_repository_configuration",
+]
diff --git a/src/streamer/localize/repostory_configuration.py b/src/streamer/localize/repostory_configuration.py
deleted file mode 100644
index 3e37651..0000000
--- a/src/streamer/localize/repostory_configuration.py
+++ /dev/null
@@ -1,28 +0,0 @@
-from typing import Optional
-
-from pydantic import BaseModel
-
-
-class Dimension(BaseModel):
-    display_name: str
-    comment: Optional[str] = None
-    propagate: bool = False
-
-
-class MarkerPlacement(BaseModel):
-    if_with: list[str] = []
-    dimension: str
-    value: str | None = None
-
-
-class Marker(BaseModel):
-    display_name: str
-    placements: list[MarkerPlacement]
-
-
-class RepositoryConfiguration(BaseModel):
-    dimensions: dict[str, Dimension]
-    markers: dict[str, Marker]
-
-
-__all__ = ["Dimension", "Marker", "MarkerPlacement", "RepositoryConfiguration"]
diff --git a/test/localize/test_repository_configuration_merge.py b/test/localize/test_repository_configuration_merge.py
new file mode 100644
index 0000000..f5d345d
--- /dev/null
+++ b/test/localize/test_repository_configuration_merge.py
@@ -0,0 +1,365 @@
+import pytest
+
+from streamer.localize.repository_configuration import (
+    Dimension,
+    Marker,
+    MarkerPlacement,
+    RepositoryConfiguration,
+    merge_dimensions,
+    merge_markers,
+    merge_repository_configuration,
+    merge_single_dimension,
+    merge_single_marker,
+)
+
+
+class TestMergeSingleDimension:
+    def test_second_overrides_display_name_when_non_empty(self):
+        base = Dimension(display_name="Base", comment="c1", propagate=True)
+        second = Dimension(display_name="Second", comment="c2", propagate=False)
+
+        merged = merge_single_dimension(base, second)
+
+        assert merged.display_name == "Second"
+        assert merged.comment == "c2"
+        assert merged.propagate is False
+
+    def test_second_empty_display_name_falls_back_to_base(self):
+        base = Dimension(display_name="Base", comment="c1", propagate=True)
+        second = Dimension(display_name="", comment="c2", propagate=False)
+
+        merged = merge_single_dimension(base, second)
+
+        assert merged.display_name == "Base"
+        assert merged.comment == "c2"
+        assert merged.propagate is False
+
+    def test_second_comment_none_does_not_erase_base_comment(self):
+        base = Dimension(display_name="Base", comment="keep", propagate=True)
+        second = Dimension(display_name="Second", comment=None, propagate=False)
+
+        merged = merge_single_dimension(base, second)
+
+        assert merged.display_name == "Second"
+        assert merged.comment == "keep"
+
+    def test_second_comment_non_none_overrides_base_comment(self):
+        base = Dimension(display_name="Base", comment="c1", propagate=True)
+        second = Dimension(display_name="Second", comment="c2", propagate=True)
+
+        merged = merge_single_dimension(base, second)
+
+        assert merged.comment == "c2"
+
+    def test_second_propagate_overrides_base_when_provided(self):
+        base = Dimension(display_name="Base", comment="c1", propagate=True)
+        second = Dimension(display_name="Second", comment="c2", propagate=False)
+
+        merged = merge_single_dimension(base, second)
+
+        assert merged.propagate is False
+
+    def test_propagate_merging_retains_base_when_second_not_provided(self):
+        base = Dimension(display_name="Base", comment="c1", propagate=True)
+        second = Dimension(display_name="Second", comment="c2")
+
+        merged = merge_single_dimension(base, second)
+
+        assert merged.propagate is True
+
+
+class TestMergeDimensions:
+    def test_adds_new_keys_from_second(self):
+        base = {"a": Dimension(display_name="A", propagate=True)}
+        second = {"b": Dimension(display_name="B", propagate=False)}
+
+        merged = merge_dimensions(base, second)
+
+        assert set(merged.keys()) == {"a", "b"}
+        assert merged["a"].display_name == "A"
+        assert merged["b"].display_name == "B"
+
+    def test_merges_existing_keys(self):
+        base = {"a": Dimension(display_name="A", comment="c1", propagate=True)}
+        second = {"a": Dimension(display_name="A2", comment=None, propagate=False)}
+
+        merged = merge_dimensions(base, second)
+
+        assert merged["a"].display_name == "A2"
+        assert merged["a"].comment == "c1"
+        assert merged["a"].propagate is False
+
+    def test_does_not_mutate_inputs(self):
+        base = {"a": Dimension(display_name="A", comment="c1", propagate=True)}
+        second = {"b": Dimension(display_name="B", comment="c2", propagate=False)}
+
+        merged = merge_dimensions(base, second)
+
+        assert "b" not in base
+        assert "a" not in second
+        assert set(merged.keys()) == {"a", "b"}
+
+
+class TestMergeSingleMarker:
+    def test_second_overrides_display_name_when_non_empty(self):
+        base = Marker(
+            display_name="Base",
+            placements=[MarkerPlacement(dimension="project", value=None)],
+        )
+        second = Marker(
+            display_name="Second",
+            placements=[MarkerPlacement(dimension="timesheet", value="coding")],
+        )
+
+        merged = merge_single_marker(base, second)
+
+        assert merged.display_name == "Second"
+        assert merged.placements == [
+            MarkerPlacement(dimension="project", value=None, if_with=set()),
+            MarkerPlacement(dimension="timesheet", value="coding", if_with=set()),
+        ]
+
+    def test_second_empty_display_name_falls_back_to_base(self):
+        base = Marker(display_name="Base", placements=[])
+        second = Marker(display_name="", placements=[])
+
+        merged = merge_single_marker(base, second)
+
+        assert merged.display_name == "Base"
+
+    def test_appends_new_placements(self):
+        base = Marker(
+            display_name="Base",
+            placements=[
+                MarkerPlacement(dimension="project"),
+            ],
+        )
+        second = Marker(
+            display_name="Second",
+            placements=[
+                MarkerPlacement(
+                    if_with={"Timesheet"}, dimension="timesheet", value="x"
+                ),
+            ],
+        )
+
+        merged = merge_single_marker(base, second)
+
+        assert merged.placements == [
+            MarkerPlacement(dimension="project"),
+            MarkerPlacement(if_with={"Timesheet"}, dimension="timesheet", value="x"),
+        ]
+
+    def test_deduplicates_by_identity_and_second_overrides_base(self):
+        base = Marker(
+            display_name="Base",
+            placements=[
+                MarkerPlacement(if_with={"A"}, dimension="d", value="v"),
+                MarkerPlacement(if_with={"B"}, dimension="d", value="v2"),
+            ],
+        )
+        second = Marker(
+            display_name="Second",
+            placements=[
+                MarkerPlacement(if_with={"A"}, dimension="d", value="v"),
+                MarkerPlacement(if_with={"C"}, dimension="d", value="v3"),
+            ],
+        )
+
+        merged = merge_single_marker(base, second)
+
+        assert merged.placements == [
+            MarkerPlacement(if_with={"A"}, dimension="d", value="v"),
+            MarkerPlacement(if_with={"B"}, dimension="d", value="v2"),
+            MarkerPlacement(if_with={"C"}, dimension="d", value="v3"),
+        ]
+
+    def test_identity_is_order_insensitive_for_if_with(self):
+        base = Marker(
+            display_name="Base",
+            placements=[MarkerPlacement(if_with={"A", "B"}, dimension="d", value="v")],
+        )
+        second = Marker(
+            display_name="Second",
+            placements=[MarkerPlacement(if_with={"B", "A"}, dimension="d", value="v2")],
+        )
+
+        merged = merge_single_marker(base, second)
+
+        # With `if_with` as a set, identity is order-insensitive; second overrides base.
+        assert merged.placements == [
+            MarkerPlacement(if_with={"A", "B"}, dimension="d", value="v2"),
+        ]
+
+
+class TestMergeMarkers:
+    def test_adds_new_marker_keys_from_second(self):
+        base = {"M1": Marker(display_name="M1", placements=[])}
+        second = {"M2": Marker(display_name="M2", placements=[])}
+
+        merged = merge_markers(base, second)
+
+        assert set(merged.keys()) == {"M1", "M2"}
+
+    def test_merges_existing_marker_keys(self):
+        base = {
+            "M": Marker(
+                display_name="Base",
+                placements=[MarkerPlacement(dimension="project")],
+            )
+        }
+        second = {
+            "M": Marker(
+                display_name="Second",
+                placements=[
+                    MarkerPlacement(
+                        if_with={"Timesheet"}, dimension="timesheet", value="coding"
+                    )
+                ],
+            )
+        }
+
+        merged = merge_markers(base, second)
+
+        assert merged["M"].display_name == "Second"
+        assert merged["M"].placements == [
+            MarkerPlacement(dimension="project", value=None, if_with=set()),
+            MarkerPlacement(
+                if_with={"Timesheet"}, dimension="timesheet", value="coding"
+            ),
+        ]
+
+    def test_does_not_mutate_inputs(self):
+        base = {"M1": Marker(display_name="M1", placements=[])}
+        second = {"M2": Marker(display_name="M2", placements=[])}
+
+        merged = merge_markers(base, second)
+
+        assert "M2" not in base
+        assert "M1" not in second
+        assert set(merged.keys()) == {"M1", "M2"}
+
+
+class TestMergeRepositoryConfiguration:
+    def test_merges_dimensions_and_markers(self):
+        base = RepositoryConfiguration(
+            dimensions={
+                "project": Dimension(
+                    display_name="Project", comment="c1", propagate=True
+                ),
+                "moment": Dimension(
+                    display_name="Moment", comment="c2", propagate=True
+                ),
+            },
+            markers={
+                "Streamer": Marker(
+                    display_name="Streamer",
+                    placements=[MarkerPlacement(dimension="project")],
+                )
+            },
+        )
+
+        second = RepositoryConfiguration(
+            dimensions={
+                "project": Dimension(display_name="Project2", propagate=False),
+                "timesheet": Dimension(
+                    display_name="Timesheet", comment="c3", propagate=False
+                ),
+            },
+            markers={
+                "Streamer": Marker(
+                    display_name="Streamer2",
+                    placements=[
+                        MarkerPlacement(
+                            if_with={"Timesheet"}, dimension="timesheet", value="coding"
+                        )
+                    ],
+                ),
+                "JobHunting": Marker(
+                    display_name="JobHunting",
+                    placements=[MarkerPlacement(dimension="project")],
+                ),
+            },
+        )
+
+        merged = merge_repository_configuration(base, second)
+
+        assert set(merged.dimensions.keys()) == {"project", "moment", "timesheet"}
+        assert merged.dimensions["project"].display_name == "Project2"
+        assert merged.dimensions["project"].comment == "c1"
+        assert merged.dimensions["project"].propagate is False
+        assert merged.dimensions["moment"].display_name == "Moment"
+        assert merged.dimensions["timesheet"].display_name == "Timesheet"
+
+        assert set(merged.markers.keys()) == {"Streamer", "JobHunting"}
+        assert merged.markers["Streamer"].display_name == "Streamer2"
+        assert merged.markers["Streamer"].placements == [
+            MarkerPlacement(dimension="project", value=None, if_with=set()),
+            MarkerPlacement(
+                if_with={"Timesheet"}, dimension="timesheet", value="coding"
+            ),
+        ]
+        assert merged.markers["JobHunting"].placements == [
+            MarkerPlacement(dimension="project", value=None, if_with=set())
+        ]
+
+    def test_does_not_mutate_base_or_second(self):
+        base = RepositoryConfiguration(
+            dimensions={"a": Dimension(display_name="A", propagate=True)},
+            markers={"M": Marker(display_name="M", placements=[])},
+        )
+        second = RepositoryConfiguration(
+            dimensions={"b": Dimension(display_name="B", propagate=False)},
+            markers={"N": Marker(display_name="N", placements=[])},
+        )
+
+        _ = merge_repository_configuration(base, second)
+
+        assert set(base.dimensions.keys()) == {"a"}
+        assert set(second.dimensions.keys()) == {"b"}
+        assert set(base.markers.keys()) == {"M"}
+        assert set(second.markers.keys()) == {"N"}
+
+    def test_merge_is_associative_for_non_conflicting_inputs(self):
+        a = RepositoryConfiguration(
+            dimensions={"d1": Dimension(display_name="D1", propagate=True)},
+            markers={"m1": Marker(display_name="M1", placements=[])},
+        )
+        b = RepositoryConfiguration(
+            dimensions={"d2": Dimension(display_name="D2", propagate=False)},
+            markers={"m2": Marker(display_name="M2", placements=[])},
+        )
+        c = RepositoryConfiguration(
+            dimensions={"d3": Dimension(display_name="D3", propagate=False)},
+            markers={"m3": Marker(display_name="M3", placements=[])},
+        )
+
+        left = merge_repository_configuration(merge_repository_configuration(a, b), c)
+        right = merge_repository_configuration(a, merge_repository_configuration(b, c))
+
+        assert left == right
+        assert set(left.dimensions.keys()) == {"d1", "d2", "d3"}
+        assert set(left.markers.keys()) == {"m1", "m2", "m3"}
+
+
+@pytest.mark.parametrize(
+    ("base", "second", "expected_propagate"),
+    [
+        (
+            RepositoryConfiguration(
+                dimensions={"d": Dimension(display_name="D", propagate=True)},
+                markers={},
+            ),
+            RepositoryConfiguration(
+                dimensions={"d": Dimension(display_name="D2")},
+                markers={},
+            ),
+            True,
+        )
+    ],
+)
+def test_merge_repository_configuration_propagate_preserves_base_when_omitted(
+    base, second, expected_propagate
+):
+    merged = merge_repository_configuration(base, second)
+    assert merged.dimensions["d"].propagate is expected_propagate
diff --git a/test/test_localize.py b/test/test_localize.py
index 5f1ed65..d539a62 100644
--- a/test/test_localize.py
+++ b/test/test_localize.py
@@ -2,7 +2,7 @@ from datetime import datetime
 
 from streamer.localize.localize import localize_stream_file
 from streamer.localize.localized_shard import LocalizedShard
-from streamer.localize.repostory_configuration import (
+from streamer.localize.repository_configuration import (
     Dimension,
     Marker,
     MarkerPlacement,
@@ -34,7 +34,7 @@ repository_configuration = RepositoryConfiguration(
             placements=[
                 MarkerPlacement(dimension="project"),
                 MarkerPlacement(
-                    if_with=["Timesheet"], dimension="timesheet", value="coding"
+                    if_with={"Timesheet"}, dimension="timesheet", value="coding"
                 ),
             ],
         ),