feat: extract date & time from tags

Signed-off-by: Konstantin Fickel <mail@konstantinfickel.de>
This commit is contained in:
Konstantin Fickel 2026-01-31 17:15:01 +01:00
parent ee91b2e8db
commit d5b1541436
Signed by: kfickel
GPG key ID: A793722F9933C1A5
6 changed files with 246 additions and 22 deletions

View file

@ -1,9 +1,9 @@
from datetime import datetime
import re
import os
import re
from datetime import date, datetime, time
def extract_date_from_file_name(file_name: str) -> datetime | None:
def extract_datetime_from_file_name(file_name: str) -> datetime | None:
FILE_NAME_REGEX = r"^(?P<date>\d{8})(?:-(?P<time>\d{4,6}))?.+.md$"
base_name = os.path.basename(file_name)
match = re.match(FILE_NAME_REGEX, base_name)
@ -17,4 +17,76 @@ def extract_date_from_file_name(file_name: str) -> datetime | None:
return None
__all__ = ["extract_date_from_file_name"]
def extract_datetime_from_marker(marker: str) -> datetime | None:
"""
Extract a datetime from a marker string in the exact format: YYYYMMDDHHMMSS.
Returns:
Parsed datetime if the format is fulfilled and values are valid, else None.
"""
if not re.fullmatch(r"\d{14}", marker or ""):
return None
try:
return datetime.strptime(marker, "%Y%m%d%H%M%S")
except ValueError:
return None
def extract_date_from_marker(marker: str) -> date | None:
"""
Extract a date from a marker string in the exact format: YYYYMMDD.
Returns:
Parsed date if the format is fulfilled and values are valid, else None.
"""
if not re.fullmatch(r"\d{8}", marker or ""):
return None
try:
return datetime.strptime(marker, "%Y%m%d").date()
except ValueError:
return None
def extract_time_from_marker(marker: str) -> time | None: # noqa: F821
"""
Extract a time from a marker string in the exact format: HHMMSS.
Returns:
Parsed time if the format is fulfilled and values are valid, else None.
"""
if not re.fullmatch(r"\d{6}", marker or ""):
return None
try:
return datetime.strptime(marker, "%H%M%S").time()
except ValueError:
return None
def extract_datetime_from_marker_list(
    markers: list[str], inherited_datetime: datetime
) -> datetime:
    """
    Derive a datetime from a list of markers, falling back to an inherited one.

    Every marker is tried as a time (HHMMSS), a date (YYYYMMDD) and a combined
    datetime (YYYYMMDDHHMMSS); markers matching none of these are ignored.
    The list is scanned from last to first and later assignments overwrite
    earlier ones, so for each component the marker closest to the start of
    the list wins.

    Args:
        markers: Marker strings to inspect.
        inherited_datetime: Fallback used for components no marker provides.

    Returns:
        - the marker date at midnight if a date but no time was found,
        - otherwise the found date/time, with any missing component taken
          from ``inherited_datetime``.
    """
    shard_time: time | None = None
    shard_date: date | None = None

    # Walk backwards so that earlier markers overwrite later ones.
    for marker in reversed(markers):
        parsed_time = extract_time_from_marker(marker)
        if parsed_time is not None:
            shard_time = parsed_time

        parsed_date = extract_date_from_marker(marker)
        if parsed_date is not None:
            shard_date = parsed_date

        parsed_datetime = extract_datetime_from_marker(marker)
        if parsed_datetime is not None:
            shard_date = parsed_datetime.date()
            shard_time = parsed_datetime.time()

    # An explicit date without a time means "start of that day" rather than
    # the inherited time of day.
    if shard_date is not None and shard_time is None:
        return datetime.combine(shard_date, time(0, 0, 0))

    return datetime.combine(
        shard_date if shard_date is not None else inherited_datetime.date(),
        shard_time if shard_time is not None else inherited_datetime.time(),
    )
__all__ = [
"extract_datetime_from_file_name",
"extract_datetime_from_marker",
"extract_date_from_marker",
"extract_time_from_marker",
"extract_datetime_from_marker_list",
]

View file

@ -1,19 +1,29 @@
from datetime import datetime
from streamer.parse.shard import Shard, StreamFile
from .repostory_configuration import RepositoryConfiguration
from .extract_datetime import (
extract_datetime_from_file_name,
extract_datetime_from_marker_list,
)
from .localized_shard import LocalizedShard
from .extract_datetime import extract_date_from_file_name
from .repostory_configuration import RepositoryConfiguration
def localize_shard(
shard: Shard, config: RepositoryConfiguration, propagated: dict[str, str]
shard: Shard,
config: RepositoryConfiguration,
propagated: dict[str, str],
moment: datetime,
) -> LocalizedShard:
position = {**propagated}
private_position: dict[str, str] = {}
adjusted_moment: datetime = extract_datetime_from_marker_list(shard.markers, moment)
for marker in shard.markers:
normalized_marker = marker.lower()
if marker_definition := config.markers[normalized_marker]:
dimension_name = marker_definition.dimension
dimension = config.dimensions[marker_definition.dimension]
@ -23,24 +33,30 @@ def localize_shard(
else:
private_position[dimension_name] = normalized_marker
children = [localize_shard(child, config, position) for child in shard.children]
children = [
localize_shard(child, config, position, adjusted_moment)
for child in shard.children
]
position.update(private_position)
return LocalizedShard(
**shard.model_dump(exclude={"children"}), location=position, children=children
**shard.model_dump(exclude={"children"}),
location=position,
children=children,
moment=adjusted_moment,
)
def localize_stream_file(
stream_file: StreamFile, config: RepositoryConfiguration
) -> LocalizedShard | None:
shard_date = extract_date_from_file_name(stream_file.filename)
shard_date = extract_datetime_from_file_name(stream_file.filename)
if not shard_date or not stream_file.shard:
raise ValueError("Could not extract date")
return localize_shard(stream_file.shard, config, {"moment": shard_date.isoformat()})
return localize_shard(stream_file.shard, config, {}, shard_date)
__all__ = ["localize_stream_file"]

View file

@ -1,8 +1,12 @@
from __future__ import annotations
from datetime import datetime
from streamer.parse.shard import Shard
class LocalizedShard(Shard):
    """A Shard extended with the moment and location produced by localization."""

    # Datetime assigned to this shard; localize_shard derives it from the
    # shard's markers, falling back to the moment passed down by its parent.
    moment: datetime
    # Position of the shard, keyed by dimension name
    # (e.g. {"project": "streamer"}).
    location: dict[str, str]
    # Localized versions of the shard's children.
    children: list[LocalizedShard] = []  # pyright: ignore[reportIncompatibleVariableOverride]

View file

@ -1,3 +1,5 @@
from __future__ import annotations
from streamer.parse.shard import Shard, StreamFile
@ -7,7 +9,7 @@ class ShardWithMarkdown(Shard):
class StreamFileWithMarkdown(StreamFile):
shard: ShardWithMarkdown | None = None # pyright: ignore[reportIncompatibleVariableOverride]
shard: ShardWithMarkdown | None = None
def attach_markdown_shard(shard: Shard, markdown_text: str) -> ShardWithMarkdown:
@ -15,7 +17,9 @@ def attach_markdown_shard(shard: Shard, markdown_text: str) -> ShardWithMarkdown
markdown_content = "\n".join(lines[shard.start_line - 1 : shard.end_line])
return ShardWithMarkdown(
**shard.model_dump(exclude=["children"]),
children=map(lambda child: attach_markdown_shard(child, markdown_text), shard.children),
children=[
attach_markdown_shard(child, markdown_text) for child in shard.children
],
markdown_content=markdown_content,
)

View file

@ -1,32 +1,157 @@
from datetime import datetime
from streamer.localize.extract_datetime import extract_date_from_file_name
from datetime import date, datetime, time
from streamer.localize.extract_datetime import (
extract_date_from_marker,
extract_datetime_from_file_name,
extract_datetime_from_marker,
extract_datetime_from_marker_list,
extract_time_from_marker,
)
class TestExtractDateTime:
def test_extract_date_from_file_name_valid(self):
file_name = "20230101-123456 Some Text.md"
assert datetime(2023, 1, 1, 12, 34, 56) == extract_date_from_file_name(
assert datetime(2023, 1, 1, 12, 34, 56) == extract_datetime_from_file_name(
file_name
)
def test_extract_date_from_file_name_invalid(self):
file_name = "invalid-file-name.md"
assert extract_date_from_file_name(file_name) is None
assert extract_datetime_from_file_name(file_name) is None
def test_extract_date_from_file_name_without_time(self):
file_name = "20230101 Some Text.md"
assert datetime(2023, 1, 1, 0, 0, 0) == extract_date_from_file_name(file_name)
assert datetime(2023, 1, 1, 0, 0, 0) == extract_datetime_from_file_name(
file_name
)
def test_extract_date_from_file_name_short_time(self):
file_name = "20230101-1234 Some Text.md"
assert datetime(2023, 1, 1, 12, 34, 0) == extract_date_from_file_name(file_name)
assert datetime(2023, 1, 1, 12, 34, 0) == extract_datetime_from_file_name(
file_name
)
def test_extract_date_from_file_name_empty_string(self):
file_name = ""
assert extract_date_from_file_name(file_name) is None
assert extract_datetime_from_file_name(file_name) is None
def test_extract_date_from_file_name_with_full_path(self):
file_name = "/path/to/20230101-123456 Some Text.md"
assert datetime(2023, 1, 1, 12, 34, 56) == extract_date_from_file_name(
assert datetime(2023, 1, 1, 12, 34, 56) == extract_datetime_from_file_name(
file_name
)
class TestExtractMarkerDateTime:
    """Parsing of a single combined YYYYMMDDHHMMSS marker."""

    def test_extract_datetime_from_marker_valid(self):
        parsed = extract_datetime_from_marker("20250101150000")
        assert parsed == datetime(2025, 1, 1, 15, 0, 0)

    def test_extract_datetime_from_marker_invalid_format(self):
        malformed_markers = (
            "2025010115000",  # too short
            "202501011500000",  # too long
            "2025-01-01T150000",  # separators
            "2025010115000a",  # non-digit
            "",
        )
        for candidate in malformed_markers:
            assert extract_datetime_from_marker(candidate) is None

    def test_extract_datetime_from_marker_invalid_values(self):
        impossible_markers = (
            "20250230120000",  # Feb 30
            "20250101126000",  # minute 60
            "20250101240000",  # hour 24
        )
        for candidate in impossible_markers:
            assert extract_datetime_from_marker(candidate) is None
class TestExtractMarkerDate:
    """Parsing of a single YYYYMMDD marker."""

    def test_extract_date_from_marker_valid(self):
        parsed = extract_date_from_marker("20250101")
        assert parsed == date(2025, 1, 1)

    def test_extract_date_from_marker_invalid_format(self):
        malformed_markers = (
            "2025010",  # too short
            "202501011",  # too long
            "2025-01-01",  # separators
            "2025010a",  # non-digit
            "",
        )
        for candidate in malformed_markers:
            assert extract_date_from_marker(candidate) is None

    def test_extract_date_from_marker_invalid_values(self):
        impossible_markers = (
            "20250230",  # Feb 30
            "20251301",  # month 13
            "20250132",  # day 32
        )
        for candidate in impossible_markers:
            assert extract_date_from_marker(candidate) is None
class TestExtractMarkerTime:
    """Parsing of a single HHMMSS marker."""

    def test_extract_time_from_marker_valid(self):
        parsed = extract_time_from_marker("150000")
        assert parsed == time(15, 0, 0)

    def test_extract_time_from_marker_invalid_format(self):
        malformed_markers = (
            "15000",  # too short
            "1500000",  # too long
            "15:00:00",  # separators
            "15000a",  # non-digit
            "",
        )
        for candidate in malformed_markers:
            assert extract_time_from_marker(candidate) is None

    def test_extract_time_from_marker_invalid_values(self):
        impossible_markers = (
            "240000",  # hour 24
            "156000",  # minute 60
            "150060",  # second 60
        )
        for candidate in impossible_markers:
            assert extract_time_from_marker(candidate) is None
class TestExtractDateTimeFromMarkerList:
    """Resolution of a datetime from a marker list plus an inherited fallback."""

    def test_no_markers_inherits_datetime(self):
        fallback = datetime(2025, 1, 2, 3, 4, 5)
        resolved = extract_datetime_from_marker_list([], fallback)
        assert resolved == fallback

    def test_unrelated_markers_inherits_datetime(self):
        fallback = datetime(2025, 1, 2, 3, 4, 5)
        resolved = extract_datetime_from_marker_list(
            ["not-a-marker", "2025-01-01", "1500", "1234567"], fallback
        )
        assert resolved == fallback

    def test_date_only_marker_sets_midnight(self):
        fallback = datetime(2025, 6, 7, 8, 9, 10)
        resolved = extract_datetime_from_marker_list(["20250101"], fallback)
        assert resolved == datetime(2025, 1, 1, 0, 0, 0)

    def test_time_only_marker_inherits_date(self):
        fallback = datetime(2025, 6, 7, 8, 9, 10)
        resolved = extract_datetime_from_marker_list(["150000"], fallback)
        assert resolved == datetime(2025, 6, 7, 15, 0, 0)

    def test_datetime_marker_overrides_both_date_and_time(self):
        fallback = datetime(2025, 6, 7, 8, 9, 10)
        resolved = extract_datetime_from_marker_list(["20250101150000"], fallback)
        assert resolved == datetime(2025, 1, 1, 15, 0, 0)

    def test_combined_date_and_time_markers(self):
        fallback = datetime(2025, 6, 7, 8, 9, 10)
        resolved = extract_datetime_from_marker_list(["20250101", "150000"], fallback)
        assert resolved == datetime(2025, 1, 1, 15, 0, 0)

    def test_first_marker_wins_when_multiple_dates_or_times(self):
        fallback = datetime(2025, 6, 7, 8, 9, 10)
        resolved = extract_datetime_from_marker_list(
            ["20250101", "150000", "20250102", "160000"], fallback
        )
        assert resolved == datetime(2025, 1, 1, 15, 0, 0)

    def test_last_separated_date_and_time_win(self):
        fallback = datetime(2025, 6, 7, 8, 9, 10)
        resolved = extract_datetime_from_marker_list(
            ["20250101", "150000", "20250102160000"], fallback
        )
        assert resolved == datetime(2025, 1, 1, 15, 0, 0)

    def test_invalid_date_or_time_markers_are_ignored(self):
        fallback = datetime(2025, 6, 7, 8, 9, 10)
        resolved = extract_datetime_from_marker_list(
            ["20251301", "240000", "20250101", "150000"], fallback
        )
        assert resolved == datetime(2025, 1, 1, 15, 0, 0)

View file

@ -1,3 +1,5 @@
from datetime import datetime
from streamer.localize.localize import localize_stream_file
from streamer.localize.localized_shard import LocalizedShard
from streamer.localize.repostory_configuration import (
@ -37,10 +39,11 @@ class TestExtractDateTime:
assert localize_stream_file(
stream_file, repository_configuration
) == LocalizedShard(
moment=datetime(2025, 6, 22, 12, 10, 0, 0),
markers=["Streamer"],
tags=[],
start_line=1,
end_line=1,
children=[],
location={"moment": "2025-06-22T12:10:00", "project": "streamer"},
location={"project": "streamer"},
)