feat: extract date & time from tags
Signed-off-by: Konstantin Fickel <mail@konstantinfickel.de>
This commit is contained in:
parent
ee91b2e8db
commit
d5b1541436
6 changed files with 246 additions and 22 deletions
|
|
@ -1,9 +1,9 @@
|
||||||
from datetime import datetime
|
|
||||||
import re
|
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
|
from datetime import date, datetime, time
|
||||||
|
|
||||||
|
|
||||||
def extract_date_from_file_name(file_name: str) -> datetime | None:
|
def extract_datetime_from_file_name(file_name: str) -> datetime | None:
|
||||||
FILE_NAME_REGEX = r"^(?P<date>\d{8})(?:-(?P<time>\d{4,6}))?.+.md$"
|
FILE_NAME_REGEX = r"^(?P<date>\d{8})(?:-(?P<time>\d{4,6}))?.+.md$"
|
||||||
base_name = os.path.basename(file_name)
|
base_name = os.path.basename(file_name)
|
||||||
match = re.match(FILE_NAME_REGEX, base_name)
|
match = re.match(FILE_NAME_REGEX, base_name)
|
||||||
|
|
@ -17,4 +17,76 @@ def extract_date_from_file_name(file_name: str) -> datetime | None:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
__all__ = ["extract_date_from_file_name"]
|
def extract_datetime_from_marker(marker: str) -> datetime | None:
|
||||||
|
"""
|
||||||
|
Extract a datetime from a marker string in the exact format: YYYYMMDDHHMMSS.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Parsed datetime if the format is fulfilled and values are valid, else None.
|
||||||
|
"""
|
||||||
|
if not re.fullmatch(r"\d{14}", marker or ""):
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return datetime.strptime(marker, "%Y%m%d%H%M%S")
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def extract_date_from_marker(marker: str) -> date | None:
|
||||||
|
"""
|
||||||
|
Extract a date from a marker string in the exact format: YYYYMMDD.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Parsed date if the format is fulfilled and values are valid, else None.
|
||||||
|
"""
|
||||||
|
if not re.fullmatch(r"\d{8}", marker or ""):
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return datetime.strptime(marker, "%Y%m%d").date()
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def extract_time_from_marker(marker: str) -> time | None: # noqa: F821
|
||||||
|
"""
|
||||||
|
Extract a time from a marker string in the exact format: HHMMSS.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Parsed time if the format is fulfilled and values are valid, else None.
|
||||||
|
"""
|
||||||
|
if not re.fullmatch(r"\d{6}", marker or ""):
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return datetime.strptime(marker, "%H%M%S").time()
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def extract_datetime_from_marker_list(
    markers: list[str], inherited_datetime: datetime
) -> datetime:
    """
    Derive a datetime from a list of markers, falling back to an inherited one.

    Every marker is tried as a time (HHMMSS), a date (YYYYMMDD) and a full
    datetime (YYYYMMDDHHMMSS); markers matching none of these are ignored.
    When several markers supply the same component, the one that appears
    earliest in the list wins.

    Args:
        markers: Marker strings to inspect.
        inherited_datetime: Source of any component no marker supplies.

    Returns:
        The found date at midnight when a date but no time was found;
        otherwise the found date and time, with each missing component taken
        from inherited_datetime.
    """
    shard_time: time | None = None
    shard_date: date | None = None

    # Walk back to front so that earlier markers overwrite later ones.
    for marker in reversed(markers):
        # The three formats differ in length (6, 8 and 14 digits), so at most
        # one of these parsers can succeed for a given marker.
        if (parsed_time := extract_time_from_marker(marker)) is not None:
            shard_time = parsed_time
        elif (parsed_date := extract_date_from_marker(marker)) is not None:
            shard_date = parsed_date
        elif (parsed_datetime := extract_datetime_from_marker(marker)) is not None:
            shard_date = parsed_datetime.date()
            shard_time = parsed_datetime.time()

    # An explicit date without a time does not inherit the time of day.
    if shard_date is not None and shard_time is None:
        return datetime.combine(shard_date, time(0, 0, 0))

    return datetime.combine(
        shard_date if shard_date is not None else inherited_datetime.date(),
        shard_time if shard_time is not None else inherited_datetime.time(),
    )
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"extract_datetime_from_file_name",
|
||||||
|
"extract_datetime_from_marker",
|
||||||
|
"extract_date_from_marker",
|
||||||
|
"extract_time_from_marker",
|
||||||
|
"extract_datetime_from_marker_list",
|
||||||
|
]
|
||||||
|
|
|
||||||
|
|
@ -1,19 +1,29 @@
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
from streamer.parse.shard import Shard, StreamFile
|
from streamer.parse.shard import Shard, StreamFile
|
||||||
|
|
||||||
from .repostory_configuration import RepositoryConfiguration
|
from .extract_datetime import (
|
||||||
|
extract_datetime_from_file_name,
|
||||||
|
extract_datetime_from_marker_list,
|
||||||
|
)
|
||||||
from .localized_shard import LocalizedShard
|
from .localized_shard import LocalizedShard
|
||||||
from .extract_datetime import extract_date_from_file_name
|
from .repostory_configuration import RepositoryConfiguration
|
||||||
|
|
||||||
|
|
||||||
def localize_shard(
|
def localize_shard(
|
||||||
shard: Shard, config: RepositoryConfiguration, propagated: dict[str, str]
|
shard: Shard,
|
||||||
|
config: RepositoryConfiguration,
|
||||||
|
propagated: dict[str, str],
|
||||||
|
moment: datetime,
|
||||||
) -> LocalizedShard:
|
) -> LocalizedShard:
|
||||||
position = {**propagated}
|
position = {**propagated}
|
||||||
private_position: dict[str, str] = {}
|
private_position: dict[str, str] = {}
|
||||||
|
|
||||||
|
adjusted_moment: datetime = extract_datetime_from_marker_list(shard.markers, moment)
|
||||||
|
|
||||||
for marker in shard.markers:
|
for marker in shard.markers:
|
||||||
normalized_marker = marker.lower()
|
normalized_marker = marker.lower()
|
||||||
|
|
||||||
if marker_definition := config.markers[normalized_marker]:
|
if marker_definition := config.markers[normalized_marker]:
|
||||||
dimension_name = marker_definition.dimension
|
dimension_name = marker_definition.dimension
|
||||||
dimension = config.dimensions[marker_definition.dimension]
|
dimension = config.dimensions[marker_definition.dimension]
|
||||||
|
|
@ -23,24 +33,30 @@ def localize_shard(
|
||||||
else:
|
else:
|
||||||
private_position[dimension_name] = normalized_marker
|
private_position[dimension_name] = normalized_marker
|
||||||
|
|
||||||
children = [localize_shard(child, config, position) for child in shard.children]
|
children = [
|
||||||
|
localize_shard(child, config, position, adjusted_moment)
|
||||||
|
for child in shard.children
|
||||||
|
]
|
||||||
|
|
||||||
position.update(private_position)
|
position.update(private_position)
|
||||||
|
|
||||||
return LocalizedShard(
|
return LocalizedShard(
|
||||||
**shard.model_dump(exclude={"children"}), location=position, children=children
|
**shard.model_dump(exclude={"children"}),
|
||||||
|
location=position,
|
||||||
|
children=children,
|
||||||
|
moment=adjusted_moment,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def localize_stream_file(
|
def localize_stream_file(
|
||||||
stream_file: StreamFile, config: RepositoryConfiguration
|
stream_file: StreamFile, config: RepositoryConfiguration
|
||||||
) -> LocalizedShard | None:
|
) -> LocalizedShard | None:
|
||||||
shard_date = extract_date_from_file_name(stream_file.filename)
|
shard_date = extract_datetime_from_file_name(stream_file.filename)
|
||||||
|
|
||||||
if not shard_date or not stream_file.shard:
|
if not shard_date or not stream_file.shard:
|
||||||
raise ValueError("Could not extract date")
|
raise ValueError("Could not extract date")
|
||||||
|
|
||||||
return localize_shard(stream_file.shard, config, {"moment": shard_date.isoformat()})
|
return localize_shard(stream_file.shard, config, {}, shard_date)
|
||||||
|
|
||||||
|
|
||||||
__all__ = ["localize_stream_file"]
|
__all__ = ["localize_stream_file"]
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,12 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
from streamer.parse.shard import Shard
|
from streamer.parse.shard import Shard
|
||||||
|
|
||||||
|
|
||||||
class LocalizedShard(Shard):
|
class LocalizedShard(Shard):
|
||||||
|
moment: datetime
|
||||||
location: dict[str, str]
|
location: dict[str, str]
|
||||||
children: list[LocalizedShard] = [] # pyright: ignore[reportIncompatibleVariableOverride]
|
children: list[LocalizedShard] = [] # pyright: ignore[reportIncompatibleVariableOverride]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,5 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
from streamer.parse.shard import Shard, StreamFile
|
from streamer.parse.shard import Shard, StreamFile
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -7,7 +9,7 @@ class ShardWithMarkdown(Shard):
|
||||||
|
|
||||||
|
|
||||||
class StreamFileWithMarkdown(StreamFile):
|
class StreamFileWithMarkdown(StreamFile):
|
||||||
shard: ShardWithMarkdown | None = None # pyright: ignore[reportIncompatibleVariableOverride]
|
shard: ShardWithMarkdown | None = None
|
||||||
|
|
||||||
|
|
||||||
def attach_markdown_shard(shard: Shard, markdown_text: str) -> ShardWithMarkdown:
|
def attach_markdown_shard(shard: Shard, markdown_text: str) -> ShardWithMarkdown:
|
||||||
|
|
@ -15,7 +17,9 @@ def attach_markdown_shard(shard: Shard, markdown_text: str) -> ShardWithMarkdown
|
||||||
markdown_content = "\n".join(lines[shard.start_line - 1 : shard.end_line])
|
markdown_content = "\n".join(lines[shard.start_line - 1 : shard.end_line])
|
||||||
return ShardWithMarkdown(
|
return ShardWithMarkdown(
|
||||||
**shard.model_dump(exclude=["children"]),
|
**shard.model_dump(exclude=["children"]),
|
||||||
children=map(lambda child: attach_markdown_shard(child, markdown_text), shard.children),
|
children=[
|
||||||
|
attach_markdown_shard(child, markdown_text) for child in shard.children
|
||||||
|
],
|
||||||
markdown_content=markdown_content,
|
markdown_content=markdown_content,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,32 +1,157 @@
|
||||||
from datetime import datetime
|
from datetime import date, datetime, time
|
||||||
from streamer.localize.extract_datetime import extract_date_from_file_name
|
|
||||||
|
from streamer.localize.extract_datetime import (
|
||||||
|
extract_date_from_marker,
|
||||||
|
extract_datetime_from_file_name,
|
||||||
|
extract_datetime_from_marker,
|
||||||
|
extract_datetime_from_marker_list,
|
||||||
|
extract_time_from_marker,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestExtractDateTime:
|
class TestExtractDateTime:
|
||||||
def test_extract_date_from_file_name_valid(self):
|
def test_extract_date_from_file_name_valid(self):
|
||||||
file_name = "20230101-123456 Some Text.md"
|
file_name = "20230101-123456 Some Text.md"
|
||||||
assert datetime(2023, 1, 1, 12, 34, 56) == extract_date_from_file_name(
|
assert datetime(2023, 1, 1, 12, 34, 56) == extract_datetime_from_file_name(
|
||||||
file_name
|
file_name
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_extract_date_from_file_name_invalid(self):
|
def test_extract_date_from_file_name_invalid(self):
|
||||||
file_name = "invalid-file-name.md"
|
file_name = "invalid-file-name.md"
|
||||||
assert extract_date_from_file_name(file_name) is None
|
assert extract_datetime_from_file_name(file_name) is None
|
||||||
|
|
||||||
def test_extract_date_from_file_name_without_time(self):
|
def test_extract_date_from_file_name_without_time(self):
|
||||||
file_name = "20230101 Some Text.md"
|
file_name = "20230101 Some Text.md"
|
||||||
assert datetime(2023, 1, 1, 0, 0, 0) == extract_date_from_file_name(file_name)
|
assert datetime(2023, 1, 1, 0, 0, 0) == extract_datetime_from_file_name(
|
||||||
|
file_name
|
||||||
|
)
|
||||||
|
|
||||||
def test_extract_date_from_file_name_short_time(self):
|
def test_extract_date_from_file_name_short_time(self):
|
||||||
file_name = "20230101-1234 Some Text.md"
|
file_name = "20230101-1234 Some Text.md"
|
||||||
assert datetime(2023, 1, 1, 12, 34, 0) == extract_date_from_file_name(file_name)
|
assert datetime(2023, 1, 1, 12, 34, 0) == extract_datetime_from_file_name(
|
||||||
|
file_name
|
||||||
|
)
|
||||||
|
|
||||||
def test_extract_date_from_file_name_empty_string(self):
|
def test_extract_date_from_file_name_empty_string(self):
|
||||||
file_name = ""
|
file_name = ""
|
||||||
assert extract_date_from_file_name(file_name) is None
|
assert extract_datetime_from_file_name(file_name) is None
|
||||||
|
|
||||||
def test_extract_date_from_file_name_with_full_path(self):
|
def test_extract_date_from_file_name_with_full_path(self):
|
||||||
file_name = "/path/to/20230101-123456 Some Text.md"
|
file_name = "/path/to/20230101-123456 Some Text.md"
|
||||||
assert datetime(2023, 1, 1, 12, 34, 56) == extract_date_from_file_name(
|
assert datetime(2023, 1, 1, 12, 34, 56) == extract_datetime_from_file_name(
|
||||||
file_name
|
file_name
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestExtractMarkerDateTime:
    """Checks for parsing a single YYYYMMDDHHMMSS marker."""

    def test_extract_datetime_from_marker_valid(self):
        parsed = extract_datetime_from_marker("20250101150000")
        assert datetime(2025, 1, 1, 15, 0, 0) == parsed

    def test_extract_datetime_from_marker_invalid_format(self):
        malformed = (
            "2025010115000",  # too short
            "202501011500000",  # too long
            "2025-01-01T150000",  # separators
            "2025010115000a",  # non-digit
            "",
        )
        for candidate in malformed:
            assert extract_datetime_from_marker(candidate) is None

    def test_extract_datetime_from_marker_invalid_values(self):
        impossible = (
            "20250230120000",  # Feb 30
            "20250101126000",  # minute 60
            "20250101240000",  # hour 24
        )
        for candidate in impossible:
            assert extract_datetime_from_marker(candidate) is None
|
||||||
|
|
||||||
|
|
||||||
|
class TestExtractMarkerDate:
    """Checks for parsing a single YYYYMMDD marker."""

    def test_extract_date_from_marker_valid(self):
        parsed = extract_date_from_marker("20250101")
        assert date(2025, 1, 1) == parsed

    def test_extract_date_from_marker_invalid_format(self):
        malformed = (
            "2025010",  # too short
            "202501011",  # too long
            "2025-01-01",  # separators
            "2025010a",  # non-digit
            "",
        )
        for candidate in malformed:
            assert extract_date_from_marker(candidate) is None

    def test_extract_date_from_marker_invalid_values(self):
        impossible = (
            "20250230",  # Feb 30
            "20251301",  # month 13
            "20250132",  # day 32
        )
        for candidate in impossible:
            assert extract_date_from_marker(candidate) is None
|
||||||
|
|
||||||
|
|
||||||
|
class TestExtractMarkerTime:
    """Checks for parsing a single HHMMSS marker."""

    def test_extract_time_from_marker_valid(self):
        parsed = extract_time_from_marker("150000")
        assert time(15, 0, 0) == parsed

    def test_extract_time_from_marker_invalid_format(self):
        malformed = (
            "15000",  # too short
            "1500000",  # too long
            "15:00:00",  # separators
            "15000a",  # non-digit
            "",
        )
        for candidate in malformed:
            assert extract_time_from_marker(candidate) is None

    def test_extract_time_from_marker_invalid_values(self):
        impossible = (
            "240000",  # hour 24
            "156000",  # minute 60
            "150060",  # second 60
        )
        for candidate in impossible:
            assert extract_time_from_marker(candidate) is None
|
||||||
|
|
||||||
|
|
||||||
|
class TestExtractDateTimeFromMarkerList:
    """Checks for combining several markers with an inherited datetime."""

    def test_no_markers_inherits_datetime(self):
        fallback = datetime(2025, 1, 2, 3, 4, 5)
        resolved = extract_datetime_from_marker_list([], fallback)
        assert fallback == resolved

    def test_unrelated_markers_inherits_datetime(self):
        fallback = datetime(2025, 1, 2, 3, 4, 5)
        resolved = extract_datetime_from_marker_list(
            ["not-a-marker", "2025-01-01", "1500", "1234567"], fallback
        )
        assert fallback == resolved

    def test_date_only_marker_sets_midnight(self):
        fallback = datetime(2025, 6, 7, 8, 9, 10)
        resolved = extract_datetime_from_marker_list(["20250101"], fallback)
        assert datetime(2025, 1, 1, 0, 0, 0) == resolved

    def test_time_only_marker_inherits_date(self):
        fallback = datetime(2025, 6, 7, 8, 9, 10)
        resolved = extract_datetime_from_marker_list(["150000"], fallback)
        assert datetime(2025, 6, 7, 15, 0, 0) == resolved

    def test_datetime_marker_overrides_both_date_and_time(self):
        fallback = datetime(2025, 6, 7, 8, 9, 10)
        resolved = extract_datetime_from_marker_list(["20250101150000"], fallback)
        assert datetime(2025, 1, 1, 15, 0, 0) == resolved

    def test_combined_date_and_time_markers(self):
        fallback = datetime(2025, 6, 7, 8, 9, 10)
        resolved = extract_datetime_from_marker_list(["20250101", "150000"], fallback)
        assert datetime(2025, 1, 1, 15, 0, 0) == resolved

    def test_first_marker_wins_when_multiple_dates_or_times(self):
        fallback = datetime(2025, 6, 7, 8, 9, 10)
        resolved = extract_datetime_from_marker_list(
            ["20250101", "150000", "20250102", "160000"], fallback
        )
        assert datetime(2025, 1, 1, 15, 0, 0) == resolved

    def test_last_separated_date_and_time_win(self):
        # The separate date and time markers come before the combined
        # datetime marker in the list, and the expected result uses them.
        fallback = datetime(2025, 6, 7, 8, 9, 10)
        resolved = extract_datetime_from_marker_list(
            ["20250101", "150000", "20250102160000"], fallback
        )
        assert datetime(2025, 1, 1, 15, 0, 0) == resolved

    def test_invalid_date_or_time_markers_are_ignored(self):
        fallback = datetime(2025, 6, 7, 8, 9, 10)
        resolved = extract_datetime_from_marker_list(
            ["20251301", "240000", "20250101", "150000"], fallback
        )
        assert datetime(2025, 1, 1, 15, 0, 0) == resolved
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,5 @@
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
from streamer.localize.localize import localize_stream_file
|
from streamer.localize.localize import localize_stream_file
|
||||||
from streamer.localize.localized_shard import LocalizedShard
|
from streamer.localize.localized_shard import LocalizedShard
|
||||||
from streamer.localize.repostory_configuration import (
|
from streamer.localize.repostory_configuration import (
|
||||||
|
|
@ -37,10 +39,11 @@ class TestExtractDateTime:
|
||||||
assert localize_stream_file(
|
assert localize_stream_file(
|
||||||
stream_file, repository_configuration
|
stream_file, repository_configuration
|
||||||
) == LocalizedShard(
|
) == LocalizedShard(
|
||||||
|
moment=datetime(2025, 6, 22, 12, 10, 0, 0),
|
||||||
markers=["Streamer"],
|
markers=["Streamer"],
|
||||||
tags=[],
|
tags=[],
|
||||||
start_line=1,
|
start_line=1,
|
||||||
end_line=1,
|
end_line=1,
|
||||||
children=[],
|
children=[],
|
||||||
location={"moment": "2025-06-22T12:10:00", "project": "streamer"},
|
location={"project": "streamer"},
|
||||||
)
|
)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue