streamd/src/streamer/localize/extract_datetime.py
Konstantin Fickel d5b1541436
feat: extract date & time from tags
Signed-off-by: Konstantin Fickel <mail@konstantinfickel.de>
2026-01-31 17:15:01 +01:00

92 lines
2.8 KiB
Python

import os
import re
from datetime import date, datetime, time
def extract_datetime_from_file_name(file_name: str) -> datetime | None:
FILE_NAME_REGEX = r"^(?P<date>\d{8})(?:-(?P<time>\d{4,6}))?.+.md$"
base_name = os.path.basename(file_name)
match = re.match(FILE_NAME_REGEX, base_name)
if match:
date_str = match.group("date")
time_str = match.group("time") or ""
time_str = time_str.ljust(6, "0")
datetime_str = f"{date_str} {time_str[:2]}:{time_str[2:4]}:{time_str[4:]}"
return datetime.strptime(datetime_str, "%Y%m%d %H:%M:%S")
return None
def extract_datetime_from_marker(marker: str) -> datetime | None:
"""
Extract a datetime from a marker string in the exact format: YYYYMMDDHHMMSS.
Returns:
Parsed datetime if the format is fulfilled and values are valid, else None.
"""
if not re.fullmatch(r"\d{14}", marker or ""):
return None
try:
return datetime.strptime(marker, "%Y%m%d%H%M%S")
except ValueError:
return None
def extract_date_from_marker(marker: str) -> date | None:
"""
Extract a date from a marker string in the exact format: YYYYMMDD.
Returns:
Parsed date if the format is fulfilled and values are valid, else None.
"""
if not re.fullmatch(r"\d{8}", marker or ""):
return None
try:
return datetime.strptime(marker, "%Y%m%d").date()
except ValueError:
return None
def extract_time_from_marker(marker: str) -> time | None: # noqa: F821
"""
Extract a time from a marker string in the exact format: HHMMSS.
Returns:
Parsed time if the format is fulfilled and values are valid, else None.
"""
if not re.fullmatch(r"\d{6}", marker or ""):
return None
try:
return datetime.strptime(marker, "%H%M%S").time()
except ValueError:
return None
def extract_datetime_from_marker_list(markers: list[str], inherited_datetime: datetime):
    """
    Derive a datetime from a list of markers, falling back to an inherited one.

    Each marker is tried as a time (HHMMSS), a date (YYYYMMDD) and a full
    datetime (YYYYMMDDHHMMSS). Markers are visited from last to first, so a
    value found in an earlier marker replaces one found in a later marker.

    Returns:
        - the found date at midnight when only a date was found;
        - otherwise the found date and time, with any missing part taken
          from ``inherited_datetime``.
    """
    found_date: date | None = None
    found_time: time | None = None

    # Back-to-front traversal: whatever is assigned last (front of the list) wins.
    for candidate in reversed(markers):
        as_time = extract_time_from_marker(candidate)
        if as_time:
            found_time = as_time

        as_date = extract_date_from_marker(candidate)
        if as_date:
            found_date = as_date

        as_datetime = extract_datetime_from_marker(candidate)
        if as_datetime:
            found_date, found_time = as_datetime.date(), as_datetime.time()

    # A date without a time means the start of that day, not the inherited time.
    if found_date and not found_time:
        return datetime.combine(found_date, time(0, 0, 0))

    # The inherited value is consulted only for the parts that are still missing.
    final_date = found_date if found_date else inherited_datetime.date()
    final_time = found_time if found_time else inherited_datetime.time()
    return datetime.combine(final_date, final_time)
# Public API of this module: the names exported by a star import.
__all__ = [
    "extract_datetime_from_file_name",
    "extract_datetime_from_marker",
    "extract_date_from_marker",
    "extract_time_from_marker",
    "extract_datetime_from_marker_list",
]