streamd/src/localize/datetime.rs
Konstantin Fickel e562af0dc3
All checks were successful
Release / Build and Release (push) Successful in 6s
Continuous Integration / Lint, Check & Test (push) Successful in 1m26s
Continuous Integration / Build Package (push) Successful in 1m36s
fix: clippy warnings
2026-04-07 13:43:40 +02:00

420 lines
15 KiB
Rust

use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Utc};
use chrono_tz::Tz;
use once_cell::sync::Lazy;
use regex::Regex;
use std::path::Path;
/// Regex for extracting the date and optional time prefix from a Markdown file name.
///
/// Accepted shapes: `YYYYMMDD<rest>.md` or `YYYYMMDD-HHMM[S[S]]<rest>.md`, where the
/// time part is 4-6 digits and `<rest>` (typically a title) may be empty.
///
/// Two details are deliberate:
/// - `[0-9]` instead of `\d`: `\d` in the `regex` crate is Unicode-aware and also matches
///   non-ASCII digits, which are multi-byte in UTF-8 and are not valid date/time digits.
/// - `.*` instead of `.+` for `<rest>`: with `.+`, a bare `20230101-123456.md` could only
///   match by backtracking and handing the last time digit to `<rest>`, so the captured
///   time was truncated to `12345`.
static FILE_NAME_REGEX: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"^(?P<date>[0-9]{8})(?:-(?P<time>[0-9]{4,6}))?.*\.md$")
        .expect("FILE_NAME_REGEX is a valid pattern")
});
/// Regex for validating the datetime marker shape: exactly 14 ASCII digits (YYYYMMDDHHMMSS).
///
/// Uses `[0-9]` rather than `\d` because `\d` in the `regex` crate is Unicode-aware and
/// would also let non-ASCII digits through this shape check.
static DATETIME_MARKER_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[0-9]{14}$").unwrap());
/// Regex for validating the date marker shape: exactly 8 ASCII digits (YYYYMMDD).
///
/// Uses `[0-9]` rather than `\d` because `\d` in the `regex` crate is Unicode-aware and
/// would also let non-ASCII digits through this shape check.
static DATE_MARKER_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[0-9]{8}$").unwrap());
/// Regex for validating the time marker shape: exactly 6 ASCII digits (HHMMSS).
///
/// Uses `[0-9]` rather than `\d` because `\d` in the `regex` crate is Unicode-aware and
/// would also let non-ASCII digits through this shape check.
static TIME_MARKER_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[0-9]{6}$").unwrap());
/// Interpret a wall-clock `NaiveDateTime` in `tz` and express the instant in UTC.
///
/// A unique mapping is used as-is. When the local time is ambiguous (it occurs twice
/// around a DST transition) the earliest of the candidates is chosen. When the local
/// time has no mapping at all, `None` is returned.
fn naive_to_utc(dt: NaiveDateTime, tz: Tz) -> Option<DateTime<Utc>> {
    // `earliest()` yields the single result when there is one, and the first
    // candidate when there are two.
    let local = tz.from_local_datetime(&dt).earliest()?;
    Some(local.with_timezone(&Utc))
}
/// Extract a datetime from a file name that starts with `YYYYMMDD` or `YYYYMMDD-HHMMSS`.
///
/// Only the final path component of `file_name` is inspected. The time component is
/// optional and can be 4-6 digits (HHMM, HHMMS, or HHMMSS); missing trailing digits are
/// filled with zeros, and a missing time means midnight. The resulting wall-clock value
/// is interpreted in `tz` and returned as a UTC instant.
///
/// Returns `None` when the name does not match `FILE_NAME_REGEX`, when the digits do not
/// form a valid calendar date/time, or when the local time cannot be mapped in `tz`.
/// This function never panics on malformed names: the captured text is parsed as a
/// whole rather than sliced by byte offsets.
///
/// # Examples
/// - "20230101-123456 Some Text.md" -> DateTime for 2023-01-01 12:34:56 in tz
/// - "20230101 Some Text.md" -> DateTime for 2023-01-01 00:00:00 in tz
/// - "invalid-file-name.md" -> None
pub fn extract_datetime_from_file_name(file_name: &str, tz: Tz) -> Option<DateTime<Utc>> {
    // Only the last path component carries the timestamp; fall back to the raw input
    // when no usable component can be extracted.
    let base_name = Path::new(file_name)
        .file_name()
        .and_then(|s| s.to_str())
        .unwrap_or(file_name);

    let captures = FILE_NAME_REGEX.captures(base_name)?;
    let date_str = captures.name("date")?.as_str();
    let time_str = captures.name("time").map_or("", |m| m.as_str());

    // Right-pad the time with '0' up to HHMMSS and parse date+time as one compact
    // string. No byte-offset slicing is involved, so unexpected (e.g. multi-byte)
    // characters simply fail to parse instead of panicking.
    let datetime_str = format!("{}{:0<6}", date_str, time_str);
    NaiveDateTime::parse_from_str(&datetime_str, "%Y%m%d%H%M%S")
        .ok()
        .and_then(|dt| naive_to_utc(dt, tz))
}
/// Turn a `YYYYMMDDHHMMSS` marker into a `NaiveDateTime`; no timezone is applied.
///
/// Yields `None` when the marker fails the `DATETIME_MARKER_REGEX` shape check or when
/// the digits do not form a valid date and time.
fn parse_naive_datetime_from_marker(marker: &str) -> Option<NaiveDateTime> {
    DATETIME_MARKER_REGEX
        .is_match(marker)
        .then(|| NaiveDateTime::parse_from_str(marker, "%Y%m%d%H%M%S").ok())
        .flatten()
}
/// Read a marker of the exact form `YYYYMMDDHHMMSS` as a point in time.
///
/// The marker is taken as wall-clock time in `tz` and returned as a UTC instant.
/// Returns `None` if the marker is malformed, holds out-of-range values, or the local
/// time cannot be mapped in `tz`.
pub fn extract_datetime_from_marker(marker: &str, tz: Tz) -> Option<DateTime<Utc>> {
    let naive = parse_naive_datetime_from_marker(marker)?;
    naive_to_utc(naive, tz)
}
/// Read a marker of the exact form `YYYYMMDD` as a calendar date.
///
/// Returns `None` if the marker fails the `DATE_MARKER_REGEX` shape check or does not
/// name a real date (for example February 30th).
pub fn extract_date_from_marker(marker: &str) -> Option<NaiveDate> {
    if DATE_MARKER_REGEX.is_match(marker) {
        NaiveDate::parse_from_str(marker, "%Y%m%d").ok()
    } else {
        None
    }
}
/// Read a marker of the exact form `HHMMSS` as a time of day.
///
/// Returns `None` if the marker fails the `TIME_MARKER_REGEX` shape check or its fields
/// are out of range (for example hour 24 or minute 60).
pub fn extract_time_from_marker(marker: &str) -> Option<NaiveTime> {
    Some(marker)
        .filter(|candidate| TIME_MARKER_REGEX.is_match(candidate))
        .and_then(|candidate| NaiveTime::parse_from_str(candidate, "%H%M%S").ok())
}
/// Extract a datetime from a list of markers, using an inherited datetime as fallback.
///
/// Each marker may be a time (`HHMMSS`), a date (`YYYYMMDD`) or a full datetime
/// (`YYYYMMDDHHMMSS`); anything else is ignored. When several markers provide a date
/// (or a time), the one that appears FIRST in `markers` takes precedence. Date and time
/// are resolved independently, so a date from one marker can be combined with a time
/// from another. All naive values (from markers and the inherited fallback) are
/// interpreted in `tz`.
///
/// Rules:
/// - If a full datetime marker (14 digits) is found, it provides both date and time
/// - If only a date is found, the time defaults to midnight
/// - If only a time is found, the date is inherited (as seen in `tz`)
/// - If no valid markers are found, `inherited_datetime` is returned unchanged
/// - If the combined local datetime cannot be mapped in `tz`, `inherited_datetime` is
///   returned
pub fn extract_datetime_from_marker_list(
    markers: &[String],
    inherited_datetime: DateTime<Utc>,
    tz: Tz,
) -> DateTime<Utc> {
    let mut shard_time: Option<NaiveTime> = None;
    let mut shard_date: Option<NaiveDate> = None;

    // Walk the list back-to-front and overwrite on every hit: the value that survives
    // is the one from the marker closest to the front of the list (first marker wins).
    for marker in markers.iter().rev() {
        if let Some(time) = extract_time_from_marker(marker) {
            shard_time = Some(time);
        }
        if let Some(date) = extract_date_from_marker(marker) {
            shard_date = Some(date);
        }
        if let Some(naive_dt) = parse_naive_datetime_from_marker(marker) {
            shard_date = Some(naive_dt.date());
            shard_time = Some(naive_dt.time());
        }
    }

    let (final_date, final_time) = match (shard_date, shard_time) {
        // Nothing usable: hand back the inherited instant untouched. Round-tripping it
        // through local time could move an instant that falls in a repeated (DST
        // fall-back) hour, because the ambiguous local time resolves to the earliest
        // candidate.
        (None, None) => return inherited_datetime,
        (Some(date), Some(time)) => (date, time),
        // A date without a time means the start of that day.
        (Some(date), None) => (
            date,
            NaiveTime::from_hms_opt(0, 0, 0).expect("00:00:00 is a valid time"),
        ),
        // A time without a date reuses the inherited date as observed in `tz`.
        (None, Some(time)) => (
            inherited_datetime.with_timezone(&tz).naive_local().date(),
            time,
        ),
    };

    let naive = NaiveDateTime::new(final_date, final_time);
    naive_to_utc(naive, tz).unwrap_or(inherited_datetime)
}
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::TimeZone;
    use chrono_tz::UTC;

    /// Shorthand for building a UTC instant from calendar fields.
    fn utc(year: i32, month: u32, day: u32, hour: u32, minute: u32, second: u32) -> DateTime<Utc> {
        Utc.with_ymd_and_hms(year, month, day, hour, minute, second)
            .unwrap()
    }

    /// Builds the owned marker list that `extract_datetime_from_marker_list` takes.
    fn marker_list(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    // ---- extract_datetime_from_file_name ----

    #[test]
    fn test_extract_date_from_file_name_valid() {
        let parsed = extract_datetime_from_file_name("20230101-123456 Some Text.md", UTC);
        assert_eq!(parsed, Some(utc(2023, 1, 1, 12, 34, 56)));
    }

    #[test]
    fn test_extract_date_from_file_name_invalid() {
        let parsed = extract_datetime_from_file_name("invalid-file-name.md", UTC);
        assert_eq!(parsed, None);
    }

    #[test]
    fn test_extract_date_from_file_name_without_time() {
        // No time component: midnight is assumed.
        let parsed = extract_datetime_from_file_name("20230101 Some Text.md", UTC);
        assert_eq!(parsed, Some(utc(2023, 1, 1, 0, 0, 0)));
    }

    #[test]
    fn test_extract_date_from_file_name_short_time() {
        // HHMM only: seconds are filled with zeros.
        let parsed = extract_datetime_from_file_name("20230101-1234 Some Text.md", UTC);
        assert_eq!(parsed, Some(utc(2023, 1, 1, 12, 34, 0)));
    }

    #[test]
    fn test_extract_date_from_file_name_empty_string() {
        let parsed = extract_datetime_from_file_name("", UTC);
        assert_eq!(parsed, None);
    }

    #[test]
    fn test_extract_date_from_file_name_with_full_path() {
        // Leading directories are ignored; only the last component is parsed.
        let parsed = extract_datetime_from_file_name("/path/to/20230101-123456 Some Text.md", UTC);
        assert_eq!(parsed, Some(utc(2023, 1, 1, 12, 34, 56)));
    }

    #[test]
    fn test_extract_date_from_file_name_with_timezone_offset() {
        // Europe/Berlin is UTC+1 in January (CET)
        let berlin = chrono_tz::Europe::Berlin;
        let parsed = extract_datetime_from_file_name("20230101-120000 Some Text.md", berlin);
        assert_eq!(parsed, Some(utc(2023, 1, 1, 11, 0, 0)));
    }

    // ---- extract_datetime_from_marker ----

    #[test]
    fn test_extract_datetime_from_marker_valid() {
        let parsed = extract_datetime_from_marker("20250101150000", UTC);
        assert_eq!(parsed, Some(utc(2025, 1, 1, 15, 0, 0)));
    }

    #[test]
    fn test_extract_datetime_from_marker_with_timezone_offset() {
        // Europe/Berlin is UTC+1 in January (CET)
        let berlin = chrono_tz::Europe::Berlin;
        let parsed = extract_datetime_from_marker("20250101150000", berlin);
        assert_eq!(parsed, Some(utc(2025, 1, 1, 14, 0, 0)));
    }

    #[test]
    fn test_extract_datetime_from_marker_invalid_format() {
        let malformed = [
            "2025010115000",     // too short
            "202501011500000",   // too long
            "2025-01-01T150000", // separators
            "2025010115000a",    // non-digit
            "",
        ];
        for marker in malformed {
            assert_eq!(extract_datetime_from_marker(marker, UTC), None);
        }
    }

    #[test]
    fn test_extract_datetime_from_marker_invalid_values() {
        let out_of_range = [
            "20250230120000", // Feb 30
            "20250101126000", // minute 60
            "20250101240000", // hour 24
        ];
        for marker in out_of_range {
            assert_eq!(extract_datetime_from_marker(marker, UTC), None);
        }
    }

    // ---- extract_date_from_marker ----

    #[test]
    fn test_extract_date_from_marker_valid() {
        let expected = NaiveDate::from_ymd_opt(2025, 1, 1).unwrap();
        assert_eq!(extract_date_from_marker("20250101"), Some(expected));
    }

    #[test]
    fn test_extract_date_from_marker_invalid_format() {
        let malformed = [
            "2025010",    // too short
            "202501011",  // too long
            "2025-01-01", // separators
            "2025010a",   // non-digit
            "",
        ];
        for marker in malformed {
            assert_eq!(extract_date_from_marker(marker), None);
        }
    }

    #[test]
    fn test_extract_date_from_marker_invalid_values() {
        let out_of_range = [
            "20250230", // Feb 30
            "20251301", // month 13
            "20250132", // day 32
        ];
        for marker in out_of_range {
            assert_eq!(extract_date_from_marker(marker), None);
        }
    }

    // ---- extract_time_from_marker ----

    #[test]
    fn test_extract_time_from_marker_valid() {
        let expected = NaiveTime::from_hms_opt(15, 0, 0).unwrap();
        assert_eq!(extract_time_from_marker("150000"), Some(expected));
    }

    #[test]
    fn test_extract_time_from_marker_invalid_format() {
        let malformed = [
            "15000",    // too short
            "1500000",  // too long
            "15:00:00", // separators
            "15000a",   // non-digit
            "",
        ];
        for marker in malformed {
            assert_eq!(extract_time_from_marker(marker), None);
        }
    }

    #[test]
    fn test_extract_time_from_marker_invalid_values() {
        let out_of_range = [
            "240000", // hour 24
            "156000", // minute 60
        ];
        for marker in out_of_range {
            assert_eq!(extract_time_from_marker(marker), None);
        }
        // Note: chrono allows leap seconds (60), so 150060 is valid
    }

    // ---- extract_datetime_from_marker_list ----

    #[test]
    fn test_no_markers_inherits_datetime() {
        let inherited = utc(2025, 1, 2, 3, 4, 5);
        let resolved = extract_datetime_from_marker_list(&[], inherited, UTC);
        assert_eq!(resolved, inherited);
    }

    #[test]
    fn test_unrelated_markers_inherits_datetime() {
        let inherited = utc(2025, 1, 2, 3, 4, 5);
        // None of these has the shape of a time, date or datetime marker.
        let markers = marker_list(&["not-a-marker", "2025-01-01", "1500", "1234567"]);
        let resolved = extract_datetime_from_marker_list(&markers, inherited, UTC);
        assert_eq!(resolved, inherited);
    }

    #[test]
    fn test_date_only_marker_sets_midnight() {
        let inherited = utc(2025, 6, 7, 8, 9, 10);
        let markers = marker_list(&["20250101"]);
        let resolved = extract_datetime_from_marker_list(&markers, inherited, UTC);
        assert_eq!(resolved, utc(2025, 1, 1, 0, 0, 0));
    }

    #[test]
    fn test_time_only_marker_inherits_date() {
        let inherited = utc(2025, 6, 7, 8, 9, 10);
        let markers = marker_list(&["150000"]);
        let resolved = extract_datetime_from_marker_list(&markers, inherited, UTC);
        assert_eq!(resolved, utc(2025, 6, 7, 15, 0, 0));
    }

    #[test]
    fn test_datetime_marker_overrides_both_date_and_time() {
        let inherited = utc(2025, 6, 7, 8, 9, 10);
        let markers = marker_list(&["20250101150000"]);
        let resolved = extract_datetime_from_marker_list(&markers, inherited, UTC);
        assert_eq!(resolved, utc(2025, 1, 1, 15, 0, 0));
    }

    #[test]
    fn test_combined_date_and_time_markers() {
        let inherited = utc(2025, 6, 7, 8, 9, 10);
        let markers = marker_list(&["20250101", "150000"]);
        let resolved = extract_datetime_from_marker_list(&markers, inherited, UTC);
        assert_eq!(resolved, utc(2025, 1, 1, 15, 0, 0));
    }

    #[test]
    fn test_first_marker_wins_when_multiple_dates_or_times() {
        let inherited = utc(2025, 6, 7, 8, 9, 10);
        let markers = marker_list(&["20250101", "150000", "20250102", "160000"]);
        let resolved = extract_datetime_from_marker_list(&markers, inherited, UTC);
        assert_eq!(resolved, utc(2025, 1, 1, 15, 0, 0));
    }

    #[test]
    fn test_last_separated_date_and_time_win() {
        let inherited = utc(2025, 6, 7, 8, 9, 10);
        let markers = marker_list(&["20250101", "150000", "20250102160000"]);
        // The first date (20250101) and first time (150000) should win over the later
        // combined datetime.
        let resolved = extract_datetime_from_marker_list(&markers, inherited, UTC);
        assert_eq!(resolved, utc(2025, 1, 1, 15, 0, 0));
    }

    #[test]
    fn test_invalid_date_or_time_markers_are_ignored() {
        let inherited = utc(2025, 6, 7, 8, 9, 10);
        let markers = marker_list(&[
            "20251301", // invalid month
            "240000",   // invalid hour
            "20250101", // valid
            "150000",   // valid
        ]);
        let resolved = extract_datetime_from_marker_list(&markers, inherited, UTC);
        assert_eq!(resolved, utc(2025, 1, 1, 15, 0, 0));
    }

    #[test]
    fn test_marker_list_with_timezone_offset() {
        // Europe/Berlin is UTC+2 in summer (CEST)
        let berlin = chrono_tz::Europe::Berlin;
        let inherited = utc(2025, 6, 7, 8, 9, 10);
        let markers = marker_list(&["150000"]);
        let resolved = extract_datetime_from_marker_list(&markers, inherited, berlin);
        assert_eq!(resolved, utc(2025, 6, 7, 13, 0, 0));
    }
}