refactor: rewrite in rust
All checks were successful
Continuous Integration / Lint, Check & Test (push) Successful in 1m38s
Continuous Integration / Build Package (push) Successful in 1m54s

This commit is contained in:
Konstantin Fickel 2026-03-29 18:19:15 +02:00
parent 20a3e8b437
commit ed493cff29
Signed by: kfickel
GPG key ID: A793722F9933C1A5
72 changed files with 5684 additions and 3688 deletions

365
src/localize/datetime.rs Normal file
View file

@@ -0,0 +1,365 @@
use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Utc};
use once_cell::sync::Lazy;
use regex::Regex;
use std::path::Path;
/// Regex for extracting the date and optional time from a note file name.
///
/// Accepted shapes are `YYYYMMDD<rest>.md` and `YYYYMMDD-HHMM[S[S]]<rest>.md`: the
/// time part has 4 to 6 digits and `<rest>` (typically a title) may be empty.
///
/// Two details of the pattern are deliberate:
/// - `[0-9]` is used rather than `\d`. In the `regex` crate `\d` is Unicode-aware and
///   would let multi-byte digits through to code that slices the capture by byte offset.
/// - `.*` is used rather than `.+` so that `<rest>` may be empty. With `.+`, a name
///   such as `20230101-123456.md` could only match after the time group gave up its
///   last digit (capturing `12345`), and `20230101-1234.md` matched with no time at all.
static FILE_NAME_REGEX: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"^(?P<date>[0-9]{8})(?:-(?P<time>[0-9]{4,6}))?.*\.md$")
        .expect("FILE_NAME_REGEX is a valid pattern")
});
/// Regex for validating the shape of a datetime marker: exactly 14 digits
/// (`YYYYMMDDHHMMSS`). It checks the shape only; whether the digits form a real
/// date and time is decided by the chrono parse in `extract_datetime_from_marker`.
static DATETIME_MARKER_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^\d{14}$").unwrap());
/// Regex for validating the shape of a date marker: exactly 8 digits (`YYYYMMDD`).
/// Value ranges are checked by the chrono parse in `extract_date_from_marker`.
static DATE_MARKER_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^\d{8}$").unwrap());
/// Regex for validating the shape of a time marker: exactly 6 digits (`HHMMSS`).
/// Value ranges are checked by the chrono parse in `extract_time_from_marker`.
static TIME_MARKER_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^\d{6}$").unwrap());
/// Extract a UTC datetime from a file name that starts with `YYYYMMDD` or
/// `YYYYMMDD-HHMMSS`.
///
/// Only the final path component is inspected, so a full path may be passed.
/// The time component is optional and may have 4 to 6 digits; it is right-padded
/// with zeros to `HHMMSS`, so `1234` means 12:34:00 and `12345` means 12:34:50.
/// Without a time component the result is midnight.
///
/// Returns `None` when the name does not match `FILE_NAME_REGEX` or when the
/// captured digits do not form a valid date and time. This function never panics
/// on arbitrary input.
///
/// # Examples
/// - "20230101-123456 Some Text.md" -> DateTime for 2023-01-01 12:34:56
/// - "20230101-1234 Some Text.md" -> DateTime for 2023-01-01 12:34:00
/// - "20230101 Some Text.md" -> DateTime for 2023-01-01 00:00:00
/// - "invalid-file-name.md" -> None
pub fn extract_datetime_from_file_name(file_name: &str) -> Option<DateTime<Utc>> {
    // Strip directory components; fall back to the raw input when the path has no
    // final component (e.g. an empty string).
    let base_name = Path::new(file_name)
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or(file_name);

    let captures = FILE_NAME_REGEX.captures(base_name)?;
    let date_digits = captures.name("date")?.as_str();
    let time_digits = captures.name("time").map_or("", |m| m.as_str());

    // A Unicode-aware digit class (`\d` in the `regex` crate matches any decimal
    // digit) can capture multi-byte characters. The byte-offset slices below would
    // then cut through a character and panic, so reject anything that is not plain
    // ASCII before slicing.
    if !(date_digits.is_ascii() && time_digits.is_ascii()) {
        return None;
    }

    // Right-pad the time to HHMMSS: "" -> "000000", "1234" -> "123400".
    let padded_time = format!("{:0<6}", time_digits);
    let datetime_str = format!(
        "{} {}:{}:{}",
        date_digits,
        &padded_time[0..2],
        &padded_time[2..4],
        &padded_time[4..6]
    );

    // chrono rejects out-of-range components, which maps to `None` here.
    NaiveDateTime::parse_from_str(&datetime_str, "%Y%m%d %H:%M:%S")
        .ok()
        .map(|naive| naive.and_utc())
}
/// Parse a marker of the exact shape `YYYYMMDDHHMMSS` into a UTC datetime.
///
/// The marker must consist of exactly 14 digits (checked with
/// `DATETIME_MARKER_REGEX`) and those digits must form a date and time that chrono
/// accepts; otherwise `None` is returned.
pub fn extract_datetime_from_marker(marker: &str) -> Option<DateTime<Utc>> {
    let has_marker_shape = DATETIME_MARKER_REGEX.is_match(marker);
    if has_marker_shape {
        // Shape is right; let chrono decide whether the values are in range.
        let naive = NaiveDateTime::parse_from_str(marker, "%Y%m%d%H%M%S").ok()?;
        Some(naive.and_utc())
    } else {
        None
    }
}
/// Parse a marker of the exact shape `YYYYMMDD` into a calendar date.
///
/// The marker must consist of exactly 8 digits (checked with `DATE_MARKER_REGEX`)
/// and those digits must form a date that chrono accepts; otherwise `None` is
/// returned.
pub fn extract_date_from_marker(marker: &str) -> Option<NaiveDate> {
    if DATE_MARKER_REGEX.is_match(marker) {
        // Shape is right; chrono rejects impossible dates such as month 13.
        NaiveDate::parse_from_str(marker, "%Y%m%d").ok()
    } else {
        None
    }
}
/// Parse a marker of the exact shape `HHMMSS` into a time of day.
///
/// The marker must consist of exactly 6 digits (checked with `TIME_MARKER_REGEX`)
/// and those digits must form a time that chrono accepts; otherwise `None` is
/// returned.
pub fn extract_time_from_marker(marker: &str) -> Option<NaiveTime> {
    if TIME_MARKER_REGEX.is_match(marker) {
        // Shape is right; chrono rejects out-of-range values such as hour 24.
        NaiveTime::parse_from_str(marker, "%H%M%S").ok()
    } else {
        None
    }
}
/// Resolve the effective datetime for a list of markers, falling back to an
/// inherited datetime.
///
/// Each marker is classified as a datetime marker (14 digits), a date marker
/// (8 digits) or a time marker (6 digits); anything else is ignored. The date and
/// the time are resolved independently, and for each of them the **first** marker
/// in the list that supplies a value wins. A datetime marker supplies both.
///
/// Rules:
/// - If both a date and a time are found, they are combined
/// - If only a date is found, the time defaults to midnight
/// - If only a time is found, the date is inherited
/// - If no valid markers are found, the inherited datetime is returned
pub fn extract_datetime_from_marker_list(
    markers: &[String],
    inherited_datetime: DateTime<Utc>,
) -> DateTime<Utc> {
    let mut shard_date: Option<NaiveDate> = None;
    let mut shard_time: Option<NaiveTime> = None;

    for marker in markers {
        // The three marker shapes have different lengths, so at most one of these
        // extractors can succeed for a given marker.
        if let Some(datetime) = extract_datetime_from_marker(marker) {
            let naive = datetime.naive_utc();
            shard_date = shard_date.or(Some(naive.date()));
            shard_time = shard_time.or(Some(naive.time()));
        } else if let Some(date) = extract_date_from_marker(marker) {
            shard_date = shard_date.or(Some(date));
        } else if let Some(time) = extract_time_from_marker(marker) {
            shard_time = shard_time.or(Some(time));
        }

        // First value wins for each part, so later markers cannot change anything
        // once both parts are known.
        if shard_date.is_some() && shard_time.is_some() {
            break;
        }
    }

    let inherited = inherited_datetime.naive_utc();
    let final_date = shard_date.unwrap_or_else(|| inherited.date());
    let final_time = match (shard_date, shard_time) {
        (_, Some(time)) => time,
        // An explicit date without a time means the start of that day, not the
        // inherited time of day.
        (Some(_), None) => NaiveTime::from_hms_opt(0, 0, 0).expect("00:00:00 is a valid time"),
        (None, None) => inherited.time(),
    };

    NaiveDateTime::new(final_date, final_time).and_utc()
}
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::TimeZone;

    /// Build a UTC datetime from calendar components; panics on invalid components.
    fn utc(year: i32, month: u32, day: u32, hour: u32, min: u32, sec: u32) -> DateTime<Utc> {
        Utc.with_ymd_and_hms(year, month, day, hour, min, sec)
            .unwrap()
    }

    /// Build an owned marker list from string literals.
    fn marker_list(markers: &[&str]) -> Vec<String> {
        markers.iter().map(|m| m.to_string()).collect()
    }

    // --- extract_datetime_from_file_name ---

    #[test]
    fn test_extract_date_from_file_name_valid() {
        assert_eq!(
            extract_datetime_from_file_name("20230101-123456 Some Text.md"),
            Some(utc(2023, 1, 1, 12, 34, 56))
        );
    }

    #[test]
    fn test_extract_date_from_file_name_invalid() {
        assert_eq!(extract_datetime_from_file_name("invalid-file-name.md"), None);
    }

    #[test]
    fn test_extract_date_from_file_name_without_time() {
        assert_eq!(
            extract_datetime_from_file_name("20230101 Some Text.md"),
            Some(utc(2023, 1, 1, 0, 0, 0))
        );
    }

    #[test]
    fn test_extract_date_from_file_name_short_time() {
        assert_eq!(
            extract_datetime_from_file_name("20230101-1234 Some Text.md"),
            Some(utc(2023, 1, 1, 12, 34, 0))
        );
    }

    #[test]
    fn test_extract_date_from_file_name_five_digit_time() {
        // A 5-digit time is right-padded with a zero: "12345" -> "123450".
        assert_eq!(
            extract_datetime_from_file_name("20230101-12345 Some Text.md"),
            Some(utc(2023, 1, 1, 12, 34, 50))
        );
    }

    #[test]
    fn test_extract_date_from_file_name_empty_string() {
        assert_eq!(extract_datetime_from_file_name(""), None);
    }

    #[test]
    fn test_extract_date_from_file_name_with_full_path() {
        assert_eq!(
            extract_datetime_from_file_name("/path/to/20230101-123456 Some Text.md"),
            Some(utc(2023, 1, 1, 12, 34, 56))
        );
    }

    // --- extract_datetime_from_marker ---

    #[test]
    fn test_extract_datetime_from_marker_valid() {
        assert_eq!(
            extract_datetime_from_marker("20250101150000"),
            Some(utc(2025, 1, 1, 15, 0, 0))
        );
    }

    #[test]
    fn test_extract_datetime_from_marker_invalid_format() {
        assert_eq!(extract_datetime_from_marker("2025010115000"), None); // too short
        assert_eq!(extract_datetime_from_marker("202501011500000"), None); // too long
        assert_eq!(extract_datetime_from_marker("2025-01-01T150000"), None); // separators
        assert_eq!(extract_datetime_from_marker("2025010115000a"), None); // non-digit
        assert_eq!(extract_datetime_from_marker(""), None);
    }

    #[test]
    fn test_extract_datetime_from_marker_invalid_values() {
        assert_eq!(extract_datetime_from_marker("20250230120000"), None); // Feb 30
        assert_eq!(extract_datetime_from_marker("20250101126000"), None); // minute 60
        assert_eq!(extract_datetime_from_marker("20250101240000"), None); // hour 24
    }

    // --- extract_date_from_marker ---

    #[test]
    fn test_extract_date_from_marker_valid() {
        assert_eq!(
            extract_date_from_marker("20250101"),
            Some(NaiveDate::from_ymd_opt(2025, 1, 1).unwrap())
        );
    }

    #[test]
    fn test_extract_date_from_marker_invalid_format() {
        assert_eq!(extract_date_from_marker("2025010"), None); // too short
        assert_eq!(extract_date_from_marker("202501011"), None); // too long
        assert_eq!(extract_date_from_marker("2025-01-01"), None); // separators
        assert_eq!(extract_date_from_marker("2025010a"), None); // non-digit
        assert_eq!(extract_date_from_marker(""), None);
    }

    #[test]
    fn test_extract_date_from_marker_invalid_values() {
        assert_eq!(extract_date_from_marker("20250230"), None); // Feb 30
        assert_eq!(extract_date_from_marker("20251301"), None); // month 13
        assert_eq!(extract_date_from_marker("20250132"), None); // day 32
    }

    // --- extract_time_from_marker ---

    #[test]
    fn test_extract_time_from_marker_valid() {
        assert_eq!(
            extract_time_from_marker("150000"),
            Some(NaiveTime::from_hms_opt(15, 0, 0).unwrap())
        );
    }

    #[test]
    fn test_extract_time_from_marker_invalid_format() {
        assert_eq!(extract_time_from_marker("15000"), None); // too short
        assert_eq!(extract_time_from_marker("1500000"), None); // too long
        assert_eq!(extract_time_from_marker("15:00:00"), None); // separators
        assert_eq!(extract_time_from_marker("15000a"), None); // non-digit
        assert_eq!(extract_time_from_marker(""), None);
    }

    #[test]
    fn test_extract_time_from_marker_invalid_values() {
        assert_eq!(extract_time_from_marker("240000"), None); // hour 24
        assert_eq!(extract_time_from_marker("156000"), None); // minute 60
        // Note: chrono allows leap seconds (60), so 150060 is valid
    }

    // --- extract_datetime_from_marker_list ---

    #[test]
    fn test_no_markers_inherits_datetime() {
        let inherited = utc(2025, 1, 2, 3, 4, 5);
        assert_eq!(extract_datetime_from_marker_list(&[], inherited), inherited);
    }

    #[test]
    fn test_unrelated_markers_inherits_datetime() {
        let inherited = utc(2025, 1, 2, 3, 4, 5);
        let markers = marker_list(&["not-a-marker", "2025-01-01", "1500", "1234567"]);
        assert_eq!(
            extract_datetime_from_marker_list(&markers, inherited),
            inherited
        );
    }

    #[test]
    fn test_date_only_marker_sets_midnight() {
        let inherited = utc(2025, 6, 7, 8, 9, 10);
        let markers = marker_list(&["20250101"]);
        assert_eq!(
            extract_datetime_from_marker_list(&markers, inherited),
            utc(2025, 1, 1, 0, 0, 0)
        );
    }

    #[test]
    fn test_time_only_marker_inherits_date() {
        let inherited = utc(2025, 6, 7, 8, 9, 10);
        let markers = marker_list(&["150000"]);
        assert_eq!(
            extract_datetime_from_marker_list(&markers, inherited),
            utc(2025, 6, 7, 15, 0, 0)
        );
    }

    #[test]
    fn test_datetime_marker_overrides_both_date_and_time() {
        let inherited = utc(2025, 6, 7, 8, 9, 10);
        let markers = marker_list(&["20250101150000"]);
        assert_eq!(
            extract_datetime_from_marker_list(&markers, inherited),
            utc(2025, 1, 1, 15, 0, 0)
        );
    }

    #[test]
    fn test_combined_date_and_time_markers() {
        let inherited = utc(2025, 6, 7, 8, 9, 10);
        let markers = marker_list(&["20250101", "150000"]);
        assert_eq!(
            extract_datetime_from_marker_list(&markers, inherited),
            utc(2025, 1, 1, 15, 0, 0)
        );
    }

    #[test]
    fn test_first_marker_wins_when_multiple_dates_or_times() {
        let inherited = utc(2025, 6, 7, 8, 9, 10);
        let markers = marker_list(&["20250101", "150000", "20250102", "160000"]);
        assert_eq!(
            extract_datetime_from_marker_list(&markers, inherited),
            utc(2025, 1, 1, 15, 0, 0)
        );
    }

    #[test]
    fn test_earlier_date_and_time_markers_win_over_later_datetime() {
        let inherited = utc(2025, 6, 7, 8, 9, 10);
        let markers = marker_list(&["20250101", "150000", "20250102160000"]);
        // The first date (20250101) and first time (150000) win over the later
        // combined datetime marker.
        assert_eq!(
            extract_datetime_from_marker_list(&markers, inherited),
            utc(2025, 1, 1, 15, 0, 0)
        );
    }

    #[test]
    fn test_earlier_datetime_marker_wins_over_later_date_and_time() {
        let inherited = utc(2025, 6, 7, 8, 9, 10);
        let markers = marker_list(&["20250102160000", "20250101", "150000"]);
        // The combined marker comes first, so it supplies both the date and the time.
        assert_eq!(
            extract_datetime_from_marker_list(&markers, inherited),
            utc(2025, 1, 2, 16, 0, 0)
        );
    }

    #[test]
    fn test_invalid_date_or_time_markers_are_ignored() {
        let inherited = utc(2025, 6, 7, 8, 9, 10);
        let markers = marker_list(&[
            "20251301", // invalid month
            "240000",   // invalid hour
            "20250101", // valid
            "150000",   // valid
        ]);
        assert_eq!(
            extract_datetime_from_marker_list(&markers, inherited),
            utc(2025, 1, 1, 15, 0, 0)
        );
    }
}