chore: fix timezone handling
Some checks failed
Continuous Integration / Lint, Check & Test (push) Failing after 57s
Release / Build and Release (push) Successful in 5s
Continuous Integration / Build Package (push) Successful in 1m43s

This commit is contained in:
Konstantin Fickel 2026-04-07 13:26:34 +02:00
parent e8dc2013bc
commit 5dca68037d
Signed by: kfickel
GPG key ID: A793722F9933C1A5
7 changed files with 194 additions and 94 deletions

View file

@ -1,4 +1,5 @@
use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Utc};
use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Utc};
use chrono_tz::Tz;
use once_cell::sync::Lazy;
use regex::Regex;
use std::path::Path;
@ -17,15 +18,25 @@ static DATE_MARKER_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^\d{8}$").unwr
/// Regex for validating time marker format (6 digits).
static TIME_MARKER_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^\d{6}$").unwrap());
/// Convert a naive (timezone-less) datetime to UTC by interpreting it as local time in `tz`.
///
/// - Unambiguous local time: converted directly.
/// - Ambiguous local time (e.g. the repeated hour when clocks are set back): the earliest
///   of the candidate instants is used.
/// - Non-existent local time (e.g. the hour skipped when clocks are set forward): `None`.
fn naive_to_utc(dt: NaiveDateTime, tz: Tz) -> Option<DateTime<Utc>> {
    // `earliest()` already yields the sole value for an unambiguous result, so a
    // separate `single()` lookup beforehand would only repeat the conversion.
    tz.from_local_datetime(&dt)
        .earliest()
        .map(|local| local.with_timezone(&Utc))
}
/// Extract a datetime from a file name in the format YYYYMMDD-HHMMSS.
///
/// The time component is optional and can be 4-6 digits (HHMM, HHMMS, or HHMMSS).
/// The datetime is interpreted in the given timezone.
///
/// # Examples
/// - "20230101-123456 Some Text.md" -> DateTime for 2023-01-01 12:34:56
/// - "20230101 Some Text.md" -> DateTime for 2023-01-01 00:00:00
/// - "20230101-123456 Some Text.md" -> DateTime for 2023-01-01 12:34:56 in tz
/// - "20230101 Some Text.md" -> DateTime for 2023-01-01 00:00:00 in tz
/// - "invalid-file-name.md" -> None
pub fn extract_datetime_from_file_name(file_name: &str) -> Option<DateTime<Utc>> {
pub fn extract_datetime_from_file_name(file_name: &str, tz: Tz) -> Option<DateTime<Utc>> {
let base_name = Path::new(file_name)
.file_name()
.and_then(|s| s.to_str())
@ -48,20 +59,23 @@ pub fn extract_datetime_from_file_name(file_name: &str) -> Option<DateTime<Utc>>
NaiveDateTime::parse_from_str(&datetime_str, "%Y%m%d %H:%M:%S")
.ok()
.map(|dt| dt.and_utc())
.and_then(|dt| naive_to_utc(dt, tz))
}
/// Parse a `YYYYMMDDHHMMSS` marker into a `NaiveDateTime`; no timezone is applied.
///
/// Yields `None` when the marker is rejected by `DATETIME_MARKER_REGEX` or when its
/// digits do not form a valid calendar date and time of day.
fn parse_naive_datetime_from_marker(marker: &str) -> Option<NaiveDateTime> {
    if DATETIME_MARKER_REGEX.is_match(marker) {
        NaiveDateTime::parse_from_str(marker, "%Y%m%d%H%M%S").ok()
    } else {
        None
    }
}
/// Extract a datetime from a marker string in the exact format: YYYYMMDDHHMMSS.
///
/// The datetime is interpreted in the given timezone.
/// Returns the parsed datetime if the format matches and values are valid.
pub fn extract_datetime_from_marker(marker: &str) -> Option<DateTime<Utc>> {
if !DATETIME_MARKER_REGEX.is_match(marker) {
return None;
}
NaiveDateTime::parse_from_str(marker, "%Y%m%d%H%M%S")
.ok()
.map(|dt| dt.and_utc())
pub fn extract_datetime_from_marker(marker: &str, tz: Tz) -> Option<DateTime<Utc>> {
parse_naive_datetime_from_marker(marker).and_then(|dt| naive_to_utc(dt, tz))
}
/// Extract a date from a marker string in the exact format: YYYYMMDD.
@ -90,6 +104,7 @@ pub fn extract_time_from_marker(marker: &str) -> Option<NaiveTime> {
///
/// The function processes markers in reverse order, allowing later markers to override
/// earlier ones. It combines date-only and time-only markers when both are present.
/// All naive datetimes (from markers and the inherited fallback) are interpreted in `tz`.
///
/// Rules:
/// - If a full datetime marker (14 digits) is found, it sets both date and time
@ -99,6 +114,7 @@ pub fn extract_time_from_marker(marker: &str) -> Option<NaiveTime> {
pub fn extract_datetime_from_marker_list(
markers: &[String],
inherited_datetime: DateTime<Utc>,
tz: Tz,
) -> DateTime<Utc> {
let mut shard_time: Option<NaiveTime> = None;
let mut shard_date: Option<NaiveDate> = None;
@ -111,34 +127,39 @@ pub fn extract_datetime_from_marker_list(
if let Some(date) = extract_date_from_marker(marker) {
shard_date = Some(date);
}
if let Some(datetime) = extract_datetime_from_marker(marker) {
shard_date = Some(datetime.naive_utc().date());
shard_time = Some(datetime.naive_utc().time());
if let Some(naive_dt) = parse_naive_datetime_from_marker(marker) {
shard_date = Some(naive_dt.date());
shard_time = Some(naive_dt.time());
}
}
// Interpret the inherited datetime in the configured timezone for fallback values
let inherited_local = inherited_datetime.with_timezone(&tz).naive_local();
// Combine date and time, applying defaults as needed
let final_date = shard_date.unwrap_or_else(|| inherited_datetime.naive_utc().date());
let final_date = shard_date.unwrap_or_else(|| inherited_local.date());
let final_time = match (shard_date, shard_time) {
// If we have a date but no time, use midnight
(Some(_), None) => NaiveTime::from_hms_opt(0, 0, 0).unwrap(),
// Otherwise use the shard time or inherit
_ => shard_time.unwrap_or_else(|| inherited_datetime.naive_utc().time()),
_ => shard_time.unwrap_or_else(|| inherited_local.time()),
};
NaiveDateTime::new(final_date, final_time).and_utc()
let naive = NaiveDateTime::new(final_date, final_time);
naive_to_utc(naive, tz).unwrap_or_else(|| inherited_datetime)
}
#[cfg(test)]
mod tests {
use super::*;
use chrono::TimeZone;
use chrono_tz::UTC;
#[test]
fn test_extract_date_from_file_name_valid() {
let file_name = "20230101-123456 Some Text.md";
assert_eq!(
extract_datetime_from_file_name(file_name),
extract_datetime_from_file_name(file_name, UTC),
Some(Utc.with_ymd_and_hms(2023, 1, 1, 12, 34, 56).unwrap())
);
}
@ -146,14 +167,14 @@ mod tests {
#[test]
fn test_extract_date_from_file_name_invalid() {
let file_name = "invalid-file-name.md";
assert_eq!(extract_datetime_from_file_name(file_name), None);
assert_eq!(extract_datetime_from_file_name(file_name, UTC), None);
}
#[test]
fn test_extract_date_from_file_name_without_time() {
let file_name = "20230101 Some Text.md";
assert_eq!(
extract_datetime_from_file_name(file_name),
extract_datetime_from_file_name(file_name, UTC),
Some(Utc.with_ymd_and_hms(2023, 1, 1, 0, 0, 0).unwrap())
);
}
@ -162,7 +183,7 @@ mod tests {
fn test_extract_date_from_file_name_short_time() {
let file_name = "20230101-1234 Some Text.md";
assert_eq!(
extract_datetime_from_file_name(file_name),
extract_datetime_from_file_name(file_name, UTC),
Some(Utc.with_ymd_and_hms(2023, 1, 1, 12, 34, 0).unwrap())
);
}
@ -170,41 +191,61 @@ mod tests {
#[test]
fn test_extract_date_from_file_name_empty_string() {
let file_name = "";
assert_eq!(extract_datetime_from_file_name(file_name), None);
assert_eq!(extract_datetime_from_file_name(file_name, UTC), None);
}
#[test]
fn test_extract_date_from_file_name_with_full_path() {
let file_name = "/path/to/20230101-123456 Some Text.md";
assert_eq!(
extract_datetime_from_file_name(file_name),
extract_datetime_from_file_name(file_name, UTC),
Some(Utc.with_ymd_and_hms(2023, 1, 1, 12, 34, 56).unwrap())
);
}
#[test]
fn test_extract_date_from_file_name_with_timezone_offset() {
    // Berlin is on CET (UTC+1) in January, so 12:00:00 local time is 11:00:00 UTC.
    let expected = Utc.with_ymd_and_hms(2023, 1, 1, 11, 0, 0).unwrap();
    let actual = extract_datetime_from_file_name(
        "20230101-120000 Some Text.md",
        chrono_tz::Europe::Berlin,
    );
    assert_eq!(actual, Some(expected));
}
#[test]
fn test_extract_datetime_from_marker_valid() {
let marker = "20250101150000";
assert_eq!(
extract_datetime_from_marker(marker),
extract_datetime_from_marker(marker, UTC),
Some(Utc.with_ymd_and_hms(2025, 1, 1, 15, 0, 0).unwrap())
);
}
#[test]
fn test_extract_datetime_from_marker_with_timezone_offset() {
    // Berlin is on CET (UTC+1) in January, so 15:00:00 local time is 14:00:00 UTC.
    let expected = Utc.with_ymd_and_hms(2025, 1, 1, 14, 0, 0).unwrap();
    let actual = extract_datetime_from_marker("20250101150000", chrono_tz::Europe::Berlin);
    assert_eq!(actual, Some(expected));
}
#[test]
fn test_extract_datetime_from_marker_invalid_format() {
assert_eq!(extract_datetime_from_marker("2025010115000"), None); // too short
assert_eq!(extract_datetime_from_marker("202501011500000"), None); // too long
assert_eq!(extract_datetime_from_marker("2025-01-01T150000"), None); // separators
assert_eq!(extract_datetime_from_marker("2025010115000a"), None); // non-digit
assert_eq!(extract_datetime_from_marker(""), None);
assert_eq!(extract_datetime_from_marker("2025010115000", UTC), None); // too short
assert_eq!(extract_datetime_from_marker("202501011500000", UTC), None); // too long
assert_eq!(extract_datetime_from_marker("2025-01-01T150000", UTC), None); // separators
assert_eq!(extract_datetime_from_marker("2025010115000a", UTC), None); // non-digit
assert_eq!(extract_datetime_from_marker("", UTC), None);
}
#[test]
fn test_extract_datetime_from_marker_invalid_values() {
assert_eq!(extract_datetime_from_marker("20250230120000"), None); // Feb 30
assert_eq!(extract_datetime_from_marker("20250101126000"), None); // minute 60
assert_eq!(extract_datetime_from_marker("20250101240000"), None); // hour 24
assert_eq!(extract_datetime_from_marker("20250230120000", UTC), None); // Feb 30
assert_eq!(extract_datetime_from_marker("20250101126000", UTC), None); // minute 60
assert_eq!(extract_datetime_from_marker("20250101240000", UTC), None); // hour 24
}
#[test]
@ -260,7 +301,10 @@ mod tests {
#[test]
fn test_no_markers_inherits_datetime() {
let inherited = Utc.with_ymd_and_hms(2025, 1, 2, 3, 4, 5).unwrap();
assert_eq!(extract_datetime_from_marker_list(&[], inherited), inherited);
assert_eq!(
extract_datetime_from_marker_list(&[], inherited, UTC),
inherited
);
}
#[test]
@ -273,7 +317,7 @@ mod tests {
"1234567".to_string(),
];
assert_eq!(
extract_datetime_from_marker_list(&markers, inherited),
extract_datetime_from_marker_list(&markers, inherited, UTC),
inherited
);
}
@ -283,7 +327,7 @@ mod tests {
let inherited = Utc.with_ymd_and_hms(2025, 6, 7, 8, 9, 10).unwrap();
let markers = vec!["20250101".to_string()];
assert_eq!(
extract_datetime_from_marker_list(&markers, inherited),
extract_datetime_from_marker_list(&markers, inherited, UTC),
Utc.with_ymd_and_hms(2025, 1, 1, 0, 0, 0).unwrap()
);
}
@ -293,7 +337,7 @@ mod tests {
let inherited = Utc.with_ymd_and_hms(2025, 6, 7, 8, 9, 10).unwrap();
let markers = vec!["150000".to_string()];
assert_eq!(
extract_datetime_from_marker_list(&markers, inherited),
extract_datetime_from_marker_list(&markers, inherited, UTC),
Utc.with_ymd_and_hms(2025, 6, 7, 15, 0, 0).unwrap()
);
}
@ -303,7 +347,7 @@ mod tests {
let inherited = Utc.with_ymd_and_hms(2025, 6, 7, 8, 9, 10).unwrap();
let markers = vec!["20250101150000".to_string()];
assert_eq!(
extract_datetime_from_marker_list(&markers, inherited),
extract_datetime_from_marker_list(&markers, inherited, UTC),
Utc.with_ymd_and_hms(2025, 1, 1, 15, 0, 0).unwrap()
);
}
@ -313,7 +357,7 @@ mod tests {
let inherited = Utc.with_ymd_and_hms(2025, 6, 7, 8, 9, 10).unwrap();
let markers = vec!["20250101".to_string(), "150000".to_string()];
assert_eq!(
extract_datetime_from_marker_list(&markers, inherited),
extract_datetime_from_marker_list(&markers, inherited, UTC),
Utc.with_ymd_and_hms(2025, 1, 1, 15, 0, 0).unwrap()
);
}
@ -328,7 +372,7 @@ mod tests {
"160000".to_string(),
];
assert_eq!(
extract_datetime_from_marker_list(&markers, inherited),
extract_datetime_from_marker_list(&markers, inherited, UTC),
Utc.with_ymd_and_hms(2025, 1, 1, 15, 0, 0).unwrap()
);
}
@ -343,7 +387,7 @@ mod tests {
];
// The first date (20250101) and first time (150000) should win over the later combined datetime
assert_eq!(
extract_datetime_from_marker_list(&markers, inherited),
extract_datetime_from_marker_list(&markers, inherited, UTC),
Utc.with_ymd_and_hms(2025, 1, 1, 15, 0, 0).unwrap()
);
}
@ -358,8 +402,19 @@ mod tests {
"150000".to_string(), // valid
];
assert_eq!(
extract_datetime_from_marker_list(&markers, inherited),
extract_datetime_from_marker_list(&markers, inherited, UTC),
Utc.with_ymd_and_hms(2025, 1, 1, 15, 0, 0).unwrap()
);
}
#[test]
fn test_marker_list_with_timezone_offset() {
    // Berlin is on CEST (UTC+2) in June: the time-only marker 15:00:00 is read as local
    // time on the inherited date, which corresponds to 13:00:00 UTC.
    let markers = vec!["150000".to_string()];
    let inherited = Utc.with_ymd_and_hms(2025, 6, 7, 8, 9, 10).unwrap();
    let resolved =
        extract_datetime_from_marker_list(&markers, inherited, chrono_tz::Europe::Berlin);
    assert_eq!(
        resolved,
        Utc.with_ymd_and_hms(2025, 6, 7, 13, 0, 0).unwrap()
    );
}
}