chore: fix timezone handling
This commit is contained in:
parent
e8dc2013bc
commit
5dca68037d
7 changed files with 194 additions and 94 deletions
|
|
@ -1,4 +1,5 @@
|
|||
use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Utc};
|
||||
use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Utc};
|
||||
use chrono_tz::Tz;
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
use std::path::Path;
|
||||
|
|
@ -17,15 +18,25 @@ static DATE_MARKER_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^\d{8}$").unwr
|
|||
/// Regex for validating time marker format (6 digits).
|
||||
static TIME_MARKER_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^\d{6}$").unwrap());
|
||||
|
||||
/// Convert a NaiveDateTime to UTC via the given timezone.
|
||||
/// Falls back to the earliest local interpretation for ambiguous DST times.
|
||||
fn naive_to_utc(dt: NaiveDateTime, tz: Tz) -> Option<DateTime<Utc>> {
|
||||
tz.from_local_datetime(&dt)
|
||||
.single()
|
||||
.or_else(|| tz.from_local_datetime(&dt).earliest())
|
||||
.map(|dt| dt.with_timezone(&Utc))
|
||||
}
|
||||
|
||||
/// Extract a datetime from a file name in the format YYYYMMDD-HHMMSS.
|
||||
///
|
||||
/// The time component is optional and can be 4-6 digits (HHMM, HHMMS, or HHMMSS).
|
||||
/// The datetime is interpreted in the given timezone.
|
||||
///
|
||||
/// # Examples
|
||||
/// - "20230101-123456 Some Text.md" -> DateTime for 2023-01-01 12:34:56
|
||||
/// - "20230101 Some Text.md" -> DateTime for 2023-01-01 00:00:00
|
||||
/// - "20230101-123456 Some Text.md" -> DateTime for 2023-01-01 12:34:56 in tz
|
||||
/// - "20230101 Some Text.md" -> DateTime for 2023-01-01 00:00:00 in tz
|
||||
/// - "invalid-file-name.md" -> None
|
||||
pub fn extract_datetime_from_file_name(file_name: &str) -> Option<DateTime<Utc>> {
|
||||
pub fn extract_datetime_from_file_name(file_name: &str, tz: Tz) -> Option<DateTime<Utc>> {
|
||||
let base_name = Path::new(file_name)
|
||||
.file_name()
|
||||
.and_then(|s| s.to_str())
|
||||
|
|
@ -48,20 +59,23 @@ pub fn extract_datetime_from_file_name(file_name: &str) -> Option<DateTime<Utc>>
|
|||
|
||||
NaiveDateTime::parse_from_str(&datetime_str, "%Y%m%d %H:%M:%S")
|
||||
.ok()
|
||||
.map(|dt| dt.and_utc())
|
||||
.and_then(|dt| naive_to_utc(dt, tz))
|
||||
}
|
||||
|
||||
/// Parse a 14-digit marker string as a NaiveDateTime without timezone conversion.
|
||||
fn parse_naive_datetime_from_marker(marker: &str) -> Option<NaiveDateTime> {
|
||||
if !DATETIME_MARKER_REGEX.is_match(marker) {
|
||||
return None;
|
||||
}
|
||||
NaiveDateTime::parse_from_str(marker, "%Y%m%d%H%M%S").ok()
|
||||
}
|
||||
|
||||
/// Extract a datetime from a marker string in the exact format: YYYYMMDDHHMMSS.
|
||||
///
|
||||
/// The datetime is interpreted in the given timezone.
|
||||
/// Returns the parsed datetime if the format matches and values are valid.
|
||||
pub fn extract_datetime_from_marker(marker: &str) -> Option<DateTime<Utc>> {
|
||||
if !DATETIME_MARKER_REGEX.is_match(marker) {
|
||||
return None;
|
||||
}
|
||||
|
||||
NaiveDateTime::parse_from_str(marker, "%Y%m%d%H%M%S")
|
||||
.ok()
|
||||
.map(|dt| dt.and_utc())
|
||||
pub fn extract_datetime_from_marker(marker: &str, tz: Tz) -> Option<DateTime<Utc>> {
|
||||
parse_naive_datetime_from_marker(marker).and_then(|dt| naive_to_utc(dt, tz))
|
||||
}
|
||||
|
||||
/// Extract a date from a marker string in the exact format: YYYYMMDD.
|
||||
|
|
@ -90,6 +104,7 @@ pub fn extract_time_from_marker(marker: &str) -> Option<NaiveTime> {
|
|||
///
|
||||
/// The function processes markers in reverse order, allowing later markers to override
|
||||
/// earlier ones. It combines date-only and time-only markers when both are present.
|
||||
/// All naive datetimes (from markers and the inherited fallback) are interpreted in `tz`.
|
||||
///
|
||||
/// Rules:
|
||||
/// - If a full datetime marker (14 digits) is found, it sets both date and time
|
||||
|
|
@ -99,6 +114,7 @@ pub fn extract_time_from_marker(marker: &str) -> Option<NaiveTime> {
|
|||
pub fn extract_datetime_from_marker_list(
|
||||
markers: &[String],
|
||||
inherited_datetime: DateTime<Utc>,
|
||||
tz: Tz,
|
||||
) -> DateTime<Utc> {
|
||||
let mut shard_time: Option<NaiveTime> = None;
|
||||
let mut shard_date: Option<NaiveDate> = None;
|
||||
|
|
@ -111,34 +127,39 @@ pub fn extract_datetime_from_marker_list(
|
|||
if let Some(date) = extract_date_from_marker(marker) {
|
||||
shard_date = Some(date);
|
||||
}
|
||||
if let Some(datetime) = extract_datetime_from_marker(marker) {
|
||||
shard_date = Some(datetime.naive_utc().date());
|
||||
shard_time = Some(datetime.naive_utc().time());
|
||||
if let Some(naive_dt) = parse_naive_datetime_from_marker(marker) {
|
||||
shard_date = Some(naive_dt.date());
|
||||
shard_time = Some(naive_dt.time());
|
||||
}
|
||||
}
|
||||
|
||||
// Interpret the inherited datetime in the configured timezone for fallback values
|
||||
let inherited_local = inherited_datetime.with_timezone(&tz).naive_local();
|
||||
|
||||
// Combine date and time, applying defaults as needed
|
||||
let final_date = shard_date.unwrap_or_else(|| inherited_datetime.naive_utc().date());
|
||||
let final_date = shard_date.unwrap_or_else(|| inherited_local.date());
|
||||
let final_time = match (shard_date, shard_time) {
|
||||
// If we have a date but no time, use midnight
|
||||
(Some(_), None) => NaiveTime::from_hms_opt(0, 0, 0).unwrap(),
|
||||
// Otherwise use the shard time or inherit
|
||||
_ => shard_time.unwrap_or_else(|| inherited_datetime.naive_utc().time()),
|
||||
_ => shard_time.unwrap_or_else(|| inherited_local.time()),
|
||||
};
|
||||
|
||||
NaiveDateTime::new(final_date, final_time).and_utc()
|
||||
let naive = NaiveDateTime::new(final_date, final_time);
|
||||
naive_to_utc(naive, tz).unwrap_or_else(|| inherited_datetime)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use chrono::TimeZone;
|
||||
use chrono_tz::UTC;
|
||||
|
||||
#[test]
|
||||
fn test_extract_date_from_file_name_valid() {
|
||||
let file_name = "20230101-123456 Some Text.md";
|
||||
assert_eq!(
|
||||
extract_datetime_from_file_name(file_name),
|
||||
extract_datetime_from_file_name(file_name, UTC),
|
||||
Some(Utc.with_ymd_and_hms(2023, 1, 1, 12, 34, 56).unwrap())
|
||||
);
|
||||
}
|
||||
|
|
@ -146,14 +167,14 @@ mod tests {
|
|||
#[test]
|
||||
fn test_extract_date_from_file_name_invalid() {
|
||||
let file_name = "invalid-file-name.md";
|
||||
assert_eq!(extract_datetime_from_file_name(file_name), None);
|
||||
assert_eq!(extract_datetime_from_file_name(file_name, UTC), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_date_from_file_name_without_time() {
|
||||
let file_name = "20230101 Some Text.md";
|
||||
assert_eq!(
|
||||
extract_datetime_from_file_name(file_name),
|
||||
extract_datetime_from_file_name(file_name, UTC),
|
||||
Some(Utc.with_ymd_and_hms(2023, 1, 1, 0, 0, 0).unwrap())
|
||||
);
|
||||
}
|
||||
|
|
@ -162,7 +183,7 @@ mod tests {
|
|||
fn test_extract_date_from_file_name_short_time() {
|
||||
let file_name = "20230101-1234 Some Text.md";
|
||||
assert_eq!(
|
||||
extract_datetime_from_file_name(file_name),
|
||||
extract_datetime_from_file_name(file_name, UTC),
|
||||
Some(Utc.with_ymd_and_hms(2023, 1, 1, 12, 34, 0).unwrap())
|
||||
);
|
||||
}
|
||||
|
|
@ -170,41 +191,61 @@ mod tests {
|
|||
#[test]
|
||||
fn test_extract_date_from_file_name_empty_string() {
|
||||
let file_name = "";
|
||||
assert_eq!(extract_datetime_from_file_name(file_name), None);
|
||||
assert_eq!(extract_datetime_from_file_name(file_name, UTC), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_date_from_file_name_with_full_path() {
|
||||
let file_name = "/path/to/20230101-123456 Some Text.md";
|
||||
assert_eq!(
|
||||
extract_datetime_from_file_name(file_name),
|
||||
extract_datetime_from_file_name(file_name, UTC),
|
||||
Some(Utc.with_ymd_and_hms(2023, 1, 1, 12, 34, 56).unwrap())
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_date_from_file_name_with_timezone_offset() {
|
||||
// Europe/Berlin is UTC+1 in January (CET)
|
||||
let file_name = "20230101-120000 Some Text.md";
|
||||
assert_eq!(
|
||||
extract_datetime_from_file_name(file_name, chrono_tz::Europe::Berlin),
|
||||
Some(Utc.with_ymd_and_hms(2023, 1, 1, 11, 0, 0).unwrap())
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_datetime_from_marker_valid() {
|
||||
let marker = "20250101150000";
|
||||
assert_eq!(
|
||||
extract_datetime_from_marker(marker),
|
||||
extract_datetime_from_marker(marker, UTC),
|
||||
Some(Utc.with_ymd_and_hms(2025, 1, 1, 15, 0, 0).unwrap())
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_datetime_from_marker_with_timezone_offset() {
|
||||
// Europe/Berlin is UTC+1 in January (CET)
|
||||
let marker = "20250101150000";
|
||||
assert_eq!(
|
||||
extract_datetime_from_marker(marker, chrono_tz::Europe::Berlin),
|
||||
Some(Utc.with_ymd_and_hms(2025, 1, 1, 14, 0, 0).unwrap())
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_datetime_from_marker_invalid_format() {
|
||||
assert_eq!(extract_datetime_from_marker("2025010115000"), None); // too short
|
||||
assert_eq!(extract_datetime_from_marker("202501011500000"), None); // too long
|
||||
assert_eq!(extract_datetime_from_marker("2025-01-01T150000"), None); // separators
|
||||
assert_eq!(extract_datetime_from_marker("2025010115000a"), None); // non-digit
|
||||
assert_eq!(extract_datetime_from_marker(""), None);
|
||||
assert_eq!(extract_datetime_from_marker("2025010115000", UTC), None); // too short
|
||||
assert_eq!(extract_datetime_from_marker("202501011500000", UTC), None); // too long
|
||||
assert_eq!(extract_datetime_from_marker("2025-01-01T150000", UTC), None); // separators
|
||||
assert_eq!(extract_datetime_from_marker("2025010115000a", UTC), None); // non-digit
|
||||
assert_eq!(extract_datetime_from_marker("", UTC), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_datetime_from_marker_invalid_values() {
|
||||
assert_eq!(extract_datetime_from_marker("20250230120000"), None); // Feb 30
|
||||
assert_eq!(extract_datetime_from_marker("20250101126000"), None); // minute 60
|
||||
assert_eq!(extract_datetime_from_marker("20250101240000"), None); // hour 24
|
||||
assert_eq!(extract_datetime_from_marker("20250230120000", UTC), None); // Feb 30
|
||||
assert_eq!(extract_datetime_from_marker("20250101126000", UTC), None); // minute 60
|
||||
assert_eq!(extract_datetime_from_marker("20250101240000", UTC), None); // hour 24
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -260,7 +301,10 @@ mod tests {
|
|||
#[test]
|
||||
fn test_no_markers_inherits_datetime() {
|
||||
let inherited = Utc.with_ymd_and_hms(2025, 1, 2, 3, 4, 5).unwrap();
|
||||
assert_eq!(extract_datetime_from_marker_list(&[], inherited), inherited);
|
||||
assert_eq!(
|
||||
extract_datetime_from_marker_list(&[], inherited, UTC),
|
||||
inherited
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -273,7 +317,7 @@ mod tests {
|
|||
"1234567".to_string(),
|
||||
];
|
||||
assert_eq!(
|
||||
extract_datetime_from_marker_list(&markers, inherited),
|
||||
extract_datetime_from_marker_list(&markers, inherited, UTC),
|
||||
inherited
|
||||
);
|
||||
}
|
||||
|
|
@ -283,7 +327,7 @@ mod tests {
|
|||
let inherited = Utc.with_ymd_and_hms(2025, 6, 7, 8, 9, 10).unwrap();
|
||||
let markers = vec!["20250101".to_string()];
|
||||
assert_eq!(
|
||||
extract_datetime_from_marker_list(&markers, inherited),
|
||||
extract_datetime_from_marker_list(&markers, inherited, UTC),
|
||||
Utc.with_ymd_and_hms(2025, 1, 1, 0, 0, 0).unwrap()
|
||||
);
|
||||
}
|
||||
|
|
@ -293,7 +337,7 @@ mod tests {
|
|||
let inherited = Utc.with_ymd_and_hms(2025, 6, 7, 8, 9, 10).unwrap();
|
||||
let markers = vec!["150000".to_string()];
|
||||
assert_eq!(
|
||||
extract_datetime_from_marker_list(&markers, inherited),
|
||||
extract_datetime_from_marker_list(&markers, inherited, UTC),
|
||||
Utc.with_ymd_and_hms(2025, 6, 7, 15, 0, 0).unwrap()
|
||||
);
|
||||
}
|
||||
|
|
@ -303,7 +347,7 @@ mod tests {
|
|||
let inherited = Utc.with_ymd_and_hms(2025, 6, 7, 8, 9, 10).unwrap();
|
||||
let markers = vec!["20250101150000".to_string()];
|
||||
assert_eq!(
|
||||
extract_datetime_from_marker_list(&markers, inherited),
|
||||
extract_datetime_from_marker_list(&markers, inherited, UTC),
|
||||
Utc.with_ymd_and_hms(2025, 1, 1, 15, 0, 0).unwrap()
|
||||
);
|
||||
}
|
||||
|
|
@ -313,7 +357,7 @@ mod tests {
|
|||
let inherited = Utc.with_ymd_and_hms(2025, 6, 7, 8, 9, 10).unwrap();
|
||||
let markers = vec!["20250101".to_string(), "150000".to_string()];
|
||||
assert_eq!(
|
||||
extract_datetime_from_marker_list(&markers, inherited),
|
||||
extract_datetime_from_marker_list(&markers, inherited, UTC),
|
||||
Utc.with_ymd_and_hms(2025, 1, 1, 15, 0, 0).unwrap()
|
||||
);
|
||||
}
|
||||
|
|
@ -328,7 +372,7 @@ mod tests {
|
|||
"160000".to_string(),
|
||||
];
|
||||
assert_eq!(
|
||||
extract_datetime_from_marker_list(&markers, inherited),
|
||||
extract_datetime_from_marker_list(&markers, inherited, UTC),
|
||||
Utc.with_ymd_and_hms(2025, 1, 1, 15, 0, 0).unwrap()
|
||||
);
|
||||
}
|
||||
|
|
@ -343,7 +387,7 @@ mod tests {
|
|||
];
|
||||
// The first date (20250101) and first time (150000) should win over the later combined datetime
|
||||
assert_eq!(
|
||||
extract_datetime_from_marker_list(&markers, inherited),
|
||||
extract_datetime_from_marker_list(&markers, inherited, UTC),
|
||||
Utc.with_ymd_and_hms(2025, 1, 1, 15, 0, 0).unwrap()
|
||||
);
|
||||
}
|
||||
|
|
@ -358,8 +402,19 @@ mod tests {
|
|||
"150000".to_string(), // valid
|
||||
];
|
||||
assert_eq!(
|
||||
extract_datetime_from_marker_list(&markers, inherited),
|
||||
extract_datetime_from_marker_list(&markers, inherited, UTC),
|
||||
Utc.with_ymd_and_hms(2025, 1, 1, 15, 0, 0).unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_marker_list_with_timezone_offset() {
|
||||
// Europe/Berlin is UTC+2 in summer (CEST)
|
||||
let inherited = Utc.with_ymd_and_hms(2025, 6, 7, 8, 9, 10).unwrap();
|
||||
let markers = vec!["150000".to_string()];
|
||||
assert_eq!(
|
||||
extract_datetime_from_marker_list(&markers, inherited, chrono_tz::Europe::Berlin),
|
||||
Utc.with_ymd_and_hms(2025, 6, 7, 13, 0, 0).unwrap()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue