365 lines
13 KiB
Rust
365 lines
13 KiB
Rust
use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Utc};
|
|
use once_cell::sync::Lazy;
|
|
use regex::Regex;
|
|
use std::path::Path;
|
|
|
|
/// Regex for extracting date and optional time from file names.
|
|
/// Format: YYYYMMDD or YYYYMMDD-HHMMSS (time can be 4-6 digits)
|
|
static FILE_NAME_REGEX: Lazy<Regex> =
|
|
Lazy::new(|| Regex::new(r"^(?P<date>\d{8})(?:-(?P<time>\d{4,6}))?.+\.md$").unwrap());
|
|
|
|
/// Regex for validating datetime marker format (14 digits).
|
|
static DATETIME_MARKER_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^\d{14}$").unwrap());
|
|
|
|
/// Regex for validating date marker format (8 digits).
|
|
static DATE_MARKER_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^\d{8}$").unwrap());
|
|
|
|
/// Regex for validating time marker format (6 digits).
|
|
static TIME_MARKER_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^\d{6}$").unwrap());
|
|
|
|
/// Extract a datetime from a file name in the format YYYYMMDD-HHMMSS.
|
|
///
|
|
/// The time component is optional and can be 4-6 digits (HHMM, HHMMS, or HHMMSS).
|
|
///
|
|
/// # Examples
|
|
/// - "20230101-123456 Some Text.md" -> DateTime for 2023-01-01 12:34:56
|
|
/// - "20230101 Some Text.md" -> DateTime for 2023-01-01 00:00:00
|
|
/// - "invalid-file-name.md" -> None
|
|
pub fn extract_datetime_from_file_name(file_name: &str) -> Option<DateTime<Utc>> {
|
|
let base_name = Path::new(file_name)
|
|
.file_name()
|
|
.and_then(|s| s.to_str())
|
|
.unwrap_or(file_name);
|
|
|
|
let captures = FILE_NAME_REGEX.captures(base_name)?;
|
|
let date_str = captures.name("date")?.as_str();
|
|
let time_str = captures.name("time").map(|m| m.as_str()).unwrap_or("");
|
|
|
|
// Pad time string to 6 digits
|
|
let time_str = format!("{:0<6}", time_str);
|
|
|
|
let datetime_str = format!(
|
|
"{} {}:{}:{}",
|
|
date_str,
|
|
&time_str[0..2],
|
|
&time_str[2..4],
|
|
&time_str[4..6]
|
|
);
|
|
|
|
NaiveDateTime::parse_from_str(&datetime_str, "%Y%m%d %H:%M:%S")
|
|
.ok()
|
|
.map(|dt| dt.and_utc())
|
|
}
|
|
|
|
/// Extract a datetime from a marker string in the exact format: YYYYMMDDHHMMSS.
|
|
///
|
|
/// Returns the parsed datetime if the format matches and values are valid.
|
|
pub fn extract_datetime_from_marker(marker: &str) -> Option<DateTime<Utc>> {
|
|
if !DATETIME_MARKER_REGEX.is_match(marker) {
|
|
return None;
|
|
}
|
|
|
|
NaiveDateTime::parse_from_str(marker, "%Y%m%d%H%M%S")
|
|
.ok()
|
|
.map(|dt| dt.and_utc())
|
|
}
|
|
|
|
/// Extract a date from a marker string in the exact format: YYYYMMDD.
|
|
///
|
|
/// Returns the parsed date if the format matches and values are valid.
|
|
pub fn extract_date_from_marker(marker: &str) -> Option<NaiveDate> {
|
|
if !DATE_MARKER_REGEX.is_match(marker) {
|
|
return None;
|
|
}
|
|
|
|
NaiveDate::parse_from_str(marker, "%Y%m%d").ok()
|
|
}
|
|
|
|
/// Extract a time from a marker string in the exact format: HHMMSS.
|
|
///
|
|
/// Returns the parsed time if the format matches and values are valid.
|
|
pub fn extract_time_from_marker(marker: &str) -> Option<NaiveTime> {
|
|
if !TIME_MARKER_REGEX.is_match(marker) {
|
|
return None;
|
|
}
|
|
|
|
NaiveTime::parse_from_str(marker, "%H%M%S").ok()
|
|
}
|
|
|
|
/// Extract a datetime from a list of markers, using an inherited datetime as fallback.
|
|
///
|
|
/// The function processes markers in reverse order, allowing later markers to override
|
|
/// earlier ones. It combines date-only and time-only markers when both are present.
|
|
///
|
|
/// Rules:
|
|
/// - If a full datetime marker (14 digits) is found, it sets both date and time
|
|
/// - If only a date marker is found, the time defaults to midnight
|
|
/// - If only a time marker is found, the date is inherited
|
|
/// - If no valid markers are found, the inherited datetime is returned
|
|
pub fn extract_datetime_from_marker_list(
|
|
markers: &[String],
|
|
inherited_datetime: DateTime<Utc>,
|
|
) -> DateTime<Utc> {
|
|
let mut shard_time: Option<NaiveTime> = None;
|
|
let mut shard_date: Option<NaiveDate> = None;
|
|
|
|
// Process markers in reverse order (last wins)
|
|
for marker in markers.iter().rev() {
|
|
if let Some(time) = extract_time_from_marker(marker) {
|
|
shard_time = Some(time);
|
|
}
|
|
if let Some(date) = extract_date_from_marker(marker) {
|
|
shard_date = Some(date);
|
|
}
|
|
if let Some(datetime) = extract_datetime_from_marker(marker) {
|
|
shard_date = Some(datetime.naive_utc().date());
|
|
shard_time = Some(datetime.naive_utc().time());
|
|
}
|
|
}
|
|
|
|
// Combine date and time, applying defaults as needed
|
|
let final_date = shard_date.unwrap_or_else(|| inherited_datetime.naive_utc().date());
|
|
let final_time = match (shard_date, shard_time) {
|
|
// If we have a date but no time, use midnight
|
|
(Some(_), None) => NaiveTime::from_hms_opt(0, 0, 0).unwrap(),
|
|
// Otherwise use the shard time or inherit
|
|
_ => shard_time.unwrap_or_else(|| inherited_datetime.naive_utc().time()),
|
|
};
|
|
|
|
NaiveDateTime::new(final_date, final_time).and_utc()
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::TimeZone;

    /// Shorthand for an unambiguous UTC timestamp.
    fn utc(y: i32, mo: u32, d: u32, h: u32, mi: u32, s: u32) -> DateTime<Utc> {
        Utc.with_ymd_and_hms(y, mo, d, h, mi, s).unwrap()
    }

    /// Build an owned marker list from string literals.
    fn marker_list(raw: &[&str]) -> Vec<String> {
        raw.iter().map(|m| m.to_string()).collect()
    }

    /// Inherited datetime shared by most marker-list tests.
    fn inherited_default() -> DateTime<Utc> {
        utc(2025, 6, 7, 8, 9, 10)
    }

    // ---- extract_datetime_from_file_name ----

    #[test]
    fn test_extract_date_from_file_name_valid() {
        let actual = extract_datetime_from_file_name("20230101-123456 Some Text.md");
        assert_eq!(actual, Some(utc(2023, 1, 1, 12, 34, 56)));
    }

    #[test]
    fn test_extract_date_from_file_name_invalid() {
        let actual = extract_datetime_from_file_name("invalid-file-name.md");
        assert_eq!(actual, None);
    }

    #[test]
    fn test_extract_date_from_file_name_without_time() {
        let actual = extract_datetime_from_file_name("20230101 Some Text.md");
        assert_eq!(actual, Some(utc(2023, 1, 1, 0, 0, 0)));
    }

    #[test]
    fn test_extract_date_from_file_name_short_time() {
        let actual = extract_datetime_from_file_name("20230101-1234 Some Text.md");
        assert_eq!(actual, Some(utc(2023, 1, 1, 12, 34, 0)));
    }

    #[test]
    fn test_extract_date_from_file_name_empty_string() {
        let actual = extract_datetime_from_file_name("");
        assert_eq!(actual, None);
    }

    #[test]
    fn test_extract_date_from_file_name_with_full_path() {
        let actual = extract_datetime_from_file_name("/path/to/20230101-123456 Some Text.md");
        assert_eq!(actual, Some(utc(2023, 1, 1, 12, 34, 56)));
    }

    // ---- extract_datetime_from_marker ----

    #[test]
    fn test_extract_datetime_from_marker_valid() {
        let actual = extract_datetime_from_marker("20250101150000");
        assert_eq!(actual, Some(utc(2025, 1, 1, 15, 0, 0)));
    }

    #[test]
    fn test_extract_datetime_from_marker_invalid_format() {
        let rejected = [
            "2025010115000",     // too short
            "202501011500000",   // too long
            "2025-01-01T150000", // separators
            "2025010115000a",    // non-digit
            "",
        ];
        for marker in rejected.iter() {
            assert_eq!(extract_datetime_from_marker(marker), None, "marker: {:?}", marker);
        }
    }

    #[test]
    fn test_extract_datetime_from_marker_invalid_values() {
        let rejected = [
            "20250230120000", // Feb 30
            "20250101126000", // minute 60
            "20250101240000", // hour 24
        ];
        for marker in rejected.iter() {
            assert_eq!(extract_datetime_from_marker(marker), None, "marker: {:?}", marker);
        }
    }

    // ---- extract_date_from_marker ----

    #[test]
    fn test_extract_date_from_marker_valid() {
        let expected = NaiveDate::from_ymd_opt(2025, 1, 1).unwrap();
        assert_eq!(extract_date_from_marker("20250101"), Some(expected));
    }

    #[test]
    fn test_extract_date_from_marker_invalid_format() {
        let rejected = [
            "2025010",    // too short
            "202501011",  // too long
            "2025-01-01", // separators
            "2025010a",   // non-digit
            "",
        ];
        for marker in rejected.iter() {
            assert_eq!(extract_date_from_marker(marker), None, "marker: {:?}", marker);
        }
    }

    #[test]
    fn test_extract_date_from_marker_invalid_values() {
        let rejected = [
            "20250230", // Feb 30
            "20251301", // month 13
            "20250132", // day 32
        ];
        for marker in rejected.iter() {
            assert_eq!(extract_date_from_marker(marker), None, "marker: {:?}", marker);
        }
    }

    // ---- extract_time_from_marker ----

    #[test]
    fn test_extract_time_from_marker_valid() {
        let expected = NaiveTime::from_hms_opt(15, 0, 0).unwrap();
        assert_eq!(extract_time_from_marker("150000"), Some(expected));
    }

    #[test]
    fn test_extract_time_from_marker_invalid_format() {
        let rejected = [
            "15000",    // too short
            "1500000",  // too long
            "15:00:00", // separators
            "15000a",   // non-digit
            "",
        ];
        for marker in rejected.iter() {
            assert_eq!(extract_time_from_marker(marker), None, "marker: {:?}", marker);
        }
    }

    #[test]
    fn test_extract_time_from_marker_invalid_values() {
        // Note: chrono allows leap seconds (60), so 150060 is valid and not listed here.
        let rejected = [
            "240000", // hour 24
            "156000", // minute 60
        ];
        for marker in rejected.iter() {
            assert_eq!(extract_time_from_marker(marker), None, "marker: {:?}", marker);
        }
    }

    // ---- extract_datetime_from_marker_list ----

    #[test]
    fn test_no_markers_inherits_datetime() {
        let inherited = utc(2025, 1, 2, 3, 4, 5);
        assert_eq!(extract_datetime_from_marker_list(&[], inherited), inherited);
    }

    #[test]
    fn test_unrelated_markers_inherits_datetime() {
        let inherited = utc(2025, 1, 2, 3, 4, 5);
        let markers = marker_list(&["not-a-marker", "2025-01-01", "1500", "1234567"]);
        let resolved = extract_datetime_from_marker_list(&markers, inherited);
        assert_eq!(resolved, inherited);
    }

    #[test]
    fn test_date_only_marker_sets_midnight() {
        let markers = marker_list(&["20250101"]);
        let resolved = extract_datetime_from_marker_list(&markers, inherited_default());
        assert_eq!(resolved, utc(2025, 1, 1, 0, 0, 0));
    }

    #[test]
    fn test_time_only_marker_inherits_date() {
        let markers = marker_list(&["150000"]);
        let resolved = extract_datetime_from_marker_list(&markers, inherited_default());
        assert_eq!(resolved, utc(2025, 6, 7, 15, 0, 0));
    }

    #[test]
    fn test_datetime_marker_overrides_both_date_and_time() {
        let markers = marker_list(&["20250101150000"]);
        let resolved = extract_datetime_from_marker_list(&markers, inherited_default());
        assert_eq!(resolved, utc(2025, 1, 1, 15, 0, 0));
    }

    #[test]
    fn test_combined_date_and_time_markers() {
        let markers = marker_list(&["20250101", "150000"]);
        let resolved = extract_datetime_from_marker_list(&markers, inherited_default());
        assert_eq!(resolved, utc(2025, 1, 1, 15, 0, 0));
    }

    #[test]
    fn test_first_marker_wins_when_multiple_dates_or_times() {
        let markers = marker_list(&["20250101", "150000", "20250102", "160000"]);
        let resolved = extract_datetime_from_marker_list(&markers, inherited_default());
        assert_eq!(resolved, utc(2025, 1, 1, 15, 0, 0));
    }

    #[test]
    fn test_last_separated_date_and_time_win() {
        let markers = marker_list(&["20250101", "150000", "20250102160000"]);
        // The first date (20250101) and first time (150000) take precedence over the
        // combined datetime that appears later in the list.
        let resolved = extract_datetime_from_marker_list(&markers, inherited_default());
        assert_eq!(resolved, utc(2025, 1, 1, 15, 0, 0));
    }

    #[test]
    fn test_invalid_date_or_time_markers_are_ignored() {
        let markers = marker_list(&[
            "20251301", // invalid month
            "240000",   // invalid hour
            "20250101", // valid
            "150000",   // valid
        ]);
        let resolved = extract_datetime_from_marker_list(&markers, inherited_default());
        assert_eq!(resolved, utc(2025, 1, 1, 15, 0, 0));
    }
}
|