feat: add streamd daily command and file_type dimension #88
4 changed files with 86 additions and 2 deletions
|
|
@ -9,6 +9,11 @@ use std::path::Path;
|
|||
static FILE_NAME_REGEX: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new(r"^(?P<date>\d{8})(?:-(?P<time>\d{4,6}))?.+\.md$").unwrap());
|
||||
|
||||
/// Regex for extracting a file-type prefix from file names.
|
||||
/// Matches filenames like `20260412-123456_daily.md` or `20260412_daily Some Title.md`.
|
||||
static FILE_TYPE_REGEX: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new(r"^\d{8}(?:-\d{4,6})?_([a-zA-Z0-9]+)").unwrap());
|
||||
|
||||
/// Regex for validating datetime marker format (14 digits).
|
||||
static DATETIME_MARKER_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^\d{14}$").unwrap());
|
||||
|
||||
|
|
@ -62,6 +67,28 @@ pub fn extract_datetime_from_file_name(file_name: &str, tz: Tz) -> Option<DateTi
|
|||
.and_then(|dt| naive_to_utc(dt, tz))
|
||||
}
|
||||
|
||||
/// Extract the file-type prefix from a filename.
|
||||
///
|
||||
/// Filenames with a `_prefix` segment after the timestamp (and optional time component)
|
||||
/// are recognised. The prefix must consist of alphanumeric characters only.
|
||||
///
|
||||
/// # Examples
|
||||
/// - `"20260412-123456_daily.md"` → `Some("daily")`
|
||||
/// - `"20260412_daily Some Title.md"` → `Some("daily")`
|
||||
/// - `"20260412-123456 Some Title.md"` → `None`
|
||||
/// - `"/path/to/20260412-123456_daily.md"` → `Some("daily")`
|
||||
pub fn extract_file_type_from_file_name(file_name: &str) -> Option<String> {
|
||||
let base_name = Path::new(file_name)
|
||||
.file_name()
|
||||
.and_then(|s| s.to_str())
|
||||
.unwrap_or(file_name);
|
||||
|
||||
FILE_TYPE_REGEX
|
||||
.captures(base_name)
|
||||
.and_then(|c| c.get(1))
|
||||
.map(|m| m.as_str().to_string())
|
||||
}
|
||||
|
||||
/// Parse a 14-digit marker string as a NaiveDateTime without timezone conversion.
|
||||
fn parse_naive_datetime_from_marker(marker: &str) -> Option<NaiveDateTime> {
|
||||
if !DATETIME_MARKER_REGEX.is_match(marker) {
|
||||
|
|
@ -155,6 +182,51 @@ mod tests {
|
|||
use chrono::TimeZone;
|
||||
use chrono_tz::UTC;
|
||||
|
||||
#[test]
fn test_extract_file_type_with_time() {
    // Date and time component directly followed by the `_daily` prefix.
    let file_type = extract_file_type_from_file_name("20260412-123456_daily.md");
    assert_eq!(file_type.as_deref(), Some("daily"));
}
|
||||
|
||||
#[test]
fn test_extract_file_type_with_time_and_title() {
    // A free-text title after the prefix must not leak into the file type.
    let file_type = extract_file_type_from_file_name("20260412-123456_daily Some Title.md");
    assert_eq!(file_type.as_deref(), Some("daily"));
}
|
||||
|
||||
#[test]
fn test_extract_file_type_without_time() {
    // The time component is optional: a date-only name still yields the prefix.
    let file_type = extract_file_type_from_file_name("20260412_daily.md");
    assert_eq!(file_type.as_deref(), Some("daily"));
}
|
||||
|
||||
#[test]
fn test_extract_file_type_without_prefix() {
    // No underscore follows the timestamp, so there is no file type to extract.
    let file_type = extract_file_type_from_file_name("20260412-123456 Some Title.md");
    assert!(file_type.is_none());
}
|
||||
|
||||
#[test]
fn test_extract_file_type_with_full_path() {
    // Leading directories are ignored; only the final path component is matched.
    let file_type = extract_file_type_from_file_name("/path/to/20260412-123456_daily.md");
    assert_eq!(file_type.as_deref(), Some("daily"));
}
|
||||
|
||||
#[test]
fn test_extract_file_type_no_timestamp() {
    // Without the leading 8-digit date the name cannot carry a file type.
    let file_type = extract_file_type_from_file_name("notes.md");
    assert!(file_type.is_none());
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_date_from_file_name_valid() {
|
||||
let file_name = "20230101-123456 Some Text.md";
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ pub use configuration::{
|
|||
};
|
||||
pub use datetime::{
|
||||
extract_date_from_marker, extract_datetime_from_file_name, extract_datetime_from_marker,
|
||||
extract_datetime_from_marker_list, extract_time_from_marker,
|
||||
extract_datetime_from_marker_list, extract_file_type_from_file_name, extract_time_from_marker,
|
||||
};
|
||||
pub use preconfigured::TaskConfiguration;
|
||||
pub use shard::{localize_shard, localize_stream_file};
|
||||
|
|
|
|||
|
|
@ -20,6 +20,12 @@ pub static TaskConfiguration: Lazy<RepositoryConfiguration> = Lazy::new(|| {
|
|||
.with_comment("Project the task is attached to")
|
||||
.with_propagate(true),
|
||||
)
|
||||
.with_dimension(
|
||||
"file_type",
|
||||
Dimension::new("File Type")
|
||||
.with_comment("Type of file derived from filename prefix (e.g. 'daily')")
|
||||
.with_propagate(true),
|
||||
)
|
||||
.with_marker(
|
||||
"Task",
|
||||
Marker::new("Task").with_placements(vec![
|
||||
|
|
|
|||
|
|
@ -5,7 +5,10 @@ use indexmap::{IndexMap, IndexSet};
|
|||
use crate::error::StreamdError;
|
||||
use crate::models::{LocalizedShard, RepositoryConfiguration, Shard, StreamFile};
|
||||
|
||||
use super::datetime::{extract_datetime_from_file_name, extract_datetime_from_marker_list};
|
||||
use super::datetime::{
|
||||
extract_datetime_from_file_name, extract_datetime_from_marker_list,
|
||||
extract_file_type_from_file_name,
|
||||
};
|
||||
|
||||
/// Localize a shard within the repository's coordinate system.
|
||||
///
|
||||
|
|
@ -102,6 +105,9 @@ pub fn localize_stream_file(
|
|||
|
||||
let mut initial_location = IndexMap::new();
|
||||
initial_location.insert("file".to_string(), stream_file.file_name.clone());
|
||||
if let Some(file_type) = extract_file_type_from_file_name(&stream_file.file_name) {
|
||||
initial_location.insert("file_type".to_string(), file_type);
|
||||
}
|
||||
|
||||
Ok(localize_shard(
|
||||
shard,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue