From e15e6f105313dc373650ad6a51a3390b4921c296 Mon Sep 17 00:00:00 2001 From: Konstantin Fickel Date: Mon, 13 Apr 2026 19:26:09 +0200 Subject: [PATCH 1/5] fix: broken tasks extraction --- src/extract/parser.rs | 68 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 62 insertions(+), 6 deletions(-) diff --git a/src/extract/parser.rs b/src/extract/parser.rs index 1326371..b1d0374 100644 --- a/src/extract/parser.rs +++ b/src/extract/parser.rs @@ -12,6 +12,8 @@ struct BlockInfo { end_line: usize, block_type: BlockType, events: Vec>, + /// Nested list items contained within this block (for ListItem blocks with sub-lists). + nested_items: Vec, } #[derive(Debug, Clone, PartialEq)] @@ -110,12 +112,14 @@ pub fn parse_markdown_file(file_name: &str, file_content: &str) -> StreamFile { fn collect_blocks(content: &str, parser: Parser) -> Vec { let mut blocks = Vec::new(); let mut current_block: Option = None; - let _current_events: Vec> = Vec::new(); let mut depth = 0; let mut list_items: Vec = Vec::new(); let mut in_list = false; let mut list_start_line = 0; + // Stack for nested lists: (saved current_block, saved list_items, saved list_start_line) + let mut list_nesting_stack: Vec<(Option, Vec, usize)> = Vec::new(); + // Pre-compute line starts for offset-to-line mapping let line_starts: Vec = std::iter::once(0) .chain(content.match_indices('\n').map(|(i, _)| i + 1)) @@ -135,6 +139,7 @@ fn collect_blocks(content: &str, parser: Parser) -> Vec { end_line: line, block_type: BlockType::Paragraph, events: Vec::new(), + nested_items: Vec::new(), }); } depth += 1; @@ -166,6 +171,7 @@ fn collect_blocks(content: &str, parser: Parser) -> Vec { end_line: line, block_type: BlockType::Heading(heading_level), events: Vec::new(), + nested_items: Vec::new(), }); } depth += 1; @@ -186,7 +192,15 @@ fn collect_blocks(content: &str, parser: Parser) -> Vec { } } Event::Start(Tag::List(_)) => { - if !in_list { + if in_list { + // Entering a nested list: save current list item and collected items + list_nesting_stack.push(( + current_block.take(), + std::mem::take(&mut list_items), + list_start_line, + )); + list_start_line = line; + } else { in_list = true; list_start_line = line; list_items.clear(); @@ -195,7 +209,18 @@ fn collect_blocks(content: &str, parser: Parser) -> Vec { } Event::End(TagEnd::List(_)) => { depth -= 1; - if depth == 0 && in_list { + if let Some((parent_block, parent_items, parent_start_line)) = + list_nesting_stack.pop() + { + // Nested list ended: attach collected items as nested children of parent item + let nested = std::mem::take(&mut list_items); + list_start_line = parent_start_line; + list_items = parent_items; + current_block = parent_block.map(|mut item| { + item.nested_items = nested; + item + }); + } else if depth == 0 && in_list { in_list = false; // Create a list block containing all list items if !list_items.is_empty() { @@ -204,6 +229,7 @@ fn collect_blocks(content: &str, parser: Parser) -> Vec { end_line: line, block_type: BlockType::List, events: vec![], // List events are handled through list_items + nested_items: vec![], }); // Store list items for later processing for item in list_items.drain(..) { @@ -222,6 +248,7 @@ fn collect_blocks(content: &str, parser: Parser) -> Vec { end_line: line, block_type: BlockType::ListItem, events: Vec::new(), + nested_items: Vec::new(), }); } } @@ -240,6 +267,7 @@ fn collect_blocks(content: &str, parser: Parser) -> Vec { end_line: line, block_type: BlockType::CodeBlock, events: Vec::new(), + nested_items: Vec::new(), }); } depth += 1; @@ -507,13 +535,21 @@ fn parse_single_block_shard( } } BlockType::List | BlockType::ListItem => { - // List handling is complex - for now, extract any markers/tags let (markers, tags) = extract_block_markers_and_tags(block); - if markers.is_empty() { + // Recursively build child shards from nested list items + let children: Vec = block + .nested_items + .iter() + .filter_map(|item| { + let (child, _) = parse_single_block_shard(item, item.start_line, item.end_line); + child + }) + .collect(); + if markers.is_empty() && children.is_empty() { (None, tags) } else { ( - Some(build_shard(start_line, end_line, markers, tags, vec![])), + Some(build_shard(start_line, end_line, markers, tags, children)), vec![], ) } @@ -716,6 +752,26 @@ mod tests { ); } + #[test] + fn test_parse_nested_list_creates_three_shards() { + let content = "* @Task 1\n * @Task 2\n* @Task 3"; + let result = parse_markdown_file(&make_file_name(), content); + let root = result.shard.unwrap(); + // The root shard should have two top-level children: @Task 1 and @Task 3 + assert_eq!(root.children.len(), 2, "expected 2 top-level shards"); + let task1 = &root.children[0]; + let task3 = &root.children[1]; + // @Task 1 must carry its marker and contain @Task 2 as a child + assert_eq!(task1.markers, vec!["Task"], "@Task 1 marker"); + assert_eq!(task1.children.len(), 1, "@Task 1 should have one child"); + let task2 = &task1.children[0]; + assert_eq!(task2.markers, vec!["Task"], "@Task 2 marker"); + assert!(task2.children.is_empty(), "@Task 2 should have no children"); + // @Task 3 is a sibling of @Task 1 + assert_eq!(task3.markers, vec!["Task"], "@Task 3 marker"); + assert!(task3.children.is_empty(), "@Task 3 should have no children"); + } + #[test] fn test_parse_continues_looking_for_markers_after_first_link_marker() { let result = parse_markdown_file( From b653590c366a818b4781b41bd0aff11f1f80db50 Mon Sep 17 00:00:00 2001 From: Konstantin Fickel Date: Mon, 13 Apr 2026 19:30:59 +0200 Subject: [PATCH 2/5] feat(localize): extract file_type from filename prefix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add `extract_file_type_from_file_name` to parse prefixes like `_daily` from filenames (e.g. `20260412-123456_daily.md` → `"daily"`). Insert the result into `initial_location` in `localize_stream_file` so all localized shards carry a `file_type` dimension value. Also register the `file_type` dimension in `TaskConfiguration` so the propagation contract is documented. --- src/localize/datetime.rs | 72 +++++++++++++++++++++++++++++++++++ src/localize/mod.rs | 2 +- src/localize/preconfigured.rs | 6 +++ src/localize/shard.rs | 8 +++- 4 files changed, 86 insertions(+), 2 deletions(-) diff --git a/src/localize/datetime.rs b/src/localize/datetime.rs index 9f6d2ec..f5fd601 100644 --- a/src/localize/datetime.rs +++ b/src/localize/datetime.rs @@ -9,6 +9,11 @@ use std::path::Path; static FILE_NAME_REGEX: Lazy = Lazy::new(|| Regex::new(r"^(?P\d{8})(?:-(?P