From de99ffe83ef809aab02d2c4312060785e7e30f3c Mon Sep 17 00:00:00 2001 From: Konstantin Fickel Date: Fri, 20 Jun 2025 21:59:05 +0200 Subject: [PATCH] fix: handle empty block correctly Signed-off-by: Konstantin Fickel --- src/streamer/parse.py | 11 +++++++---- test/test_parse.py | 7 ------- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/src/streamer/parse.py b/src/streamer/parse.py index 949a655..3ef2200 100644 --- a/src/streamer/parse.py +++ b/src/streamer/parse.py @@ -3,7 +3,7 @@ from itertools import takewhile, dropwhile from typing import Optional, TypeVar from pydantic import BaseModel from mistletoe import Document -from mistletoe.markdown_renderer import MarkdownRenderer, Fragment +from mistletoe.markdown_renderer import MarkdownRenderer, Fragment, BlankLine from mistletoe.span_token import SpanToken, RawText from mistletoe.block_token import Paragraph, BlockToken, Heading from mistletoe.token import Token @@ -75,10 +75,10 @@ def find_shard_positions(block_tokens: list[BlockToken]) -> list[int]: ] -T = TypeVar("T") +A = TypeVar("A") -def split_at(list_to_be_split: list[T], positions: list[int]): +def split_at(list_to_be_split: list[A], positions: list[int]): positions = sorted(set([0, *positions, len(list_to_be_split)])) return [list_to_be_split[left:right] for left, right in pairwise(positions)] @@ -202,7 +202,10 @@ def parse_header_shards( slice_positions = find_heading_positions(block_tokens, header_level_for_slicing) if first_token_is_header: - slice_positions.append(1) + for end_of_header in range(0, len(block_tokens) - 1): + if not isinstance(block_tokens[end_of_header + 1], BlankLine): + continue + slice_positions.append(end_of_header) is_first_slice_part_of_parent_shard = 0 not in slice_positions sliced_by_heading_level = split_at(block_tokens, slice_positions) diff --git a/test/test_parse.py b/test/test_parse.py index 8beef74..4b025a0 100644 --- a/test/test_parse.py +++ b/test/test_parse.py @@ -159,13 +159,6 @@ class TestParseProcess: start_line=1, end_line=4, children=[ - Shard( - markers=[], - tags=[], - start_line=2, - end_line=2, - children=[], - ), Shard( markers=["Marker1"], tags=["Tag2"],