fix: handle empty block correctly

Signed-off-by: Konstantin Fickel <mail@konstantinfickel.de>
This commit is contained in:
Konstantin Fickel 2025-06-20 21:59:05 +02:00
parent 63ce959d4c
commit de99ffe83e
2 changed files with 7 additions and 11 deletions

View file

@@ -3,7 +3,7 @@ from itertools import takewhile, dropwhile
from typing import Optional, TypeVar from typing import Optional, TypeVar
from pydantic import BaseModel from pydantic import BaseModel
from mistletoe import Document from mistletoe import Document
from mistletoe.markdown_renderer import MarkdownRenderer, Fragment from mistletoe.markdown_renderer import MarkdownRenderer, Fragment, BlankLine
from mistletoe.span_token import SpanToken, RawText from mistletoe.span_token import SpanToken, RawText
from mistletoe.block_token import Paragraph, BlockToken, Heading from mistletoe.block_token import Paragraph, BlockToken, Heading
from mistletoe.token import Token from mistletoe.token import Token
@@ -75,10 +75,10 @@ def find_shard_positions(block_tokens: list[BlockToken]) -> list[int]:
] ]
T = TypeVar("T") A = TypeVar("A")
def split_at(list_to_be_split: list[T], positions: list[int]): def split_at(list_to_be_split: list[A], positions: list[int]):
positions = sorted(set([0, *positions, len(list_to_be_split)])) positions = sorted(set([0, *positions, len(list_to_be_split)]))
return [list_to_be_split[left:right] for left, right in pairwise(positions)] return [list_to_be_split[left:right] for left, right in pairwise(positions)]
@@ -202,7 +202,10 @@ def parse_header_shards(
slice_positions = find_heading_positions(block_tokens, header_level_for_slicing) slice_positions = find_heading_positions(block_tokens, header_level_for_slicing)
if first_token_is_header: if first_token_is_header:
slice_positions.append(1) for end_of_header in range(0, len(block_tokens) - 1):
if not isinstance(block_tokens[end_of_header + 1], BlankLine):
continue
slice_positions.append(end_of_header)
is_first_slice_part_of_parent_shard = 0 not in slice_positions is_first_slice_part_of_parent_shard = 0 not in slice_positions
sliced_by_heading_level = split_at(block_tokens, slice_positions) sliced_by_heading_level = split_at(block_tokens, slice_positions)

View file

@@ -159,13 +159,6 @@ class TestParseProcess:
start_line=1, start_line=1,
end_line=4, end_line=4,
children=[ children=[
Shard(
markers=[],
tags=[],
start_line=2,
end_line=2,
children=[],
),
Shard( Shard(
markers=["Marker1"], markers=["Marker1"],
tags=["Tag2"], tags=["Tag2"],