fix: handle empty block correctly

Signed-off-by: Konstantin Fickel <mail@konstantinfickel.de>
This commit is contained in:
Konstantin Fickel 2025-06-20 21:59:05 +02:00
parent 63ce959d4c
commit de99ffe83e
2 changed files with 7 additions and 11 deletions

View file

@ -3,7 +3,7 @@ from itertools import takewhile, dropwhile
from typing import Optional, TypeVar
from pydantic import BaseModel
from mistletoe import Document
from mistletoe.markdown_renderer import MarkdownRenderer, Fragment
from mistletoe.markdown_renderer import MarkdownRenderer, Fragment, BlankLine
from mistletoe.span_token import SpanToken, RawText
from mistletoe.block_token import Paragraph, BlockToken, Heading
from mistletoe.token import Token
@ -75,10 +75,10 @@ def find_shard_positions(block_tokens: list[BlockToken]) -> list[int]:
]
T = TypeVar("T")
A = TypeVar("A")
def split_at(list_to_be_split: list[T], positions: list[int]):
def split_at(list_to_be_split: list[A], positions: list[int]):
positions = sorted(set([0, *positions, len(list_to_be_split)]))
return [list_to_be_split[left:right] for left, right in pairwise(positions)]
@ -202,7 +202,10 @@ def parse_header_shards(
slice_positions = find_heading_positions(block_tokens, header_level_for_slicing)
if first_token_is_header:
slice_positions.append(1)
for end_of_header in range(0, len(block_tokens) - 1):
if not isinstance(block_tokens[end_of_header + 1], BlankLine):
continue
slice_positions.append(end_of_header)
is_first_slice_part_of_parent_shard = 0 not in slice_positions
sliced_by_heading_level = split_at(block_tokens, slice_positions)

View file

@ -159,13 +159,6 @@ class TestParseProcess:
start_line=1,
end_line=4,
children=[
Shard(
markers=[],
tags=[],
start_line=2,
end_line=2,
children=[],
),
Shard(
markers=["Marker1"],
tags=["Tag2"],