feat: fix heading & paragraph parsing

Signed-off-by: Konstantin Fickel <mail@konstantinfickel.de>
This commit is contained in:
Konstantin Fickel 2025-06-21 16:02:16 +02:00
parent de99ffe83e
commit 0f645e7e9b
2 changed files with 228 additions and 202 deletions

View file

@@ -9,7 +9,7 @@ class TestParseProcess:
def test_parse_empty_file(self):
assert parse_markdown_file(self.file_name, "") == StreamFile(
filename=self.file_name, shard=None
filename=self.file_name, shard=Shard(start_line=1, end_line=1)
)
def test_parse_basic_one_line_file(self):
@@ -17,11 +17,8 @@ class TestParseProcess:
assert parse_markdown_file(self.file_name, test_file) == StreamFile(
filename=self.file_name,
shard=Shard(
markers=[],
tags=[],
start_line=1,
end_line=1,
children=[],
),
)
@@ -109,37 +106,27 @@ class TestParseProcess:
def test_parse_split_paragraph_with_inner_tags_at_more_positions(self):
file_text = "Hello @Tag1 World!\n\n@Marker Block 1\n\nBlock 2 @Tag2"
assert parse_markdown_file(self.file_name, file_text) == StreamFile(
filename=self.file_name,
shard=Shard(
markers=[],
tags=["Tag1", "Tag2"],
start_line=1,
end_line=5,
children=[
Shard(
markers=["Marker"],
tags=[],
start_line=3,
end_line=3,
children=[],
),
],
),
assert parse_markdown_file(self.file_name, file_text).shard == Shard(
markers=[],
tags=["Tag1", "Tag2"],
start_line=1,
end_line=5,
children=[
Shard(
markers=["Marker"], tags=[], start_line=3, end_line=3, children=[]
),
],
)
def test_parse_header_without_markers(self):
file_text = "# Heading\n\n## Subheading"
assert parse_markdown_file(self.file_name, file_text) == StreamFile(
filename=self.file_name,
shard=Shard(
markers=[],
tags=[],
start_line=1,
end_line=3,
children=[],
),
assert parse_markdown_file(self.file_name, file_text).shard == Shard(
markers=[],
tags=[],
start_line=1,
end_line=3,
children=[],
)
def test_parse_split_at_heading_if_marker_on_subheading(self):
@@ -192,28 +179,85 @@ class TestParseProcess:
def test_continue_full_parsing_before_headings_start(self):
file_text = "Hello\n\n@Marker1 World!\n\n# @Marker2 I'm a heading!"
assert parse_markdown_file(self.file_name, file_text) == StreamFile(
filename=self.file_name,
shard=Shard(
markers=[],
tags=[],
start_line=1,
end_line=5,
children=[
Shard(
markers=["Marker1"],
tags=[],
start_line=3,
end_line=3,
children=[],
),
Shard(
markers=["Marker2"],
tags=[],
start_line=5,
end_line=5,
children=[],
),
],
),
assert parse_markdown_file(self.file_name, file_text).shard == Shard(
markers=[],
tags=[],
start_line=1,
end_line=5,
children=[
Shard(
markers=[],
tags=[],
start_line=1,
end_line=4,
children=[
Shard(
markers=["Marker1"],
tags=[],
start_line=3,
end_line=3,
children=[],
)
],
),
Shard(
markers=["Marker2"], tags=[], start_line=5, end_line=5, children=[]
),
],
)
def test_complex_heading_structure(self):
    """Check the shard tree expected for a preamble plus mixed heading levels.

    The input has seven lines: a tagged preamble paragraph, a level-2
    heading that appears before the first level-1 heading, the level-1
    title, two marked level-2 chapters and two unmarked level-3 sections.
    The expected tree is compared against the ``shard`` attribute of the
    parse result.
    """
    # One literal per input line; adjacent literals are concatenated at
    # compile time, so the text handed to the parser is a single string.
    file_text = (
        "Preamble @Preamble\n"
        "## @Intro\n"
        "# @Title\n"
        "## @Chapter1\n"
        "## @Chapter2\n"
        "### Section 1\n"
        "### Section 2"
    )
    actual = parse_markdown_file(self.file_name, file_text).shard

    # Lines 1-2: everything in front of the "# @Title" heading is expected
    # to be grouped under one unmarked shard with two children.
    preamble_paragraph = Shard(
        markers=[], tags=["Preamble"], start_line=1, end_line=1, children=[]
    )
    intro_heading = Shard(
        markers=["Intro"], tags=[], start_line=2, end_line=2, children=[]
    )
    before_title = Shard(
        markers=[],
        tags=[],
        start_line=1,
        end_line=2,
        children=[preamble_paragraph, intro_heading],
    )

    # Lines 3-7: the title shard is expected to own both chapters; the
    # second chapter runs to line 7 and has no children even though two
    # unmarked "###" sections follow it.
    chapter_one = Shard(
        markers=["Chapter1"], tags=[], start_line=4, end_line=4, children=[]
    )
    chapter_two = Shard(
        markers=["Chapter2"], tags=[], start_line=5, end_line=7, children=[]
    )
    title_section = Shard(
        markers=["Title"],
        tags=[],
        start_line=3,
        end_line=7,
        children=[chapter_one, chapter_two],
    )

    expected = Shard(
        markers=[],
        tags=[],
        start_line=1,
        end_line=7,
        children=[before_title, title_section],
    )
    assert actual == expected