streamd/test/parse/test_parse.py
Konstantin Fickel 79095bad4a
refactor: store file in position, rename filename to file_name
Signed-off-by: Konstantin Fickel <mail@konstantinfickel.de>
2026-01-31 17:22:05 +01:00

344 lines
11 KiB
Python

from faker import Faker
from streamer.parse import Shard, StreamFile, parse_markdown_file
# Module-level Faker instance; the test class below calls it to generate a file name.
fake = Faker()
class TestParseProcess:
    """Compare ``parse_markdown_file`` results with hand-built expectations.

    Every test feeds a small Markdown snippet to the parser and checks the
    returned ``StreamFile`` (or just its ``shard``) for equality with an
    explicitly constructed value.
    """

    # One generated file name with an ``md`` extension, shared by all tests.
    file_name: str = fake.file_name(extension="md")

    def test_parse_empty_file(self):
        """Empty text is expected to give a single shard spanning line 1."""
        parsed = parse_markdown_file(self.file_name, "")
        expected = StreamFile(
            file_name=self.file_name, shard=Shard(start_line=1, end_line=1)
        )
        assert parsed == expected

    def test_parse_basic_one_line_file(self):
        """One line without ``@`` words is expected to give a plain shard."""
        markdown = "Hello World"
        parsed = parse_markdown_file(self.file_name, markdown)
        expected = StreamFile(
            file_name=self.file_name,
            shard=Shard(
                start_line=1,
                end_line=1,
            ),
        )
        assert parsed == expected

    def test_parse_basic_multi_line_file(self):
        """Two paragraphs without ``@`` words: one shard from line 1 to 3."""
        markdown = "Hello World\n\nHello again!"
        parsed = parse_markdown_file(self.file_name, markdown)
        expected = StreamFile(
            file_name=self.file_name,
            shard=Shard(
                start_line=1,
                end_line=3,
            ),
        )
        assert parsed == expected

    def test_parse_single_line_with_tag(self):
        """A leading ``@Tag`` is expected in the shard's ``markers``."""
        markdown = "@Tag Hello World"
        parsed = parse_markdown_file(self.file_name, markdown)
        expected = StreamFile(
            file_name=self.file_name,
            shard=Shard(
                markers=["Tag"],
                start_line=1,
                end_line=1,
            ),
        )
        assert parsed == expected

    def test_parse_single_line_with_two_tags(self):
        """Two leading ``@`` words are both expected in ``markers``, in order."""
        markdown = "@Marker1 @Marker2 Hello World"
        parsed = parse_markdown_file(self.file_name, markdown)
        expected = StreamFile(
            file_name=self.file_name,
            shard=Shard(
                markers=["Marker1", "Marker2"],
                start_line=1,
                end_line=1,
            ),
        )
        assert parsed == expected

    def test_parse_single_line_with_two_tags_and_misplaced_tag(self):
        """A trailing ``@Tag3`` is expected in ``tags``, not in ``markers``."""
        markdown = "@Tag1 @Tag2 Hello World @Tag3"
        parsed = parse_markdown_file(self.file_name, markdown)
        expected = StreamFile(
            file_name=self.file_name,
            shard=Shard(
                markers=["Tag1", "Tag2"],
                tags=["Tag3"],
                start_line=1,
                end_line=1,
            ),
        )
        assert parsed == expected

    def test_parse_split_paragraphs_into_shards(self):
        """Each paragraph starting with an ``@`` word is expected as a child shard."""
        markdown = "Hello World!\n\n@Tag1 Block 1\n\n@Tag2 Block 2"
        parsed = parse_markdown_file(self.file_name, markdown)
        first_block = Shard(
            markers=["Tag1"],
            start_line=3,
            end_line=3,
        )
        second_block = Shard(
            markers=["Tag2"],
            start_line=5,
            end_line=5,
        )
        expected = StreamFile(
            file_name=self.file_name,
            shard=Shard(
                start_line=1,
                end_line=5,
                children=[first_block, second_block],
            ),
        )
        assert parsed == expected

    def test_parse_split_paragraph_with_inner_tags_at_more_positions(self):
        """Inline ``@`` words of unmarked paragraphs are expected as root tags."""
        markdown = "Hello @Tag1 World!\n\n@Marker Block 1\n\nBlock 2 @Tag2"
        parsed = parse_markdown_file(self.file_name, markdown)
        marked_block = Shard(
            markers=["Marker"], start_line=3, end_line=3, children=[]
        )
        expected = Shard(
            tags=["Tag1", "Tag2"],
            start_line=1,
            end_line=5,
            children=[marked_block],
        )
        assert parsed.shard == expected

    def test_parse_header_without_markers(self):
        """Headings without ``@`` words are expected to give one plain shard."""
        markdown = "# Heading\n\n## Subheading"
        parsed = parse_markdown_file(self.file_name, markdown)
        expected = Shard(
            start_line=1,
            end_line=3,
        )
        assert parsed.shard == expected

    def test_parse_split_at_heading_if_marker_on_subheading(self):
        """A marker on a subheading is expected to split shards at both heading levels."""
        markdown = (
            "# Heading @Tag1\n\n## @Marker1 Subheading @Tag2\n\n# Heading @Tag3"
        )
        parsed = parse_markdown_file(self.file_name, markdown)
        subheading = Shard(
            markers=["Marker1"],
            tags=["Tag2"],
            start_line=3,
            end_line=4,
        )
        first_heading = Shard(
            tags=["Tag1"],
            start_line=1,
            end_line=4,
            children=[subheading],
        )
        second_heading = Shard(
            tags=["Tag3"], start_line=5, end_line=5, children=[]
        )
        expected = StreamFile(
            file_name=self.file_name,
            shard=Shard(
                start_line=1,
                end_line=5,
                children=[first_heading, second_heading],
            ),
        )
        assert parsed == expected

    def test_parse_only_parse_releveant_levels(self):
        """A subheading carrying only a tag is expected to stay in the marked heading's shard."""
        markdown = "# @Marker1 Heading @Tag1\n\n## Subheading @Tag2"
        parsed = parse_markdown_file(self.file_name, markdown)
        expected = StreamFile(
            file_name=self.file_name,
            shard=Shard(
                markers=["Marker1"],
                tags=["Tag1", "Tag2"],
                start_line=1,
                end_line=3,
            ),
        )
        assert parsed == expected

    def test_parse_fullly_before_headings_start(self):
        """Content ahead of the first heading is expected in a shard of its own."""
        markdown = "Hello\n\n@Marker1 World!\n\n# @Marker2 I'm a heading!"
        parsed = parse_markdown_file(self.file_name, markdown)
        marked_paragraph = Shard(
            markers=["Marker1"],
            start_line=3,
            end_line=3,
        )
        before_heading = Shard(
            start_line=1,
            end_line=4,
            children=[marked_paragraph],
        )
        heading = Shard(
            markers=["Marker2"], start_line=5, end_line=5, children=[]
        )
        expected = Shard(
            start_line=1,
            end_line=5,
            children=[before_heading, heading],
        )
        assert parsed.shard == expected

    def test_parse_complex_heading_structure(self):
        """Mixed heading levels with a preamble are expected to nest as built below."""
        markdown = "Preamble @Preamble\n## @Intro\n# @Title\n## @Chapter1\n## @Chapter2\n### Section 1\n### Section 2"
        parsed = parse_markdown_file(self.file_name, markdown)
        before_title = Shard(
            start_line=1,
            end_line=2,
            children=[
                Shard(
                    tags=["Preamble"],
                    start_line=1,
                    end_line=1,
                ),
                Shard(
                    markers=["Intro"],
                    start_line=2,
                    end_line=2,
                ),
            ],
        )
        title = Shard(
            markers=["Title"],
            start_line=3,
            end_line=7,
            children=[
                Shard(
                    markers=["Chapter1"],
                    start_line=4,
                    end_line=4,
                ),
                Shard(
                    markers=["Chapter2"],
                    start_line=5,
                    end_line=7,
                ),
            ],
        )
        expected = Shard(
            start_line=1,
            end_line=7,
            children=[before_title, title],
        )
        assert parsed.shard == expected

    def test_simple_list(self):
        """A list item starting with ``@Marker`` is expected as a child shard."""
        markdown = "* hello world\n * @Marker i've got a marker"
        parsed = parse_markdown_file(self.file_name, markdown)
        marked_item = Shard(
            markers=["Marker"], tags=[], start_line=2, end_line=2, children=[]
        )
        expected = Shard(
            markers=[],
            tags=[],
            start_line=1,
            end_line=2,
            children=[marked_item],
        )
        assert parsed.shard == expected

    def test_parse_complex_list(self):
        """A multi-item list with two markers is expected to nest as built below."""
        # NOTE(review): the list items in this literal have no leading
        # whitespace in this copy of the file, although the expected shards
        # are nested - confirm the indentation against the repository version.
        markdown = """* I'm the parent!
* @Marker1 I've got a marker\n
* I've got no marker!
* I've got a child with a marker!
* @Marker2 I'm the child with the marker
"""
        parsed = parse_markdown_file(self.file_name, markdown)
        first_marked = Shard(
            markers=["Marker1"],
            tags=[],
            start_line=2,
            end_line=3,
            children=[],
        )
        second_marked = Shard(
            markers=["Marker2"],
            tags=[],
            start_line=6,
            end_line=6,
            children=[],
        )
        second_marked_parent = Shard(
            markers=[],
            tags=[],
            start_line=5,
            end_line=6,
            children=[second_marked],
        )
        inner_list = Shard(
            markers=[],
            tags=[],
            start_line=2,
            end_line=6,
            children=[first_marked, second_marked_parent],
        )
        expected = Shard(
            markers=[],
            tags=[],
            start_line=1,
            end_line=6,
            children=[inner_list],
        )
        assert parsed.shard == expected

    def test_parse_ignores_tags_in_code(self):
        """An ``@`` word inside a fenced code block is expected to be ignored."""
        markdown = "```\n@Marker\n```"
        parsed = parse_markdown_file(self.file_name, markdown)
        expected = Shard(
            markers=[],
            tags=[],
            start_line=1,
            end_line=3,
            children=[],
        )
        assert parsed.shard == expected

    def test_parse_finds_tags_in_italic_text(self):
        """An ``@`` word wrapped in ``*...*`` is expected as a marker."""
        markdown = "*@ItalicMarker*"
        parsed = parse_markdown_file(self.file_name, markdown)
        expected = Shard(
            markers=["ItalicMarker"],
            tags=[],
            start_line=1,
            end_line=1,
            children=[],
        )
        assert parsed.shard == expected

    def test_parse_finds_tags_in_bold_text(self):
        """An ``@`` word wrapped in ``**...**`` is expected as a marker."""
        markdown = "**@BoldMarker**"
        parsed = parse_markdown_file(self.file_name, markdown)
        expected = Shard(
            markers=["BoldMarker"],
            tags=[],
            start_line=1,
            end_line=1,
            children=[],
        )
        assert parsed.shard == expected

    def test_parse_finds_tags_in_strikethrough_text(self):
        """An ``@`` word wrapped in ``~~...~~`` is expected as a marker."""
        markdown = "~~@StrikeMarker~~"
        parsed = parse_markdown_file(self.file_name, markdown)
        expected = Shard(
            markers=["StrikeMarker"],
            tags=[],
            start_line=1,
            end_line=1,
            children=[],
        )
        assert parsed.shard == expected

    def test_parse_finds_tags_in_link(self):
        """An ``@`` word used as link text is expected as a marker."""
        markdown = "[@LinkMarker](https://konstantinfickel.de)"
        parsed = parse_markdown_file(self.file_name, markdown)
        expected = Shard(
            markers=["LinkMarker"],
            tags=[],
            start_line=1,
            end_line=1,
            children=[],
        )
        assert parsed.shard == expected

    def test_parse_continues_looking_for_markers_after_first_link_marker(self):
        """Two consecutive links with ``@`` text are both expected as markers."""
        markdown = "[@LinkMarker1](https://konstantinfickel.de1) [@LinkMarker2](https://konstantinfickel.de)"
        parsed = parse_markdown_file(self.file_name, markdown)
        expected = Shard(
            markers=["LinkMarker1", "LinkMarker2"],
            tags=[],
            start_line=1,
            end_line=1,
            children=[],
        )
        assert parsed.shard == expected