streamd/test/test_parse.py
Konstantin Fickel 695a28e715 chore: refactor and simplify parsing
Signed-off-by: Konstantin Fickel <mail@konstantinfickel.de>
2025-06-22 12:52:15 +02:00

129 lines
4 KiB
Python

from streamer.parse import StreamFile, parse_markdown_file, Shard
from faker import Faker
# Module-level Faker instance; used below to generate a file name for the tests.
fake = Faker()
class TestParseProcess:
    """Check ``parse_markdown_file`` against small in-memory documents.

    Each test parses a short string and compares the result with a
    hand-built ``StreamFile`` / ``Shard`` tree.
    """

    # Faker-generated file name (requested with the "md" extension), created
    # once when the class body runs and shared by every test.
    file_name: str = fake.file_name(extension="md")

    def test_parse_empty_file(self):
        """An empty document yields a ``StreamFile`` whose shard is ``None``."""
        parsed = parse_markdown_file(self.file_name, "")

        expected = StreamFile(filename=self.file_name, shard=None)
        assert parsed == expected

    def test_parse_basic_one_line_file(self):
        """A single plain line becomes one shard covering line 1 only."""
        source_text = "Hello World"
        parsed = parse_markdown_file(self.file_name, source_text)

        root = Shard(
            markers=[],
            tags=[],
            start_line=1,
            end_line=1,
            children=[],
        )
        expected = StreamFile(filename=self.file_name, shard=root)
        assert parsed == expected

    def test_parse_basic_multi_line_file(self):
        """Two plain paragraphs stay in one shard spanning lines 1 to 3."""
        source_text = "Hello World\n\nHello again!"
        parsed = parse_markdown_file(self.file_name, source_text)

        root = Shard(
            markers=[],
            tags=[],
            start_line=1,
            end_line=3,
            children=[],
        )
        expected = StreamFile(filename=self.file_name, shard=root)
        assert parsed == expected

    def test_parse_single_line_with_tag(self):
        """A leading ``@Tag`` is reported in the shard's ``markers``."""
        source_text = "@Tag Hello World"
        parsed = parse_markdown_file(self.file_name, source_text)

        root = Shard(
            markers=["Tag"],
            tags=[],
            start_line=1,
            end_line=1,
            children=[],
        )
        expected = StreamFile(filename=self.file_name, shard=root)
        assert parsed == expected

    def test_parse_single_line_with_two_tags(self):
        """Two leading ``@`` words are both reported as markers, in order."""
        source_text = "@Tag1 @Tag2 Hello World"
        parsed = parse_markdown_file(self.file_name, source_text)

        root = Shard(
            markers=["Tag1", "Tag2"],
            tags=[],
            start_line=1,
            end_line=1,
            children=[],
        )
        expected = StreamFile(filename=self.file_name, shard=root)
        assert parsed == expected

    def test_parse_single_line_with_two_tags_and_misplaced_tag(self):
        """An ``@`` word after the text lands in ``tags``, not ``markers``."""
        source_text = "@Tag1 @Tag2 Hello World @Tag3"
        parsed = parse_markdown_file(self.file_name, source_text)

        root = Shard(
            markers=["Tag1", "Tag2"],
            tags=["Tag3"],
            start_line=1,
            end_line=1,
            children=[],
        )
        expected = StreamFile(filename=self.file_name, shard=root)
        assert parsed == expected

    def test_parse_split_paragraphs_into_shards(self):
        """Paragraphs that start with a marker become child shards of the root."""
        source_text = "Hello World!\n\n@Tag1 Block 1\n\n@Tag2 Block 2"
        parsed = parse_markdown_file(self.file_name, source_text)

        first_block = Shard(
            markers=["Tag1"],
            tags=[],
            start_line=3,
            end_line=3,
            children=[],
        )
        second_block = Shard(
            markers=["Tag2"],
            tags=[],
            start_line=5,
            end_line=5,
            children=[],
        )
        # The root shard spans the whole document (lines 1 to 5).
        root = Shard(
            markers=[],
            tags=[],
            start_line=1,
            end_line=5,
            children=[first_block, second_block],
        )
        expected = StreamFile(filename=self.file_name, shard=root)
        assert parsed == expected

    def test_parse_split_paragraph_with_inner_tags_at_more_positions(self):
        """Inline tags of unmarked paragraphs are collected on the root shard.

        Only the paragraph that starts with ``@Marker`` becomes a child; the
        tags from the first and last paragraphs appear in the root's ``tags``.
        """
        source_text = "Hello @Tag1 World!\n\n@Marker Block 1\n\nBlock 2 @Tag2"
        parsed = parse_markdown_file(self.file_name, source_text)

        marked_block = Shard(
            markers=["Marker"],
            tags=[],
            start_line=3,
            end_line=3,
            children=[],
        )
        root = Shard(
            markers=[],
            tags=["Tag1", "Tag2"],
            start_line=1,
            end_line=5,
            children=[marked_block],
        )
        expected = StreamFile(filename=self.file_name, shard=root)
        assert parsed == expected