230 lines
7.4 KiB
Python
230 lines
7.4 KiB
Python
from streamer.parse import StreamFile, parse_markdown_file, Shard
|
|
from faker import Faker
|
|
|
|
# Module-level Faker instance; TestParseProcess uses it to generate a file name.
fake = Faker()
|
|
|
|
|
|
class TestParseProcess:
    """Expectation tests for ``parse_markdown_file``.

    Each test hands a small markdown snippet to the parser and compares the
    result (or just its root ``shard``) with a hand-built expected tree.
    """

    # Generated once, when the class body is executed, and shared by all tests.
    file_name: str = fake.file_name(extension="md")

    def test_parse_empty_file(self):
        """Empty input is expected to give a root shard covering line 1 only."""
        parsed = parse_markdown_file(self.file_name, "")
        expected = StreamFile(
            filename=self.file_name, shard=Shard(start_line=1, end_line=1)
        )

        assert parsed == expected

    def test_parse_basic_one_line_file(self):
        """A single plain line is expected to give a root shard on line 1."""
        markdown = "Hello World"

        parsed = parse_markdown_file(self.file_name, markdown)
        expected = StreamFile(
            filename=self.file_name,
            shard=Shard(
                start_line=1,
                end_line=1,
            ),
        )

        assert parsed == expected

    def test_parse_basic_multi_line_file(self):
        """Two plain paragraphs are expected in one root shard, lines 1-3."""
        markdown = "Hello World\n\nHello again!"

        parsed = parse_markdown_file(self.file_name, markdown)
        expected = StreamFile(
            filename=self.file_name,
            shard=Shard(
                start_line=1,
                end_line=3,
            ),
        )

        assert parsed == expected

    def test_parse_single_line_with_tag(self):
        """A leading ``@Tag`` is expected to show up in ``markers``."""
        markdown = "@Tag Hello World"

        parsed = parse_markdown_file(self.file_name, markdown)
        expected = StreamFile(
            filename=self.file_name,
            shard=Shard(
                markers=["Tag"],
                start_line=1,
                end_line=1,
            ),
        )

        assert parsed == expected

    def test_parse_single_line_with_two_tags(self):
        """Two leading ``@`` words are both expected in ``markers``, in order."""
        markdown = "@Tag1 @Tag2 Hello World"

        parsed = parse_markdown_file(self.file_name, markdown)
        expected = StreamFile(
            filename=self.file_name,
            shard=Shard(
                markers=["Tag1", "Tag2"],
                start_line=1,
                end_line=1,
            ),
        )

        assert parsed == expected

    def test_parse_single_line_with_two_tags_and_misplaced_tag(self):
        """A trailing ``@`` word is expected in ``tags`` rather than ``markers``."""
        markdown = "@Tag1 @Tag2 Hello World @Tag3"

        parsed = parse_markdown_file(self.file_name, markdown)
        expected = StreamFile(
            filename=self.file_name,
            shard=Shard(
                markers=["Tag1", "Tag2"],
                tags=["Tag3"],
                start_line=1,
                end_line=1,
            ),
        )

        assert parsed == expected

    def test_parse_split_paragraphs_into_shards(self):
        """Paragraphs that start with a marker are expected as child shards."""
        markdown = "Hello World!\n\n@Tag1 Block 1\n\n@Tag2 Block 2"

        parsed = parse_markdown_file(self.file_name, markdown)
        first_block = Shard(
            markers=["Tag1"],
            start_line=3,
            end_line=3,
        )
        second_block = Shard(
            markers=["Tag2"],
            start_line=5,
            end_line=5,
        )
        expected = StreamFile(
            filename=self.file_name,
            shard=Shard(
                start_line=1,
                end_line=5,
                children=[first_block, second_block],
            ),
        )

        assert parsed == expected

    def test_parse_split_paragraph_with_inner_tags_at_more_positions(self):
        """Tags of marker-less paragraphs are expected on the root shard."""
        markdown = "Hello @Tag1 World!\n\n@Marker Block 1\n\nBlock 2 @Tag2"

        root = parse_markdown_file(self.file_name, markdown).shard
        expected = Shard(
            tags=["Tag1", "Tag2"],
            start_line=1,
            end_line=5,
            children=[
                Shard(markers=["Marker"], start_line=3, end_line=3, children=[]),
            ],
        )

        assert root == expected

    def test_parse_header_without_markers(self):
        """Headings without markers are expected to stay in one root shard."""
        markdown = "# Heading\n\n## Subheading"

        root = parse_markdown_file(self.file_name, markdown).shard
        expected = Shard(
            start_line=1,
            end_line=3,
        )

        assert root == expected

    def test_parse_split_at_heading_if_marker_on_subheading(self):
        """A marker on a subheading is expected to split the headings above it."""
        markdown = "# Heading @Tag1\n\n## @Marker1 Subheading @Tag2\n\n# Heading @Tag3"

        parsed = parse_markdown_file(self.file_name, markdown)
        subheading = Shard(
            markers=["Marker1"],
            tags=["Tag2"],
            start_line=3,
            end_line=4,
        )
        first_heading = Shard(
            tags=["Tag1"],
            start_line=1,
            end_line=4,
            children=[subheading],
        )
        second_heading = Shard(tags=["Tag3"], start_line=5, end_line=5, children=[])
        expected = StreamFile(
            filename=self.file_name,
            shard=Shard(
                start_line=1,
                end_line=5,
                children=[first_heading, second_heading],
            ),
        )

        assert parsed == expected

    # NOTE: "releveant" is a misspelling of "relevant"; the name is left as-is
    # so the test id does not change.
    def test_parse_only_parse_releveant_levels(self):
        """A marker-less subheading is expected to fold its tag into the root."""
        markdown = "# @Marker1 Heading @Tag1\n\n## Subheading @Tag2"

        parsed = parse_markdown_file(self.file_name, markdown)
        expected = StreamFile(
            filename=self.file_name,
            shard=Shard(
                markers=["Marker1"],
                tags=["Tag1", "Tag2"],
                start_line=1,
                end_line=3,
            ),
        )

        assert parsed == expected

    def test_continue_full_parsing_before_headings_start(self):
        """Text before the first heading is expected in its own split shard."""
        markdown = "Hello\n\n@Marker1 World!\n\n# @Marker2 I'm a heading!"

        root = parse_markdown_file(self.file_name, markdown).shard
        before_heading = Shard(
            start_line=1,
            end_line=4,
            children=[
                Shard(
                    markers=["Marker1"],
                    start_line=3,
                    end_line=3,
                )
            ],
        )
        heading = Shard(markers=["Marker2"], start_line=5, end_line=5, children=[])
        expected = Shard(
            start_line=1,
            end_line=5,
            children=[before_heading, heading],
        )

        assert root == expected

    def test_complex_heading_structure(self):
        """Mixed heading levels are expected to nest under the level-1 title."""
        markdown = "Preamble @Preamble\n## @Intro\n# @Title\n## @Chapter1\n## @Chapter2\n### Section 1\n### Section 2"

        root = parse_markdown_file(self.file_name, markdown).shard
        # Lines 1-2: everything that precedes the level-1 heading.
        before_title = Shard(
            start_line=1,
            end_line=2,
            children=[
                Shard(
                    tags=["Preamble"],
                    start_line=1,
                    end_line=1,
                ),
                Shard(
                    markers=["Intro"],
                    start_line=2,
                    end_line=2,
                ),
            ],
        )
        # Lines 3-7: the title with its two chapters.
        title = Shard(
            markers=["Title"],
            start_line=3,
            end_line=7,
            children=[
                Shard(
                    markers=["Chapter1"],
                    start_line=4,
                    end_line=4,
                ),
                Shard(
                    markers=["Chapter2"],
                    start_line=5,
                    end_line=7,
                ),
            ],
        )
        expected = Shard(
            start_line=1,
            end_line=7,
            children=[before_title, title],
        )

        assert root == expected

    def test_simple_list(self):
        """A marker on a second list item is expected as a child shard."""
        markdown = "* hello world\n * @Marker i've got a marker"

        root = parse_markdown_file(self.file_name, markdown).shard
        marked_item = Shard(
            markers=["Marker"], tags=[], start_line=2, end_line=2, children=[]
        )
        expected = Shard(
            markers=[],
            tags=[],
            start_line=1,
            end_line=2,
            children=[marked_item],
        )

        assert root == expected
|