from faker import Faker

from streamer.parse import Shard, StreamFile, parse_markdown_file

fake = Faker()


class TestParseProcess:
    """Tests for ``parse_markdown_file``.

    Each test feeds a small Markdown snippet to the parser and compares the
    result with the expected ``Shard`` tree (markers, tags, line range and
    children).  In the expectations below, leading ``@Name`` words of a block
    are reported as markers, while ``@Name`` words elsewhere in the text are
    reported as tags.
    """

    # Random ``.md`` file name; evaluated once when the class body runs and
    # shared by every test via ``self.file_name``.
    file_name: str = fake.file_name(extension="md")

    def test_parse_empty_file(self):
        """An empty document yields a single shard on line 1."""
        assert parse_markdown_file(self.file_name, "") == StreamFile(
            filename=self.file_name, shard=Shard(start_line=1, end_line=1)
        )

    def test_parse_basic_one_line_file(self):
        """A one-line text without ``@`` words yields a single shard on line 1."""
        test_file = "Hello World"
        assert parse_markdown_file(self.file_name, test_file) == StreamFile(
            filename=self.file_name,
            shard=Shard(
                start_line=1,
                end_line=1,
            ),
        )

    def test_parse_basic_multi_line_file(self):
        """Paragraphs without ``@`` words stay in one shard spanning lines 1-3."""
        test_file = "Hello World\n\nHello again!"
        assert parse_markdown_file(self.file_name, test_file) == StreamFile(
            filename=self.file_name,
            shard=Shard(
                start_line=1,
                end_line=3,
            ),
        )

    def test_parse_single_line_with_tag(self):
        """A leading ``@`` word is reported as a marker."""
        test_file = "@Tag Hello World"
        assert parse_markdown_file(self.file_name, test_file) == StreamFile(
            filename=self.file_name,
            shard=Shard(
                markers=["Tag"],
                start_line=1,
                end_line=1,
            ),
        )

    def test_parse_single_line_with_two_tags(self):
        """Several leading ``@`` words are all reported as markers, in order."""
        test_file = "@Marker1 @Marker2 Hello World"
        assert parse_markdown_file(self.file_name, test_file) == StreamFile(
            filename=self.file_name,
            shard=Shard(
                markers=["Marker1", "Marker2"],
                start_line=1,
                end_line=1,
            ),
        )

    def test_parse_single_line_with_two_tags_and_misplaced_tag(self):
        """An ``@`` word after the text is reported as a tag, not a marker."""
        test_file = "@Tag1 @Tag2 Hello World @Tag3"
        assert parse_markdown_file(self.file_name, test_file) == StreamFile(
            filename=self.file_name,
            shard=Shard(
                markers=["Tag1", "Tag2"],
                tags=["Tag3"],
                start_line=1,
                end_line=1,
            ),
        )

    def test_parse_split_paragraphs_into_shards(self):
        """Paragraphs that start with a marker become children of the root."""
        file_text = "Hello World!\n\n@Tag1 Block 1\n\n@Tag2 Block 2"
        assert parse_markdown_file(self.file_name, file_text) == StreamFile(
            filename=self.file_name,
            shard=Shard(
                start_line=1,
                end_line=5,
                children=[
                    Shard(
                        markers=["Tag1"],
                        start_line=3,
                        end_line=3,
                    ),
                    Shard(
                        markers=["Tag2"],
                        start_line=5,
                        end_line=5,
                    ),
                ],
            ),
        )

    def test_parse_split_paragraph_with_inner_tags_at_more_positions(self):
        """Tags of unmarked paragraphs land on the root; only the marked one is a child."""
        file_text = "Hello @Tag1 World!\n\n@Marker Block 1\n\nBlock 2 @Tag2"
        assert parse_markdown_file(self.file_name, file_text).shard == Shard(
            tags=["Tag1", "Tag2"],
            start_line=1,
            end_line=5,
            children=[
                Shard(markers=["Marker"], start_line=3, end_line=3, children=[]),
            ],
        )

    def test_parse_header_without_markers(self):
        """Headings without markers do not split the document."""
        file_text = "# Heading\n\n## Subheading"
        assert parse_markdown_file(self.file_name, file_text).shard == Shard(
            start_line=1,
            end_line=3,
        )

    def test_parse_split_at_heading_if_marker_on_subheading(self):
        """A marker on a subheading splits at the headings down to that level."""
        file_text = "# Heading @Tag1\n\n## @Marker1 Subheading @Tag2\n\n# Heading @Tag3"
        assert parse_markdown_file(self.file_name, file_text) == StreamFile(
            filename=self.file_name,
            shard=Shard(
                start_line=1,
                end_line=5,
                children=[
                    Shard(
                        tags=["Tag1"],
                        start_line=1,
                        end_line=4,
                        children=[
                            Shard(
                                markers=["Marker1"],
                                tags=["Tag2"],
                                start_line=3,
                                end_line=4,
                            ),
                        ],
                    ),
                    Shard(tags=["Tag3"], start_line=5, end_line=5, children=[]),
                ],
            ),
        )

    def test_parse_only_parse_releveant_levels(self):
        """An unmarked subheading is not split off; its tag joins the enclosing shard."""
        file_text = "# @Marker1 Heading @Tag1\n\n## Subheading @Tag2"
        assert parse_markdown_file(self.file_name, file_text) == StreamFile(
            filename=self.file_name,
            shard=Shard(
                markers=["Marker1"],
                tags=["Tag1", "Tag2"],
                start_line=1,
                end_line=3,
            ),
        )

    def test_parse_fullly_before_headings_start(self):
        """Content before the first heading forms its own shard with marked children."""
        file_text = "Hello\n\n@Marker1 World!\n\n# @Marker2 I'm a heading!"
        assert parse_markdown_file(self.file_name, file_text).shard == Shard(
            start_line=1,
            end_line=5,
            children=[
                Shard(
                    start_line=1,
                    end_line=4,
                    children=[
                        Shard(
                            markers=["Marker1"],
                            start_line=3,
                            end_line=3,
                        )
                    ],
                ),
                Shard(markers=["Marker2"], start_line=5, end_line=5, children=[]),
            ],
        )

    def test_parse_complex_heading_structure(self):
        """Mixed heading levels: preamble and early subheading are grouped,
        and the unmarked level-3 headings stay inside the last chapter."""
        # Split per input line for readability; the concatenated value is
        # identical to the original single-line literal.
        file_text = (
            "Preamble @Preamble\n"
            "## @Intro\n"
            "# @Title\n"
            "## @Chapter1\n"
            "## @Chapter2\n"
            "### Section 1\n"
            "### Section 2"
        )
        assert parse_markdown_file(self.file_name, file_text).shard == Shard(
            start_line=1,
            end_line=7,
            children=[
                Shard(
                    start_line=1,
                    end_line=2,
                    children=[
                        Shard(
                            tags=["Preamble"],
                            start_line=1,
                            end_line=1,
                        ),
                        Shard(
                            markers=["Intro"],
                            start_line=2,
                            end_line=2,
                        ),
                    ],
                ),
                Shard(
                    markers=["Title"],
                    start_line=3,
                    end_line=7,
                    children=[
                        Shard(
                            markers=["Chapter1"],
                            start_line=4,
                            end_line=4,
                        ),
                        Shard(
                            markers=["Chapter2"],
                            start_line=5,
                            end_line=7,
                        ),
                    ],
                ),
            ],
        )

    def test_simple_list(self):
        """A list item carrying a marker becomes a child shard of the root."""
        file_text = "* hello world\n * @Marker i've got a marker"
        assert parse_markdown_file(self.file_name, file_text).shard == Shard(
            markers=[],
            tags=[],
            start_line=1,
            end_line=2,
            children=[
                Shard(
                    markers=["Marker"], tags=[], start_line=2, end_line=2, children=[]
                )
            ],
        )

    def test_parse_complex_list(self):
        """Nested lists: marked items become shards, nested under the list
        levels that contain them; the unmarked item on line 4 has no shard."""
        # The fixture is written with explicit "\n" escapes and explicit
        # indentation so that its six-line layout (including the blank line 3
        # after the first marked item) cannot be altered by editors or
        # formatters, as can happen with a triple-quoted literal.
        # NOTE(review): four spaces per nesting level is assumed; the expected
        # tree only requires that lines 2/4/5 nest under line 1 and line 6
        # nests under line 5 — confirm against the original fixture.
        file_text = (
            "* I'm the parent!\n"
            "    * @Marker1 I've got a marker\n"
            "\n"
            "    * I've got no marker!\n"
            "    * I've got a child with a marker!\n"
            "        * @Marker2 I'm the child with the marker\n"
        )
        assert parse_markdown_file(self.file_name, file_text).shard == Shard(
            markers=[],
            tags=[],
            start_line=1,
            end_line=6,
            children=[
                Shard(
                    markers=[],
                    tags=[],
                    start_line=2,
                    end_line=6,
                    children=[
                        Shard(
                            markers=["Marker1"],
                            tags=[],
                            start_line=2,
                            end_line=3,
                            children=[],
                        ),
                        Shard(
                            markers=[],
                            tags=[],
                            start_line=5,
                            end_line=6,
                            children=[
                                Shard(
                                    markers=["Marker2"],
                                    tags=[],
                                    start_line=6,
                                    end_line=6,
                                    children=[],
                                )
                            ],
                        ),
                    ],
                )
            ],
        )

    def test_parse_ignores_tags_in_code(self):
        """``@`` words inside a fenced code block are ignored."""
        file_text = "```\n@Marker\n```"
        assert parse_markdown_file(self.file_name, file_text).shard == Shard(
            markers=[],
            tags=[],
            start_line=1,
            end_line=3,
            children=[],
        )

    def test_parse_finds_tags_in_italic_text(self):
        """A leading ``@`` word inside italic text is found as a marker."""
        file_text = "*@ItalicMarker*"
        assert parse_markdown_file(self.file_name, file_text).shard == Shard(
            markers=["ItalicMarker"],
            tags=[],
            start_line=1,
            end_line=1,
            children=[],
        )

    def test_parse_finds_tags_in_bold_text(self):
        """A leading ``@`` word inside bold text is found as a marker."""
        file_text = "**@BoldMarker**"
        assert parse_markdown_file(self.file_name, file_text).shard == Shard(
            markers=["BoldMarker"],
            tags=[],
            start_line=1,
            end_line=1,
            children=[],
        )

    def test_parse_finds_tags_in_strikethrough_text(self):
        """A leading ``@`` word inside strikethrough text is found as a marker."""
        file_text = "~~@StrikeMarker~~"
        assert parse_markdown_file(self.file_name, file_text).shard == Shard(
            markers=["StrikeMarker"],
            tags=[],
            start_line=1,
            end_line=1,
            children=[],
        )

    def test_parse_finds_tags_in_link(self):
        """A leading ``@`` word used as link text is found as a marker."""
        file_text = "[@LinkMarker](https://konstantinfickel.de)"
        assert parse_markdown_file(self.file_name, file_text).shard == Shard(
            markers=["LinkMarker"],
            tags=[],
            start_line=1,
            end_line=1,
            children=[],
        )

    def test_parse_continues_looking_for_markers_after_first_link_marker(self):
        """Markers in two consecutive links are both found."""
        # Implicit concatenation only; the value equals the original literal.
        file_text = (
            "[@LinkMarker1](https://konstantinfickel.de1) "
            "[@LinkMarker2](https://konstantinfickel.de)"
        )
        assert parse_markdown_file(self.file_name, file_text).shard == Shard(
            markers=["LinkMarker1", "LinkMarker2"],
            tags=[],
            start_line=1,
            end_line=1,
            children=[],
        )