feat: add attach_markdown
Signed-off-by: Konstantin Fickel <mail@konstantinfickel.de>
This commit is contained in:
parent
6bc92ef59c
commit
1ce0790c0c
3 changed files with 80 additions and 3 deletions
76
test/parse/test_attach_markdown.py
Normal file
76
test/parse/test_attach_markdown.py
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
from faker import Faker
|
||||
|
||||
from streamer.parse import Shard, StreamFile
|
||||
from streamer.parse.attach_markdown import (
|
||||
ShardWithMarkdown,
|
||||
StreamFileWithMarkdown,
|
||||
attach_markdown,
|
||||
)
|
||||
|
||||
fake = Faker()
|
||||
|
||||
|
||||
class TestAttachMarkdown:
    """Tests for ``attach_markdown`` applied to parsed ``StreamFile`` objects."""

    # Random ``.md`` file name, generated once when the class body runs and
    # shared by every test below.
    file_name: str = fake.file_name(extension="md")

    def test_attach_markdown_with_shard(self):
        """A childless shard spanning lines 1-3 is expected to carry the full text."""
        source_text = "Hello World\n\nThis is a test."
        parsed_file = StreamFile(
            filename=self.file_name,
            shard=Shard(start_line=1, end_line=3),
        )

        result = attach_markdown(parsed_file, source_text)

        expected = StreamFileWithMarkdown(
            filename=self.file_name,
            shard=ShardWithMarkdown(
                start_line=1,
                end_line=3,
                markdown_content=source_text,
                markers=[],
                tags=[],
                children=[],
            ),
        )
        assert result == expected

    def test_attach_markdown_without_shard(self):
        """A file whose shard is ``None`` is expected to keep ``shard=None``."""
        shardless_file = StreamFile(filename=self.file_name, shard=None)

        result = attach_markdown(shardless_file, "Some markdown text")

        expected = StreamFileWithMarkdown(filename=self.file_name, shard=None)
        assert result == expected

    def test_attach_markdown_with_nested_shards(self):
        """The root keeps the whole text; each child gets the text of its own line."""
        source_text = "Header\n\n@Marker1 Content 1\n\n@Marker2 Content 2"
        parsed_file = StreamFile(
            filename=self.file_name,
            shard=Shard(
                start_line=1,
                end_line=5,
                children=[
                    Shard(markers=["Marker1"], start_line=3, end_line=3),
                    Shard(markers=["Marker2"], start_line=5, end_line=5),
                ],
            ),
        )

        result = attach_markdown(parsed_file, source_text)

        assert result.filename == self.file_name
        root = result.shard
        assert root is not None
        assert root.start_line == 1
        assert root.end_line == 5
        assert root.markdown_content == source_text
        assert len(root.children) == 2

        # (marker, single line number, expected content) for each child, in order.
        expected_children = [
            ("Marker1", 3, "@Marker1 Content 1"),
            ("Marker2", 5, "@Marker2 Content 2"),
        ]
        for child, (marker, line, content) in zip(root.children, expected_children):
            assert child.markers == [marker]
            assert child.start_line == line
            assert child.end_line == line
            assert child.markdown_content == content
|
||||
344
test/parse/test_parse.py
Normal file
344
test/parse/test_parse.py
Normal file
|
|
@ -0,0 +1,344 @@
|
|||
from faker import Faker
|
||||
|
||||
from streamer.parse import Shard, StreamFile, parse_markdown_file
|
||||
|
||||
fake = Faker()
|
||||
|
||||
|
||||
class TestParseProcess:
    """Expected shard trees returned by ``parse_markdown_file`` for sample inputs."""

    # Random ``.md`` file name, generated once when the class body runs and
    # shared by every test below.
    file_name: str = fake.file_name(extension="md")

    def _parse(self, markdown):
        """Run ``parse_markdown_file`` on *markdown* under the shared file name."""
        return parse_markdown_file(self.file_name, markdown)

    def _assert_lone_markers(self, markdown, markers):
        """Assert the root shard is a childless, tag-free line-1 shard with *markers*."""
        root = self._parse(markdown).shard
        expected = Shard(
            markers=markers,
            tags=[],
            start_line=1,
            end_line=1,
            children=[],
        )
        assert root == expected

    def test_parse_empty_file(self):
        """Empty input is expected to yield a single shard covering line 1."""
        parsed = self._parse("")
        expected = StreamFile(
            filename=self.file_name,
            shard=Shard(start_line=1, end_line=1),
        )
        assert parsed == expected

    def test_parse_basic_one_line_file(self):
        """One plain line is expected to yield a shard spanning line 1 only."""
        parsed = self._parse("Hello World")
        expected = StreamFile(
            filename=self.file_name,
            shard=Shard(start_line=1, end_line=1),
        )
        assert parsed == expected

    def test_parse_basic_multi_line_file(self):
        """Two plain paragraphs are expected to yield one shard spanning lines 1-3."""
        parsed = self._parse("Hello World\n\nHello again!")
        expected = StreamFile(
            filename=self.file_name,
            shard=Shard(start_line=1, end_line=3),
        )
        assert parsed == expected

    def test_parse_single_line_with_tag(self):
        """A leading ``@Tag`` is expected to show up in the shard's markers."""
        parsed = self._parse("@Tag Hello World")
        expected = StreamFile(
            filename=self.file_name,
            shard=Shard(markers=["Tag"], start_line=1, end_line=1),
        )
        assert parsed == expected

    def test_parse_single_line_with_two_tags(self):
        """Two leading ``@`` words are both expected to be markers, in order."""
        parsed = self._parse("@Marker1 @Marker2 Hello World")
        expected = StreamFile(
            filename=self.file_name,
            shard=Shard(markers=["Marker1", "Marker2"], start_line=1, end_line=1),
        )
        assert parsed == expected

    def test_parse_single_line_with_two_tags_and_misplaced_tag(self):
        """Leading ``@`` words are expected as markers, the trailing one as a tag."""
        parsed = self._parse("@Tag1 @Tag2 Hello World @Tag3")
        expected = StreamFile(
            filename=self.file_name,
            shard=Shard(
                markers=["Tag1", "Tag2"],
                tags=["Tag3"],
                start_line=1,
                end_line=1,
            ),
        )
        assert parsed == expected

    def test_parse_split_paragraphs_into_shards(self):
        """Each marker-led paragraph is expected to become a child of the root."""
        parsed = self._parse("Hello World!\n\n@Tag1 Block 1\n\n@Tag2 Block 2")
        tag1_block = Shard(markers=["Tag1"], start_line=3, end_line=3)
        tag2_block = Shard(markers=["Tag2"], start_line=5, end_line=5)
        expected = StreamFile(
            filename=self.file_name,
            shard=Shard(
                start_line=1,
                end_line=5,
                children=[tag1_block, tag2_block],
            ),
        )
        assert parsed == expected

    def test_parse_split_paragraph_with_inner_tags_at_more_positions(self):
        """Non-leading ``@`` words are expected on the root as tags; the marker block is a child."""
        root = self._parse(
            "Hello @Tag1 World!\n\n@Marker Block 1\n\nBlock 2 @Tag2"
        ).shard
        expected = Shard(
            tags=["Tag1", "Tag2"],
            start_line=1,
            end_line=5,
            children=[
                Shard(markers=["Marker"], start_line=3, end_line=3, children=[]),
            ],
        )
        assert root == expected

    def test_parse_header_without_markers(self):
        """Headings without any ``@`` word are expected to yield one shard, lines 1-3."""
        root = self._parse("# Heading\n\n## Subheading").shard
        expected = Shard(start_line=1, end_line=3)
        assert root == expected

    def test_parse_split_at_heading_if_marker_on_subheading(self):
        """A marker on a subheading is expected to split the file at both heading levels."""
        parsed = self._parse(
            "# Heading @Tag1\n\n## @Marker1 Subheading @Tag2\n\n# Heading @Tag3"
        )
        subheading = Shard(
            markers=["Marker1"],
            tags=["Tag2"],
            start_line=3,
            end_line=4,
        )
        first_heading = Shard(
            tags=["Tag1"],
            start_line=1,
            end_line=4,
            children=[subheading],
        )
        second_heading = Shard(tags=["Tag3"], start_line=5, end_line=5, children=[])
        expected = StreamFile(
            filename=self.file_name,
            shard=Shard(
                start_line=1,
                end_line=5,
                children=[first_heading, second_heading],
            ),
        )
        assert parsed == expected

    def test_parse_only_parse_releveant_levels(self):
        """A marker-less subheading is expected to add only its tag to the root shard."""
        parsed = self._parse("# @Marker1 Heading @Tag1\n\n## Subheading @Tag2")
        expected = StreamFile(
            filename=self.file_name,
            shard=Shard(
                markers=["Marker1"],
                tags=["Tag1", "Tag2"],
                start_line=1,
                end_line=3,
            ),
        )
        assert parsed == expected

    def test_parse_fullly_before_headings_start(self):
        """Text before the first heading is expected in its own shard, itself split by markers."""
        root = self._parse(
            "Hello\n\n@Marker1 World!\n\n# @Marker2 I'm a heading!"
        ).shard
        before_heading = Shard(
            start_line=1,
            end_line=4,
            children=[
                Shard(markers=["Marker1"], start_line=3, end_line=3),
            ],
        )
        heading = Shard(markers=["Marker2"], start_line=5, end_line=5, children=[])
        expected = Shard(
            start_line=1,
            end_line=5,
            children=[before_heading, heading],
        )
        assert root == expected

    def test_parse_complex_heading_structure(self):
        """Mixed heading levels are expected to nest as asserted below."""
        root = self._parse(
            "Preamble @Preamble\n"
            "## @Intro\n"
            "# @Title\n"
            "## @Chapter1\n"
            "## @Chapter2\n"
            "### Section 1\n"
            "### Section 2"
        ).shard
        before_title = Shard(
            start_line=1,
            end_line=2,
            children=[
                Shard(tags=["Preamble"], start_line=1, end_line=1),
                Shard(markers=["Intro"], start_line=2, end_line=2),
            ],
        )
        title = Shard(
            markers=["Title"],
            start_line=3,
            end_line=7,
            children=[
                Shard(markers=["Chapter1"], start_line=4, end_line=4),
                Shard(markers=["Chapter2"], start_line=5, end_line=7),
            ],
        )
        expected = Shard(
            start_line=1,
            end_line=7,
            children=[before_title, title],
        )
        assert root == expected

    def test_simple_list(self):
        """A list item that starts with a marker is expected as a child shard."""
        root = self._parse("* hello world\n * @Marker i've got a marker").shard
        marked_item = Shard(
            markers=["Marker"],
            tags=[],
            start_line=2,
            end_line=2,
            children=[],
        )
        expected = Shard(
            markers=[],
            tags=[],
            start_line=1,
            end_line=2,
            children=[marked_item],
        )
        assert root == expected

    def test_parse_complex_list(self):
        """A multi-item list with markers is expected to produce the nested tree below."""
        # NOTE(review): the expected tree implies nested list items, but no
        # leading indentation is visible inside this literal here; it may have
        # been stripped. Confirm the exact whitespace against the repository.
        markdown = (
            "* I'm the parent!\n"
            "* @Marker1 I've got a marker\n\n"
            "* I've got no marker!\n"
            "* I've got a child with a marker!\n"
            "* @Marker2 I'm the child with the marker\n"
        )

        root = self._parse(markdown).shard

        marker2_item = Shard(
            markers=["Marker2"],
            tags=[],
            start_line=6,
            end_line=6,
            children=[],
        )
        item_with_child = Shard(
            markers=[],
            tags=[],
            start_line=5,
            end_line=6,
            children=[marker2_item],
        )
        marker1_item = Shard(
            markers=["Marker1"],
            tags=[],
            start_line=2,
            end_line=3,
            children=[],
        )
        inner_list = Shard(
            markers=[],
            tags=[],
            start_line=2,
            end_line=6,
            children=[marker1_item, item_with_child],
        )
        expected = Shard(
            markers=[],
            tags=[],
            start_line=1,
            end_line=6,
            children=[inner_list],
        )
        assert root == expected

    def test_parse_ignores_tags_in_code(self):
        """An ``@`` word inside a fenced code block is expected to be ignored."""
        root = self._parse("```\n@Marker\n```").shard
        expected = Shard(
            markers=[],
            tags=[],
            start_line=1,
            end_line=3,
            children=[],
        )
        assert root == expected

    def test_parse_finds_tags_in_italic_text(self):
        """An ``@`` word wrapped in ``*...*`` is expected to be found as a marker."""
        self._assert_lone_markers("*@ItalicMarker*", ["ItalicMarker"])

    def test_parse_finds_tags_in_bold_text(self):
        """An ``@`` word wrapped in ``**...**`` is expected to be found as a marker."""
        self._assert_lone_markers("**@BoldMarker**", ["BoldMarker"])

    def test_parse_finds_tags_in_strikethrough_text(self):
        """An ``@`` word wrapped in ``~~...~~`` is expected to be found as a marker."""
        self._assert_lone_markers("~~@StrikeMarker~~", ["StrikeMarker"])

    def test_parse_finds_tags_in_link(self):
        """An ``@`` word used as link text is expected to be found as a marker."""
        self._assert_lone_markers(
            "[@LinkMarker](https://konstantinfickel.de)",
            ["LinkMarker"],
        )

    def test_parse_continues_looking_for_markers_after_first_link_marker(self):
        """Two consecutive marker links are both expected as markers, in order."""
        self._assert_lone_markers(
            "[@LinkMarker1](https://konstantinfickel.de1) "
            "[@LinkMarker2](https://konstantinfickel.de)",
            ["LinkMarker1", "LinkMarker2"],
        )
|
||||
Loading…
Add table
Add a link
Reference in a new issue