feat: support splitting paragraphs into multiple shards

Signed-off-by: Konstantin Fickel <mail@konstantinfickel.de>
This commit is contained in:
Konstantin Fickel 2025-06-20 15:34:02 +02:00
parent 2091e5c98d
commit 9b13370409
2 changed files with 86 additions and 19 deletions

View file

@ -19,7 +19,6 @@ class TestParseProcess:
shard=Shard(
markers=[],
tags=[],
content=test_file,
start_line=1,
end_line=1,
children=[],
@ -33,7 +32,6 @@ class TestParseProcess:
shard=Shard(
markers=[],
tags=[],
content=test_file,
start_line=1,
end_line=2,
children=[],
@ -47,7 +45,6 @@ class TestParseProcess:
shard=Shard(
markers=["Tag"],
tags=[],
content=test_file,
start_line=1,
end_line=1,
children=[],
@ -61,7 +58,6 @@ class TestParseProcess:
shard=Shard(
markers=["Tag1", "Tag2"],
tags=[],
content=test_file,
start_line=1,
end_line=1,
children=[],
@ -75,9 +71,37 @@ class TestParseProcess:
shard=Shard(
markers=["Tag1", "Tag2"],
tags=["Tag3"],
content=test_file,
start_line=1,
end_line=1,
children=[],
),
)
def test_parse_split_paragraphs_into_shards(self):
    """Check that marker-prefixed paragraphs become child shards of the root.

    The input consists of three paragraphs separated by blank lines: a plain
    paragraph on line 1, and two paragraphs starting with ``@Tag1`` and
    ``@Tag2`` on lines 3 and 5 (1-indexed). The expected result is a root
    shard covering lines 1-5 without markers or tags, whose children are
    exactly the two marker paragraphs, each spanning a single line. The
    expected value contains no child for the unmarked first paragraph.
    """
    # Plain literal: the text contains no placeholders, so no f-string is needed.
    file_text = "Hello World!\n\n@Tag1 Block 1\n\n@Tag2 Block 2"

    expected = StreamFile(
        filename=self.file_name,
        shard=Shard(
            markers=[],
            tags=[],
            start_line=1,
            end_line=5,
            children=[
                # "@Tag1 Block 1" sits on line 3 (line 2 is the blank separator).
                Shard(
                    markers=["Tag1"],
                    tags=[],
                    start_line=3,
                    end_line=3,
                    children=[],
                ),
                # "@Tag2 Block 2" sits on line 5 (line 4 is the blank separator).
                Shard(
                    markers=["Tag2"],
                    tags=[],
                    start_line=5,
                    end_line=5,
                    children=[],
                ),
            ],
        ),
    )

    assert parse_markdown_file(self.file_name, file_text) == expected