From 695a28e715df98b673ed550c58f0c4e616131bac Mon Sep 17 00:00:00 2001
From: Konstantin Fickel <mail@konstantinfickel.de>
Date: Fri, 20 Jun 2025 16:26:17 +0200
Subject: [PATCH] chore: refactor and simplify parsing

Signed-off-by: Konstantin Fickel <mail@konstantinfickel.de>
---
 src/streamer/parse.py | 47 ++++++++++++++++++++++++-------------------
 test/test_parse.py    |  4 ++--
 2 files changed, 28 insertions(+), 23 deletions(-)

diff --git a/src/streamer/parse.py b/src/streamer/parse.py
index 5ba66f9..326b537 100644
--- a/src/streamer/parse.py
+++ b/src/streamer/parse.py
@@ -91,6 +91,9 @@ def split_at(list_to_be_split: list[T], positions: list[int]):
 def to_shard(
     tokens: list[Token], start_line: int, end_line: int, children: list[Shard] = []
 ) -> Shard:
+    if len(children) == 1 and len(tokens) == 0:
+        return children[0]
+
     markers, tags = extract_markers_and_tags(tokens) if len(tokens) > 0 else ([], [])
 
     return Shard(
@@ -102,6 +105,28 @@ def to_shard(
     )
 
 
+def parse_paragraph_shards(block_tokens: list[BlockToken], end_line: int) -> Shard:
+    """Split block tokens into child shards and the parent shard's own tokens."""
+    start_line = block_tokens[0].line_number
+    # A set keeps the membership test in the loop below constant-time.
+    shard_starts = set(find_shard_positions(block_tokens))
+
+    child_shards: list[Shard] = []
+    own_elements: list[BlockToken] = []
+
+    for i, token in enumerate(block_tokens):
+        if i not in shard_starts:
+            own_elements.append(token)
+            continue
+        # A child shard runs up to the line before the next block token,
+        # or up to end_line when it is the last token.
+        has_next = i + 1 < len(block_tokens)
+        shard_end_line = block_tokens[i + 1].line_number - 1 if has_next else end_line
+        child_shards.append(to_shard([token], token.line_number, shard_end_line))
+
+    return to_shard(own_elements, start_line, end_line, children=child_shards)
+
+
 def parse_markdown_file(file_name: str, file_content: str) -> StreamFile:
     shard = None
     with TagMarkdownRenderer():
@@ -109,27 +134,7 @@ def parse_markdown_file(file_name: str, file_content: str) -> StreamFile:
         line_count = len(file_content.splitlines())
 
         if block_tokens := ast.children:
-            shard_starts = find_shard_positions(block_tokens)
-
-            child_shards: list[Shard] = []
-            own_elements: list[BlockToken] = []
-
-            for i in range(len(block_tokens)):
-                token = block_tokens[i]
-                if i in shard_starts:
-                    end_line = (
-                        block_tokens[i + 1].line_number - 1
-                        if i + 1 < len(block_tokens)
-                        else line_count
-                    )
-                    child_shards.append(to_shard([token], token.line_number, end_line))
-                else:
-                    own_elements.append(token)
-
-            if len(child_shards) == 1 and len(own_elements) == 0:
-                shard = child_shards[0]
-            else:
-                shard = to_shard(own_elements, 1, line_count, children=child_shards)
+            shard = parse_paragraph_shards(block_tokens, line_count)
 
     return StreamFile(shard=shard, filename=file_name)
 
diff --git a/test/test_parse.py b/test/test_parse.py
index 2b5b4e0..b9f24bb 100644
--- a/test/test_parse.py
+++ b/test/test_parse.py
@@ -26,14 +26,14 @@ class TestParseProcess:
         )
 
     def test_parse_basic_multi_line_file(self):
-        test_file = "Hello World\nHello again!"
+        test_file = "Hello World\n\nHello again!"
         assert parse_markdown_file(self.file_name, test_file) == StreamFile(
             filename=self.file_name,
             shard=Shard(
                 markers=[],
                 tags=[],
                 start_line=1,
-                end_line=2,
+                end_line=3,
                 children=[],
             ),
         )