From 082c13b0464df808add16da821e3fd3cff3d3752 Mon Sep 17 00:00:00 2001 From: Konstantin Fickel Date: Sat, 21 Jun 2025 16:23:04 +0200 Subject: [PATCH] chore: cleanup, make pyright a bit happier Signed-off-by: Konstantin Fickel --- src/streamer/parse.py | 55 ++++++++++++++++++++++++++----------------- test/test_parse.py | 54 +++--------------------------------------- 2 files changed, 37 insertions(+), 72 deletions(-) diff --git a/src/streamer/parse.py b/src/streamer/parse.py index 197c498..693960e 100644 --- a/src/streamer/parse.py +++ b/src/streamer/parse.py @@ -1,6 +1,5 @@ from __future__ import annotations -from itertools import takewhile, dropwhile -from typing import Optional, TypeVar +from typing import Iterable, Optional, TypeVar from pydantic import BaseModel from mistletoe import Document from mistletoe.markdown_renderer import MarkdownRenderer, Fragment @@ -42,30 +41,43 @@ class StreamFile(BaseModel): T = TypeVar("T") -def extract_tags(tokens: list[Token]) -> list[str]: - tags: iter[Tag] = filter(lambda token: isinstance(token, Tag), tokens) - return list(map(lambda marker: marker.content, tags)) +def get_line_number(block_token: BlockToken) -> int: + return block_token.line_number # type: ignore -def extract_markers_and_tags(tokens: BlockToken) -> tuple[list[str], list[str]]: - def marker_boundary_check(token: Token): - return isinstance(token, Tag) or ( - isinstance(token, RawText) and re.match(r"^[\s]*$", token.content) - ) +def extract_tags(tokens: Iterable[Token]) -> list[str]: + return [token.content for token in tokens if isinstance(token, Tag)] - marker_region = takewhile(marker_boundary_check, tokens.children) - tag_region = dropwhile(marker_boundary_check, tokens.children) - return extract_tags(marker_region), extract_tags(tag_region) +def extract_markers_and_tags(block_token: BlockToken) -> tuple[list[str], list[str]]: + markers, tags = [], [] + is_marker = True + + if block_token.children is None: + return [], [] + + for token in 
block_token.children: + if isinstance(token, Tag): + if is_marker: + markers.append(token) + else: + tags.append(token) + elif not (isinstance(token, RawText) and re.match(r"^[\s]*$", token.content)): + is_marker = False + + return extract_tags(markers), extract_tags(tags) def has_markers(block_token: BlockToken) -> bool: + if block_token.children is None: + return False + for child in block_token.children: if isinstance(child, Tag): return True - if isinstance(child, RawText) and re.match(r"^[\s]*$", child.content): - continue - return False + elif not (isinstance(child, RawText) and re.match(r"^[\s]*$", child.content)): + return False + return False def find_paragraph_shard_positions(block_tokens: list[BlockToken]) -> list[int]: @@ -158,9 +170,9 @@ def parse_paragraph_shards( is_first_block_heading = i == 0 if i in paragraph_positions or (i == 0 and is_first_block_heading): - child_start_line = token.line_number + child_start_line = get_line_number(token) child_end_line = ( - block_tokens[i + 1].line_number - 1 + get_line_number(block_tokens[i + 1]) - 1 if i + 1 < len(block_tokens) else end_line ) @@ -201,9 +213,9 @@ def parse_header_shards( children = [] for i, child_blocks in enumerate(block_tokens_split_by_heading): - child_start_line = child_blocks[0].line_number + child_start_line = get_line_number(child_blocks[0]) child_end_line = ( - block_tokens_split_by_heading[i + 1][0].line_number - 1 + get_line_number(block_tokens_split_by_heading[i + 1][0]) - 1 if i + 1 < len(block_tokens_split_by_heading) else end_line ) @@ -227,7 +239,8 @@ def parse_markdown_file(file_name: str, file_content: str) -> StreamFile: with TagMarkdownRenderer(): ast = Document(file_content) - if block_tokens := ast.children: + block_tokens: list[BlockToken] = ast.children # type: ignore + if block_tokens: if parsed_shard := parse_header_shards( block_tokens, shard.start_line, shard.end_line ): diff --git a/test/test_parse.py b/test/test_parse.py index 4fd4d56..e7b6562 100644 ---
a/test/test_parse.py +++ b/test/test_parse.py @@ -27,11 +27,8 @@ class TestParseProcess: assert parse_markdown_file(self.file_name, test_file) == StreamFile( filename=self.file_name, shard=Shard( - markers=[], - tags=[], start_line=1, end_line=3, - children=[], ), ) @@ -41,10 +38,8 @@ class TestParseProcess: filename=self.file_name, shard=Shard( markers=["Tag"], - tags=[], start_line=1, end_line=1, - children=[], ), ) @@ -54,10 +49,8 @@ class TestParseProcess: filename=self.file_name, shard=Shard( markers=["Tag1", "Tag2"], - tags=[], start_line=1, end_line=1, - children=[], ), ) @@ -70,7 +63,6 @@ class TestParseProcess: tags=["Tag3"], start_line=1, end_line=1, - children=[], ), ) @@ -80,24 +72,18 @@ class TestParseProcess: assert parse_markdown_file(self.file_name, file_text) == StreamFile( filename=self.file_name, shard=Shard( - markers=[], - tags=[], start_line=1, end_line=5, children=[ Shard( markers=["Tag1"], - tags=[], start_line=3, end_line=3, - children=[], ), Shard( markers=["Tag2"], - tags=[], start_line=5, end_line=5, - children=[], ), ], ), @@ -107,14 +93,11 @@ class TestParseProcess: file_text = "Hello @Tag1 World!\n\n@Marker Block 1\n\nBlock 2 @Tag2" assert parse_markdown_file(self.file_name, file_text).shard == Shard( - markers=[], tags=["Tag1", "Tag2"], start_line=1, end_line=5, children=[ - Shard( - markers=["Marker"], tags=[], start_line=3, end_line=3, children=[] - ), + Shard(markers=["Marker"], start_line=3, end_line=3, children=[]), ], ) @@ -122,11 +105,8 @@ class TestParseProcess: file_text = "# Heading\n\n## Subheading" assert parse_markdown_file(self.file_name, file_text).shard == Shard( - markers=[], - tags=[], start_line=1, end_line=3, - children=[], ) def test_parse_split_at_heading_if_marker_on_subheading(self): @@ -135,13 +115,10 @@ class TestParseProcess: assert parse_markdown_file(self.file_name, file_text) == StreamFile( filename=self.file_name, shard=Shard( - markers=[], - tags=[], start_line=1, end_line=5, children=[ Shard( - 
markers=[], tags=["Tag1"], start_line=1, end_line=4, @@ -151,13 +128,10 @@ class TestParseProcess: tags=["Tag2"], start_line=3, end_line=4, - children=[], ), ], ), - Shard( - markers=[], tags=["Tag3"], start_line=5, end_line=5, children=[] - ), + Shard(tags=["Tag3"], start_line=5, end_line=5, children=[]), ], ), ) @@ -172,7 +146,6 @@ class TestParseProcess: tags=["Tag1", "Tag2"], start_line=1, end_line=3, - children=[], ), ) @@ -180,29 +153,21 @@ class TestParseProcess: file_text = "Hello\n\n@Marker1 World!\n\n# @Marker2 I'm a heading!" assert parse_markdown_file(self.file_name, file_text).shard == Shard( - markers=[], - tags=[], start_line=1, end_line=5, children=[ Shard( - markers=[], - tags=[], start_line=1, end_line=4, children=[ Shard( markers=["Marker1"], - tags=[], start_line=3, end_line=3, - children=[], ) ], ), - Shard( - markers=["Marker2"], tags=[], start_line=5, end_line=5, children=[] - ), + Shard(markers=["Marker2"], start_line=5, end_line=5, children=[]), ], ) @@ -210,52 +175,39 @@ class TestParseProcess: file_text = "Preamble @Preamble\n## @Intro\n# @Title\n## @Chapter1\n## @Chapter2\n### Section 1\n### Section 2" assert parse_markdown_file(self.file_name, file_text).shard == Shard( - markers=[], - tags=[], start_line=1, end_line=7, children=[ Shard( - markers=[], - tags=[], start_line=1, end_line=2, children=[ Shard( - markers=[], tags=["Preamble"], start_line=1, end_line=1, - children=[], ), Shard( markers=["Intro"], - tags=[], start_line=2, end_line=2, - children=[], ), ], ), Shard( markers=["Title"], - tags=[], start_line=3, end_line=7, children=[ Shard( markers=["Chapter1"], - tags=[], start_line=4, end_line=4, - children=[], ), Shard( markers=["Chapter2"], - tags=[], start_line=5, end_line=7, - children=[], ), ], ),