chore: cleanup, make pyright a bit happier

Signed-off-by: Konstantin Fickel <mail@konstantinfickel.de>
2025-06-21 16:23:04 +02:00 · 2025-06-21 16:23:04 +02:00 · 082c13b046
commit 082c13b046
parent 0f645e7e9b
2 changed files with 37 additions and 72 deletions
--- a/src/streamer/parse.py
+++ b/src/streamer/parse.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
-from itertools import takewhile, dropwhile
-from typing import Optional, TypeVar
+from typing import Iterable, Optional, TypeVar
 from pydantic import BaseModel
 from mistletoe import Document
 from mistletoe.markdown_renderer import MarkdownRenderer, Fragment
@@ -42,30 +41,43 @@ class StreamFile(BaseModel):
 T = TypeVar("T")


-def extract_tags(tokens: list[Token]) -> list[str]:
-    tags: iter[Tag] = filter(lambda token: isinstance(token, Tag), tokens)
-    return list(map(lambda marker: marker.content, tags))
+def get_line_number(block_token: BlockToken) -> int:
+    return block_token.line_number  # type: ignore


-def extract_markers_and_tags(tokens: BlockToken) -> tuple[list[str], list[str]]:
-    def marker_boundary_check(token: Token):
-        return isinstance(token, Tag) or (
-            isinstance(token, RawText) and re.match(r"^[\s]*$", token.content)
-        )
+def extract_tags(tokens: Iterable[Token]) -> list[str]:
+    return [token.content for token in tokens if isinstance(token, Tag)]

-    marker_region = takewhile(marker_boundary_check, tokens.children)
-    tag_region = dropwhile(marker_boundary_check, tokens.children)

-    return extract_tags(marker_region), extract_tags(tag_region)
+def extract_markers_and_tags(block_token: BlockToken) -> tuple[list[str], list[str]]:
+    markers, tags = [], []
+    is_marker = True
+
+    if block_token.children is None:
+        return [], []
+
+    for token in block_token.children:
+        if isinstance(token, Tag):
+            if is_marker:
+                markers.append(token)
+            else:
+                tags.append(token)
+        elif not (isinstance(token, RawText) and re.match(r"^[\s]*$", token.content)):
+            is_marker = False
+
+    return extract_tags(markers), extract_tags(tags)


 def has_markers(block_token: BlockToken) -> bool:
+    if block_token.children is None:
+        return False
+
     for child in block_token.children:
         if isinstance(child, Tag):
             return True
-        if isinstance(child, RawText) and re.match(r"^[\s]*$", child.content):
-            continue
-        return False
+        elif not (isinstance(child, RawText) and re.match(r"^[\s]*$", child.content)):
+            return False
+    return False


 def find_paragraph_shard_positions(block_tokens: list[BlockToken]) -> list[int]:
@@ -158,9 +170,9 @@ def parse_paragraph_shards(
             is_first_block_heading = i == 0

         if i in paragraph_positions or (i == 0 and is_first_block_heading):
-            child_start_line = token.line_number
+            child_start_line = get_line_number(token)
             child_end_line = (
-                block_tokens[i + 1].line_number - 1
+                get_line_number(block_tokens[i + 1]) - 1
                 if i + 1 < len(block_tokens)
                 else end_line
             )
@@ -201,9 +213,9 @@ def parse_header_shards(

     children = []
     for i, child_blocks in enumerate(block_tokens_split_by_heading):
-        child_start_line = child_blocks[0].line_number
+        child_start_line = get_line_number(child_blocks[0])
         child_end_line = (
-            block_tokens_split_by_heading[i + 1][0].line_number - 1
+            get_line_number(block_tokens_split_by_heading[i + 1][0]) - 1
             if i + 1 < len(block_tokens_split_by_heading)
             else end_line
         )
@@ -227,7 +239,8 @@ def parse_markdown_file(file_name: str, file_content: str) -> StreamFile:
     with TagMarkdownRenderer():
         ast = Document(file_content)

-        if block_tokens := ast.children:
+        block_tokens: list[BlockToken] = ast.children  # type: ignore
+        if len(block_tokens) > 0:
             if parsed_shard := parse_header_shards(
                 block_tokens, shard.start_line, shard.end_line
             ):