From 082c13b0464df808add16da821e3fd3cff3d3752 Mon Sep 17 00:00:00 2001 From: Konstantin Fickel Date: Sat, 21 Jun 2025 16:23:04 +0200 Subject: [PATCH] chore: cleanup, make pyright a bit happier Signed-off-by: Konstantin Fickel --- src/streamer/parse.py | 55 ++++++++++++++++++++++++++----------------- test/test_parse.py | 54 +++--------------------------------------- 2 files changed, 37 insertions(+), 72 deletions(-) diff --git a/src/streamer/parse.py b/src/streamer/parse.py index 197c498..693960e 100644 --- a/src/streamer/parse.py +++ b/src/streamer/parse.py @@ -1,6 +1,5 @@ from __future__ import annotations -from itertools import takewhile, dropwhile -from typing import Optional, TypeVar +from typing import Iterable, Optional, TypeVar from pydantic import BaseModel from mistletoe import Document from mistletoe.markdown_renderer import MarkdownRenderer, Fragment @@ -42,30 +41,43 @@ class StreamFile(BaseModel): T = TypeVar("T") -def extract_tags(tokens: list[Token]) -> list[str]: - tags: iter[Tag] = filter(lambda token: isinstance(token, Tag), tokens) - return list(map(lambda marker: marker.content, tags)) +def get_line_number(block_token: BlockToken) -> int: + return block_token.line_number # type: ignore -def extract_markers_and_tags(tokens: BlockToken) -> tuple[list[str], list[str]]: - def marker_boundary_check(token: Token): - return isinstance(token, Tag) or ( - isinstance(token, RawText) and re.match(r"^[\s]*$", token.content) - ) +def extract_tags(tokens: Iterable[Token]) -> list[str]: + return [token.content for token in tokens if isinstance(token, Tag)] - marker_region = takewhile(marker_boundary_check, tokens.children) - tag_region = dropwhile(marker_boundary_check, tokens.children) - return extract_tags(marker_region), extract_tags(tag_region) +def extract_markers_and_tags(block_token: BlockToken) -> tuple[list[str], list[str]]: + markers, tags = [], [] + is_marker = True + + if block_token.children is None: + return [], [] + + for token in 
block_token.children: + if isinstance(token, Tag): + if is_marker: + markers.append(token) + else: + tags.append(token) + elif not (isinstance(token, RawText) and re.match(r"^[\s]*$", token.content)): + is_marker = False + + return extract_tags(markers), extract_tags(tags) def has_markers(block_token: BlockToken) -> bool: + if block_token.children is None: + return False + for child in block_token.children: if isinstance(child, Tag): return True - if isinstance(child, RawText) and re.match(r"^[\s]*$", child.content): - continue - return False + elif not (isinstance(child, RawText) and re.match(r"^[\s]*$", child.content)): + return False + return False def find_paragraph_shard_positions(block_tokens: list[BlockToken]) -> list[int]: @@ -158,9 +170,9 @@ def parse_paragraph_shards( is_first_block_heading = i == 0 if i in paragraph_positions or (i == 0 and is_first_block_heading): - child_start_line = token.line_number + child_start_line = get_line_number(token) child_end_line = ( - block_tokens[i + 1].line_number - 1 + get_line_number(block_tokens[i + 1]) - 1 if i + 1 < len(block_tokens) else end_line ) @@ -201,9 +213,9 @@ def parse_header_shards( children = [] for i, child_blocks in enumerate(block_tokens_split_by_heading): - child_start_line = child_blocks[0].line_number + child_start_line = get_line_number(child_blocks[0]) child_end_line = ( - block_tokens_split_by_heading[i + 1][0].line_number - 1 + get_line_number(block_tokens_split_by_heading[i + 1][0]) - 1 if i + 1 < len(block_tokens_split_by_heading) else end_line ) @@ -227,7 +239,8 @@ def parse_markdown_file(file_name: str, file_content: str) -> StreamFile: with TagMarkdownRenderer(): ast = Document(file_content) - if block_tokens := ast.children: + block_tokens: list[BlockToken] = ast.children # type: ignore + if block_tokens: if parsed_shard := parse_header_shards( block_tokens, shard.start_line, shard.end_line ): diff --git a/test/test_parse.py b/test/test_parse.py index 4fd4d56..e7b6562 100644 ---
a/test/test_parse.py +++ b/test/test_parse.py @@ -27,11 +27,8 @@ class TestParseProcess: assert parse_markdown_file(self.file_name, test_file) == StreamFile( filename=self.file_name, shard=Shard( - markers=[], - tags=[], start_line=1, end_line=3, - children=[], ), ) @@ -41,10 +38,8 @@ class TestParseProcess: filename=self.file_name, shard=Shard( markers=["Tag"], - tags=[], start_line=1, end_line=1, - children=[], ), ) @@ -54,10 +49,8 @@ class TestParseProcess: filename=self.file_name, shard=Shard( markers=["Tag1", "Tag2"], - tags=[], start_line=1, end_line=1, - children=[], ), ) @@ -70,7 +63,6 @@ class TestParseProcess: tags=["Tag3"], start_line=1, end_line=1, - children=[], ), ) @@ -80,24 +72,18 @@ class TestParseProcess: assert parse_markdown_file(self.file_name, file_text) == StreamFile( filename=self.file_name, shard=Shard( - markers=[], - tags=[], start_line=1, end_line=5, children=[ Shard( markers=["Tag1"], - tags=[], start_line=3, end_line=3, - children=[], ), Shard( markers=["Tag2"], - tags=[], start_line=5, end_line=5, - children=[], ), ], ), @@ -107,14 +93,11 @@ class TestParseProcess: file_text = "Hello @Tag1 World!\n\n@Marker Block 1\n\nBlock 2 @Tag2" assert parse_markdown_file(self.file_name, file_text).shard == Shard( - markers=[], tags=["Tag1", "Tag2"], start_line=1, end_line=5, children=[ - Shard( - markers=["Marker"], tags=[], start_line=3, end_line=3, children=[] - ), + Shard(markers=["Marker"], start_line=3, end_line=3, children=[]), ], ) @@ -122,11 +105,8 @@ class TestParseProcess: file_text = "# Heading\n\n## Subheading" assert parse_markdown_file(self.file_name, file_text).shard == Shard( - markers=[], - tags=[], start_line=1, end_line=3, - children=[], ) def test_parse_split_at_heading_if_marker_on_subheading(self): @@ -135,13 +115,10 @@ class TestParseProcess: assert parse_markdown_file(self.file_name, file_text) == StreamFile( filename=self.file_name, shard=Shard( - markers=[], - tags=[], start_line=1, end_line=5, children=[ Shard( - 
markers=[], tags=["Tag1"], start_line=1, end_line=4, @@ -151,13 +128,10 @@ class TestParseProcess: tags=["Tag2"], start_line=3, end_line=4, - children=[], ), ], ), - Shard( - markers=[], tags=["Tag3"], start_line=5, end_line=5, children=[] - ), + Shard(tags=["Tag3"], start_line=5, end_line=5, children=[]), ], ), ) @@ -172,7 +146,6 @@ class TestParseProcess: tags=["Tag1", "Tag2"], start_line=1, end_line=3, - children=[], ), ) @@ -180,29 +153,21 @@ class TestParseProcess: file_text = "Hello\n\n@Marker1 World!\n\n# @Marker2 I'm a heading!" assert parse_markdown_file(self.file_name, file_text).shard == Shard( - markers=[], - tags=[], start_line=1, end_line=5, children=[ Shard( - markers=[], - tags=[], start_line=1, end_line=4, children=[ Shard( markers=["Marker1"], - tags=[], start_line=3, end_line=3, - children=[], ) ], ), - Shard( - markers=["Marker2"], tags=[], start_line=5, end_line=5, children=[] - ), + Shard(markers=["Marker2"], start_line=5, end_line=5, children=[]), ], ) @@ -210,52 +175,39 @@ class TestParseProcess: file_text = "Preamble @Preamble\n## @Intro\n# @Title\n## @Chapter1\n## @Chapter2\n### Section 1\n### Section 2" assert parse_markdown_file(self.file_name, file_text).shard == Shard( - markers=[], - tags=[], start_line=1, end_line=7, children=[ Shard( - markers=[], - tags=[], start_line=1, end_line=2, children=[ Shard( - markers=[], tags=["Preamble"], start_line=1, end_line=1, - children=[], ), Shard( markers=["Intro"], - tags=[], start_line=2, end_line=2, - children=[], ), ], ), Shard( markers=["Title"], - tags=[], start_line=3, end_line=7, children=[ Shard( markers=["Chapter1"], - tags=[], start_line=4, end_line=4, - children=[], ), Shard( markers=["Chapter2"], - tags=[], start_line=5, end_line=7, - children=[], ), ], ),