chore: cleanup, make pyright a bit happier

Signed-off-by: Konstantin Fickel <mail@konstantinfickel.de>
This commit is contained in:
Konstantin Fickel 2025-06-21 16:23:04 +02:00
parent 0f645e7e9b
commit 082c13b046
2 changed files with 37 additions and 72 deletions

View file

@ -1,6 +1,5 @@
from __future__ import annotations from __future__ import annotations
from itertools import takewhile, dropwhile from typing import Iterable, Optional, TypeVar
from typing import Optional, TypeVar
from pydantic import BaseModel from pydantic import BaseModel
from mistletoe import Document from mistletoe import Document
from mistletoe.markdown_renderer import MarkdownRenderer, Fragment from mistletoe.markdown_renderer import MarkdownRenderer, Fragment
@ -42,29 +41,42 @@ class StreamFile(BaseModel):
T = TypeVar("T") T = TypeVar("T")
def get_line_number(block_token: BlockToken) -> int:
    """Return the source line number recorded on *block_token*.

    mistletoe sets ``line_number`` on block tokens dynamically, so the
    attribute is invisible to the type checker — hence the ignore.
    """
    return block_token.line_number  # type: ignore
def extract_tags(tokens: Iterable[Token]) -> list[str]:
    """Collect the text content of every ``Tag`` token in *tokens*.

    Non-``Tag`` tokens are ignored; order of appearance is preserved.
    """
    return [token.content for token in tokens if isinstance(token, Tag)]
def extract_markers_and_tags(block_token: BlockToken) -> tuple[list[str], list[str]]:
    """Split the tags of *block_token* into leading markers and trailing tags.

    A tag counts as a *marker* while only tags and whitespace-only
    ``RawText`` children have been seen; once any other child appears,
    every subsequent tag counts as a plain *tag*.

    Returns a ``(markers, tags)`` pair of tag-content strings, or
    ``([], [])`` when the token has no children.
    """
    markers, tags = [], []
    is_marker = True
    if block_token.children is None:
        return [], []
    for token in block_token.children:
        if isinstance(token, Tag):
            if is_marker:
                markers.append(token)
            else:
                tags.append(token)
        elif not (isinstance(token, RawText) and re.match(r"^[\s]*$", token.content)):
            # First substantive (non-tag, non-blank) child ends the marker region.
            is_marker = False
    return extract_tags(markers), extract_tags(tags)
def has_markers(block_token: BlockToken) -> bool:
    """Return True if the first non-blank child of *block_token* is a ``Tag``.

    Whitespace-only ``RawText`` children are skipped; any other non-tag
    child (or an empty/absent child list) means no markers.
    """
    if block_token.children is None:
        return False
    for child in block_token.children:
        if isinstance(child, Tag):
            return True
        elif not (isinstance(child, RawText) and re.match(r"^[\s]*$", child.content)):
            return False
    return False
@ -158,9 +170,9 @@ def parse_paragraph_shards(
is_first_block_heading = i == 0 is_first_block_heading = i == 0
if i in paragraph_positions or (i == 0 and is_first_block_heading): if i in paragraph_positions or (i == 0 and is_first_block_heading):
child_start_line = token.line_number child_start_line = get_line_number(token)
child_end_line = ( child_end_line = (
block_tokens[i + 1].line_number - 1 get_line_number(block_tokens[i + 1]) - 1
if i + 1 < len(block_tokens) if i + 1 < len(block_tokens)
else end_line else end_line
) )
@ -201,9 +213,9 @@ def parse_header_shards(
children = [] children = []
for i, child_blocks in enumerate(block_tokens_split_by_heading): for i, child_blocks in enumerate(block_tokens_split_by_heading):
child_start_line = child_blocks[0].line_number child_start_line = get_line_number(child_blocks[0])
child_end_line = ( child_end_line = (
block_tokens_split_by_heading[i + 1][0].line_number - 1 get_line_number(block_tokens_split_by_heading[i + 1][0]) - 1
if i + 1 < len(block_tokens_split_by_heading) if i + 1 < len(block_tokens_split_by_heading)
else end_line else end_line
) )
@ -227,7 +239,8 @@ def parse_markdown_file(file_name: str, file_content: str) -> StreamFile:
with TagMarkdownRenderer(): with TagMarkdownRenderer():
ast = Document(file_content) ast = Document(file_content)
if block_tokens := ast.children: block_tokens: list[BlockToken] = ast.children # type: ignore
if len(block_tokens) > 0:
if parsed_shard := parse_header_shards( if parsed_shard := parse_header_shards(
block_tokens, shard.start_line, shard.end_line block_tokens, shard.start_line, shard.end_line
): ):

View file

@ -27,11 +27,8 @@ class TestParseProcess:
assert parse_markdown_file(self.file_name, test_file) == StreamFile( assert parse_markdown_file(self.file_name, test_file) == StreamFile(
filename=self.file_name, filename=self.file_name,
shard=Shard( shard=Shard(
markers=[],
tags=[],
start_line=1, start_line=1,
end_line=3, end_line=3,
children=[],
), ),
) )
@ -41,10 +38,8 @@ class TestParseProcess:
filename=self.file_name, filename=self.file_name,
shard=Shard( shard=Shard(
markers=["Tag"], markers=["Tag"],
tags=[],
start_line=1, start_line=1,
end_line=1, end_line=1,
children=[],
), ),
) )
@ -54,10 +49,8 @@ class TestParseProcess:
filename=self.file_name, filename=self.file_name,
shard=Shard( shard=Shard(
markers=["Tag1", "Tag2"], markers=["Tag1", "Tag2"],
tags=[],
start_line=1, start_line=1,
end_line=1, end_line=1,
children=[],
), ),
) )
@ -70,7 +63,6 @@ class TestParseProcess:
tags=["Tag3"], tags=["Tag3"],
start_line=1, start_line=1,
end_line=1, end_line=1,
children=[],
), ),
) )
@ -80,24 +72,18 @@ class TestParseProcess:
assert parse_markdown_file(self.file_name, file_text) == StreamFile( assert parse_markdown_file(self.file_name, file_text) == StreamFile(
filename=self.file_name, filename=self.file_name,
shard=Shard( shard=Shard(
markers=[],
tags=[],
start_line=1, start_line=1,
end_line=5, end_line=5,
children=[ children=[
Shard( Shard(
markers=["Tag1"], markers=["Tag1"],
tags=[],
start_line=3, start_line=3,
end_line=3, end_line=3,
children=[],
), ),
Shard( Shard(
markers=["Tag2"], markers=["Tag2"],
tags=[],
start_line=5, start_line=5,
end_line=5, end_line=5,
children=[],
), ),
], ],
), ),
@ -107,14 +93,11 @@ class TestParseProcess:
file_text = "Hello @Tag1 World!\n\n@Marker Block 1\n\nBlock 2 @Tag2" file_text = "Hello @Tag1 World!\n\n@Marker Block 1\n\nBlock 2 @Tag2"
assert parse_markdown_file(self.file_name, file_text).shard == Shard( assert parse_markdown_file(self.file_name, file_text).shard == Shard(
markers=[],
tags=["Tag1", "Tag2"], tags=["Tag1", "Tag2"],
start_line=1, start_line=1,
end_line=5, end_line=5,
children=[ children=[
Shard( Shard(markers=["Marker"], start_line=3, end_line=3, children=[]),
markers=["Marker"], tags=[], start_line=3, end_line=3, children=[]
),
], ],
) )
@ -122,11 +105,8 @@ class TestParseProcess:
file_text = "# Heading\n\n## Subheading" file_text = "# Heading\n\n## Subheading"
assert parse_markdown_file(self.file_name, file_text).shard == Shard( assert parse_markdown_file(self.file_name, file_text).shard == Shard(
markers=[],
tags=[],
start_line=1, start_line=1,
end_line=3, end_line=3,
children=[],
) )
def test_parse_split_at_heading_if_marker_on_subheading(self): def test_parse_split_at_heading_if_marker_on_subheading(self):
@ -135,13 +115,10 @@ class TestParseProcess:
assert parse_markdown_file(self.file_name, file_text) == StreamFile( assert parse_markdown_file(self.file_name, file_text) == StreamFile(
filename=self.file_name, filename=self.file_name,
shard=Shard( shard=Shard(
markers=[],
tags=[],
start_line=1, start_line=1,
end_line=5, end_line=5,
children=[ children=[
Shard( Shard(
markers=[],
tags=["Tag1"], tags=["Tag1"],
start_line=1, start_line=1,
end_line=4, end_line=4,
@ -151,13 +128,10 @@ class TestParseProcess:
tags=["Tag2"], tags=["Tag2"],
start_line=3, start_line=3,
end_line=4, end_line=4,
children=[],
), ),
], ],
), ),
Shard( Shard(tags=["Tag3"], start_line=5, end_line=5, children=[]),
markers=[], tags=["Tag3"], start_line=5, end_line=5, children=[]
),
], ],
), ),
) )
@ -172,7 +146,6 @@ class TestParseProcess:
tags=["Tag1", "Tag2"], tags=["Tag1", "Tag2"],
start_line=1, start_line=1,
end_line=3, end_line=3,
children=[],
), ),
) )
@ -180,29 +153,21 @@ class TestParseProcess:
file_text = "Hello\n\n@Marker1 World!\n\n# @Marker2 I'm a heading!" file_text = "Hello\n\n@Marker1 World!\n\n# @Marker2 I'm a heading!"
assert parse_markdown_file(self.file_name, file_text).shard == Shard( assert parse_markdown_file(self.file_name, file_text).shard == Shard(
markers=[],
tags=[],
start_line=1, start_line=1,
end_line=5, end_line=5,
children=[ children=[
Shard( Shard(
markers=[],
tags=[],
start_line=1, start_line=1,
end_line=4, end_line=4,
children=[ children=[
Shard( Shard(
markers=["Marker1"], markers=["Marker1"],
tags=[],
start_line=3, start_line=3,
end_line=3, end_line=3,
children=[],
) )
], ],
), ),
Shard( Shard(markers=["Marker2"], start_line=5, end_line=5, children=[]),
markers=["Marker2"], tags=[], start_line=5, end_line=5, children=[]
),
], ],
) )
@ -210,52 +175,39 @@ class TestParseProcess:
file_text = "Preamble @Preamble\n## @Intro\n# @Title\n## @Chapter1\n## @Chapter2\n### Section 1\n### Section 2" file_text = "Preamble @Preamble\n## @Intro\n# @Title\n## @Chapter1\n## @Chapter2\n### Section 1\n### Section 2"
assert parse_markdown_file(self.file_name, file_text).shard == Shard( assert parse_markdown_file(self.file_name, file_text).shard == Shard(
markers=[],
tags=[],
start_line=1, start_line=1,
end_line=7, end_line=7,
children=[ children=[
Shard( Shard(
markers=[],
tags=[],
start_line=1, start_line=1,
end_line=2, end_line=2,
children=[ children=[
Shard( Shard(
markers=[],
tags=["Preamble"], tags=["Preamble"],
start_line=1, start_line=1,
end_line=1, end_line=1,
children=[],
), ),
Shard( Shard(
markers=["Intro"], markers=["Intro"],
tags=[],
start_line=2, start_line=2,
end_line=2, end_line=2,
children=[],
), ),
], ],
), ),
Shard( Shard(
markers=["Title"], markers=["Title"],
tags=[],
start_line=3, start_line=3,
end_line=7, end_line=7,
children=[ children=[
Shard( Shard(
markers=["Chapter1"], markers=["Chapter1"],
tags=[],
start_line=4, start_line=4,
end_line=4, end_line=4,
children=[],
), ),
Shard( Shard(
markers=["Chapter2"], markers=["Chapter2"],
tags=[],
start_line=5, start_line=5,
end_line=7, end_line=7,
children=[],
), ),
], ],
), ),