chore: cleanup, make pyright a bit happier
Signed-off-by: Konstantin Fickel <mail@konstantinfickel.de>
This commit is contained in:
parent
0f645e7e9b
commit
082c13b046
2 changed files with 37 additions and 72 deletions
|
|
@@ -1,6 +1,5 @@
|
|||
from __future__ import annotations
|
||||
from itertools import takewhile, dropwhile
|
||||
from typing import Optional, TypeVar
|
||||
from typing import Iterable, Optional, TypeVar
|
||||
from pydantic import BaseModel
|
||||
from mistletoe import Document
|
||||
from mistletoe.markdown_renderer import MarkdownRenderer, Fragment
|
||||
|
|
@@ -42,30 +41,43 @@ class StreamFile(BaseModel):
|
|||
T = TypeVar("T")
|
||||
|
||||
|
||||
def extract_tags(tokens: list[Token]) -> list[str]:
|
||||
tags: iter[Tag] = filter(lambda token: isinstance(token, Tag), tokens)
|
||||
return list(map(lambda marker: marker.content, tags))
|
||||
def get_line_number(block_token: BlockToken) -> int:
|
||||
return block_token.line_number # type: ignore
|
||||
|
||||
|
||||
def extract_markers_and_tags(tokens: BlockToken) -> tuple[list[str], list[str]]:
|
||||
def marker_boundary_check(token: Token):
|
||||
return isinstance(token, Tag) or (
|
||||
isinstance(token, RawText) and re.match(r"^[\s]*$", token.content)
|
||||
)
|
||||
def extract_tags(tokens: Iterable[Token]) -> list[str]:
|
||||
return [token.content for token in tokens if isinstance(token, Tag)]
|
||||
|
||||
marker_region = takewhile(marker_boundary_check, tokens.children)
|
||||
tag_region = dropwhile(marker_boundary_check, tokens.children)
|
||||
|
||||
return extract_tags(marker_region), extract_tags(tag_region)
|
||||
def extract_markers_and_tags(block_token: BlockToken) -> tuple[list[str], list[str]]:
|
||||
markers, tags = [], []
|
||||
is_marker = True
|
||||
|
||||
if block_token.children is None:
|
||||
return [], []
|
||||
|
||||
for token in block_token.children:
|
||||
if isinstance(token, Tag):
|
||||
if is_marker:
|
||||
markers.append(token)
|
||||
else:
|
||||
tags.append(token)
|
||||
elif not (isinstance(token, RawText) and re.match(r"^[\s]*$", token.content)):
|
||||
is_marker = False
|
||||
|
||||
return extract_tags(markers), extract_tags(tags)
|
||||
|
||||
|
||||
def has_markers(block_token: BlockToken) -> bool:
|
||||
if block_token.children is None:
|
||||
return False
|
||||
|
||||
for child in block_token.children:
|
||||
if isinstance(child, Tag):
|
||||
return True
|
||||
if isinstance(child, RawText) and re.match(r"^[\s]*$", child.content):
|
||||
continue
|
||||
return False
|
||||
elif not (isinstance(child, RawText) and re.match(r"^[\s]*$", child.content)):
|
||||
return False
|
||||
return False
|
||||
|
||||
|
||||
def find_paragraph_shard_positions(block_tokens: list[BlockToken]) -> list[int]:
|
||||
|
|
@@ -158,9 +170,9 @@ def parse_paragraph_shards(
|
|||
is_first_block_heading = i == 0
|
||||
|
||||
if i in paragraph_positions or (i == 0 and is_first_block_heading):
|
||||
child_start_line = token.line_number
|
||||
child_start_line = get_line_number(token)
|
||||
child_end_line = (
|
||||
block_tokens[i + 1].line_number - 1
|
||||
get_line_number(block_tokens[i + 1]) - 1
|
||||
if i + 1 < len(block_tokens)
|
||||
else end_line
|
||||
)
|
||||
|
|
@@ -201,9 +213,9 @@ def parse_header_shards(
|
|||
|
||||
children = []
|
||||
for i, child_blocks in enumerate(block_tokens_split_by_heading):
|
||||
child_start_line = child_blocks[0].line_number
|
||||
child_start_line = get_line_number(child_blocks[0])
|
||||
child_end_line = (
|
||||
block_tokens_split_by_heading[i + 1][0].line_number - 1
|
||||
get_line_number(block_tokens_split_by_heading[i + 1][0]) - 1
|
||||
if i + 1 < len(block_tokens_split_by_heading)
|
||||
else end_line
|
||||
)
|
||||
|
|
@@ -227,7 +239,8 @@ def parse_markdown_file(file_name: str, file_content: str) -> StreamFile:
|
|||
with TagMarkdownRenderer():
|
||||
ast = Document(file_content)
|
||||
|
||||
if block_tokens := ast.children:
|
||||
block_tokens: list[BlockToken] = ast.children # type: ignore
|
||||
if len(block_tokens) > 0:
|
||||
if parsed_shard := parse_header_shards(
|
||||
block_tokens, shard.start_line, shard.end_line
|
||||
):
|
||||
|
|
|
|||
|
|
@@ -27,11 +27,8 @@ class TestParseProcess:
|
|||
assert parse_markdown_file(self.file_name, test_file) == StreamFile(
|
||||
filename=self.file_name,
|
||||
shard=Shard(
|
||||
markers=[],
|
||||
tags=[],
|
||||
start_line=1,
|
||||
end_line=3,
|
||||
children=[],
|
||||
),
|
||||
)
|
||||
|
||||
|
|
@@ -41,10 +38,8 @@ class TestParseProcess:
|
|||
filename=self.file_name,
|
||||
shard=Shard(
|
||||
markers=["Tag"],
|
||||
tags=[],
|
||||
start_line=1,
|
||||
end_line=1,
|
||||
children=[],
|
||||
),
|
||||
)
|
||||
|
||||
|
|
@@ -54,10 +49,8 @@ class TestParseProcess:
|
|||
filename=self.file_name,
|
||||
shard=Shard(
|
||||
markers=["Tag1", "Tag2"],
|
||||
tags=[],
|
||||
start_line=1,
|
||||
end_line=1,
|
||||
children=[],
|
||||
),
|
||||
)
|
||||
|
||||
|
|
@@ -70,7 +63,6 @@ class TestParseProcess:
|
|||
tags=["Tag3"],
|
||||
start_line=1,
|
||||
end_line=1,
|
||||
children=[],
|
||||
),
|
||||
)
|
||||
|
||||
|
|
@@ -80,24 +72,18 @@ class TestParseProcess:
|
|||
assert parse_markdown_file(self.file_name, file_text) == StreamFile(
|
||||
filename=self.file_name,
|
||||
shard=Shard(
|
||||
markers=[],
|
||||
tags=[],
|
||||
start_line=1,
|
||||
end_line=5,
|
||||
children=[
|
||||
Shard(
|
||||
markers=["Tag1"],
|
||||
tags=[],
|
||||
start_line=3,
|
||||
end_line=3,
|
||||
children=[],
|
||||
),
|
||||
Shard(
|
||||
markers=["Tag2"],
|
||||
tags=[],
|
||||
start_line=5,
|
||||
end_line=5,
|
||||
children=[],
|
||||
),
|
||||
],
|
||||
),
|
||||
|
|
@@ -107,14 +93,11 @@ class TestParseProcess:
|
|||
file_text = "Hello @Tag1 World!\n\n@Marker Block 1\n\nBlock 2 @Tag2"
|
||||
|
||||
assert parse_markdown_file(self.file_name, file_text).shard == Shard(
|
||||
markers=[],
|
||||
tags=["Tag1", "Tag2"],
|
||||
start_line=1,
|
||||
end_line=5,
|
||||
children=[
|
||||
Shard(
|
||||
markers=["Marker"], tags=[], start_line=3, end_line=3, children=[]
|
||||
),
|
||||
Shard(markers=["Marker"], start_line=3, end_line=3, children=[]),
|
||||
],
|
||||
)
|
||||
|
||||
|
|
@@ -122,11 +105,8 @@ class TestParseProcess:
|
|||
file_text = "# Heading\n\n## Subheading"
|
||||
|
||||
assert parse_markdown_file(self.file_name, file_text).shard == Shard(
|
||||
markers=[],
|
||||
tags=[],
|
||||
start_line=1,
|
||||
end_line=3,
|
||||
children=[],
|
||||
)
|
||||
|
||||
def test_parse_split_at_heading_if_marker_on_subheading(self):
|
||||
|
|
@@ -135,13 +115,10 @@ class TestParseProcess:
|
|||
assert parse_markdown_file(self.file_name, file_text) == StreamFile(
|
||||
filename=self.file_name,
|
||||
shard=Shard(
|
||||
markers=[],
|
||||
tags=[],
|
||||
start_line=1,
|
||||
end_line=5,
|
||||
children=[
|
||||
Shard(
|
||||
markers=[],
|
||||
tags=["Tag1"],
|
||||
start_line=1,
|
||||
end_line=4,
|
||||
|
|
@@ -151,13 +128,10 @@ class TestParseProcess:
|
|||
tags=["Tag2"],
|
||||
start_line=3,
|
||||
end_line=4,
|
||||
children=[],
|
||||
),
|
||||
],
|
||||
),
|
||||
Shard(
|
||||
markers=[], tags=["Tag3"], start_line=5, end_line=5, children=[]
|
||||
),
|
||||
Shard(tags=["Tag3"], start_line=5, end_line=5, children=[]),
|
||||
],
|
||||
),
|
||||
)
|
||||
|
|
@@ -172,7 +146,6 @@ class TestParseProcess:
|
|||
tags=["Tag1", "Tag2"],
|
||||
start_line=1,
|
||||
end_line=3,
|
||||
children=[],
|
||||
),
|
||||
)
|
||||
|
||||
|
|
@@ -180,29 +153,21 @@ class TestParseProcess:
|
|||
file_text = "Hello\n\n@Marker1 World!\n\n# @Marker2 I'm a heading!"
|
||||
|
||||
assert parse_markdown_file(self.file_name, file_text).shard == Shard(
|
||||
markers=[],
|
||||
tags=[],
|
||||
start_line=1,
|
||||
end_line=5,
|
||||
children=[
|
||||
Shard(
|
||||
markers=[],
|
||||
tags=[],
|
||||
start_line=1,
|
||||
end_line=4,
|
||||
children=[
|
||||
Shard(
|
||||
markers=["Marker1"],
|
||||
tags=[],
|
||||
start_line=3,
|
||||
end_line=3,
|
||||
children=[],
|
||||
)
|
||||
],
|
||||
),
|
||||
Shard(
|
||||
markers=["Marker2"], tags=[], start_line=5, end_line=5, children=[]
|
||||
),
|
||||
Shard(markers=["Marker2"], start_line=5, end_line=5, children=[]),
|
||||
],
|
||||
)
|
||||
|
||||
|
|
@@ -210,52 +175,39 @@ class TestParseProcess:
|
|||
file_text = "Preamble @Preamble\n## @Intro\n# @Title\n## @Chapter1\n## @Chapter2\n### Section 1\n### Section 2"
|
||||
|
||||
assert parse_markdown_file(self.file_name, file_text).shard == Shard(
|
||||
markers=[],
|
||||
tags=[],
|
||||
start_line=1,
|
||||
end_line=7,
|
||||
children=[
|
||||
Shard(
|
||||
markers=[],
|
||||
tags=[],
|
||||
start_line=1,
|
||||
end_line=2,
|
||||
children=[
|
||||
Shard(
|
||||
markers=[],
|
||||
tags=["Preamble"],
|
||||
start_line=1,
|
||||
end_line=1,
|
||||
children=[],
|
||||
),
|
||||
Shard(
|
||||
markers=["Intro"],
|
||||
tags=[],
|
||||
start_line=2,
|
||||
end_line=2,
|
||||
children=[],
|
||||
),
|
||||
],
|
||||
),
|
||||
Shard(
|
||||
markers=["Title"],
|
||||
tags=[],
|
||||
start_line=3,
|
||||
end_line=7,
|
||||
children=[
|
||||
Shard(
|
||||
markers=["Chapter1"],
|
||||
tags=[],
|
||||
start_line=4,
|
||||
end_line=4,
|
||||
children=[],
|
||||
),
|
||||
Shard(
|
||||
markers=["Chapter2"],
|
||||
tags=[],
|
||||
start_line=5,
|
||||
end_line=7,
|
||||
children=[],
|
||||
),
|
||||
],
|
||||
),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue