feat: also support tags in further blocks
Signed-off-by: Konstantin Fickel <mail@konstantinfickel.de>
This commit is contained in:
parent
42262844a6
commit
b7ad75f079
2 changed files with 64 additions and 27 deletions
|
|
@ -42,46 +42,56 @@ T = TypeVar("T")
|
|||
|
||||
|
||||
def extract_tags(tokens: list[Token]) -> list[str]:
    """Return the content strings of all ``Tag`` tokens in *tokens*.

    Non-``Tag`` tokens are ignored.  Callers pass both plain lists and
    lazy ``takewhile``/``dropwhile`` iterators, so any iterable works here.
    """
    # Comprehension instead of the map/filter/lambda chain — same result,
    # clearer and flagged by ruff C4/PERF rules otherwise.
    return [token.content for token in tokens if isinstance(token, Tag)]
||||
def extract_markers_and_tags(tokens: list[Token]) -> tuple[list[str], list[str]]:
    """Split the tokens of a shard into its markers and its tags.

    The first block may begin with a run of ``Tag`` tokens, optionally
    separated by whitespace-only ``RawText`` — those leading tags are the
    shard's *markers*.  Every ``Tag`` after that boundary, plus all tags
    found in the remaining blocks, are collected as ordinary *tags*.

    Returns a ``(markers, tags)`` pair of tag-content strings; both lists
    are empty when *tokens* is empty.
    """
    # Guard: tokens[0] below would raise IndexError on an empty list.
    if not tokens:
        return [], []

    def marker_boundary_check(token: Token) -> bool:
        # The marker region extends while we see Tags or blank raw text.
        return isinstance(token, Tag) or bool(
            isinstance(token, RawText) and re.match(r"^[\s]*$", token.content)
        )

    marker_region = takewhile(marker_boundary_check, tokens[0].children)
    tag_region_first = dropwhile(marker_boundary_check, tokens[0].children)

    tags: list[str] = extract_tags(tag_region_first)
    for token in tokens[1:]:
        # Tags may also appear in blocks after the first one.
        tags.extend(extract_tags(token.children))

    return extract_tags(marker_region), tags
||||
def has_markers(token: Token) -> bool:
    """Report whether *token* opens with at least one marker tag."""
    marker_contents = extract_markers_and_tags([token])[0]
    return bool(marker_contents)
||||
def find_shard_positions(block_tokens: list[BlockToken]) -> list[int]:
    """Indices of the block tokens that start a new shard.

    A shard starts at every paragraph whose leading region carries markers.
    """
    positions: list[int] = []
    for position, candidate in enumerate(block_tokens):
        if isinstance(candidate, Paragraph) and has_markers(candidate):
            positions.append(position)
    return positions
||||
# Generic element type shared by the list-splitting helpers below.
T = TypeVar("T")
||||
def split_at(list_to_be_split: list[T], positions: list[int]):
|
||||
positions = sorted(set([0, *positions, len(list_to_be_split)]))
|
||||
|
||||
return [
|
||||
list_to_be_split[left : right]
|
||||
for left, right in pairwise(positions)
|
||||
]
|
||||
return [list_to_be_split[left:right] for left, right in pairwise(positions)]
|
||||
|
||||
def to_shard(tokens: list[Token], start_line: int, end_line: int, children: list[Shard] = []) -> Shard:
|
||||
markers, tags = extract_markers_and_tags(tokens[0]) if len(tokens) > 0 else ([], [])
|
||||
# TODO: also find tags of children!
|
||||
|
||||
def to_shard(
|
||||
tokens: list[Token], start_line: int, end_line: int, children: list[Shard] = []
|
||||
) -> Shard:
|
||||
markers, tags = extract_markers_and_tags(tokens) if len(tokens) > 0 else ([], [])
|
||||
|
||||
return Shard(
|
||||
markers=markers,
|
||||
|
|
@ -91,9 +101,10 @@ def to_shard(tokens: list[Token], start_line: int, end_line: int, children: list
|
|||
children=children,
|
||||
)
|
||||
|
||||
|
||||
def parse_markdown_file(file_name: str, file_content: str) -> StreamFile:
|
||||
shard = None
|
||||
with TagMarkdownRenderer() as renderer:
|
||||
with TagMarkdownRenderer():
|
||||
ast = Document(file_content)
|
||||
line_count = len(file_content.splitlines())
|
||||
|
||||
|
|
@ -106,11 +117,15 @@ def parse_markdown_file(file_name: str, file_content: str) -> StreamFile:
|
|||
for i in range(len(block_tokens)):
|
||||
token = block_tokens[i]
|
||||
if i in shard_starts:
|
||||
end_line = block_tokens[i + 1].line_number - 1 if i + 1 < len(block_tokens) else line_count
|
||||
end_line = (
|
||||
block_tokens[i + 1].line_number - 1
|
||||
if i + 1 < len(block_tokens)
|
||||
else line_count
|
||||
)
|
||||
child_shards.append(to_shard([token], token.line_number, end_line))
|
||||
else:
|
||||
own_elements.append(token)
|
||||
|
||||
|
||||
if len(child_shards) == 1 and len(own_elements) == 0:
|
||||
shard = child_shards[0]
|
||||
else:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue