feat: apply full heading parsing also to content before headings
Signed-off-by: Konstantin Fickel <mail@konstantinfickel.de>
This commit is contained in:
parent
6d61d67d2e
commit
63ce959d4c
2 changed files with 85 additions and 16 deletions
|
|
@ -26,11 +26,11 @@ class TagMarkdownRenderer(MarkdownRenderer):
|
|||
|
||||
|
||||
class Shard(BaseModel):
|
||||
markers: list[str]
|
||||
tags: list[str]
|
||||
markers: list[str] = []
|
||||
tags: list[str] = []
|
||||
start_line: int
|
||||
end_line: int
|
||||
children: list[Shard]
|
||||
children: list[Shard] = []
|
||||
|
||||
|
||||
class StreamFile(BaseModel):
|
||||
|
|
@ -101,6 +101,26 @@ def to_shard(
|
|||
)
|
||||
|
||||
|
||||
def append_children(shard: Shard, new_children: list[Shard] = []) -> Shard:
|
||||
shard_children = shard.children if len(shard.children) > 0 else []
|
||||
new_shard_children = shard_children + new_children
|
||||
|
||||
if (
|
||||
len(new_shard_children) == 1
|
||||
and len(shard.markers) == 0
|
||||
and len(shard.tags) == 0
|
||||
):
|
||||
return new_shard_children[0]
|
||||
|
||||
return Shard(
|
||||
markers=shard.markers,
|
||||
tags=shard.tags,
|
||||
start_line=shard.start_line,
|
||||
end_line=shard.end_line,
|
||||
children=shard_children + new_children,
|
||||
)
|
||||
|
||||
|
||||
def parse_paragraph_shards(block_tokens: list[BlockToken], end_line: int) -> Shard:
|
||||
start_line = block_tokens[0].line_number
|
||||
shard_starts = find_shard_positions(block_tokens)
|
||||
|
|
@ -148,15 +168,21 @@ def find_heading_positions(
|
|||
|
||||
|
||||
def parse_header_shards(
|
||||
block_tokens: list[BlockToken], end_line: int, ignore_first_token: bool = False
|
||||
block_tokens: list[BlockToken],
|
||||
start_line: int,
|
||||
end_line: int,
|
||||
first_token_is_header: bool = False,
|
||||
) -> Shard:
|
||||
if len(block_tokens) == 0:
|
||||
return Shard(start_line=start_line, end_line=end_line)
|
||||
|
||||
max_header_level_with_marker = optional_max(
|
||||
map(
|
||||
lambda heading: heading.level,
|
||||
filter(
|
||||
lambda block_token: isinstance(block_token, Heading)
|
||||
and has_markers(block_token),
|
||||
block_tokens[1:] if ignore_first_token else block_tokens,
|
||||
block_tokens[1:] if first_token_is_header else block_tokens,
|
||||
),
|
||||
)
|
||||
)
|
||||
|
|
@ -169,12 +195,14 @@ def parse_header_shards(
|
|||
lambda heading: heading.level,
|
||||
filter(
|
||||
lambda block_token: isinstance(block_token, Heading),
|
||||
block_tokens[1:] if ignore_first_token else block_tokens,
|
||||
block_tokens[1:] if first_token_is_header else block_tokens,
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
slice_positions = find_heading_positions(block_tokens, header_level_for_slicing)
|
||||
if first_token_is_header:
|
||||
slice_positions.append(1)
|
||||
is_first_slice_part_of_parent_shard = 0 not in slice_positions
|
||||
|
||||
sliced_by_heading_level = split_at(block_tokens, slice_positions)
|
||||
|
|
@ -190,16 +218,25 @@ def parse_header_shards(
|
|||
|
||||
child_shards = []
|
||||
for i in range(len(child_elements)):
|
||||
child_start_line = child_elements[i][0].line_number
|
||||
child_end_line = (
|
||||
child_elements[i + 1][0].line_number - 1
|
||||
if i + 1 < len(child_elements)
|
||||
else end_line
|
||||
)
|
||||
child_shards.append(
|
||||
parse_header_shards(child_elements[i], child_end_line, True)
|
||||
parse_header_shards(
|
||||
child_elements[i],
|
||||
child_start_line,
|
||||
child_end_line,
|
||||
first_token_is_header=True,
|
||||
)
|
||||
)
|
||||
|
||||
return to_shard(own_elements, block_tokens[0].line_number, end_line, child_shards)
|
||||
own_shard = parse_header_shards(
|
||||
own_elements, start_line, end_line, first_token_is_header=False
|
||||
)
|
||||
return append_children(own_shard, child_shards)
|
||||
|
||||
|
||||
def parse_markdown_file(file_name: str, file_content: str) -> StreamFile:
|
||||
|
|
@ -209,7 +246,7 @@ def parse_markdown_file(file_name: str, file_content: str) -> StreamFile:
|
|||
line_count = len(file_content.splitlines())
|
||||
|
||||
if block_tokens := ast.children:
|
||||
shard = parse_header_shards(block_tokens, line_count)
|
||||
shard = parse_header_shards(block_tokens, 1, line_count)
|
||||
|
||||
return StreamFile(shard=shard, filename=file_name)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue