feat: also parse within formatting
Signed-off-by: Konstantin Fickel <mail@konstantinfickel.de>
This commit is contained in:
parent
fa85017ce3
commit
8f5a000c5c
3 changed files with 186 additions and 34 deletions
|
|
@ -1,45 +1,84 @@
|
|||
import re
|
||||
from typing import Iterable
|
||||
from mistletoe.block_token import BlockToken
|
||||
from mistletoe.span_token import RawText
|
||||
from mistletoe.span_token import Emphasis, RawText, Strikethrough, Strong, Link
|
||||
from mistletoe.token import Token
|
||||
|
||||
from .markdown_tag import Tag
|
||||
|
||||
|
||||
def extract_tags(tokens: Iterable[Token]) -> list[str]:
    """Return the ``content`` of every ``Tag`` instance found in *tokens*.

    Only the given tokens themselves are inspected; their children are not
    visited. Tokens that are not ``Tag`` instances are ignored.
    """
    tag_contents: list[str] = []
    for candidate in tokens:
        if isinstance(candidate, Tag):
            tag_contents.append(candidate.content)
    return tag_contents
|
||||
def extract_markers_and_tags_from_single_token(
    token: Token,
    marker_boundary_encountered: bool,
    return_at_first_marker: bool = False,
) -> tuple[list[str], list[str], bool]:
    """Classify a single token as marker, tag, whitespace or boundary.

    A ``Tag`` counts as a *marker* while no boundary has been seen yet and as
    a plain *tag* afterwards. Formatting containers (``Emphasis``, ``Strong``,
    ``Strikethrough``, ``Link``) are searched recursively through their
    children. Whitespace-only ``RawText`` is skipped without effect. Any
    other token sets the boundary flag.

    Args:
        token: The token to inspect.
        marker_boundary_encountered: Whether a boundary was already seen
            before this token.
        return_at_first_marker: Forwarded unchanged to the recursive call for
            formatting containers.

    Returns:
        A ``(markers, tags, marker_boundary_encountered)`` tuple holding the
        marker contents and tag contents contributed by this token, and the
        boundary flag after processing it.
    """
    if isinstance(token, Tag):
        # A tag never changes the boundary state; it is only sorted by it.
        if marker_boundary_encountered:
            return [], [token.content], marker_boundary_encountered
        return [token.content], [], marker_boundary_encountered

    if isinstance(token, (Emphasis, Strong, Strikethrough, Link)):
        nested_markers, nested_tags, nested_boundary = (
            extract_markers_and_tags_from_tokens(
                token.children or [],
                marker_boundary_encountered,
                return_at_first_marker,
            )
        )
        return (
            list(nested_markers),
            list(nested_tags),
            marker_boundary_encountered or nested_boundary,
        )

    if isinstance(token, RawText) and re.match(r"^[\s]*$", token.content):
        # Whitespace between markers does not end the marker section.
        return [], [], marker_boundary_encountered

    # Anything else ends the marker section.
    return [], [], True
|
||||
|
||||
|
||||
def extract_markers_and_tags_from_tokens(
    tokens: Iterable[Token],
    marker_boundary_encountered: bool,
    return_at_first_marker: bool = False,
) -> tuple[list[str], list[str], bool]:
    """Collect markers and tags from a sequence of tokens, in order.

    Each token is passed to ``extract_markers_and_tags_from_single_token``
    together with the running boundary flag, so a boundary found in one token
    affects how all later tokens are classified.

    Args:
        tokens: The tokens to scan.
        marker_boundary_encountered: Whether a boundary was already seen
            before the first token of *tokens*.
        return_at_first_marker: When true, stop scanning as soon as the answer
            to "is there a marker?" is known: either a marker was found or the
            boundary was reached, after which no further marker can be
            produced. The returned tag list is then only partial and should
            not be relied upon.

    Returns:
        A ``(markers, tags, marker_boundary_encountered)`` tuple with the
        collected marker contents, the collected tag contents and the boundary
        flag after the last processed token.
    """
    result_markers: list[str] = []
    result_tags: list[str] = []
    boundary_encountered = marker_boundary_encountered

    for child in tokens:
        markers, tags, child_boundary_encountered = (
            extract_markers_and_tags_from_single_token(
                child, boundary_encountered, return_at_first_marker
            )
        )
        result_markers.extend(markers)
        result_tags.extend(tags)
        # Combine with the running value (not the initial argument) so that a
        # boundary, once seen, can never be un-seen by a later token.
        boundary_encountered = boundary_encountered or child_boundary_encountered

        if return_at_first_marker and (result_markers or boundary_encountered):
            break

    return result_markers, result_tags, boundary_encountered
|
||||
|
||||
|
||||
def extract_markers_and_tags(block_token: BlockToken) -> tuple[list[str], list[str]]:
    """Return the marker contents and tag contents of *block_token*.

    Delegates to ``extract_markers_and_tags_from_tokens`` so that tags nested
    inside formatting tokens are found as well. Scanning starts with no
    boundary encountered.

    Args:
        block_token: The block whose children are scanned. A block whose
            ``children`` is ``None`` or empty yields two empty lists.

    Returns:
        A ``(markers, tags)`` tuple of tag content strings.
    """
    markers, tags, _ = extract_markers_and_tags_from_tokens(
        block_token.children or [], False
    )
    return markers, tags
|
||||
|
||||
|
||||
def has_markers(block_token: BlockToken) -> bool:
    """Return whether *block_token* contains at least one marker.

    Delegates to ``extract_markers_and_tags_from_tokens`` with
    ``return_at_first_marker=True``, so tags nested inside formatting tokens
    are considered as well. Only the markers part of the result is used.

    Args:
        block_token: The block whose children are scanned. A block whose
            ``children`` is ``None`` or empty has no markers.

    Returns:
        ``True`` if at least one marker was found, ``False`` otherwise.
    """
    markers, _, _ = extract_markers_and_tags_from_tokens(
        block_token.children or [], False, return_at_first_marker=True
    )
    return bool(markers)
|
||||
|
||||
|
||||
# Public API of this module. Only the block-level entry points are exported.
__all__ = ["extract_markers_and_tags", "has_markers"]
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ from mistletoe.span_token import SpanToken
|
|||
|
||||
class Tag(SpanToken):
    """Span token for an ``@tag`` occurrence in inline text.

    The pattern matches ``@`` followed by one or more characters that are not
    whitespace, ``*``, a backtick (written as ``\\x60``), ``~``, ``[`` or
    ``]``. The text after ``@`` is captured as group 1.
    """

    # NOTE(review): per mistletoe's SpanToken convention this should keep the
    # matched text from being parsed into child tokens; confirm against the
    # mistletoe version in use.
    parse_inner = False
    # Formatting delimiters are excluded, presumably so that a tag written
    # inside formatting ends before the closing delimiter.
    pattern = re.compile(r"@([^\s*\x60~\[\]]+)")
|
||||
|
||||
|
||||
class TagMarkdownRenderer(MarkdownRenderer):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue