1import re
2import string
3from typing import TYPE_CHECKING, Match
4
5if TYPE_CHECKING:
6 from ..block_parser import BlockParser
7 from ..core import BlockState, InlineState
8 from ..inline_parser import InlineParser
9 from ..markdown import Markdown
10
# Reaching paragraphs and plain text only after other rules mismatch is too
# slow, so dedicated parsers are added for paragraph and text.
12
# Optional spaces, a newline, then any run of following whitespace.
HARD_LINEBREAK_RE = re.compile(r" *\n\s*")

# One or more consecutive newline-terminated lines, each of which begins with
# a character that is not whitespace, not a digit and not ASCII punctuation.
PARAGRAPH = rf"(?:^[^\s\d{re.escape(string.punctuation)}][^\n]*\n)+"
18
# Only the plugin entry point is exported from this module.
__all__ = ["speedup"]
20
21
def parse_text(inline: "InlineParser", m: Match[str], state: "InlineState") -> int:
    """Process a matched run of inline text and return where the match ends.

    Every occurrence of ``HARD_LINEBREAK_RE`` in the matched text (optional
    spaces, a newline and any following whitespace) is replaced by a single
    newline before the text is passed to ``inline.process_text``.

    :param inline: parser whose ``process_text`` receives the cleaned text.
    :param m: regex match covering the text run.
    :param state: inline state forwarded unchanged to ``process_text``.
    :return: end offset of the match.
    """
    cleaned = HARD_LINEBREAK_RE.sub("\n", m.group(0))
    inline.process_text(cleaned, state)
    return m.end()
27
28
def parse_paragraph(block: "BlockParser", m: Match[str], state: "BlockState") -> int:
    """Add the whole matched text to ``state`` as a paragraph.

    :param block: the block parser; not used by this handler.
    :param m: regex match covering the paragraph lines.
    :param state: block state whose ``add_paragraph`` receives the text.
    :return: end offset of the match.
    """
    state.add_paragraph(m.group(0))
    return m.end()
33
34
def speedup(md: "Markdown") -> None:
    """Increase the speed of parsing paragraph and inline text.

    Registers a ``paragraph`` rule on ``md.block`` and a ``text`` rule on
    ``md.inline``. The text rule lazily consumes characters up to (but not
    including) the first position matching one of the lookahead alternatives
    assembled below.

    :param md: the Markdown instance whose block and inline parsers receive
        the extra rules.
    """
    md.block.register("paragraph", PARAGRAPH, parse_paragraph)

    inline = md.inline

    # Lookahead alternatives, in order; the first is a class of single
    # characters that end a text run.
    stops = [r"[\\><!\[_*`~\^\$=]"]
    if "url_link" in inline.rules:
        # Only stop at a URL scheme when the url_link rule is present.
        stops.append("https?:")
    # With hard_wrap any newline ends the run; otherwise only a newline
    # preceded by at least two spaces does.
    stops.append(r" *\n" if inline.hard_wrap else r" {2,}\n")
    stops.append(r"$")

    text_pattern = r"[\s\S]+?(?=" + "|".join(stops) + r")"
    inline.register("text", text_pattern, parse_text)