1"""Block-level tokenizer."""
2
3from __future__ import annotations
4
5import logging
6from typing import TYPE_CHECKING, Callable
7
8from . import rules_block
9from .ruler import Ruler
10from .rules_block.state_block import StateBlock
11from .token import Token
12from .utils import EnvType
13
14if TYPE_CHECKING:
15 from markdown_it import MarkdownIt
16
17LOGGER = logging.getLogger(__name__)
18
19
20RuleFuncBlockType = Callable[[StateBlock, int, int, bool], bool]
21"""(state: StateBlock, startLine: int, endLine: int, silent: bool) -> matched: bool)
22
23`silent` disables token generation, useful for lookahead.
24"""
25
# Ordered rule table consumed by ParserBlock.__init__.
# Rules are pushed into the Ruler in this order, and ParserBlock.tokenize
# tries them top-to-bottom until one matches — so the order is significant
# ("paragraph" comes last as the catch-all).
_rules: list[tuple[str, RuleFuncBlockType, list[str]]] = [
    # First 2 params - rule name & source. Secondary array - list of rules,
    # which can be terminated by this one.
    ("table", rules_block.table, ["paragraph", "reference"]),
    ("code", rules_block.code, []),
    ("fence", rules_block.fence, ["paragraph", "reference", "blockquote", "list"]),
    (
        "blockquote",
        rules_block.blockquote,
        ["paragraph", "reference", "blockquote", "list"],
    ),
    ("hr", rules_block.hr, ["paragraph", "reference", "blockquote", "list"]),
    ("list", rules_block.list_block, ["paragraph", "reference", "blockquote"]),
    ("reference", rules_block.reference, []),
    ("html_block", rules_block.html_block, ["paragraph", "reference", "blockquote"]),
    ("heading", rules_block.heading, ["paragraph", "reference", "blockquote"]),
    ("lheading", rules_block.lheading, []),
    ("paragraph", rules_block.paragraph, []),
]
45
46
class ParserBlock:
    """
    ParserBlock#ruler -> Ruler

    [[Ruler]] instance. Keep configuration of block rules.
    """

    def __init__(self) -> None:
        # Register every built-in block rule, preserving table order,
        # together with the list of rules it may terminate ("alt").
        self.ruler = Ruler[RuleFuncBlockType]()
        for rule_name, rule_func, terminated_rules in _rules:
            self.ruler.push(rule_name, rule_func, {"alt": terminated_rules})

    def tokenize(self, state: StateBlock, startLine: int, endLine: int) -> None:
        """Generate tokens for input range."""
        rules = self.ruler.getRules("")
        max_nesting = state.md.options.maxNesting
        had_empty_line = False
        current = startLine

        while current < endLine:
            current = state.skipEmptyLines(current)
            state.line = current
            if current >= endLine:
                break

            # Termination condition for nested calls.
            # Nested calls currently used for blockquotes & lists.
            if state.sCount[current] < state.blkIndent:
                break

            # If nesting level exceeded - skip tail to the end.
            # That's not ordinary situation and we should not care about content.
            if state.level >= max_nesting:
                state.line = endLine
                break

            # Try all possible rules.
            # On success, rule should:
            # - update `state.line`
            # - update `state.tokens`
            # - return True
            for rule in rules:
                if rule(state, current, endLine, False):
                    break

            # set state.tight if we had an empty line before current tag
            # i.e. latest empty line should not count
            state.tight = not had_empty_line

            current = state.line

            # paragraph might "eat" one newline after it in nested lists
            if (current - 1) < endLine and state.isEmpty(current - 1):
                had_empty_line = True

            if current < endLine and state.isEmpty(current):
                had_empty_line = True
                current += 1
                state.line = current

    def parse(
        self, src: str, md: MarkdownIt, env: EnvType, outTokens: list[Token]
    ) -> list[Token] | None:
        """Process input string and push block tokens into `outTokens`."""
        if not src:
            return None
        state = StateBlock(src, md, env, outTokens)
        self.tokenize(state, state.line, state.lineMax)
        return state.tokens