1"""Block-level tokenizer."""
2
3from __future__ import annotations
4
5from collections.abc import Callable
6import logging
7from typing import TYPE_CHECKING
8
9from . import rules_block
10from .ruler import Ruler
11from .rules_block.state_block import StateBlock
12from .token import Token
13from .utils import EnvType
14
15if TYPE_CHECKING:
16 from markdown_it import MarkdownIt
17
18LOGGER = logging.getLogger(__name__)
19
20
RuleFuncBlockType = Callable[[StateBlock, int, int, bool], bool]
"""(state: StateBlock, startLine: int, endLine: int, silent: bool) -> matched: bool

`silent` disables token generation, useful for lookahead.
"""
26
_rules: list[tuple[str, RuleFuncBlockType, list[str]]] = [
    # Each entry: (rule name, rule function, names of rules that this rule
    # can terminate).  NOTE: order is load-bearing — ParserBlock.tokenize
    # tries rules in sequence, and `paragraph` is the catch-all last entry.
    ("table", rules_block.table, ["paragraph", "reference"]),
    ("code", rules_block.code, []),
    ("fence", rules_block.fence, ["paragraph", "reference", "blockquote", "list"]),
    (
        "blockquote",
        rules_block.blockquote,
        ["paragraph", "reference", "blockquote", "list"],
    ),
    ("hr", rules_block.hr, ["paragraph", "reference", "blockquote", "list"]),
    ("list", rules_block.list_block, ["paragraph", "reference", "blockquote"]),
    ("reference", rules_block.reference, []),
    ("html_block", rules_block.html_block, ["paragraph", "reference", "blockquote"]),
    ("heading", rules_block.heading, ["paragraph", "reference", "blockquote"]),
    ("lheading", rules_block.lheading, []),
    ("paragraph", rules_block.paragraph, []),
]
46
47
class ParserBlock:
    """
    ParserBlock#ruler -> Ruler

    [[Ruler]] instance. Keep configuration of block rules.
    """

    def __init__(self) -> None:
        # Populate the ruler with the default block rules together with
        # the list of rule names each one may terminate.
        self.ruler = Ruler[RuleFuncBlockType]()
        for rule_name, rule_func, terminated in _rules:
            self.ruler.push(rule_name, rule_func, {"alt": terminated})

    def tokenize(self, state: StateBlock, startLine: int, endLine: int) -> None:
        """Generate tokens for input range."""
        rules = self.ruler.getRules("")
        max_nesting = state.md.options.maxNesting
        had_empty_line = False
        current = startLine

        while current < endLine:
            current = state.skipEmptyLines(current)
            state.line = current
            if current >= endLine:
                break

            # Termination condition for nested calls (currently used for
            # blockquotes & lists): stop once indentation drops below the
            # block indent.
            if state.sCount[current] < state.blkIndent:
                break

            # Nesting level exceeded — not an ordinary situation, so skip
            # the remaining lines without caring about their content.
            if state.level >= max_nesting:
                state.line = endLine
                break

            # Try rules in order until one matches.  On success a rule
            # updates `state.line`, pushes onto `state.tokens` and
            # returns True.
            for rule in rules:
                if rule(state, current, endLine, False):
                    break

            # Record whether an empty line preceded the current tag —
            # the latest empty line itself must not count.
            state.tight = not had_empty_line

            current = state.line

            # A paragraph might "eat" one newline after it in nested lists.
            if (current - 1) < endLine and state.isEmpty(current - 1):
                had_empty_line = True

            if current < endLine and state.isEmpty(current):
                had_empty_line = True
                current += 1
                state.line = current

    def parse(
        self, src: str, md: MarkdownIt, env: EnvType, outTokens: list[Token]
    ) -> list[Token] | None:
        """Process input string and push block tokens into `outTokens`."""
        if not src:
            return None
        block_state = StateBlock(src, md, env, outTokens)
        self.tokenize(block_state, block_state.line, block_state.lineMax)
        return block_state.tokens