Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/mistune/list_parser.py: 98%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""because list is complex, split list parser in a new file"""
3from __future__ import annotations
5import re
6from typing import TYPE_CHECKING, Any, Iterable, Optional, Match
7from .util import expand_leading_tab, expand_tab, strip_end
9if TYPE_CHECKING:
10 from .block_parser import BlockParser
11 from .core import BlockState
# Regex for the first line of a list item. Named groups:
#   list_1 - zero to three spaces of indentation
#   list_2 - the marker: a bullet (*, + or -) or 1-9 digits followed by . or )
#   list_3 - the rest of the line: only spaces/tabs, or one space/tab
#            followed by content
LIST_PATTERN = (
    r"^(?P<list_1> {0,3})"
    r"(?P<list_2>[\*\+-]|\d{1,9}[.)])"
    r"(?P<list_3>[ \t]*|[ \t].+)$"
)

# Matches text that contains a non-whitespace character; group 1 captures
# the whitespace in front of it.
_LINE_HAS_TEXT = re.compile(r"(\s*)\S")
def parse_list(block: "BlockParser", m: Match[str], state: "BlockState") -> int:
    """Parse tokens for ordered and unordered list.

    :param block: block parser supplying ``max_nested_level`` and
        ``list_rules``.
    :param m: match exposing the ``list_1``/``list_2``/``list_3`` groups
        (indent, marker, rest of line) of the first list item.
    :param state: block state; its cursor is advanced past the list and the
        resulting ``list`` token is added to its tokens.
    :return: the position at which the caller should resume parsing.
    """
    text = m.group("list_3")
    if not text.strip():
        # Example 285
        # an empty list item cannot interrupt a paragraph
        end_pos = state.append_paragraph()
        if end_pos:
            return end_pos

    marker = m.group("list_2")
    # Bullet markers are a single character; ordered markers are digits
    # followed by a delimiter, hence longer than one character.
    ordered = len(marker) > 1
    depth = state.depth()
    token: dict[str, Any] = {
        "type": "list",
        "children": [],
        "tight": True,
        "bullet": marker[-1],
        "attrs": {
            "depth": depth,
            "ordered": ordered,
        },
    }
    if ordered:
        start = int(marker[:-1])
        if start != 1:
            # Example 304
            # we allow only lists starting with 1 to interrupt paragraphs
            end_pos = state.append_paragraph()
            if end_pos:
                return end_pos
            token["attrs"]["start"] = start

    # Skip past the matched first line and its line break.
    state.cursor = m.end() + 1
    groups: Optional[tuple[str, str, str]] = (m.group("list_1"), marker, text)

    if depth >= block.max_nested_level - 1:
        # At the nesting limit: parse item content without the "list" rule.
        # A filtered copy leaves block.list_rules untouched and does not
        # fail when "list" is absent from a customised rule set.
        rules = [name for name in block.list_rules if name != "list"]
    else:
        rules = block.list_rules

    bullet = _get_list_bullet(marker[-1])
    # Each call consumes one item and returns the groups of the next sibling
    # item, or None once the list has ended.
    while groups:
        groups = _parse_list_item(block, bullet, groups, token, state, rules)

    # Set by _parse_list_item when an interrupting block reported an end
    # position; that block's tokens were appended at/after _tok_index.
    end_pos = token.pop("_end_pos", None)
    _transform_tight_list(token)
    if end_pos:
        # Put the list token in front of the tokens of the interrupting block.
        index = token.pop("_tok_index")
        state.tokens.insert(index, token)
        return end_pos  # type: ignore[no-any-return]

    state.append_token(token)
    return state.cursor
def _transform_tight_list(token: dict[str, Any]) -> None:
    """Rewrite the paragraphs of a tight list as ``block_text``, in place.

    Does nothing when ``token["tight"]`` is false. Otherwise every direct
    ``paragraph`` child of each list item becomes ``block_text`` and nested
    ``list`` tokens are processed recursively (each according to its own
    ``tight`` flag).

    :param token: a ``list`` token with ``tight`` and ``children`` keys.
    """
    if not token["tight"]:
        return

    # reset tight list item
    for list_item in token["children"]:
        for tok in list_item["children"]:
            if tok["type"] == "paragraph":
                tok["type"] = "block_text"
            elif tok["type"] == "list":
                _transform_tight_list(tok)
def _parse_list_item(
    block: "BlockParser",
    bullet: str,
    groups: tuple[str, str, str],
    token: dict[str, Any],
    state: "BlockState",
    rules: list[str],
) -> tuple[str, str, str] | None:
    """Parse one list item and append it to ``token["children"]``.

    Lines are consumed from ``state`` until the item ends: a sibling item
    starts, a different list starts, an interrupting block reports an end
    position, or a non-continuation line follows a blank line. The collected
    text is then parsed in a child state with ``rules``.

    :param block: block parser providing ``specification``, ``BLANK_LINE``,
        ``parse_method`` and ``parse``.
    :param bullet: regex fragment matching the marker of sibling items.
    :param groups: ``(leading spaces, marker, rest of line)`` of this item.
    :param token: the list token under construction. ``tight`` may be set to
        ``False``; ``_tok_index`` and ``_end_pos`` are set when an
        interrupting block returns an end position.
    :param state: block state whose cursor is advanced past consumed lines.
    :param rules: rule names used to parse the content of the item.
    :return: the groups of the next sibling item, or ``None`` when no sibling
        item follows.
    """
    spaces, marker, text = groups

    leading_width = len(spaces) + len(marker)
    text, continue_width = _compile_continue_width(text, leading_width)
    item_pattern = _compile_list_item_pattern(bullet, leading_width)
    list_item_breaks = [
        "thematic_break",
        "fenced_code",
        "atx_heading",
        "block_quote",
        "block_html",
        "list",
    ]
    if "fenced_directive" in block.specification:
        list_item_breaks.insert(1, "fenced_directive")

    pairs = [(name, block.specification[name]) for name in list_item_breaks]
    if leading_width < 3:
        # Replace the first "3" of every break pattern with the narrower
        # width (presumably the " {0,3}" indent allowance -- confirm against
        # the patterns registered in block.specification).
        _repl_w = str(leading_width)
        pairs = [(n, p.replace("3", _repl_w, 1)) for n, p in pairs]

    # The sibling-item pattern is tried right after the first break pattern.
    pairs.insert(1, ("list_item", item_pattern))
    # Every alternative must start directly after a line break.
    regex = "|".join(r"(?P<%s>(?<=\n)%s)" % pair for pair in pairs)
    sc = re.compile(regex, re.M)

    src = ""
    next_group = None
    prev_blank_line = False
    pos = state.cursor

    continue_space = " " * continue_width
    while pos < state.cursor_max:
        pos = state.find_line_end()
        line = state.get_text(pos)
        if block.BLANK_LINE.match(line):
            src += "\n"
            prev_blank_line = True
            state.cursor = pos
            continue

        line = expand_leading_tab(line)
        if line.startswith(continue_space):
            if prev_blank_line and not text and not src.strip():
                # Example 280
                # A list item can begin with at most one blank line
                break

            src += line
            prev_blank_line = False
            state.cursor = pos
            continue

        # From here on the line is NOT indented enough to be a continuation.
        m = sc.match(state.src, state.cursor)
        if m:
            tok_type = m.lastgroup
            if tok_type == "list_item":
                # A blank line between sibling items makes the list loose.
                if prev_blank_line:
                    token["tight"] = False
                next_group = (m.group("listitem_1"), m.group("listitem_2"), m.group("listitem_3"))
                state.cursor = m.end() + 1
                break

            if tok_type == "list":
                # A list with a different marker: end this item and leave the
                # line unconsumed.
                break

            # Remember where the interrupting block's tokens begin so the
            # caller can insert the list token in front of them.
            tok_index = len(state.tokens)
            end_pos = block.parse_method(m, state)
            if end_pos:
                token["_tok_index"] = tok_index
                token["_end_pos"] = end_pos
                break

        if prev_blank_line:
            # not a continue line, and previous line is blank
            break

        # Otherwise the line is appended to the current item as-is.
        src += line
        state.cursor = pos

    text += _clean_list_item_text(src, continue_width)
    child = state.child_state(strip_end(text))

    block.parse(child, rules)

    if token["tight"] and _is_loose_list(child.tokens):
        token["tight"] = False

    token["children"].append(
        {
            "type": "list_item",
            "children": child.tokens,
        }
    )
    # None when the loop ended without finding a sibling item.
    return next_group
def _get_list_bullet(c: str) -> str:
    """Return the regex fragment matching sibling markers of a list.

    :param c: last character of the first item's marker: ``.`` or ``)`` for
        ordered lists, ``*``, ``+`` or ``-`` for bullet lists.
    :return: a regex fragment; any other character falls back to ``-``.
    """
    # Ordered markers need at least one digit, consistent with LIST_PATTERN
    # (``\d{1,9}[.)]``); a bare "." or ")" must not start a sibling item.
    bullets = {
        ".": r"\d{1,9}\.",
        ")": r"\d{1,9}\)",
        "*": r"\*",
        "+": r"\+",
    }
    return bullets.get(c, "-")
def _compile_list_item_pattern(bullet: str, leading_width: int) -> str:
    """Build the regex source that matches the first line of a sibling item.

    The pattern exposes the groups ``listitem_1`` (indent), ``listitem_2``
    (marker) and ``listitem_3`` (rest of the line).

    :param bullet: regex fragment for the marker.
    :param leading_width: maximum indent allowed; values above 3 are
        clamped to 3.
    :return: the pattern as a string (not compiled).
    """
    max_indent = min(leading_width, 3)
    return (
        r"^(?P<listitem_1> {0," + str(max_indent) + "})"
        r"(?P<listitem_2>" + bullet + ")"
        r"(?P<listitem_3>[ \t]*|[ \t][^\n]+)$"
    )
def _compile_continue_width(text: str, leading_width: int) -> tuple[str, int]:
    """Work out an item's first-line content and its continuation indent.

    :param text: the rest of the item's first line, after the marker.
    :param leading_width: combined width of the indent and the marker.
    :return: ``(content, continue_width)``. ``content`` is the first line
        with its leading spacing removed and a newline appended, or ``""``
        when the line has no text. ``continue_width`` is ``leading_width``
        plus the width of the spacing that was removed.
    """
    # Tabs are normalised by the util helpers before widths are measured.
    text = expand_leading_tab(text, 3)
    text = expand_tab(text)

    m2 = _LINE_HAS_TEXT.match(text)
    if m2:
        # indent code, startswith 5 spaces
        # Written as `" " * 5` so the width survives whitespace-collapsing
        # tools; only one space then belongs to the marker.
        if text.startswith(" " * 5):
            space_width = 1
        else:
            space_width = len(m2.group(1))

        text = text[space_width:] + "\n"
    else:
        # Empty item: the continuation indent is one column past the marker.
        space_width = 1
        text = ""

    continue_width = leading_width + space_width
    return text, continue_width
def _clean_list_item_text(src: str, continue_width: int) -> str:
    """Strip the continuation indent from the lines of a list item.

    Lines that start with ``continue_width`` spaces lose that prefix and are
    then passed through ``expand_tab``; all other lines are kept unchanged.

    :param src: raw text of the item's continuation lines.
    :param continue_width: number of leading spaces to remove.
    :return: the cleaned text, with the same number of lines as ``src``.
    """
    # according to Example 7, tab should be treated as 3 spaces
    rv = []
    trim_space = " " * continue_width
    for line in src.split("\n"):
        if line.startswith(trim_space):
            # according to CommonMark Example 5
            # tab should be treated as 4 spaces
            line = expand_tab(line[len(trim_space):])
        rv.append(line)

    return "\n".join(rv)
def _is_loose_list(tokens: Iterable[dict[str, Any]]) -> bool:
    """Tell whether the tokens of one list item make the list loose.

    :param tokens: tokens parsed from a list item; each has a ``type`` key.
    :return: ``True`` if there is a ``blank_line`` token or more than one
        ``paragraph`` token, otherwise ``False``.
    """
    paragraph_count = 0
    for tok in tokens:
        if tok["type"] == "blank_line":
            return True
        if tok["type"] == "paragraph":
            paragraph_count += 1
            if paragraph_count > 1:
                return True
    return False