1"""because list is complex, split list parser in a new file"""
2
3import re
4from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Match
5from .util import expand_leading_tab, expand_tab, strip_end
6
7if TYPE_CHECKING:
8 from .block_parser import BlockParser
9 from .core import BlockState
10
# Pattern for the first line of a list item, assembled from three named
# groups so that callers can read each part back from the match object.
LIST_PATTERN = "".join(
    (
        # list_1: zero to three spaces of indentation before the marker
        r"^(?P<list_1> {0,3})",
        # list_2: a bullet character, or 1-9 digits followed by "." or ")"
        r"(?P<list_2>[\*\+-]|\d{1,9}[.)])",
        # list_3: the rest of the line -- only blanks, or a blank plus content
        r"(?P<list_3>[ \t]*|[ \t].+)$",
    )
)

# Matches leading whitespace (captured as group 1) followed by one
# non-whitespace character.
_LINE_HAS_TEXT = re.compile(r"(\s*)\S")
18
19
def parse_list(block: "BlockParser", m: Match[str], state: "BlockState") -> int:
    """Parse an ordered or unordered list that starts at the matched line.

    :param block: the block parser; supplies ``max_nested_level`` and
        ``list_rules`` and is handed on to the per-item parser.
    :param m: a match carrying the ``list_1`` (indent), ``list_2`` (marker)
        and ``list_3`` (rest of line) groups.
    :param state: the block state whose cursor and token list are updated.
    :returns: the position reported by ``state.append_paragraph()`` when the
        list is not allowed to interrupt a paragraph, the ``_end_pos``
        recorded by an item when one was set, or ``state.cursor`` otherwise.
    """
    indent = m.group("list_1")
    marker = m.group("list_2")
    first_line = m.group("list_3")

    if first_line.strip() == "":
        # Example 285
        # an empty list item cannot interrupt a paragraph
        paragraph_end = state.append_paragraph()
        if paragraph_end:
            return paragraph_end

    # Bullets are a single character; ordered markers are digits plus a
    # delimiter, so anything longer than one character is an ordered marker.
    delimiter = marker[-1]
    is_ordered = len(marker) > 1
    depth = state.depth()
    attrs: Dict[str, Any] = {"depth": depth, "ordered": is_ordered}
    token: Dict[str, Any] = {
        "type": "list",
        "children": [],
        "tight": True,
        "bullet": delimiter,
        "attrs": attrs,
    }

    if is_ordered:
        start = int(marker[:-1])
        if start != 1:
            # Example 304
            # we allow only lists starting with 1 to interrupt paragraphs
            paragraph_end = state.append_paragraph()
            if paragraph_end:
                return paragraph_end
            attrs["start"] = start

    state.cursor = m.end() + 1

    # At the nesting limit, work on a copy of the rules without "list" so
    # that the items parsed below cannot open another nested list.
    rules = block.list_rules
    if depth >= block.max_nested_level - 1:
        rules = list(rules)
        rules.remove("list")

    bullet = _get_list_bullet(delimiter)
    pending: Optional[Tuple[str, str, str]] = (indent, marker, first_line)
    while pending:
        # Each call consumes one item and hands back the groups of the next
        # item, or None once the list is finished.
        pending = _parse_list_item(block, bullet, pending, token, state, rules)

    interrupt_pos = token.pop("_end_pos", None)
    _transform_tight_list(token)
    if not interrupt_pos:
        state.append_token(token)
        return state.cursor

    # An item recorded an end position together with a token index: place
    # the list token at that index instead of appending it.
    index = token.pop("_tok_index")
    state.tokens.insert(index, token)
    return interrupt_pos
75
76
def _transform_tight_list(token: Dict[str, Any]) -> None:
    """Rewrite the paragraphs of a tight list as ``block_text``, in place.

    Nothing happens when ``token["tight"]`` is falsy. Otherwise every direct
    child of every list item is visited: ``paragraph`` tokens are retyped to
    ``block_text`` and nested ``list`` tokens are processed recursively (each
    nested list is governed by its own ``tight`` flag).
    """
    if not token["tight"]:
        return

    for item in token["children"]:
        for child in item["children"]:
            kind = child["type"]
            if kind == "paragraph":
                child["type"] = "block_text"
            elif kind == "list":
                _transform_tight_list(child)
86
87
def _parse_list_item(
    block: "BlockParser",
    bullet: str,
    groups: Tuple[str, str, str],
    token: Dict[str, Any],
    state: "BlockState",
    rules: List[str],
) -> Optional[Tuple[str, str, str]]:
    """Parse one list item and append it to ``token["children"]``.

    :param block: the block parser; supplies ``specification``,
        ``BLANK_LINE``, ``parse_method`` and ``parse``.
    :param bullet: regex source for the marker of sibling items.
    :param groups: ``(indent, marker, rest_of_line)`` of this item's first line.
    :param token: the list token being built. Its ``tight`` flag may be
        cleared, and ``_tok_index`` / ``_end_pos`` may be set on it.
    :param state: the block state; its cursor is advanced line by line.
    :param rules: rule names used to parse the item's content.
    :returns: the ``(indent, marker, rest_of_line)`` groups of the next
        sibling item when one directly follows, otherwise ``None``.
    """
    spaces, marker, text = groups

    leading_width = len(spaces) + len(marker)
    text, continue_width = _compile_continue_width(text, leading_width)
    item_pattern = _compile_list_item_pattern(bullet, leading_width)
    # Block rules that are tried on a line which is not indented enough to
    # be a continuation of this item.
    pairs = [
        ("thematic_break", block.specification["thematic_break"]),
        ("fenced_code", block.specification["fenced_code"]),
        ("atx_heading", block.specification["atx_heading"]),
        ("block_quote", block.specification["block_quote"]),
        ("block_html", block.specification["block_html"]),
        ("list", block.specification["list"]),
    ]
    if leading_width < 3:
        # Replace the first "3" in each pattern with the leading width.
        # NOTE(review): presumably this narrows the " {0,3}" indent
        # quantifier of each pattern -- confirm against block.specification.
        _repl_w = str(leading_width)
        pairs = [(n, p.replace("3", _repl_w, 1)) for n, p in pairs]

    # The sibling-item pattern goes right after thematic_break in the
    # alternation order.
    pairs.insert(1, ("list_item", item_pattern))
    # Every alternative is a named group guarded by a lookbehind, so it can
    # only match directly after a newline.
    regex = "|".join(r"(?P<%s>(?<=\n)%s)" % pair for pair in pairs)
    sc = re.compile(regex, re.M)

    # src collects the raw lines that belong to this item after its first line.
    src = ""
    next_group = None
    prev_blank_line = False
    pos = state.cursor

    continue_space = " " * continue_width
    while pos < state.cursor_max:
        pos = state.find_line_end()
        line = state.get_text(pos)
        if block.BLANK_LINE.match(line):
            # Blank lines are kept as a bare newline and remembered.
            src += "\n"
            prev_blank_line = True
            state.cursor = pos
            continue

        line = expand_leading_tab(line)
        if line.startswith(continue_space):
            if prev_blank_line and not text and not src.strip():
                # Example 280
                # A list item can begin with at most one blank line
                break

            # Indented at least continue_width: the line belongs to the item.
            src += line
            prev_blank_line = False
            state.cursor = pos
            continue

        # Not a continuation line: check whether another block starts here.
        m = sc.match(state.src, state.cursor)
        if m:
            tok_type = m.lastgroup
            if tok_type == "list_item":
                # A sibling item follows; a blank line before it makes the
                # whole list loose.
                if prev_blank_line:
                    token["tight"] = False
                next_group = (m.group("listitem_1"), m.group("listitem_2"), m.group("listitem_3"))
                state.cursor = m.end() + 1
                break

            if tok_type == "list":
                # A list marker that is not a sibling item: stop here and
                # leave the cursor where it is.
                break

            # Any other rule is run on the outer state. A truthy result is
            # stored on the list token together with the token count taken
            # just before the call.
            tok_index = len(state.tokens)
            end_pos = block.parse_method(m, state)
            if end_pos:
                token["_tok_index"] = tok_index
                token["_end_pos"] = end_pos
                break

        if prev_blank_line and not line.startswith(continue_space):
            # not a continue line, and previous line is blank
            break

        # Otherwise the under-indented line is still added to the item.
        src += line
        state.cursor = pos

    # Strip the continuation indent and parse the item body as a child state.
    text += _clean_list_item_text(src, continue_width)
    child = state.child_state(strip_end(text))

    block.parse(child, rules)

    if token["tight"] and _is_loose_list(child.tokens):
        token["tight"] = False

    token["children"].append(
        {
            "type": "list_item",
            "children": child.tokens,
        }
    )
    if next_group:
        return next_group

    return None
189
190
def _get_list_bullet(c: str) -> str:
    """Return the regex source matching sibling markers for delimiter ``c``.

    ``c`` is the last character of a list marker. For the ordered delimiters
    ``.`` and ``)`` the pattern is one to nine digits followed by that
    delimiter; for ``*`` and ``+`` it is the escaped character. Any other
    value yields ``"-"``.

    An ordered marker needs at least one digit, matching ``LIST_PATTERN``;
    allowing zero digits would accept a bare ``.`` or ``)`` as an item marker.
    """
    patterns = {
        ".": r"\d{1,9}\.",
        ")": r"\d{1,9}\)",
        "*": r"\*",
        "+": r"\+",
    }
    return patterns.get(c, "-")
203
204
def _compile_list_item_pattern(bullet: str, leading_width: int) -> str:
    """Build the regex source that recognises a sibling list item line.

    The pattern exposes three named groups: ``listitem_1`` (indentation of
    at most ``leading_width`` spaces, capped at three), ``listitem_2`` (the
    marker, matched by ``bullet``) and ``listitem_3`` (the rest of the line).
    """
    max_indent = min(leading_width, 3)
    return (
        rf"^(?P<listitem_1> {{0,{max_indent}}})"
        rf"(?P<listitem_2>{bullet})"
        r"(?P<listitem_3>[ \t]*|[ \t][^\n]+)$"
    )
213
214
def _compile_continue_width(text: str, leading_width: int) -> Tuple[str, int]:
    """Work out a list item's first-line content and continuation indent.

    :param text: whatever followed the list marker on the marker's line.
    :param leading_width: width of the indentation plus the marker.
    :returns: ``(content, continue_width)``. ``content`` is ``text`` without
        the whitespace separating it from the marker, with a newline
        appended, or ``""`` when the line holds no visible text.
        ``continue_width`` is ``leading_width`` plus the width of that
        separator.
    """
    # Tabs are expanded first so that widths can be counted in characters.
    text = expand_leading_tab(text, 3)
    text = expand_tab(text)

    m2 = _LINE_HAS_TEXT.match(text)
    if not m2:
        # Nothing but whitespace after the marker: empty first line, and the
        # separator counts as a single column.
        return "", leading_width + 1

    if text.startswith(" " * 5):
        # Five or more spaces start indented code inside the item: only one
        # space belongs to the marker, the remainder is kept as content.
        # The prefix is spelled " " * 5 so its width cannot be silently
        # altered by whitespace-normalising tools.
        space_width = 1
    else:
        space_width = len(m2.group(1))

    return text[space_width:] + "\n", leading_width + space_width
234
235
def _clean_list_item_text(src: str, continue_width: int) -> str:
    """Remove the continuation indent from the lines of a list item body.

    Each line of ``src`` that starts with ``continue_width`` spaces loses
    that prefix and then has its tabs expanded with ``expand_tab``; every
    other line is passed through unchanged. Lines are re-joined with
    ``"\\n"``.
    """
    indent = " " * continue_width
    cut = len(indent)
    # according to CommonMark Example 5
    # tab should be treated as 4 spaces
    cleaned = [
        expand_tab(row[cut:]) if row.startswith(indent) else row
        for row in src.split("\n")
    ]
    return "\n".join(cleaned)
252
253
def _is_loose_list(tokens: Iterable[Dict[str, Any]]) -> bool:
    """Tell whether the tokens of one list item make the list loose.

    Returns ``True`` as soon as a ``blank_line`` token or a second
    ``paragraph`` token is seen, and ``False`` if the tokens run out first.
    """
    seen_paragraph = False
    for tok in tokens:
        kind = tok["type"]
        if kind == "blank_line":
            return True
        if kind == "paragraph":
            if seen_paragraph:
                return True
            seen_paragraph = True
    return False