1# Lists
2import logging
3
4from ..common.utils import isStrSpace
5from .state_block import StateBlock
6
7LOGGER = logging.getLogger(__name__)
8
9
10# Search `[-+*][\n ]`, returns next pos after marker on success
11# or -1 on fail.
def skipBulletListMarker(state: StateBlock, startLine: int) -> int:
    """Match a bullet list marker at the first non-indent char of a line.

    The marker is one of ``*``, ``-`` or ``+``.  When another character
    follows it on the same line, that character must satisfy ``isStrSpace``.

    :param state: block state providing ``src``, ``bMarks``, ``tShift``
        and ``eMarks``.
    :param startLine: index of the line to inspect.
    :return: position right after the marker, or ``-1`` when the line does
        not start with a bullet marker.
    """
    marker_pos = state.bMarks[startLine] + state.tShift[startLine]
    line_end = state.eMarks[startLine]

    try:
        bullet = state.src[marker_pos]
    except IndexError:
        # Line starts past the end of the source: nothing to match.
        return -1

    if bullet not in {"*", "-", "+"}:
        return -1

    after_marker = marker_pos + 1

    # " -test " - is not a list item
    if after_marker < line_end and not isStrSpace(state.src[after_marker]):
        return -1

    return after_marker
33
34
35# Search `\d+[.)][\n ]`, returns next pos after marker on success
36# or -1 on fail.
def skipOrderedListMarker(state: StateBlock, startLine: int) -> int:
    """Match an ordered list marker at the first non-indent char of a line.

    The marker is one to nine ASCII digits followed by ``.`` or ``)``.  When
    another character follows it on the same line, that character must
    satisfy ``isStrSpace``.

    :param state: block state providing ``src``, ``bMarks``, ``tShift``
        and ``eMarks``.
    :param startLine: index of the line to inspect.
    :return: position right after the delimiter, or ``-1`` when the line
        does not start with an ordered list marker.
    """
    begin = state.bMarks[startLine] + state.tShift[startLine]
    line_end = state.eMarks[startLine]

    # List marker should have at least 2 chars (digit + dot)
    if begin + 1 >= line_end:
        return -1

    src = state.src

    # The marker has to open with an ASCII digit.
    if not ("0" <= src[begin] <= "9"):
        return -1

    for cursor in range(begin + 1, line_end):
        ch = src[cursor]

        if "0" <= ch <= "9":
            # List marker should have no more than 9 digits
            # (prevents integer overflow in browsers)
            if cursor + 1 - begin >= 10:
                return -1
            continue

        # Anything other than a closing delimiter is not a marker.
        if ch not in (")", "."):
            return -1

        # found valid marker
        after_marker = cursor + 1
        break
    else:
        # EOL -> fail
        return -1

    # " 1.test " - is not a list item
    if after_marker < line_end and not isStrSpace(src[after_marker]):
        return -1

    return after_marker
86
87
def markTightParagraphs(state: StateBlock, idx: int) -> None:
    """Hide the paragraph wrappers that sit directly inside list items.

    Starting two tokens after *idx*, every ``paragraph_open`` token whose
    level equals ``state.level + 2`` is flagged ``hidden``, together with the
    token two positions after it.

    :param state: block state holding the token stream and current level.
    :param idx: index of the list's opening token in ``state.tokens``.
    """
    tokens = state.tokens
    paragraph_level = state.level + 2
    stop = len(tokens) - 2

    cursor = idx + 2
    while cursor < stop:
        candidate = tokens[cursor]
        if candidate.type == "paragraph_open" and candidate.level == paragraph_level:
            candidate.hidden = True
            tokens[cursor + 2].hidden = True
            # Jump over the inline token and the paragraph's closing token.
            cursor += 3
        else:
            cursor += 1
99
100
def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule: parse a bullet or ordered list starting at *startLine*.

    The rule recognises the list marker on *startLine*, then consumes list
    items one after another, running the block tokenizer on each item's
    content, until a line is under-indented, is an indented code block, is
    matched by one of the "list" terminator rules, or carries a marker of a
    different kind or character.

    When the ``tasklists`` option of ``state.md.options`` is truthy, an item
    whose content starts with ``[ ]``, ``[x]`` or ``[X]`` followed by a space
    or tab gets ``meta["checked"]`` set and the checkbox text is excluded
    from the item's content.  Such items receive the ``task-list-item`` class
    and the list receives ``contains-task-list``.  Content indented more than
    four columns past the marker is parsed as an indented code block and is
    never treated as a checkbox.

    :param state: block parser state; tokens are pushed onto it and
        ``state.line`` is advanced past the list.
    :param startLine: first line of the candidate list.
    :param endLine: line limit (exclusive) for this parse.
    :param silent: validation mode; when true, only report whether a list
        starts here, without pushing tokens.
    :return: ``True`` if a list starts at *startLine*, ``False`` otherwise.
    """
    LOGGER.debug("entering list: %s, %s, %s, %s", state, startLine, endLine, silent)

    isTerminatingParagraph = False
    tight = True

    if state.is_code_block(startLine):
        return False

    # Special case:
    #  - item 1
    #   - item 2
    #    - item 3
    #     - item 4
    #      - this one is a paragraph continuation
    if (
        state.listIndent >= 0
        and state.sCount[startLine] - state.listIndent >= 4
        and state.sCount[startLine] < state.blkIndent
    ):
        return False

    # limit conditions when list can interrupt
    # a paragraph (validation mode only)
    # Next list item should still terminate previous list item
    #
    # This code can fail if plugins use blkIndent as well as lists,
    # but I hope the spec gets fixed long before that happens.
    #
    if (
        silent
        and state.parentType == "paragraph"
        and state.sCount[startLine] >= state.blkIndent
    ):
        isTerminatingParagraph = True

    # Detect list type and position after marker
    posAfterMarker = skipOrderedListMarker(state, startLine)
    if posAfterMarker >= 0:
        isOrdered = True
        start = state.bMarks[startLine] + state.tShift[startLine]
        markerValue = int(state.src[start : posAfterMarker - 1])

        # If we're starting a new ordered list right after
        # a paragraph, it should start with 1.
        if isTerminatingParagraph and markerValue != 1:
            return False
    else:
        posAfterMarker = skipBulletListMarker(state, startLine)
        if posAfterMarker >= 0:
            isOrdered = False
        else:
            return False

    # If we're starting a new unordered list right after
    # a paragraph, first line should not be empty.
    if (
        isTerminatingParagraph
        and state.skipSpaces(posAfterMarker) >= state.eMarks[startLine]
    ):
        return False

    # We should terminate list on style change. Remember first one to compare.
    markerChar = state.src[posAfterMarker - 1]

    # For validation mode we can terminate immediately
    if silent:
        return True

    # The option does not change while this list is parsed; look it up once.
    tasklistsEnabled = state.md.options.get("tasklists", False)

    # Start list
    listTokIdx = len(state.tokens)

    if isOrdered:
        token = state.push("ordered_list_open", "ol", 1)
        if markerValue != 1:
            token.attrs = {"start": markerValue}

    else:
        token = state.push("bullet_list_open", "ul", 1)

    token.map = listLines = [startLine, 0]
    token.markup = markerChar

    #
    # Iterate list items
    #

    nextLine = startLine
    prevEmptyEnd = False
    terminatorRules = state.md.block.ruler.getRules("list")

    oldParentType = state.parentType
    state.parentType = "list"

    while nextLine < endLine:
        pos = posAfterMarker
        maximum = state.eMarks[nextLine]

        initial = offset = (
            state.sCount[nextLine]
            + posAfterMarker
            - (state.bMarks[startLine] + state.tShift[startLine])
        )

        # Measure the whitespace between the marker and the item content,
        # expanding tabs to 4-column tab stops.
        while pos < maximum:
            ch = state.src[pos]

            if ch == "\t":
                offset += 4 - (offset + state.bsCount[nextLine]) % 4
            elif ch == " ":
                offset += 1
            else:
                break

            pos += 1

        contentStart = pos

        # trimming space in "-    \n  3" case, indent is 1 here
        indentAfterMarker = 1 if contentStart >= maximum else offset - initial

        # If we have more than 4 spaces, the indent is 1
        # (the rest is just indented code block)
        isIndentedCode = indentAfterMarker > 4
        if isIndentedCode:
            indentAfterMarker = 1

        # "  -  test"
        #  ^^^^^ - calculating total length of this thing
        indent = initial + indentAfterMarker

        # Run subparser & write tokens
        token = state.push("list_item_open", "li", 1)
        token.markup = markerChar
        token.map = itemLines = [startLine, 0]
        if isOrdered:
            token.info = state.src[start : posAfterMarker - 1]

        # Detect GFM task checkbox: `[ ] ` or `[x] `/`[X] ` at content start.
        # Skipped when the content is an indented code block: there the
        # brackets are literal code text and must stay in the content.
        checkboxLen = 0
        if tasklistsEnabled and not isIndentedCode and contentStart < maximum:
            checked = _detect_task_checkbox(state.src, contentStart, maximum)
            if checked is not None:
                token.meta = {"checked": checked}
                # Advance content past the checkbox: `[x]` (3 chars) + whitespace.
                # `_detect_task_checkbox` already guarantees a whitespace char at
                # pos+3, so we always consume 4 characters.
                checkboxLen = 4

        # change current state, then restore it after parser subcall
        oldTight = state.tight
        oldBMark = state.bMarks[startLine]
        oldTShift = state.tShift[startLine]
        oldSCount = state.sCount[startLine]

        #  - example list
        # ^ listIndent position will be here
        #   ^ blkIndent position will be here
        #
        oldListIndent = state.listIndent
        state.listIndent = state.blkIndent
        state.blkIndent = indent

        state.tight = True
        state.tShift[startLine] = contentStart - state.bMarks[startLine]
        state.sCount[startLine] = offset

        # If we detected a checkbox, advance bMarks past it so that
        # getLines() doesn't include the checkbox text in the content.
        if checkboxLen:
            state.bMarks[startLine] = contentStart + checkboxLen
            state.tShift[startLine] = 0

        if contentStart >= maximum and state.isEmpty(startLine + 1):
            # workaround for this case
            # (list item is empty, list terminates before "foo"):
            # ~~~~~~~~
            #   -
            #
            #     foo
            # ~~~~~~~~
            state.line = min(state.line + 2, endLine)
        else:
            # NOTE in list.js this was:
            # state.md.block.tokenize(state, startLine, endLine, True)
            # but tokenize does not take the final parameter
            state.md.block.tokenize(state, startLine, endLine)

        # If any of list item is tight, mark list as tight
        if (not state.tight) or prevEmptyEnd:
            tight = False

        # Item become loose if finish with empty line,
        # but we should filter last element, because it means list finish
        prevEmptyEnd = (state.line - startLine) > 1 and state.isEmpty(state.line - 1)

        state.blkIndent = state.listIndent
        state.listIndent = oldListIndent
        if checkboxLen:
            state.bMarks[startLine] = oldBMark
        state.tShift[startLine] = oldTShift
        state.sCount[startLine] = oldSCount
        state.tight = oldTight

        token = state.push("list_item_close", "li", -1)
        token.markup = markerChar

        nextLine = startLine = state.line
        itemLines[1] = nextLine

        if nextLine >= endLine:
            break

        #
        # Try to check if list is terminated or continued.
        #
        if state.sCount[nextLine] < state.blkIndent:
            break

        if state.is_code_block(startLine):
            break

        # fail if terminating block found
        terminate = False
        for terminatorRule in terminatorRules:
            if terminatorRule(state, nextLine, endLine, True):
                terminate = True
                break

        if terminate:
            break

        # fail if list has another type
        if isOrdered:
            posAfterMarker = skipOrderedListMarker(state, nextLine)
            if posAfterMarker < 0:
                break
            start = state.bMarks[nextLine] + state.tShift[nextLine]
        else:
            posAfterMarker = skipBulletListMarker(state, nextLine)
            if posAfterMarker < 0:
                break

        if markerChar != state.src[posAfterMarker - 1]:
            break

    # Finalize list

    # If any direct list item has a task checkbox, add class to the list
    if tasklistsEnabled:
        containsTask = False
        # Direct items sit exactly one level below the list's opening token;
        # items of nested lists are deeper and are left untouched here.
        itemLevel = state.tokens[listTokIdx].level + 1
        for tok in state.tokens[listTokIdx + 1 :]:
            if (
                tok.level == itemLevel
                and tok.type == "list_item_open"
                and tok.meta
                and "checked" in tok.meta
            ):
                tok.attrJoin("class", "task-list-item")
                containsTask = True
        if containsTask:
            state.tokens[listTokIdx].attrJoin("class", "contains-task-list")

    if isOrdered:
        token = state.push("ordered_list_close", "ol", -1)
    else:
        token = state.push("bullet_list_close", "ul", -1)

    token.markup = markerChar

    listLines[1] = nextLine
    state.line = nextLine

    state.parentType = oldParentType

    # mark paragraphs tight if needed
    if tight:
        markTightParagraphs(state, listTokIdx)

    return True
384
385
386def _detect_task_checkbox(src: str, pos: int, maximum: int) -> bool | None:
387 """Detect ``[ ]``, ``[x]``, or ``[X]`` at *pos*, followed by whitespace.
388
389 Returns ``True`` (checked), ``False`` (unchecked), or ``None`` (no match).
390 """
391 # Need at least 4 chars: `[`, char, `]`, whitespace
392 if pos + 4 > maximum:
393 return None
394 if src[pos] != "[":
395 return None
396 inner = src[pos + 1]
397 if src[pos + 2] != "]":
398 return None
399 if inner == " ":
400 checked = False
401 elif inner in ("x", "X"):
402 checked = True
403 else:
404 return None
405 # After `]`, must have whitespace
406 if src[pos + 3] not in (" ", "\t"):
407 return None
408 return checked