Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/mistune/list_parser.py: 100%
155 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
1import re
2from .core import BlockState
3from .util import (
4 strip_end,
5 expand_tab,
6 expand_leading_tab,
7)
# List parsing is complex, so the list parser lives in its own file.
# Pattern for the first line of a list item:
#   list_1 - up to three leading spaces
#   list_2 - the marker: one of ``* + -`` or 1-9 digits followed by ``.``/``)``
#   list_3 - either only blanks, or a blank followed by the item text
LIST_PATTERN = (
    r'^(?P<list_1> {0,3})'
    r'(?P<list_2>[\*\+-]|\d{1,9}[.)])'
    r'(?P<list_3>[ \t]*|[ \t].+)$'
)

# Captures the run of spaces in front of the first non-whitespace character.
_LINE_HAS_TEXT = re.compile(r'( *)\S')
def parse_list(block, m: re.Match, state: BlockState) -> int:
    """Parse tokens for ordered and unordered list.

    ``m`` is a match carrying the ``list_1`` (leading spaces), ``list_2``
    (marker) and ``list_3`` (text) groups. The list token is added to
    ``state`` and the position at which parsing should resume is returned.
    """
    text = m.group('list_3')
    marker = m.group('list_2')
    bullet_char = marker[-1]
    # Ordered markers carry digits before the delimiter, so they are longer
    # than one character.
    is_ordered = len(marker) > 1

    if not text.strip():
        # Example 285
        # an empty list item cannot interrupt a paragraph
        interrupted_at = state.append_paragraph()
        if interrupted_at:
            return interrupted_at

    depth = state.depth()
    attrs = {
        'depth': depth,
        'ordered': is_ordered,
    }
    token = {
        'type': 'list',
        'children': [],
        'tight': True,
        'bullet': bullet_char,
        'attrs': attrs,
    }

    if is_ordered:
        start = int(marker[:-1])
        if start != 1:
            # Example 304
            # we allow only lists starting with 1 to interrupt paragraphs
            interrupted_at = state.append_paragraph()
            if interrupted_at:
                return interrupted_at
            attrs['start'] = start

    # Move past the matched line and its trailing newline.
    state.cursor = m.end() + 1

    rules = block.list_rules
    if depth >= block.max_nested_level - 1:
        # Work on a copy so the shared rule list stays untouched, and stop
        # nested lists from being parsed any deeper.
        rules = list(rules)
        rules.remove('list')

    bullet = _get_list_bullet(bullet_char)
    pending = (m.group('list_1'), marker, text)
    # Each call consumes one item and hands back the groups of the next
    # sibling item, or None once the list is finished.
    while pending:
        pending = _parse_list_item(block, bullet, pending, token, state, rules)

    end_pos = token.pop('_end_pos', None)
    _transform_tight_list(token)
    if not end_pos:
        state.append_token(token)
        return state.cursor

    # Another block ended the list and already added its own tokens; put the
    # list token at the index recorded before that block was parsed.
    state.tokens.insert(token.pop('_tok_index'), token)
    return end_pos
def _transform_tight_list(token):
    """Rewrite paragraphs of a tight list as ``block_text``, in place.

    Lists nested directly inside the items are processed recursively.
    A list whose ``tight`` flag is false is left untouched.
    """
    if not token['tight']:
        return

    for item in token['children']:
        for child in item['children']:
            kind = child['type']
            if kind == 'paragraph':
                child['type'] = 'block_text'
            elif kind == 'list':
                _transform_tight_list(child)
def _parse_list_item(block, bullet, groups, token, state, rules):
    """Consume one list item from ``state`` and append it to ``token``.

    ``groups`` is the ``(spaces, marker, text)`` triple of the item's first
    line. Returns the equivalent triple for the next sibling item when one
    follows directly, otherwise ``None``.
    """
    spaces, marker, text = groups

    leading_width = len(spaces) + len(marker)
    text, continue_width = _compile_continue_width(text, leading_width)
    item_pattern = _compile_list_item_pattern(bullet, leading_width)

    # Block patterns that are allowed to end the current item.
    spec = block.specification
    interrupt_names = (
        'thematic_break',
        'fenced_code',
        'axt_heading',
        'block_quote',
        'block_html',
        'list',
    )
    pairs = [(name, spec[name]) for name in interrupt_names]
    if leading_width < 3:
        # Swap the first '3' in each pattern for the narrower width.
        width_text = str(leading_width)
        pairs = [
            (name, pattern.replace('3', width_text, 1))
            for name, pattern in pairs
        ]

    pairs[1:1] = [('list_item', item_pattern)]
    regex = '|'.join(r'(?P<%s>(?<=\n)%s)' % pair for pair in pairs)
    sc = re.compile(regex, re.M)

    body = ''
    next_group = None
    after_blank = False
    indent = ' ' * continue_width

    pos = state.cursor
    while pos < state.cursor_max:
        pos = state.find_line_end()
        line = state.get_text(pos)

        if block.BLANK_LINE.match(line):
            body += '\n'
            after_blank = True
            state.cursor = pos
            continue

        line = expand_leading_tab(line)
        if line.startswith(indent):
            if after_blank and not (text or body.strip()):
                # Example 280
                # A list item can begin with at most one blank line
                break

            body += line
            after_blank = False
            state.cursor = pos
            continue

        found = sc.match(state.src, state.cursor)
        if found:
            if found.lastgroup == 'list_item':
                # A blank line between sibling items makes the list loose.
                if after_blank:
                    token['tight'] = False
                next_group = found.group(
                    'listitem_1', 'listitem_2', 'listitem_3',
                )
                state.cursor = found.end() + 1
                break

            tok_index = len(state.tokens)
            end_pos = block.parse_method(found, state)
            if end_pos:
                # Remember where the list token belongs relative to the
                # tokens the interrupting block has just added.
                token['_tok_index'] = tok_index
                token['_end_pos'] = end_pos
                break

        # The line is known not to start with the continuation indent here
        # (that case was handled above), so a preceding blank line ends the
        # item.
        if after_blank:
            break

        body += line
        state.cursor = pos

    item_text = text + _clean_list_item_text(body, continue_width)
    child = state.child_state(strip_end(item_text))

    block.parse(child, rules)

    if token['tight'] and _is_loose_list(child.tokens):
        token['tight'] = False

    token['children'].append({
        'type': 'list_item',
        'children': child.tokens,
    })
    # Either None or the three captured groups of the next item.
    return next_group
def _get_list_bullet(c):
    """Return the regex fragment matching sibling markers for delimiter ``c``.

    ``c`` is the last character of the first item's marker. For ordered
    lists (``.`` or ``)``) the fragment requires 1-9 digits before the
    delimiter, the same range ``LIST_PATTERN`` uses, so a bare ``.`` or
    ``)`` is not taken for a list marker. Any character other than
    ``.``, ``)``, ``*`` and ``+`` yields the ``-`` bullet.
    """
    if c == '.':
        return r'\d{1,9}\.'
    if c == ')':
        return r'\d{1,9}\)'
    if c == '*':
        return r'\*'
    if c == '+':
        return r'\+'
    return '-'
def _compile_list_item_pattern(bullet, leading_width):
    """Build the regex source that matches a sibling list item line.

    ``bullet`` is the marker fragment; ``leading_width`` bounds the number
    of leading spaces allowed and is capped at 3.
    """
    max_indent = str(min(leading_width, 3))
    return ''.join((
        r'^(?P<listitem_1> {0,', max_indent, '})',
        r'(?P<listitem_2>', bullet, ')',
        r'(?P<listitem_3>[ \t]*|[ \t][^\n]+)$',
    ))
def _compile_continue_width(text, leading_width):
    """Split off the spacing after a list marker and size the continuation.

    ``text`` is what follows the marker on the item's first line and
    ``leading_width`` is the width of the leading spaces plus the marker.
    Returns ``(text, continue_width)``: the first-line content with the
    spacing removed and a newline appended (or ``''`` when the line holds
    no text), and the indentation width continuation lines must have.
    """
    text = expand_leading_tab(text, 3)
    text = expand_tab(text)

    m2 = _LINE_HAS_TEXT.match(text)
    if m2:
        # Five or more spaces after the marker start indented code: only
        # one space belongs to the marker and the rest stays in the content.
        # The prefix is spelled as a product so its width cannot be lost to
        # whitespace normalisation.
        if text.startswith(' ' * 5):
            space_width = 1
        else:
            space_width = len(m2.group(1))

        text = text[space_width:] + '\n'
    else:
        # Nothing but whitespace follows the marker.
        space_width = 1
        text = ''

    continue_width = leading_width + space_width
    return text, continue_width
def _clean_list_item_text(src, continue_width):
    """Strip the continuation indent from each line of an item's body.

    Lines that start with ``continue_width`` spaces lose that prefix and
    are then passed through ``expand_tab``; all other lines are kept as
    they are.
    """
    prefix = ' ' * continue_width
    cleaned = []
    for raw in src.split('\n'):
        if raw.startswith(prefix):
            # The prefix sits at the very start, so slicing removes exactly
            # that first occurrence.
            # according to CommonMark Example 5
            # tab should be treated as 4 spaces
            raw = expand_tab(raw[len(prefix):])
        cleaned.append(raw)

    return '\n'.join(cleaned)
def _is_loose_list(tokens):
    """Return True when an item's child tokens make the list loose.

    That is the case when any token is a ``blank_line`` or when more than
    one ``paragraph`` token is present; otherwise False is returned.
    """
    paragraph_count = 0
    for tok in tokens:
        if tok['type'] == 'blank_line':
            return True
        if tok['type'] == 'paragraph':
            paragraph_count += 1
            if paragraph_count > 1:
                return True
    # Explicit result so every path returns a bool rather than None.
    return False