Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/mistune/list_parser.py: 100%

155 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-01 06:54 +0000

1import re 

2from .core import BlockState 

3from .util import ( 

4 strip_end, 

5 expand_tab, 

6 expand_leading_tab, 

7) 

8# because list is complex, split list parser in a new file 

9 

# Regex source (not compiled here) for the first line of a list item:
#   list_1 - up to three spaces of indentation
#   list_2 - the marker: one of * + -, or 1-9 digits followed by . or )
#   list_3 - the rest of the line: only blanks, or a blank followed by text
LIST_PATTERN = ''.join((
    r'^(?P<list_1> {0,3})',
    r'(?P<list_2>[\*\+-]|\d{1,9}[.)])',
    r'(?P<list_3>[ \t]*|[ \t].+)$',
))

# Matches the run of leading spaces (group 1) in front of the first
# non-whitespace character; does not match when no such character exists.
_LINE_HAS_TEXT = re.compile(r'( *)\S')

17 

18 

def parse_list(block, m: re.Match, state: BlockState) -> int:
    """Build a ``list`` token from a list-marker match and its following items.

    :param block: block parser; supplies ``max_nested_level``, ``list_rules``
        and the callbacks used while parsing each item.
    :param m: match object exposing the ``list_1`` (indent), ``list_2``
        (marker) and ``list_3`` (rest of line) groups.
    :param state: parsing state; its cursor is advanced and the finished
        token is stored in it.
    :return: the position handed back by ``state.append_paragraph()`` or by
        a block rule that ended the list, otherwise ``state.cursor``.
    """
    first_line = m.group('list_3')
    if not first_line.strip():
        # Example 285
        # an empty list item cannot interrupt a paragraph
        interrupted_at = state.append_paragraph()
        if interrupted_at:
            return interrupted_at

    marker = m.group('list_2')
    delimiter = marker[-1]
    # bullet markers are one character; ordered ones carry digits as well
    is_ordered = len(marker) > 1
    depth = state.depth()
    attrs = {
        'depth': depth,
        'ordered': is_ordered,
    }
    token = {
        'type': 'list',
        'children': [],
        'tight': True,
        'bullet': delimiter,
        'attrs': attrs,
    }
    if is_ordered:
        start = int(marker[:-1])
        if start != 1:
            # Example 304
            # we allow only lists starting with 1 to interrupt paragraphs
            interrupted_at = state.append_paragraph()
            if interrupted_at:
                return interrupted_at
            attrs['start'] = start

    state.cursor = m.end() + 1

    # at the deepest allowed level, nested lists are no longer parsed
    if depth >= block.max_nested_level - 1:
        rules = list(block.list_rules)
        rules.remove('list')
    else:
        rules = block.list_rules

    bullet = _get_list_bullet(delimiter)
    pending = (m.group('list_1'), marker, first_line)
    # each call consumes one item and returns the groups of the next one
    while pending:
        pending = _parse_list_item(block, bullet, pending, token, state, rules)

    end_pos = token.pop('_end_pos', None)
    _transform_tight_list(token)
    if not end_pos:
        state.append_token(token)
        return state.cursor

    # another block rule ended the list: put the list token at the index
    # recorded before that rule ran
    state.tokens.insert(token.pop('_tok_index'), token)
    return end_pos

74 

75 

def _transform_tight_list(token):
    """Retype paragraphs of a tight list as ``block_text``, in place.

    Every ``paragraph`` token directly inside an item of ``token`` is renamed
    to ``block_text``; nested ``list`` tokens are processed the same way,
    each according to its own ``tight`` flag. Loose lists are left untouched.
    """
    if not token['tight']:
        return

    for item in token['children']:
        for child in item['children']:
            kind = child['type']
            if kind == 'paragraph':
                child['type'] = 'block_text'
            elif kind == 'list':
                _transform_tight_list(child)

85 

86 

def _parse_list_item(block, bullet, groups, token, state, rules):
    """Consume one list item from ``state`` and append it to ``token``.

    :param block: block parser; provides ``specification`` (rule name to
        regex source), ``BLANK_LINE``, ``parse_method`` and ``parse``.
    :param bullet: regex fragment matching sibling markers of this list.
    :param groups: ``(spaces, marker, text)`` of the item's first line.
    :param token: the enclosing ``list`` token. A ``list_item`` child is
        appended; ``tight`` may be cleared, and ``_tok_index`` / ``_end_pos``
        are set when another block rule ends the list.
    :param state: parsing state whose ``cursor`` is advanced line by line.
    :param rules: rule names handed to ``block.parse`` for the item content.
    :return: the ``(spaces, marker, text)`` groups of the next sibling item
        when one was found, otherwise ``None``.
    """
    spaces, marker, text = groups

    # columns taken by the indentation plus the marker itself
    leading_width = len(spaces) + len(marker)
    text, continue_width = _compile_continue_width(text, leading_width)
    item_pattern = _compile_list_item_pattern(bullet, leading_width)
    # block rules that are checked on lines which are not indented enough
    # to be a plain continuation of this item
    pairs = [
        ('thematic_break', block.specification['thematic_break']),
        ('fenced_code', block.specification['fenced_code']),
        ('axt_heading', block.specification['axt_heading']),
        ('block_quote', block.specification['block_quote']),
        ('block_html', block.specification['block_html']),
        ('list', block.specification['list']),
    ]
    if leading_width < 3:
        # swaps the first literal '3' of every pattern for the leading width
        # NOTE(review): presumably the upper bound of the ' {0,3}' indent
        # quantifier in each specification entry - confirm against them
        _repl_w = str(leading_width)
        pairs = [(n, p.replace('3', _repl_w, 1)) for n, p in pairs]

    # the sibling-item pattern is tried right after thematic_break
    pairs.insert(1, ('list_item', item_pattern))
    # one named group per rule; the lookbehind only lets a rule match
    # directly after a newline
    regex = '|'.join(r'(?P<%s>(?<=\n)%s)' % pair for pair in pairs)
    sc = re.compile(regex, re.M)

    src = ''  # raw continuation lines collected for this item
    next_group = None
    prev_blank_line = False
    pos = state.cursor

    # indentation a line needs in order to belong to this item
    continue_space = ' ' * continue_width
    while pos < state.cursor_max:
        pos = state.find_line_end()
        line = state.get_text(pos)
        if block.BLANK_LINE.match(line):
            src += '\n'
            prev_blank_line = True
            state.cursor = pos
            continue

        line = expand_leading_tab(line)
        if line.startswith(continue_space):
            if prev_blank_line and not text and not src.strip():
                # Example 280
                # A list item can begin with at most one blank line
                break

            src += line
            prev_blank_line = False
            state.cursor = pos
            continue

        m = sc.match(state.src, state.cursor)
        if m:
            tok_type = m.lastgroup
            if tok_type == 'list_item':
                # a blank line between sibling items makes the list loose
                if prev_blank_line:
                    token['tight'] = False
                next_group = (
                    m.group('listitem_1'),
                    m.group('listitem_2'),
                    m.group('listitem_3')
                )
                state.cursor = m.end() + 1
                break
            # remember the token index before the other rule runs, so the
            # caller can insert the list token at that index
            tok_index = len(state.tokens)
            end_pos = block.parse_method(m, state)
            if end_pos:
                token['_tok_index'] = tok_index
                token['_end_pos'] = end_pos
                break

        # the startswith test is always satisfied here, because lines that
        # begin with continue_space were fully handled above
        if prev_blank_line and not line.startswith(continue_space):
            # not a continue line, and previous line is blank
            break

        # otherwise the line is kept as part of this item's text
        src += line
        state.cursor = pos

    text += _clean_list_item_text(src, continue_width)
    child = state.child_state(strip_end(text))

    block.parse(child, rules)

    if token['tight'] and _is_loose_list(child.tokens):
        token['tight'] = False

    token['children'].append({
        'type': 'list_item',
        'children': child.tokens,
    })
    # falls through to an implicit None when no sibling item follows
    if next_group:
        return next_group

177 

178 

def _get_list_bullet(c):
    """Return the regex fragment matching sibling markers of a list.

    :param c: last character of the first item's marker: ``.`` or ``)``
        for ordered lists, ``*``, ``+`` or ``-`` for bullet lists.
    :return: regex source for markers of the same kind; any other
        character falls back to ``'-'``.

    Ordered markers require one to nine digits, as CommonMark specifies and
    as ``LIST_PATTERN`` already does for the first item. The previous
    ``{0,9}`` quantifier also accepted a bare ``.`` or ``)``, so a line such
    as ``. text`` was taken as a new list item instead of continuation text.
    """
    bullets = {
        '.': r'\d{1,9}\.',
        ')': r'\d{1,9}\)',
        '*': r'\*',
        '+': r'\+',
    }
    return bullets.get(c, '-')

191 

192 

def _compile_list_item_pattern(bullet, leading_width):
    """Return the regex source that recognises a sibling list item.

    :param bullet: regex fragment for the marker (group ``listitem_2``).
    :param leading_width: indentation allowed in front of the marker
        (group ``listitem_1``); values above 3 are treated as 3.
    :return: pattern source whose ``listitem_3`` group holds the rest of
        the line.
    """
    max_indent = min(leading_width, 3)
    return ''.join((
        r'^(?P<listitem_1> {0,', str(max_indent), '})',
        r'(?P<listitem_2>', bullet, ')',
        r'(?P<listitem_3>[ \t]*|[ \t][^\n]+)$',
    ))

201 

202 

def _compile_continue_width(text, leading_width):
    """Split the first line of a list item into content and indent width.

    :param text: the part of the line that follows the list marker.
    :param leading_width: width of the indentation plus the marker.
    :return: ``(text, continue_width)``. ``text`` is the item's first content
        line with its separating spaces removed and a newline appended, or
        ``''`` when the line has no content. ``continue_width`` is the
        indentation a following line needs to stay inside the item.
    """
    text = expand_leading_tab(text, 3)
    text = expand_tab(text)

    m2 = _LINE_HAS_TEXT.match(text)
    if m2:
        # indent code, startswith 5 spaces
        # Only one space then belongs to the marker; the rest stays in the
        # text. The literal must be exactly five spaces: with a single space
        # this branch is taken for every item that has text, and the
        # measured width below is never used.
        if text.startswith('     '):
            space_width = 1
        else:
            space_width = len(m2.group(1))

        text = text[space_width:] + '\n'
    else:
        # no content on the marker line: the marker counts one space
        space_width = 1
        text = ''

    continue_width = leading_width + space_width
    return text, continue_width

222 

223 

def _clean_list_item_text(src, continue_width):
    """Remove the item's indentation from every line of ``src``.

    Lines that start with ``continue_width`` spaces lose that prefix and then
    have their tabs expanded; all other lines are passed through unchanged.

    :param src: raw continuation lines of the item, newline separated.
    :param continue_width: number of leading spaces to drop.
    :return: the cleaned lines joined with newlines.
    """
    # according to Example 7, tab should be treated as 3 spaces
    indent = ' ' * continue_width
    cut = len(indent)
    # according to CommonMark Example 5
    # tab should be treated as 4 spaces
    return '\n'.join(
        expand_tab(row[cut:]) if row.startswith(indent) else row
        for row in src.split('\n')
    )

240 

241 

def _is_loose_list(tokens):
    """Tell whether the tokens of a list item make the list loose.

    :param tokens: tokens parsed from the content of one list item.
    :return: ``True`` when a ``blank_line`` token is present or a second
        ``paragraph`` token is reached; ``None`` otherwise.
    """
    seen_paragraph = False
    for entry in tokens:
        kind = entry['type']
        if kind == 'blank_line':
            return True
        if kind != 'paragraph':
            continue
        if seen_paragraph:
            return True
        seen_paragraph = True
    return None

250 return True