Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/mistune/list_parser.py: 98%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

163 statements  

1"""because list is complex, split list parser in a new file""" 

2 

3import re 

4from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Match 

5from .util import expand_leading_tab, expand_tab, strip_end 

6 

7if TYPE_CHECKING: 

8 from .block_parser import BlockParser 

9 from .core import BlockState 

10 

# Regex source (a plain string, not compiled here) for the first line of a
# list item:
#   list_1 -- up to three spaces in front of the marker
#   list_2 -- the marker itself: one of "*", "+", "-", or 1-9 digits
#             followed by "." or ")"
#   list_3 -- the rest of the line: either only spaces/tabs, or one
#             space/tab followed by at least one more character
LIST_PATTERN = (
    r"^(?P<list_1> {0,3})"
    r"(?P<list_2>[\*\+-]|\d{1,9}[.)])"
    r"(?P<list_3>[ \t]*|[ \t].+)$"
)

# Matches when a string contains a non-whitespace character; group 1 captures
# the whitespace that precedes the first one.
_LINE_HAS_TEXT = re.compile(r"(\s*)\S")

18 

19 

def parse_list(block: "BlockParser", m: Match[str], state: "BlockState") -> int:
    """Parse tokens for ordered and unordered list.

    :param block: the block parser; its ``list_rules`` and
        ``max_nested_level`` control how list item content is parsed.
    :param m: a match exposing the groups ``list_1`` (leading spaces),
        ``list_2`` (the marker) and ``list_3`` (rest of the first line).
    :param state: the current block state; its cursor is advanced and the
        resulting ``list`` token is added to its tokens.
    :return: the position to continue parsing from -- either a truthy
        position handed back by ``state.append_paragraph()``, the end
        position recorded while parsing an item, or ``state.cursor``.
    """
    text = m.group("list_3")
    if not text.strip():
        # Example 285
        # an empty list item cannot interrupt a paragraph
        end_pos = state.append_paragraph()
        if end_pos:
            return end_pos

    marker = m.group("list_2")
    # bullet markers are a single character; ordered markers are digits
    # plus a delimiter, so they are always longer than one character
    ordered = len(marker) > 1
    depth = state.depth()
    token: Dict[str, Any] = {
        "type": "list",
        "children": [],
        "tight": True,
        "bullet": marker[-1],
        "attrs": {
            "depth": depth,
            "ordered": ordered,
        },
    }
    if ordered:
        start = int(marker[:-1])
        if start != 1:
            # Example 304
            # we allow only lists starting with 1 to interrupt paragraphs
            end_pos = state.append_paragraph()
            if end_pos:
                return end_pos
            token["attrs"]["start"] = start

    state.cursor = m.end() + 1
    groups: Optional[Tuple[str, str, str]] = (m.group("list_1"), marker, text)

    rules: List[str]
    if depth >= block.max_nested_level - 1:
        # At the nesting limit the "list" rule is dropped for item content.
        # Filtering (instead of ``list.remove``) keeps this from raising
        # ValueError when a custom ``list_rules`` has no "list" entry.
        rules = [rule for rule in block.list_rules if rule != "list"]
    else:
        rules = block.list_rules

    bullet = _get_list_bullet(marker[-1])
    # each call parses one item and hands back the groups of the next
    # sibling item, or None once the list is finished
    while groups:
        groups = _parse_list_item(block, bullet, groups, token, state, rules)

    end_pos = token.pop("_end_pos", None)
    _transform_tight_list(token)
    if end_pos:
        # Another block rule already ran while scanning an item and reported
        # an end position; put the list token at the index recorded before
        # that rule ran so it precedes whatever that rule appended.
        index = token.pop("_tok_index")
        state.tokens.insert(index, token)
        return end_pos

    state.append_token(token)
    return state.cursor

75 

76 

77def _transform_tight_list(token: Dict[str, Any]) -> None: 

78 if token["tight"]: 

79 # reset tight list item 

80 for list_item in token["children"]: 

81 for tok in list_item["children"]: 

82 if tok["type"] == "paragraph": 

83 tok["type"] = "block_text" 

84 elif tok["type"] == "list": 

85 _transform_tight_list(tok) 

86 

87 

def _parse_list_item(
    block: "BlockParser",
    bullet: str,
    groups: Tuple[str, str, str],
    token: Dict[str, Any],
    state: "BlockState",
    rules: List[str],
) -> Optional[Tuple[str, str, str]]:
    """Parse one list item and append it to ``token["children"]``.

    :param block: the block parser (supplies ``specification``,
        ``BLANK_LINE``, ``parse_method`` and ``parse``).
    :param bullet: regex fragment matching markers of sibling items.
    :param groups: ``(leading spaces, marker, first-line text)`` of the item.
    :param token: the list token being built; ``tight`` may be cleared and
        ``_tok_index`` / ``_end_pos`` may be set on it.
    :param state: block state whose cursor is advanced past consumed lines.
    :param rules: rule names used to parse the item's content.
    :return: the groups of the next sibling item when one follows directly,
        otherwise ``None``.
    """
    indent, marker, text = groups

    leading_width = len(indent) + len(marker)
    text, continue_width = _compile_continue_width(text, leading_width)
    item_pattern = _compile_list_item_pattern(bullet, leading_width)

    # block constructs that are checked on a line that is not indented
    # enough to be a continuation of this item
    pairs = [
        (name, block.specification[name])
        for name in (
            "thematic_break",
            "fenced_code",
            "atx_heading",
            "block_quote",
            "block_html",
            "list",
        )
    ]
    if leading_width < 3:
        # swap the first "3" of each pattern for the narrower item indent
        width_digit = str(leading_width)
        pairs = [(name, pattern.replace("3", width_digit, 1)) for name, pattern in pairs]

    pairs.insert(1, ("list_item", item_pattern))
    breaker = re.compile(
        "|".join(r"(?P<%s>(?<=\n)%s)" % pair for pair in pairs),
        re.M,
    )

    body = ""
    following: Optional[Tuple[str, str, str]] = None
    after_blank = False
    line_end = state.cursor
    continue_space = " " * continue_width

    while line_end < state.cursor_max:
        line_end = state.find_line_end()
        line = state.get_text(line_end)

        if block.BLANK_LINE.match(line):
            body += "\n"
            after_blank = True
            state.cursor = line_end
            continue

        line = expand_leading_tab(line)
        if line.startswith(continue_space):
            if after_blank and not text and not body.strip():
                # Example 280
                # A list item can begin with at most one blank line
                break

            body += line
            after_blank = False
            state.cursor = line_end
            continue

        found = breaker.match(state.src, state.cursor)
        if found:
            kind = found.lastgroup
            if kind == "list_item":
                # a sibling item: a blank line before it makes the list loose
                if after_blank:
                    token["tight"] = False
                following = found.group("listitem_1", "listitem_2", "listitem_3")
                state.cursor = found.end() + 1
                break

            if kind == "list":
                break

            # remember where the token list stood before the other rule runs
            tok_index = len(state.tokens)
            end_pos = block.parse_method(found, state)
            if end_pos:
                token["_tok_index"] = tok_index
                token["_end_pos"] = end_pos
                break

        if after_blank and not line.startswith(continue_space):
            # not a continue line, and previous line is blank
            break

        body += line
        state.cursor = line_end

    text += _clean_list_item_text(body, continue_width)
    child = state.child_state(strip_end(text))

    block.parse(child, rules)

    if token["tight"] and _is_loose_list(child.tokens):
        token["tight"] = False

    token["children"].append({"type": "list_item", "children": child.tokens})
    return following

189 

190 

191def _get_list_bullet(c: str) -> str: 

192 if c == ".": 

193 bullet = r"\d{0,9}\." 

194 elif c == ")": 

195 bullet = r"\d{0,9}\)" 

196 elif c == "*": 

197 bullet = r"\*" 

198 elif c == "+": 

199 bullet = r"\+" 

200 else: 

201 bullet = "-" 

202 return bullet 

203 

204 

205def _compile_list_item_pattern(bullet: str, leading_width: int) -> str: 

206 if leading_width > 3: 

207 leading_width = 3 

208 return ( 

209 r"^(?P<listitem_1> {0," + str(leading_width) + "})" 

210 r"(?P<listitem_2>" + bullet + ")" 

211 r"(?P<listitem_3>[ \t]*|[ \t][^\n]+)$" 

212 ) 

213 

214 

def _compile_continue_width(text: str, leading_width: int) -> Tuple[str, int]:
    """Work out an item's first-line content and its continuation indent.

    :param text: everything after the list marker on the item's first line.
    :param leading_width: width of the leading spaces plus the marker.
    :return: ``(content, continue_width)``. ``content`` is the first-line
        text with the whitespace belonging to the marker removed and a
        newline appended, or ``""`` when the line has no text.
        ``continue_width`` is the number of columns a following line must
        be indented by to belong to this item.
    """
    text = expand_leading_tab(text, 3)
    text = expand_tab(text)

    m2 = _LINE_HAS_TEXT.match(text)
    if not m2:
        # nothing but whitespace after the marker: content starts one
        # column after it
        return "", leading_width + 1

    # Five or more spaces after the marker mean the item starts with
    # indented code: only one space belongs to the marker and the rest stays
    # in the content. The check must use five spaces -- with a single space
    # every non-empty item would take this branch and 2-4 spaces after the
    # marker would be measured as one.
    if text.startswith(" " * 5):
        space_width = 1
    else:
        space_width = len(m2.group(1))

    return text[space_width:] + "\n", leading_width + space_width

234 

235 

236def _clean_list_item_text(src: str, continue_width: int) -> str: 

237 # according to Example 7, tab should be treated as 3 spaces 

238 rv = [] 

239 trim_space = " " * continue_width 

240 lines = src.split("\n") 

241 for line in lines: 

242 if line.startswith(trim_space): 

243 line = line.replace(trim_space, "", 1) 

244 # according to CommonMark Example 5 

245 # tab should be treated as 4 spaces 

246 line = expand_tab(line) 

247 rv.append(line) 

248 else: 

249 rv.append(line) 

250 

251 return "\n".join(rv) 

252 

253 

254def _is_loose_list(tokens: Iterable[Dict[str, Any]]) -> bool: 

255 paragraph_count = 0 

256 for tok in tokens: 

257 if tok["type"] == "blank_line": 

258 return True 

259 if tok["type"] == "paragraph": 

260 paragraph_count += 1 

261 if paragraph_count > 1: 

262 return True 

263 return False