Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/mistune/list_parser.py: 98%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

166 statements  

1"""because list is complex, split list parser in a new file""" 

2 

3import re 

4from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Match 

5from .util import expand_leading_tab, expand_tab, strip_end 

6 

7if TYPE_CHECKING: 

8 from .block_parser import BlockParser 

9 from .core import BlockState 

10 

# Regex source for the first line of a list item. Named groups:
#   list_1 - zero to three spaces before the marker
#   list_2 - the marker: one of "*", "+", "-", or 1-9 digits followed by "." or ")"
#   list_3 - the remainder of the line: only spaces/tabs, or one space/tab
#            followed by at least one more character
LIST_PATTERN = "".join(
    (
        r"^(?P<list_1> {0,3})",
        r"(?P<list_2>[\*\+-]|\d{1,9}[.)])",
        r"(?P<list_3>[ \t]*|[ \t].+)$",
    )
)

# Matches when a string contains a non-whitespace character after optional
# leading whitespace; group 1 captures that leading whitespace.
_LINE_HAS_TEXT = re.compile(r"(\s*)\S")

18 

19 

def parse_list(block: "BlockParser", m: Match[str], state: "BlockState") -> int:
    """Parse an ordered or unordered list whose first line is matched by ``m``.

    A ``list`` token is built and filled one item at a time by
    ``_parse_list_item``. The token is appended to ``state``, or inserted at a
    recorded index when a nested rule consumed input past the list.

    :param block: block parser supplying ``list_rules`` and ``max_nested_level``.
    :param m: match carrying the ``list_1``/``list_2``/``list_3`` groups.
    :param state: block state whose cursor and tokens are updated.
    :return: the position reported by an interrupted paragraph or by a nested
        rule when there is one, otherwise ``state.cursor``.
    """
    first_text = m.group("list_3")
    if not first_text.strip():
        # Example 285
        # an empty list item cannot interrupt a paragraph
        interrupted = state.append_paragraph()
        if interrupted:
            return interrupted

    marker = m.group("list_2")
    delimiter = marker[-1]
    # bullet markers are a single character; ordered ones are digits + delimiter
    is_ordered = len(marker) > 1
    depth = state.depth()
    attrs: Dict[str, Any] = {"depth": depth, "ordered": is_ordered}
    token: Dict[str, Any] = {
        "type": "list",
        "children": [],
        "tight": True,
        "bullet": delimiter,
        "attrs": attrs,
    }

    if is_ordered:
        start = int(marker[:-1])
        if start != 1:
            # Example 304
            # we allow only lists starting with 1 to interrupt paragraphs
            interrupted = state.append_paragraph()
            if interrupted:
                return interrupted
            attrs["start"] = start

    # continue scanning one character past the end of the matched line
    state.cursor = m.end() + 1
    pending: Optional[Tuple[str, str, str]] = (m.group("list_1"), marker, first_text)

    if depth < block.max_nested_level - 1:
        rules = block.list_rules
    else:
        # nesting limit reached: parse item content without the "list" rule
        rules = list(block.list_rules)
        rules.remove("list")

    bullet = _get_list_bullet(delimiter)
    while pending:
        pending = _parse_list_item(block, bullet, pending, token, state, rules)

    end_pos = token.pop("_end_pos", None)
    _transform_tight_list(token)
    if not end_pos:
        state.append_token(token)
        return state.cursor

    # a rule run while scanning an item reported an end position; place the
    # list at the token index recorded just before that rule ran
    index = token.pop("_tok_index")
    state.tokens.insert(index, token)
    return end_pos

75 

76 

def _transform_tight_list(token: Dict[str, Any]) -> None:
    """Rewrite paragraphs of a tight list as ``block_text``, in place.

    Loose lists (``token["tight"]`` falsy) are left untouched. Lists nested
    directly inside an item of a tight list are processed recursively and
    judged by their own ``tight`` flag.
    """
    if not token["tight"]:
        return

    for item in token["children"]:
        for child in item["children"]:
            kind = child["type"]
            if kind == "paragraph":
                child["type"] = "block_text"
            elif kind == "list":
                _transform_tight_list(child)

86 

87 

def _compile_item_breaker(
    block: "BlockParser",
    item_pattern: str,
    leading_width: int,
) -> "re.Pattern[str]":
    """Build the regex that detects what may end or interrupt a list item."""
    names = [
        "thematic_break",
        "fenced_code",
        "atx_heading",
        "block_quote",
        "block_html",
        "list",
    ]
    if "fenced_directive" in block.specification:
        names.insert(1, "fenced_directive")

    patterns = [block.specification[name] for name in names]
    if leading_width < 3:
        # swap the first "3" of each pattern for the narrower width
        # (presumably the " {0,3}" indentation allowance - confirm against
        # the block specification)
        width = str(leading_width)
        patterns = [p.replace("3", width, 1) for p in patterns]

    alternatives = list(zip(names, patterns))
    # the sibling-item pattern is tried right after thematic_break
    alternatives.insert(1, ("list_item", item_pattern))
    regex = "|".join(r"(?P<%s>(?<=\n)%s)" % pair for pair in alternatives)
    return re.compile(regex, re.M)


def _parse_list_item(
    block: "BlockParser",
    bullet: str,
    groups: Tuple[str, str, str],
    token: Dict[str, Any],
    state: "BlockState",
    rules: List[str],
) -> Optional[Tuple[str, str, str]]:
    """Consume one list item and append it to ``token["children"]``.

    Lines are read from ``state`` until a sibling item, a new list, a rule
    that reports an end position, or a non-continuation line after a blank
    line is found. The collected text is parsed in a child state using
    ``rules``.

    :param block: block parser (specification, ``BLANK_LINE``, ``parse``...).
    :param bullet: regex fragment matching sibling markers of this list.
    :param groups: ``(spaces, marker, text)`` of the item's first line.
    :param token: the enclosing list token; ``tight``, ``children`` and the
        private ``_tok_index``/``_end_pos`` keys may be updated.
    :param state: block state whose cursor is advanced.
    :param rules: rule names used to parse the item content.
    :return: the ``(spaces, marker, text)`` groups of the next sibling item,
        or ``None`` when no sibling follows.
    """
    spaces, marker, text = groups
    leading_width = len(spaces) + len(marker)
    text, continue_width = _compile_continue_width(text, leading_width)
    breaker = _compile_item_breaker(
        block,
        _compile_list_item_pattern(bullet, leading_width),
        leading_width,
    )

    indent = " " * continue_width
    body = ""
    following = None
    after_blank = False
    pos = state.cursor

    while pos < state.cursor_max:
        pos = state.find_line_end()
        line = state.get_text(pos)

        if block.BLANK_LINE.match(line):
            body += "\n"
            after_blank = True
            state.cursor = pos
            continue

        line = expand_leading_tab(line)
        if line.startswith(indent):
            if after_blank and not (text or body.strip()):
                # Example 280
                # A list item can begin with at most one blank line
                break

            body += line
            after_blank = False
            state.cursor = pos
            continue

        found = breaker.match(state.src, state.cursor)
        if found:
            kind = found.lastgroup
            if kind == "list_item":
                # a blank line between sibling items makes the list loose
                if after_blank:
                    token["tight"] = False
                following = (
                    found.group("listitem_1"),
                    found.group("listitem_2"),
                    found.group("listitem_3"),
                )
                state.cursor = found.end() + 1
                break

            if kind == "list":
                break

            tok_index = len(state.tokens)
            end_pos = block.parse_method(found, state)
            if end_pos:
                token["_tok_index"] = tok_index
                token["_end_pos"] = end_pos
                break

        # Reaching this point means the line does not start with the
        # continuation indent, so after a blank line it cannot belong to
        # this item.
        if after_blank:
            break

        body += line
        state.cursor = pos

    text += _clean_list_item_text(body, continue_width)
    child = state.child_state(strip_end(text))
    block.parse(child, rules)

    if token["tight"] and _is_loose_list(child.tokens):
        token["tight"] = False

    token["children"].append({"type": "list_item", "children": child.tokens})
    return following

193 

194 

def _get_list_bullet(c: str) -> str:
    """Return the regex fragment matching sibling markers of a list.

    :param c: last character of the list's first marker: ``"."`` or ``")"``
        for ordered lists, ``"*"``, ``"+"`` or ``"-"`` for bullet lists.
    :return: a regex fragment; any unrecognised character falls back to
        ``"-"``.

    Ordered markers require one to nine digits, the same rule the list
    opening pattern applies (``\\d{1,9}[.)]``). A bare ``.`` or ``)`` is
    therefore not accepted as a sibling item.
    """
    fragments = {
        ".": r"\d{1,9}\.",
        ")": r"\d{1,9}\)",
        "*": r"\*",
        "+": r"\+",
    }
    return fragments.get(c, "-")

207 

208 

def _compile_list_item_pattern(bullet: str, leading_width: int) -> str:
    """Return the regex source matching the first line of a sibling item.

    :param bullet: regex fragment for the marker.
    :param leading_width: width used as the indentation allowance, capped at 3.
    :return: pattern with the groups ``listitem_1`` (indentation),
        ``listitem_2`` (marker) and ``listitem_3`` (rest of the line).
    """
    max_indent = min(leading_width, 3)
    parts = (
        r"^(?P<listitem_1> {0,",
        str(max_indent),
        "})",
        r"(?P<listitem_2>",
        bullet,
        ")",
        r"(?P<listitem_3>[ \t]*|[ \t][^\n]+)$",
    )
    return "".join(parts)

217 

218 

def _compile_continue_width(text: str, leading_width: int) -> Tuple[str, int]:
    """Work out the first-line content of an item and its continuation width.

    :param text: text following the marker on the item's first line.
    :param leading_width: width of the indentation plus the marker.
    :return: ``(content, continue_width)``. ``content`` is the first-line text
        with its separating whitespace removed and a newline appended, or
        ``""`` when the line holds no text. ``continue_width`` is
        ``leading_width`` plus the number of characters removed.
    """
    # tab handling is delegated to the util helpers; the first call passes a
    # width of 3 (NOTE(review): exact expansion semantics live in .util - confirm)
    text = expand_leading_tab(text, 3)
    text = expand_tab(text)

    if not _LINE_HAS_TEXT.match(text):
        # nothing but whitespace after the marker
        return "", leading_width + 1

    # indent code, startswith 5 spaces: only the first space separates the
    # marker from the content, the rest belongs to the code block.
    # Written as a product so the width cannot be lost to whitespace mangling.
    indented_code_prefix = " " * 5
    if text.startswith(indented_code_prefix):
        space_width = 1
    else:
        space_width = len(text) - len(text.lstrip())

    return text[space_width:] + "\n", leading_width + space_width

238 

239 

def _clean_list_item_text(src: str, continue_width: int) -> str:
    """Strip the continuation indent from the collected lines of an item.

    :param src: raw item lines joined by ``"\\n"``.
    :param continue_width: number of leading spaces that mark a continuation.
    :return: the text with ``continue_width`` leading spaces removed from every
        line that has them; other lines are passed through unchanged.

    NOTE(review): the original comment here cited Example 7 ("tab should be
    treated as 3 spaces"), but nothing in this function applies a 3-space
    width. It presumably refers to the ``expand_leading_tab(text, 3)`` call
    made when the continue width is computed - confirm.
    """
    prefix = " " * continue_width
    cut = len(prefix)
    # according to CommonMark Example 5
    # tab should be treated as 4 spaces (handled by expand_tab)
    return "\n".join(
        expand_tab(line[cut:]) if line.startswith(prefix) else line
        for line in src.split("\n")
    )

256 

257 

def _is_loose_list(tokens: Iterable[Dict[str, Any]]) -> bool:
    """Tell whether an item's child tokens make the enclosing list loose.

    :param tokens: child tokens of one list item.
    :return: ``True`` as soon as a ``blank_line`` token or a second
        ``paragraph`` token is seen, otherwise ``False``.
    """
    seen_paragraph = False
    for tok in tokens:
        kind = tok["type"]
        if kind == "blank_line":
            return True
        if kind == "paragraph":
            if seen_paragraph:
                return True
            seen_paragraph = True
    return False