Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/mistune/list_parser.py: 98%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

167 statements  

1"""because list is complex, split list parser in a new file""" 

2 

3from __future__ import annotations 

4 

5import re 

6from typing import TYPE_CHECKING, Any, Iterable, Optional, Match 

7from .util import expand_leading_tab, expand_tab, strip_end 

8 

9if TYPE_CHECKING: 

10 from .block_parser import BlockParser 

11 from .core import BlockState 

12 

13LIST_PATTERN = ( 

14 r"^(?P<list_1> {0,3})" 

15 r"(?P<list_2>[\*\+-]|\d{1,9}[.)])" 

16 r"(?P<list_3>[ \t]*|[ \t].+)$" 

17) 

18 

19_LINE_HAS_TEXT = re.compile(r"(\s*)\S") 

20 

21 

def parse_list(block: "BlockParser", m: Match[str], state: "BlockState") -> int:
    """Parse an ordered or unordered list that starts at match ``m``.

    A ``list`` token is built, filled with one ``list_item`` child per item
    and added to ``state``. The return value is the position reported by
    ``state.append_paragraph()`` when the list is not allowed to interrupt a
    paragraph, the end position recorded by an interrupting block while the
    items were parsed, or ``state.cursor`` otherwise.
    """
    indent, marker, text = m.group("list_1", "list_2", "list_3")

    if not text.strip():
        # Example 285
        # an empty list item cannot interrupt a paragraph
        interrupted = state.append_paragraph()
        if interrupted:
            return interrupted

    # Bullet markers are a single character; ordered markers are digits
    # followed by a delimiter, so they are always longer than one character.
    is_ordered = len(marker) > 1
    depth = state.depth()
    attrs: dict[str, Any] = {"depth": depth, "ordered": is_ordered}

    if is_ordered:
        start = int(marker[:-1])
        if start != 1:
            # Example 304
            # we allow only lists starting with 1 to interrupt paragraphs
            interrupted = state.append_paragraph()
            if interrupted:
                return interrupted
            attrs["start"] = start

    token: dict[str, Any] = {
        "type": "list",
        "children": [],
        "tight": True,
        "bullet": marker[-1],
        "attrs": attrs,
    }

    # Continue reading right after the newline that ends the opening line.
    state.cursor = m.end() + 1

    if depth >= block.max_nested_level - 1:
        # At the nesting limit the items are parsed without the "list" rule.
        rules = list(block.list_rules)
        rules.remove("list")
    else:
        rules = block.list_rules

    bullet = _get_list_bullet(marker[-1])
    pending: Optional[tuple[str, str, str]] = (indent, marker, text)
    while pending is not None:
        pending = _parse_list_item(block, bullet, pending, token, state, rules)

    end_pos = token.pop("_end_pos", None)
    _transform_tight_list(token)
    if not end_pos:
        state.append_token(token)
        return state.cursor

    # A block that interrupted the list already added tokens to the state;
    # put the list back at the index recorded before that block was parsed.
    state.tokens.insert(token.pop("_tok_index"), token)
    return end_pos  # type: ignore[no-any-return]

77 

78 

79def _transform_tight_list(token: dict[str, Any]) -> None: 

80 if token["tight"]: 

81 # reset tight list item 

82 for list_item in token["children"]: 

83 for tok in list_item["children"]: 

84 if tok["type"] == "paragraph": 

85 tok["type"] = "block_text" 

86 elif tok["type"] == "list": 

87 _transform_tight_list(tok) 

88 

89 

def _parse_list_item(
    block: "BlockParser",
    bullet: str,
    groups: tuple[str, str, str],
    token: dict[str, Any],
    state: "BlockState",
    rules: list[str],
) -> tuple[str, str, str] | None:
    """Consume one list item from ``state`` and append it to ``token``.

    ``groups`` is the ``(indent, marker, rest-of-line)`` triple taken from the
    item's opening line. Following lines are gathered until a sibling item,
    one of the interrupting block patterns, or a non-continuation line after
    a blank line ends the item. The gathered text is parsed in a child state
    using ``rules`` and stored as a ``list_item`` child of ``token``.

    Returns the opening triple of the next sibling item, or ``None`` when no
    sibling item follows. When an interrupting block reports an end position,
    it is stored on ``token`` under ``"_end_pos"`` together with the token
    index (``"_tok_index"``) recorded before that block was parsed.
    """
    indent, marker, first_line = groups
    leading_width = len(indent) + len(marker)
    text, continue_width = _compile_continue_width(first_line, leading_width)
    item_pattern = _compile_list_item_pattern(bullet, leading_width)

    # Names of the block rules that may end the item, in matching order.
    spec = block.specification
    break_names = ["thematic_break"]
    if "fenced_directive" in spec:
        break_names.append("fenced_directive")
    break_names.extend(["fenced_code", "atx_heading", "block_quote", "block_html", "list"])

    patterns = [spec[name] for name in break_names]
    if leading_width < 3:
        # The first "3" in each pattern is replaced by the item's own leading
        # width (presumably the upper bound of the indent quantifier).
        width = str(leading_width)
        patterns = [pattern.replace("3", width, 1) for pattern in patterns]

    alternatives = list(zip(break_names, patterns))
    # The sibling-item pattern is tried right after "thematic_break".
    alternatives.insert(1, ("list_item", item_pattern))
    scanner = re.compile(
        "|".join(r"(?P<%s>(?<=\n)%s)" % pair for pair in alternatives),
        re.M,
    )

    continue_space = " " * continue_width
    src = ""
    next_group = None
    after_blank = False
    pos = state.cursor

    while pos < state.cursor_max:
        pos = state.find_line_end()
        line = state.get_text(pos)

        if block.BLANK_LINE.match(line):
            src += "\n"
            after_blank = True
            state.cursor = pos
            continue

        line = expand_leading_tab(line)
        if line.startswith(continue_space):
            if after_blank and not (text or src.strip()):
                # Example 280
                # A list item can begin with at most one blank line
                break

            src += line
            after_blank = False
            state.cursor = pos
            continue

        found = scanner.match(state.src, state.cursor)
        if found is not None:
            kind = found.lastgroup
            if kind == "list_item":
                # A blank line between two sibling items makes the list loose.
                if after_blank:
                    token["tight"] = False
                next_group = found.group("listitem_1", "listitem_2", "listitem_3")
                state.cursor = found.end() + 1
                break

            if kind == "list":
                break

            tok_index = len(state.tokens)
            end_pos = block.parse_method(found, state)
            if end_pos:
                token["_tok_index"] = tok_index
                token["_end_pos"] = end_pos
                break

        # The line is known not to start with the continuation indent here
        # (that case was handled above), so after a blank line it ends the item.
        if after_blank:
            break

        src += line
        state.cursor = pos

    text += _clean_list_item_text(src, continue_width)
    child = state.child_state(strip_end(text))
    block.parse(child, rules)

    if token["tight"] and _is_loose_list(child.tokens):
        token["tight"] = False

    token["children"].append({"type": "list_item", "children": child.tokens})
    return next_group

195 

196 

197def _get_list_bullet(c: str) -> str: 

198 if c == ".": 

199 bullet = r"\d{0,9}\." 

200 elif c == ")": 

201 bullet = r"\d{0,9}\)" 

202 elif c == "*": 

203 bullet = r"\*" 

204 elif c == "+": 

205 bullet = r"\+" 

206 else: 

207 bullet = "-" 

208 return bullet 

209 

210 

211def _compile_list_item_pattern(bullet: str, leading_width: int) -> str: 

212 if leading_width > 3: 

213 leading_width = 3 

214 return ( 

215 r"^(?P<listitem_1> {0," + str(leading_width) + "})" 

216 r"(?P<listitem_2>" + bullet + ")" 

217 r"(?P<listitem_3>[ \t]*|[ \t][^\n]+)$" 

218 ) 

219 

220 

def _compile_continue_width(text: str, leading_width: int) -> tuple[str, int]:
    """Work out where an item's content starts on its opening line.

    ``text`` is everything after the list marker and ``leading_width`` is the
    width of the indent plus the marker. Tabs in ``text`` are expanded first.

    Returns ``(first_line, continue_width)``: ``first_line`` is the content of
    the opening line with its leading padding removed and a newline appended
    (or ``""`` when the line has no content), and ``continue_width`` is the
    number of columns later lines must be indented by to belong to the item.
    """
    text = expand_leading_tab(text, 3)
    text = expand_tab(text)

    m2 = _LINE_HAS_TEXT.match(text)
    if not m2:
        # Nothing but whitespace after the marker: the content column is one
        # past the marker and there is no first-line text.
        return "", leading_width + 1

    # Five or more spaces after the marker mean the item starts with indented
    # code: only one space belongs to the marker, the rest is kept as content.
    # The width is spelled out so the literal cannot be silently shortened.
    if text.startswith(" " * 5):
        space_width = 1
    else:
        space_width = len(m2.group(1))

    return text[space_width:] + "\n", leading_width + space_width

240 

241 

242def _clean_list_item_text(src: str, continue_width: int) -> str: 

243 # according to Example 7, tab should be treated as 3 spaces 

244 rv = [] 

245 trim_space = " " * continue_width 

246 lines = src.split("\n") 

247 for line in lines: 

248 if line.startswith(trim_space): 

249 line = line.replace(trim_space, "", 1) 

250 # according to CommonMark Example 5 

251 # tab should be treated as 4 spaces 

252 line = expand_tab(line) 

253 rv.append(line) 

254 else: 

255 rv.append(line) 

256 

257 return "\n".join(rv) 

258 

259 

260def _is_loose_list(tokens: Iterable[dict[str, Any]]) -> bool: 

261 paragraph_count = 0 

262 for tok in tokens: 

263 if tok["type"] == "blank_line": 

264 return True 

265 if tok["type"] == "paragraph": 

266 paragraph_count += 1 

267 if paragraph_count > 1: 

268 return True 

269 return False