Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/mistune/list

1"""because list is complex, split list parser in a new file"""

3import re

4from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Match

5from .util import expand_leading_tab, expand_tab, strip_end

7if TYPE_CHECKING:

8 from .block_parser import BlockParser

9 from .core import BlockState

11LIST_PATTERN = (

12 r"^(?P<list_1> {0,3})"

13 r"(?P<list_2>[\*\+-]|\d{1,9}[.)])"

14 r"(?P<list_3>[ \t]*|[ \t].+)$"

15)

17_LINE_HAS_TEXT = re.compile(r"(\s*)\S")

def parse_list(block: "BlockParser", m: Match[str], state: "BlockState") -> int:
    """Parse tokens for ordered and unordered list.

    Builds a ``list`` token from the list-item line matched by ``m`` and
    calls ``_parse_list_item`` repeatedly until it reports no further
    sibling item.

    :param block: parser supplying ``max_nested_level`` and ``list_rules``.
    :param m: match exposing the ``list_1`` (indent), ``list_2`` (marker)
        and ``list_3`` (rest of the line) groups.
    :param state: block state; its ``cursor`` and ``tokens`` are updated.
    :return: the truthy result of ``state.append_paragraph()`` when the line
        is folded into a paragraph, the ``_end_pos`` recorded on the token by
        the item parser when present, otherwise ``state.cursor``.
    """
    text = m.group("list_3")
    if not text.strip():
        # Example 285
        # an empty list item cannot interrupt a paragraph
        end_pos = state.append_paragraph()
        if end_pos:
            return end_pos

    marker = m.group("list_2")
    # bullets are a single character; ordered markers are digits + delimiter
    ordered = len(marker) > 1
    depth = state.depth()
    token: Dict[str, Any] = {
        "type": "list",
        "children": [],
        "tight": True,
        "bullet": marker[-1],
        "attrs": {
            "depth": depth,
            "ordered": ordered,
        },
    }
    if ordered:
        start = int(marker[:-1])
        if start != 1:
            # Example 304
            # we allow only lists starting with 1 to interrupt paragraphs
            end_pos = state.append_paragraph()
            if end_pos:
                return end_pos
            token["attrs"]["start"] = start

    state.cursor = m.end() + 1
    groups: Optional[Tuple[str, str, str]] = (m.group("list_1"), marker, text)

    if depth >= block.max_nested_level - 1:
        # At the nesting limit, parse item content without the "list" rule.
        # Work on a copy so the parser's own rule list is left untouched, and
        # tolerate a customised rule list that has no "list" entry (a bare
        # ``remove`` would raise ValueError in that case).
        rules = list(block.list_rules)
        if "list" in rules:
            rules.remove("list")
    else:
        rules = block.list_rules

    bullet = _get_list_bullet(marker[-1])
    while groups:
        groups = _parse_list_item(block, bullet, groups, token, state, rules)

    # "_end_pos" / "_tok_index" are set by _parse_list_item when another
    # block's parse method returned a position while scanning an item.
    end_pos = token.pop("_end_pos", None)
    _transform_tight_list(token)
    if end_pos:
        # place the list token at the index recorded before that block ran
        index = token.pop("_tok_index")
        state.tokens.insert(index, token)
        return end_pos

    state.append_token(token)
    return state.cursor

77def _transform_tight_list(token: Dict[str, Any]) -> None:

78 if token["tight"]:

79 # reset tight list item

80 for list_item in token["children"]:

81 for tok in list_item["children"]:

82 if tok["type"] == "paragraph":

83 tok["type"] = "block_text"

84 elif tok["type"] == "list":

85 _transform_tight_list(tok)

def _parse_list_item(
    block: "BlockParser",
    bullet: str,
    groups: Tuple[str, str, str],
    token: Dict[str, Any],
    state: "BlockState",
    rules: List[str],
) -> Optional[Tuple[str, str, str]]:
    """Parse a single list item and append it to ``token["children"]``.

    Lines are read from ``state`` beginning at ``state.cursor``. Scanning
    stops at a sibling list item, at another list, when another block's
    parse method returns a position, or at a non-continuation line that
    follows a blank line. The gathered text is then parsed in a child state
    using ``rules``.

    :param block: parser providing ``specification``, ``BLANK_LINE``,
        ``parse_method`` and ``parse``.
    :param bullet: regex fragment for the marker of sibling items.
    :param groups: ``(indent, marker, text)`` of the item's first line.
    :param token: the enclosing list token; its ``tight`` flag and children
        are updated, and ``_tok_index`` / ``_end_pos`` may be set on it.
    :param state: block state whose cursor is advanced.
    :param rules: rule names used to parse the item content.
    :return: the ``(indent, marker, text)`` groups of the next sibling item,
        or ``None`` when there is none.
    """
    spaces, marker, text = groups

    leading_width = len(spaces) + len(marker)
    text, continue_width = _compile_continue_width(text, leading_width)
    item_pattern = _compile_list_item_pattern(bullet, leading_width)

    interrupting = (
        "thematic_break",
        "fenced_code",
        "atx_heading",
        "block_quote",
        "block_html",
        "list",
    )
    candidates = [(name, block.specification[name]) for name in interrupting]
    if leading_width < 3:
        # Swap the first "3" of each pattern for the narrower leading width
        # (presumably the upper bound of the leading-indent quantifier --
        # verify against the patterns in block.specification).
        width_text = str(leading_width)
        candidates = [
            (name, pattern.replace("3", width_text, 1))
            for name, pattern in candidates
        ]

    # the sibling-item pattern sits right after thematic_break in the alternation
    candidates.insert(1, ("list_item", item_pattern))
    scanner = re.compile(
        "|".join(r"(?P<%s>(?<=\n)%s)" % entry for entry in candidates),
        re.M,
    )

    body = ""
    upcoming = None
    after_blank = False
    indent_prefix = " " * continue_width

    pos = state.cursor
    while pos < state.cursor_max:
        pos = state.find_line_end()
        line = state.get_text(pos)

        if block.BLANK_LINE.match(line):
            body += "\n"
            after_blank = True
            state.cursor = pos
            continue

        line = expand_leading_tab(line)
        if line.startswith(indent_prefix):
            if after_blank and not text and not body.strip():
                # Example 280
                # A list item can begin with at most one blank line
                break

            body += line
            after_blank = False
            state.cursor = pos
            continue

        hit = scanner.match(state.src, state.cursor)
        if hit:
            kind = hit.lastgroup
            if kind == "list_item":
                # a blank line between sibling items makes the list loose
                if after_blank:
                    token["tight"] = False
                upcoming = (
                    hit.group("listitem_1"),
                    hit.group("listitem_2"),
                    hit.group("listitem_3"),
                )
                state.cursor = hit.end() + 1
                break

            if kind == "list":
                break

            # Remember where the list token belongs before the other block
            # gets a chance to append its own tokens.
            tok_index = len(state.tokens)
            end_pos = block.parse_method(hit, state)
            if end_pos:
                token["_tok_index"] = tok_index
                token["_end_pos"] = end_pos
                break

        # Reaching this point means the line does not start with the
        # continuation indent (that case was handled above), so after a
        # blank line it cannot belong to this item.
        if after_blank:
            break

        body += line
        state.cursor = pos

    text += _clean_list_item_text(body, continue_width)
    child = state.child_state(strip_end(text))

    block.parse(child, rules)

    if token["tight"] and _is_loose_list(child.tokens):
        token["tight"] = False

    token["children"].append({"type": "list_item", "children": child.tokens})

    # ``upcoming`` is either None or a 3-tuple of the next item's groups
    return upcoming

189

190

191def _get_list_bullet(c: str) -> str:

192 if c == ".":

193 bullet = r"\d{0,9}\."

194 elif c == ")":

195 bullet = r"\d{0,9}\)"

196 elif c == "*":

197 bullet = r"\*"

198 elif c == "+":

199 bullet = r"\+"

200 else:

201 bullet = "-"

202 return bullet

203

204

205def _compile_list_item_pattern(bullet: str, leading_width: int) -> str:

206 if leading_width > 3:

207 leading_width = 3

208 return (

209 r"^(?P<listitem_1> {0," + str(leading_width) + "})"

210 r"(?P<listitem_2>" + bullet + ")"

211 r"(?P<listitem_3>[ \t]*|[ \t][^\n]+)$"

212 )

213

214

def _compile_continue_width(text: str, leading_width: int) -> Tuple[str, int]:
    """Work out the item's first-line content and its continuation indent.

    :param text: what follows the list marker on the item's first line.
    :param leading_width: width of the indent plus the marker.
    :return: ``(text, continue_width)`` -- the first-line content with the
        marker padding removed and a trailing newline added (or ``""`` when
        the line holds no text), and the number of leading spaces a later
        line needs in order to continue this item.
    """
    # tabs are converted by the project helpers before widths are measured
    text = expand_leading_tab(text, 3)
    text = expand_tab(text)

    m2 = _LINE_HAS_TEXT.match(text)
    if m2:
        # Five or more spaces after the marker means the content starts with
        # indented code: only one space belongs to the marker padding and
        # the rest is kept as the code block's indentation. Spelled as
        # ``" " * 5`` so the width cannot be lost to whitespace collapsing.
        if text.startswith(" " * 5):
            space_width = 1
        else:
            space_width = len(m2.group(1))

        text = text[space_width:] + "\n"
    else:
        # nothing but whitespace after the marker
        space_width = 1
        text = ""

    continue_width = leading_width + space_width
    return text, continue_width

234

235

236def _clean_list_item_text(src: str, continue_width: int) -> str:

237 # according to Example 7, tab should be treated as 3 spaces

238 rv = []

239 trim_space = " " * continue_width

240 lines = src.split("\n")

241 for line in lines:

242 if line.startswith(trim_space):

243 line = line.replace(trim_space, "", 1)

244 # according to CommonMark Example 5

245 # tab should be treated as 4 spaces

246 line = expand_tab(line)

247 rv.append(line)

248 else:

249 rv.append(line)

250

251 return "\n".join(rv)

252

253

254def _is_loose_list(tokens: Iterable[Dict[str, Any]]) -> bool:

255 paragraph_count = 0

256 for tok in tokens:

257 if tok["type"] == "blank_line":

258 return True

259 if tok["type"] == "paragraph":

260 paragraph_count += 1

261 if paragraph_count > 1:

262 return True

263 return False

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/mistune/list_parser.py: 98%

163 statements