Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/mistune/list

1"""because list is complex, split list parser in a new file"""

3import re

4from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Match

5from .util import expand_leading_tab, expand_tab, strip_end

7if TYPE_CHECKING:

8 from .block_parser import BlockParser

9 from .core import BlockState

11LIST_PATTERN = (

12 r"^(?P<list_1> {0,3})"

13 r"(?P<list_2>[\*\+-]|\d{1,9}[.)])"

14 r"(?P<list_3>[ \t]*|[ \t].+)$"

15)

17_LINE_HAS_TEXT = re.compile(r"(\s*)\S")

def parse_list(block: "BlockParser", m: Match[str], state: "BlockState") -> int:
    """Parse tokens for ordered and unordered list.

    :param block: block parser providing ``max_nested_level``, ``list_rules``
        and the parsing entry points used for the list items.
    :param m: match of ``LIST_PATTERN`` for the first line of the list.
    :param state: block state whose cursor and token list are updated.
    :return: the value of ``state.append_paragraph()`` when that call reports
        a position, the ``_end_pos`` recorded by an item when present, and
        otherwise ``state.cursor`` after the list token has been appended.
    """
    first_text = m.group("list_3")
    if not first_text.strip():
        # Example 285
        # an empty list item cannot interrupt a paragraph
        paragraph_end = state.append_paragraph()
        if paragraph_end:
            return paragraph_end

    marker = m.group("list_2")
    delimiter = marker[-1]
    # bullet markers are a single character; ordered ones are digits + delimiter
    is_ordered = len(marker) > 1
    depth = state.depth()
    attrs: Dict[str, Any] = {
        "depth": depth,
        "ordered": is_ordered,
    }
    token: Dict[str, Any] = {
        "type": "list",
        "children": [],
        "tight": True,
        "bullet": delimiter,
        "attrs": attrs,
    }

    if is_ordered:
        start = int(marker[:-1])
        if start != 1:
            # Example 304
            # we allow only lists starting with 1 to interrupt paragraphs
            paragraph_end = state.append_paragraph()
            if paragraph_end:
                return paragraph_end
            # the start number is only recorded when it differs from 1
            attrs["start"] = start

    state.cursor = m.end() + 1

    rules = block.list_rules
    if depth >= block.max_nested_level - 1:
        # nesting limit reached: parse the items without the "list" rule,
        # working on a copy so the parser's own rule list is left untouched
        rules = list(rules)
        rules.remove("list")

    sibling_marker = _get_list_bullet(delimiter)
    pending: Optional[Tuple[str, str, str]] = (m.group("list_1"), marker, first_text)
    # every call consumes one item and hands back the groups of the next one
    while pending:
        pending = _parse_list_item(block, sibling_marker, pending, token, state, rules)

    end_pos = token.pop("_end_pos", None)
    _transform_tight_list(token)
    if not end_pos:
        state.append_token(token)
        return state.cursor

    # an item recorded an end position together with a token index: place
    # the list at that index instead of appending it
    insert_at = token.pop("_tok_index")
    state.tokens.insert(insert_at, token)
    return end_pos

77def _transform_tight_list(token: Dict[str, Any]) -> None:

78 if token["tight"]:

79 # reset tight list item

80 for list_item in token["children"]:

81 for tok in list_item["children"]:

82 if tok["type"] == "paragraph":

83 tok["type"] = "block_text"

84 elif tok["type"] == "list":

85 _transform_tight_list(tok)

def _parse_list_item(
    block: "BlockParser",
    bullet: str,
    groups: Tuple[str, str, str],
    token: Dict[str, Any],
    state: "BlockState",
    rules: List[str],
) -> Optional[Tuple[str, str, str]]:
    """Consume one list item and append it to ``token["children"]``.

    :param block: block parser; its ``specification``, ``BLANK_LINE``,
        ``parse_method`` and ``parse`` members are used.
    :param bullet: regex fragment matching the markers of sibling items.
    :param groups: ``(leading spaces, marker, text)`` of the item's first line.
    :param token: the list token being built.  Its ``tight`` flag may be
        cleared, and ``_tok_index`` / ``_end_pos`` are set on it when a
        breaking block reports an end position.
    :param state: block state; ``state.cursor`` is advanced past every line
        that is consumed.
    :param rules: rule names used to parse the content of the item.
    :return: the groups of the next sibling item when one directly follows,
        otherwise ``None``.
    """
    indent, marker, text = groups

    leading_width = len(indent) + len(marker)
    text, continue_width = _compile_continue_width(text, leading_width)
    item_pattern = _compile_list_item_pattern(bullet, leading_width)

    # block rules that are checked on lines not indented as item content
    break_names = [
        "thematic_break",
        "fenced_code",
        "atx_heading",
        "block_quote",
        "block_html",
        "list",
    ]
    if "fenced_directive" in block.specification:
        break_names.insert(1, "fenced_directive")

    named_patterns = [(name, block.specification[name]) for name in break_names]
    if leading_width < 3:
        # swap the first "3" of each pattern for the item's leading width
        width_text = str(leading_width)
        named_patterns = [
            (name, pattern.replace("3", width_text, 1))
            for name, pattern in named_patterns
        ]

    named_patterns.insert(1, ("list_item", item_pattern))
    breaker = re.compile(
        "|".join(r"(?P<%s>(?<=\n)%s)" % pair for pair in named_patterns),
        re.M,
    )

    src = ""
    next_group = None
    after_blank = False
    line_end = state.cursor
    content_indent = " " * continue_width

    while line_end < state.cursor_max:
        line_end = state.find_line_end()
        line = state.get_text(line_end)

        if block.BLANK_LINE.match(line):
            src += "\n"
            after_blank = True
            state.cursor = line_end
            continue

        line = expand_leading_tab(line)
        if line.startswith(content_indent):
            if after_blank and not text and not src.strip():
                # Example 280
                # A list item can begin with at most one blank line
                break

            src += line
            after_blank = False
            state.cursor = line_end
            continue

        found = breaker.match(state.src, state.cursor)
        if found:
            kind = found.lastgroup
            if kind == "list_item":
                # a blank line between two sibling items makes the list loose
                if after_blank:
                    token["tight"] = False
                next_group = (
                    found.group("listitem_1"),
                    found.group("listitem_2"),
                    found.group("listitem_3"),
                )
                state.cursor = found.end() + 1
                break

            if kind == "list":
                break

            insert_at = len(state.tokens)
            end_pos = block.parse_method(found, state)
            if end_pos:
                # remember the token count from before the breaking block
                # was parsed, and the position it reported
                token["_tok_index"] = insert_at
                token["_end_pos"] = end_pos
                break

        # Lines indented as item content were handled above, so this line is
        # known not to be a continue line; after a blank line it ends the item.
        if after_blank:
            break

        src += line
        state.cursor = line_end

    text += _clean_list_item_text(src, continue_width)
    child = state.child_state(strip_end(text))

    block.parse(child, rules)

    if token["tight"] and _is_loose_list(child.tokens):
        token["tight"] = False

    item_token = {
        "type": "list_item",
        "children": child.tokens,
    }
    token["children"].append(item_token)

    # next_group is either None or a three-element tuple
    return next_group

193

194

195def _get_list_bullet(c: str) -> str:

196 if c == ".":

197 bullet = r"\d{0,9}\."

198 elif c == ")":

199 bullet = r"\d{0,9}\)"

200 elif c == "*":

201 bullet = r"\*"

202 elif c == "+":

203 bullet = r"\+"

204 else:

205 bullet = "-"

206 return bullet

207

208

209def _compile_list_item_pattern(bullet: str, leading_width: int) -> str:

210 if leading_width > 3:

211 leading_width = 3

212 return (

213 r"^(?P<listitem_1> {0," + str(leading_width) + "})"

214 r"(?P<listitem_2>" + bullet + ")"

215 r"(?P<listitem_3>[ \t]*|[ \t][^\n]+)$"

216 )

217

218

def _compile_continue_width(text: str, leading_width: int) -> Tuple[str, int]:
    """Work out the content indent of a list item from its first line.

    :param text: what follows the marker on the item's first line.
    :param leading_width: width of the leading spaces plus the marker.
    :return: ``(text, continue_width)``.  ``text`` is the first line with the
        separating spaces removed and a newline appended, or ``""`` when the
        line holds no text.  ``continue_width`` is ``leading_width`` plus the
        number of separating spaces.
    """
    text = expand_leading_tab(text, 3)
    text = expand_tab(text)

    m2 = _LINE_HAS_TEXT.match(text)
    if m2:
        # indent code, startswith 5 spaces: only one space belongs to the
        # marker and the remainder is kept as part of the item content.
        # The five-space prefix is spelled as a product so that its width
        # stays explicit.
        if text.startswith(" " * 5):
            space_width = 1
        else:
            space_width = len(m2.group(1))

        text = text[space_width:] + "\n"
    else:
        # no text after the marker: a single separating space is assumed
        space_width = 1
        text = ""

    continue_width = leading_width + space_width
    return text, continue_width

238

239

240def _clean_list_item_text(src: str, continue_width: int) -> str:

241 # according to Example 7, tab should be treated as 3 spaces

242 rv = []

243 trim_space = " " * continue_width

244 lines = src.split("\n")

245 for line in lines:

246 if line.startswith(trim_space):

247 line = line.replace(trim_space, "", 1)

248 # according to CommonMark Example 5

249 # tab should be treated as 4 spaces

250 line = expand_tab(line)

251 rv.append(line)

252 else:

253 rv.append(line)

254

255 return "\n".join(rv)

256

257

258def _is_loose_list(tokens: Iterable[Dict[str, Any]]) -> bool:

259 paragraph_count = 0

260 for tok in tokens:

261 if tok["type"] == "blank_line":

262 return True

263 if tok["type"] == "paragraph":

264 paragraph_count += 1

265 if paragraph_count > 1:

266 return True

267 return False

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/mistune/list_parser.py: 98%

166 statements