Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/mistune/list_parser.py

1"""because list is complex, split list parser in a new file"""

3from __future__ import annotations

5import re

6from typing import TYPE_CHECKING, Any, Iterable, Optional, Match

7from .util import expand_leading_tab, expand_tab, strip_end

9if TYPE_CHECKING:

10 from .block_parser import BlockParser

11 from .core import BlockState

# Regex source for the first line of a list item. It is kept as a plain
# string (not compiled) and exposes three named groups.
LIST_PATTERN = "".join(
    (
        # list_1: up to three spaces of indentation before the marker
        r"^(?P<list_1> {0,3})",
        # list_2: a bullet character, or 1-9 digits followed by "." or ")"
        r"(?P<list_2>[\*\+-]|\d{1,9}[.)])",
        # list_3: the rest of the line (blank, or whitespace followed by text)
        r"(?P<list_3>[ \t]*|[ \t].+)$",
    )
)

# Captures the leading whitespace (group 1) of a string that contains at
# least one non-whitespace character.
_LINE_HAS_TEXT = re.compile(r"(\s*)\S")

def parse_list(block: "BlockParser", m: Match[str], state: "BlockState") -> int:
    """Parse tokens for ordered and unordered list.

    :param block: block parser providing ``list_rules``, ``max_nested_level``
        and the helpers used while parsing each item.
    :param m: match of the list pattern on the first item line; the groups
        ``list_1`` (indent), ``list_2`` (marker) and ``list_3`` (text) are read.
    :param state: parsing state; its cursor and token list are updated.
    :returns: the position reported by ``state.append_paragraph()`` when the
        line is absorbed by a paragraph, the end position recorded by an item
        (``_end_pos``) when one was set, otherwise ``state.cursor``.
    """
    first_text = m.group("list_3")
    if not first_text.strip():
        # Example 285
        # an empty list item cannot interrupt a paragraph
        paragraph_end = state.append_paragraph()
        if paragraph_end:
            return paragraph_end

    marker = m.group("list_2")
    delimiter = marker[-1]
    # a bullet marker is one character; an ordered marker is digits + delimiter
    is_ordered = len(marker) > 1
    depth = state.depth()
    attrs: dict[str, Any] = {"depth": depth, "ordered": is_ordered}
    token: dict[str, Any] = {
        "type": "list",
        "children": [],
        "tight": True,
        "bullet": delimiter,
        "attrs": attrs,
    }
    if is_ordered:
        start = int(marker[:-1])
        if start != 1:
            # Example 304
            # we allow only lists starting with 1 to interrupt paragraphs
            paragraph_end = state.append_paragraph()
            if paragraph_end:
                return paragraph_end
            attrs["start"] = start

    # move past the matched line and its trailing newline
    state.cursor = m.end() + 1
    pending: Optional[tuple[str, str, str]] = (m.group("list_1"), marker, first_text)

    if depth < block.max_nested_level - 1:
        rules = block.list_rules
    else:
        # nesting limit reached: parse item content without the "list" rule
        rules = list(block.list_rules)
        rules.remove("list")

    bullet = _get_list_bullet(delimiter)
    # each call consumes one item and hands back the groups of the next one
    while pending:
        pending = _parse_list_item(block, bullet, pending, token, state, rules)

    end_pos = token.pop("_end_pos", None)
    _transform_tight_list(token)
    if not end_pos:
        state.append_token(token)
        return state.cursor

    # an item recorded an end position together with the token index at which
    # the list has to be inserted
    index = token.pop("_tok_index")
    state.tokens.insert(index, token)
    return end_pos  # type: ignore[no-any-return]

def _transform_tight_list(token: dict[str, Any]) -> None:
    """Turn paragraphs of a tight list into ``block_text`` tokens, in place.

    Nothing happens when ``token["tight"]`` is falsy. Nested ``list`` tokens
    found directly inside the items are processed recursively, each one
    according to its own ``tight`` flag.
    """
    if not token["tight"]:
        return

    for item in token["children"]:
        for child in item["children"]:
            kind = child["type"]
            if kind == "paragraph":
                child["type"] = "block_text"
            elif kind == "list":
                _transform_tight_list(child)

def _parse_list_item(
    block: "BlockParser",
    bullet: str,
    groups: tuple[str, str, str],
    token: dict[str, Any],
    state: "BlockState",
    rules: list[str],
) -> tuple[str, str, str] | None:
    """Consume one list item and append it to ``token["children"]``.

    :param block: block parser; its ``specification`` patterns, ``BLANK_LINE``
        regex, ``parse_method`` and ``parse`` are used.
    :param bullet: regex fragment matching the markers of sibling items.
    :param groups: ``(indent, marker, text)`` of the item's first line.
    :param token: the list token being built; ``tight`` may be cleared and
        ``_tok_index`` / ``_end_pos`` may be set on it.
    :param state: parsing state; its cursor is advanced over consumed lines.
    :param rules: rules used to parse the item's content.
    :returns: the ``(indent, marker, text)`` groups of the next sibling item,
        or ``None`` when no further item follows.
    """
    indent, marker, text = groups

    leading_width = len(indent) + len(marker)
    text, continue_width = _compile_continue_width(text, leading_width)
    item_pattern = _compile_list_item_pattern(bullet, leading_width)

    # block rules that are checked on lines not indented enough to belong
    # to the item
    break_rules = [
        "thematic_break",
        "fenced_code",
        "atx_heading",
        "block_quote",
        "block_html",
        "list",
    ]
    if "fenced_directive" in block.specification:
        break_rules.insert(1, "fenced_directive")

    named_patterns = [(rule, block.specification[rule]) for rule in break_rules]
    if leading_width < 3:
        # swap the first "3" of every pattern for the item's own width
        width_repr = str(leading_width)
        named_patterns = [
            (rule, pattern.replace("3", width_repr, 1))
            for rule, pattern in named_patterns
        ]

    named_patterns.insert(1, ("list_item", item_pattern))
    alternatives = [r"(?P<%s>(?<=\n)%s)" % pair for pair in named_patterns]
    breaker = re.compile("|".join(alternatives), re.M)

    body = ""
    following = None
    after_blank = False
    pos = state.cursor
    continuation_indent = " " * continue_width

    while pos < state.cursor_max:
        pos = state.find_line_end()
        line = state.get_text(pos)

        if block.BLANK_LINE.match(line):
            body += "\n"
            after_blank = True
            state.cursor = pos
            continue

        line = expand_leading_tab(line)
        if line.startswith(continuation_indent):
            if after_blank and not text and not body.strip():
                # Example 280
                # A list item can begin with at most one blank line
                break

            body += line
            after_blank = False
            state.cursor = pos
            continue

        hit = breaker.match(state.src, state.cursor)
        if hit:
            kind = hit.lastgroup
            if kind == "list_item":
                # a blank line between sibling items makes the list loose
                if after_blank:
                    token["tight"] = False
                following = (
                    hit.group("listitem_1"),
                    hit.group("listitem_2"),
                    hit.group("listitem_3"),
                )
                state.cursor = hit.end() + 1
                break

            if kind == "list":
                break

            insert_at = len(state.tokens)
            end_pos = block.parse_method(hit, state)
            if end_pos:
                # remember where the list token goes and where parsing resumes
                token["_tok_index"] = insert_at
                token["_end_pos"] = end_pos
                break

        # the line is known not to start with the continuation indent here
        # (that case was handled above), so a preceding blank line ends the item
        if after_blank:
            break

        body += line
        state.cursor = pos

    text += _clean_list_item_text(body, continue_width)
    child = state.child_state(strip_end(text))

    block.parse(child, rules)

    if token["tight"] and _is_loose_list(child.tokens):
        token["tight"] = False

    token["children"].append({"type": "list_item", "children": child.tokens})
    return following

195

196

def _get_list_bullet(c: str) -> str:
    """Return the regex fragment that matches markers of sibling list items.

    :param c: last character of the first item's marker: the delimiter
        ``"."`` or ``")"`` of an ordered list, or the bullet character itself.
    :returns: a regex source fragment. Ordered lists accept 1-9 digits followed
        by the same delimiter; ``"*"`` and ``"+"`` are escaped; any other
        character falls back to ``"-"``.
    """
    # Ordered markers need at least one digit, in line with the ``\d{1,9}``
    # used by LIST_PATTERN for the first item; a bare "." or ")" is not a
    # list marker.
    fragments = {
        ".": r"\d{1,9}\.",
        ")": r"\d{1,9}\)",
        "*": r"\*",
        "+": r"\+",
    }
    return fragments.get(c, "-")

209

210

def _compile_list_item_pattern(bullet: str, leading_width: int) -> str:
    """Build the regex source that recognises a sibling list item line.

    :param bullet: regex fragment for the item marker.
    :param leading_width: width of the current item's indent plus marker;
        values above 3 are capped at 3 for the allowed indentation.
    :returns: pattern source with the named groups ``listitem_1`` (indent),
        ``listitem_2`` (marker) and ``listitem_3`` (rest of the line).
    """
    max_indent = min(leading_width, 3)
    indent_group = r"^(?P<listitem_1> {0," + str(max_indent) + "})"
    marker_group = r"(?P<listitem_2>" + bullet + ")"
    text_group = r"(?P<listitem_3>[ \t]*|[ \t][^\n]+)$"
    return indent_group + marker_group + text_group

219

220

def _compile_continue_width(text: str, leading_width: int) -> tuple[str, int]:
    """Work out the item's first content line and its continuation indent.

    :param text: what follows the list marker on the item's first line.
    :param leading_width: width of the indentation plus the marker.
    :returns: ``(text, continue_width)`` where ``text`` is the first content
        line with the marker padding removed and a newline appended (empty when
        the line has no text), and ``continue_width`` is ``leading_width`` plus
        the width of the removed padding.
    """
    text = expand_leading_tab(text, 3)
    text = expand_tab(text)

    m2 = _LINE_HAS_TEXT.match(text)
    if not m2:
        # nothing but whitespace after the marker: one column of padding
        return "", leading_width + 1

    # indent code, startswith 5 spaces: only one space belongs to the marker,
    # the remaining ones stay part of the content. Written as ``" " * 5`` so
    # that the threshold cannot silently collapse to a single space.
    if text.startswith(" " * 5):
        space_width = 1
    else:
        space_width = len(m2.group(1))

    return text[space_width:] + "\n", leading_width + space_width

240

241

def _clean_list_item_text(src: str, continue_width: int) -> str:
    """Strip the continuation indent from the collected lines of an item.

    :param src: raw lines gathered for the item, joined by newlines.
    :param continue_width: number of leading spaces that belong to the item's
        indentation.
    :returns: ``src`` where every line starting with that indent has it
        removed and is then passed through ``expand_tab``; other lines are
        kept unchanged.
    """
    # according to Example 7, tab should be treated as 3 spaces
    indent = " " * continue_width
    cleaned = [
        # according to CommonMark Example 5
        # tab should be treated as 4 spaces
        expand_tab(row[len(indent):]) if row.startswith(indent) else row
        for row in src.split("\n")
    ]
    return "\n".join(cleaned)

258

259

def _is_loose_list(tokens: Iterable[dict[str, Any]]) -> bool:
    """Tell whether an item's child tokens make the enclosing list loose.

    :param tokens: child tokens of a list item; only ``tok["type"]`` is read.
    :returns: ``True`` as soon as a ``blank_line`` token or a second
        ``paragraph`` token is seen, ``False`` otherwise.
    """
    seen_paragraph = False
    for tok in tokens:
        kind = tok["type"]
        if kind == "blank_line":
            return True
        if kind == "paragraph":
            if seen_paragraph:
                return True
            seen_paragraph = True
    return False

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/mistune/list_parser.py: 98%

167 statements