Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/mistune/block_parser.py

1import re

2from typing import Optional, List, Tuple, Match, Pattern

3from .util import (

4 unikey,

5 escape_url,

6 expand_tab,

7 expand_leading_tab,

9from .core import Parser, BlockState

10from .helpers import (

11 LINK_LABEL,

12 HTML_TAGNAME,

13 HTML_ATTRIBUTES,

14 BLOCK_TAGS,

15 PRE_TAGS,

16 unescape_char,

17 parse_link_href,

18 parse_link_title,

19)

20from .list_parser import parse_list, LIST_PATTERN

# One to four spaces at the start of every line.
_INDENT_CODE_TRIM = re.compile(r"^ {1,4}", re.MULTILINE)
# A trailing run of ``#`` (preceded by whitespace or the start of the text).
_ATX_HEADING_TRIM = re.compile(r"(\s+|^)#+\s*$")
# A single optional space at the start of every line.
_BLOCK_QUOTE_TRIM = re.compile(r"^ ?", re.MULTILINE)
# Optional spaces followed by the ``>`` marker at the start of every line.
_BLOCK_QUOTE_LEADING = re.compile(r"^ *>", re.MULTILINE)

# A whitespace-only line closing the text.
_LINE_BLANK_END = re.compile(r"\n[ \t]*\n$")
# Optional spaces/tabs up to and including the next newline.
_BLANK_TO_LINE = re.compile(r"[ \t]*\n")

# Capturing alternation of every name in BLOCK_TAGS followed by every name in PRE_TAGS.
_BLOCK_TAGS_PATTERN = f"({'|'.join(BLOCK_TAGS)}|{'|'.join(PRE_TAGS)})"
# Rest of an opening tag: attributes, ``>``, then nothing but blanks to end of line.
_OPEN_TAG_END = re.compile(HTML_ATTRIBUTES + r"[ \t]*>[ \t]*(?:\n|$)")
# Rest of a closing tag: ``>`` then nothing but blanks to end of line.
_CLOSE_TAG_END = re.compile(r"[ \t]*>[ \t]*(?:\n|$)")
# One or more consecutive lines that each start with an explicit ``>`` marker.
_STRICT_BLOCK_QUOTE = re.compile(r"( {0,3}>[^\n]*(?:\n|$))+")

class BlockParser(Parser[BlockState]):
    """Block-level Markdown parser.

    Every key of :attr:`SPECIFICATION` names a rule and maps it to the
    regular expression that detects it.  A rule called ``name`` is handled
    by the method ``parse_<name>``; the handler receives the regex match and
    the current :class:`BlockState`, appends tokens to the state and returns
    the position where parsing should resume.  A falsy return value tells
    :meth:`parse` that the rule did not apply, in which case the current
    line is added as paragraph text instead.
    """

    state_cls = BlockState

    BLANK_LINE = re.compile(r"(^[ \t\v\f]*\n)+", re.M)

    RAW_HTML = (
        r"^ {0,3}("
        r"</?" + HTML_TAGNAME + r"|"
        r"<!--|"  # comment
        r"<\?|"  # script
        r"<![A-Z]|"
        r"<!\[CDATA\[)"
    )

    BLOCK_HTML = (
        r"^ {0,3}(?:"
        r"(?:</?" + _BLOCK_TAGS_PATTERN + r"(?:[ \t]+|\n|$))"
        r"|<!--"  # comment
        r"|<\?"  # script
        r"|<![A-Z]"
        r"|<!\[CDATA\[)"
    )

    # rule name -> regular expression source
    SPECIFICATION = {
        "blank_line": r"(^[ \t\v\f]*\n)+",
        "atx_heading": r"^ {0,3}(?P<atx_1>#{1,6})(?!#+)(?P<atx_2>[ \t]*|[ \t]+.*?)$",
        "setex_heading": r"^ {0,3}(?P<setext_1>=|-){1,}[ \t]*$",
        "fenced_code": (
            r"^(?P<fenced_1> {0,3})(?P<fenced_2>`{3,}|~{3,})"
            r"[ \t]*(?P<fenced_3>.*?)$"
        ),
        "indent_code": (
            r"^(?: {4}| *\t)[^\n]+(?:\n+|$)"
            r"((?:(?: {4}| *\t)[^\n]+(?:\n+|$))|\s)*"
        ),
        "thematic_break": r"^ {0,3}((?:-[ \t]*){3,}|(?:_[ \t]*){3,}|(?:\*[ \t]*){3,})$",
        "ref_link": r"^ {0,3}\[(?P<reflink_1>" + LINK_LABEL + r")\]:",
        "block_quote": r"^ {0,3}>(?P<quote_1>.*?)$",
        "list": LIST_PATTERN,
        "block_html": BLOCK_HTML,
        "raw_html": RAW_HTML,
    }

    # rules used when the caller does not supply its own rule list
    DEFAULT_RULES = (
        "fenced_code",
        "indent_code",
        "atx_heading",
        "setex_heading",
        "thematic_break",
        "block_quote",
        "list",
        "ref_link",
        "raw_html",
        "blank_line",
    )

    def __init__(
        self,
        block_quote_rules: Optional[List[str]] = None,
        list_rules: Optional[List[str]] = None,
        max_nested_level: int = 6,
    ):
        """Create a block parser.

        :param block_quote_rules: rule names used to parse the content of a
            block quote; a copy of :attr:`DEFAULT_RULES` when ``None``.
        :param list_rules: rule names stored on ``self.list_rules``; a copy
            of :attr:`DEFAULT_RULES` when ``None``.  This class only stores
            the value (presumably the list parser consumes it).
        :param max_nested_level: nesting depth from which block quotes are
            no longer parsed inside block quotes.
        """
        super().__init__()

        if block_quote_rules is None:
            block_quote_rules = list(self.DEFAULT_RULES)

        if list_rules is None:
            list_rules = list(self.DEFAULT_RULES)

        self.block_quote_rules = block_quote_rules
        self.list_rules = list_rules
        self.max_nested_level = max_nested_level
        # register default parse methods
        self._methods = {name: getattr(self, "parse_" + name) for name in self.SPECIFICATION}

    def parse_blank_line(self, m: Match[str], state: BlockState) -> int:
        """Parse token for blank lines."""
        state.append_token({"type": "blank_line"})
        return m.end()

    def parse_thematic_break(self, m: Match[str], state: BlockState) -> int:
        """Parse token for thematic break, e.g. ``<hr>`` tag in HTML."""
        state.append_token({"type": "thematic_break"})
        # $ does not count '\n'
        return m.end() + 1

    def parse_indent_code(self, m: Match[str], state: BlockState) -> int:
        """Parse token for code block which is indented by 4 spaces."""
        # it is a part of the paragraph
        end_pos = state.append_paragraph()
        if end_pos:
            return end_pos

        code = m.group(0)
        code = expand_leading_tab(code)
        code = _INDENT_CODE_TRIM.sub("", code)
        code = code.strip("\n")
        state.append_token({"type": "block_code", "raw": code, "style": "indent"})
        return m.end()

    def parse_fenced_code(self, m: Match[str], state: BlockState) -> Optional[int]:
        """Parse token for fenced code block. A fenced code block is started with
        3 or more backtick(`) or tilde(~).

        An example of a fenced code block:

        .. code-block:: markdown

            ```python
            def markdown(text):
                return mistune.html(text)
            ```

        :return: the position after the closing fence, or the end of the
            source when the fence is never closed; ``None`` when a backtick
            fence carries an info string that itself contains a backtick.
        """
        spaces = m.group("fenced_1")
        marker = m.group("fenced_2")
        info = m.group("fenced_3")

        c = marker[0]
        # CommonMark Example 145
        # Info strings for backtick code blocks cannot contain backticks
        if info and c == "`" and c in info:
            return None

        # closing fence: same character, at least as long as the opening fence
        _end = re.compile(r"^ {0,3}" + c + "{" + str(len(marker)) + r",}[ \t]*(?:\n|$)", re.M)
        # the rule's ``$`` stops before the newline; the code starts after it
        cursor_start = m.end() + 1

        m2 = _end.search(state.src, cursor_start)
        if m2:
            code = state.src[cursor_start : m2.start()]
            end_pos = m2.end()
        else:
            # unclosed fence: everything up to the end of the source is code
            code = state.src[cursor_start:]
            end_pos = state.cursor_max

        if spaces and code:
            # drop from each code line at most the indentation of the opening fence
            _trim_pattern = re.compile("^ {0," + str(len(spaces)) + "}", re.M)
            code = _trim_pattern.sub("", code)

        token = {"type": "block_code", "raw": code, "style": "fenced", "marker": marker}
        if info:
            info = unescape_char(info)
            token["attrs"] = {"info": info.strip()}

        state.append_token(token)
        return end_pos

    def parse_atx_heading(self, m: Match[str], state: BlockState) -> int:
        """Parse token for ATX heading. An ATX heading is started with 1 to 6
        symbol of ``#``."""
        level = len(m.group("atx_1"))
        text = m.group("atx_2").strip()
        # remove last #
        if text:
            text = _ATX_HEADING_TRIM.sub("", text)

        token = {"type": "heading", "text": text, "attrs": {"level": level}, "style": "atx"}
        state.append_token(token)
        # $ does not count '\n'
        return m.end() + 1

    def parse_setex_heading(self, m: Match[str], state: BlockState) -> Optional[int]:
        """Parse token for setex style heading. A setex heading syntax looks like:

        .. code-block:: markdown

            H1 title
            ========

        When the previous token is a paragraph it is converted in place into
        a heading (level 1 for ``=``, level 2 for ``-``).  Otherwise the line
        is retried as a thematic break or a list; ``None`` is returned when
        neither matches.
        """
        last_token = state.last_token()
        if last_token and last_token["type"] == "paragraph":
            level = 1 if m.group("setext_1") == "=" else 2
            last_token["type"] = "heading"
            last_token["style"] = "setext"
            last_token["attrs"] = {"level": level}
            return m.end() + 1

        sc = self.compile_sc(["thematic_break", "list"])
        m2 = sc.match(state.src, state.cursor)
        if m2:
            return self.parse_method(m2, state)
        return None

    def parse_ref_link(self, m: Match[str], state: BlockState) -> Optional[int]:
        """Parse link references and save the link information into ``state.env``.

        Here is an example of a link reference:

        .. code-block:: markdown

            a [link][example]

            [example]: https://example.com "Optional title"

        This method will save the link reference into ``state.env`` as::

            state.env['ref_links']['example'] = {
                'url': 'https://example.com',
                'title': "Optional title",
            }

        The first definition of a key wins; later definitions of the same key
        are consumed but not stored.

        :return: the position after the definition, or ``None`` when the
            text is not a valid link reference definition.
        """
        end_pos = state.append_paragraph()
        if end_pos:
            return end_pos

        label = m.group("reflink_1")
        key = unikey(label)
        if not key:
            return None

        href, href_pos = parse_link_href(state.src, m.end(), block=True)
        if href is None:
            return None

        assert href_pos is not None

        # a title may not extend past the next blank line
        _blank = self.BLANK_LINE.search(state.src, href_pos)
        if _blank:
            max_pos = _blank.start()
        else:
            max_pos = state.cursor_max

        title, title_pos = parse_link_title(state.src, href_pos, max_pos)
        if title_pos:
            # only blanks may follow the title on its line
            m2 = _BLANK_TO_LINE.match(state.src, title_pos)
            if m2:
                title_pos = m2.end()
            else:
                title_pos = None
                title = None

        if title_pos is None:
            # no usable title: the destination itself must then end its line
            m3 = _BLANK_TO_LINE.match(state.src, href_pos)
            if m3:
                href_pos = m3.end()
            else:
                href_pos = None
                href = None

        end_pos = title_pos or href_pos
        if not end_pos:
            return None

        if key not in state.env["ref_links"]:
            assert href is not None
            href = unescape_char(href)
            data = {"url": escape_url(href), "label": label}
            if title:
                data["title"] = title
            state.env["ref_links"][key] = data
        return end_pos

    @staticmethod
    def _strip_quote_markers(quote: str) -> str:
        """Strip the ``>`` marker and one optional following space from each line."""
        quote = _BLOCK_QUOTE_LEADING.sub("", quote)
        quote = expand_leading_tab(quote, 3)
        return _BLOCK_QUOTE_TRIM.sub("", quote)

    def extract_block_quote(self, m: Match[str], state: BlockState) -> Tuple[str, Optional[int]]:
        """Extract text and cursor end position of a block quote.

        ``state.cursor`` is moved past every line that is consumed into the
        quote.

        :return: ``(text, end_pos)`` where ``text`` is the quote content with
            the markers removed and ``end_pos`` is the position returned by a
            block rule that interrupted the quote, or ``None`` when no rule
            interrupted it.
        """

        # cleanup at first to detect if it is code block
        text = m.group("quote_1") + "\n"
        text = expand_leading_tab(text, 3)
        text = _BLOCK_QUOTE_TRIM.sub("", text)

        # these first lines cannot be continued lazily: every following
        # line needs its own ``>`` marker
        sc = self.compile_sc(["blank_line", "indent_code", "fenced_code"])
        require_marker = bool(sc.match(text))

        state.cursor = m.end() + 1

        end_pos: Optional[int] = None
        if require_marker:
            m2 = _STRICT_BLOCK_QUOTE.match(state.src, state.cursor)
            if m2:
                text += self._strip_quote_markers(m2.group(0))
                state.cursor = m2.end()
        else:
            prev_blank_line = False
            break_sc = self.compile_sc(
                [
                    "blank_line",
                    "thematic_break",
                    "fenced_code",
                    "list",
                    "block_html",
                ]
            )
            while state.cursor < state.cursor_max:
                m3 = _STRICT_BLOCK_QUOTE.match(state.src, state.cursor)
                if m3:
                    quote = self._strip_quote_markers(m3.group(0))
                    text += quote
                    state.cursor = m3.end()
                    if not quote.strip():
                        prev_blank_line = True
                    else:
                        prev_blank_line = bool(_LINE_BLANK_END.search(quote))
                    continue

                if prev_blank_line:
                    # CommonMark Example 249
                    # because of laziness, a blank line is needed between
                    # a block quote and a following paragraph
                    break

                m4 = break_sc.match(state.src, state.cursor)
                if m4:
                    end_pos = self.parse_method(m4, state)
                    if end_pos:
                        break

                # lazy continuation line
                pos = state.find_line_end()
                line = state.get_text(pos)
                line = expand_leading_tab(line, 3)
                text += line
                state.cursor = pos

        # according to CommonMark Example 6, the second tab should be
        # treated as 4 spaces
        return expand_tab(text), end_pos

    def parse_block_quote(self, m: Match[str], state: BlockState) -> int:
        """Parse token for block quote. Here is an example of the syntax:

        .. code-block:: markdown

            > a block quote starts
            > with right arrows

        The quote content is parsed in a child state with
        ``self.block_quote_rules``.  Once ``state.depth()`` reaches
        ``max_nested_level - 1`` the ``"block_quote"`` rule is left out so
        that quotes stop nesting.
        """
        text, end_pos = self.extract_block_quote(m, state)
        # scan children state
        child = state.child_state(text)
        rules: List[str]
        if state.depth() >= self.max_nested_level - 1:
            # Filter instead of ``list.remove``: a custom rule list without
            # "block_quote" must not raise ValueError, and a duplicated
            # entry must not slip past the nesting limit.
            rules = [name for name in self.block_quote_rules if name != "block_quote"]
        else:
            rules = self.block_quote_rules

        self.parse(child, rules)
        token = {"type": "block_quote", "children": child.tokens}
        if end_pos:
            # A rule interrupted the quote and has already added its own
            # token during extraction; the quote token is prepended so that
            # it ends up before it (NOTE(review): relies on prepend_token
            # inserting ahead of the most recent token -- confirm).
            state.prepend_token(token)
            return end_pos
        state.append_token(token)
        return state.cursor

    def parse_list(self, m: Match[str], state: BlockState) -> int:
        """Parse tokens for ordered and unordered list."""
        return parse_list(self, m, state)

    def parse_block_html(self, m: Match[str], state: BlockState) -> Optional[int]:
        """Parse an HTML block; handled exactly like :meth:`parse_raw_html`."""
        return self.parse_raw_html(m, state)

    def parse_raw_html(self, m: Match[str], state: BlockState) -> Optional[int]:
        """Parse token for an HTML block.

        The kind of block is chosen from the matched opening marker; the
        ``rule N`` comments presumably refer to the numbered HTML block start
        conditions of the CommonMark spec.

        :return: the position after the HTML block, or ``None`` when the
            marker does not start an HTML block.
        """
        marker = m.group(0).strip()

        # rule 2
        if marker == "<!--":
            return _parse_html_to_end(state, "-->", m.end())

        # rule 3
        if marker == "<?":
            return _parse_html_to_end(state, "?>", m.end())

        # rule 5
        if marker == "<![CDATA[":
            return _parse_html_to_end(state, "]]>", m.end())

        # rule 4
        if marker.startswith("<!"):
            return _parse_html_to_end(state, ">", m.end())

        close_tag = None
        open_tag = None
        if marker.startswith("</"):
            close_tag = marker[2:].lower()
            # rule 6
            if close_tag in BLOCK_TAGS:
                return _parse_html_to_newline(state, self.BLANK_LINE)
        else:
            open_tag = marker[1:].lower()
            # rule 1
            if open_tag in PRE_TAGS:
                end_tag = "</" + open_tag + ">"
                return _parse_html_to_end(state, end_tag, m.end())
            # rule 6
            if open_tag in BLOCK_TAGS:
                return _parse_html_to_newline(state, self.BLANK_LINE)

        # Blocks of type 7 may not interrupt a paragraph.
        end_pos = state.append_paragraph()
        if end_pos:
            return end_pos

        # rule 7
        start_pos = m.end()
        end_pos = state.find_line_end()
        if (open_tag and _OPEN_TAG_END.match(state.src, start_pos, end_pos)) or (
            close_tag and _CLOSE_TAG_END.match(state.src, start_pos, end_pos)
        ):
            return _parse_html_to_newline(state, self.BLANK_LINE)

        return None

    def parse(self, state: BlockState, rules: Optional[List[str]] = None) -> None:
        """Tokenize ``state.src`` from ``state.cursor`` up to ``state.cursor_max``.

        :param state: the state to read from and append tokens to.
        :param rules: rule names passed to ``compile_sc`` to build the
            scanner.
        """
        sc = self.compile_sc(rules)

        while state.cursor < state.cursor_max:
            m = sc.search(state.src, state.cursor)
            if not m:
                break

            # text skipped before the match is paragraph content
            end_pos = m.start()
            if end_pos > state.cursor:
                text = state.get_text(end_pos)
                state.add_paragraph(text)
                state.cursor = end_pos

            end_pos2 = self.parse_method(m, state)
            if end_pos2:
                state.cursor = end_pos2
            else:
                # the rule declined the match: treat the line as paragraph text
                end_pos3 = state.find_line_end()
                text = state.get_text(end_pos3)
                state.add_paragraph(text)
                state.cursor = end_pos3

        # no rule matched the remainder: it is all paragraph text
        if state.cursor < state.cursor_max:
            text = state.src[state.cursor :]
            state.add_paragraph(text)
            state.cursor = state.cursor_max

470

471

def _parse_html_to_end(state: BlockState, end_marker: str, start_pos: int) -> int:
    """Append a ``block_html`` token that runs up to the line holding ``end_marker``.

    ``end_marker`` is searched in ``state.src`` from ``start_pos``.  When it
    is found, the token covers the text from the current cursor through the
    end of the line containing the marker, and ``state.cursor`` is moved to
    the marker along the way.  When it is missing, the token covers the
    whole rest of the source.

    :return: the position at which the HTML block ends.
    """
    found_at = state.src.find(end_marker, start_pos)
    if found_at >= 0:
        # text before the marker must be read before the cursor is moved
        raw = state.get_text(found_at)
        state.cursor = found_at
        stop = state.find_line_end()
        raw += state.get_text(stop)
    else:
        # never closed: the block swallows the remainder of the source
        raw = state.src[state.cursor :]
        stop = state.cursor_max

    state.append_token({"type": "block_html", "raw": raw})
    return stop

485

486

def _parse_html_to_newline(state: BlockState, newline: Pattern[str]) -> int:
    """Append a ``block_html`` token that runs up to the next ``newline`` match.

    ``newline`` is searched in ``state.src`` from the current cursor.  The
    token covers the text up to the start of that match, or the whole rest
    of the source when there is no match.

    :return: the position at which the HTML block ends.
    """
    boundary = newline.search(state.src, state.cursor)
    if boundary is None:
        raw = state.src[state.cursor :]
        stop = state.cursor_max
    else:
        stop = boundary.start()
        raw = state.get_text(stop)

    state.append_token({"type": "block_html", "raw": raw})
    return stop

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/mistune/block_parser.py: 100%

270 statements