Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/mistune/block_parser.py

1import re

2from typing import Optional, List, Tuple, Match, Pattern

3import string

4from .util import (

5 unikey,

6 escape_url,

7 expand_tab,

8 expand_leading_tab,

10from .core import Parser, BlockState

11from .helpers import (

12 LINK_LABEL,

13 HTML_TAGNAME,

14 HTML_ATTRIBUTES,

15 BLOCK_TAGS,

16 PRE_TAGS,

17 unescape_char,

18 parse_link_href,

19 parse_link_title,

20)

21from .list_parser import parse_list, LIST_PATTERN

# Strips up to four leading spaces from every line of an indented code block.
_INDENT_CODE_TRIM = re.compile(r"^ {1,4}", re.MULTILINE)
# Matches the optional closing run of "#" at the end of an ATX heading.
_ATX_HEADING_TRIM = re.compile(r"(\s+|^)#+\s*$")
# Strips the single optional space at the start of each block quote line.
_BLOCK_QUOTE_TRIM = re.compile(r"^ ?", re.MULTILINE)
# Matches the leading spaces plus ">" marker of each block quote line.
_BLOCK_QUOTE_LEADING = re.compile(r"^ *>", re.MULTILINE)

# Matches text that ends with a whitespace-only line.
_LINE_BLANK_END = re.compile(r"\n[ \t]*\n$")
# Matches trailing spaces/tabs up to and including the next newline.
_BLANK_TO_LINE = re.compile(r"[ \t]*\n")

# Alternation of every block-level and "pre"-style HTML tag name.
_BLOCK_TAGS_PATTERN = f"({'|'.join(BLOCK_TAGS)}|{'|'.join(PRE_TAGS)})"
# Remainder of an opening tag: attributes, ">", then nothing else on the line.
_OPEN_TAG_END = re.compile(HTML_ATTRIBUTES + r"[ \t]*>[ \t]*(?:\n|$)")
# Remainder of a closing tag: ">", then nothing else on the line.
_CLOSE_TAG_END = re.compile(r"[ \t]*>[ \t]*(?:\n|$)")
# One or more consecutive lines that each start with a ">" marker.
_STRICT_BLOCK_QUOTE = re.compile(r"( {0,3}>[^\n]*(?:\n|$))+")

class BlockParser(Parser[BlockState]):
    """Block-level Markdown parser.

    Each rule in ``SPECIFICATION`` is a regular expression paired with a
    ``parse_<name>`` method on this class. A parse method receives the regex
    match and the current :class:`BlockState`, appends tokens to the state and
    returns the cursor position at which parsing should resume, or a falsy
    value when the match turned out not to be that kind of block.
    """

    state_cls = BlockState

    # One or more consecutive whitespace-only lines.
    BLANK_LINE = re.compile(r"(^[ \t\v\f]*\n)+", re.M)

    # Start of any raw HTML construct (tag, comment, instruction, declaration, CDATA).
    RAW_HTML = (
        r"^ {0,3}("
        r"</?" + HTML_TAGNAME + r"|"
        r"<!--|"  # comment
        r"<\?|"  # processing instruction
        r"<![A-Z]|"  # declaration
        r"<!\[CDATA\[)"
    )

    # Start of an HTML block that is allowed to interrupt other blocks.
    BLOCK_HTML = (
        r"^ {0,3}(?:"
        r"(?:</?" + _BLOCK_TAGS_PATTERN + r"(?:[ \t]+|\n|$))"
        r"|<!--"  # comment
        r"|<\?"  # processing instruction
        r"|<![A-Z]"  # declaration
        r"|<!\[CDATA\[)"
    )

    # Rule name -> regex source. Every key needs a matching ``parse_<key>`` method.
    SPECIFICATION = {
        "blank_line": r"(^[ \t\v\f]*\n)+",
        "atx_heading": r"^ {0,3}(?P<atx_1>#{1,6})(?!#+)(?P<atx_2>[ \t]*|[ \t]+.*?)$",
        "setex_heading": r"^ {0,3}(?P<setext_1>=|-){1,}[ \t]*$",
        "fenced_code": (
            r"^(?P<fenced_1> {0,3})(?P<fenced_2>`{3,}|~{3,})"
            r"[ \t]*(?P<fenced_3>.*?)$"
        ),
        "indent_code": (
            r"^(?: {4}| *\t)[^\n]+(?:\n+|$)"
            r"((?:(?: {4}| *\t)[^\n]+(?:\n+|$))|\s)*"
        ),
        "thematic_break": r"^ {0,3}((?:-[ \t]*){3,}|(?:_[ \t]*){3,}|(?:\*[ \t]*){3,})$",
        "ref_link": r"^ {0,3}\[(?P<reflink_1>" + LINK_LABEL + r")\]:",
        "block_quote": r"^ {0,3}>(?P<quote_1>.*?)$",
        "list": LIST_PATTERN,
        "block_html": BLOCK_HTML,
        "raw_html": RAW_HTML,
    }

    # Rules used when the caller does not supply its own rule lists.
    DEFAULT_RULES = (
        "fenced_code",
        "indent_code",
        "atx_heading",
        "setex_heading",
        "thematic_break",
        "block_quote",
        "list",
        "ref_link",
        "raw_html",
        "blank_line",
    )

    def __init__(
        self,
        block_quote_rules: Optional[List[str]] = None,
        list_rules: Optional[List[str]] = None,
        max_nested_level: int = 6,
    ):
        """Create a block parser.

        :param block_quote_rules: rule names used for the content of block
            quotes; defaults to a copy of ``DEFAULT_RULES``.
        :param list_rules: rule names stored on ``self.list_rules``; defaults
            to a copy of ``DEFAULT_RULES``.
        :param max_nested_level: depth at which ``parse_block_quote`` stops
            applying the ``block_quote`` rule to nested content.
        """
        super().__init__()

        if block_quote_rules is None:
            block_quote_rules = list(self.DEFAULT_RULES)

        if list_rules is None:
            list_rules = list(self.DEFAULT_RULES)

        self.block_quote_rules = block_quote_rules
        self.list_rules = list_rules
        self.max_nested_level = max_nested_level
        # register default parse methods
        self._methods = {name: getattr(self, "parse_" + name) for name in self.SPECIFICATION}

    def parse_blank_line(self, m: Match[str], state: BlockState) -> int:
        """Parse token for blank lines."""
        state.append_token({"type": "blank_line"})
        return m.end()

    def parse_thematic_break(self, m: Match[str], state: BlockState) -> int:
        """Parse token for thematic break, e.g. ``<hr>`` tag in HTML."""
        state.append_token({"type": "thematic_break"})
        # $ does not count '\n'
        return m.end() + 1

    def parse_indent_code(self, m: Match[str], state: BlockState) -> int:
        """Parse token for code block which is indented by 4 spaces."""
        # When ``append_paragraph`` returns a position, the code block is not
        # emitted and that position is returned instead (presumably the text
        # was folded into the preceding paragraph -- see ``BlockState``).
        end_pos = state.append_paragraph()
        if end_pos:
            return end_pos

        code = m.group(0)
        code = expand_leading_tab(code)
        code = _INDENT_CODE_TRIM.sub("", code)
        code = code.strip("\n")
        state.append_token({"type": "block_code", "raw": code, "style": "indent"})
        return m.end()

    def parse_fenced_code(self, m: Match[str], state: BlockState) -> Optional[int]:
        """Parse token for fenced code block. A fenced code block is started with
        3 or more backtick(`) or tilde(~).

        An example of a fenced code block:

        .. code-block:: markdown

            ```python
            def markdown(text):
                return mistune.html(text)
            ```

        Returns ``None`` when a backtick fence has a backtick in its info
        string, otherwise the position just after the closing fence (or the
        end of the source when the fence is never closed).
        """
        spaces = m.group("fenced_1")
        marker = m.group("fenced_2")
        info = m.group("fenced_3")

        c = marker[0]
        # CommonMark Example 145
        # Info strings for backtick code blocks cannot contain backticks
        if c == "`" and "`" in info:
            return None

        # The closing fence uses the same character and is at least as long
        # as the opening fence.
        _end = re.compile(r"^ {0,3}" + c + "{" + str(len(marker)) + r",}[ \t]*(?:\n|$)", re.M)
        # skip the newline that follows the opening fence line
        cursor_start = m.end() + 1

        m2 = _end.search(state.src, cursor_start)
        if m2:
            code = state.src[cursor_start : m2.start()]
            end_pos = m2.end()
        else:
            # unclosed fence: everything up to the end of the source is code
            code = state.src[cursor_start:]
            end_pos = state.cursor_max

        if spaces and code:
            # remove from each code line up to as many leading spaces as the
            # opening fence was indented by
            _trim_pattern = re.compile("^ {0," + str(len(spaces)) + "}", re.M)
            code = _trim_pattern.sub("", code)

        token = {"type": "block_code", "raw": code, "style": "fenced", "marker": marker}
        if info:
            info = unescape_char(info)
            token["attrs"] = {"info": info.strip()}

        state.append_token(token)
        return end_pos

    def parse_atx_heading(self, m: Match[str], state: BlockState) -> int:
        """Parse token for ATX heading. An ATX heading is started with 1 to 6
        symbol of ``#``."""
        level = len(m.group("atx_1"))
        text = m.group("atx_2").strip(string.whitespace)
        # remove last #
        if text:
            text = _ATX_HEADING_TRIM.sub("", text)

        token = {"type": "heading", "text": text, "attrs": {"level": level}, "style": "atx"}
        state.append_token(token)
        # $ does not count '\n'
        return m.end() + 1

    def parse_setex_heading(self, m: Match[str], state: BlockState) -> Optional[int]:
        """Parse token for setex style heading. A setex heading syntax looks like:

        .. code-block:: markdown

            H1 title
            ========

        When the previous token is a paragraph it is converted in place into
        a heading. Otherwise the line is retried as a thematic break or a
        list, and ``None`` is returned if neither matches.
        """
        last_token = state.last_token()
        if last_token and last_token["type"] == "paragraph":
            level = 1 if m.group("setext_1") == "=" else 2
            last_token["type"] = "heading"
            last_token["style"] = "setext"
            last_token["attrs"] = {"level": level}
            return m.end() + 1

        # No paragraph to promote: a line of "-" may still be a thematic
        # break or the start of a list.
        sc = self.compile_sc(["thematic_break", "list"])
        m2 = sc.match(state.src, state.cursor)
        if m2:
            return self.parse_method(m2, state)
        return None

    def parse_ref_link(self, m: Match[str], state: BlockState) -> Optional[int]:
        """Parse link references and save the link information into ``state.env``.

        Here is an example of a link reference:

        .. code-block:: markdown

            a [link][example]

            [example]: https://example.com "Optional title"

        This method will save the link reference into ``state.env`` as::

            state.env['ref_links']['example'] = {
                'url': 'https://example.com',
                'label': 'example',
                'title': "Optional title",
            }

        An already registered key is never overwritten. Returns ``None`` when
        the text is not a valid link reference definition.
        """
        end_pos = state.append_paragraph()
        if end_pos:
            return end_pos

        label = m.group("reflink_1")
        key = unikey(label)
        if not key:
            return None

        href, href_pos = parse_link_href(state.src, m.end(), block=True)
        if href is None:
            return None

        assert href_pos is not None

        # The title is only searched for up to the next blank line.
        _blank = self.BLANK_LINE.search(state.src, href_pos)
        if _blank:
            max_pos = _blank.start()
        else:
            max_pos = state.cursor_max

        title, title_pos = parse_link_title(state.src, href_pos, max_pos)
        if title_pos:
            # Only whitespace may follow the title on its line; otherwise the
            # title is discarded.
            m2 = _BLANK_TO_LINE.match(state.src, title_pos)
            if m2:
                title_pos = m2.end()
            else:
                title_pos = None
                title = None

        if title_pos is None:
            # Without a usable title, the destination itself must be followed
            # only by whitespace up to the end of its line.
            m3 = _BLANK_TO_LINE.match(state.src, href_pos)
            if m3:
                href_pos = m3.end()
            else:
                href_pos = None
                href = None

        end_pos = title_pos or href_pos
        if not end_pos:
            return None

        if key not in state.env["ref_links"]:
            assert href is not None
            href = unescape_char(href)
            data = {"url": escape_url(href), "label": label}
            if title:
                data["title"] = title
            state.env["ref_links"][key] = data
        return end_pos

    @staticmethod
    def _strip_quote_markers(quote: str) -> str:
        """Remove the ``>`` marker and one following space from every line."""
        quote = _BLOCK_QUOTE_LEADING.sub("", quote)
        quote = expand_leading_tab(quote, 3)
        return _BLOCK_QUOTE_TRIM.sub("", quote)

    def extract_block_quote(self, m: Match[str], state: BlockState) -> Tuple[str, Optional[int]]:
        """Extract text and cursor end position of a block quote.

        Advances ``state.cursor`` past the consumed quote lines. The second
        item of the result is the position returned by a block that
        interrupted the quote (that block's tokens are already in ``state``),
        or ``None`` when nothing interrupted it.
        """

        # cleanup at first to detect if it is code block
        text = m.group("quote_1") + "\n"
        text = expand_leading_tab(text, 3)
        text = _BLOCK_QUOTE_TRIM.sub("", text)

        # If the first quoted line matches blank_line, indent_code or
        # fenced_code, only lines that carry their own ">" marker are
        # collected: no lazy continuation.
        sc = self.compile_sc(["blank_line", "indent_code", "fenced_code"])
        require_marker = bool(sc.match(text))

        # $ does not count '\n'
        state.cursor = m.end() + 1

        end_pos: Optional[int] = None
        if require_marker:
            m2 = _STRICT_BLOCK_QUOTE.match(state.src, state.cursor)
            if m2:
                text += self._strip_quote_markers(m2.group(0))
                state.cursor = m2.end()
        else:
            prev_blank_line = False
            break_sc = self.compile_sc(
                [
                    "blank_line",
                    "thematic_break",
                    "fenced_code",
                    "list",
                    "block_html",
                ]
            )
            while state.cursor < state.cursor_max:
                m3 = _STRICT_BLOCK_QUOTE.match(state.src, state.cursor)
                if m3:
                    quote = self._strip_quote_markers(m3.group(0))
                    text += quote
                    state.cursor = m3.end()
                    if not quote.strip():
                        prev_blank_line = True
                    else:
                        prev_blank_line = bool(_LINE_BLANK_END.search(quote))
                    continue

                if prev_blank_line:
                    # CommonMark Example 249
                    # because of laziness, a blank line is needed between
                    # a block quote and a following paragraph
                    break

                m4 = break_sc.match(state.src, state.cursor)
                if m4:
                    end_pos = self.parse_method(m4, state)
                    if end_pos:
                        break

                # lazy continuation line
                pos = state.find_line_end()
                line = state.get_text(pos)
                line = expand_leading_tab(line, 3)
                text += line
                state.cursor = pos

        # according to CommonMark Example 6, the second tab should be
        # treated as 4 spaces
        return expand_tab(text), end_pos

    def parse_block_quote(self, m: Match[str], state: BlockState) -> int:
        """Parse token for block quote. Here is an example of the syntax:

        .. code-block:: markdown

            > a block quote starts
            > with right arrows
        """
        text, end_pos = self.extract_block_quote(m, state)
        # scan children state
        child = state.child_state(text)
        if state.depth() >= self.max_nested_level - 1:
            # Nesting limit reached: parse the content without the block_quote
            # rule. Filtering (rather than ``list.remove``) also works when a
            # custom rule list does not contain "block_quote" and drops every
            # occurrence if it is listed more than once.
            rules = [name for name in self.block_quote_rules if name != "block_quote"]
        else:
            rules = self.block_quote_rules

        self.parse(child, rules)
        token = {"type": "block_quote", "children": child.tokens}
        if end_pos:
            # The interrupting block was parsed during extraction and has
            # already added its token, so the quote is added via
            # ``prepend_token`` instead of being appended.
            state.prepend_token(token)
            return end_pos
        state.append_token(token)
        return state.cursor

    def parse_list(self, m: Match[str], state: BlockState) -> int:
        """Parse tokens for ordered and unordered list."""
        return parse_list(self, m, state)

    def parse_block_html(self, m: Match[str], state: BlockState) -> Optional[int]:
        """Parse a ``block_html`` match; handled exactly like ``raw_html``."""
        return self.parse_raw_html(m, state)

    def parse_raw_html(self, m: Match[str], state: BlockState) -> Optional[int]:
        """Parse token for an HTML block.

        The ``rule N`` comments below refer to the numbered start conditions
        of HTML blocks in the CommonMark specification. Returns ``None`` when
        the line does not start a valid HTML block.
        """
        marker = m.group(0).strip()

        # rule 2
        if marker == "<!--":
            return _parse_html_to_end(state, "-->", m.end())

        # rule 3
        if marker == "<?":
            return _parse_html_to_end(state, "?>", m.end())

        # rule 5
        if marker == "<![CDATA[":
            return _parse_html_to_end(state, "]]>", m.end())

        # rule 4
        if marker.startswith("<!"):
            return _parse_html_to_end(state, ">", m.end())

        close_tag = None
        open_tag = None
        if marker.startswith("</"):
            close_tag = marker[2:].lower()
            # rule 6
            if close_tag in BLOCK_TAGS:
                return _parse_html_to_newline(state, self.BLANK_LINE)
        else:
            open_tag = marker[1:].lower()
            # rule 1
            if open_tag in PRE_TAGS:
                end_tag = "</" + open_tag + ">"
                return _parse_html_to_end(state, end_tag, m.end())
            # rule 6
            if open_tag in BLOCK_TAGS:
                return _parse_html_to_newline(state, self.BLANK_LINE)

        # Blocks of type 7 may not interrupt a paragraph.
        end_pos = state.append_paragraph()
        if end_pos:
            return end_pos

        # rule 7
        start_pos = m.end()
        end_pos = state.find_line_end()
        if (open_tag and _OPEN_TAG_END.match(state.src, start_pos, end_pos)) or (
            close_tag and _CLOSE_TAG_END.match(state.src, start_pos, end_pos)
        ):
            return _parse_html_to_newline(state, self.BLANK_LINE)

        return None

    def parse(self, state: BlockState, rules: Optional[List[str]] = None) -> None:
        """Tokenize ``state.src`` from ``state.cursor`` to ``state.cursor_max``.

        :param state: block state that receives the tokens.
        :param rules: rule names passed to ``compile_sc``; how ``None`` is
            handled is decided there.
        """
        sc = self.compile_sc(rules)

        while state.cursor < state.cursor_max:
            m = sc.search(state.src, state.cursor)
            if not m:
                break

            # Text between the cursor and the next rule match is paragraph text.
            end_pos = m.start()
            if end_pos > state.cursor:
                text = state.get_text(end_pos)
                state.add_paragraph(text)
                state.cursor = end_pos

            end_pos2 = self.parse_method(m, state)
            if end_pos2:
                state.cursor = end_pos2
            else:
                # The rule rejected its match: treat the current line as
                # paragraph text and move on to the next line.
                end_pos3 = state.find_line_end()
                text = state.get_text(end_pos3)
                state.add_paragraph(text)
                state.cursor = end_pos3

        # Whatever is left after the last match is paragraph text as well.
        if state.cursor < state.cursor_max:
            text = state.src[state.cursor :]
            state.add_paragraph(text)
            state.cursor = state.cursor_max

471

472

def _parse_html_to_end(state: BlockState, end_marker: str, start_pos: int) -> int:
    """Append a ``block_html`` token that runs up to ``end_marker``.

    The marker is searched for in ``state.src`` starting at ``start_pos``.
    When it is found, the token covers the text from the current cursor
    through the end of the line holding the marker, and ``state.cursor`` is
    moved to the marker. When it is missing, the token covers the rest of
    the source. Returns the position where the token's text ends.
    """
    found_at = state.src.find(end_marker, start_pos)
    if found_at >= 0:
        head = state.get_text(found_at)
        # move to the marker so the line end is computed from there
        state.cursor = found_at
        stop = state.find_line_end()
        raw = head + state.get_text(stop)
    else:
        raw = state.src[state.cursor :]
        stop = state.cursor_max

    state.append_token({"type": "block_html", "raw": raw})
    return stop

486

487

def _parse_html_to_newline(state: BlockState, newline: Pattern[str]) -> int:
    """Append a ``block_html`` token that runs up to the next ``newline`` match.

    ``newline`` is searched for from ``state.cursor``. The token covers the
    text before the match, or the rest of the source when nothing matches.
    Returns the position where the token's text ends.
    """
    boundary = newline.search(state.src, state.cursor)
    if boundary is None:
        stop = state.cursor_max
        raw = state.src[state.cursor :]
    else:
        stop = boundary.start()
        raw = state.get_text(stop)

    state.append_token({"type": "block_html", "raw": raw})
    return stop

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/mistune/block_parser.py: 100%

271 statements