Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/mistune/block_parser.py: 100%

1import re

2from typing import Optional, List, Tuple, Match

3from .util import (

4 unikey,

5 escape_url,

6 expand_tab,

7 expand_leading_tab,

9from .core import Parser, BlockState

10from .helpers import (

11 LINK_LABEL,

12 HTML_TAGNAME,

13 HTML_ATTRIBUTES,

14 BLOCK_TAGS,

15 PRE_TAGS,

16 unescape_char,

17 parse_link_href,

18 parse_link_title,

19)

20from .list_parser import parse_list, LIST_PATTERN

# Module-level helper patterns used by BlockParser and the HTML helpers below.

# Strips up to four leading spaces from every line of an indented code block.
_INDENT_CODE_TRIM = re.compile(r'^ {1,4}', flags=re.M)
# Strips the optional closing run of ``#`` from a heading written with ``#``.
_AXT_HEADING_TRIM = re.compile(r'(\s+|^)#+\s*$')
# Strips the single optional space that may follow a ``>`` quote marker.
_BLOCK_QUOTE_TRIM = re.compile(r'^ ?', flags=re.M)
# Strips the leading ``>`` marker (and spaces before it) from each quoted line.
_BLOCK_QUOTE_LEADING = re.compile(r'^ *>', flags=re.M)

# Matches text whose final line is blank (only spaces/tabs before the newline).
_LINE_BLANK_END = re.compile(r'\n[ \t]*\n$')
# Matches optional trailing spaces/tabs followed by a newline.
_BLANK_TO_LINE = re.compile(r'[ \t]*\n')

# Bare alternation of tag names; it is NOT wrapped in a group, so anything
# that concatenates it into a larger pattern has to add the grouping itself.
_BLOCK_TAGS_PATTERN = '|'.join(BLOCK_TAGS) + '|' + '|'.join(PRE_TAGS)
# Remainder of an opening tag: attributes, ``>``, then nothing else on the line.
_OPEN_TAG_END = re.compile(HTML_ATTRIBUTES + r'[ \t]*>[ \t]*(?:\n|$)')
# Remainder of a closing tag: ``>``, then nothing else on the line.
_CLOSE_TAG_END = re.compile(r'[ \t]*>[ \t]*(?:\n|$)')
# One or more consecutive lines that each start with a ``>`` quote marker.
_STRICT_BLOCK_QUOTE = re.compile(r'( {0,3}>[^\n]*(?:\n|$))+')

class BlockParser(Parser):
    """Parser for block-level Markdown constructs.

    Each entry of ``SPECIFICATION`` is a regular expression source for one
    block rule; for every rule ``name`` there is a ``parse_<name>`` method that
    receives the regex match and the current ``BlockState``.  A parse method
    returns the cursor position where parsing should resume, or ``None`` when
    the match turned out not to be that construct (the line is then handled
    as paragraph text by :meth:`parse`).
    """

    BLANK_LINE = re.compile(r'(^[ \t\v\f]*\n)+', re.M)

    RAW_HTML = (
        r'^ {0,3}('
        r'</?' + HTML_TAGNAME + r'|'
        r'<!--|'  # comment
        r'<\?|'  # processing instruction
        r'<![A-Z]|'  # declaration
        r'<!\[CDATA\[)'
    )

    # The tag alternation must be wrapped in its own group: without it the
    # ``</?`` prefix only applies to the first tag name and the trailing
    # delimiter only to the last one, so any other tag name would match as a
    # bare word at the start of a line.
    BLOCK_HTML = (
        r'^ {0,3}(?:'
        r'(?:</?(?:' + _BLOCK_TAGS_PATTERN + r')(?:[ \t]+|\n|$))'
        r'|<!--'  # comment
        r'|<\?'  # processing instruction
        r'|<![A-Z]'  # declaration
        r'|<!\[CDATA\[)'
    )

    SPECIFICATION = {
        'blank_line': r'(^[ \t\v\f]*\n)+',
        'axt_heading': r'^ {0,3}(?P<axt_1>#{1,6})(?!#+)(?P<axt_2>[ \t]*|[ \t]+.*?)$',
        # The underline must repeat ONE character (all ``=`` or all ``-``);
        # the backreference rejects mixed runs such as ``=-=`` while still
        # leaving a single character in the ``setext_1`` group.
        'setex_heading': r'^ {0,3}(?P<setext_1>=|-)(?P=setext_1)*[ \t]*$',
        'fenced_code': (
            r'^(?P<fenced_1> {0,3})(?P<fenced_2>`{3,}|~{3,})'
            r'[ \t]*(?P<fenced_3>.*?)$'
        ),
        'indent_code': (
            r'^(?: {4}| *\t)[^\n]+(?:\n+|$)'
            r'((?:(?: {4}| *\t)[^\n]+(?:\n+|$))|\s)*'
        ),
        'thematic_break': r'^ {0,3}((?:-[ \t]*){3,}|(?:_[ \t]*){3,}|(?:\*[ \t]*){3,})$',
        'ref_link': r'^ {0,3}\[(?P<reflink_1>' + LINK_LABEL + r')\]:',
        'block_quote': r'^ {0,3}>(?P<quote_1>.*?)$',
        'list': LIST_PATTERN,
        'block_html': BLOCK_HTML,
        'raw_html': RAW_HTML,
    }

    DEFAULT_RULES = (
        'fenced_code',
        'indent_code',
        'axt_heading',
        'setex_heading',
        'thematic_break',
        'block_quote',
        'list',
        'ref_link',
        'raw_html',
        'blank_line',
    )

    def __init__(
        self,
        block_quote_rules: Optional[List[str]]=None,
        list_rules: Optional[List[str]]=None,
        max_nested_level: int=6
    ):
        """Create a block parser.

        :param block_quote_rules: rule names used inside block quotes;
            defaults to a copy of ``DEFAULT_RULES``.
        :param list_rules: rule names stored for list parsing; defaults to a
            copy of ``DEFAULT_RULES``.
        :param max_nested_level: nesting depth after which block quotes are no
            longer parsed recursively (see :meth:`parse_block_quote`).
        """
        super().__init__()

        if block_quote_rules is None:
            block_quote_rules = list(self.DEFAULT_RULES)

        if list_rules is None:
            list_rules = list(self.DEFAULT_RULES)

        self.block_quote_rules = block_quote_rules
        self.list_rules = list_rules
        self.max_nested_level = max_nested_level
        # register default parse methods
        self._methods = {
            name: getattr(self, 'parse_' + name) for name in self.SPECIFICATION
        }

    def parse_blank_line(self, m: Match, state: BlockState) -> int:
        """Parse token for blank lines."""
        state.append_token({'type': 'blank_line'})
        return m.end()

    def parse_thematic_break(self, m: Match, state: BlockState) -> int:
        """Parse token for thematic break, e.g. ``<hr>`` tag in HTML."""
        state.append_token({'type': 'thematic_break'})
        # the pattern ends with ``$``, which does not consume the '\n'
        return m.end() + 1

    def parse_indent_code(self, m: Match, state: BlockState) -> int:
        """Parse token for code block which is indented by 4 spaces."""
        # If ``append_paragraph`` reports a position, the indented text was
        # taken as part of a paragraph and is not a code block.
        end_pos = state.append_paragraph()
        if end_pos:
            return end_pos

        code = expand_leading_tab(m.group(0))
        code = _INDENT_CODE_TRIM.sub('', code)
        code = code.strip('\n')
        state.append_token({'type': 'block_code', 'raw': code, 'style': 'indent'})
        return m.end()

    def parse_fenced_code(self, m: Match, state: BlockState) -> Optional[int]:
        """Parse token for fenced code block. A fenced code block is started with
        3 or more backtick(`) or tilde(~).

        An example of a fenced code block:

        .. code-block:: markdown

            ```python
            def markdown(text):
                return mistune.html(text)
            ```

        Returns ``None`` when the opening line is not a valid fence (a
        backtick fence whose info string contains a backtick).
        """
        spaces = m.group('fenced_1')
        marker = m.group('fenced_2')
        info = m.group('fenced_3')

        c = marker[0]
        if info and c == '`' and c in info:
            # CommonMark Example 145
            # Info strings for backtick code blocks cannot contain backticks
            return None

        # The closing fence uses the same character and is at least as long
        # as the opening fence.
        _end = re.compile(
            r'^ {0,3}' + c + '{' + str(len(marker)) + r',}[ \t]*(?:\n|$)', re.M)
        # skip the '\n' that the ``$``-terminated opening pattern left behind
        cursor_start = m.end() + 1

        m2 = _end.search(state.src, cursor_start)
        if m2:
            code = state.src[cursor_start:m2.start()]
            end_pos = m2.end()
        else:
            # an unclosed fence runs to the end of the source
            code = state.src[cursor_start:]
            end_pos = state.cursor_max

        if spaces and code:
            # remove, from every code line, up to as many leading spaces as
            # the opening fence was indented by
            _trim_pattern = re.compile('^ {0,' + str(len(spaces)) + '}', re.M)
            code = _trim_pattern.sub('', code)

        token = {'type': 'block_code', 'raw': code, 'style': 'fenced', 'marker': marker}
        if info:
            info = unescape_char(info)
            token['attrs'] = {'info': info.strip()}

        state.append_token(token)
        return end_pos

    def parse_axt_heading(self, m: Match, state: BlockState) -> int:
        """Parse token for a heading written with leading ``#`` characters
        (1 to 6 of them, giving the heading level)."""
        level = len(m.group('axt_1'))
        text = m.group('axt_2').strip()
        # remove the optional closing run of '#'
        if text:
            text = _AXT_HEADING_TRIM.sub('', text)

        token = {'type': 'heading', 'text': text, 'attrs': {'level': level}, 'style': 'axt'}
        state.append_token(token)
        # ``$`` does not consume the '\n'
        return m.end() + 1

    def parse_setex_heading(self, m: Match, state: BlockState) -> Optional[int]:
        """Parse token for setex style heading. A setex heading syntax looks like:

        .. code-block:: markdown

            H1 title
            ========

        When the previous token is a paragraph it is converted in place into
        a heading (level 1 for ``=``, level 2 for ``-``).  Otherwise the line
        is retried as a thematic break or a list; ``None`` is returned if
        neither matches.
        """
        last_token = state.last_token()
        if last_token and last_token['type'] == 'paragraph':
            level = 1 if m.group('setext_1') == '=' else 2
            last_token['type'] = 'heading'
            last_token['style'] = 'setext'
            last_token['attrs'] = {'level': level}
            return m.end() + 1

        sc = self.compile_sc(['thematic_break', 'list'])
        m = sc.match(state.src, state.cursor)
        if m:
            return self.parse_method(m, state)
        return None

    def parse_ref_link(self, m: Match, state: BlockState) -> Optional[int]:
        """Parse link references and save the link information into ``state.env``.

        Here is an example of a link reference:

        .. code-block:: markdown

            a [link][example]

            [example]: https://example.com "Optional title"

        This method will save the link reference into ``state.env`` as::

            state.env['ref_links']['example'] = {
                'url': 'https://example.com',
                'label': 'example',
                'title': "Optional title",
            }

        An already registered key is never overwritten.  Returns ``None``
        when the text is not a valid link reference definition.
        """
        end_pos = state.append_paragraph()
        if end_pos:
            return end_pos

        label = m.group('reflink_1')
        key = unikey(label)
        if not key:
            return None

        href, href_pos = parse_link_href(state.src, m.end(), block=True)
        if href is None:
            return None

        # the title may not extend past the next blank line
        _blank = self.BLANK_LINE.search(state.src, href_pos)
        max_pos = _blank.start() if _blank else state.cursor_max

        title, title_pos = parse_link_title(state.src, href_pos, max_pos)
        if title_pos:
            # only blanks may follow the title on its line
            m = _BLANK_TO_LINE.match(state.src, title_pos)
            if m:
                title_pos = m.end()
            else:
                title_pos = None
                title = None

        if title_pos is None:
            # no usable title: only blanks may follow the destination
            m = _BLANK_TO_LINE.match(state.src, href_pos)
            if m:
                href_pos = m.end()
            else:
                href_pos = None
                href = None

        end_pos = title_pos or href_pos
        if not end_pos:
            return None

        if key not in state.env['ref_links']:
            href = unescape_char(href)
            data = {'url': escape_url(href), 'label': label}
            if title:
                data['title'] = title
            state.env['ref_links'][key] = data
        return end_pos

    @staticmethod
    def _clean_quote_lines(quote: str) -> str:
        """Strip the ``>`` markers and the optional following space from *quote*."""
        quote = _BLOCK_QUOTE_LEADING.sub('', quote)
        quote = expand_leading_tab(quote, 3)
        return _BLOCK_QUOTE_TRIM.sub('', quote)

    def extract_block_quote(self, m: Match, state: BlockState) -> Tuple[str, Optional[int]]:
        """Extract text and cursor end position of a block quote.

        ``state.cursor`` is advanced past the consumed quote lines.  The
        second element of the result is ``None`` unless a following block
        (blank line, thematic break, fenced code, list or block HTML) was
        parsed while looking for lazy continuation lines; in that case it is
        the end position returned by that block's parse method.
        """
        # cleanup at first to detect if it is code block
        text = m.group('quote_1') + '\n'
        text = expand_leading_tab(text, 3)
        text = _BLOCK_QUOTE_TRIM.sub('', text)

        # If the first quoted line is blank or starts a code block, following
        # lines must carry their own '>' marker (no lazy continuation).
        sc = self.compile_sc(['blank_line', 'indent_code', 'fenced_code'])
        require_marker = bool(sc.match(text))

        state.cursor = m.end() + 1

        end_pos = None
        if require_marker:
            m = _STRICT_BLOCK_QUOTE.match(state.src, state.cursor)
            if m:
                text += self._clean_quote_lines(m.group(0))
                state.cursor = m.end()
        else:
            prev_blank_line = False
            break_sc = self.compile_sc([
                'blank_line', 'thematic_break', 'fenced_code',
                'list', 'block_html',
            ])
            while state.cursor < state.cursor_max:
                m = _STRICT_BLOCK_QUOTE.match(state.src, state.cursor)
                if m:
                    quote = self._clean_quote_lines(m.group(0))
                    text += quote
                    state.cursor = m.end()
                    if not quote.strip():
                        prev_blank_line = True
                    else:
                        prev_blank_line = bool(_LINE_BLANK_END.search(quote))
                    continue

                if prev_blank_line:
                    # CommonMark Example 249
                    # because of laziness, a blank line is needed between
                    # a block quote and a following paragraph
                    break

                m = break_sc.match(state.src, state.cursor)
                if m:
                    end_pos = self.parse_method(m, state)
                    if end_pos:
                        break

                # lazy continuation line
                pos = state.find_line_end()
                line = state.get_text(pos)
                line = expand_leading_tab(line, 3)
                text += line
                state.cursor = pos

        # according to CommonMark Example 6, the second tab should be
        # treated as 4 spaces
        return expand_tab(text), end_pos

    def parse_block_quote(self, m: Match, state: BlockState) -> int:
        """Parse token for block quote. Here is an example of the syntax:

        .. code-block:: markdown

            > a block quote starts
            > with right arrows
        """
        text, end_pos = self.extract_block_quote(m, state)
        # scan children state
        child = state.child_state(text)
        if state.depth() >= self.max_nested_level - 1:
            # Nesting limit reached: parse the content without the quote
            # rule.  Filtering (rather than ``list.remove``) also works when
            # custom rules do not contain 'block_quote' at all.
            rules = [name for name in self.block_quote_rules if name != 'block_quote']
        else:
            rules = self.block_quote_rules

        self.parse(child, rules)
        token = {'type': 'block_quote', 'children': child.tokens}
        if end_pos:
            # a following block was already tokenized while extracting the
            # quote, so the quote token goes in via ``prepend_token``
            state.prepend_token(token)
            return end_pos
        state.append_token(token)
        return state.cursor

    def parse_list(self, m: Match, state: BlockState) -> int:
        """Parse tokens for ordered and unordered list."""
        return parse_list(self, m, state)

    def parse_block_html(self, m: Match, state: BlockState) -> Optional[int]:
        """Parse a ``block_html`` match; delegates to :meth:`parse_raw_html`."""
        return self.parse_raw_html(m, state)

    def parse_raw_html(self, m: Match, state: BlockState) -> Optional[int]:
        """Parse a raw HTML block.

        The "rule N" comments refer to the start conditions of HTML blocks in
        the CommonMark specification.  Returns the end position of the HTML
        block, or ``None`` when the tag does not start an HTML block.
        """
        marker = m.group(0).strip()

        # rule 2
        if marker == '<!--':
            return _parse_html_to_end(state, '-->', m.end())

        # rule 3
        if marker == '<?':
            return _parse_html_to_end(state, '?>', m.end())

        # rule 5
        if marker == '<![CDATA[':
            return _parse_html_to_end(state, ']]>', m.end())

        # rule 4
        if marker.startswith('<!'):
            return _parse_html_to_end(state, '>', m.end())

        close_tag = None
        open_tag = None
        if marker.startswith('</'):
            close_tag = marker[2:].lower()
            # rule 6
            if close_tag in BLOCK_TAGS:
                return _parse_html_to_newline(state, self.BLANK_LINE)
        else:
            open_tag = marker[1:].lower()
            # rule 1
            if open_tag in PRE_TAGS:
                end_tag = '</' + open_tag + '>'
                return _parse_html_to_end(state, end_tag, m.end())
            # rule 6
            if open_tag in BLOCK_TAGS:
                return _parse_html_to_newline(state, self.BLANK_LINE)

        # Blocks of type 7 may not interrupt a paragraph.
        end_pos = state.append_paragraph()
        if end_pos:
            return end_pos

        # rule 7: a complete open or closing tag alone on its line
        start_pos = m.end()
        end_pos = state.find_line_end()
        if (open_tag and _OPEN_TAG_END.match(state.src, start_pos, end_pos)) or \
           (close_tag and _CLOSE_TAG_END.match(state.src, start_pos, end_pos)):
            return _parse_html_to_newline(state, self.BLANK_LINE)
        return None

    def parse(self, state: BlockState, rules: Optional[List[str]]=None) -> None:
        """Tokenize ``state.src`` from ``state.cursor`` to ``state.cursor_max``.

        Text between rule matches, and any line whose parse method returns a
        falsy position, is added to the state as paragraph text.
        """
        sc = self.compile_sc(rules)

        while state.cursor < state.cursor_max:
            m = sc.search(state.src, state.cursor)
            if not m:
                break

            # text before the match belongs to a paragraph
            end_pos = m.start()
            if end_pos > state.cursor:
                text = state.get_text(end_pos)
                state.add_paragraph(text)
                state.cursor = end_pos

            end_pos = self.parse_method(m, state)
            if end_pos:
                state.cursor = end_pos
            else:
                # the rule rejected the match: treat the line as paragraph
                end_pos = state.find_line_end()
                text = state.get_text(end_pos)
                state.add_paragraph(text)
                state.cursor = end_pos

        # whatever is left after the last match is paragraph text
        if state.cursor < state.cursor_max:
            text = state.src[state.cursor:]
            state.add_paragraph(text)
            state.cursor = state.cursor_max

459

460

def _parse_html_to_end(state, end_marker: str, start_pos: int) -> int:
    """Append a ``block_html`` token that runs up to the line containing *end_marker*.

    :param state: block state; its ``src``, ``cursor``, ``cursor_max``,
        ``get_text``, ``find_line_end`` and ``append_token`` members are used.
    :param end_marker: literal text that terminates the HTML block.
    :param start_pos: position in ``state.src`` from which to search for
        *end_marker*.
    :returns: the position just after the HTML block: the end of the line
        holding *end_marker*, or ``state.cursor_max`` if the marker never
        occurs (the block then swallows the rest of the source).

    Note: when the marker is found, ``state.cursor`` is moved to the marker
    position as a side effect.
    """
    marker_pos = state.src.find(end_marker, start_pos)
    if marker_pos == -1:
        text = state.src[state.cursor:]
        end_pos = state.cursor_max
    else:
        # text before the marker ...
        text = state.get_text(marker_pos)
        # ... plus the rest of the marker's line; the cursor has to sit on
        # the marker because both helpers below work from ``state.cursor``
        state.cursor = marker_pos
        end_pos = state.find_line_end()
        text += state.get_text(end_pos)

    state.append_token({'type': 'block_html', 'raw': text})
    return end_pos

474

475

def _parse_html_to_newline(state, newline) -> int:
    """Append a ``block_html`` token that runs up to the next match of *newline*.

    :param state: block state; its ``src``, ``cursor``, ``cursor_max``,
        ``get_text`` and ``append_token`` members are used.
    :param newline: compiled pattern marking the end of the HTML block
        (callers in this file pass the blank-line pattern).
    :returns: the start of the first *newline* match at or after
        ``state.cursor``, or ``state.cursor_max`` when there is none (the
        block then swallows the rest of the source).
    """
    m = newline.search(state.src, state.cursor)
    if m:
        # stop right before the terminating match; it is not part of the block
        end_pos = m.start()
        text = state.get_text(end_pos)
    else:
        text = state.src[state.cursor:]
        end_pos = state.cursor_max

    state.append_token({'type': 'block_html', 'raw': text})
    return end_pos

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/mistune/block_parser.py: 100%

265 statements