# Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/mistune/block_parser.py: 100% of 221 statements
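#
# Block-level parser for mistune: scans Markdown source into block tokens
# (headings, code blocks, quotes, lists, raw HTML, paragraphs) that are
# rendered after inline parsing.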

import re
from .scanner import ScannerParser, Matcher
from .inline_parser import ESCAPE_CHAR, LINK_LABEL
from .util import unikey
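
# Newline and indentation helpers: _NEW_LINES and _BLANK_LINES normalize
# line endings and whitespace-only lines; the _TRIM_* and _EXPAND_TAB
# patterns strip or expand leading indentation when block text is
# re-parsed, and _BLOCK_QUOTE_LEADING strips '>' markers.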
_NEW_LINES = re.compile(r'\r\n|\r')
_BLANK_LINES = re.compile(r'^ +$', re.M)

_TRIM_4 = re.compile(r'^ {1,4}')
_EXPAND_TAB = re.compile(r'^( {0,3})\t', flags=re.M)
_INDENT_CODE_TRIM = re.compile(r'^ {1,4}', flags=re.M)
_BLOCK_QUOTE_TRIM = re.compile(r'^ {0,1}', flags=re.M)
_BLOCK_QUOTE_LEADING = re.compile(r'^ *>', flags=re.M)
_BLOCK_TAGS = {
    'address', 'article', 'aside', 'base', 'basefont', 'blockquote',
    'body', 'caption', 'center', 'col', 'colgroup', 'dd', 'details',
    'dialog', 'dir', 'div', 'dl', 'dt', 'fieldset', 'figcaption',
    'figure', 'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3',
    'h4', 'h5', 'h6', 'head', 'header', 'hr', 'html', 'iframe',
    'legend', 'li', 'link', 'main', 'menu', 'menuitem', 'meta', 'nav',
    'noframes', 'ol', 'optgroup', 'option', 'p', 'param', 'section',
    'source', 'summary', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead',
    'title', 'tr', 'track', 'ul'
}
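# Raw HTML blocks: judging by their names, these two alternations
# correspond to CommonMark's HTML block conditions 6 (a known
# block-level tag) and 7 (any other complete open or close tag alone on
# its line).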
_BLOCK_HTML_RULE6 = (
    r'</?(?:' + '|'.join(_BLOCK_TAGS) + r')'
    r'(?: +|\n|/?>)[\s\S]*?'
    r'(?:\n{2,}|\n*$)'
)
_BLOCK_HTML_RULE7 = (
    # open tag
    r'<(?!script|pre|style)([a-z][\w-]*)(?:'
    r' +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"|'
    r''' *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?'''
    r')*? */?>(?=\s*\n)[\s\S]*?(?:\n{2,}|\n*$)|'
    # close tag
    r'</(?!script|pre|style)[a-z][\w-]*\s*>(?=\s*\n)[\s\S]*?(?:\n{2,}|\n*$)'
)

_PARAGRAPH_SPLIT = re.compile(r'\n{2,}')
_LIST_BULLET = re.compile(r'^ *([\*\+-]|\d+[.)])')


class BlockParser(ScannerParser):
    scanner_cls = Matcher

    NEWLINE = re.compile(r'\n+')
    DEF_LINK = re.compile(
        r' {0,3}\[(' + LINK_LABEL + r')\]:(?:[ \t]*\n)?[ \t]*'
        r'<?([^\s>]+)>?(?:[ \t]*\n)?'
        r'(?: +["(]([^\n]+)[")])? *\n+'
    )

    AXT_HEADING = re.compile(
        r' {0,3}(#{1,6})(?!#+)(?: *\n+|'
        r'\s+([^\n]*?)(?:\n+|\s+?#+\s*\n+))'
    )
    SETEX_HEADING = re.compile(r'([^\n]+)\n *(=|-){2,}[ \t]*\n+')
    THEMATIC_BREAK = re.compile(
        r' {0,3}((?:-[ \t]*){3,}|'
        r'(?:_[ \t]*){3,}|(?:\*[ \t]*){3,})\n+'
    )

    INDENT_CODE = re.compile(r'(?:\n*)(?:(?: {4}| *\t)[^\n]+\n*)+')

    FENCED_CODE = re.compile(
        r'( {0,3})(`{3,}|~{3,})([^`\n]*)\n'
        r'(?:|([\s\S]*?)\n)'
        r'(?: {0,3}\2[~`]* *\n+|$)'
    )
    BLOCK_QUOTE = re.compile(
        r'(?: {0,3}>[^\n]*\n)+'
    )
    LIST_START = re.compile(
        r'( {0,3})([\*\+-]|\d{1,9}[.)])(?:[ \t]*|[ \t][^\n]+)\n+'
    )

    BLOCK_HTML = re.compile((
        r' {0,3}(?:'
        r'<(script|pre|style)[\s>][\s\S]*?(?:</\1>[^\n]*\n+|$)|'
        r'<!--(?!-?>)[\s\S]*?-->[^\n]*\n+|'
        r'<\?[\s\S]*?\?>[^\n]*\n+|'
        r'<![A-Z][\s\S]*?>[^\n]*\n+|'
        r'<!\[CDATA\[[\s\S]*?\]\]>[^\n]*\n+'
        r'|' + _BLOCK_HTML_RULE6 + '|' + _BLOCK_HTML_RULE7 + ')'
    ), re.I)

    LIST_MAX_DEPTH = 6
    BLOCK_QUOTE_MAX_DEPTH = 6
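    # Rules are tried in this order; e.g. thematic_break precedes
    # list_start so that '* * *' scans as a break rather than a list.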
    RULE_NAMES = (
        'newline', 'thematic_break',
        'fenced_code', 'indent_code',
        'block_quote', 'block_html',
        'list_start',
        'axt_heading', 'setex_heading',
        'def_link',
    )

    def __init__(self):
        super(BlockParser, self).__init__()
        self.block_quote_rules = list(self.RULE_NAMES)
        self.list_rules = list(self.RULE_NAMES)

    def parse_newline(self, m, state):
        return {'type': 'newline', 'blank': True}

    def parse_thematic_break(self, m, state):
        return {'type': 'thematic_break', 'blank': True}

    def parse_indent_code(self, m, state):
        text = expand_leading_tab(m.group(0))
        code = _INDENT_CODE_TRIM.sub('', text)
        code = code.lstrip('\n')
        return self.tokenize_block_code(code, None, state)
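
    # FENCED_CODE groups: (1) indent before the fence, (2) the fence
    # itself, (3) the info string, (4) the code body.  An indented fence
    # has the same indent stripped from each line of its body.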
    def parse_fenced_code(self, m, state):
        info = ESCAPE_CHAR.sub(r'\1', m.group(3))
        spaces = m.group(1)
        code = m.group(4) or ''
        if spaces and code:
            _trim_pattern = re.compile('^' + spaces, re.M)
            code = _trim_pattern.sub('', code)
        return self.tokenize_block_code(code + '\n', info, state)

    def tokenize_block_code(self, code, info, state):
        token = {'type': 'block_code', 'raw': code}
        if info:
            token['params'] = (info, )
        return token

    def parse_axt_heading(self, m, state):
        level = len(m.group(1))
        text = m.group(2) or ''
        text = text.strip()
        if set(text) == {'#'}:
            text = ''
        return self.tokenize_heading(text, level, state)

    def parse_setex_heading(self, m, state):
        level = 1 if m.group(2) == '=' else 2
        text = m.group(1)
        text = text.strip()
        return self.tokenize_heading(text, level, state)

    def tokenize_heading(self, text, level, state):
        return {'type': 'heading', 'text': text, 'params': (level,)}
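
    # Container nesting is capped: past BLOCK_QUOTE_MAX_DEPTH (or
    # LIST_MAX_DEPTH) levels, the rule that would nest another container
    # is removed from the rule set handed to the recursive parse.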
    def get_block_quote_rules(self, depth):
        if depth > self.BLOCK_QUOTE_MAX_DEPTH - 1:
            rules = list(self.block_quote_rules)
            rules.remove('block_quote')
            return rules
        return self.block_quote_rules

    def parse_block_quote(self, m, state):
        depth = state.get('block_quote_depth', 0) + 1
        state['block_quote_depth'] = depth

        # normalize block quote text
        text = _BLOCK_QUOTE_LEADING.sub('', m.group(0))
        text = expand_leading_tab(text)
        text = _BLOCK_QUOTE_TRIM.sub('', text)
        text = cleanup_lines(text)

        rules = self.get_block_quote_rules(depth)
        children = self.parse(text, state, rules)
        state['block_quote_depth'] = depth - 1
        return {'type': 'block_quote', 'children': children}

    def get_list_rules(self, depth):
        if depth > self.LIST_MAX_DEPTH - 1:
            rules = list(self.list_rules)
            rules.remove('list_start')
            return rules
        return self.list_rules
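
    # list_start is a two-phase rule: the regex only locates where a list
    # begins, then _find_list_items walks the source string item by item,
    # and the method returns (token, pos) so scanning resumes after the
    # whole list.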
    def parse_list_start(self, m, state, string):
        items = []
        spaces = m.group(1)
        marker = m.group(2)
        items, pos = _find_list_items(string, m.start(), spaces, marker)
        tight = '\n\n' not in ''.join(items).strip()

        ordered = len(marker) != 1
        if ordered:
            start = int(marker[:-1])
            if start == 1:
                start = None
        else:
            start = None

        list_tights = state.get('list_tights', [])
        list_tights.append(tight)
        state['list_tights'] = list_tights

        depth = len(list_tights)
        rules = self.get_list_rules(depth)
        children = [
            self.parse_list_item(item, depth, state, rules)
            for item in items
        ]
        list_tights.pop()
        params = (ordered, depth, start)
        token = {'type': 'list', 'children': children, 'params': params}
        return token, pos

    def parse_list_item(self, text, depth, state, rules):
        text = self.normalize_list_item_text(text)
        if not text:
            children = [{'type': 'block_text', 'text': ''}]
        else:
            children = self.parse(text, state, rules)
        return {
            'type': 'list_item',
            'params': (depth,),
            'children': children,
        }
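
    # Strip the bullet/number marker, then remove the indentation that
    # lines continuation lines up under the item, so the item body can be
    # re-parsed as ordinary blocks.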
    @staticmethod
    def normalize_list_item_text(text):
        text_length = len(text)
        text = _LIST_BULLET.sub('', text)

        if not text.strip():
            return ''

        space = text_length - len(text)
        text = expand_leading_tab(text)
        if text.startswith(' '):
            text = text[1:]
            space += 1
        else:
            text_length = len(text)
            text = _TRIM_4.sub('', text)
            space += max(text_length - len(text), 1)

        # outdent
        if '\n ' in text:
            pattern = re.compile(r'\n {1,' + str(space) + r'}')
            text = pattern.sub(r'\n', text)
        return text

    def parse_block_html(self, m, state):
        html = m.group(0).rstrip()
        return {'type': 'block_html', 'raw': html}

    def parse_def_link(self, m, state):
        key = unikey(m.group(1))
        link = m.group(2)
        title = m.group(3)
        if key not in state['def_links']:
            state['def_links'][key] = (link, title)
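
    # Fallback for text no block rule claimed.  Inside a tight list the
    # text stays a bare block_text token; otherwise blank lines split it
    # into paragraph tokens.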
    def parse_text(self, text, state):
        list_tights = state.get('list_tights')
        if list_tights and list_tights[-1]:
            return {'type': 'block_text', 'text': text.strip()}

        tokens = []
        for s in _PARAGRAPH_SPLIT.split(text):
            s = s.strip()
            if s:
                tokens.append({'type': 'paragraph', 'text': s})
        return tokens

    def parse(self, s, state, rules=None):
        if rules is None:
            rules = self.rules

        return list(self._scan(s, state, rules))
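
    # Rendering dispatches each token to a renderer method: 'blank'
    # tokens take no arguments, 'children' render recursively, 'raw' text
    # passes through untouched, and plain 'text' goes through the inline
    # parser first.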
    def render(self, tokens, inline, state):
        data = self._iter_render(tokens, inline, state)
        return inline.renderer.finalize(data)

    def _iter_render(self, tokens, inline, state):
        for tok in tokens:
            method = inline.renderer._get_method(tok['type'])
            if 'blank' in tok:
                yield method()
                continue

            if 'children' in tok:
                children = self.render(tok['children'], inline, state)
            elif 'raw' in tok:
                children = tok['raw']
            else:
                children = inline(tok['text'], state)
            params = tok.get('params')
            if params:
                yield method(children, *params)
            else:
                yield method(children)


def cleanup_lines(s):
    s = _NEW_LINES.sub('\n', s)
    s = _BLANK_LINES.sub('', s)
    return s


def expand_leading_tab(text):
    return _EXPAND_TAB.sub(_expand_tab_repl, text)


def _expand_tab_repl(m):
    s = m.group(1)
    return s + ' ' * (4 - len(s))
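

# Builds the per-item regex: group(1) captures the item's leading spaces,
# and continuation lines must either be indented past the marker (s1) or
# reach that column with a tab (s2).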
def _create_list_item_pattern(spaces, marker):
    prefix = r'( {0,' + str(len(spaces) + len(marker)) + r'})'

    if len(marker) > 1:
        if marker[-1] == '.':
            prefix = prefix + r'\d{0,9}\.'
        else:
            prefix = prefix + r'\d{0,9}\)'
    else:
        prefix = prefix + re.escape(marker)

    s1 = ' {' + str(len(marker) + 1) + ',}'
    if len(marker) > 4:
        s2 = ' {' + str(len(marker) - 4) + r',}\t'
    else:
        s2 = r' *\t'
    return re.compile(
        prefix + r'(?:[ \t]*|[ \t]+[^\n]+)\n+'
        r'(?:\1(?:' + s1 + '|' + s2 + ')'
        r'[^\n]+\n+)*'
    )


def _find_list_items(string, pos, spaces, marker):
    items = []

    if marker in {'*', '-'}:
        is_hr = re.compile(
            r' *((?:-[ \t]*){3,}|(?:\*[ \t]*){3,})\n+'
        )
    else:
        is_hr = None

    pattern = _create_list_item_pattern(spaces, marker)
    while 1:
        m = pattern.match(string, pos)
        if not m:
            break

        text = m.group(0)
        if is_hr and is_hr.match(text):
            break

        new_spaces = m.group(1)
        if new_spaces != spaces:
            spaces = new_spaces
            pattern = _create_list_item_pattern(spaces, marker)

        items.append(text)
        pos = m.end()
    return items, pos
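

# A minimal usage sketch (hypothetical driver code; in mistune this module
# is normally driven by the Markdown object, which prepares the shared
# state dict and hands the tokens to a renderer):
#
#     parser = BlockParser()
#     state = {'def_links': {}}   # parse_def_link stores definitions here
#     text = cleanup_lines('# Title\n\nhello *world*\n')
#     tokens = parser.parse(text, state)
#     # roughly: [{'type': 'heading', 'text': 'Title', 'params': (1,)},
#     #           {'type': 'paragraph', 'text': 'hello *world*'}]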