Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/nbconvert/filters/markdown

1"""Markdown filters with mistune

3Used from markdown.py

4"""

5# Copyright (c) IPython Development Team.

6# Distributed under the terms of the Modified BSD License.

9import base64

10import mimetypes

11import os

12import re

13from functools import partial

14from html import escape

16import bs4

17from mistune import PLUGINS, BlockParser, HTMLRenderer, InlineParser, Markdown # type:ignore

18from pygments import highlight

19from pygments.formatters import HtmlFormatter

20from pygments.lexers import get_lexer_by_name

21from pygments.util import ClassNotFound

23from nbconvert.filters.strings import add_anchor

# HTML-escape helper that converts only ``&``, ``<`` and ``>``;
# ``quote=False`` leaves single and double quote characters untouched.
html_escape = partial(escape, quote=False)

class InvalidNotebook(Exception):  # noqa
    """Error signalling that a notebook model is not valid."""

class MathBlockParser(BlockParser):
    """Block parser that hands math sections over to the inline parser intact.

    Multi-line math is captured as a single ``multiline_math`` token so that
    the remaining block-level rules cannot split a math section apart; the
    actual interpretation of the math is left to ``MathInlineParser``.
    """

    # Three alternatives, tried in order: a ``$$``-delimited span, a span
    # delimited by double-backslash square brackets, and a begin/end
    # environment whose closing name must equal the opening one (``\1``).
    MULTILINE_MATH = re.compile(
        "|".join(
            [
                r"(?<!\\)[$]{2}.*?(?<!\\)[$]{2}",
                r"\\\\\[.*?\\\\\]",
                r"\\begin\{([a-z]*\*?)\}.*?\\end\{\1\}",
            ]
        ),
        flags=re.DOTALL,
    )

    # The math rule is listed ahead of every inherited block rule.
    RULE_NAMES = ("multiline_math",) + tuple(BlockParser.RULE_NAMES)

    # Regex for header that doesn't require space after '#'
    AXT_HEADING = re.compile(r" {0,3}(#{1,6})(?!#+)(?: *\n+|([^\n]*?)(?:\n+|\s+?#+\s*\n+))")

    def parse_multiline_math(self, m, state):
        """Wrap the whole matched text in a ``multiline_math`` token.

        :param m: regex match produced by ``MULTILINE_MATH``.
        :param state: parser state (not used here).
        :return: token dict carrying the unmodified matched text.
        """
        matched_text = m.group(0)
        return {"type": "multiline_math", "text": matched_text}

56def _dotall(pattern):

57 """Make the '.' special character match any character inside the pattern, including a newline.

59 This is implemented with the inline flag `(?s:...)` and is equivalent to using `re.DOTALL` when

60 it is the only pattern used. It is necessary since `mistune>=2.0.0`, where the pattern is passed

61 to the undocumented `re.Scanner`.

62 """

63 return f"(?s:{pattern})"

class MathInlineParser(InlineParser):
    r"""This interprets the content of LaTeX style math objects.

    In particular this grabs ``$$...$$``, ``\\[...\\]``, ``\\(...\\)``, ``$...$``,
    and ``\begin{foo}...\end{foo}`` styles for declaring mathematics. It strips
    delimiters from all these varieties, and extracts the type of environment
    in the last case (``foo`` in this example).
    """

    # Every pattern is wrapped with ``_dotall`` so that '.' spans newlines.
    # The ``(?<!\\)`` lookbehinds reject delimiters preceded by a backslash.
    BLOCK_MATH_TEX = _dotall(r"(?<!\\)\$\$(.*?)(?<!\\)\$\$")
    BLOCK_MATH_LATEX = _dotall(r"(?<!\\)\\\\\[(.*?)(?<!\\)\\\\\]")
    INLINE_MATH_TEX = _dotall(r"(?<![$\\])\$(.+?)(?<![$\\])\$")
    # Parenthesis counterpart of BLOCK_MATH_LATEX: two backslashes followed by
    # "(" open the span and two backslashes followed by ")" close it.
    INLINE_MATH_LATEX = _dotall(r"(?<!\\)\\\\\((.*?)(?<!\\)\\\\\)")
    LATEX_ENVIRONMENT = _dotall(r"\\begin\{([a-z]*\*?)\}(.*?)\\end\{\1\}")

    # The order is important here
    RULE_NAMES = (
        "block_math_tex",
        "block_math_latex",
        "inline_math_tex",
        "inline_math_latex",
        "latex_environment",
        *InlineParser.RULE_NAMES,
    )

    def parse_block_math_tex(self, m, state):
        """Parse ``$$...$$`` block math.

        :return: ``("block_math", text)`` with the delimiters removed.
        """
        # sometimes the Scanner keeps the final '$$', so we use the
        # full matched string and remove the math markers
        text = m.group(0)[2:-2]
        return "block_math", text

    def parse_block_math_latex(self, m, state):
        """Parse bracket-delimited LaTeX block math.

        :return: ``("block_math", text)`` where text is the captured body.
        """
        text = m.group(1)
        return "block_math", text

    def parse_inline_math_tex(self, m, state):
        """Parse ``$...$`` inline math.

        :return: ``("inline_math", text)`` where text is the captured body.
        """
        text = m.group(1)
        return "inline_math", text

    def parse_inline_math_latex(self, m, state):
        """Parse parenthesis-delimited LaTeX inline math.

        :return: ``("inline_math", text)`` where text is the captured body.
        """
        text = m.group(1)
        return "inline_math", text

    def parse_latex_environment(self, m, state):
        """Parse a ``\\begin{name}...\\end{name}`` environment.

        :return: ``("latex_environment", name, text)`` with the environment
            name (group 1) and its body (group 2).
        """
        name, text = m.group(1), m.group(2)
        return "latex_environment", name, text

116

117

class MarkdownWithMath(Markdown):
    """Markdown parser preconfigured with the math-aware block and inline parsers."""

    def __init__(self, renderer, block=None, inline=None, plugins=None):
        """Set up the parser, filling in defaults for anything not supplied.

        :param renderer: renderer passed to the base class and, when *inline*
            is not given, to the default ``MathInlineParser``.
        :param block: block parser; defaults to a new ``MathBlockParser``.
        :param inline: inline parser; defaults to a ``MathInlineParser`` built
            with ``hard_wrap=False``.
        :param plugins: iterable of plugin objects and/or plugin names; names
            are looked up in mistune's ``PLUGINS`` registry.
        """
        block = MathBlockParser() if block is None else block
        inline = MathInlineParser(renderer, hard_wrap=False) if inline is None else inline
        if plugins is None:
            # The "abbr" and "footnotes" plugins are not enabled by default.
            plugins = ["strikethrough", "table", "url", "task_lists", "def_list"]

        # String entries name a registered plugin; anything else is used as-is.
        resolved = [PLUGINS[entry] if isinstance(entry, str) else entry for entry in plugins]
        super().__init__(renderer, block, inline, resolved)

    def render(self, s):
        """Compatibility method with `mistune==0.8.4`: delegate to ``parse``."""
        return self.parse(s)

149

150

class IPythonRenderer(HTMLRenderer):
    """An ipython html renderer.

    Extends the mistune HTML renderer with syntax-highlighted code blocks,
    heading anchor links, math pass-through, notebook attachments and
    optional embedding of local images as base64 data URLs.
    """

    def __init__(  # noqa
        self,
        escape=True,
        allow_harmful_protocols=True,
        embed_images=False,
        exclude_anchor_links=False,
        anchor_link_text="¶",
        path="",
        attachments=None,
    ):
        """Initialize the renderer.

        :param escape: forwarded to ``HTMLRenderer``.
        :param allow_harmful_protocols: forwarded to ``HTMLRenderer``.
        :param embed_images: when true, image sources that resolve to local
            files are replaced by base64 data URLs.
        :param exclude_anchor_links: when true, headings are returned without
            an anchor link.
        :param anchor_link_text: text handed to ``add_anchor`` for headings.
        :param path: directory that image sources are joined onto.
        :param attachments: mapping of attachment name to a mapping of MIME
            type to data (presumably already base64-encoded, since it is put
            straight into a ``;base64,`` data URL); defaults to an empty dict.
        """
        super().__init__(escape, allow_harmful_protocols)
        self.embed_images = embed_images
        self.exclude_anchor_links = exclude_anchor_links
        self.anchor_link_text = anchor_link_text
        self.path = path
        self.attachments = attachments if attachments is not None else {}

    def block_code(self, code, info=None):
        """Handle block code.

        The first word of *info* is treated as a language name. If pygments
        knows a lexer for it the code is highlighted; otherwise the word is
        put back as the first line of the code and the plain base-class
        rendering is used.
        """
        lang = ""
        lexer = None
        # A whitespace-only info string splits into an empty list, so check
        # for a first word before indexing.
        words = info.split(None, 1) if info else []
        if words:
            lang = words[0]
            try:
                lexer = get_lexer_by_name(lang, stripall=True)
            except ClassNotFound:
                code = lang + "\n" + code
                lang = None  # type:ignore

        if not lang:
            return super().block_code(code)

        formatter = HtmlFormatter()
        return highlight(code, lexer, formatter)

    def block_html(self, html):
        """Handle block html, embedding ``<img>`` sources when enabled."""
        if self.embed_images:
            html = self._html_embed_images(html)

        return super().block_html(html)

    def inline_html(self, html):
        """Handle inline html, embedding ``<img>`` sources when enabled."""
        if self.embed_images:
            html = self._html_embed_images(html)

        return super().inline_html(html)

    def heading(self, text, level):
        """Handle a heading, adding an anchor link unless they are excluded."""
        html = super().heading(text, level)
        if self.exclude_anchor_links:
            return html
        return add_anchor(html, anchor_link_text=self.anchor_link_text)

    def escape_html(self, text):
        """Escape html content (``&``, ``<`` and ``>``; quotes are kept)."""
        return html_escape(text)

    def multiline_math(self, text):
        """Handle multiline math by returning the text unchanged."""
        return text

    def block_math(self, text):
        """Handle block math: escape the body and re-wrap it in ``$$``."""
        return f"$${self.escape_html(text)}$$"

    def latex_environment(self, name, text):
        """Handle a latex environment: escape name and body, then re-wrap."""
        name, text = self.escape_html(name), self.escape_html(text)
        return f"\\begin{{{name}}}{text}\\end{{{name}}}"

    def inline_math(self, text):
        """Handle inline math: escape the body and re-wrap it in ``$``."""
        return f"${self.escape_html(text)}$"

    def image(self, src, text, title):
        """Rendering a image with title and text.

        :param src: source link of the image.
        :param text: alt text of the image.
        :param title: title text of the image.
        :raises InvalidNotebook: if *src* refers to an attachment that is
            missing or that has no data.
        """
        attachment_prefix = "attachment:"

        if src.startswith(attachment_prefix):
            name = src[len(attachment_prefix) :]

            if name not in self.attachments:
                msg = f"missing attachment: {name}"
                raise InvalidNotebook(msg)

            attachment = self.attachments[name]
            if not attachment:
                # Nothing to pick a MIME type from; report it the same way as
                # a missing attachment rather than failing on an empty lookup.
                msg = f"empty attachment: {name}"
                raise InvalidNotebook(msg)

            # we choose vector over raster, and lossless over lossy
            preferred_mime_types = ("image/svg+xml", "image/png", "image/jpeg")
            mime_type = next(
                (candidate for candidate in preferred_mime_types if candidate in attachment),
                None,
            )
            if mime_type is None:
                # otherwise we choose the first mimetype we can find
                mime_type = next(iter(attachment))
            data = attachment[mime_type]
            src = "data:" + mime_type + ";base64," + data

        elif self.embed_images:
            base64_url = self._src_to_base64(src)

            if base64_url is not None:
                src = base64_url

        return super().image(src, text, title)

    def _src_to_base64(self, src):
        """Turn the source file into a base64 url.

        :param src: source link of the file.
        :return: the base64 url or None if *src* does not resolve to a
            regular file.
        """
        # NOTE(review): src is joined onto self.path without normalisation, so
        # it can resolve outside that directory — confirm callers only pass
        # trusted notebook content when embed_images is enabled.
        src_path = os.path.join(self.path, src)

        # isfile (not exists) so that directories, e.g. an empty src joined
        # onto a non-empty path, are skipped instead of failing in open().
        if not os.path.isfile(src_path):
            return None

        # guess_type returns None for unknown extensions; use the generic
        # binary type so the data URL stays well-formed.
        mime_type = mimetypes.guess_type(src_path)[0] or "application/octet-stream"

        with open(src_path, "rb") as fobj:
            base64_str = base64.b64encode(fobj.read()).decode("ascii")

        return f"data:{mime_type};base64,{base64_str}"

    def _html_embed_images(self, html):
        """Replace the ``src`` of every ``<img>`` in *html* that resolves to a
        local file by a base64 data URL and return the re-serialized HTML.
        """
        parsed_html = bs4.BeautifulSoup(html, features="html.parser")
        imgs = parsed_html.find_all("img")

        # Replace img tags's sources by base64 dataurls
        for img in imgs:
            if "src" not in img.attrs:
                continue

            base64_url = self._src_to_base64(img.attrs["src"])

            if base64_url is not None:
                img.attrs["src"] = base64_url

        return str(parsed_html)

305

306

def markdown2html_mistune(source):
    """Render a markdown string to HTML with the math-aware mistune parser.

    :param source: markdown text.
    :return: the result of ``MarkdownWithMath.render`` using an
        ``IPythonRenderer`` created with ``escape=False``.
    """
    renderer = IPythonRenderer(escape=False)
    converter = MarkdownWithMath(renderer=renderer)
    return converter.render(source)

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/nbconvert/filters/markdown_mistune.py: 73%

153 statements