Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/markdown/extensions/codehilite.py: 15%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

117 statements  

1# CodeHilite Extension for Python-Markdown 

2# ======================================== 

3 

4# Adds code/syntax highlighting to standard Python-Markdown code blocks. 

5 

6# See https://Python-Markdown.github.io/extensions/code_hilite 

7# for documentation. 

8 

9# Original code Copyright 2006-2008 [Waylan Limberg](http://achinghead.com/). 

10 

11# All changes Copyright 2008-2014 The Python Markdown Project 

12 

13# License: [BSD](https://opensource.org/licenses/bsd-license.php) 

14 

15""" 

16Adds code/syntax highlighting to standard Python-Markdown code blocks. 

17 

18See the [documentation](https://Python-Markdown.github.io/extensions/code_hilite) 

19for details. 

20""" 

21 

22from __future__ import annotations 

23 

24from . import Extension 

25from ..treeprocessors import Treeprocessor 

26from ..util import parseBoolValue 

27from typing import TYPE_CHECKING, Callable, Any 

28 

29if TYPE_CHECKING: # pragma: no cover 

30 import xml.etree.ElementTree as etree 

31 

32try: # pragma: no cover 

33 from pygments import highlight 

34 from pygments.lexers import get_lexer_by_name, guess_lexer 

35 from pygments.formatters import get_formatter_by_name 

36 from pygments.util import ClassNotFound 

37 pygments = True 

38except ImportError: # pragma: no cover 

39 pygments = False 

40 

41 

def parse_hl_lines(expr: str) -> list[int]:
    """Parse the whitespace-separated line numbers used to emphasize lines.

    `expr` is expected to look like `'1 2'`, meaning lines 1 and 2 of a code
    block should be emphasized.

    Returns the line numbers as a list of integers. An empty (or otherwise
    falsy) `expr`, or one holding a token that is not an integer, yields an
    empty list.
    """
    if not expr:
        return []

    try:
        line_numbers = [int(token) for token in expr.split()]
    except ValueError:  # pragma: no cover
        # A single bad token invalidates the whole expression.
        return []
    return line_numbers

55 

56 

57# ------------------ The Main CodeHilite Class ---------------------- 

class CodeHilite:
    """
    Determine language of source code, and pass it on to the Pygments highlighter.

    Usage:

    ```python
    code = CodeHilite(src=some_code, lang='python')
    html = code.hilite()
    ```

    Arguments:
        src: Source code string to highlight.

    Keyword arguments:
        lang (str): String name of Pygments lexer to use for highlighting. Default: `None`.
        guess_lang (bool): Auto-detect which lexer to use.
            Ignored if `lang` is set to a valid value. Default: `True`.
        use_pygments (bool): Pass code to Pygments for code highlighting. If `False`, the code is
            instead wrapped for highlighting by a JavaScript library. Default: `True`.
        pygments_formatter (str): The name of a Pygments formatter or a formatter class used for
            highlighting the code blocks. Default: `html`.
        linenums (bool): An alias to Pygments `linenos` formatter option. Default: `None`.
        css_class (str): An alias to Pygments `cssclass` formatter option. Default: 'codehilite'.
        lang_prefix (str): Prefix prepended to the language. Default: "language-".

    Other Options:

    Any other options are accepted and passed on to the lexer and formatter. Therefore,
    valid options include any options which are accepted by the `html` formatter or
    whichever lexer the code's language uses. Note that most lexers do not have any
    options. However, a few have very useful options, such as PHP's `startinline` option.
    Any invalid options are ignored without error.

    * **Formatter options**: <https://pygments.org/docs/formatters/#HtmlFormatter>
    * **Lexer Options**: <https://pygments.org/docs/lexers/>

    Additionally, when Pygments is enabled and `pygments_formatter` is a formatter
    class (rather than a formatter name), the code's language is passed to the
    formatter as an extra option `lang_str`, whose value is `{lang_prefix}{lang}`.
    This option has no effect on Pygments' builtin formatters.

    Advanced Usage:

    ```python
    code = CodeHilite(
        src = some_code,
        lang = 'php',
        startinline = True,      # Lexer option. Snippet does not start with `<?php`.
        linenostart = 42,        # Formatter option. Snippet starts on line 42.
        hl_lines = [45, 49, 50], # Formatter option. Highlight lines 45, 49, and 50.
        linenos = 'inline'       # Formatter option. Avoid alignment problems.
    )
    html = code.hilite()
    ```

    """

    def __init__(self, src: str, **options):
        self.src = src
        self.lang: str | None = options.pop('lang', None)
        self.guess_lang: bool = options.pop('guess_lang', True)
        self.use_pygments: bool = options.pop('use_pygments', True)
        self.lang_prefix: str = options.pop('lang_prefix', 'language-')
        self.pygments_formatter: str | Callable = options.pop('pygments_formatter', 'html')

        # Translate our option aliases to the Pygments names, unless the
        # Pygments name was given explicitly.
        if 'linenos' not in options:
            options['linenos'] = options.pop('linenums', None)
        if 'cssclass' not in options:
            options['cssclass'] = options.pop('css_class', 'codehilite')
        if 'wrapcode' not in options:
            # Override Pygments default
            options['wrapcode'] = True
        # Disallow use of `full` option
        options['full'] = False

        # Everything left over is handed to both the lexer and the formatter.
        self.options = options

    def hilite(self, shebang: bool = True) -> str:
        """
        Pass code to the [Pygments](https://pygments.org/) highlighter with
        optional line numbers. The output should then be styled with CSS to
        your liking. No styles are applied by default - only styling hooks
        (i.e.: `<span class="k">`).

        Arguments:
            shebang: When `True` (the default) and no language has been set,
                the first line of the source is inspected for a shebang or
                `:::lang` header.

        returns : A string of html.

        """

        self.src = self.src.strip('\n')

        if self.lang is None and shebang:
            self._parseHeader()

        if pygments and self.use_pygments:
            try:
                lexer = get_lexer_by_name(self.lang, **self.options)
            except ValueError:
                # Unknown (or missing) language: guess if allowed, else plain text.
                try:
                    if self.guess_lang:
                        lexer = guess_lexer(self.src, **self.options)
                    else:
                        lexer = get_lexer_by_name('text', **self.options)
                except ValueError:  # pragma: no cover
                    lexer = get_lexer_by_name('text', **self.options)
            if not self.lang:
                # Use the guessed lexer's language instead
                self.lang = lexer.aliases[0]
            lang_str = f'{self.lang_prefix}{self.lang}'
            if isinstance(self.pygments_formatter, str):
                try:
                    formatter = get_formatter_by_name(self.pygments_formatter, **self.options)
                except ClassNotFound:
                    # Unknown formatter name: fall back to the HTML formatter.
                    formatter = get_formatter_by_name('html', **self.options)
            else:
                formatter = self.pygments_formatter(lang_str=lang_str, **self.options)
            return highlight(self.src, lexer, formatter)
        else:
            # just escape and build markup usable by JavaScript highlighting libraries
            # `&` must be escaped first so the entities added below are not re-escaped.
            txt = self.src.replace('&', '&amp;')
            txt = txt.replace('<', '&lt;')
            txt = txt.replace('>', '&gt;')
            txt = txt.replace('"', '&quot;')
            classes = []
            if self.lang:
                classes.append('{}{}'.format(self.lang_prefix, self.lang))
            if self.options['linenos']:
                classes.append('linenums')
            class_str = ''
            if classes:
                class_str = ' class="{}"'.format(' '.join(classes))
            return '<pre class="{}"><code{}>{}\n</code></pre>\n'.format(
                self.options['cssclass'],
                class_str,
                txt
            )

    def _parseHeader(self) -> None:
        """
        Determines language of a code block from shebang line and whether the
        said line should be removed or left in place. If the shebang line
        contains a path (even a single /) then it is assumed to be a real
        shebang line and left alone. However, if no path is given
        (i.e.: `#!python` or `:::python`) then it is assumed to be a mock shebang
        for language identification of a code fragment and removed from the
        code block prior to processing for code highlighting. When a shebang
        (i.e.: `#!python`) is found and line numbering has not been set
        explicitly, line numbering is turned on. When colons are found in
        place of a shebang (i.e.: `:::python`), line numbering is left in the
        current state - off by default.

        Also parses optional list of highlight lines, like:

            :::python hl_lines="1 3"

        An `hl_lines` value in the header replaces any `hl_lines` option given
        to the constructor. When the header has no `hl_lines`, an option given
        to the constructor is kept (defaulting to an empty list).
        """

        import re

        # split text into lines
        lines = self.src.split("\n")
        # pull first line to examine
        fl = lines.pop(0)

        c = re.compile(r'''
            (?:(?:^::+)|(?P<shebang>^[#]!)) # Shebang or 2 or more colons
            (?P<path>(?:/\w+)*[/ ])?        # Zero or 1 path
            (?P<lang>[\w#.+-]*)             # The language
            \s*                             # Arbitrary whitespace
            # Optional highlight lines, single- or double-quote-delimited
            (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))?
            ''', re.VERBOSE)
        # search first line for shebang
        m = c.search(fl)
        if m:
            # we have a match; `lang` always participates in a match (it may
            # be the empty string), so no lookup error is possible here.
            self.lang = m.group('lang').lower()
            if m.group('path'):
                # path exists - restore first line
                lines.insert(0, fl)
            if self.options['linenos'] is None and m.group('shebang'):
                # Overridable and Shebang exists - use line numbers
                self.options['linenos'] = True

            hl_spec = m.group('hl_lines')
            if hl_spec is not None:
                # The header explicitly specifies the lines to emphasize.
                self.options['hl_lines'] = parse_hl_lines(hl_spec)
            else:
                # No `hl_lines` in the header: do not clobber a value the
                # caller passed in as an option.
                self.options.setdefault('hl_lines', [])
        else:
            # No match
            lines.insert(0, fl)

        self.src = "\n".join(lines).strip("\n")

248 

249 

250# ------------------ The Markdown Extension ------------------------------- 

251 

252 

class HiliteTreeprocessor(Treeprocessor):
    """ Highlight source code in code blocks. """

    config: dict[str, Any]

    def code_unescape(self, text: str) -> str:
        """Return `text` with `&lt;`, `&gt;` and `&amp;` turned back into characters."""
        # `&amp;` is handled last so that an escaped ampersand cannot combine
        # with following text to form a new `&lt;` or `&gt;` entity.
        for entity, char in (("&lt;", "<"), ("&gt;", ">"), ("&amp;", "&")):
            text = text.replace(entity, char)
        return text

    def run(self, root: etree.Element) -> None:
        """ Highlight each `<pre><code>` block and stash the result in `htmlStash`. """
        for pre in root.iter('pre'):
            # Only a `pre` whose single child is a `code` element is a code block.
            if len(pre) != 1 or pre[0].tag != 'code':
                continue
            raw = pre[0].text
            if raw is None:
                continue

            settings = self.config.copy()
            style = settings.pop('pygments_style', 'default')
            highlighter = CodeHilite(
                self.code_unescape(raw),
                tab_length=self.md.tab_length,
                style=style,
                **settings
            )
            placeholder = self.md.htmlStash.store(highlighter.hilite())
            # Clear code block in `etree` instance
            pre.clear()
            # Change to `p` element which will later
            # be removed when inserting raw html
            pre.tag = 'p'
            pre.text = placeholder

289 

290 

class CodeHiliteExtension(Extension):
    """ Add source code highlighting to markdown code blocks. """

    def __init__(self, **kwargs):
        # define default configs
        self.config = {
            'linenums': [
                None, "Use lines numbers. True|table|inline=yes, False=no, None=auto. Default: `None`."
            ],
            'guess_lang': [
                True, "Automatic language detection - Default: `True`."
            ],
            'css_class': [
                "codehilite", "Set class name for wrapper <div> - Default: `codehilite`."
            ],
            'pygments_style': [
                'default', 'Pygments HTML Formatter Style (Colorscheme). Default: `default`.'
            ],
            'noclasses': [
                False, 'Use inline styles instead of CSS classes - Default `False`.'
            ],
            'use_pygments': [
                True, 'Highlight code blocks with pygments. Disable if using a JavaScript library. Default: `True`.'
            ],
            'lang_prefix': [
                'language-', 'Prefix prepended to the language when `use_pygments` is false. Default: `language-`.'
            ],
            'pygments_formatter': [
                'html', 'Use a specific formatter for Pygments highlighting. Default: `html`.'
            ],
        }
        """ Default configuration options. """

        for key, value in kwargs.items():
            if key in self.config:
                # Known option: let `setConfig` handle it.
                self.setConfig(key, value)
                continue

            # Unknown keyword: store it directly, with an empty description.
            if isinstance(value, str):
                try:
                    # Attempt to parse `str` as a boolean value
                    value = parseBoolValue(value, preserve_none=True)
                except ValueError:
                    pass  # Assume it's not a boolean value. Use as-is.
            self.config[key] = [value, '']

    def extendMarkdown(self, md):
        """ Register a `HiliteTreeprocessor` carrying this extension's configs on `md`. """
        processor = HiliteTreeprocessor(md)
        processor.config = self.getConfigs()
        md.treeprocessors.register(processor, 'hilite', 30)

        md.registerExtension(self)

344 

345 

def makeExtension(**kwargs):  # pragma: no cover
    """Build a `CodeHiliteExtension`, forwarding all keyword arguments to it."""
    extension = CodeHiliteExtension(**kwargs)
    return extension