Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/formatters/latex.py: 55%

211 statements  

« prev     ^ index     » next       coverage.py v7.2.2, created at 2023-03-26 07:45 +0000

1""" 

2 pygments.formatters.latex 

3 ~~~~~~~~~~~~~~~~~~~~~~~~~ 

4 

5 Formatter for LaTeX fancyvrb output. 

6 

7 :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11from io import StringIO 

12 

13from pygments.formatter import Formatter 

14from pygments.lexer import Lexer, do_insertions 

15from pygments.token import Token, STANDARD_TYPES 

16from pygments.util import get_bool_opt, get_int_opt 

17 

18 

19__all__ = ['LatexFormatter'] 

20 

21 

def escape_tex(text, commandprefix):
    r"""
    Return `text` with every LaTeX-special character replaced by a macro.

    Each special character becomes ``\<commandprefix>Z<xx>{}``, where
    ``<xx>`` is a two-letter code for the character (``bs`` for a backslash,
    ``us`` for an underscore, ...).
    """
    # Backslash and braces are part of the replacement macros themselves, so
    # they are parked on control characters first; otherwise the macros
    # inserted for one character would be escaped again by a later pass.
    for special, placeholder in (('\\', '\x00'), ('{', '\x01'), ('}', '\x02')):
        text = text.replace(special, placeholder)

    # The passes run one after another in exactly this order.
    substitutions = (
        ('\x00', 'bs'),
        ('\x01', 'ob'),
        ('\x02', 'cb'),
        ('^', 'ca'),
        ('_', 'us'),
        ('&', 'am'),
        ('<', 'lt'),
        ('>', 'gt'),
        ('#', 'sh'),
        ('%', 'pc'),
        ('$', 'dl'),
        ('-', 'hy'),
        ("'", 'sq'),
        ('"', 'dq'),
        ('~', 'ti'),
    )
    for special, code in substitutions:
        text = text.replace(special, r'\%sZ%s{}' % (commandprefix, code))
    return text

41 

42 

# Skeleton of the complete LaTeX document written when the `full` option is
# set.  It is filled in with the %-operator from a mapping that provides the
# keys docclass, encoding, preamble, styledefs, title and code.
DOC_TEMPLATE = r'''
\documentclass{%(docclass)s}
\usepackage{fancyvrb}
\usepackage{color}
\usepackage[%(encoding)s]{inputenc}
%(preamble)s

%(styledefs)s

\begin{document}

\section*{%(title)s}

%(code)s
\end{document}
'''

59 

## Small explanation of the mess below :)
#
# The previous version of the LaTeX formatter just assigned a command to
# each token type defined in the current style.  That obviously is
# problematic if the highlighted code is produced for a different style
# than the style commands themselves.
#
# This version works much like the HTML formatter which assigns multiple
# CSS classes to each <span> tag, from the most specific to the least
# specific token type, thus falling back to the parent token type if one
# is not defined.  Here, the classes are there too and use the same short
# forms given in token.STANDARD_TYPES.
#
# Highlighted code now only uses one custom command, which by default is
# \PY and selectable by the commandprefix option (and in addition the
# escapes \PYZat, \PYZlb and \PYZrb which haven't been renamed for
# backwards compatibility purposes).
#
# \PY has two arguments: the classes, separated by +, and the text to
# render in that style.  The classes are resolved into the respective
# style commands by magic, which serves to ignore unknown classes.
#
# The magic macros are:
# * \PY@it, \PY@bf, etc. are unconditionally wrapped around the text
#   to render in \PY@do.  Their definition determines the style.
# * \PY@reset resets \PY@it etc. to do nothing.
# * \PY@toks parses the list of classes, using magic inspired by the
#   keyval package (but modified to use plusses instead of commas
#   because fancyvrb redefines commas inside its environments).
# * \PY@tok processes one class, calling the \PY@tok@classname command
#   if it exists.
# * \PY@tok@classname sets the \PY@it etc. to reflect the chosen style
#   for its class.
# * \PY resets the style, parses the classnames and then calls \PY@do.
#
# Tip: to read this code, print it out in substituted form using e.g.
# >>> print(STYLE_TEMPLATE % {'cp': 'PY', 'styles': ''})

97 

# Macro definitions needed by the highlighted output.  Filled in with the
# %-operator from a mapping that provides 'cp' (the command prefix) and
# 'styles' (the per-token-class definitions); a literal percent sign is
# therefore written as %% below.
STYLE_TEMPLATE = r'''
\makeatletter
\def\%(cp)s@reset{\let\%(cp)s@it=\relax \let\%(cp)s@bf=\relax%%
    \let\%(cp)s@ul=\relax \let\%(cp)s@tc=\relax%%
    \let\%(cp)s@bc=\relax \let\%(cp)s@ff=\relax}
\def\%(cp)s@tok#1{\csname %(cp)s@tok@#1\endcsname}
\def\%(cp)s@toks#1+{\ifx\relax#1\empty\else%%
    \%(cp)s@tok{#1}\expandafter\%(cp)s@toks\fi}
\def\%(cp)s@do#1{\%(cp)s@bc{\%(cp)s@tc{\%(cp)s@ul{%%
    \%(cp)s@it{\%(cp)s@bf{\%(cp)s@ff{#1}}}}}}}
\def\%(cp)s#1#2{\%(cp)s@reset\%(cp)s@toks#1+\relax+\%(cp)s@do{#2}}

%(styles)s

\def\%(cp)sZbs{\char`\\}
\def\%(cp)sZus{\char`\_}
\def\%(cp)sZob{\char`\{}
\def\%(cp)sZcb{\char`\}}
\def\%(cp)sZca{\char`\^}
\def\%(cp)sZam{\char`\&}
\def\%(cp)sZlt{\char`\<}
\def\%(cp)sZgt{\char`\>}
\def\%(cp)sZsh{\char`\#}
\def\%(cp)sZpc{\char`\%%}
\def\%(cp)sZdl{\char`\$}
\def\%(cp)sZhy{\char`\-}
\def\%(cp)sZsq{\char`\'}
\def\%(cp)sZdq{\char`\"}
\def\%(cp)sZti{\char`\~}
%% for compatibility with earlier versions
\def\%(cp)sZat{@}
\def\%(cp)sZlb{[}
\def\%(cp)sZrb{]}
\makeatother
'''

133 

134 

def _get_ttype_name(ttype):
    """
    Return the short class name used for `ttype` in the LaTeX output.

    A non-empty entry in ``STANDARD_TYPES`` is returned as is.  For a token
    type without an entry, the parent chain is followed until a type with an
    entry is found; the result is that entry followed by the names of the
    skipped levels, outermost first.
    """
    short = STANDARD_TYPES.get(ttype)
    if short:
        return short

    skipped = []  # own names of the unregistered levels, innermost first
    while short is None:
        skipped.append(ttype[-1])
        ttype = ttype.parent
        short = STANDARD_TYPES.get(ttype)
    return short + ''.join(reversed(skipped))

145 

146 

class LatexFormatter(Formatter):
    r"""
    Format tokens as LaTeX code. This needs the `fancyvrb` and `color`
    standard packages.

    Without the `full` option, code is formatted as one ``Verbatim``
    environment, like this:

    .. sourcecode:: latex

        \begin{Verbatim}[commandchars=\\\{\}]
        \PY{k}{def }\PY{n+nf}{foo}(\PY{n}{bar}):
            \PY{k}{pass}
        \end{Verbatim}

    Wrapping can be disabled using the `nowrap` option.

    The special command used here (``\PY``) and all the other macros it needs
    are output by the `get_style_defs` method.

    With the `full` option, a complete LaTeX document is output, including
    the command definitions in the preamble.

    The `get_style_defs()` method of a `LatexFormatter` returns a string
    containing ``\def`` commands defining the macros needed inside the
    ``Verbatim`` environments.

    Additional options accepted:

    `nowrap`
        If set to ``True``, don't wrap the tokens at all, not even inside a
        ``\begin{Verbatim}`` environment. This disables most other options
        (default: ``False``).

    `style`
        The style to use, can be a string or a Style subclass (default:
        ``'default'``).

    `full`
        Tells the formatter to output a "full" document, i.e. a complete
        self-contained document (default: ``False``).

    `title`
        If `full` is true, the title that should be used to caption the
        document (default: ``''``).

    `docclass`
        If the `full` option is enabled, this is the document class to use
        (default: ``'article'``).

    `preamble`
        If the `full` option is enabled, this can be further preamble commands,
        e.g. ``\usepackage`` (default: ``''``).

    `linenos`
        If set to ``True``, output line numbers (default: ``False``).

    `linenostart`
        The line number for the first line (default: ``1``).

    `linenostep`
        If set to a number n > 1, only every nth line number is printed.

    `verboptions`
        Additional options given to the Verbatim environment (see the *fancyvrb*
        docs for possible values) (default: ``''``).

    `commandprefix`
        The LaTeX commands used to produce colored output are constructed
        using this prefix and some letters (default: ``'PY'``).

        .. versionadded:: 0.7
        .. versionchanged:: 0.10
           The default is now ``'PY'`` instead of ``'C'``.

    `texcomments`
        If set to ``True``, enables LaTeX comment lines.  That is, LaTeX markup
        in comment tokens is not escaped so that LaTeX can render it (default:
        ``False``).

        .. versionadded:: 1.2

    `mathescape`
        If set to ``True``, enables LaTeX math mode escape in comments. That
        is, ``'$...$'`` inside a comment will trigger math mode (default:
        ``False``).

        .. versionadded:: 1.2

    `escapeinside`
        If set to a string of length 2, enables escaping to LaTeX. Text
        delimited by these 2 characters is read as LaTeX code and
        typeset accordingly. It has no effect in string literals. It has
        no effect in comments if `texcomments` or `mathescape` is
        set. (default: ``''``).

        .. versionadded:: 2.0

    `envname`
        Allows you to pick an alternative environment name replacing Verbatim.
        The alternate environment still has to support Verbatim's option syntax.
        (default: ``'Verbatim'``).

        .. versionadded:: 2.0
    """
    name = 'LaTeX'
    aliases = ['latex', 'tex']
    filenames = ['*.tex']

    def __init__(self, **options):
        """Read the formatter options and precompute the style commands."""
        Formatter.__init__(self, **options)
        self.nowrap = get_bool_opt(options, 'nowrap', False)
        self.docclass = options.get('docclass', 'article')
        self.preamble = options.get('preamble', '')
        self.linenos = get_bool_opt(options, 'linenos', False)
        # Negative numbers are folded to their absolute value.
        self.linenostart = abs(get_int_opt(options, 'linenostart', 1))
        self.linenostep = abs(get_int_opt(options, 'linenostep', 1))
        self.verboptions = options.get('verboptions', '')
        # NOTE(review): stored but not consulted anywhere in this class.
        self.nobackground = get_bool_opt(options, 'nobackground', False)
        self.commandprefix = options.get('commandprefix', 'PY')
        self.texcomments = get_bool_opt(options, 'texcomments', False)
        self.mathescape = get_bool_opt(options, 'mathescape', False)
        self.escapeinside = options.get('escapeinside', '')
        if len(self.escapeinside) == 2:
            self.left = self.escapeinside[0]
            self.right = self.escapeinside[1]
        else:
            # Anything that is not exactly two characters disables escaping;
            # ``left``/``right`` are only read when ``escapeinside`` is set.
            self.escapeinside = ''
        self.envname = options.get('envname', 'Verbatim')

        self._create_stylesheet()

    def _create_stylesheet(self):
        """
        Build ``self.ttype2name`` (token type -> short class name) and
        ``self.cmd2def`` (short class name -> LaTeX definition body) from
        ``self.style``.  Token types whose style sets nothing are left out.
        """
        t2n = self.ttype2name = {Token: ''}
        c2d = self.cmd2def = {}
        cp = self.commandprefix

        def rgbcolor(col):
            # 'rrggbb' hex string -> 'r,g,b' with components in 0..1;
            # an empty/missing colour maps to white.
            if col:
                return ','.join(['%.2f' % (int(col[i] + col[i + 1], 16) / 255.0)
                                 for i in (0, 2, 4)])
            else:
                return '1,1,1'

        for ttype, ndef in self.style:
            name = _get_ttype_name(ttype)
            # '$$' stands for the command prefix; it is substituted once the
            # whole definition has been assembled.
            cmndef = ''
            if ndef['bold']:
                cmndef += r'\let\$$@bf=\textbf'
            if ndef['italic']:
                cmndef += r'\let\$$@it=\textit'
            if ndef['underline']:
                cmndef += r'\let\$$@ul=\underline'
            if ndef['roman']:
                cmndef += r'\let\$$@ff=\textrm'
            if ndef['sans']:
                cmndef += r'\let\$$@ff=\textsf'
            if ndef['mono']:
                # Monospace is \texttt; this used to emit \textsf (sans-serif),
                # a copy of the branch above.
                cmndef += r'\let\$$@ff=\texttt'
            if ndef['color']:
                cmndef += (r'\def\$$@tc##1{\textcolor[rgb]{%s}{##1}}' %
                           rgbcolor(ndef['color']))
            if ndef['border']:
                cmndef += (r'\def\$$@bc##1{{\setlength{\fboxsep}{\string -\fboxrule}'
                           r'\fcolorbox[rgb]{%s}{%s}{\strut ##1}}}' %
                           (rgbcolor(ndef['border']),
                            rgbcolor(ndef['bgcolor'])))
            elif ndef['bgcolor']:
                cmndef += (r'\def\$$@bc##1{{\setlength{\fboxsep}{0pt}'
                           r'\colorbox[rgb]{%s}{\strut ##1}}}' %
                           rgbcolor(ndef['bgcolor']))
            if cmndef == '':
                continue
            cmndef = cmndef.replace('$$', cp)
            t2n[ttype] = name
            c2d[name] = cmndef

    def get_style_defs(self, arg=''):
        """
        Return the command sequences needed to define the commands
        used to format text in the verbatim environment. ``arg`` is ignored.
        """
        cp = self.commandprefix
        styles = [r'\@namedef{%s@tok@%s}{%s}' % (cp, name, definition)
                  for name, definition in self.cmd2def.items()]
        return STYLE_TEMPLATE % {'cp': cp,
                                 'styles': '\n'.join(styles)}

    def _escape_comment(self, value):
        """
        Escape the text of one comment token according to the
        `texcomments`, `mathescape` and `escapeinside` options, checked in
        that order; with none of them set the whole text is escaped.
        """
        cp = self.commandprefix

        if self.texcomments:
            # Guess the comment-starting lexeme as the leading run of the
            # first character and escape only that; the rest of the comment
            # is passed through so that LaTeX can render it.
            if not value:
                return value
            run = len(value) - len(value.lstrip(value[0]))
            return escape_tex(value[:run], cp) + value[run:]

        if self.mathescape:
            # Splitting on '$' leaves the text outside math mode at the even
            # positions; only those pieces are escaped.
            parts = value.split('$')
            parts[::2] = [escape_tex(part, cp) for part in parts[::2]]
            return '$'.join(parts)

        if self.escapeinside:
            pieces = []
            text = value
            while text:
                a, sep1, text = text.partition(self.left)
                if sep1:
                    b, sep2, text = text.partition(self.right)
                    if sep2:
                        # Properly delimited: ``b`` is raw LaTeX and the
                        # delimiters themselves are dropped.
                        pieces.append(escape_tex(a, cp) + b)
                    else:
                        # No closing delimiter: treat everything as text.
                        pieces.append(escape_tex(a + sep1 + b, cp))
                else:
                    pieces.append(escape_tex(a, cp))
            return ''.join(pieces)

        return escape_tex(value, cp)

    def format_unencoded(self, tokensource, outfile):
        """
        Write the ``(tokentype, value)`` pairs from `tokensource` to
        `outfile` as LaTeX.

        Unless `nowrap` is set, the output is wrapped in the configured
        verbatim environment.  With `full` set, the highlighted code is
        collected in a buffer first and then embedded into ``DOC_TEMPLATE``.
        """
        # TODO: add support for background colors
        t2n = self.ttype2name
        cp = self.commandprefix

        if self.full:
            realoutfile = outfile
            outfile = StringIO()

        if not self.nowrap:
            outfile.write('\\begin{' + self.envname + '}[commandchars=\\\\\\{\\}')
            if self.linenos:
                start, step = self.linenostart, self.linenostep
                # A value of 0 omits the corresponding fancyvrb option.
                outfile.write(',numbers=left' +
                              (',firstnumber=%d' % start if start else '') +
                              (',stepnumber=%d' % step if step else ''))
            if self.mathescape or self.texcomments or self.escapeinside:
                outfile.write(',codes={\\catcode`\\$=3\\catcode`\\^=7'
                              '\\catcode`\\_=8\\relax}')
            if self.verboptions:
                outfile.write(',' + self.verboptions)
            outfile.write(']\n')

        for ttype, value in tokensource:
            if ttype in Token.Comment:
                value = self._escape_comment(value)
            elif ttype not in Token.Escape:
                # Token.Escape values are raw LaTeX and are written unescaped.
                value = escape_tex(value, cp)

            # Collect the class names from the most specific type up to (but
            # excluding) the root, then emit them least specific first.
            styles = []
            while ttype is not Token:
                try:
                    styles.append(t2n[ttype])
                except KeyError:
                    # not in current style
                    styles.append(_get_ttype_name(ttype))
                ttype = ttype.parent
            styleval = '+'.join(reversed(styles))

            if styleval:
                # Each line gets its own command; empty lines only emit the
                # newline.
                spl = value.split('\n')
                for line in spl[:-1]:
                    if line:
                        outfile.write("\\%s{%s}{%s}" % (cp, styleval, line))
                    outfile.write('\n')
                if spl[-1]:
                    outfile.write("\\%s{%s}{%s}" % (cp, styleval, spl[-1]))
            else:
                outfile.write(value)

        if not self.nowrap:
            outfile.write('\\end{' + self.envname + '}\n')

        if self.full:
            encoding = self.encoding or 'utf8'
            # map known existing encodings from LaTeX distribution
            encoding = {
                'utf_8': 'utf8',
                'latin_1': 'latin1',
                'iso_8859_1': 'latin1',
            }.get(encoding.replace('-', '_'), encoding)
            realoutfile.write(DOC_TEMPLATE %
                              dict(docclass=self.docclass,
                                   preamble=self.preamble,
                                   title=self.title,
                                   encoding=encoding,
                                   styledefs=self.get_style_defs(),
                                   code=outfile.getvalue()))

438 

439 

class LatexEmbeddedLexer(Lexer):
    """
    Lexer wrapper that recognises escaped LaTeX segments.

    It is given the lexer for the language being formatted plus the left
    and right delimiters of escaped text.

    The text is first scanned with the language lexer to locate strings and
    comments.  Everything else is merged and searched for escaped segments,
    which get the Token.Escape type.  The text that is not escaped is then
    scanned once more with the language lexer.
    """
    def __init__(self, left, right, lang, **options):
        self.left = left
        self.right = right
        self.lang = lang
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        # Cut the escape tokens out of the text, lex what remains with the
        # language lexer and splice the escape tokens back in; this is very
        # similar to DelegatingLexer.get_tokens_unprocessed.
        plain_parts = []   # text handed to the language lexer
        plain_len = 0      # total length of ``plain_parts`` so far
        insertions = []
        pending = []       # escape tokens waiting for their insertion point
        for index, ttype, value in self._find_safe_escape_tokens(text):
            if ttype is not None:
                pending.append((index, ttype, value))
                continue
            if pending:
                insertions.append((plain_len, pending))
                pending = []
            plain_parts.append(value)
            plain_len += len(value)
        if pending:
            insertions.append((plain_len, pending))
        return do_insertions(
            insertions,
            self.lang.get_tokens_unprocessed(''.join(plain_parts)))

    def _find_safe_escape_tokens(self, text):
        """ find escape tokens that are not in strings or comments """
        def is_protected(ttype):
            return ttype in Token.Comment or ttype in Token.String

        tokens = self.lang.get_tokens_unprocessed(text)
        for start, ttype, value in self._filter_to(tokens, is_protected):
            if ttype is not None:
                # Strings and comments are handed on as plain text.
                yield start, None, value
                continue
            for offset, found_type, found in self._find_escape_tokens(value):
                yield start + offset, found_type, found

    def _filter_to(self, it, pred):
        """ Keep only the tokens that match `pred`, merge the others together """
        merged = ''
        merged_start = 0
        for i, t, v in it:
            if not pred(t):
                if not merged:
                    merged_start = i
                merged += v
                continue
            if merged:
                yield merged_start, None, merged
                merged = ''
            yield i, t, v
        if merged:
            yield merged_start, None, merged

    def _find_escape_tokens(self, text):
        """ Find escape tokens within text, give token=None otherwise """
        index = 0
        while text:
            before, opener, text = text.partition(self.left)
            if before:
                yield index, None, before
                index += len(before)
            if not opener:
                continue
            inner, closer, text = text.partition(self.right)
            if closer:
                yield index + len(opener), Token.Escape, inner
                index += len(opener) + len(inner) + len(closer)
            else:
                # Unterminated escape: flag the opening delimiter and rescan
                # what follows it.
                yield index, Token.Error, opener
                index += len(opener)
                text = inner