Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/formatters/latex.py: 55%

1"""

2 pygments.formatters.latex

3 ~~~~~~~~~~~~~~~~~~~~~~~~~

5 Formatter for LaTeX fancyvrb output.

8 :license: BSD, see LICENSE for details.

9"""

11from io import StringIO

13from pygments.formatter import Formatter

14from pygments.lexer import Lexer, do_insertions

15from pygments.token import Token, STANDARD_TYPES

16from pygments.util import get_bool_opt, get_int_opt

19__all__ = ['LatexFormatter']

def escape_tex(text, commandprefix):
    """Replace characters that are special to LaTeX with escape macros.

    Each special character becomes ``\\<commandprefix>Z??{}``, where ``??``
    is a two-letter code for the character (``bs`` for backslash, ``us``
    for underscore, and so on).

    :param text: the raw text to escape.
    :param commandprefix: prefix used to build the macro names.
    :return: the escaped text.
    """
    # Pass 1: park backslash and braces on placeholder characters so that
    # the backslashes/braces of the macros inserted in pass 2 are not
    # themselves escaped again.
    placeholders = (
        ('\\', '\x00'),
        ('{', '\x01'),
        ('}', '\x02'),
    )
    for char, stand_in in placeholders:
        text = text.replace(char, stand_in)

    # Pass 2: substitute the macros, in this exact order.
    macros = (
        ('\x00', r'\%sZbs{}'),
        ('\x01', r'\%sZob{}'),
        ('\x02', r'\%sZcb{}'),
        ('^', r'\%sZca{}'),
        ('_', r'\%sZus{}'),
        ('&', r'\%sZam{}'),
        ('<', r'\%sZlt{}'),
        ('>', r'\%sZgt{}'),
        ('#', r'\%sZsh{}'),
        ('%', r'\%sZpc{}'),
        ('$', r'\%sZdl{}'),
        ('-', r'\%sZhy{}'),
        ("'", r'\%sZsq{}'),
        ('"', r'\%sZdq{}'),
        ('~', r'\%sZti{}'),
    )
    for char, template in macros:
        text = text.replace(char, template % commandprefix)
    return text

# Skeleton of the standalone LaTeX document written when the `full` option
# is enabled.  It is filled with %-style mapping substitution; the keys are
# docclass, encoding, preamble, styledefs, title and code.
DOC_TEMPLATE = r'''
\documentclass{%(docclass)s}
\usepackage{fancyvrb}
\usepackage{color}
\usepackage[%(encoding)s]{inputenc}
%(preamble)s

%(styledefs)s

\begin{document}

\section*{%(title)s}

%(code)s
\end{document}
'''

60## Small explanation of the mess below :)

61#

62# The previous version of the LaTeX formatter just assigned a command to

63# each token type defined in the current style. That obviously is

64# problematic if the highlighted code is produced for a different style

65# than the style commands themselves.

66#

67# This version works much like the HTML formatter which assigns multiple

68# CSS classes to each <span> tag, from the most specific to the least

69# specific token type, thus falling back to the parent token type if one

70# is not defined. Here, the classes are there too and use the same short

71# forms given in token.STANDARD_TYPES.

72#

73# Highlighted code now only uses one custom command, which by default is

74# \PY and selectable by the commandprefix option (and in addition the

75# escapes \PYZat, \PYZlb and \PYZrb which haven't been renamed for

76# backwards compatibility purposes).

77#

78# \PY has two arguments: the classes, separated by +, and the text to

79# render in that style. The classes are resolved into the respective

80# style commands by magic, which serves to ignore unknown classes.

81#

82# The magic macros are:

83# * \PY@it, \PY@bf, etc. are unconditionally wrapped around the text

84# to render in \PY@do. Their definition determines the style.

85# * \PY@reset resets \PY@it etc. to do nothing.

86# * \PY@toks parses the list of classes, using magic inspired by the

87# keyval package (but modified to use plusses instead of commas

88# because fancyvrb redefines commas inside its environments).

89# * \PY@tok processes one class, calling the \PY@tok@classname command

90# if it exists.

91# * \PY@tok@classname sets the \PY@it etc. to reflect the chosen style

92# for its class.

93# * \PY resets the style, parses the classnames and then calls \PY@do.

94#

95# Tip: to read this code, print it out in substituted form using e.g.

96# >>> print(STYLE_TEMPLATE % {'cp': 'PY', 'styles': ''})

# Macro definitions required by the highlighted output.  Filled with %-style
# mapping substitution: %(cp)s is the command prefix and %(styles)s the
# per-token-class definitions.  Because of the % substitution, every literal
# percent sign in the LaTeX code is written as %%.
STYLE_TEMPLATE = r'''
\makeatletter
\def\%(cp)s@reset{\let\%(cp)s@it=\relax \let\%(cp)s@bf=\relax%%
    \let\%(cp)s@ul=\relax \let\%(cp)s@tc=\relax%%
    \let\%(cp)s@bc=\relax \let\%(cp)s@ff=\relax}
\def\%(cp)s@tok#1{\csname %(cp)s@tok@#1\endcsname}
\def\%(cp)s@toks#1+{\ifx\relax#1\empty\else%%
    \%(cp)s@tok{#1}\expandafter\%(cp)s@toks\fi}
\def\%(cp)s@do#1{\%(cp)s@bc{\%(cp)s@tc{\%(cp)s@ul{%%
    \%(cp)s@it{\%(cp)s@bf{\%(cp)s@ff{#1}}}}}}}
\def\%(cp)s#1#2{\%(cp)s@reset\%(cp)s@toks#1+\relax+\%(cp)s@do{#2}}

%(styles)s

\def\%(cp)sZbs{\char`\\}
\def\%(cp)sZus{\char`\_}
\def\%(cp)sZob{\char`\{}
\def\%(cp)sZcb{\char`\}}
\def\%(cp)sZca{\char`\^}
\def\%(cp)sZam{\char`\&}
\def\%(cp)sZlt{\char`\<}
\def\%(cp)sZgt{\char`\>}
\def\%(cp)sZsh{\char`\#}
\def\%(cp)sZpc{\char`\%%}
\def\%(cp)sZdl{\char`\$}
\def\%(cp)sZhy{\char`\-}
\def\%(cp)sZsq{\char`\'}
\def\%(cp)sZdq{\char`\"}
\def\%(cp)sZti{\char`\~}
%% for compatibility with earlier versions
\def\%(cp)sZat{@}
\def\%(cp)sZlb{[}
\def\%(cp)sZrb{]}
\makeatother
'''

133

134

def _get_ttype_name(ttype):
    """Return the short class name used for *ttype* in the LaTeX output.

    A token type listed in ``STANDARD_TYPES`` with a non-empty name yields
    that name directly.  Otherwise the parent chain is walked until an
    ancestor with an entry is found, and the last components of the
    unlisted types are appended to that ancestor's name.
    """
    short = STANDARD_TYPES.get(ttype)
    if short:
        return short

    # Collected innermost-first; reversed when joined so that the result
    # reads ancestor-name + outer component + ... + inner component.
    tail = []
    node = ttype
    while short is None:
        tail.append(node[-1])
        node = node.parent
        short = STANDARD_TYPES.get(node)
    return short + ''.join(reversed(tail))

145

146

class LatexFormatter(Formatter):
    r"""
    Format tokens as LaTeX code. This needs the `fancyvrb` and `color`
    standard packages.

    Without the `full` option, code is formatted as one ``Verbatim``
    environment, like this:

    .. sourcecode:: latex

        \begin{Verbatim}[commandchars=\\\{\}]
        \PY{k}{def }\PY{n+nf}{foo}(\PY{n}{bar}):
            \PY{k}{pass}
        \end{Verbatim}

    Wrapping can be disabled using the `nowrap` option.

    The special command used here (``\PY``) and all the other macros it needs
    are output by the `get_style_defs` method.

    With the `full` option, a complete LaTeX document is output, including
    the command definitions in the preamble.

    The `get_style_defs()` method of a `LatexFormatter` returns a string
    containing ``\def`` commands defining the macros needed inside the
    ``Verbatim`` environments.

    Additional options accepted:

    `nowrap`
        If set to ``True``, don't wrap the tokens at all, not even inside a
        ``\begin{Verbatim}`` environment. This disables most other options
        (default: ``False``).

    `style`
        The style to use, can be a string or a Style subclass (default:
        ``'default'``).

    `full`
        Tells the formatter to output a "full" document, i.e. a complete
        self-contained document (default: ``False``).

    `title`
        If `full` is true, the title that should be used to caption the
        document (default: ``''``).

    `docclass`
        If the `full` option is enabled, this is the document class to use
        (default: ``'article'``).

    `preamble`
        If the `full` option is enabled, this can be further preamble commands,
        e.g. ``\usepackage`` (default: ``''``).

    `linenos`
        If set to ``True``, output line numbers (default: ``False``).

    `linenostart`
        The line number for the first line (default: ``1``).

    `linenostep`
        If set to a number n > 1, only every nth line number is printed.

    `verboptions`
        Additional options given to the Verbatim environment (see the *fancyvrb*
        docs for possible values) (default: ``''``).

    `commandprefix`
        The LaTeX commands used to produce colored output are constructed
        using this prefix and some letters (default: ``'PY'``).

        .. versionadded:: 0.7
        .. versionchanged:: 0.10
           The default is now ``'PY'`` instead of ``'C'``.

    `texcomments`
        If set to ``True``, enables LaTeX comment lines.  That is, LaTeX markup
        in comment tokens is not escaped so that LaTeX can render it (default:
        ``False``).

        .. versionadded:: 1.2

    `mathescape`
        If set to ``True``, enables LaTeX math mode escape in comments. That
        is, ``'$...$'`` inside a comment will trigger math mode (default:
        ``False``).

        .. versionadded:: 1.2

    `escapeinside`
        If set to a string of length 2, enables escaping to LaTeX. Text
        delimited by these 2 characters is read as LaTeX code and
        typeset accordingly. It has no effect in string literals. It has
        no effect in comments if `texcomments` or `mathescape` is
        set. (default: ``''``).

        .. versionadded:: 2.0

    `envname`
        Allows you to pick an alternative environment name replacing Verbatim.
        The alternate environment still has to support Verbatim's option syntax.
        (default: ``'Verbatim'``).

        .. versionadded:: 2.0
    """
    name = 'LaTeX'
    aliases = ['latex', 'tex']
    filenames = ['*.tex']

    def __init__(self, **options):
        """Read the formatter options and build the style command table."""
        Formatter.__init__(self, **options)
        self.nowrap = get_bool_opt(options, 'nowrap', False)
        self.docclass = options.get('docclass', 'article')
        self.preamble = options.get('preamble', '')
        self.linenos = get_bool_opt(options, 'linenos', False)
        # abs() turns negative numbers into their positive counterpart
        # instead of passing them on to fancyvrb.
        self.linenostart = abs(get_int_opt(options, 'linenostart', 1))
        self.linenostep = abs(get_int_opt(options, 'linenostep', 1))
        self.verboptions = options.get('verboptions', '')
        # Parsed here, but nothing else in this class reads it.
        self.nobackground = get_bool_opt(options, 'nobackground', False)
        self.commandprefix = options.get('commandprefix', 'PY')
        self.texcomments = get_bool_opt(options, 'texcomments', False)
        self.mathescape = get_bool_opt(options, 'mathescape', False)
        self.escapeinside = options.get('escapeinside', '')
        if len(self.escapeinside) == 2:
            self.left = self.escapeinside[0]
            self.right = self.escapeinside[1]
        else:
            # Anything that is not exactly two characters disables the
            # feature; `left`/`right` are then never consulted.
            self.escapeinside = ''
        self.envname = options.get('envname', 'Verbatim')

        self._create_stylesheet()

    def _create_stylesheet(self):
        """Build the token-type -> class-name and class-name -> TeX tables.

        Fills ``self.ttype2name`` and ``self.cmd2def`` from ``self.style``.
        Token types whose style definition produces no TeX code are left
        out of both tables.
        """
        t2n = self.ttype2name = {Token: ''}
        c2d = self.cmd2def = {}
        cp = self.commandprefix

        def rgbcolor(col):
            # Convert a 6-digit hex colour into fancyvrb's "r,g,b" form with
            # components in 0..1; a missing colour is rendered as white.
            if col:
                return ','.join(['%.2f' % (int(col[i] + col[i + 1], 16) / 255.0)
                                 for i in (0, 2, 4)])
            else:
                return '1,1,1'

        # '$$' is a stand-in for the command prefix, substituted once the
        # definition is complete.  '##1' is written doubled because the code
        # ends up inside the body of another definition (see get_style_defs).
        for ttype, ndef in self.style:
            name = _get_ttype_name(ttype)
            cmndef = ''
            if ndef['bold']:
                cmndef += r'\let\$$@bf=\textbf'
            if ndef['italic']:
                cmndef += r'\let\$$@it=\textit'
            if ndef['underline']:
                cmndef += r'\let\$$@ul=\underline'
            if ndef['roman']:
                cmndef += r'\let\$$@ff=\textrm'
            if ndef['sans']:
                cmndef += r'\let\$$@ff=\textsf'
            if ndef['mono']:
                # Monospace is \texttt; this used to emit \textsf, which made
                # 'mono' indistinguishable from 'sans'.
                cmndef += r'\let\$$@ff=\texttt'
            if ndef['color']:
                cmndef += (r'\def\$$@tc##1{\textcolor[rgb]{%s}{##1}}' %
                           rgbcolor(ndef['color']))
            if ndef['border']:
                cmndef += (r'\def\$$@bc##1{{\setlength{\fboxsep}{\string -\fboxrule}'
                           r'\fcolorbox[rgb]{%s}{%s}{\strut ##1}}}' %
                           (rgbcolor(ndef['border']),
                            rgbcolor(ndef['bgcolor'])))
            elif ndef['bgcolor']:
                cmndef += (r'\def\$$@bc##1{{\setlength{\fboxsep}{0pt}'
                           r'\colorbox[rgb]{%s}{\strut ##1}}}' %
                           rgbcolor(ndef['bgcolor']))
            if cmndef == '':
                continue
            cmndef = cmndef.replace('$$', cp)
            t2n[ttype] = name
            c2d[name] = cmndef

    def get_style_defs(self, arg=''):
        """
        Return the command sequences needed to define the commands
        used to format text in the verbatim environment. ``arg`` is ignored.
        """
        cp = self.commandprefix
        styles = [r'\@namedef{%s@tok@%s}{%s}' % (cp, name, definition)
                  for name, definition in self.cmd2def.items()]
        return STYLE_TEMPLATE % {'cp': cp,
                                 'styles': '\n'.join(styles)}

    def _escape_comment(self, value):
        """Escape the text of a comment token according to the options.

        Precedence is `texcomments`, then `mathescape`, then `escapeinside`;
        with none of them set the whole text is escaped.
        """
        cp = self.commandprefix

        if self.texcomments:
            # Guess the comment-start lexeme as the run of characters equal
            # to the first one and escape only that; the rest of the comment
            # is passed to LaTeX untouched.
            lead = value[:1]
            body = value.lstrip(lead) if lead else value
            marker = value[:len(value) - len(body)]
            return escape_tex(marker, cp) + body

        if self.mathescape:
            # Splitting on '$' alternates outside/inside math mode, starting
            # outside: only the even-numbered parts are escaped.
            parts = [part if i % 2 else escape_tex(part, cp)
                     for i, part in enumerate(value.split('$'))]
            return '$'.join(parts)

        if self.escapeinside:
            pieces = []
            text = value
            while text:
                a, sep1, text = text.partition(self.left)
                if sep1:
                    b, sep2, text = text.partition(self.right)
                    if sep2:
                        # Properly delimited: emit the inner text raw and
                        # drop both delimiters.
                        pieces.append(escape_tex(a, cp) + b)
                    else:
                        # Unterminated escape: treat everything as text.
                        pieces.append(escape_tex(a + sep1 + b, cp))
                else:
                    pieces.append(escape_tex(a, cp))
            return ''.join(pieces)

        return escape_tex(value, cp)

    def format_unencoded(self, tokensource, outfile):
        """Write the ``(tokentype, value)`` pairs of *tokensource* to *outfile*.

        Unless `nowrap` is set, the output is wrapped in the configured
        verbatim environment.  With `full` set, the highlighted code is
        embedded in ``DOC_TEMPLATE`` and the complete document is written.
        """
        # TODO: add support for background colors
        t2n = self.ttype2name
        cp = self.commandprefix

        if self.full:
            # Collect the code in memory first; it is inserted into the
            # document template at the end.
            realoutfile = outfile
            outfile = StringIO()

        if not self.nowrap:
            outfile.write('\\begin{' + self.envname + '}[commandchars=\\\\\\{\\}')
            if self.linenos:
                start, step = self.linenostart, self.linenostep
                outfile.write(',numbers=left' +
                              (',firstnumber=%d' % start if start else '') +
                              (',stepnumber=%d' % step if step else ''))
            if self.mathescape or self.texcomments or self.escapeinside:
                outfile.write(',codes={\\catcode`\\$=3\\catcode`\\^=7'
                              '\\catcode`\\_=8\\relax}')
            if self.verboptions:
                outfile.write(',' + self.verboptions)
            outfile.write(']\n')

        for ttype, value in tokensource:
            if ttype in Token.Comment:
                value = self._escape_comment(value)
            elif ttype not in Token.Escape:
                # Token.Escape values are raw LaTeX and are written as-is.
                value = escape_tex(value, cp)

            # Collect class names from the most specific type up to (but not
            # including) the root, then emit them least specific first.
            styles = []
            while ttype is not Token:
                try:
                    styles.append(t2n[ttype])
                except KeyError:
                    # not in current style
                    styles.append(_get_ttype_name(ttype))
                ttype = ttype.parent
            styleval = '+'.join(reversed(styles))
            if styleval:
                # Wrap each line separately so that no command argument
                # contains a newline; empty lines get no command at all.
                spl = value.split('\n')
                for line in spl[:-1]:
                    if line:
                        outfile.write("\\%s{%s}{%s}" % (cp, styleval, line))
                    outfile.write('\n')
                if spl[-1]:
                    outfile.write("\\%s{%s}{%s}" % (cp, styleval, spl[-1]))
            else:
                outfile.write(value)

        if not self.nowrap:
            outfile.write('\\end{' + self.envname + '}\n')

        if self.full:
            encoding = self.encoding or 'utf8'
            # map known existings encodings from LaTeX distribution; the
            # lookup key is lower-cased so that e.g. 'UTF-8' is recognised
            # too.  Unknown encodings are passed through unchanged.
            encoding = {
                'utf_8': 'utf8',
                'latin_1': 'latin1',
                'iso_8859_1': 'latin1',
            }.get(encoding.replace('-', '_').lower(), encoding)
            realoutfile.write(DOC_TEMPLATE %
                              dict(docclass=self.docclass,
                                   preamble=self.preamble,
                                   title=self.title,
                                   encoding=encoding,
                                   styledefs=self.get_style_defs(),
                                   code=outfile.getvalue()))

438

439

class LatexEmbeddedLexer(Lexer):
    """
    Wrap a language lexer so that text between two delimiters is passed
    through as raw LaTeX.

    The constructor takes the left and right delimiters and the lexer for
    the language being formatted.

    The input is first scanned with the language lexer to locate strings
    and comments.  All other consecutive tokens are merged, and the merged
    text is searched for escaped segments, which get the Token.Escape type.
    Whatever is not escaped is then scanned again with the language lexer.
    """
    def __init__(self, left, right, lang, **options):
        self.left = left
        self.right = right
        self.lang = lang
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """Lex *text*, re-inserting the escape tokens at their positions."""
        # Strip the escape tokens out of the text, remembering where they
        # belong; this is very similar to
        # DelegatingLexer.get_tokens_unprocessed.
        plain_parts = []
        plain_len = 0
        insertions = []
        pending = []
        for index, ttype, value in self._find_safe_escape_tokens(text):
            if ttype is not None:
                pending.append((index, ttype, value))
                continue
            if pending:
                # Tokens gathered so far belong right before this text.
                insertions.append((plain_len, pending))
                pending = []
            plain_parts.append(value)
            plain_len += len(value)
        if pending:
            insertions.append((plain_len, pending))
        return do_insertions(
            insertions,
            self.lang.get_tokens_unprocessed(''.join(plain_parts)))

    def _find_safe_escape_tokens(self, text):
        """ find escape tokens that are not in strings or comments """
        def is_protected(ttype):
            return ttype in Token.Comment or ttype in Token.String

        lexed = self.lang.get_tokens_unprocessed(text)
        for offset, ttype, value in self._filter_to(lexed, is_protected):
            if ttype is not None:
                # Comment or string: hand the text back unmarked so that it
                # is lexed again later, without looking for escapes in it.
                yield offset, None, value
                continue
            for rel, sub_type, sub_value in self._find_escape_tokens(value):
                yield offset + rel, sub_type, sub_value

    def _filter_to(self, it, pred):
        """ Keep only the tokens that match `pred`, merge the others together """
        merged = ''
        merged_start = 0
        for pos, ttype, value in it:
            if not pred(ttype):
                if not merged:
                    merged_start = pos
                merged += value
                continue
            if merged:
                yield merged_start, None, merged
                merged = ''
            yield pos, ttype, value
        if merged:
            yield merged_start, None, merged

    def _find_escape_tokens(self, text):
        """ Find escape tokens within text, give token=None otherwise """
        pos = 0
        remaining = text
        while remaining:
            before, opener, remaining = remaining.partition(self.left)
            if before:
                yield pos, None, before
                pos += len(before)
            if not opener:
                # No further left delimiter; `remaining` is empty now.
                continue
            inner, closer, remaining = remaining.partition(self.right)
            if closer:
                yield pos + len(opener), Token.Escape, inner
                pos += len(opener) + len(inner) + len(closer)
            else:
                # Unterminated escape: flag the delimiter as an error and
                # keep scanning the text that followed it.
                yield pos, Token.Error, opener
                pos += len(opener)
                remaining = inner