1"""
2 pygments.filters
3 ~~~~~~~~~~~~~~~~
4
5 Module containing filter lookup functions and default
6 filters.
7
8 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10"""
11
12import re
13
14from pygments.token import String, Comment, Keyword, Name, Error, Whitespace, \
15 string_to_tokentype
16from pygments.filter import Filter
17from pygments.util import get_list_opt, get_int_opt, get_bool_opt, \
18 get_choice_opt, ClassNotFound, OptionError
19from pygments.plugin import find_plugin_filters
20
21
def find_filter_class(filtername):
    """Return the filter class registered under *filtername*, or None.

    Built-in filters (the ``FILTERS`` table) take precedence; filters
    provided by plugins are only consulted when no built-in matches.
    """
    try:
        return FILTERS[filtername]
    except KeyError:
        pass
    # Fall back to plugin filters; the first one with a matching name wins.
    plugin_matches = (cls for name, cls in find_plugin_filters()
                      if name == filtername)
    return next(plugin_matches, None)
30
31
def get_filter_by_name(filtername, **options):
    """Look up the filter called *filtername* and instantiate it.

    All keyword *options* are forwarded to the filter's initializer.

    Raises ClassNotFound when no filter is registered under that name.
    """
    cls = find_filter_class(filtername)
    if not cls:
        raise ClassNotFound(f'filter {filtername!r} not found')
    return cls(**options)
43
44
def get_all_filters():
    """Yield the name of every known filter, built-ins first, then plugins."""
    for builtin_name in FILTERS:
        yield builtin_name
    yield from (plugin_name for plugin_name, _ in find_plugin_filters())
50
51
def _replace_special(ttype, value, regex, specialttype,
                     replacefunc=lambda x: x):
    """Split *value* around the matches of *regex* and retag the matches.

    Text between matches is yielded as ``(ttype, text)``; every matched
    span is yielded as ``(specialttype, replacefunc(matched_text))``.
    Nothing is yielded for empty stretches, so an empty *value* produces
    no tokens at all.

    Zero-width matches (possible when *regex* can match the empty string)
    are ignored: they carry no text to retag and would otherwise emit
    empty special tokens and needlessly fragment the surrounding text.
    """
    last = 0
    for match in regex.finditer(value):
        start, end = match.span()
        if start == end:
            # Zero-width match: nothing to highlight here.
            continue
        if start != last:
            yield ttype, value[last:start]
        yield specialttype, replacefunc(value[start:end])
        last = end
    if last != len(value):
        yield ttype, value[last:]
63
64
class CodeTagFilter(Filter):
    """Highlight special code tags in comments and docstrings.

    Options accepted:

    `codetags` : list of strings
       A list of strings that are flagged as code tags.  The default is to
       highlight ``XXX``, ``TODO``, ``FIXME``, ``BUG`` and ``NOTE``.
       Empty strings in the list are ignored; if no tag remains, nothing
       is highlighted.

    .. versionchanged:: 2.13
       Now recognizes ``FIXME`` by default.
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        tags = get_list_opt(options, 'codetags',
                            ['XXX', 'TODO', 'FIXME', 'BUG', 'NOTE'])
        alternatives = '|'.join(re.escape(tag) for tag in tags if tag)
        if alternatives:
            self.tag_re = re.compile(rf'\b({alternatives})\b')
        else:
            # Without any tag the pattern would degenerate to ``\b()\b``,
            # which matches (with zero width) at every word boundary.
            # An empty negative lookahead never matches anything.
            self.tag_re = re.compile(r'(?!)')

    def filter(self, lexer, stream):
        """Retag code tags inside docstrings and non-preprocessor comments
        as `Comment.Special`; all other tokens pass through unchanged.
        """
        regex = self.tag_re
        for ttype, value in stream:
            # Preprocessor "comments" are real code, so leave them alone.
            if ttype in String.Doc or (ttype in Comment and
                                       ttype not in Comment.Preproc):
                yield from _replace_special(ttype, value, regex,
                                            Comment.Special)
            else:
                yield ttype, value
95
96
class SymbolFilter(Filter):
    """Replace symbol escapes by the Unicode characters they stand for.

    A token is rewritten only when its complete value is a known symbol
    name, for example ``\\<longrightarrow>`` in Isabelle or
    ``\\longrightarrow`` in LaTeX.

    This is mostly useful for HTML or console output when you want to
    approximate the source rendering you'd see in an IDE.

    Options accepted:

    `lang` : string
       The symbol language. Must be one of ``'isabelle'`` or
       ``'latex'``.  The default is ``'isabelle'``.
    """

    # LaTeX macro name -> Unicode replacement.
    latex_symbols = {
        '\\alpha' : '\U000003b1',
        '\\beta' : '\U000003b2',
        '\\gamma' : '\U000003b3',
        '\\delta' : '\U000003b4',
        '\\varepsilon' : '\U000003b5',
        '\\zeta' : '\U000003b6',
        '\\eta' : '\U000003b7',
        '\\vartheta' : '\U000003b8',
        '\\iota' : '\U000003b9',
        '\\kappa' : '\U000003ba',
        '\\lambda' : '\U000003bb',
        '\\mu' : '\U000003bc',
        '\\nu' : '\U000003bd',
        '\\xi' : '\U000003be',
        '\\pi' : '\U000003c0',
        '\\varrho' : '\U000003c1',
        '\\sigma' : '\U000003c3',
        '\\tau' : '\U000003c4',
        '\\upsilon' : '\U000003c5',
        '\\varphi' : '\U000003c6',
        '\\chi' : '\U000003c7',
        '\\psi' : '\U000003c8',
        '\\omega' : '\U000003c9',
        '\\Gamma' : '\U00000393',
        '\\Delta' : '\U00000394',
        '\\Theta' : '\U00000398',
        '\\Lambda' : '\U0000039b',
        '\\Xi' : '\U0000039e',
        '\\Pi' : '\U000003a0',
        '\\Sigma' : '\U000003a3',
        '\\Upsilon' : '\U000003a5',
        '\\Phi' : '\U000003a6',
        '\\Psi' : '\U000003a8',
        '\\Omega' : '\U000003a9',
        '\\leftarrow' : '\U00002190',
        '\\longleftarrow' : '\U000027f5',
        '\\rightarrow' : '\U00002192',
        '\\longrightarrow' : '\U000027f6',
        '\\Leftarrow' : '\U000021d0',
        '\\Longleftarrow' : '\U000027f8',
        '\\Rightarrow' : '\U000021d2',
        '\\Longrightarrow' : '\U000027f9',
        '\\leftrightarrow' : '\U00002194',
        '\\longleftrightarrow' : '\U000027f7',
        '\\Leftrightarrow' : '\U000021d4',
        '\\Longleftrightarrow' : '\U000027fa',
        '\\mapsto' : '\U000021a6',
        '\\longmapsto' : '\U000027fc',
        '\\relbar' : '\U00002500',
        '\\Relbar' : '\U00002550',
        '\\hookleftarrow' : '\U000021a9',
        '\\hookrightarrow' : '\U000021aa',
        '\\leftharpoondown' : '\U000021bd',
        '\\rightharpoondown' : '\U000021c1',
        '\\leftharpoonup' : '\U000021bc',
        '\\rightharpoonup' : '\U000021c0',
        '\\rightleftharpoons' : '\U000021cc',
        '\\leadsto' : '\U0000219d',
        '\\downharpoonleft' : '\U000021c3',
        '\\downharpoonright' : '\U000021c2',
        '\\upharpoonleft' : '\U000021bf',
        '\\upharpoonright' : '\U000021be',
        '\\restriction' : '\U000021be',
        '\\uparrow' : '\U00002191',
        '\\Uparrow' : '\U000021d1',
        '\\downarrow' : '\U00002193',
        '\\Downarrow' : '\U000021d3',
        '\\updownarrow' : '\U00002195',
        '\\Updownarrow' : '\U000021d5',
        '\\langle' : '\U000027e8',
        '\\rangle' : '\U000027e9',
        '\\lceil' : '\U00002308',
        '\\rceil' : '\U00002309',
        '\\lfloor' : '\U0000230a',
        '\\rfloor' : '\U0000230b',
        '\\flqq' : '\U000000ab',
        '\\frqq' : '\U000000bb',
        '\\bot' : '\U000022a5',
        '\\top' : '\U000022a4',
        '\\wedge' : '\U00002227',
        '\\bigwedge' : '\U000022c0',
        '\\vee' : '\U00002228',
        '\\bigvee' : '\U000022c1',
        '\\forall' : '\U00002200',
        '\\exists' : '\U00002203',
        '\\nexists' : '\U00002204',
        '\\neg' : '\U000000ac',
        '\\Box' : '\U000025a1',
        '\\Diamond' : '\U000025c7',
        '\\vdash' : '\U000022a2',
        '\\models' : '\U000022a8',
        '\\dashv' : '\U000022a3',
        '\\surd' : '\U0000221a',
        '\\le' : '\U00002264',
        '\\ge' : '\U00002265',
        '\\ll' : '\U0000226a',
        '\\gg' : '\U0000226b',
        '\\lesssim' : '\U00002272',
        '\\gtrsim' : '\U00002273',
        '\\lessapprox' : '\U00002a85',
        '\\gtrapprox' : '\U00002a86',
        '\\in' : '\U00002208',
        '\\notin' : '\U00002209',
        '\\subset' : '\U00002282',
        '\\supset' : '\U00002283',
        '\\subseteq' : '\U00002286',
        '\\supseteq' : '\U00002287',
        '\\sqsubset' : '\U0000228f',
        '\\sqsupset' : '\U00002290',
        '\\sqsubseteq' : '\U00002291',
        '\\sqsupseteq' : '\U00002292',
        '\\cap' : '\U00002229',
        '\\bigcap' : '\U000022c2',
        '\\cup' : '\U0000222a',
        '\\bigcup' : '\U000022c3',
        '\\sqcup' : '\U00002294',
        '\\bigsqcup' : '\U00002a06',
        '\\sqcap' : '\U00002293',
        '\\Bigsqcap' : '\U00002a05',
        '\\setminus' : '\U00002216',
        '\\propto' : '\U0000221d',
        '\\uplus' : '\U0000228e',
        '\\bigplus' : '\U00002a04',
        '\\sim' : '\U0000223c',
        '\\doteq' : '\U00002250',
        '\\simeq' : '\U00002243',
        '\\approx' : '\U00002248',
        '\\asymp' : '\U0000224d',
        '\\cong' : '\U00002245',
        '\\equiv' : '\U00002261',
        '\\Join' : '\U000022c8',
        '\\bowtie' : '\U00002a1d',
        '\\prec' : '\U0000227a',
        '\\succ' : '\U0000227b',
        '\\preceq' : '\U0000227c',
        '\\succeq' : '\U0000227d',
        '\\parallel' : '\U00002225',
        '\\mid' : '\U000000a6',
        '\\pm' : '\U000000b1',
        '\\mp' : '\U00002213',
        '\\times' : '\U000000d7',
        '\\div' : '\U000000f7',
        '\\cdot' : '\U000022c5',
        '\\star' : '\U000022c6',
        '\\circ' : '\U00002218',
        '\\dagger' : '\U00002020',
        '\\ddagger' : '\U00002021',
        '\\lhd' : '\U000022b2',
        '\\rhd' : '\U000022b3',
        '\\unlhd' : '\U000022b4',
        '\\unrhd' : '\U000022b5',
        '\\triangleleft' : '\U000025c3',
        '\\triangleright' : '\U000025b9',
        '\\triangle' : '\U000025b3',
        '\\triangleq' : '\U0000225c',
        '\\oplus' : '\U00002295',
        '\\bigoplus' : '\U00002a01',
        '\\otimes' : '\U00002297',
        '\\bigotimes' : '\U00002a02',
        '\\odot' : '\U00002299',
        '\\bigodot' : '\U00002a00',
        '\\ominus' : '\U00002296',
        '\\oslash' : '\U00002298',
        '\\dots' : '\U00002026',
        '\\cdots' : '\U000022ef',
        '\\sum' : '\U00002211',
        '\\prod' : '\U0000220f',
        '\\coprod' : '\U00002210',
        '\\infty' : '\U0000221e',
        '\\int' : '\U0000222b',
        '\\oint' : '\U0000222e',
        '\\clubsuit' : '\U00002663',
        '\\diamondsuit' : '\U00002662',
        '\\heartsuit' : '\U00002661',
        '\\spadesuit' : '\U00002660',
        '\\aleph' : '\U00002135',
        '\\emptyset' : '\U00002205',
        '\\nabla' : '\U00002207',
        '\\partial' : '\U00002202',
        '\\flat' : '\U0000266d',
        '\\natural' : '\U0000266e',
        '\\sharp' : '\U0000266f',
        '\\angle' : '\U00002220',
        '\\copyright' : '\U000000a9',
        '\\textregistered' : '\U000000ae',
        '\\textonequarter' : '\U000000bc',
        '\\textonehalf' : '\U000000bd',
        '\\textthreequarters' : '\U000000be',
        '\\textordfeminine' : '\U000000aa',
        '\\textordmasculine' : '\U000000ba',
        '\\euro' : '\U000020ac',
        '\\pounds' : '\U000000a3',
        '\\yen' : '\U000000a5',
        '\\textcent' : '\U000000a2',
        '\\textcurrency' : '\U000000a4',
        '\\textdegree' : '\U000000b0',
    }

    # Isabelle symbol name -> Unicode replacement.
    isabelle_symbols = {
        '\\<zero>' : '\U0001d7ec',
        '\\<one>' : '\U0001d7ed',
        '\\<two>' : '\U0001d7ee',
        '\\<three>' : '\U0001d7ef',
        '\\<four>' : '\U0001d7f0',
        '\\<five>' : '\U0001d7f1',
        '\\<six>' : '\U0001d7f2',
        '\\<seven>' : '\U0001d7f3',
        '\\<eight>' : '\U0001d7f4',
        '\\<nine>' : '\U0001d7f5',
        '\\<A>' : '\U0001d49c',
        '\\<B>' : '\U0000212c',
        '\\<C>' : '\U0001d49e',
        '\\<D>' : '\U0001d49f',
        '\\<E>' : '\U00002130',
        '\\<F>' : '\U00002131',
        '\\<G>' : '\U0001d4a2',
        '\\<H>' : '\U0000210b',
        '\\<I>' : '\U00002110',
        '\\<J>' : '\U0001d4a5',
        '\\<K>' : '\U0001d4a6',
        '\\<L>' : '\U00002112',
        '\\<M>' : '\U00002133',
        '\\<N>' : '\U0001d4a9',
        '\\<O>' : '\U0001d4aa',
        '\\<P>' : '\U0001d4ab',
        '\\<Q>' : '\U0001d4ac',
        '\\<R>' : '\U0000211b',
        '\\<S>' : '\U0001d4ae',
        '\\<T>' : '\U0001d4af',
        '\\<U>' : '\U0001d4b0',
        '\\<V>' : '\U0001d4b1',
        '\\<W>' : '\U0001d4b2',
        '\\<X>' : '\U0001d4b3',
        '\\<Y>' : '\U0001d4b4',
        '\\<Z>' : '\U0001d4b5',
        '\\<a>' : '\U0001d5ba',
        '\\<b>' : '\U0001d5bb',
        '\\<c>' : '\U0001d5bc',
        '\\<d>' : '\U0001d5bd',
        '\\<e>' : '\U0001d5be',
        '\\<f>' : '\U0001d5bf',
        '\\<g>' : '\U0001d5c0',
        '\\<h>' : '\U0001d5c1',
        '\\<i>' : '\U0001d5c2',
        '\\<j>' : '\U0001d5c3',
        '\\<k>' : '\U0001d5c4',
        '\\<l>' : '\U0001d5c5',
        '\\<m>' : '\U0001d5c6',
        '\\<n>' : '\U0001d5c7',
        '\\<o>' : '\U0001d5c8',
        '\\<p>' : '\U0001d5c9',
        '\\<q>' : '\U0001d5ca',
        '\\<r>' : '\U0001d5cb',
        '\\<s>' : '\U0001d5cc',
        '\\<t>' : '\U0001d5cd',
        '\\<u>' : '\U0001d5ce',
        '\\<v>' : '\U0001d5cf',
        '\\<w>' : '\U0001d5d0',
        '\\<x>' : '\U0001d5d1',
        '\\<y>' : '\U0001d5d2',
        '\\<z>' : '\U0001d5d3',
        '\\<AA>' : '\U0001d504',
        '\\<BB>' : '\U0001d505',
        '\\<CC>' : '\U0000212d',
        '\\<DD>' : '\U0001d507',
        '\\<EE>' : '\U0001d508',
        '\\<FF>' : '\U0001d509',
        '\\<GG>' : '\U0001d50a',
        '\\<HH>' : '\U0000210c',
        '\\<II>' : '\U00002111',
        '\\<JJ>' : '\U0001d50d',
        '\\<KK>' : '\U0001d50e',
        '\\<LL>' : '\U0001d50f',
        '\\<MM>' : '\U0001d510',
        '\\<NN>' : '\U0001d511',
        '\\<OO>' : '\U0001d512',
        '\\<PP>' : '\U0001d513',
        '\\<QQ>' : '\U0001d514',
        '\\<RR>' : '\U0000211c',
        '\\<SS>' : '\U0001d516',
        '\\<TT>' : '\U0001d517',
        '\\<UU>' : '\U0001d518',
        '\\<VV>' : '\U0001d519',
        '\\<WW>' : '\U0001d51a',
        '\\<XX>' : '\U0001d51b',
        '\\<YY>' : '\U0001d51c',
        '\\<ZZ>' : '\U00002128',
        '\\<aa>' : '\U0001d51e',
        '\\<bb>' : '\U0001d51f',
        '\\<cc>' : '\U0001d520',
        '\\<dd>' : '\U0001d521',
        '\\<ee>' : '\U0001d522',
        '\\<ff>' : '\U0001d523',
        '\\<gg>' : '\U0001d524',
        '\\<hh>' : '\U0001d525',
        '\\<ii>' : '\U0001d526',
        '\\<jj>' : '\U0001d527',
        '\\<kk>' : '\U0001d528',
        '\\<ll>' : '\U0001d529',
        '\\<mm>' : '\U0001d52a',
        '\\<nn>' : '\U0001d52b',
        '\\<oo>' : '\U0001d52c',
        '\\<pp>' : '\U0001d52d',
        '\\<qq>' : '\U0001d52e',
        '\\<rr>' : '\U0001d52f',
        '\\<ss>' : '\U0001d530',
        '\\<tt>' : '\U0001d531',
        '\\<uu>' : '\U0001d532',
        '\\<vv>' : '\U0001d533',
        '\\<ww>' : '\U0001d534',
        '\\<xx>' : '\U0001d535',
        '\\<yy>' : '\U0001d536',
        '\\<zz>' : '\U0001d537',
        '\\<alpha>' : '\U000003b1',
        '\\<beta>' : '\U000003b2',
        '\\<gamma>' : '\U000003b3',
        '\\<delta>' : '\U000003b4',
        '\\<epsilon>' : '\U000003b5',
        '\\<zeta>' : '\U000003b6',
        '\\<eta>' : '\U000003b7',
        '\\<theta>' : '\U000003b8',
        '\\<iota>' : '\U000003b9',
        '\\<kappa>' : '\U000003ba',
        '\\<lambda>' : '\U000003bb',
        '\\<mu>' : '\U000003bc',
        '\\<nu>' : '\U000003bd',
        '\\<xi>' : '\U000003be',
        '\\<pi>' : '\U000003c0',
        '\\<rho>' : '\U000003c1',
        '\\<sigma>' : '\U000003c3',
        '\\<tau>' : '\U000003c4',
        '\\<upsilon>' : '\U000003c5',
        '\\<phi>' : '\U000003c6',
        '\\<chi>' : '\U000003c7',
        '\\<psi>' : '\U000003c8',
        '\\<omega>' : '\U000003c9',
        '\\<Gamma>' : '\U00000393',
        '\\<Delta>' : '\U00000394',
        '\\<Theta>' : '\U00000398',
        '\\<Lambda>' : '\U0000039b',
        '\\<Xi>' : '\U0000039e',
        '\\<Pi>' : '\U000003a0',
        '\\<Sigma>' : '\U000003a3',
        '\\<Upsilon>' : '\U000003a5',
        '\\<Phi>' : '\U000003a6',
        '\\<Psi>' : '\U000003a8',
        '\\<Omega>' : '\U000003a9',
        '\\<bool>' : '\U0001d539',
        '\\<complex>' : '\U00002102',
        '\\<nat>' : '\U00002115',
        '\\<rat>' : '\U0000211a',
        '\\<real>' : '\U0000211d',
        '\\<int>' : '\U00002124',
        '\\<leftarrow>' : '\U00002190',
        '\\<longleftarrow>' : '\U000027f5',
        '\\<rightarrow>' : '\U00002192',
        '\\<longrightarrow>' : '\U000027f6',
        '\\<Leftarrow>' : '\U000021d0',
        '\\<Longleftarrow>' : '\U000027f8',
        '\\<Rightarrow>' : '\U000021d2',
        '\\<Longrightarrow>' : '\U000027f9',
        '\\<leftrightarrow>' : '\U00002194',
        '\\<longleftrightarrow>' : '\U000027f7',
        '\\<Leftrightarrow>' : '\U000021d4',
        '\\<Longleftrightarrow>' : '\U000027fa',
        '\\<mapsto>' : '\U000021a6',
        '\\<longmapsto>' : '\U000027fc',
        '\\<midarrow>' : '\U00002500',
        '\\<Midarrow>' : '\U00002550',
        '\\<hookleftarrow>' : '\U000021a9',
        '\\<hookrightarrow>' : '\U000021aa',
        '\\<leftharpoondown>' : '\U000021bd',
        '\\<rightharpoondown>' : '\U000021c1',
        '\\<leftharpoonup>' : '\U000021bc',
        '\\<rightharpoonup>' : '\U000021c0',
        '\\<rightleftharpoons>' : '\U000021cc',
        '\\<leadsto>' : '\U0000219d',
        '\\<downharpoonleft>' : '\U000021c3',
        '\\<downharpoonright>' : '\U000021c2',
        '\\<upharpoonleft>' : '\U000021bf',
        '\\<upharpoonright>' : '\U000021be',
        '\\<restriction>' : '\U000021be',
        '\\<Colon>' : '\U00002237',
        '\\<up>' : '\U00002191',
        '\\<Up>' : '\U000021d1',
        '\\<down>' : '\U00002193',
        '\\<Down>' : '\U000021d3',
        '\\<updown>' : '\U00002195',
        '\\<Updown>' : '\U000021d5',
        '\\<langle>' : '\U000027e8',
        '\\<rangle>' : '\U000027e9',
        '\\<lceil>' : '\U00002308',
        '\\<rceil>' : '\U00002309',
        '\\<lfloor>' : '\U0000230a',
        '\\<rfloor>' : '\U0000230b',
        '\\<lparr>' : '\U00002987',
        '\\<rparr>' : '\U00002988',
        '\\<lbrakk>' : '\U000027e6',
        '\\<rbrakk>' : '\U000027e7',
        '\\<lbrace>' : '\U00002983',
        '\\<rbrace>' : '\U00002984',
        '\\<guillemotleft>' : '\U000000ab',
        '\\<guillemotright>' : '\U000000bb',
        '\\<bottom>' : '\U000022a5',
        '\\<top>' : '\U000022a4',
        '\\<and>' : '\U00002227',
        '\\<And>' : '\U000022c0',
        '\\<or>' : '\U00002228',
        '\\<Or>' : '\U000022c1',
        '\\<forall>' : '\U00002200',
        '\\<exists>' : '\U00002203',
        '\\<nexists>' : '\U00002204',
        '\\<not>' : '\U000000ac',
        '\\<box>' : '\U000025a1',
        '\\<diamond>' : '\U000025c7',
        '\\<turnstile>' : '\U000022a2',
        '\\<Turnstile>' : '\U000022a8',
        '\\<tturnstile>' : '\U000022a9',
        '\\<TTurnstile>' : '\U000022ab',
        '\\<stileturn>' : '\U000022a3',
        '\\<surd>' : '\U0000221a',
        '\\<le>' : '\U00002264',
        '\\<ge>' : '\U00002265',
        '\\<lless>' : '\U0000226a',
        '\\<ggreater>' : '\U0000226b',
        '\\<lesssim>' : '\U00002272',
        '\\<greatersim>' : '\U00002273',
        '\\<lessapprox>' : '\U00002a85',
        '\\<greaterapprox>' : '\U00002a86',
        '\\<in>' : '\U00002208',
        '\\<notin>' : '\U00002209',
        '\\<subset>' : '\U00002282',
        '\\<supset>' : '\U00002283',
        '\\<subseteq>' : '\U00002286',
        '\\<supseteq>' : '\U00002287',
        '\\<sqsubset>' : '\U0000228f',
        '\\<sqsupset>' : '\U00002290',
        '\\<sqsubseteq>' : '\U00002291',
        '\\<sqsupseteq>' : '\U00002292',
        '\\<inter>' : '\U00002229',
        '\\<Inter>' : '\U000022c2',
        '\\<union>' : '\U0000222a',
        '\\<Union>' : '\U000022c3',
        '\\<squnion>' : '\U00002294',
        '\\<Squnion>' : '\U00002a06',
        '\\<sqinter>' : '\U00002293',
        '\\<Sqinter>' : '\U00002a05',
        '\\<setminus>' : '\U00002216',
        '\\<propto>' : '\U0000221d',
        '\\<uplus>' : '\U0000228e',
        '\\<Uplus>' : '\U00002a04',
        '\\<noteq>' : '\U00002260',
        '\\<sim>' : '\U0000223c',
        '\\<doteq>' : '\U00002250',
        '\\<simeq>' : '\U00002243',
        '\\<approx>' : '\U00002248',
        '\\<asymp>' : '\U0000224d',
        '\\<cong>' : '\U00002245',
        '\\<smile>' : '\U00002323',
        '\\<equiv>' : '\U00002261',
        '\\<frown>' : '\U00002322',
        '\\<Join>' : '\U000022c8',
        '\\<bowtie>' : '\U00002a1d',
        '\\<prec>' : '\U0000227a',
        '\\<succ>' : '\U0000227b',
        '\\<preceq>' : '\U0000227c',
        '\\<succeq>' : '\U0000227d',
        '\\<parallel>' : '\U00002225',
        '\\<bar>' : '\U000000a6',
        '\\<plusminus>' : '\U000000b1',
        '\\<minusplus>' : '\U00002213',
        '\\<times>' : '\U000000d7',
        '\\<div>' : '\U000000f7',
        '\\<cdot>' : '\U000022c5',
        '\\<star>' : '\U000022c6',
        '\\<bullet>' : '\U00002219',
        '\\<circ>' : '\U00002218',
        '\\<dagger>' : '\U00002020',
        '\\<ddagger>' : '\U00002021',
        '\\<lhd>' : '\U000022b2',
        '\\<rhd>' : '\U000022b3',
        '\\<unlhd>' : '\U000022b4',
        '\\<unrhd>' : '\U000022b5',
        '\\<triangleleft>' : '\U000025c3',
        '\\<triangleright>' : '\U000025b9',
        '\\<triangle>' : '\U000025b3',
        '\\<triangleq>' : '\U0000225c',
        '\\<oplus>' : '\U00002295',
        '\\<Oplus>' : '\U00002a01',
        '\\<otimes>' : '\U00002297',
        '\\<Otimes>' : '\U00002a02',
        '\\<odot>' : '\U00002299',
        '\\<Odot>' : '\U00002a00',
        '\\<ominus>' : '\U00002296',
        '\\<oslash>' : '\U00002298',
        '\\<dots>' : '\U00002026',
        '\\<cdots>' : '\U000022ef',
        '\\<Sum>' : '\U00002211',
        '\\<Prod>' : '\U0000220f',
        '\\<Coprod>' : '\U00002210',
        '\\<infinity>' : '\U0000221e',
        '\\<integral>' : '\U0000222b',
        '\\<ointegral>' : '\U0000222e',
        '\\<clubsuit>' : '\U00002663',
        '\\<diamondsuit>' : '\U00002662',
        '\\<heartsuit>' : '\U00002661',
        '\\<spadesuit>' : '\U00002660',
        '\\<aleph>' : '\U00002135',
        '\\<emptyset>' : '\U00002205',
        '\\<nabla>' : '\U00002207',
        '\\<partial>' : '\U00002202',
        '\\<flat>' : '\U0000266d',
        '\\<natural>' : '\U0000266e',
        '\\<sharp>' : '\U0000266f',
        '\\<angle>' : '\U00002220',
        '\\<copyright>' : '\U000000a9',
        '\\<registered>' : '\U000000ae',
        '\\<hyphen>' : '\U000000ad',
        '\\<inverse>' : '\U000000af',
        '\\<onequarter>' : '\U000000bc',
        '\\<onehalf>' : '\U000000bd',
        '\\<threequarters>' : '\U000000be',
        '\\<ordfeminine>' : '\U000000aa',
        '\\<ordmasculine>' : '\U000000ba',
        '\\<section>' : '\U000000a7',
        '\\<paragraph>' : '\U000000b6',
        '\\<exclamdown>' : '\U000000a1',
        '\\<questiondown>' : '\U000000bf',
        '\\<euro>' : '\U000020ac',
        '\\<pounds>' : '\U000000a3',
        '\\<yen>' : '\U000000a5',
        '\\<cent>' : '\U000000a2',
        '\\<currency>' : '\U000000a4',
        '\\<degree>' : '\U000000b0',
        '\\<amalg>' : '\U00002a3f',
        '\\<mho>' : '\U00002127',
        '\\<lozenge>' : '\U000025ca',
        '\\<wp>' : '\U00002118',
        '\\<wrong>' : '\U00002240',
        '\\<struct>' : '\U000022c4',
        '\\<acute>' : '\U000000b4',
        '\\<index>' : '\U00000131',
        '\\<dieresis>' : '\U000000a8',
        '\\<cedilla>' : '\U000000b8',
        '\\<hungarumlaut>' : '\U000002dd',
        '\\<some>' : '\U000003f5',
        '\\<newline>' : '\U000023ce',
        '\\<open>' : '\U00002039',
        '\\<close>' : '\U0000203a',
        '\\<here>' : '\U00002302',
        '\\<^sub>' : '\U000021e9',
        '\\<^sup>' : '\U000021e7',
        '\\<^bold>' : '\U00002759',
        '\\<^bsub>' : '\U000021d8',
        '\\<^esub>' : '\U000021d9',
        '\\<^bsup>' : '\U000021d7',
        '\\<^esup>' : '\U000021d6',
    }

    # Value of the `lang` option -> symbol table to use.
    lang_map = {'isabelle' : isabelle_symbols, 'latex' : latex_symbols}

    def __init__(self, **options):
        Filter.__init__(self, **options)
        chosen = get_choice_opt(options, 'lang',
                                ['isabelle', 'latex'], 'isabelle')
        self.symbols = self.lang_map[chosen]

    def filter(self, lexer, stream):
        """Swap every token whose whole value is a known symbol for its
        Unicode character; the token type is never changed.
        """
        for ttype, value in stream:
            # Unknown values map to themselves.
            yield ttype, self.symbols.get(value, value)
687
688
class KeywordCaseFilter(Filter):
    """Change the letter case of keyword tokens.

    Keywords can be lowercased, uppercased or capitalized (first letter
    uppercase, rest lowercase).

    This can be useful e.g. if you highlight Pascal code and want to adapt
    the code to your styleguide.

    Options accepted:

    `case` : string
       The casing to convert keywords to. Must be one of ``'lower'``,
       ``'upper'`` or ``'capitalize'``.  The default is ``'lower'``.
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        method_name = get_choice_opt(
            options, 'case', ['lower', 'upper', 'capitalize'], 'lower')
        # The option values are exactly the names of the str methods.
        self.convert = getattr(str, method_name)

    def filter(self, lexer, stream):
        """Apply the configured conversion to keyword tokens only."""
        for ttype, value in stream:
            yield ttype, (self.convert(value) if ttype in Keyword else value)
716
717
class NameHighlightFilter(Filter):
    """Give selected Name (and Name.*) tokens a different token type.

    Example::

        filter = NameHighlightFilter(
            names=['foo', 'bar', 'baz'],
            tokentype=Name.Function,
        )

    This would highlight the names "foo", "bar" and "baz"
    as functions. `Name.Function` is the default token type.

    Options accepted:

    `names` : list of strings
      A list of names that should be given the different token type.
      There is no default.
    `tokentype` : TokenType or string
      A token type or a string containing a token type name that is
      used for highlighting the strings in `names`.  The default is
      `Name.Function`.
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        self.names = set(get_list_opt(options, 'names', []))
        requested = options.get('tokentype')
        self.tokentype = (string_to_tokentype(requested) if requested
                          else Name.Function)

    def filter(self, lexer, stream):
        """Retag Name tokens whose value is listed in `names`."""
        for ttype, value in stream:
            selected = ttype in Name and value in self.names
            yield (self.tokentype if selected else ttype), value
757
758
class ErrorToken(Exception):
    """Default exception raised by `RaiseOnErrorTokenFilter`."""
761
762
class RaiseOnErrorTokenFilter(Filter):
    """Raise an exception when the lexer generates an error token.

    Options accepted:

    `excclass` : Exception class
      The exception class to raise.
      The default is `pygments.filters.ErrorToken`.

    Raises OptionError at construction time if `excclass` is not a
    subclass of Exception.

    .. versionadded:: 0.8
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        self.exception = options.get('excclass', ErrorToken)
        try:
            # issubclass() raises TypeError if its first argument is not a
            # class at all.
            is_exception_class = issubclass(self.exception, Exception)
        except TypeError:
            is_exception_class = False
        # Raised outside the handler so the user-facing OptionError does not
        # carry the internal TypeError as implicit exception context.
        if not is_exception_class:
            raise OptionError('excclass option is not an exception class')

    def filter(self, lexer, stream):
        """Pass tokens through, raising the configured exception (with the
        offending text as argument) on the first `Error` token.
        """
        for ttype, value in stream:
            if ttype is Error:
                raise self.exception(value)
            yield ttype, value
790
791
class VisibleWhitespaceFilter(Filter):
    """Convert tabs, newlines and/or spaces to visible characters.

    Options accepted:

    `spaces` : string or bool
      If this is a one-character string, spaces will be replaced by this
      string. If it is another true value, spaces will be replaced by ``·``
      (unicode MIDDLE DOT).  If it is a false value, spaces will not be
      replaced.  The default is ``False``.
    `tabs` : string or bool
      The same as for `spaces`, but the default replacement character is ``»``
      (unicode RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK).  The default value
      is ``False``.  Note: this will not work if the `tabsize` option for the
      lexer is nonzero, as tabs will already have been expanded then.
    `tabsize` : int
      If tabs are to be replaced by this filter (see the `tabs` option), this
      is the total number of characters that a tab should be expanded to.
      The default is ``8``.
    `newlines` : string or bool
      The same as for `spaces`, but the default replacement character is ``¶``
      (unicode PILCROW SIGN).  The default value is ``False``.
    `wstokentype` : bool
      If true, give whitespace the special `Whitespace` token type.  This allows
      styling the visible whitespace differently (e.g. greyed out), but it can
      disrupt background colors.  The default is ``True``.

    .. versionadded:: 0.8
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        default_markers = {'spaces': '·', 'tabs': '»', 'newlines': '¶'}
        for name, default in default_markers.items():
            opt = options.get(name, False)
            if isinstance(opt, str) and len(opt) == 1:
                marker = opt        # explicit one-character replacement
            elif opt:
                marker = default    # any other true value: use the default
            else:
                marker = ''         # false value: leave this kind alone
            setattr(self, name, marker)
        tabsize = get_int_opt(options, 'tabsize', 8)
        if self.tabs:
            # Pad the marker so a replaced tab is `tabsize` characters wide.
            self.tabs += ' ' * (tabsize - 1)
        if self.newlines:
            # Keep the real line break after the visible marker.
            self.newlines += '\n'
        self.wstt = get_bool_opt(options, 'wstokentype', True)

    def filter(self, lexer, stream):
        """Rewrite whitespace in every token according to the options."""
        if not self.wstt:
            # Simple mode: plain text substitution, token types untouched.
            candidates = ((' ', self.spaces),
                          ('\t', self.tabs),
                          ('\n', self.newlines))
            active = [(char, marker) for char, marker in candidates if marker]
            for ttype, value in stream:
                for char, marker in active:
                    value = value.replace(char, marker)
                yield ttype, value
            return

        # Token-type mode: each whitespace character becomes its own
        # `Whitespace` token, replaced by its marker when one is configured.
        visible = {
            ' ': self.spaces or ' ',
            '\t': self.tabs or '\t',
            '\n': self.newlines or '\n',
        }
        whitespace_re = re.compile(r'\s')

        def replacefunc(wschar):
            # Whitespace other than space/tab/newline is kept as it is.
            return visible.get(wschar, wschar)

        for ttype, value in stream:
            yield from _replace_special(ttype, value, whitespace_re,
                                        Whitespace, replacefunc)
869
870
class GobbleFilter(Filter):
    """Gobble source code lines (eats initial characters).

    The first ``n`` characters of every line of code are dropped.  This
    may be useful when the source code fed to the lexer is indented by a
    fixed amount of space that isn't desired in the output.

    Options accepted:

    `n` : int
       The number of characters to gobble.

    .. versionadded:: 1.2
    """
    def __init__(self, **options):
        Filter.__init__(self, **options)
        self.n = get_int_opt(options, 'n', 0)

    def gobble(self, value, left):
        """Drop up to *left* leading characters from *value*.

        Returns ``(remaining_text, characters_still_to_drop)``.
        """
        if left >= len(value):
            # The whole piece is eaten; the rest carries over.
            return '', left - len(value)
        return value[left:], 0

    def filter(self, lexer, stream):
        """Strip the configured number of characters from each line."""
        n = self.n
        left = n  # Characters still to gobble on the current line.
        for ttype, value in stream:
            # The first piece continues the current line, so only ``left``
            # characters remain to be removed from it; every later piece
            # starts a new line and loses up to ``n`` characters.
            first, *following = value.split('\n')
            first, left = self.gobble(first, left)
            pieces = [first]
            for piece in following:
                piece, left = self.gobble(piece, n)
                pieces.append(piece)
            value = '\n'.join(pieces)

            if value != '':
                yield ttype, value
908
909
class TokenMergeFilter(Filter):
    """Merge consecutive tokens with the same token type in the output stream.

    .. versionadded:: 1.2
    """
    def __init__(self, **options):
        Filter.__init__(self, **options)

    def filter(self, lexer, stream):
        """Yield one token per run of identically typed input tokens."""
        merged_type = merged_value = None
        for ttype, value in stream:
            if ttype is merged_type:
                # Same type as the pending run: extend it.
                merged_value += value
                continue
            # The type changed: flush the pending run, then start a new one.
            if merged_type is not None:
                yield merged_type, merged_value
            merged_type, merged_value = ttype, value
        # Flush the final run, if the stream produced anything.
        if merged_type is not None:
            yield merged_type, merged_value
931
932
# Registry of the built-in filters, keyed by the name that
# find_filter_class() and get_filter_by_name() accept.  get_all_filters()
# yields these names first, in the order listed here.
FILTERS = {
    'codetagify': CodeTagFilter,
    'keywordcase': KeywordCaseFilter,
    'highlight': NameHighlightFilter,
    'raiseonerror': RaiseOnErrorTokenFilter,
    'whitespace': VisibleWhitespaceFilter,
    'gobble': GobbleFilter,
    'tokenmerge': TokenMergeFilter,
    'symbols': SymbolFilter,
}