1"""
2 pygments.filters
3 ~~~~~~~~~~~~~~~~
4
5 Module containing filter lookup functions and default
6 filters.
7
8 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10"""
11
12import re
13
14from pygments.token import String, Comment, Keyword, Name, Error, Whitespace, \
15 string_to_tokentype
16from pygments.filter import Filter
17from pygments.util import get_list_opt, get_int_opt, get_bool_opt, \
18 get_choice_opt, ClassNotFound, OptionError
19from pygments.plugin import find_plugin_filters
20
21
def find_filter_class(filtername):
    """Look up a filter class by its registered name.

    The built-in ``FILTERS`` table is consulted first; if the name is not
    found there, the ``(name, class)`` pairs produced by
    ``find_plugin_filters()`` are scanned in order.

    Returns the matching class, or ``None`` when nothing matches.
    """
    try:
        return FILTERS[filtername]
    except KeyError:
        pass
    # First plugin entry whose name matches, or None if there is none.
    return next(
        (cls for name, cls in find_plugin_filters() if name == filtername),
        None,
    )
30
31
def get_filter_by_name(filtername, **options):
    """Instantiate the filter registered under *filtername*.

    All keyword *options* are forwarded to the filter's initializer.

    Raises ``ClassNotFound`` when ``find_filter_class()`` cannot locate a
    filter of that name.
    """
    filter_cls = find_filter_class(filtername)
    if not filter_cls:
        raise ClassNotFound(f'filter {filtername!r} not found')
    return filter_cls(**options)
43
44
def get_all_filters():
    """Generate the names of all known filters.

    The keys of the built-in ``FILTERS`` table come first, followed by the
    names reported by ``find_plugin_filters()``.
    """
    for builtin_name in FILTERS:
        yield builtin_name
    yield from (plugin_name for plugin_name, _ in find_plugin_filters())
50
51
def _replace_special(ttype, value, regex, specialttype,
                     replacefunc=lambda x: x):
    """Split *value* into tokens, re-tagging every match of *regex*.

    Text between matches is yielded unchanged as ``(ttype, text)``.  The
    text of each match is passed through *replacefunc* and yielded as
    ``(specialttype, replaced_text)``.  Tokens are produced in the order
    the text occurs in *value*; nothing is yielded for an empty *value*.

    Zero-width matches (e.g. from a pattern whose only group is empty) are
    ignored: they carry no text, so emitting them would only produce empty
    tokens and needlessly split the surrounding text.
    """
    last = 0  # end offset of the previous non-empty match
    for match in regex.finditer(value):
        start, end = match.span()
        if start == end:
            # Empty match: nothing to re-tag.
            continue
        if start != last:
            yield ttype, value[last:start]
        yield specialttype, replacefunc(value[start:end])
        last = end
    if last != len(value):
        # Trailing text after the final match (or the whole value when
        # nothing matched).
        yield ttype, value[last:]
63
64
class CodeTagFilter(Filter):
    """Highlight special code tags in comments and docstrings.

    Options accepted:

    `codetags` : list of strings
       A list of strings that are flagged as code tags.  The default is to
       highlight ``XXX``, ``TODO``, ``FIXME``, ``BUG`` and ``NOTE``.
       Empty strings in the list are ignored; if no usable tag remains,
       nothing is highlighted.

    .. versionchanged:: 2.13
       Now recognizes ``FIXME`` by default.
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        tags = get_list_opt(options, 'codetags',
                            ['XXX', 'TODO', 'FIXME', 'BUG', 'NOTE'])
        escaped = [re.escape(tag) for tag in tags if tag]
        if escaped:
            pattern = r'\b({})\b'.format('|'.join(escaped))
        else:
            # An empty alternation would match the empty string at every
            # word boundary; use a pattern that can never match instead.
            pattern = r'(?!)'
        self.tag_re = re.compile(pattern)

    def filter(self, lexer, stream):
        """Yield the tokens of *stream*, marking code tags as
        ``Comment.Special`` inside docstrings and non-preprocessor comments.
        """
        regex = self.tag_re
        for ttype, value in stream:
            # Parenthesised to make the ``and``-before-``or`` grouping
            # explicit: docstrings, or comments other than preprocessor ones.
            if ttype in String.Doc or (
                    ttype in Comment and ttype not in Comment.Preproc):
                yield from _replace_special(ttype, value, regex,
                                            Comment.Special)
            else:
                yield ttype, value
95
96
class SymbolFilter(Filter):
    """Replace symbol escapes such as \\<longrightarrow> (Isabelle) or
    \\longrightarrow (LaTeX) with the corresponding Unicode characters.

    This is mostly useful for HTML or console output when you want to
    approximate the source rendering you'd see in an IDE.

    Options accepted:

    `lang` : string
       The symbol language.  Must be one of ``'isabelle'`` or
       ``'latex'``.  The default is ``'isabelle'``.
    """

    # LaTeX command -> Unicode character.
    latex_symbols = {
        '\\alpha': '\U000003b1',
        '\\beta': '\U000003b2',
        '\\gamma': '\U000003b3',
        '\\delta': '\U000003b4',
        '\\varepsilon': '\U000003b5',
        '\\zeta': '\U000003b6',
        '\\eta': '\U000003b7',
        '\\vartheta': '\U000003b8',
        '\\iota': '\U000003b9',
        '\\kappa': '\U000003ba',
        '\\lambda': '\U000003bb',
        '\\mu': '\U000003bc',
        '\\nu': '\U000003bd',
        '\\xi': '\U000003be',
        '\\pi': '\U000003c0',
        '\\varrho': '\U000003c1',
        '\\sigma': '\U000003c3',
        '\\tau': '\U000003c4',
        '\\upsilon': '\U000003c5',
        '\\varphi': '\U000003c6',
        '\\chi': '\U000003c7',
        '\\psi': '\U000003c8',
        '\\omega': '\U000003c9',
        '\\Gamma': '\U00000393',
        '\\Delta': '\U00000394',
        '\\Theta': '\U00000398',
        '\\Lambda': '\U0000039b',
        '\\Xi': '\U0000039e',
        '\\Pi': '\U000003a0',
        '\\Sigma': '\U000003a3',
        '\\Upsilon': '\U000003a5',
        '\\Phi': '\U000003a6',
        '\\Psi': '\U000003a8',
        '\\Omega': '\U000003a9',
        '\\leftarrow': '\U00002190',
        '\\longleftarrow': '\U000027f5',
        '\\rightarrow': '\U00002192',
        '\\longrightarrow': '\U000027f6',
        '\\Leftarrow': '\U000021d0',
        '\\Longleftarrow': '\U000027f8',
        '\\Rightarrow': '\U000021d2',
        '\\Longrightarrow': '\U000027f9',
        '\\leftrightarrow': '\U00002194',
        '\\longleftrightarrow': '\U000027f7',
        '\\Leftrightarrow': '\U000021d4',
        '\\Longleftrightarrow': '\U000027fa',
        '\\mapsto': '\U000021a6',
        '\\longmapsto': '\U000027fc',
        '\\relbar': '\U00002500',
        '\\Relbar': '\U00002550',
        '\\hookleftarrow': '\U000021a9',
        '\\hookrightarrow': '\U000021aa',
        '\\leftharpoondown': '\U000021bd',
        '\\rightharpoondown': '\U000021c1',
        '\\leftharpoonup': '\U000021bc',
        '\\rightharpoonup': '\U000021c0',
        '\\rightleftharpoons': '\U000021cc',
        '\\leadsto': '\U0000219d',
        '\\downharpoonleft': '\U000021c3',
        '\\downharpoonright': '\U000021c2',
        '\\upharpoonleft': '\U000021bf',
        '\\upharpoonright': '\U000021be',
        '\\restriction': '\U000021be',
        '\\uparrow': '\U00002191',
        '\\Uparrow': '\U000021d1',
        '\\downarrow': '\U00002193',
        '\\Downarrow': '\U000021d3',
        '\\updownarrow': '\U00002195',
        '\\Updownarrow': '\U000021d5',
        '\\langle': '\U000027e8',
        '\\rangle': '\U000027e9',
        '\\lceil': '\U00002308',
        '\\rceil': '\U00002309',
        '\\lfloor': '\U0000230a',
        '\\rfloor': '\U0000230b',
        '\\flqq': '\U000000ab',
        '\\frqq': '\U000000bb',
        '\\bot': '\U000022a5',
        '\\top': '\U000022a4',
        '\\wedge': '\U00002227',
        '\\bigwedge': '\U000022c0',
        '\\vee': '\U00002228',
        '\\bigvee': '\U000022c1',
        '\\forall': '\U00002200',
        '\\exists': '\U00002203',
        '\\nexists': '\U00002204',
        '\\neg': '\U000000ac',
        '\\Box': '\U000025a1',
        '\\Diamond': '\U000025c7',
        '\\vdash': '\U000022a2',
        '\\models': '\U000022a8',
        '\\dashv': '\U000022a3',
        '\\surd': '\U0000221a',
        '\\le': '\U00002264',
        '\\ge': '\U00002265',
        '\\ll': '\U0000226a',
        '\\gg': '\U0000226b',
        '\\lesssim': '\U00002272',
        '\\gtrsim': '\U00002273',
        '\\lessapprox': '\U00002a85',
        '\\gtrapprox': '\U00002a86',
        '\\in': '\U00002208',
        '\\notin': '\U00002209',
        '\\subset': '\U00002282',
        '\\supset': '\U00002283',
        '\\subseteq': '\U00002286',
        '\\supseteq': '\U00002287',
        '\\sqsubset': '\U0000228f',
        '\\sqsupset': '\U00002290',
        '\\sqsubseteq': '\U00002291',
        '\\sqsupseteq': '\U00002292',
        '\\cap': '\U00002229',
        '\\bigcap': '\U000022c2',
        '\\cup': '\U0000222a',
        '\\bigcup': '\U000022c3',
        '\\sqcup': '\U00002294',
        '\\bigsqcup': '\U00002a06',
        '\\sqcap': '\U00002293',
        '\\Bigsqcap': '\U00002a05',
        '\\setminus': '\U00002216',
        '\\propto': '\U0000221d',
        '\\uplus': '\U0000228e',
        '\\bigplus': '\U00002a04',
        '\\sim': '\U0000223c',
        '\\doteq': '\U00002250',
        '\\simeq': '\U00002243',
        '\\approx': '\U00002248',
        '\\asymp': '\U0000224d',
        '\\cong': '\U00002245',
        '\\equiv': '\U00002261',
        '\\Join': '\U000022c8',
        '\\bowtie': '\U00002a1d',
        '\\prec': '\U0000227a',
        '\\succ': '\U0000227b',
        '\\preceq': '\U0000227c',
        '\\succeq': '\U0000227d',
        '\\parallel': '\U00002225',
        '\\mid': '\U000000a6',
        '\\pm': '\U000000b1',
        '\\mp': '\U00002213',
        '\\times': '\U000000d7',
        '\\div': '\U000000f7',
        '\\cdot': '\U000022c5',
        '\\star': '\U000022c6',
        '\\circ': '\U00002218',
        '\\dagger': '\U00002020',
        '\\ddagger': '\U00002021',
        '\\lhd': '\U000022b2',
        '\\rhd': '\U000022b3',
        '\\unlhd': '\U000022b4',
        '\\unrhd': '\U000022b5',
        '\\triangleleft': '\U000025c3',
        '\\triangleright': '\U000025b9',
        '\\triangle': '\U000025b3',
        '\\triangleq': '\U0000225c',
        '\\oplus': '\U00002295',
        '\\bigoplus': '\U00002a01',
        '\\otimes': '\U00002297',
        '\\bigotimes': '\U00002a02',
        '\\odot': '\U00002299',
        '\\bigodot': '\U00002a00',
        '\\ominus': '\U00002296',
        '\\oslash': '\U00002298',
        '\\dots': '\U00002026',
        '\\cdots': '\U000022ef',
        '\\sum': '\U00002211',
        '\\prod': '\U0000220f',
        '\\coprod': '\U00002210',
        '\\infty': '\U0000221e',
        '\\int': '\U0000222b',
        '\\oint': '\U0000222e',
        '\\clubsuit': '\U00002663',
        '\\diamondsuit': '\U00002662',
        '\\heartsuit': '\U00002661',
        '\\spadesuit': '\U00002660',
        '\\aleph': '\U00002135',
        '\\emptyset': '\U00002205',
        '\\nabla': '\U00002207',
        '\\partial': '\U00002202',
        '\\flat': '\U0000266d',
        '\\natural': '\U0000266e',
        '\\sharp': '\U0000266f',
        '\\angle': '\U00002220',
        '\\copyright': '\U000000a9',
        '\\textregistered': '\U000000ae',
        '\\textonequarter': '\U000000bc',
        '\\textonehalf': '\U000000bd',
        '\\textthreequarters': '\U000000be',
        '\\textordfeminine': '\U000000aa',
        '\\textordmasculine': '\U000000ba',
        '\\euro': '\U000020ac',
        '\\pounds': '\U000000a3',
        '\\yen': '\U000000a5',
        '\\textcent': '\U000000a2',
        '\\textcurrency': '\U000000a4',
        '\\textdegree': '\U000000b0',
    }

    # Isabelle symbol escape -> Unicode character.
    isabelle_symbols = {
        '\\<zero>': '\U0001d7ec',
        '\\<one>': '\U0001d7ed',
        '\\<two>': '\U0001d7ee',
        '\\<three>': '\U0001d7ef',
        '\\<four>': '\U0001d7f0',
        '\\<five>': '\U0001d7f1',
        '\\<six>': '\U0001d7f2',
        '\\<seven>': '\U0001d7f3',
        '\\<eight>': '\U0001d7f4',
        '\\<nine>': '\U0001d7f5',
        '\\<A>': '\U0001d49c',
        '\\<B>': '\U0000212c',
        '\\<C>': '\U0001d49e',
        '\\<D>': '\U0001d49f',
        '\\<E>': '\U00002130',
        '\\<F>': '\U00002131',
        '\\<G>': '\U0001d4a2',
        '\\<H>': '\U0000210b',
        '\\<I>': '\U00002110',
        '\\<J>': '\U0001d4a5',
        '\\<K>': '\U0001d4a6',
        '\\<L>': '\U00002112',
        '\\<M>': '\U00002133',
        '\\<N>': '\U0001d4a9',
        '\\<O>': '\U0001d4aa',
        '\\<P>': '\U0001d4ab',
        '\\<Q>': '\U0001d4ac',
        '\\<R>': '\U0000211b',
        '\\<S>': '\U0001d4ae',
        '\\<T>': '\U0001d4af',
        '\\<U>': '\U0001d4b0',
        '\\<V>': '\U0001d4b1',
        '\\<W>': '\U0001d4b2',
        '\\<X>': '\U0001d4b3',
        '\\<Y>': '\U0001d4b4',
        '\\<Z>': '\U0001d4b5',
        '\\<a>': '\U0001d5ba',
        '\\<b>': '\U0001d5bb',
        '\\<c>': '\U0001d5bc',
        '\\<d>': '\U0001d5bd',
        '\\<e>': '\U0001d5be',
        '\\<f>': '\U0001d5bf',
        '\\<g>': '\U0001d5c0',
        '\\<h>': '\U0001d5c1',
        '\\<i>': '\U0001d5c2',
        '\\<j>': '\U0001d5c3',
        '\\<k>': '\U0001d5c4',
        '\\<l>': '\U0001d5c5',
        '\\<m>': '\U0001d5c6',
        '\\<n>': '\U0001d5c7',
        '\\<o>': '\U0001d5c8',
        '\\<p>': '\U0001d5c9',
        '\\<q>': '\U0001d5ca',
        '\\<r>': '\U0001d5cb',
        '\\<s>': '\U0001d5cc',
        '\\<t>': '\U0001d5cd',
        '\\<u>': '\U0001d5ce',
        '\\<v>': '\U0001d5cf',
        '\\<w>': '\U0001d5d0',
        '\\<x>': '\U0001d5d1',
        '\\<y>': '\U0001d5d2',
        '\\<z>': '\U0001d5d3',
        '\\<AA>': '\U0001d504',
        '\\<BB>': '\U0001d505',
        '\\<CC>': '\U0000212d',
        '\\<DD>': '\U0001d507',
        '\\<EE>': '\U0001d508',
        '\\<FF>': '\U0001d509',
        '\\<GG>': '\U0001d50a',
        '\\<HH>': '\U0000210c',
        '\\<II>': '\U00002111',
        '\\<JJ>': '\U0001d50d',
        '\\<KK>': '\U0001d50e',
        '\\<LL>': '\U0001d50f',
        '\\<MM>': '\U0001d510',
        '\\<NN>': '\U0001d511',
        '\\<OO>': '\U0001d512',
        '\\<PP>': '\U0001d513',
        '\\<QQ>': '\U0001d514',
        '\\<RR>': '\U0000211c',
        '\\<SS>': '\U0001d516',
        '\\<TT>': '\U0001d517',
        '\\<UU>': '\U0001d518',
        '\\<VV>': '\U0001d519',
        '\\<WW>': '\U0001d51a',
        '\\<XX>': '\U0001d51b',
        '\\<YY>': '\U0001d51c',
        '\\<ZZ>': '\U00002128',
        '\\<aa>': '\U0001d51e',
        '\\<bb>': '\U0001d51f',
        '\\<cc>': '\U0001d520',
        '\\<dd>': '\U0001d521',
        '\\<ee>': '\U0001d522',
        '\\<ff>': '\U0001d523',
        '\\<gg>': '\U0001d524',
        '\\<hh>': '\U0001d525',
        '\\<ii>': '\U0001d526',
        '\\<jj>': '\U0001d527',
        '\\<kk>': '\U0001d528',
        '\\<ll>': '\U0001d529',
        '\\<mm>': '\U0001d52a',
        '\\<nn>': '\U0001d52b',
        '\\<oo>': '\U0001d52c',
        '\\<pp>': '\U0001d52d',
        '\\<qq>': '\U0001d52e',
        '\\<rr>': '\U0001d52f',
        '\\<ss>': '\U0001d530',
        '\\<tt>': '\U0001d531',
        '\\<uu>': '\U0001d532',
        '\\<vv>': '\U0001d533',
        '\\<ww>': '\U0001d534',
        '\\<xx>': '\U0001d535',
        '\\<yy>': '\U0001d536',
        '\\<zz>': '\U0001d537',
        '\\<alpha>': '\U000003b1',
        '\\<beta>': '\U000003b2',
        '\\<gamma>': '\U000003b3',
        '\\<delta>': '\U000003b4',
        '\\<epsilon>': '\U000003b5',
        '\\<zeta>': '\U000003b6',
        '\\<eta>': '\U000003b7',
        '\\<theta>': '\U000003b8',
        '\\<iota>': '\U000003b9',
        '\\<kappa>': '\U000003ba',
        '\\<lambda>': '\U000003bb',
        '\\<mu>': '\U000003bc',
        '\\<nu>': '\U000003bd',
        '\\<xi>': '\U000003be',
        '\\<pi>': '\U000003c0',
        '\\<rho>': '\U000003c1',
        '\\<sigma>': '\U000003c3',
        '\\<tau>': '\U000003c4',
        '\\<upsilon>': '\U000003c5',
        '\\<phi>': '\U000003c6',
        '\\<chi>': '\U000003c7',
        '\\<psi>': '\U000003c8',
        '\\<omega>': '\U000003c9',
        '\\<Gamma>': '\U00000393',
        '\\<Delta>': '\U00000394',
        '\\<Theta>': '\U00000398',
        '\\<Lambda>': '\U0000039b',
        '\\<Xi>': '\U0000039e',
        '\\<Pi>': '\U000003a0',
        '\\<Sigma>': '\U000003a3',
        '\\<Upsilon>': '\U000003a5',
        '\\<Phi>': '\U000003a6',
        '\\<Psi>': '\U000003a8',
        '\\<Omega>': '\U000003a9',
        '\\<bool>': '\U0001d539',
        '\\<complex>': '\U00002102',
        '\\<nat>': '\U00002115',
        '\\<rat>': '\U0000211a',
        '\\<real>': '\U0000211d',
        '\\<int>': '\U00002124',
        '\\<leftarrow>': '\U00002190',
        '\\<longleftarrow>': '\U000027f5',
        '\\<rightarrow>': '\U00002192',
        '\\<longrightarrow>': '\U000027f6',
        '\\<Leftarrow>': '\U000021d0',
        '\\<Longleftarrow>': '\U000027f8',
        '\\<Rightarrow>': '\U000021d2',
        '\\<Longrightarrow>': '\U000027f9',
        '\\<leftrightarrow>': '\U00002194',
        '\\<longleftrightarrow>': '\U000027f7',
        '\\<Leftrightarrow>': '\U000021d4',
        '\\<Longleftrightarrow>': '\U000027fa',
        '\\<mapsto>': '\U000021a6',
        '\\<longmapsto>': '\U000027fc',
        '\\<midarrow>': '\U00002500',
        '\\<Midarrow>': '\U00002550',
        '\\<hookleftarrow>': '\U000021a9',
        '\\<hookrightarrow>': '\U000021aa',
        '\\<leftharpoondown>': '\U000021bd',
        '\\<rightharpoondown>': '\U000021c1',
        '\\<leftharpoonup>': '\U000021bc',
        '\\<rightharpoonup>': '\U000021c0',
        '\\<rightleftharpoons>': '\U000021cc',
        '\\<leadsto>': '\U0000219d',
        '\\<downharpoonleft>': '\U000021c3',
        '\\<downharpoonright>': '\U000021c2',
        '\\<upharpoonleft>': '\U000021bf',
        '\\<upharpoonright>': '\U000021be',
        '\\<restriction>': '\U000021be',
        '\\<Colon>': '\U00002237',
        '\\<up>': '\U00002191',
        '\\<Up>': '\U000021d1',
        '\\<down>': '\U00002193',
        '\\<Down>': '\U000021d3',
        '\\<updown>': '\U00002195',
        '\\<Updown>': '\U000021d5',
        '\\<langle>': '\U000027e8',
        '\\<rangle>': '\U000027e9',
        '\\<lceil>': '\U00002308',
        '\\<rceil>': '\U00002309',
        '\\<lfloor>': '\U0000230a',
        '\\<rfloor>': '\U0000230b',
        '\\<lparr>': '\U00002987',
        '\\<rparr>': '\U00002988',
        '\\<lbrakk>': '\U000027e6',
        '\\<rbrakk>': '\U000027e7',
        '\\<lbrace>': '\U00002983',
        '\\<rbrace>': '\U00002984',
        '\\<guillemotleft>': '\U000000ab',
        '\\<guillemotright>': '\U000000bb',
        '\\<bottom>': '\U000022a5',
        '\\<top>': '\U000022a4',
        '\\<and>': '\U00002227',
        '\\<And>': '\U000022c0',
        '\\<or>': '\U00002228',
        '\\<Or>': '\U000022c1',
        '\\<forall>': '\U00002200',
        '\\<exists>': '\U00002203',
        '\\<nexists>': '\U00002204',
        '\\<not>': '\U000000ac',
        '\\<box>': '\U000025a1',
        '\\<diamond>': '\U000025c7',
        '\\<turnstile>': '\U000022a2',
        '\\<Turnstile>': '\U000022a8',
        '\\<tturnstile>': '\U000022a9',
        '\\<TTurnstile>': '\U000022ab',
        '\\<stileturn>': '\U000022a3',
        '\\<surd>': '\U0000221a',
        '\\<le>': '\U00002264',
        '\\<ge>': '\U00002265',
        '\\<lless>': '\U0000226a',
        '\\<ggreater>': '\U0000226b',
        '\\<lesssim>': '\U00002272',
        '\\<greatersim>': '\U00002273',
        '\\<lessapprox>': '\U00002a85',
        '\\<greaterapprox>': '\U00002a86',
        '\\<in>': '\U00002208',
        '\\<notin>': '\U00002209',
        '\\<subset>': '\U00002282',
        '\\<supset>': '\U00002283',
        '\\<subseteq>': '\U00002286',
        '\\<supseteq>': '\U00002287',
        '\\<sqsubset>': '\U0000228f',
        '\\<sqsupset>': '\U00002290',
        '\\<sqsubseteq>': '\U00002291',
        '\\<sqsupseteq>': '\U00002292',
        '\\<inter>': '\U00002229',
        '\\<Inter>': '\U000022c2',
        '\\<union>': '\U0000222a',
        '\\<Union>': '\U000022c3',
        '\\<squnion>': '\U00002294',
        '\\<Squnion>': '\U00002a06',
        '\\<sqinter>': '\U00002293',
        '\\<Sqinter>': '\U00002a05',
        '\\<setminus>': '\U00002216',
        '\\<propto>': '\U0000221d',
        '\\<uplus>': '\U0000228e',
        '\\<Uplus>': '\U00002a04',
        '\\<noteq>': '\U00002260',
        '\\<sim>': '\U0000223c',
        '\\<doteq>': '\U00002250',
        '\\<simeq>': '\U00002243',
        '\\<approx>': '\U00002248',
        '\\<asymp>': '\U0000224d',
        '\\<cong>': '\U00002245',
        '\\<smile>': '\U00002323',
        '\\<equiv>': '\U00002261',
        '\\<frown>': '\U00002322',
        '\\<Join>': '\U000022c8',
        '\\<bowtie>': '\U00002a1d',
        '\\<prec>': '\U0000227a',
        '\\<succ>': '\U0000227b',
        '\\<preceq>': '\U0000227c',
        '\\<succeq>': '\U0000227d',
        '\\<parallel>': '\U00002225',
        '\\<bar>': '\U000000a6',
        '\\<plusminus>': '\U000000b1',
        '\\<minusplus>': '\U00002213',
        '\\<times>': '\U000000d7',
        '\\<div>': '\U000000f7',
        '\\<cdot>': '\U000022c5',
        '\\<star>': '\U000022c6',
        '\\<bullet>': '\U00002219',
        '\\<circ>': '\U00002218',
        '\\<dagger>': '\U00002020',
        '\\<ddagger>': '\U00002021',
        '\\<lhd>': '\U000022b2',
        '\\<rhd>': '\U000022b3',
        '\\<unlhd>': '\U000022b4',
        '\\<unrhd>': '\U000022b5',
        '\\<triangleleft>': '\U000025c3',
        '\\<triangleright>': '\U000025b9',
        '\\<triangle>': '\U000025b3',
        '\\<triangleq>': '\U0000225c',
        '\\<oplus>': '\U00002295',
        '\\<Oplus>': '\U00002a01',
        '\\<otimes>': '\U00002297',
        '\\<Otimes>': '\U00002a02',
        '\\<odot>': '\U00002299',
        '\\<Odot>': '\U00002a00',
        '\\<ominus>': '\U00002296',
        '\\<oslash>': '\U00002298',
        '\\<dots>': '\U00002026',
        '\\<cdots>': '\U000022ef',
        '\\<Sum>': '\U00002211',
        '\\<Prod>': '\U0000220f',
        '\\<Coprod>': '\U00002210',
        '\\<infinity>': '\U0000221e',
        '\\<integral>': '\U0000222b',
        '\\<ointegral>': '\U0000222e',
        '\\<clubsuit>': '\U00002663',
        '\\<diamondsuit>': '\U00002662',
        '\\<heartsuit>': '\U00002661',
        '\\<spadesuit>': '\U00002660',
        '\\<aleph>': '\U00002135',
        '\\<emptyset>': '\U00002205',
        '\\<nabla>': '\U00002207',
        '\\<partial>': '\U00002202',
        '\\<flat>': '\U0000266d',
        '\\<natural>': '\U0000266e',
        '\\<sharp>': '\U0000266f',
        '\\<angle>': '\U00002220',
        '\\<copyright>': '\U000000a9',
        '\\<registered>': '\U000000ae',
        '\\<hyphen>': '\U000000ad',
        '\\<inverse>': '\U000000af',
        '\\<onequarter>': '\U000000bc',
        '\\<onehalf>': '\U000000bd',
        '\\<threequarters>': '\U000000be',
        '\\<ordfeminine>': '\U000000aa',
        '\\<ordmasculine>': '\U000000ba',
        '\\<section>': '\U000000a7',
        '\\<paragraph>': '\U000000b6',
        '\\<exclamdown>': '\U000000a1',
        '\\<questiondown>': '\U000000bf',
        '\\<euro>': '\U000020ac',
        '\\<pounds>': '\U000000a3',
        '\\<yen>': '\U000000a5',
        '\\<cent>': '\U000000a2',
        '\\<currency>': '\U000000a4',
        '\\<degree>': '\U000000b0',
        '\\<amalg>': '\U00002a3f',
        '\\<mho>': '\U00002127',
        '\\<lozenge>': '\U000025ca',
        '\\<wp>': '\U00002118',
        '\\<wrong>': '\U00002240',
        '\\<struct>': '\U000022c4',
        '\\<acute>': '\U000000b4',
        '\\<index>': '\U00000131',
        '\\<dieresis>': '\U000000a8',
        '\\<cedilla>': '\U000000b8',
        '\\<hungarumlaut>': '\U000002dd',
        '\\<some>': '\U000003f5',
        '\\<newline>': '\U000023ce',
        '\\<open>': '\U00002039',
        '\\<close>': '\U0000203a',
        '\\<here>': '\U00002302',
        '\\<^sub>': '\U000021e9',
        '\\<^sup>': '\U000021e7',
        '\\<^bold>': '\U00002759',
        '\\<^bsub>': '\U000021d8',
        '\\<^esub>': '\U000021d9',
        '\\<^bsup>': '\U000021d7',
        '\\<^esup>': '\U000021d6',
    }

    # Value of the ``lang`` option -> symbol table to use.
    lang_map = {
        'isabelle': isabelle_symbols,
        'latex': latex_symbols,
    }

    def __init__(self, **options):
        Filter.__init__(self, **options)
        chosen_lang = get_choice_opt(options, 'lang',
                                     ['isabelle', 'latex'], 'isabelle')
        self.symbols = self.lang_map[chosen_lang]

    def filter(self, lexer, stream):
        """Yield the tokens of *stream*, replacing any token whose whole
        value is a known symbol with its Unicode character.
        """
        for ttype, value in stream:
            # Unknown values fall through unchanged.
            yield ttype, self.symbols.get(value, value)
686
class KeywordCaseFilter(Filter):
    """Change the case of keywords: all lowercase, all uppercase, or
    capitalized (first letter uppercase, rest lowercase).

    This can be useful e.g. if you highlight Pascal code and want to adapt the
    code to your styleguide.

    Options accepted:

    `case` : string
       The casing to convert keywords to.  Must be one of ``'lower'``,
       ``'upper'`` or ``'capitalize'``.  The default is ``'lower'``.
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        mode = get_choice_opt(options, 'case',
                              ['lower', 'upper', 'capitalize'], 'lower')
        # The option value doubles as the name of the ``str`` method to use.
        self.convert = getattr(str, mode)

    def filter(self, lexer, stream):
        """Yield the tokens of *stream* with keyword values re-cased."""
        for ttype, value in stream:
            yield ttype, (self.convert(value) if ttype in Keyword else value)
713
714
class NameHighlightFilter(Filter):
    """Give selected Name (and Name.*) tokens a different token type.

    Example::

        filter = NameHighlightFilter(
            names=['foo', 'bar', 'baz'],
            tokentype=Name.Function,
        )

    This would highlight the names "foo", "bar" and "baz"
    as functions.  `Name.Function` is the default token type.

    Options accepted:

    `names` : list of strings
       A list of names that should be given the different token type.
       There is no default.
    `tokentype` : TokenType or string
       A token type or a string containing a token type name that is
       used for highlighting the strings in `names`.  The default is
       `Name.Function`.
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        self.names = set(get_list_opt(options, 'names', []))
        requested = options.get('tokentype')
        # A missing or falsy option selects the default token type.
        self.tokentype = (string_to_tokentype(requested) if requested
                          else Name.Function)

    def filter(self, lexer, stream):
        """Yield the tokens of *stream*, re-typing the listed names."""
        for ttype, value in stream:
            highlight = ttype in Name and value in self.names
            yield (self.tokentype if highlight else ttype), value
754
755
class ErrorToken(Exception):
    """Default exception class raised by `RaiseOnErrorTokenFilter`."""
758
759
class RaiseOnErrorTokenFilter(Filter):
    """Abort with an exception as soon as the lexer produces an error token.

    Options accepted:

    `excclass` : Exception class
       The exception class to raise.
       The default is `pygments.filters.ErrorToken`.

    .. versionadded:: 0.8
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        self.exception = options.get('excclass', ErrorToken)
        try:
            # A non-class argument makes issubclass() raise TypeError by
            # itself; a class that is not an exception is sent to the same
            # handler by raising TypeError explicitly.
            is_exception_class = issubclass(self.exception, Exception)
            if not is_exception_class:
                raise TypeError
        except TypeError:
            raise OptionError('excclass option is not an exception class')

    def filter(self, lexer, stream):
        """Pass tokens through unchanged until an ``Error`` token is seen,
        then raise the configured exception with the token's value.
        """
        for token_type, text in stream:
            if token_type is Error:
                raise self.exception(text)
            yield token_type, text
787
788
class VisibleWhitespaceFilter(Filter):
    """Convert tabs, newlines and/or spaces to visible characters.

    Options accepted:

    `spaces` : string or bool
      If this is a one-character string, spaces will be replaced by this
      string.  If it is another true value, spaces will be replaced by ``·``
      (unicode MIDDLE DOT).  If it is a false value, spaces will not be
      replaced.  The default is ``False``.
    `tabs` : string or bool
      The same as for `spaces`, but the default replacement character is ``»``
      (unicode RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK).  The default value
      is ``False``.  Note: this will not work if the `tabsize` option for the
      lexer is nonzero, as tabs will already have been expanded then.
    `tabsize` : int
      If tabs are to be replaced by this filter (see the `tabs` option), this
      is the total number of characters that a tab should be expanded to.
      The default is ``8``.
    `newlines` : string or bool
      The same as for `spaces`, but the default replacement character is ``¶``
      (unicode PILCROW SIGN).  The default value is ``False``.
    `wstokentype` : bool
      If true, give whitespace the special `Whitespace` token type.  This
      allows styling the visible whitespace differently (e.g. greyed out), but
      it can disrupt background colors.  The default is ``True``.

    .. versionadded:: 0.8
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        fallbacks = {'spaces': '·', 'tabs': '»', 'newlines': '¶'}
        for name, fallback in fallbacks.items():
            given = options.get(name, False)
            if isinstance(given, str) and len(given) == 1:
                # An explicit one-character replacement.
                replacement = given
            elif given:
                # Any other true value selects the default character.
                replacement = fallback
            else:
                # Empty string means "leave this kind of whitespace alone".
                replacement = ''
            setattr(self, name, replacement)
        tabsize = get_int_opt(options, 'tabsize', 8)
        if self.tabs:
            # Pad the marker with spaces up to the requested width.
            self.tabs += ' ' * (tabsize - 1)
        if self.newlines:
            # Keep the real line break after the visible marker.
            self.newlines += '\n'
        self.wstt = get_bool_opt(options, 'wstokentype', True)

    def filter(self, lexer, stream):
        """Yield the tokens of *stream* with whitespace made visible."""
        if not self.wstt:
            # Simple mode: substitute inside each value and keep the token
            # types as they are.
            space_repl = self.spaces
            tab_repl = self.tabs
            newline_repl = self.newlines
            for ttype, value in stream:
                if space_repl:
                    value = value.replace(' ', space_repl)
                if tab_repl:
                    value = value.replace('\t', tab_repl)
                if newline_repl:
                    value = value.replace('\n', newline_repl)
                yield ttype, value
            return

        # Token-type mode: every whitespace character becomes a separate
        # ``Whitespace`` token, replaced where a replacement is configured.
        visible = {
            ' ': self.spaces or ' ',
            '\t': self.tabs or '\t',
            '\n': self.newlines or '\n',
        }
        whitespace_re = re.compile(r'\s')

        def replacefunc(wschar):
            # Other whitespace characters are passed through as they are.
            return visible.get(wschar, wschar)

        for ttype, value in stream:
            yield from _replace_special(ttype, value, whitespace_re,
                                        Whitespace, replacefunc)
866
867
class GobbleFilter(Filter):
    """Gobbles source code lines (eats initial characters).

    This filter drops the first ``n`` characters off every line of code.  This
    may be useful when the source code fed to the lexer is indented by a fixed
    amount of space that isn't desired in the output.

    Options accepted:

    `n` : int
       The number of characters to gobble.

    .. versionadded:: 1.2
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        self.n = get_int_opt(options, 'n', 0)

    def gobble(self, value, left):
        """Drop up to *left* leading characters from *value*.

        Returns a ``(remaining_text, still_to_gobble)`` pair.
        """
        size = len(value)
        if left >= size:
            # The whole piece is consumed; carry the remainder forward.
            return '', left - size
        return value[left:], 0

    def filter(self, lexer, stream):
        """Yield the tokens of *stream* with the first ``n`` characters of
        every line removed; tokens left empty are dropped.
        """
        per_line = self.n
        left = per_line  # characters still to drop on the current line
        for ttype, value in stream:
            # The first piece continues the current line, so only ``left``
            # characters are removed from it; every later piece starts a new
            # line and loses the full ``per_line`` characters.
            first, *rest = value.split('\n')
            first, left = self.gobble(first, left)
            lines = [first]
            for line in rest:
                line, left = self.gobble(line, per_line)
                lines.append(line)
            value = '\n'.join(lines)

            if value != '':
                yield ttype, value
905
906
class TokenMergeFilter(Filter):
    """Collapse runs of consecutive tokens that share the same token type
    into a single token.

    .. versionadded:: 1.2
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)

    def filter(self, lexer, stream):
        """Yield one token per run of identically typed tokens in *stream*,
        with the values of the run concatenated.
        """
        # ``None`` marks "no run started yet".
        run_type = run_value = None
        for ttype, value in stream:
            if ttype is not run_type:
                # Type changed: flush the finished run and start a new one.
                if run_type is not None:
                    yield run_type, run_value
                run_type, run_value = ttype, value
            else:
                run_value += value
        if run_type is not None:
            yield run_type, run_value
929
930
# Registry of the built-in filters, keyed by filter name.  This is the
# table that ``find_filter_class()`` consults first and whose keys
# ``get_all_filters()`` yields before any plugin filter names.
FILTERS = {
    'codetagify': CodeTagFilter,
    'keywordcase': KeywordCaseFilter,
    'highlight': NameHighlightFilter,
    'raiseonerror': RaiseOnErrorTokenFilter,
    'whitespace': VisibleWhitespaceFilter,
    'gobble': GobbleFilter,
    'tokenmerge': TokenMergeFilter,
    'symbols': SymbolFilter,
}