Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/filters/__init__.py: 24%
168 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-20 06:09 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-20 06:09 +0000
1"""
2 pygments.filters
3 ~~~~~~~~~~~~~~~~
5 Module containing filter lookup functions and default
6 filters.
8 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10"""
12import re
14from pygments.token import String, Comment, Keyword, Name, Error, Whitespace, \
15 string_to_tokentype
16from pygments.filter import Filter
17from pygments.util import get_list_opt, get_int_opt, get_bool_opt, \
18 get_choice_opt, ClassNotFound, OptionError
19from pygments.plugin import find_plugin_filters
def find_filter_class(filtername):
    """Look up a filter class by its registered name.

    The built-in ``FILTERS`` table is consulted first.  If the name is not
    registered there, the ``(name, class)`` pairs produced by
    ``find_plugin_filters()`` are scanned in order and the first class whose
    name matches is returned.

    Return ``None`` if no filter with that name is known.
    """
    try:
        return FILTERS[filtername]
    except KeyError:
        pass
    plugin_matches = (
        cls for name, cls in find_plugin_filters() if name == filtername
    )
    return next(plugin_matches, None)
def get_filter_by_name(filtername, **options):
    """Instantiate the filter registered under ``filtername``.

    The class is resolved with ``find_filter_class()`` and called with
    ``**options`` as keyword arguments.

    Raise ``ClassNotFound`` if no filter class is found for the name.
    """
    filter_cls = find_filter_class(filtername)
    if not filter_cls:
        raise ClassNotFound('filter %r not found' % filtername)
    return filter_cls(**options)
def get_all_filters():
    """Generate the names of all known filters.

    The keys of the built-in ``FILTERS`` table come first, followed by the
    names reported by ``find_plugin_filters()``.
    """
    for builtin_name in FILTERS:
        yield builtin_name
    yield from (name for name, _cls in find_plugin_filters())
52def _replace_special(ttype, value, regex, specialttype,
53 replacefunc=lambda x: x):
54 last = 0
55 for match in regex.finditer(value):
56 start, end = match.start(), match.end()
57 if start != last:
58 yield ttype, value[last:start]
59 yield specialttype, replacefunc(value[start:end])
60 last = end
61 if last != len(value):
62 yield ttype, value[last:]
class CodeTagFilter(Filter):
    """Highlight special code tags in comments and docstrings.

    Options accepted:

    `codetags` : list of strings
       A list of strings that are flagged as code tags.  The default is to
       highlight ``XXX``, ``TODO``, ``FIXME``, ``BUG`` and ``NOTE``.
       Empty strings in the list are ignored; if no usable tag remains,
       nothing is highlighted.

    .. versionchanged:: 2.13
       Now recognizes ``FIXME`` by default.
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        tags = get_list_opt(options, 'codetags',
                            ['XXX', 'TODO', 'FIXME', 'BUG', 'NOTE'])
        escaped_tags = [re.escape(tag) for tag in tags if tag]
        if escaped_tags:
            # Tags are matched as whole words only.
            self.tag_re = re.compile(r'\b(%s)\b' % '|'.join(escaped_tags))
        else:
            # An empty alternation would compile to ``\b()\b``, which matches
            # the empty string at every word boundary.  Use a pattern that
            # can never match instead, so ``tag_re`` stays a compiled pattern.
            self.tag_re = re.compile(r'(?!)')

    def filter(self, lexer, stream):
        """Retag code tags found in docstring and comment tokens.

        Tokens whose type is in ``String.Doc``, or in ``Comment`` but not in
        ``Comment.Preproc``, are split so that each tag is emitted as
        ``Comment.Special``.  All other tokens pass through unchanged.
        """
        regex = self.tag_re
        for ttype, value in stream:
            is_docstring = ttype in String.Doc
            is_plain_comment = (ttype in Comment
                                and ttype not in Comment.Preproc)
            if is_docstring or is_plain_comment:
                yield from _replace_special(ttype, value, regex,
                                            Comment.Special)
            else:
                yield ttype, value
97class SymbolFilter(Filter):
98 """Convert mathematical symbols such as \\<longrightarrow> in Isabelle
99 or \\longrightarrow in LaTeX into Unicode characters.
101 This is mostly useful for HTML or console output when you want to
102 approximate the source rendering you'd see in an IDE.
104 Options accepted:
106 `lang` : string
107 The symbol language. Must be one of ``'isabelle'`` or
108 ``'latex'``. The default is ``'isabelle'``.
109 """
111 latex_symbols = {
112 '\\alpha' : '\U000003b1',
113 '\\beta' : '\U000003b2',
114 '\\gamma' : '\U000003b3',
115 '\\delta' : '\U000003b4',
116 '\\varepsilon' : '\U000003b5',
117 '\\zeta' : '\U000003b6',
118 '\\eta' : '\U000003b7',
119 '\\vartheta' : '\U000003b8',
120 '\\iota' : '\U000003b9',
121 '\\kappa' : '\U000003ba',
122 '\\lambda' : '\U000003bb',
123 '\\mu' : '\U000003bc',
124 '\\nu' : '\U000003bd',
125 '\\xi' : '\U000003be',
126 '\\pi' : '\U000003c0',
127 '\\varrho' : '\U000003c1',
128 '\\sigma' : '\U000003c3',
129 '\\tau' : '\U000003c4',
130 '\\upsilon' : '\U000003c5',
131 '\\varphi' : '\U000003c6',
132 '\\chi' : '\U000003c7',
133 '\\psi' : '\U000003c8',
134 '\\omega' : '\U000003c9',
135 '\\Gamma' : '\U00000393',
136 '\\Delta' : '\U00000394',
137 '\\Theta' : '\U00000398',
138 '\\Lambda' : '\U0000039b',
139 '\\Xi' : '\U0000039e',
140 '\\Pi' : '\U000003a0',
141 '\\Sigma' : '\U000003a3',
142 '\\Upsilon' : '\U000003a5',
143 '\\Phi' : '\U000003a6',
144 '\\Psi' : '\U000003a8',
145 '\\Omega' : '\U000003a9',
146 '\\leftarrow' : '\U00002190',
147 '\\longleftarrow' : '\U000027f5',
148 '\\rightarrow' : '\U00002192',
149 '\\longrightarrow' : '\U000027f6',
150 '\\Leftarrow' : '\U000021d0',
151 '\\Longleftarrow' : '\U000027f8',
152 '\\Rightarrow' : '\U000021d2',
153 '\\Longrightarrow' : '\U000027f9',
154 '\\leftrightarrow' : '\U00002194',
155 '\\longleftrightarrow' : '\U000027f7',
156 '\\Leftrightarrow' : '\U000021d4',
157 '\\Longleftrightarrow' : '\U000027fa',
158 '\\mapsto' : '\U000021a6',
159 '\\longmapsto' : '\U000027fc',
160 '\\relbar' : '\U00002500',
161 '\\Relbar' : '\U00002550',
162 '\\hookleftarrow' : '\U000021a9',
163 '\\hookrightarrow' : '\U000021aa',
164 '\\leftharpoondown' : '\U000021bd',
165 '\\rightharpoondown' : '\U000021c1',
166 '\\leftharpoonup' : '\U000021bc',
167 '\\rightharpoonup' : '\U000021c0',
168 '\\rightleftharpoons' : '\U000021cc',
169 '\\leadsto' : '\U0000219d',
170 '\\downharpoonleft' : '\U000021c3',
171 '\\downharpoonright' : '\U000021c2',
172 '\\upharpoonleft' : '\U000021bf',
173 '\\upharpoonright' : '\U000021be',
174 '\\restriction' : '\U000021be',
175 '\\uparrow' : '\U00002191',
176 '\\Uparrow' : '\U000021d1',
177 '\\downarrow' : '\U00002193',
178 '\\Downarrow' : '\U000021d3',
179 '\\updownarrow' : '\U00002195',
180 '\\Updownarrow' : '\U000021d5',
181 '\\langle' : '\U000027e8',
182 '\\rangle' : '\U000027e9',
183 '\\lceil' : '\U00002308',
184 '\\rceil' : '\U00002309',
185 '\\lfloor' : '\U0000230a',
186 '\\rfloor' : '\U0000230b',
187 '\\flqq' : '\U000000ab',
188 '\\frqq' : '\U000000bb',
189 '\\bot' : '\U000022a5',
190 '\\top' : '\U000022a4',
191 '\\wedge' : '\U00002227',
192 '\\bigwedge' : '\U000022c0',
193 '\\vee' : '\U00002228',
194 '\\bigvee' : '\U000022c1',
195 '\\forall' : '\U00002200',
196 '\\exists' : '\U00002203',
197 '\\nexists' : '\U00002204',
198 '\\neg' : '\U000000ac',
199 '\\Box' : '\U000025a1',
200 '\\Diamond' : '\U000025c7',
201 '\\vdash' : '\U000022a2',
202 '\\models' : '\U000022a8',
203 '\\dashv' : '\U000022a3',
204 '\\surd' : '\U0000221a',
205 '\\le' : '\U00002264',
206 '\\ge' : '\U00002265',
207 '\\ll' : '\U0000226a',
208 '\\gg' : '\U0000226b',
209 '\\lesssim' : '\U00002272',
210 '\\gtrsim' : '\U00002273',
211 '\\lessapprox' : '\U00002a85',
212 '\\gtrapprox' : '\U00002a86',
213 '\\in' : '\U00002208',
214 '\\notin' : '\U00002209',
215 '\\subset' : '\U00002282',
216 '\\supset' : '\U00002283',
217 '\\subseteq' : '\U00002286',
218 '\\supseteq' : '\U00002287',
219 '\\sqsubset' : '\U0000228f',
220 '\\sqsupset' : '\U00002290',
221 '\\sqsubseteq' : '\U00002291',
222 '\\sqsupseteq' : '\U00002292',
223 '\\cap' : '\U00002229',
224 '\\bigcap' : '\U000022c2',
225 '\\cup' : '\U0000222a',
226 '\\bigcup' : '\U000022c3',
227 '\\sqcup' : '\U00002294',
228 '\\bigsqcup' : '\U00002a06',
229 '\\sqcap' : '\U00002293',
230 '\\Bigsqcap' : '\U00002a05',
231 '\\setminus' : '\U00002216',
232 '\\propto' : '\U0000221d',
233 '\\uplus' : '\U0000228e',
234 '\\bigplus' : '\U00002a04',
235 '\\sim' : '\U0000223c',
236 '\\doteq' : '\U00002250',
237 '\\simeq' : '\U00002243',
238 '\\approx' : '\U00002248',
239 '\\asymp' : '\U0000224d',
240 '\\cong' : '\U00002245',
241 '\\equiv' : '\U00002261',
242 '\\Join' : '\U000022c8',
243 '\\bowtie' : '\U00002a1d',
244 '\\prec' : '\U0000227a',
245 '\\succ' : '\U0000227b',
246 '\\preceq' : '\U0000227c',
247 '\\succeq' : '\U0000227d',
248 '\\parallel' : '\U00002225',
249 '\\mid' : '\U000000a6',
250 '\\pm' : '\U000000b1',
251 '\\mp' : '\U00002213',
252 '\\times' : '\U000000d7',
253 '\\div' : '\U000000f7',
254 '\\cdot' : '\U000022c5',
255 '\\star' : '\U000022c6',
256 '\\circ' : '\U00002218',
257 '\\dagger' : '\U00002020',
258 '\\ddagger' : '\U00002021',
259 '\\lhd' : '\U000022b2',
260 '\\rhd' : '\U000022b3',
261 '\\unlhd' : '\U000022b4',
262 '\\unrhd' : '\U000022b5',
263 '\\triangleleft' : '\U000025c3',
264 '\\triangleright' : '\U000025b9',
265 '\\triangle' : '\U000025b3',
266 '\\triangleq' : '\U0000225c',
267 '\\oplus' : '\U00002295',
268 '\\bigoplus' : '\U00002a01',
269 '\\otimes' : '\U00002297',
270 '\\bigotimes' : '\U00002a02',
271 '\\odot' : '\U00002299',
272 '\\bigodot' : '\U00002a00',
273 '\\ominus' : '\U00002296',
274 '\\oslash' : '\U00002298',
275 '\\dots' : '\U00002026',
276 '\\cdots' : '\U000022ef',
277 '\\sum' : '\U00002211',
278 '\\prod' : '\U0000220f',
279 '\\coprod' : '\U00002210',
280 '\\infty' : '\U0000221e',
281 '\\int' : '\U0000222b',
282 '\\oint' : '\U0000222e',
283 '\\clubsuit' : '\U00002663',
284 '\\diamondsuit' : '\U00002662',
285 '\\heartsuit' : '\U00002661',
286 '\\spadesuit' : '\U00002660',
287 '\\aleph' : '\U00002135',
288 '\\emptyset' : '\U00002205',
289 '\\nabla' : '\U00002207',
290 '\\partial' : '\U00002202',
291 '\\flat' : '\U0000266d',
292 '\\natural' : '\U0000266e',
293 '\\sharp' : '\U0000266f',
294 '\\angle' : '\U00002220',
295 '\\copyright' : '\U000000a9',
296 '\\textregistered' : '\U000000ae',
297 '\\textonequarter' : '\U000000bc',
298 '\\textonehalf' : '\U000000bd',
299 '\\textthreequarters' : '\U000000be',
300 '\\textordfeminine' : '\U000000aa',
301 '\\textordmasculine' : '\U000000ba',
302 '\\euro' : '\U000020ac',
303 '\\pounds' : '\U000000a3',
304 '\\yen' : '\U000000a5',
305 '\\textcent' : '\U000000a2',
306 '\\textcurrency' : '\U000000a4',
307 '\\textdegree' : '\U000000b0',
308 }
310 isabelle_symbols = {
311 '\\<zero>' : '\U0001d7ec',
312 '\\<one>' : '\U0001d7ed',
313 '\\<two>' : '\U0001d7ee',
314 '\\<three>' : '\U0001d7ef',
315 '\\<four>' : '\U0001d7f0',
316 '\\<five>' : '\U0001d7f1',
317 '\\<six>' : '\U0001d7f2',
318 '\\<seven>' : '\U0001d7f3',
319 '\\<eight>' : '\U0001d7f4',
320 '\\<nine>' : '\U0001d7f5',
321 '\\<A>' : '\U0001d49c',
322 '\\<B>' : '\U0000212c',
323 '\\<C>' : '\U0001d49e',
324 '\\<D>' : '\U0001d49f',
325 '\\<E>' : '\U00002130',
326 '\\<F>' : '\U00002131',
327 '\\<G>' : '\U0001d4a2',
328 '\\<H>' : '\U0000210b',
329 '\\<I>' : '\U00002110',
330 '\\<J>' : '\U0001d4a5',
331 '\\<K>' : '\U0001d4a6',
332 '\\<L>' : '\U00002112',
333 '\\<M>' : '\U00002133',
334 '\\<N>' : '\U0001d4a9',
335 '\\<O>' : '\U0001d4aa',
336 '\\<P>' : '\U0001d4ab',
337 '\\<Q>' : '\U0001d4ac',
338 '\\<R>' : '\U0000211b',
339 '\\<S>' : '\U0001d4ae',
340 '\\<T>' : '\U0001d4af',
341 '\\<U>' : '\U0001d4b0',
342 '\\<V>' : '\U0001d4b1',
343 '\\<W>' : '\U0001d4b2',
344 '\\<X>' : '\U0001d4b3',
345 '\\<Y>' : '\U0001d4b4',
346 '\\<Z>' : '\U0001d4b5',
347 '\\<a>' : '\U0001d5ba',
348 '\\<b>' : '\U0001d5bb',
349 '\\<c>' : '\U0001d5bc',
350 '\\<d>' : '\U0001d5bd',
351 '\\<e>' : '\U0001d5be',
352 '\\<f>' : '\U0001d5bf',
353 '\\<g>' : '\U0001d5c0',
354 '\\<h>' : '\U0001d5c1',
355 '\\<i>' : '\U0001d5c2',
356 '\\<j>' : '\U0001d5c3',
357 '\\<k>' : '\U0001d5c4',
358 '\\<l>' : '\U0001d5c5',
359 '\\<m>' : '\U0001d5c6',
360 '\\<n>' : '\U0001d5c7',
361 '\\<o>' : '\U0001d5c8',
362 '\\<p>' : '\U0001d5c9',
363 '\\<q>' : '\U0001d5ca',
364 '\\<r>' : '\U0001d5cb',
365 '\\<s>' : '\U0001d5cc',
366 '\\<t>' : '\U0001d5cd',
367 '\\<u>' : '\U0001d5ce',
368 '\\<v>' : '\U0001d5cf',
369 '\\<w>' : '\U0001d5d0',
370 '\\<x>' : '\U0001d5d1',
371 '\\<y>' : '\U0001d5d2',
372 '\\<z>' : '\U0001d5d3',
373 '\\<AA>' : '\U0001d504',
374 '\\<BB>' : '\U0001d505',
375 '\\<CC>' : '\U0000212d',
376 '\\<DD>' : '\U0001d507',
377 '\\<EE>' : '\U0001d508',
378 '\\<FF>' : '\U0001d509',
379 '\\<GG>' : '\U0001d50a',
380 '\\<HH>' : '\U0000210c',
381 '\\<II>' : '\U00002111',
382 '\\<JJ>' : '\U0001d50d',
383 '\\<KK>' : '\U0001d50e',
384 '\\<LL>' : '\U0001d50f',
385 '\\<MM>' : '\U0001d510',
386 '\\<NN>' : '\U0001d511',
387 '\\<OO>' : '\U0001d512',
388 '\\<PP>' : '\U0001d513',
389 '\\<QQ>' : '\U0001d514',
390 '\\<RR>' : '\U0000211c',
391 '\\<SS>' : '\U0001d516',
392 '\\<TT>' : '\U0001d517',
393 '\\<UU>' : '\U0001d518',
394 '\\<VV>' : '\U0001d519',
395 '\\<WW>' : '\U0001d51a',
396 '\\<XX>' : '\U0001d51b',
397 '\\<YY>' : '\U0001d51c',
398 '\\<ZZ>' : '\U00002128',
399 '\\<aa>' : '\U0001d51e',
400 '\\<bb>' : '\U0001d51f',
401 '\\<cc>' : '\U0001d520',
402 '\\<dd>' : '\U0001d521',
403 '\\<ee>' : '\U0001d522',
404 '\\<ff>' : '\U0001d523',
405 '\\<gg>' : '\U0001d524',
406 '\\<hh>' : '\U0001d525',
407 '\\<ii>' : '\U0001d526',
408 '\\<jj>' : '\U0001d527',
409 '\\<kk>' : '\U0001d528',
410 '\\<ll>' : '\U0001d529',
411 '\\<mm>' : '\U0001d52a',
412 '\\<nn>' : '\U0001d52b',
413 '\\<oo>' : '\U0001d52c',
414 '\\<pp>' : '\U0001d52d',
415 '\\<qq>' : '\U0001d52e',
416 '\\<rr>' : '\U0001d52f',
417 '\\<ss>' : '\U0001d530',
418 '\\<tt>' : '\U0001d531',
419 '\\<uu>' : '\U0001d532',
420 '\\<vv>' : '\U0001d533',
421 '\\<ww>' : '\U0001d534',
422 '\\<xx>' : '\U0001d535',
423 '\\<yy>' : '\U0001d536',
424 '\\<zz>' : '\U0001d537',
425 '\\<alpha>' : '\U000003b1',
426 '\\<beta>' : '\U000003b2',
427 '\\<gamma>' : '\U000003b3',
428 '\\<delta>' : '\U000003b4',
429 '\\<epsilon>' : '\U000003b5',
430 '\\<zeta>' : '\U000003b6',
431 '\\<eta>' : '\U000003b7',
432 '\\<theta>' : '\U000003b8',
433 '\\<iota>' : '\U000003b9',
434 '\\<kappa>' : '\U000003ba',
435 '\\<lambda>' : '\U000003bb',
436 '\\<mu>' : '\U000003bc',
437 '\\<nu>' : '\U000003bd',
438 '\\<xi>' : '\U000003be',
439 '\\<pi>' : '\U000003c0',
440 '\\<rho>' : '\U000003c1',
441 '\\<sigma>' : '\U000003c3',
442 '\\<tau>' : '\U000003c4',
443 '\\<upsilon>' : '\U000003c5',
444 '\\<phi>' : '\U000003c6',
445 '\\<chi>' : '\U000003c7',
446 '\\<psi>' : '\U000003c8',
447 '\\<omega>' : '\U000003c9',
448 '\\<Gamma>' : '\U00000393',
449 '\\<Delta>' : '\U00000394',
450 '\\<Theta>' : '\U00000398',
451 '\\<Lambda>' : '\U0000039b',
452 '\\<Xi>' : '\U0000039e',
453 '\\<Pi>' : '\U000003a0',
454 '\\<Sigma>' : '\U000003a3',
455 '\\<Upsilon>' : '\U000003a5',
456 '\\<Phi>' : '\U000003a6',
457 '\\<Psi>' : '\U000003a8',
458 '\\<Omega>' : '\U000003a9',
459 '\\<bool>' : '\U0001d539',
460 '\\<complex>' : '\U00002102',
461 '\\<nat>' : '\U00002115',
462 '\\<rat>' : '\U0000211a',
463 '\\<real>' : '\U0000211d',
464 '\\<int>' : '\U00002124',
465 '\\<leftarrow>' : '\U00002190',
466 '\\<longleftarrow>' : '\U000027f5',
467 '\\<rightarrow>' : '\U00002192',
468 '\\<longrightarrow>' : '\U000027f6',
469 '\\<Leftarrow>' : '\U000021d0',
470 '\\<Longleftarrow>' : '\U000027f8',
471 '\\<Rightarrow>' : '\U000021d2',
472 '\\<Longrightarrow>' : '\U000027f9',
473 '\\<leftrightarrow>' : '\U00002194',
474 '\\<longleftrightarrow>' : '\U000027f7',
475 '\\<Leftrightarrow>' : '\U000021d4',
476 '\\<Longleftrightarrow>' : '\U000027fa',
477 '\\<mapsto>' : '\U000021a6',
478 '\\<longmapsto>' : '\U000027fc',
479 '\\<midarrow>' : '\U00002500',
480 '\\<Midarrow>' : '\U00002550',
481 '\\<hookleftarrow>' : '\U000021a9',
482 '\\<hookrightarrow>' : '\U000021aa',
483 '\\<leftharpoondown>' : '\U000021bd',
484 '\\<rightharpoondown>' : '\U000021c1',
485 '\\<leftharpoonup>' : '\U000021bc',
486 '\\<rightharpoonup>' : '\U000021c0',
487 '\\<rightleftharpoons>' : '\U000021cc',
488 '\\<leadsto>' : '\U0000219d',
489 '\\<downharpoonleft>' : '\U000021c3',
490 '\\<downharpoonright>' : '\U000021c2',
491 '\\<upharpoonleft>' : '\U000021bf',
492 '\\<upharpoonright>' : '\U000021be',
493 '\\<restriction>' : '\U000021be',
494 '\\<Colon>' : '\U00002237',
495 '\\<up>' : '\U00002191',
496 '\\<Up>' : '\U000021d1',
497 '\\<down>' : '\U00002193',
498 '\\<Down>' : '\U000021d3',
499 '\\<updown>' : '\U00002195',
500 '\\<Updown>' : '\U000021d5',
501 '\\<langle>' : '\U000027e8',
502 '\\<rangle>' : '\U000027e9',
503 '\\<lceil>' : '\U00002308',
504 '\\<rceil>' : '\U00002309',
505 '\\<lfloor>' : '\U0000230a',
506 '\\<rfloor>' : '\U0000230b',
507 '\\<lparr>' : '\U00002987',
508 '\\<rparr>' : '\U00002988',
509 '\\<lbrakk>' : '\U000027e6',
510 '\\<rbrakk>' : '\U000027e7',
511 '\\<lbrace>' : '\U00002983',
512 '\\<rbrace>' : '\U00002984',
513 '\\<guillemotleft>' : '\U000000ab',
514 '\\<guillemotright>' : '\U000000bb',
515 '\\<bottom>' : '\U000022a5',
516 '\\<top>' : '\U000022a4',
517 '\\<and>' : '\U00002227',
518 '\\<And>' : '\U000022c0',
519 '\\<or>' : '\U00002228',
520 '\\<Or>' : '\U000022c1',
521 '\\<forall>' : '\U00002200',
522 '\\<exists>' : '\U00002203',
523 '\\<nexists>' : '\U00002204',
524 '\\<not>' : '\U000000ac',
525 '\\<box>' : '\U000025a1',
526 '\\<diamond>' : '\U000025c7',
527 '\\<turnstile>' : '\U000022a2',
528 '\\<Turnstile>' : '\U000022a8',
529 '\\<tturnstile>' : '\U000022a9',
530 '\\<TTurnstile>' : '\U000022ab',
531 '\\<stileturn>' : '\U000022a3',
532 '\\<surd>' : '\U0000221a',
533 '\\<le>' : '\U00002264',
534 '\\<ge>' : '\U00002265',
535 '\\<lless>' : '\U0000226a',
536 '\\<ggreater>' : '\U0000226b',
537 '\\<lesssim>' : '\U00002272',
538 '\\<greatersim>' : '\U00002273',
539 '\\<lessapprox>' : '\U00002a85',
540 '\\<greaterapprox>' : '\U00002a86',
541 '\\<in>' : '\U00002208',
542 '\\<notin>' : '\U00002209',
543 '\\<subset>' : '\U00002282',
544 '\\<supset>' : '\U00002283',
545 '\\<subseteq>' : '\U00002286',
546 '\\<supseteq>' : '\U00002287',
547 '\\<sqsubset>' : '\U0000228f',
548 '\\<sqsupset>' : '\U00002290',
549 '\\<sqsubseteq>' : '\U00002291',
550 '\\<sqsupseteq>' : '\U00002292',
551 '\\<inter>' : '\U00002229',
552 '\\<Inter>' : '\U000022c2',
553 '\\<union>' : '\U0000222a',
554 '\\<Union>' : '\U000022c3',
555 '\\<squnion>' : '\U00002294',
556 '\\<Squnion>' : '\U00002a06',
557 '\\<sqinter>' : '\U00002293',
558 '\\<Sqinter>' : '\U00002a05',
559 '\\<setminus>' : '\U00002216',
560 '\\<propto>' : '\U0000221d',
561 '\\<uplus>' : '\U0000228e',
562 '\\<Uplus>' : '\U00002a04',
563 '\\<noteq>' : '\U00002260',
564 '\\<sim>' : '\U0000223c',
565 '\\<doteq>' : '\U00002250',
566 '\\<simeq>' : '\U00002243',
567 '\\<approx>' : '\U00002248',
568 '\\<asymp>' : '\U0000224d',
569 '\\<cong>' : '\U00002245',
570 '\\<smile>' : '\U00002323',
571 '\\<equiv>' : '\U00002261',
572 '\\<frown>' : '\U00002322',
573 '\\<Join>' : '\U000022c8',
574 '\\<bowtie>' : '\U00002a1d',
575 '\\<prec>' : '\U0000227a',
576 '\\<succ>' : '\U0000227b',
577 '\\<preceq>' : '\U0000227c',
578 '\\<succeq>' : '\U0000227d',
579 '\\<parallel>' : '\U00002225',
580 '\\<bar>' : '\U000000a6',
581 '\\<plusminus>' : '\U000000b1',
582 '\\<minusplus>' : '\U00002213',
583 '\\<times>' : '\U000000d7',
584 '\\<div>' : '\U000000f7',
585 '\\<cdot>' : '\U000022c5',
586 '\\<star>' : '\U000022c6',
587 '\\<bullet>' : '\U00002219',
588 '\\<circ>' : '\U00002218',
589 '\\<dagger>' : '\U00002020',
590 '\\<ddagger>' : '\U00002021',
591 '\\<lhd>' : '\U000022b2',
592 '\\<rhd>' : '\U000022b3',
593 '\\<unlhd>' : '\U000022b4',
594 '\\<unrhd>' : '\U000022b5',
595 '\\<triangleleft>' : '\U000025c3',
596 '\\<triangleright>' : '\U000025b9',
597 '\\<triangle>' : '\U000025b3',
598 '\\<triangleq>' : '\U0000225c',
599 '\\<oplus>' : '\U00002295',
600 '\\<Oplus>' : '\U00002a01',
601 '\\<otimes>' : '\U00002297',
602 '\\<Otimes>' : '\U00002a02',
603 '\\<odot>' : '\U00002299',
604 '\\<Odot>' : '\U00002a00',
605 '\\<ominus>' : '\U00002296',
606 '\\<oslash>' : '\U00002298',
607 '\\<dots>' : '\U00002026',
608 '\\<cdots>' : '\U000022ef',
609 '\\<Sum>' : '\U00002211',
610 '\\<Prod>' : '\U0000220f',
611 '\\<Coprod>' : '\U00002210',
612 '\\<infinity>' : '\U0000221e',
613 '\\<integral>' : '\U0000222b',
614 '\\<ointegral>' : '\U0000222e',
615 '\\<clubsuit>' : '\U00002663',
616 '\\<diamondsuit>' : '\U00002662',
617 '\\<heartsuit>' : '\U00002661',
618 '\\<spadesuit>' : '\U00002660',
619 '\\<aleph>' : '\U00002135',
620 '\\<emptyset>' : '\U00002205',
621 '\\<nabla>' : '\U00002207',
622 '\\<partial>' : '\U00002202',
623 '\\<flat>' : '\U0000266d',
624 '\\<natural>' : '\U0000266e',
625 '\\<sharp>' : '\U0000266f',
626 '\\<angle>' : '\U00002220',
627 '\\<copyright>' : '\U000000a9',
628 '\\<registered>' : '\U000000ae',
629 '\\<hyphen>' : '\U000000ad',
630 '\\<inverse>' : '\U000000af',
631 '\\<onequarter>' : '\U000000bc',
632 '\\<onehalf>' : '\U000000bd',
633 '\\<threequarters>' : '\U000000be',
634 '\\<ordfeminine>' : '\U000000aa',
635 '\\<ordmasculine>' : '\U000000ba',
636 '\\<section>' : '\U000000a7',
637 '\\<paragraph>' : '\U000000b6',
638 '\\<exclamdown>' : '\U000000a1',
639 '\\<questiondown>' : '\U000000bf',
640 '\\<euro>' : '\U000020ac',
641 '\\<pounds>' : '\U000000a3',
642 '\\<yen>' : '\U000000a5',
643 '\\<cent>' : '\U000000a2',
644 '\\<currency>' : '\U000000a4',
645 '\\<degree>' : '\U000000b0',
646 '\\<amalg>' : '\U00002a3f',
647 '\\<mho>' : '\U00002127',
648 '\\<lozenge>' : '\U000025ca',
649 '\\<wp>' : '\U00002118',
650 '\\<wrong>' : '\U00002240',
651 '\\<struct>' : '\U000022c4',
652 '\\<acute>' : '\U000000b4',
653 '\\<index>' : '\U00000131',
654 '\\<dieresis>' : '\U000000a8',
655 '\\<cedilla>' : '\U000000b8',
656 '\\<hungarumlaut>' : '\U000002dd',
657 '\\<some>' : '\U000003f5',
658 '\\<newline>' : '\U000023ce',
659 '\\<open>' : '\U00002039',
660 '\\<close>' : '\U0000203a',
661 '\\<here>' : '\U00002302',
662 '\\<^sub>' : '\U000021e9',
663 '\\<^sup>' : '\U000021e7',
664 '\\<^bold>' : '\U00002759',
665 '\\<^bsub>' : '\U000021d8',
666 '\\<^esub>' : '\U000021d9',
667 '\\<^bsup>' : '\U000021d7',
668 '\\<^esup>' : '\U000021d6',
669 }
671 lang_map = {'isabelle' : isabelle_symbols, 'latex' : latex_symbols}
673 def __init__(self, **options):
674 Filter.__init__(self, **options)
675 lang = get_choice_opt(options, 'lang',
676 ['isabelle', 'latex'], 'isabelle')
677 self.symbols = self.lang_map[lang]
679 def filter(self, lexer, stream):
680 for ttype, value in stream:
681 if value in self.symbols:
682 yield ttype, self.symbols[value]
683 else:
684 yield ttype, value
class KeywordCaseFilter(Filter):
    """Change the letter case of keyword tokens.

    Keywords can be lowercased, uppercased or capitalized (first letter
    uppercase, the rest lowercase).  This is handy, for instance, when
    highlighting Pascal code that should follow a particular styleguide.

    Options accepted:

    `case` : string
       The casing to convert keywords to.  Must be one of ``'lower'``,
       ``'upper'`` or ``'capitalize'``.  The default is ``'lower'``.
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        allowed_cases = ['lower', 'upper', 'capitalize']
        chosen = get_choice_opt(options, 'case', allowed_cases, 'lower')
        # The option value doubles as the name of the ``str`` method to use.
        self.convert = getattr(str, chosen)

    def filter(self, lexer, stream):
        """Apply the chosen case conversion to every ``Keyword`` token."""
        for ttype, value in stream:
            if ttype in Keyword:
                value = self.convert(value)
            yield ttype, value
class NameHighlightFilter(Filter):
    """Give selected names (Name and Name.* tokens) a different token type.

    Example::

        filter = NameHighlightFilter(
            names=['foo', 'bar', 'baz'],
            tokentype=Name.Function,
        )

    This would highlight the names "foo", "bar" and "baz" as functions.
    `Name.Function` is the default token type.

    Options accepted:

    `names` : list of strings
      The names that should receive the different token type.
      There is no default.
    `tokentype` : TokenType or string
      A token type, or a string containing a token type name, used for
      highlighting the strings in `names`.  The default is `Name.Function`.
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        self.names = set(get_list_opt(options, 'names', []))
        requested = options.get('tokentype')
        # A missing or false-valued option falls back to Name.Function.
        self.tokentype = (string_to_tokentype(requested) if requested
                          else Name.Function)

    def filter(self, lexer, stream):
        """Retag ``Name`` tokens whose value is one of the configured names."""
        for ttype, value in stream:
            if ttype in Name and value in self.names:
                ttype = self.tokentype
            yield ttype, value
class ErrorToken(Exception):
    """Default exception raised by `RaiseOnErrorTokenFilter`."""
class RaiseOnErrorTokenFilter(Filter):
    """Raise an exception as soon as the lexer produces an error token.

    Options accepted:

    `excclass` : Exception class
      The exception class to raise.
      The default is `pygments.filters.ErrorToken`.

    .. versionadded:: 0.8
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        exc_class = options.get('excclass', ErrorToken)
        self.exception = exc_class
        try:
            # issubclass() itself raises TypeError when its first argument
            # is not a class, so both failure modes end up in one handler.
            if not issubclass(exc_class, Exception):
                raise TypeError
        except TypeError:
            raise OptionError('excclass option is not an exception class')

    def filter(self, lexer, stream):
        """Pass tokens through until an ``Error`` token appears.

        When a token whose type is exactly ``Error`` is seen, the configured
        exception is raised with the token's value as its argument.
        """
        for token in stream:
            ttype, value = token
            if ttype is Error:
                raise self.exception(value)
            yield ttype, value
class VisibleWhitespaceFilter(Filter):
    """Convert tabs, newlines and/or spaces to visible characters.

    Options accepted:

    `spaces` : string or bool
      If this is a one-character string, spaces will be replaced by this
      string.  If it is another true value, spaces will be replaced by ``·``
      (unicode MIDDLE DOT).  If it is a false value, spaces will not be
      replaced.  The default is ``False``.
    `tabs` : string or bool
      The same as for `spaces`, but the default replacement character is
      ``»`` (unicode RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK).  The
      default value is ``False``.  Note: this will not work if the `tabsize`
      option for the lexer is nonzero, as tabs will already have been
      expanded then.
    `tabsize` : int
      If tabs are to be replaced by this filter (see the `tabs` option), this
      is the total number of characters that a tab should be expanded to.
      The default is ``8``.
    `newlines` : string or bool
      The same as for `spaces`, but the default replacement character is
      ``¶`` (unicode PILCROW SIGN).  The default value is ``False``.
    `wstokentype` : bool
      If true, give whitespace the special `Whitespace` token type.  This
      allows styling the visible whitespace differently (e.g. greyed out),
      but it can disrupt background colors.  The default is ``True``.

    .. versionadded:: 0.8
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        default_marks = {'spaces': '·', 'tabs': '»', 'newlines': '¶'}
        for name, default in default_marks.items():
            opt = options.get(name, False)
            if isinstance(opt, str) and len(opt) == 1:
                # An explicit one-character replacement was given.
                mark = opt
            elif opt:
                mark = default
            else:
                # An empty string means "do not replace this character".
                mark = ''
            setattr(self, name, mark)
        tabsize = get_int_opt(options, 'tabsize', 8)
        if self.tabs:
            # Pad the tab marker with spaces up to the requested width.
            self.tabs += ' ' * (tabsize - 1)
        if self.newlines:
            # Keep the real line break after the visible marker.
            self.newlines += '\n'
        self.wstt = get_bool_opt(options, 'wstokentype', True)

    def filter(self, lexer, stream):
        """Make whitespace visible according to the configured options.

        With `wstokentype` enabled, every whitespace character becomes its
        own ``Whitespace`` token (replaced by its marker where one is
        configured).  Otherwise the markers are substituted inside the token
        values and the token types are left alone.
        """
        if self.wstt:
            visible = {
                ' ': self.spaces or ' ',
                '\t': self.tabs or '\t',
                '\n': self.newlines or '\n',
            }
            regex = re.compile(r'\s')

            def make_visible(wschar):
                # Other whitespace characters are emitted unchanged.
                return visible.get(wschar, wschar)

            for ttype, value in stream:
                yield from _replace_special(ttype, value, regex, Whitespace,
                                            make_visible)
        else:
            # Simpler processing: plain string replacement, applied in the
            # order spaces, tabs, newlines.
            candidates = [(' ', self.spaces),
                          ('\t', self.tabs),
                          ('\n', self.newlines)]
            active = [(raw, mark) for raw, mark in candidates if mark]
            for ttype, value in stream:
                for raw, mark in active:
                    value = value.replace(raw, mark)
                yield ttype, value
class GobbleFilter(Filter):
    """Gobbles source code lines (eats initial characters).

    This filter drops the first ``n`` characters off every line of code.  This
    may be useful when the source code fed to the lexer is indented by a fixed
    amount of space that isn't desired in the output.

    Options accepted:

    `n` : int
       The number of characters to gobble.  The default is ``0``; zero or
       negative values leave the code unchanged.

    .. versionadded:: 1.2
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        self.n = get_int_opt(options, 'n', 0)

    def gobble(self, value, left):
        """Drop up to ``left`` leading characters from ``value``.

        Returns a ``(remaining_text, still_to_drop)`` pair, where
        ``still_to_drop`` is how many characters could not be removed because
        ``value`` was too short.
        """
        if left <= 0:
            # Nothing to remove.  A negative count must not reach the slice
            # below, where it would keep only the tail of the line.
            return value, 0
        if left < len(value):
            return value[left:], 0
        return '', left - len(value)

    def filter(self, lexer, stream):
        """Strip the first ``n`` characters of every line in the stream.

        A line may span several tokens, so the number of characters still to
        be dropped is carried over from one token to the next.  Tokens that
        end up empty are not emitted.
        """
        n = self.n
        left = n  # How many characters left to gobble.
        for ttype, value in stream:
            # Remove ``left`` characters from the first line of this token
            # (it may continue a line begun by an earlier token) and ``n``
            # from every line that starts inside the token.
            parts = value.split('\n')
            parts[0], left = self.gobble(parts[0], left)
            for i in range(1, len(parts)):
                parts[i], left = self.gobble(parts[i], n)
            value = '\n'.join(parts)

            if value != '':
                yield ttype, value
class TokenMergeFilter(Filter):
    """Merge runs of consecutive tokens that share the same token type in
    the output stream of a lexer.

    .. versionadded:: 1.2
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)

    def filter(self, lexer, stream):
        """Yield one token per run of identically typed tokens.

        The values of a run are concatenated in order.  Token types are
        compared by identity.
        """
        pending_type = None
        pending_value = None
        for ttype, value in stream:
            if ttype is pending_type:
                # Same type as the run being collected: extend it.
                pending_value += value
                continue
            if pending_type is not None:
                yield pending_type, pending_value
            pending_type, pending_value = ttype, value
        if pending_type is not None:
            # Flush the final run.
            yield pending_type, pending_value
# Registry of the built-in filters, keyed by the name under which each one is
# looked up (see find_filter_class() and get_all_filters()).
FILTERS = {
    'codetagify': CodeTagFilter,
    'keywordcase': KeywordCaseFilter,
    'highlight': NameHighlightFilter,
    'raiseonerror': RaiseOnErrorTokenFilter,
    'whitespace': VisibleWhitespaceFilter,
    'gobble': GobbleFilter,
    'tokenmerge': TokenMergeFilter,
    'symbols': SymbolFilter,
}