1"""
2 pygments.lexers.special
3 ~~~~~~~~~~~~~~~~~~~~~~~
4
5 Special lexers.
6
7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11import ast
12
13from pygments.lexer import Lexer, line_re
14from pygments.token import Token, Error, Text, Generic
15from pygments.util import get_choice_opt
16
17
# Names exported by ``from <this module> import *``.
__all__ = ['TextLexer', 'OutputLexer', 'RawTokenLexer']
19
20
class TextLexer(Lexer):
    """
    Pass-through ("null") lexer: the complete input is emitted as one
    plain ``Text`` token, so nothing is highlighted.
    """
    name = 'Text only'
    aliases = ['text']
    filenames = ['*.txt']
    mimetypes = ['text/plain']
    url = ""
    version_added = ''

    priority = 0.01

    def get_tokens_unprocessed(self, text):
        """Yield a single ``(0, Text, text)`` triple covering all of *text*."""
        whole_input = (0, Text, text)
        yield whole_input

    def analyse_text(text):
        # The content of *text* is not inspected; the score is always the
        # class-level ``priority`` value.
        return TextLexer.priority
39
40
class OutputLexer(Lexer):
    """
    Minimal lexer that marks its whole input as ``Token.Generic.Output``.
    """
    name = 'Text output'
    aliases = ['output']
    url = ""
    version_added = '2.10'
    _example = "output/output"

    def get_tokens_unprocessed(self, text):
        """Yield a single ``(0, Generic.Output, text)`` triple for *text*."""
        whole_input = (0, Generic.Output, text)
        yield whole_input
53
54
# Module-level cache: maps a dotted token type name as it appears in a raw
# token stream to the token type object it resolved to.  Filled lazily by
# ``RawTokenLexer.get_tokens_unprocessed`` and shared by all instances.
_ttype_cache = {}
56
57
class RawTokenLexer(Lexer):
    """
    Recreate a token stream formatted with the `RawTokenFormatter`.

    Additional options accepted:

    `compress`
        If set to ``"gz"`` or ``"bz2"``, decompress the token stream with
        the given compression algorithm before lexing (default: ``""``).
        ``"none"`` is accepted as well and means no decompression.
    """
    name = 'Raw token data'
    aliases = []
    filenames = []
    mimetypes = ['application/x-pygments-tokens']
    url = 'https://pygments.org/docs/formatters/#RawTokenFormatter'
    version_added = ''

    def __init__(self, **options):
        self.compress = get_choice_opt(options, 'compress',
                                       ['', 'none', 'gz', 'bz2'], '')
        Lexer.__init__(self, **options)

    def get_tokens(self, text):
        """
        Yield ``(tokentype, value)`` pairs recreated from raw token data.

        *text* may be ``str`` or ``bytes``.  When the ``compress`` option
        names an algorithm, the data is decompressed first; if that fails,
        the undecoded input is yielded as one ``Error`` token and nothing
        else is produced.
        """
        # Only 'gz' and 'bz2' require work; '' and 'none' both mean that the
        # data is not compressed and must be left untouched.
        if self.compress in ('gz', 'bz2'):
            if self.compress == 'gz':
                import gzip
                import zlib
                decompress = gzip.decompress
                # Bad header -> OSError, truncated -> EOFError,
                # corrupt deflate stream -> zlib.error.
                failures = (OSError, EOFError, ValueError, zlib.error)
            else:
                import bz2
                decompress = bz2.decompress
                # Invalid data -> OSError, truncated -> ValueError.
                failures = (OSError, EOFError, ValueError)
            try:
                if isinstance(text, str):
                    # May raise UnicodeEncodeError (a ValueError) for text
                    # that cannot be compressed byte data.
                    text = text.encode('latin1')
                text = decompress(text)
            except failures:
                if isinstance(text, bytes):
                    text = text.decode('latin1')
                yield Error, text
                # Do not fall through: lexing the undecodable data as well
                # would emit the same content a second time.
                return
        if isinstance(text, bytes):
            text = text.decode('latin1')

        # do not call Lexer.get_tokens() because stripping is not optional.
        text = text.strip('\n') + '\n'
        for i, t, v in self.get_tokens_unprocessed(text):
            yield t, v

    def get_tokens_unprocessed(self, text):
        """
        Yield ``(index, tokentype, value)`` triples, one per line of *text*.

        Each line is expected to be ``<token type name>\\t<string literal>``.
        A line that cannot be parsed that way is yielded unchanged as an
        ``Error`` token.  *index* is the running total of the lengths of the
        values yielded so far.
        """
        length = 0
        for match in line_re.finditer(text):
            line = match.group()
            try:
                ttypestr, val = line.rstrip().split('\t', 1)
                ttype = _ttype_cache.get(ttypestr)
                # Compare against None: a cached token type can be falsy
                # (the root ``Token`` is an empty tuple subclass), and a
                # truthiness test would re-resolve it on every line.
                if ttype is None:
                    ttype = Token
                    # The first component (normally "Token") is skipped;
                    # the remaining ones are resolved as nested attributes.
                    for ttype_ in ttypestr.split('.')[1:]:
                        if not ttype_ or not ttype_[0].isupper():
                            raise ValueError('malformed token name')
                        ttype = getattr(ttype, ttype_)
                    _ttype_cache[ttypestr] = ttype
                val = ast.literal_eval(val)
                if not isinstance(val, str):
                    raise ValueError('expected str')
            except (SyntaxError, ValueError, TypeError, MemoryError,
                    RecursionError):
                # ast.literal_eval() is documented to raise any of these on
                # malformed input; none of them may escape the lexer.
                val = line
                ttype = Error
            yield length, ttype, val
            length += len(val)