1"""
2 pygments.lexers.python
3 ~~~~~~~~~~~~~~~~~~~~~~
4
5 Lexers for Python and related languages.
6
7 :copyright: Copyright 2006-present by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11import keyword
12
13from pygments.lexer import DelegatingLexer, RegexLexer, include, \
14 bygroups, using, default, words, combined, this
15from pygments.util import get_bool_opt, shebang_matches
16from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
17 Number, Punctuation, Generic, Other, Error, Whitespace
18from pygments import unistring as uni
19
# Names exported by ``from pygments.lexers.python import *``.  The
# ``Python3Lexer`` / ``Python3TracebackLexer`` aliases defined further down
# are not listed here.
__all__ = ['PythonLexer', 'PythonConsoleLexer', 'PythonTracebackLexer',
           'Python2Lexer', 'Python2TracebackLexer',
           'CythonLexer', 'DgLexer', 'NumPyLexer']
23
24
class PythonLexer(RegexLexer):
    """
    For Python source code (version 3.x).

    .. versionchanged:: 2.5
       This is now the default ``PythonLexer``. It is still available as the
       alias ``Python3Lexer``.
    """

    name = 'Python'
    url = 'https://www.python.org'
    aliases = ['python', 'py', 'sage', 'python3', 'py3', 'bazel', 'starlark', 'pyi']
    filenames = [
        '*.py',
        '*.pyw',
        # Type stubs
        '*.pyi',
        # Jython
        '*.jy',
        # Sage
        '*.sage',
        # SCons
        '*.sc',
        'SConstruct',
        'SConscript',
        # Skylark/Starlark (used by Bazel, Buck, and Pants)
        '*.bzl',
        'BUCK',
        'BUILD',
        'BUILD.bazel',
        'WORKSPACE',
        # Twisted Application infrastructure
        '*.tac',
        # Execubot level format
        '*.pye',
    ]
    mimetypes = ['text/x-python', 'application/x-python',
                 'text/x-python3', 'application/x-python3']
    version_added = '0.10'

    # Regex for an identifier: one XID_Start character followed by any
    # number of XID_Continue characters.
    uni_name = f"[{uni.xid_start}][{uni.xid_continue}]*"

    def innerstring_rules(ttype):
        """Build the rules for the body of a plain (non-f) string.

        Called while the class body is evaluated (no ``self``); `ttype` is
        the token type emitted for ordinary string content.
        """
        return [
            # the old style '%s' % (...) string formatting (still valid in Py3)
            (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[E-GXc-giorsaux%]', String.Interpol),
            # the new style '{}'.format(...) string formatting
            (r'\{'
             r'((\w+)((\.\w+)|(\[[^\]]+\]))*)?'  # field name
             r'(\![sra])?'                       # conversion
             r'(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?'
             r'\}', String.Interpol),

            # backslashes, quotes and formatting signs must be parsed one at a time
            (r'[^\\\'"%{\n]+', ttype),
            (r'[\'"\\]', ttype),
            # unhandled string formatting sign
            (r'%|(\{{1,2})', ttype)
            # newlines are an error (use "nl" state)
        ]

    def fstring_rules(ttype):
        """Build the rules for the body of an f-string or t-string.

        Called while the class body is evaluated (no ``self``); `ttype` is
        the token type emitted for ordinary string content.
        """
        return [
            # Assuming that a '}' is the closing brace after format specifier.
            # Sadly, this means that we won't detect syntax error. But it's
            # more important to parse correct syntax correctly, than to
            # highlight invalid syntax.
            (r'\}', String.Interpol),
            (r'\{', String.Interpol, 'expr-inside-fstring'),
            # backslashes, quotes and formatting signs must be parsed one at a time
            (r'[^\\\'"{}\n]+', ttype),
            (r'[\'"\\]', ttype),
            # newlines are an error (use "nl" state)
        ]

    tokens = {
        'root': [
            (r'\n', Whitespace),
            # a triple-quoted string that starts a line is treated as a docstring
            (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")',
             bygroups(Whitespace, String.Affix, String.Doc)),
            (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')",
             bygroups(Whitespace, String.Affix, String.Doc)),
            (r'\A#!.+$', Comment.Hashbang),
            (r'#.*$', Comment.Single),
            (r'\\\n', Text),
            (r'\\', Text),
            include('keywords'),
            include('soft-keywords'),
            (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Whitespace), 'funcname'),
            (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Whitespace), 'classname'),
            # `lazy from ...` / `lazy import ...` at the start of a line
            (r'^(lazy)((?:\s|\\\s)+)(from)((?:\s|\\\s)+)',
             bygroups(Keyword.Namespace, Whitespace, Keyword.Namespace, Whitespace),
             'fromimport'),
            (r'^(lazy)((?:\s|\\\s)+)(import)((?:\s|\\\s)+)',
             bygroups(Keyword.Namespace, Whitespace, Keyword.Namespace, Whitespace),
             'import'),
            (r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Whitespace),
             'fromimport'),
            (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Whitespace),
             'import'),
            include('expr'),
        ],
        'expr': [
            # raw f-strings and t-strings
            ('(?i)(r[ft]|[ft]r)(""")',
             bygroups(String.Affix, String.Double),
             combined('rfstringescape', 'tdqf')),
            ("(?i)(r[ft]|[ft]r)(''')",
             bygroups(String.Affix, String.Single),
             combined('rfstringescape', 'tsqf')),
            ('(?i)(r[ft]|[ft]r)(")',
             bygroups(String.Affix, String.Double),
             combined('rfstringescape', 'dqf')),
            ("(?i)(r[ft]|[ft]r)(')",
             bygroups(String.Affix, String.Single),
             combined('rfstringescape', 'sqf')),
            # non-raw f-strings and t-strings
            ('([fFtT])(""")', bygroups(String.Affix, String.Double),
             combined('fstringescape', 'tdqf')),
            ("([fFtT])(''')", bygroups(String.Affix, String.Single),
             combined('fstringescape', 'tsqf')),
            ('([fFtT])(")', bygroups(String.Affix, String.Double),
             combined('fstringescape', 'dqf')),
            ("([fFtT])(')", bygroups(String.Affix, String.Single),
             combined('fstringescape', 'sqf')),
            # raw bytes and strings
            ('(?i)(rb|br|r)(""")',
             bygroups(String.Affix, String.Double), 'tdqs'),
            ("(?i)(rb|br|r)(''')",
             bygroups(String.Affix, String.Single), 'tsqs'),
            ('(?i)(rb|br|r)(")',
             bygroups(String.Affix, String.Double), 'dqs'),
            ("(?i)(rb|br|r)(')",
             bygroups(String.Affix, String.Single), 'sqs'),
            # non-raw strings
            ('([uU]?)(""")', bygroups(String.Affix, String.Double),
             combined('stringescape', 'tdqs')),
            ("([uU]?)(''')", bygroups(String.Affix, String.Single),
             combined('stringescape', 'tsqs')),
            ('([uU]?)(")', bygroups(String.Affix, String.Double),
             combined('stringescape', 'dqs')),
            ("([uU]?)(')", bygroups(String.Affix, String.Single),
             combined('stringescape', 'sqs')),
            # non-raw bytes
            ('([bB])(""")', bygroups(String.Affix, String.Double),
             combined('bytesescape', 'tdqs')),
            ("([bB])(''')", bygroups(String.Affix, String.Single),
             combined('bytesescape', 'tsqs')),
            ('([bB])(")', bygroups(String.Affix, String.Double),
             combined('bytesescape', 'dqs')),
            ("([bB])(')", bygroups(String.Affix, String.Single),
             combined('bytesescape', 'sqs')),

            (r'[^\S\n]+', Text),
            include('numbers'),
            (r'!=|==|<<|>>|:=|[-~+/*%=<>&^|.]', Operator),
            (r'[]{}:(),;[]', Punctuation),
            (r'(in|is|and|or|not)\b', Operator.Word),
            include('expr-keywords'),
            include('builtins'),
            include('magicfuncs'),
            include('magicvars'),
            include('name'),
        ],
        'expr-inside-fstring': [
            (r'[{([]', Punctuation, 'expr-inside-fstring-inner'),
            # without format specifier
            (r'(=\s*)?'         # debug (https://bugs.python.org/issue36817)
             r'(\![sraf])?'     # conversion
             r'\}', String.Interpol, '#pop'),
            # with format specifier
            # we'll catch the remaining '}' in the outer scope
            (r'(=\s*)?'         # debug (https://bugs.python.org/issue36817)
             r'(\![sraf])?'     # conversion
             r':', String.Interpol, '#pop'),
            (r'\s+', Whitespace),  # allow new lines
            include('expr'),
        ],
        'expr-inside-fstring-inner': [
            # nested brackets push another level so that the matching closer
            # pops back to the right depth
            (r'[{([]', Punctuation, 'expr-inside-fstring-inner'),
            (r'[])}]', Punctuation, '#pop'),
            (r'\s+', Whitespace),  # allow new lines
            include('expr'),
        ],
        'expr-keywords': [
            # Based on https://docs.python.org/3/reference/expressions.html
            (words((
                'async for', 'await', 'else', 'for', 'if', 'lambda',
                'yield', 'yield from'), suffix=r'\b'),
             Keyword),
            (words(('True', 'False', 'None'), suffix=r'\b'), Keyword.Constant),
        ],
        'keywords': [
            (words((
                'assert', 'async', 'await', 'break', 'continue', 'del', 'elif',
                'else', 'except', 'finally', 'for', 'global', 'if', 'lambda',
                'pass', 'raise', 'nonlocal', 'return', 'try', 'while', 'yield',
                'yield from', 'as', 'with'), suffix=r'\b'),
             Keyword),
            (words(('True', 'False', 'None'), suffix=r'\b'), Keyword.Constant),
        ],
        'soft-keywords': [
            # `match`, `case` and `_` soft keywords
            (r'(^[ \t]*)'              # at beginning of line + possible indentation
             r'(match|case)\b'         # a possible keyword
             r'(?![ \t]*(?:'           # not followed by...
             r'[:,;=^&|@~)\]}]|(?:' +  # characters and keywords that mean this isn't
                                       # pattern matching (but None/True/False is ok)
             r'|'.join(k for k in keyword.kwlist if k[0].islower()) + r')\b))',
             bygroups(Text, Keyword), 'soft-keywords-inner'),
        ],
        'soft-keywords-inner': [
            # optional `_` keyword
            (r'(\s+)([^\n_]*)(_\b)', bygroups(Whitespace, using(this), Keyword)),
            default('#pop')
        ],
        'builtins': [
            (words((
                '__import__', 'abs', 'aiter', 'all', 'any', 'bin', 'bool', 'bytearray',
                'breakpoint', 'bytes', 'callable', 'chr', 'classmethod', 'compile',
                'complex', 'delattr', 'dict', 'dir', 'divmod', 'enumerate', 'eval',
                'filter', 'float', 'format', 'frozendict', 'frozenset', 'getattr',
                'globals', 'hasattr', 'hash', 'hex', 'id', 'input', 'int',
                'isinstance', 'issubclass', 'iter', 'len', 'list', 'locals', 'map',
                'max', 'memoryview', 'min', 'next', 'object', 'oct', 'open', 'ord',
                'pow', 'print', 'property', 'range', 'repr', 'reversed', 'round',
                'sentinel', 'set', 'setattr', 'slice', 'sorted', 'staticmethod', 'str',
                'sum', 'super', 'tuple', 'type', 'vars', 'zip'), prefix=r'(?<!\.)',
                suffix=r'\b'),
             Name.Builtin),
            (r'(?<!\.)(self|Ellipsis|NotImplemented|cls)\b', Name.Builtin.Pseudo),
            (words((
                'ArithmeticError', 'AssertionError', 'AttributeError',
                'BaseException', 'BufferError', 'BytesWarning', 'DeprecationWarning',
                'EOFError', 'EnvironmentError', 'Exception', 'FloatingPointError',
                'FutureWarning', 'GeneratorExit', 'IOError', 'ImportError',
                'ImportWarning', 'IndentationError', 'IndexError', 'KeyError',
                'KeyboardInterrupt', 'LookupError', 'MemoryError', 'NameError',
                'NotImplementedError', 'OSError', 'OverflowError',
                'PendingDeprecationWarning', 'ReferenceError', 'ResourceWarning',
                'RuntimeError', 'RuntimeWarning', 'StopIteration',
                'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit',
                'TabError', 'TypeError', 'UnboundLocalError', 'UnicodeDecodeError',
                'UnicodeEncodeError', 'UnicodeError', 'UnicodeTranslateError',
                'UnicodeWarning', 'UserWarning', 'ValueError', 'VMSError',
                'Warning', 'WindowsError', 'ZeroDivisionError',
                # new builtin exceptions from PEP 3151
                'BlockingIOError', 'ChildProcessError', 'ConnectionError',
                'BrokenPipeError', 'ConnectionAbortedError', 'ConnectionRefusedError',
                'ConnectionResetError', 'FileExistsError', 'FileNotFoundError',
                'InterruptedError', 'IsADirectoryError', 'NotADirectoryError',
                'PermissionError', 'ProcessLookupError', 'TimeoutError',
                # others new in Python 3
                'StopAsyncIteration', 'ModuleNotFoundError', 'RecursionError',
                'EncodingWarning'),
                prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Exception),
        ],
        'magicfuncs': [
            (words((
                '__abs__', '__add__', '__aenter__', '__aexit__', '__aiter__',
                '__and__', '__anext__', '__await__', '__bool__', '__bytes__',
                '__call__', '__complex__', '__contains__', '__del__', '__delattr__',
                '__delete__', '__delitem__', '__dir__', '__divmod__', '__enter__',
                '__eq__', '__exit__', '__float__', '__floordiv__', '__format__',
                '__ge__', '__get__', '__getattr__', '__getattribute__',
                '__getitem__', '__gt__', '__hash__', '__iadd__', '__iand__',
                '__ifloordiv__', '__ilshift__', '__imatmul__', '__imod__',
                '__imul__', '__index__', '__init__', '__instancecheck__',
                '__int__', '__invert__', '__ior__', '__ipow__', '__irshift__',
                '__isub__', '__iter__', '__itruediv__', '__ixor__', '__le__',
                '__len__', '__length_hint__', '__lshift__', '__lt__', '__matmul__',
                '__missing__', '__mod__', '__mul__', '__ne__', '__neg__',
                '__new__', '__next__', '__or__', '__pos__', '__pow__',
                '__prepare__', '__radd__', '__rand__', '__rdivmod__', '__repr__',
                '__reversed__', '__rfloordiv__', '__rlshift__', '__rmatmul__',
                '__rmod__', '__rmul__', '__ror__', '__round__', '__rpow__',
                '__rrshift__', '__rshift__', '__rsub__', '__rtruediv__',
                '__rxor__', '__set__', '__setattr__', '__setitem__', '__str__',
                '__sub__', '__subclasscheck__', '__truediv__',
                '__xor__'), suffix=r'\b'),
             Name.Function.Magic),
        ],
        'magicvars': [
            (words((
                '__annotations__', '__bases__', '__class__', '__closure__',
                '__code__', '__defaults__', '__dict__', '__doc__', '__file__',
                '__func__', '__globals__', '__kwdefaults__', '__module__',
                '__mro__', '__name__', '__objclass__', '__qualname__',
                '__self__', '__slots__', '__weakref__'), suffix=r'\b'),
             Name.Variable.Magic),
        ],
        'numbers': [
            # A trailing ``j``/``J`` makes a float or decimal integer an
            # imaginary literal; keep the suffix inside the number token
            # instead of letting it fall through to the 'name' state.
            (r'(\d(?:_?\d)*\.(?:\d(?:_?\d)*)?|(?:\d(?:_?\d)*)?\.\d(?:_?\d)*)'
             r'([eE][+-]?\d(?:_?\d)*)?[jJ]?', Number.Float),
            (r'\d(?:_?\d)*[eE][+-]?\d(?:_?\d)*[jJ]?', Number.Float),
            (r'0[oO](?:_?[0-7])+', Number.Oct),
            (r'0[bB](?:_?[01])+', Number.Bin),
            (r'0[xX](?:_?[a-fA-F0-9])+', Number.Hex),
            (r'\d(?:_?\d)*[jJ]?', Number.Integer),
        ],
        'name': [
            (r'@' + uni_name, Name.Decorator),
            (r'@', Operator),  # new matrix multiplication operator
            (uni_name, Name),
        ],
        'funcname': [
            include('magicfuncs'),
            (uni_name, Name.Function, '#pop'),
            default('#pop'),
        ],
        'classname': [
            (uni_name, Name.Class, '#pop'),
        ],
        'import': [
            (r'(\s+)(as)(\s+)', bygroups(Whitespace, Keyword, Whitespace)),
            (r'\.', Name.Namespace),
            (uni_name, Name.Namespace),
            (r'(\s*)(,)(\s*)', bygroups(Whitespace, Operator, Whitespace)),
            default('#pop')  # all else: go back
        ],
        'fromimport': [
            (r'(\s+)(import)\b', bygroups(Whitespace, Keyword.Namespace), '#pop'),
            (r'\.', Name.Namespace),
            # if None occurs here, it's "raise x from None", since None can
            # never be a module name
            (r'None\b', Keyword.Constant, '#pop'),
            (uni_name, Name.Namespace),
            default('#pop'),
        ],
        'rfstringescape': [
            # doubled braces are the only escapes inside raw f-strings
            (r'\{\{', String.Escape),
            (r'\}\}', String.Escape),
        ],
        'fstringescape': [
            include('rfstringescape'),
            include('stringescape'),
        ],
        'bytesescape': [
            (r'\\([\\abfnrtv"\']|\n|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        'stringescape': [
            (r'\\(N\{.*?\}|u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8})', String.Escape),
            include('bytesescape')
        ],
        'fstrings-single': fstring_rules(String.Single),
        'fstrings-double': fstring_rules(String.Double),
        'strings-single': innerstring_rules(String.Single),
        'strings-double': innerstring_rules(String.Double),
        'dqf': [
            (r'"', String.Double, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
            include('fstrings-double')
        ],
        'sqf': [
            (r"'", String.Single, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
            include('fstrings-single')
        ],
        'dqs': [
            (r'"', String.Double, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
            include('strings-double')
        ],
        'sqs': [
            (r"'", String.Single, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
            include('strings-single')
        ],
        'tdqf': [
            (r'"""', String.Double, '#pop'),
            include('fstrings-double'),
            (r'\n', String.Double)
        ],
        'tsqf': [
            (r"'''", String.Single, '#pop'),
            include('fstrings-single'),
            (r'\n', String.Single)
        ],
        'tdqs': [
            (r'"""', String.Double, '#pop'),
            include('strings-double'),
            (r'\n', String.Double)
        ],
        'tsqs': [
            (r"'''", String.Single, '#pop'),
            include('strings-single'),
            (r'\n', String.Single)
        ],
    }

    def analyse_text(text):
        """Guess whether `text` is Python 3 source.

        Truthy when the text has a ``python``/``pythonw`` shebang (optionally
        suffixed with ``3`` or ``3.x``) or when ``'import '`` occurs within
        its first 1000 characters.
        """
        return shebang_matches(text, r'pythonw?(3(\.\d)?)?') or \
            'import ' in text[:1000]
421
# Alias under which ``PythonLexer`` remains available (see its docstring).
Python3Lexer = PythonLexer
423
424
class Python2Lexer(RegexLexer):
    """
    For Python 2.x source code.

    .. versionchanged:: 2.5
       This class has been renamed from ``PythonLexer``. ``PythonLexer`` now
       refers to the Python 3 variant. File name patterns like ``*.py`` have
       been moved to Python 3 as well.
    """

    name = 'Python 2.x'
    url = 'https://www.python.org'
    aliases = ['python2', 'py2']
    filenames = []  # now taken over by PythonLexer (3.x)
    mimetypes = ['text/x-python2', 'application/x-python2']
    version_added = ''

    def innerstring_rules(ttype):
        # Helper evaluated while the class body runs (no ``self``): returns
        # the rules for the inside of a string literal, emitting `ttype` for
        # ordinary string content.
        return [
            # the old style '%s' % (...) string formatting
            (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[E-GXc-giorsux%]', String.Interpol),
            # backslashes, quotes and formatting signs must be parsed one at a time
            (r'[^\\\'"%\n]+', ttype),
            (r'[\'"\\]', ttype),
            # unhandled string formatting sign
            (r'%', ttype),
            # newlines are an error (use "nl" state)
        ]

    # State table; within each state the rules are tried in order, so the
    # position of a rule decides its priority.
    tokens = {
        'root': [
            (r'\n', Whitespace),
            # a triple-quoted string that starts a line is treated as a docstring
            (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")',
             bygroups(Whitespace, String.Affix, String.Doc)),
            (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')",
             bygroups(Whitespace, String.Affix, String.Doc)),
            (r'[^\S\n]+', Text),
            (r'\A#!.+$', Comment.Hashbang),
            (r'#.*$', Comment.Single),
            (r'[]{}:(),;[]', Punctuation),
            (r'\\\n', Text),
            (r'\\', Text),
            (r'(in|is|and|or|not)\b', Operator.Word),
            (r'!=|==|<<|>>|[-~+/*%=<>&^|.]', Operator),
            include('keywords'),
            (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Whitespace), 'funcname'),
            (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Whitespace), 'classname'),
            (r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Whitespace),
             'fromimport'),
            (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Whitespace),
             'import'),
            include('builtins'),
            include('magicfuncs'),
            include('magicvars'),
            include('backtick'),
            # raw strings: no 'stringescape' state is combined in
            ('([rR]|[uUbB][rR]|[rR][uUbB])(""")',
             bygroups(String.Affix, String.Double), 'tdqs'),
            ("([rR]|[uUbB][rR]|[rR][uUbB])(''')",
             bygroups(String.Affix, String.Single), 'tsqs'),
            ('([rR]|[uUbB][rR]|[rR][uUbB])(")',
             bygroups(String.Affix, String.Double), 'dqs'),
            ("([rR]|[uUbB][rR]|[rR][uUbB])(')",
             bygroups(String.Affix, String.Single), 'sqs'),
            # non-raw strings: escapes are recognised via 'stringescape'
            ('([uUbB]?)(""")', bygroups(String.Affix, String.Double),
             combined('stringescape', 'tdqs')),
            ("([uUbB]?)(''')", bygroups(String.Affix, String.Single),
             combined('stringescape', 'tsqs')),
            ('([uUbB]?)(")', bygroups(String.Affix, String.Double),
             combined('stringescape', 'dqs')),
            ("([uUbB]?)(')", bygroups(String.Affix, String.Single),
             combined('stringescape', 'sqs')),
            include('name'),
            include('numbers'),
        ],
        'keywords': [
            (words((
                'assert', 'break', 'continue', 'del', 'elif', 'else', 'except',
                'exec', 'finally', 'for', 'global', 'if', 'lambda', 'pass',
                'print', 'raise', 'return', 'try', 'while', 'yield',
                'yield from', 'as', 'with'), suffix=r'\b'),
             Keyword),
        ],
        'builtins': [
            # the (?<!\.) prefix keeps attribute accesses such as ``x.list``
            # from being highlighted as builtins
            (words((
                '__import__', 'abs', 'all', 'any', 'apply', 'basestring', 'bin',
                'bool', 'buffer', 'bytearray', 'bytes', 'callable', 'chr', 'classmethod',
                'cmp', 'coerce', 'compile', 'complex', 'delattr', 'dict', 'dir', 'divmod',
                'enumerate', 'eval', 'execfile', 'exit', 'file', 'filter', 'float',
                'frozenset', 'getattr', 'globals', 'hasattr', 'hash', 'hex', 'id',
                'input', 'int', 'intern', 'isinstance', 'issubclass', 'iter', 'len',
                'list', 'locals', 'long', 'map', 'max', 'min', 'next', 'object',
                'oct', 'open', 'ord', 'pow', 'property', 'range', 'raw_input', 'reduce',
                'reload', 'repr', 'reversed', 'round', 'set', 'setattr', 'slice',
                'sorted', 'staticmethod', 'str', 'sum', 'super', 'tuple', 'type',
                'unichr', 'unicode', 'vars', 'xrange', 'zip'),
                prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Builtin),
            (r'(?<!\.)(self|None|Ellipsis|NotImplemented|False|True|cls'
             r')\b', Name.Builtin.Pseudo),
            (words((
                'ArithmeticError', 'AssertionError', 'AttributeError',
                'BaseException', 'DeprecationWarning', 'EOFError', 'EnvironmentError',
                'Exception', 'FloatingPointError', 'FutureWarning', 'GeneratorExit',
                'IOError', 'ImportError', 'ImportWarning', 'IndentationError',
                'IndexError', 'KeyError', 'KeyboardInterrupt', 'LookupError',
                'MemoryError', 'NameError',
                'NotImplementedError', 'OSError', 'OverflowError', 'OverflowWarning',
                'PendingDeprecationWarning', 'ReferenceError',
                'RuntimeError', 'RuntimeWarning', 'StandardError', 'StopIteration',
                'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit',
                'TabError', 'TypeError', 'UnboundLocalError', 'UnicodeDecodeError',
                'UnicodeEncodeError', 'UnicodeError', 'UnicodeTranslateError',
                'UnicodeWarning', 'UserWarning', 'ValueError', 'VMSError', 'Warning',
                'WindowsError', 'ZeroDivisionError'), prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Exception),
        ],
        'magicfuncs': [
            (words((
                '__abs__', '__add__', '__and__', '__call__', '__cmp__', '__coerce__',
                '__complex__', '__contains__', '__del__', '__delattr__', '__delete__',
                '__delitem__', '__delslice__', '__div__', '__divmod__', '__enter__',
                '__eq__', '__exit__', '__float__', '__floordiv__', '__ge__', '__get__',
                '__getattr__', '__getattribute__', '__getitem__', '__getslice__', '__gt__',
                '__hash__', '__hex__', '__iadd__', '__iand__', '__idiv__', '__ifloordiv__',
                '__ilshift__', '__imod__', '__imul__', '__index__', '__init__',
                '__instancecheck__', '__int__', '__invert__', '__iop__', '__ior__',
                '__ipow__', '__irshift__', '__isub__', '__iter__', '__itruediv__',
                '__ixor__', '__le__', '__len__', '__long__', '__lshift__', '__lt__',
                '__missing__', '__mod__', '__mul__', '__ne__', '__neg__', '__new__',
                '__nonzero__', '__oct__', '__op__', '__or__', '__pos__', '__pow__',
                '__radd__', '__rand__', '__rcmp__', '__rdiv__', '__rdivmod__', '__repr__',
                '__reversed__', '__rfloordiv__', '__rlshift__', '__rmod__', '__rmul__',
                '__rop__', '__ror__', '__rpow__', '__rrshift__', '__rshift__', '__rsub__',
                '__rtruediv__', '__rxor__', '__set__', '__setattr__', '__setitem__',
                '__setslice__', '__str__', '__sub__', '__subclasscheck__', '__truediv__',
                '__unicode__', '__xor__'), suffix=r'\b'),
             Name.Function.Magic),
        ],
        'magicvars': [
            (words((
                '__bases__', '__class__', '__closure__', '__code__', '__defaults__',
                '__dict__', '__doc__', '__file__', '__func__', '__globals__',
                '__metaclass__', '__module__', '__mro__', '__name__', '__self__',
                '__slots__', '__weakref__'),
                suffix=r'\b'),
             Name.Variable.Magic),
        ],
        'numbers': [
            # an optional trailing ``j`` (imaginary literal) stays inside the
            # number token
            (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?j?', Number.Float),
            (r'\d+[eE][+-]?[0-9]+j?', Number.Float),
            (r'0[0-7]+j?', Number.Oct),
            (r'0[bB][01]+', Number.Bin),
            (r'0[xX][a-fA-F0-9]+', Number.Hex),
            (r'\d+L', Number.Integer.Long),
            (r'\d+j?', Number.Integer)
        ],
        'backtick': [
            # backtick-quoted expression, matched non-greedily on one line
            ('`.*?`', String.Backtick),
        ],
        'name': [
            (r'@[\w.]+', Name.Decorator),
            (r'[a-zA-Z_]\w*', Name),
        ],
        'funcname': [
            include('magicfuncs'),
            (r'[a-zA-Z_]\w*', Name.Function, '#pop'),
            default('#pop'),
        ],
        'classname': [
            (r'[a-zA-Z_]\w*', Name.Class, '#pop')
        ],
        'import': [
            (r'(?:[ \t]|\\\n)+', Text),
            (r'as\b', Keyword.Namespace),
            (r',', Operator),
            (r'[a-zA-Z_][\w.]*', Name.Namespace),
            default('#pop')  # all else: go back
        ],
        'fromimport': [
            (r'(?:[ \t]|\\\n)+', Text),
            (r'import\b', Keyword.Namespace, '#pop'),
            # if None occurs here, it's "raise x from None", since None can
            # never be a module name
            (r'None\b', Name.Builtin.Pseudo, '#pop'),
            # sadly, in "raise x from y" y will be highlighted as namespace too
            (r'[a-zA-Z_.][\w.]*', Name.Namespace),
            # anything else here also means "raise x from y" and is therefore
            # not an error
            default('#pop'),
        ],
        'stringescape': [
            (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
             r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        'strings-single': innerstring_rules(String.Single),
        'strings-double': innerstring_rules(String.Double),
        'dqs': [
            (r'"', String.Double, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
            include('strings-double')
        ],
        'sqs': [
            (r"'", String.Single, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
            include('strings-single')
        ],
        'tdqs': [
            (r'"""', String.Double, '#pop'),
            include('strings-double'),
            (r'\n', String.Double)
        ],
        'tsqs': [
            (r"'''", String.Single, '#pop'),
            include('strings-single'),
            (r'\n', String.Single)
        ],
    }

    def analyse_text(text):
        # Only a shebang naming python2 / pythonw2 (optionally 2.x) counts
        # as evidence for this lexer.
        return shebang_matches(text, r'pythonw?2(\.\d)?')
646
647
class _PythonConsoleLexerBase(RegexLexer):
    """Auxiliary lexer for `PythonConsoleLexer`.

    Code tokens are output as ``Token.Other.Code``, traceback tokens as
    ``Token.Other.Traceback``.
    """

    name = 'Python console session'
    aliases = ['pycon', 'python-console']
    mimetypes = ['text/x-python-doctest']

    tokens = {
        'root': [
            (r'(>>> )(.*\n)', bygroups(Generic.Prompt, Other.Code), 'continuations'),
            # This happens, e.g., when tracebacks are embedded in documentation;
            # trailing whitespaces are often stripped in such contexts.
            (r'(>>>)(\n)', bygroups(Generic.Prompt, Whitespace)),
            (r'(\^C)?Traceback \(most recent call last\):\n', Other.Traceback, 'traceback'),
            # SyntaxError starts with this (the interpreter indents the
            # "File" line by two spaces)
            (r'  File "[^"]+", line \d+', Other.Traceback, 'traceback'),
            # anything else is program output
            (r'.*\n', Generic.Output),
        ],
        'continuations': [
            (r'(\.\.\. )(.*\n)', bygroups(Generic.Prompt, Other.Code)),
            # See above.
            (r'(\.\.\.)(\n)', bygroups(Generic.Prompt, Whitespace)),
            default('#pop'),
        ],
        'traceback': [
            # As soon as we see a traceback, consume everything until the next
            # >>> prompt.
            (r'(?=>>>( |$))', Text, '#pop'),
            (r'(KeyboardInterrupt)(\n)', bygroups(Name.Class, Whitespace)),
            (r'.*\n', Other.Traceback),
        ],
    }
683
684
class PythonConsoleLexer(DelegatingLexer):
    """
    For Python console output or doctests, such as:

    .. sourcecode:: pycon

        >>> a = 'foo'
        >>> print(a)
        foo
        >>> 1 / 0
        Traceback (most recent call last):
          File "<stdin>", line 1, in <module>
        ZeroDivisionError: integer division or modulo by zero

    Additional options:

    `python3`
        Use Python 3 lexer for code.  Default is ``True``.

        .. versionadded:: 1.0
        .. versionchanged:: 2.5
           Now defaults to ``True``.
    """

    name = 'Python console session'
    aliases = ['pycon', 'python-console']
    mimetypes = ['text/x-python-doctest']
    url = 'https://python.org'
    version_added = ''

    def __init__(self, **options):
        # Pick the matching pair of code/traceback lexers for the requested
        # Python flavour.
        use_py3 = get_bool_opt(options, 'python3', True)
        code_lexer, traceback_lexer = (
            (PythonLexer, PythonTracebackLexer) if use_py3
            else (Python2Lexer, Python2TracebackLexer)
        )

        # Two auxiliary lexers are needed, so DelegatingLexer is stacked
        # twice, each layer keyed on a different token.  TODO: DelegatingLexer
        # should support this directly, by accepting a tuple of auxiliary
        # lexers and a tuple of distinguishing tokens; this intermediary
        # class would then be unnecessary.
        class _ReplaceInnerCode(DelegatingLexer):
            # Inner layer: hands ``Other.Code`` spans to the code lexer.
            def __init__(self, **options):
                super().__init__(code_lexer, _PythonConsoleLexerBase,
                                 Other.Code, **options)

        # Outer layer: hands ``Other.Traceback`` spans to the traceback lexer.
        super().__init__(traceback_lexer, _ReplaceInnerCode,
                         Other.Traceback, **options)
732
733
class PythonTracebackLexer(RegexLexer):
    """
    For Python 3.x tracebacks, with support for chained exceptions.

    .. versionchanged:: 2.5
       This is now the default ``PythonTracebackLexer``. It is still available
       as the alias ``Python3TracebackLexer``.
    """

    name = 'Python Traceback'
    aliases = ['pytb', 'py3tb']
    filenames = ['*.pytb', '*.py3tb']
    mimetypes = ['text/x-python-traceback', 'text/x-python3-traceback']
    url = 'https://python.org'
    version_added = '1.0'

    tokens = {
        'root': [
            (r'\n', Whitespace),
            (r'^(\^C)?Traceback \(most recent call last\):\n', Generic.Traceback, 'intb'),
            # separators printed between chained exceptions
            (r'^During handling of the above exception, another '
             r'exception occurred:\n\n', Generic.Traceback),
            (r'^The above exception was the direct cause of the '
             r'following exception:\n\n', Generic.Traceback),
            # A traceback without the header line: enter 'intb' on a frame
            # line (two-space indent) without consuming it.
            (r'^(?=  File "[^"]+", line \d+)', Generic.Traceback, 'intb'),
            (r'^.*\n', Other),
        ],
        'intb': [
            # Frame lines are indented by two spaces ...
            (r'^(  File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Text, Name, Whitespace)),
            (r'^(  File )("[^"]+")(, line )(\d+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Whitespace)),
            # ... and the quoted source line by four; it is lexed as Python.
            (r'^(    )(.+)(\n)',
             bygroups(Whitespace, using(PythonLexer), Whitespace), 'markers'),
            (r'^([ \t]*)(\.\.\.)(\n)',
             bygroups(Whitespace, Comment, Whitespace)),  # for doctests...
            # final "ExceptionName: message" line ends the traceback
            (r'^([^:]+)(: )(.+)(\n)',
             bygroups(Generic.Error, Text, Name, Whitespace), '#pop'),
            # exception name without a message
            (r'^([a-zA-Z_][\w.]*)(:?\n)',
             bygroups(Generic.Error, Whitespace), '#pop'),
            default('#pop'),
        ],
        'markers': [
            # Either `PEP 657 <https://www.python.org/dev/peps/pep-0657/>`
            # error locations in Python 3.11+, or single-caret markers
            # for syntax errors before that.
            (r'^( {4,})([~^]+)(\n)',
             bygroups(Whitespace, Punctuation.Marker, Whitespace),
             '#pop'),
            default('#pop'),
        ],
    }
786
787
# Alias under which ``PythonTracebackLexer`` remains available (see its
# docstring).
Python3TracebackLexer = PythonTracebackLexer
789
790
class Python2TracebackLexer(RegexLexer):
    """
    For Python 2.x tracebacks.

    .. versionchanged:: 2.5
       This class has been renamed from ``PythonTracebackLexer``.
       ``PythonTracebackLexer`` now refers to the Python 3 variant.
    """

    name = 'Python 2.x Traceback'
    aliases = ['py2tb']
    filenames = ['*.py2tb']
    mimetypes = ['text/x-python2-traceback']
    url = 'https://python.org'
    version_added = '0.7'

    tokens = {
        'root': [
            # Cover both (most recent call last) and (innermost last)
            # The optional ^C allows us to catch keyboard interrupt signals.
            (r'^(\^C)?(Traceback.*\n)',
             bygroups(Text, Generic.Traceback), 'intb'),
            # SyntaxError starts with this (frame lines carry a two-space
            # indent); the lookahead enters 'intb' without consuming it.
            (r'^(?=  File "[^"]+", line \d+)', Generic.Traceback, 'intb'),
            (r'^.*\n', Other),
        ],
        'intb': [
            # Frame lines are indented by two spaces ...
            (r'^(  File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Text, Name, Whitespace)),
            (r'^(  File )("[^"]+")(, line )(\d+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Whitespace)),
            # ... and the quoted source line by four; it is lexed as Python 2.
            (r'^(    )(.+)(\n)',
             bygroups(Text, using(Python2Lexer), Whitespace), 'marker'),
            (r'^([ \t]*)(\.\.\.)(\n)',
             bygroups(Text, Comment, Whitespace)),  # for doctests...
            # final "ExceptionName: message" line ends the traceback
            (r'^([^:]+)(: )(.+)(\n)',
             bygroups(Generic.Error, Text, Name, Whitespace), '#pop'),
            # exception name without a message
            (r'^([a-zA-Z_]\w*)(:?\n)',
             bygroups(Generic.Error, Whitespace), '#pop')
        ],
        'marker': [
            # For syntax errors.
            (r'( {4,})(\^)', bygroups(Text, Punctuation.Marker), '#pop'),
            default('#pop'),
        ],
    }
837
838
class CythonLexer(RegexLexer):
    """
    For Pyrex and Cython source code.
    """

    name = 'Cython'
    url = 'https://cython.org'
    aliases = ['cython', 'pyx', 'pyrex']
    filenames = ['*.pyx', '*.pxd', '*.pxi']
    mimetypes = ['text/x-cython', 'application/x-cython']
    version_added = '1.1'

    # 'root' is the entry state.  The other states are either pushed by a
    # rule in 'root' (third tuple element) or pulled in through include() /
    # combined().  Within a state the rules are tried in the order listed.
    tokens = {
        'root': [
            (r'\n', Whitespace),
            # A triple-quoted string matched at ``^`` (after optional
            # whitespace) is emitted as a docstring.
            (r'^(\s*)("""(?:.|\n)*?""")', bygroups(Whitespace, String.Doc)),
            (r"^(\s*)('''(?:.|\n)*?''')", bygroups(Whitespace, String.Doc)),
            (r'[^\S\n]+', Text),
            (r'#.*$', Comment),
            (r'[]{}:(),;[]', Punctuation),
            # Backslash + newline is emitted as whitespace; any other
            # backslash is passed through as plain text.
            (r'\\\n', Whitespace),
            (r'\\', Text),
            (r'(in|is|and|or|not)\b', Operator.Word),
            # Angle-bracketed name such as ``<int>``: the inner part is
            # tagged as a type.
            (r'(<)([a-zA-Z0-9.?]+)(>)',
             bygroups(Punctuation, Keyword.Type, Punctuation)),
            (r'!=|==|<<|>>|[-~+/*%=<>&^|.?]', Operator),
            # NOTE(review): this pattern needs digits directly after
            # ``from`` (no whitespace) and tags the second number as Name.
            # It looks like a leftover for ``for i from 0 <= i < 10:`` --
            # confirm the intent before relying on it.
            (r'(from)(\d+)(<=)(\s+)(<)(\d+)(:)',
             bygroups(Keyword, Number.Integer, Operator, Whitespace, Operator,
                      Name, Punctuation)),
            include('keywords'),
            (r'(def|property)(\s+)', bygroups(Keyword, Whitespace), 'funcname'),
            (r'(cp?def)(\s+)', bygroups(Keyword, Whitespace), 'cdef'),
            # (should actually start a block with only cdefs)
            (r'(cdef)(:)', bygroups(Keyword, Punctuation)),
            (r'(class|cppclass|struct)(\s+)', bygroups(Keyword, Whitespace),
             'classname'),
            (r'(from)(\s+)', bygroups(Keyword, Whitespace), 'fromimport'),
            (r'(c?import)(\s+)', bygroups(Keyword, Whitespace), 'import'),
            include('builtins'),
            include('backtick'),
            # Strings with an ``r`` prefix enter the string states directly,
            # i.e. without the 'stringescape' rules.
            ('(?:[rR]|[uU][rR]|[rR][uU])"""', String, 'tdqs'),
            ("(?:[rR]|[uU][rR]|[rR][uU])'''", String, 'tsqs'),
            ('(?:[rR]|[uU][rR]|[rR][uU])"', String, 'dqs'),
            ("(?:[rR]|[uU][rR]|[rR][uU])'", String, 'sqs'),
            # All other strings additionally get escape-sequence highlighting.
            ('[uU]?"""', String, combined('stringescape', 'tdqs')),
            ("[uU]?'''", String, combined('stringescape', 'tsqs')),
            ('[uU]?"', String, combined('stringescape', 'dqs')),
            ("[uU]?'", String, combined('stringescape', 'sqs')),
            include('name'),
            include('numbers'),
        ],
        'keywords': [
            (words((
                'assert', 'async', 'await', 'break', 'by', 'continue', 'ctypedef',
                'del', 'elif', 'else', 'except', 'except?', 'exec', 'finally',
                'for', 'fused', 'gil', 'global', 'if', 'include', 'lambda',
                'namespace', 'new', 'noexcept', 'nogil', 'pass', 'print', 'raise',
                'return', 'try', 'while', 'yield', 'as', 'with'),
                suffix=r'\b'),
             Keyword),
            (words(('True', 'False', 'None', 'NULL'), suffix=r'\b'),
             Keyword.Constant),
            (r'(DEF|IF|ELIF|ELSE)\b', Comment.Preproc),
        ],
        'builtins': [
            # The ``(?<!\.)`` prefix keeps attribute accesses such as
            # ``obj.len`` from being tagged as builtins.
            (words((
                '__import__', 'abs', 'all', 'any', 'apply', 'basestring', 'bin',
                'bint', 'bool', 'buffer', 'bytearray', 'bytes', 'callable', 'char',
                'chr', 'classmethod', 'cmp', 'coerce', 'compile', 'complex',
                'delattr', 'dict', 'dir', 'divmod', 'double', 'enumerate', 'eval',
                'execfile', 'exit', 'file', 'filter', 'float', 'frozenset',
                'getattr', 'globals', 'hasattr', 'hash', 'hex', 'id', 'input',
                'int', 'intern', 'isinstance', 'issubclass', 'iter', 'len', 'list',
                'locals', 'long', 'map', 'max', 'min', 'next', 'object', 'oct',
                'open', 'ord', 'pow', 'property', 'Py_ssize_t', 'range',
                'raw_input', 'reduce', 'reload', 'repr', 'reversed', 'round',
                'set', 'setattr', 'size_t', 'slice', 'sorted', 'staticmethod',
                'ssize_t', 'str', 'sum', 'super', 'tuple', 'type', 'unichr',
                'unicode', 'unsigned', 'vars', 'xrange', 'zip'),
                prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Builtin),
            (r'(?<!\.)(self|cls|Ellipsis|NotImplemented)\b', Name.Builtin.Pseudo),
            (words((
                'ArithmeticError', 'AssertionError', 'AttributeError',
                'BaseException', 'DeprecationWarning', 'EOFError',
                'EnvironmentError', 'Exception', 'FloatingPointError',
                'FutureWarning', 'GeneratorExit', 'IOError', 'ImportError',
                'ImportWarning', 'IndentationError', 'IndexError', 'KeyError',
                'KeyboardInterrupt', 'LookupError', 'MemoryError', 'NameError',
                'NotImplementedError', 'OSError', 'OverflowError',
                'OverflowWarning', 'PendingDeprecationWarning', 'ReferenceError',
                'RuntimeError', 'RuntimeWarning', 'StandardError',
                'StopIteration', 'SyntaxError', 'SyntaxWarning', 'SystemError',
                'SystemExit', 'TabError', 'TypeError', 'UnboundLocalError',
                'UnicodeDecodeError', 'UnicodeEncodeError', 'UnicodeError',
                'UnicodeTranslateError', 'UnicodeWarning', 'UserWarning',
                'ValueError', 'Warning', 'ZeroDivisionError'),
                prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Exception),
        ],
        'numbers': [
            # Rule order matters here: the first rule that matches wins, so
            # the more specific literal forms have to come first.
            (r'0[xX][a-fA-F0-9]+', Number.Hex),
            # A float needs a decimal point or an exponent.  Without that
            # requirement this rule would also match every plain run of
            # digits and the integer rules below would never be reached.
            (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
            (r'\d+[eE][+-]?[0-9]+', Number.Float),
            (r'0\d+', Number.Oct),
            (r'\d+L', Number.Integer.Long),
            (r'\d+', Number.Integer)
        ],
        'backtick': [
            ('`.*?`', String.Backtick),
        ],
        'name': [
            (r'@\w+', Name.Decorator),
            (r'[a-zA-Z_]\w*', Name),
        ],
        # Entered after ``def``/``property`` + whitespace: the next
        # identifier is the function name.
        'funcname': [
            (r'[a-zA-Z_]\w*', Name.Function, '#pop')
        ],
        # Entered after ``cdef``/``cpdef`` + whitespace.
        'cdef': [
            (r"(public|readonly|extern|api|inline|packed)\b", Keyword.Reserved),
            (r"(struct|enum|union|class|cppclass)\b(\s+)([a-zA-Z_]\w*)",
             bygroups(Keyword, Whitespace, Name.Class), "#pop",),
            # Identifier directly before ``(``: a function name.
            (r"([a-zA-Z_]\w*)(\s*)(?=\()", bygroups(Name.Function, Whitespace),
             "#pop"),
            # Identifier before ``:``, ``,``, ``=``, ``#``, a newline or the
            # end of a line: a variable name.
            (r"([a-zA-Z_]\w*)(\s*)(?=[:,=#\n]|$)",
             bygroups(Name.Variable, Whitespace), "#pop"),
            # NOTE(review): the rule above already matches an identifier
            # followed by ``,`` (its lookahead contains a comma), so this
            # rule appears to be unreachable -- confirm.
            (r"([a-zA-Z_]\w*)(\s*)(,)",
             bygroups(Name.Variable, Whitespace, Punctuation)),
            (r'from\b', Keyword, '#pop'),
            (r'as\b', Keyword),
            (r':', Punctuation, '#pop'),
            # Zero-width match: leave the state when a string literal starts.
            (r'(?=["\'])', Text, '#pop'),
            # Any identifier not handled above is tagged as a type name.
            (r'[a-zA-Z_]\w*', Keyword.Type),
            (r'.', Text),
        ],
        # Entered after ``class``/``cppclass``/``struct`` + whitespace.
        'classname': [
            (r'[a-zA-Z_]\w*', Name.Class, '#pop')
        ],
        'import': [
            (r'(\s+)(as)(\s+)', bygroups(Whitespace, Keyword, Whitespace)),
            (r'[a-zA-Z_][\w.]*', Name.Namespace),
            (r'(\s*)(,)(\s*)', bygroups(Whitespace, Operator, Whitespace)),
            default('#pop')  # all else: go back
        ],
        'fromimport': [
            (r'(\s+)(c?import)\b', bygroups(Whitespace, Keyword), '#pop'),
            (r'[a-zA-Z_.][\w.]*', Name.Namespace),
            # ``cdef foo from "header"``, or ``for foo from 0 < i < 10``
            default('#pop'),
        ],
        'stringescape': [
            (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
             r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        # Shared string body rules, included by the four quote states below.
        'strings': [
            (r'%(\([a-zA-Z0-9]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[E-GXc-giorsux%]', String.Interpol),
            (r'[^\\\'"%\n]+', String),
            # quotes, percents and backslashes must be parsed one at a time
            (r'[\'"\\]', String),
            # unhandled string formatting sign
            (r'%', String)
            # newlines are an error (use "nl" state)
        ],
        'nl': [
            (r'\n', String)
        ],
        'dqs': [
            (r'"', String, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here again for raw strings
            include('strings')
        ],
        'sqs': [
            (r"'", String, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here again for raw strings
            include('strings')
        ],
        # Triple-quoted strings may span lines, hence the extra 'nl' rules.
        'tdqs': [
            (r'"""', String, '#pop'),
            include('strings'),
            include('nl')
        ],
        'tsqs': [
            (r"'''", String, '#pop'),
            include('strings'),
            include('nl')
        ],
    }
1017
1018
class DgLexer(RegexLexer):
    """
    Lexer for dg,
    a functional and object-oriented programming language
    running on the CPython 3 VM.
    """
    name = 'dg'
    aliases = ['dg']
    filenames = ['*.dg']
    mimetypes = ['text/x-dg']
    url = 'http://pyos.github.io/dg'
    version_added = '1.6'

    # Many patterns below end in ``'*`` or use the ``(?!['\w])`` lookahead:
    # identifiers may carry trailing apostrophes, so a name only ends where
    # neither a word character nor an apostrophe follows.
    tokens = {
        'root': [
            (r'\s+', Text),
            (r'#.*?$', Comment.Single),

            # Numeric literals; prefixed forms first so the leading ``0`` is
            # not consumed by the plain integer rule.
            (r'(?i)0b[01]+', Number.Bin),
            (r'(?i)0o[0-7]+', Number.Oct),
            (r'(?i)0x[0-9a-f]+', Number.Hex),
            (r'(?i)[+-]?[0-9]+\.[0-9]+(e[+-]?[0-9]+)?j?', Number.Float),
            (r'(?i)[+-]?[0-9]+e[+-]?\d+j?', Number.Float),
            (r'(?i)[+-]?[0-9]+j?', Number.Integer),

            # String openers.  Each pushes a combined state made of the
            # escape rules, the matching closing-quote rule and the shared
            # string body rules.
            (r"(?i)(br|r?b?)'''", String,
             combined('stringescape', 'tsqs', 'string')),
            (r'(?i)(br|r?b?)"""', String,
             combined('stringescape', 'tdqs', 'string')),
            (r"(?i)(br|r?b?)'", String,
             combined('stringescape', 'sqs', 'string')),
            (r'(?i)(br|r?b?)"', String,
             combined('stringescape', 'dqs', 'string')),

            # A backtick-quoted identifier is emitted as an operator.
            (r"`\w+'*`", Operator),
            (r'\b(and|in|is|or|where)\b', Operator.Word),
            (r'[!$%&*+\-./:<-@\\^|~;,]+', Operator),

            (words((
                'bool', 'bytearray', 'bytes', 'classmethod', 'complex', 'dict',
                'dict\'', 'float', 'frozenset', 'int', 'list', 'list\'',
                'memoryview', 'object', 'property', 'range', 'set', 'set\'',
                'slice', 'staticmethod', 'str', 'super', 'tuple', 'tuple\'',
                'type'),
                prefix=r'(?<!\.)', suffix=r'(?![\'\w])'),
             Name.Builtin),
            # words() takes literal strings, not regex fragments, so every
            # variant (``foldl``/``foldl1``, ``scanl``/``scanl1``) has to be
            # listed explicitly.
            (words((
                '__import__', 'abs', 'all', 'any', 'bin', 'bind', 'chr', 'cmp',
                'compile', 'complex', 'delattr', 'dir', 'divmod', 'drop',
                'dropwhile', 'enumerate', 'eval', 'exhaust', 'filter', 'flip',
                'foldl', 'foldl1', 'format', 'fst', 'getattr', 'globals',
                'hasattr', 'hash', 'head', 'hex', 'id', 'init', 'input',
                'isinstance', 'issubclass', 'iter', 'iterate', 'last', 'len',
                'locals', 'map', 'max', 'min', 'next', 'oct', 'open', 'ord',
                'pow', 'print', 'repr', 'reversed', 'round', 'setattr', 'scanl',
                'scanl1', 'snd', 'sorted', 'sum', 'tail', 'take', 'takewhile',
                'vars', 'zip'),
                prefix=r'(?<!\.)', suffix=r'(?![\'\w])'),
             Name.Builtin),
            (r"(?<!\.)(self|Ellipsis|NotImplemented|None|True|False)(?!['\w])",
             Name.Builtin.Pseudo),

            # Capitalised names ending in Error/Exception/Warning, plus a few
            # exception names that do not follow that naming scheme.
            (r"(?<!\.)[A-Z]\w*(Error|Exception|Warning)'*(?!['\w])",
             Name.Exception),
            (r"(?<!\.)(Exception|GeneratorExit|KeyboardInterrupt|StopIteration|"
             r"SystemExit)(?!['\w])", Name.Exception),

            (r"(?<![\w.])(except|finally|for|if|import|not|otherwise|raise|"
             r"subclass|while|with|yield)(?!['\w])", Keyword.Reserved),

            # All-uppercase names stay plain names; other capitalised names
            # are tagged as types; everything else is a plain name.
            (r"[A-Z_]+'*(?!['\w])", Name),
            (r"[A-Z]\w+'*(?!['\w])", Keyword.Type),
            (r"\w+'*", Name),

            (r'[()]', Punctuation),
            # Anything not matched above is flagged as an error.
            (r'.', Error),
        ],
        'stringescape': [
            (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
             r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        'string': [
            (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[E-GXc-giorsux%]', String.Interpol),
            (r'[^\\\'"%\n]+', String),
            # quotes, percents and backslashes must be parsed one at a time
            (r'[\'"\\]', String),
            # unhandled string formatting sign
            (r'%', String),
            (r'\n', String)
        ],
        # Closing-quote rules, one state per quote style.
        'dqs': [
            (r'"', String, '#pop')
        ],
        'sqs': [
            (r"'", String, '#pop')
        ],
        'tdqs': [
            (r'"""', String, '#pop')
        ],
        'tsqs': [
            (r"'''", String, '#pop')
        ],
    }
1116
1117
class NumPyLexer(PythonLexer):
    """
    A Python lexer recognizing Numerical Python builtins.
    """

    name = 'NumPy'
    url = 'https://numpy.org/'
    aliases = ['numpy']
    version_added = '0.10'

    # override the mimetypes to not inherit them from python
    mimetypes = []
    filenames = []

    # Names that get_tokens_unprocessed() re-tags from Name to Keyword.Pseudo.
    EXTRA_KEYWORDS = {
        'abs', 'absolute', 'accumulate', 'add', 'alen', 'all', 'allclose',
        'alltrue', 'alterdot', 'amax', 'amin', 'angle', 'any', 'append',
        'apply_along_axis', 'apply_over_axes', 'arange', 'arccos', 'arccosh',
        'arcsin', 'arcsinh', 'arctan', 'arctan2', 'arctanh', 'argmax', 'argmin',
        'argsort', 'argwhere', 'around', 'array', 'array2string', 'array_equal',
        'array_equiv', 'array_repr', 'array_split', 'array_str', 'arrayrange',
        'asanyarray', 'asarray', 'asarray_chkfinite', 'ascontiguousarray',
        'asfarray', 'asfortranarray', 'asmatrix', 'asscalar', 'astype',
        'atleast_1d', 'atleast_2d', 'atleast_3d', 'average', 'bartlett',
        'base_repr', 'beta', 'binary_repr', 'bincount', 'binomial',
        'bitwise_and', 'bitwise_not', 'bitwise_or', 'bitwise_xor', 'blackman',
        'bmat', 'broadcast', 'byte_bounds', 'bytes', 'byteswap', 'c_',
        'can_cast', 'ceil', 'choose', 'clip', 'column_stack', 'common_type',
        'compare_chararrays', 'compress', 'concatenate', 'conj', 'conjugate',
        'convolve', 'copy', 'corrcoef', 'correlate', 'cos', 'cosh', 'cov',
        'cross', 'cumprod', 'cumproduct', 'cumsum', 'delete', 'deprecate',
        'diag', 'diagflat', 'diagonal', 'diff', 'digitize', 'disp', 'divide',
        'dot', 'dsplit', 'dstack', 'dtype', 'dump', 'dumps', 'ediff1d', 'empty',
        'empty_like', 'equal', 'exp', 'expand_dims', 'expm1', 'extract', 'eye',
        'fabs', 'fastCopyAndTranspose', 'fft', 'fftfreq', 'fftshift', 'fill',
        'finfo', 'fix', 'flat', 'flatnonzero', 'flatten', 'fliplr', 'flipud',
        'floor', 'floor_divide', 'fmod', 'frexp', 'fromarrays', 'frombuffer',
        'fromfile', 'fromfunction', 'fromiter', 'frompyfunc', 'fromstring',
        'generic', 'get_array_wrap', 'get_include', 'get_numarray_include',
        'get_numpy_include', 'get_printoptions', 'getbuffer', 'getbufsize',
        'geterr', 'geterrcall', 'geterrobj', 'getfield', 'gradient', 'greater',
        'greater_equal', 'gumbel', 'hamming', 'hanning', 'histogram',
        'histogram2d', 'histogramdd', 'hsplit', 'hstack', 'hypot', 'i0',
        'identity', 'ifft', 'imag', 'index_exp', 'indices', 'inf', 'info',
        'inner', 'insert', 'int_asbuffer', 'interp', 'intersect1d',
        'intersect1d_nu', 'inv', 'invert', 'iscomplex', 'iscomplexobj',
        'isfinite', 'isfortran', 'isinf', 'isnan', 'isneginf', 'isposinf',
        'isreal', 'isrealobj', 'isscalar', 'issctype', 'issubclass_',
        'issubdtype', 'issubsctype', 'item', 'itemset', 'iterable', 'ix_',
        'kaiser', 'kron', 'ldexp', 'left_shift', 'less', 'less_equal', 'lexsort',
        'linspace', 'load', 'loads', 'loadtxt', 'log', 'log10', 'log1p', 'log2',
        'logical_and', 'logical_not', 'logical_or', 'logical_xor', 'logspace',
        'lstsq', 'mat', 'matrix', 'max', 'maximum', 'maximum_sctype',
        'may_share_memory', 'mean', 'median', 'meshgrid', 'mgrid', 'min',
        'minimum', 'mintypecode', 'mod', 'modf', 'msort', 'multiply', 'nan',
        'nan_to_num', 'nanargmax', 'nanargmin', 'nanmax', 'nanmin', 'nansum',
        'ndenumerate', 'ndim', 'ndindex', 'negative', 'newaxis', 'newbuffer',
        'newbyteorder', 'nonzero', 'not_equal', 'obj2sctype', 'ogrid', 'ones',
        'ones_like', 'outer', 'permutation', 'piecewise', 'pinv', 'pkgload',
        'place', 'poisson', 'poly', 'poly1d', 'polyadd', 'polyder', 'polydiv',
        'polyfit', 'polyint', 'polymul', 'polysub', 'polyval', 'power', 'prod',
        'product', 'ptp', 'put', 'putmask', 'r_', 'randint', 'random_integers',
        'random_sample', 'ranf', 'rank', 'ravel', 'real', 'real_if_close',
        'recarray', 'reciprocal', 'reduce', 'remainder', 'repeat', 'require',
        'reshape', 'resize', 'restoredot', 'right_shift', 'rint', 'roll',
        'rollaxis', 'roots', 'rot90', 'round', 'round_', 'row_stack', 's_',
        'sample', 'savetxt', 'sctype2char', 'searchsorted', 'seed', 'select',
        'set_numeric_ops', 'set_printoptions', 'set_string_function',
        'setbufsize', 'setdiff1d', 'seterr', 'seterrcall', 'seterrobj',
        'setfield', 'setflags', 'setmember1d', 'setxor1d', 'shape',
        'show_config', 'shuffle', 'sign', 'signbit', 'sin', 'sinc', 'sinh',
        'size', 'slice', 'solve', 'sometrue', 'sort', 'sort_complex', 'source',
        'split', 'sqrt', 'square', 'squeeze', 'standard_normal', 'std',
        'subtract', 'sum', 'svd', 'swapaxes', 'take', 'tan', 'tanh', 'tensordot',
        'test', 'tile', 'tofile', 'tolist', 'tostring', 'trace', 'transpose',
        'trapz', 'tri', 'tril', 'trim_zeros', 'triu', 'true_divide', 'typeDict',
        'typename', 'uniform', 'union1d', 'unique', 'unique1d', 'unravel_index',
        'unwrap', 'vander', 'var', 'vdot', 'vectorize', 'view', 'vonmises',
        'vsplit', 'vstack', 'weibull', 'where', 'who', 'zeros', 'zeros_like'
    }

    def get_tokens_unprocessed(self, text, stack=('root',)):
        """
        Tokenize ``text`` as Python and re-tag known NumPy names.

        Yields ``(index, tokentype, value)`` triples exactly as produced by
        ``PythonLexer``, except that a plain ``Name`` token whose value is in
        ``EXTRA_KEYWORDS`` is yielded as ``Keyword.Pseudo`` instead.

        ``stack`` is forwarded unchanged to the base implementation.  It is
        accepted so that this override keeps the ``RegexLexer`` signature and
        can be called by code that supplies an initial state stack.
        """
        for index, token, value in \
                PythonLexer.get_tokens_unprocessed(self, text, stack):
            # Identity check: only exact ``Name`` tokens are promoted, not
            # subtypes such as Name.Function or Name.Builtin.
            if token is Name and value in self.EXTRA_KEYWORDS:
                yield index, Keyword.Pseudo, value
            else:
                yield index, token, value

    def analyse_text(text):
        """
        Report whether ``text`` looks like Python code that imports NumPy.

        The result is truthy when the text has a Python shebang or contains
        ``'import '`` within its first 1000 characters, and those first 1000
        characters also contain ``import numpy`` or ``from numpy import``.
        """
        # NOTE(review): defined without ``self``; presumably the lexer
        # machinery wraps this into a static analyser -- confirm.
        ltext = text[:1000]
        return (shebang_matches(text, r'pythonw?(3(\.\d)?)?') or
                'import ' in ltext) \
            and ('import numpy' in ltext or 'from numpy import' in ltext)