1"""
2 pygments.lexers.python
3 ~~~~~~~~~~~~~~~~~~~~~~
4
5 Lexers for Python and related languages.
6
7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11import keyword
12
13from pygments.lexer import DelegatingLexer, RegexLexer, include, \
14 bygroups, using, default, words, combined, this
15from pygments.util import get_bool_opt, shebang_matches
16from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
17 Number, Punctuation, Generic, Other, Error, Whitespace
18from pygments import unistring as uni
19
# Lexer classes exported by this module.  The ``Python3Lexer`` and
# ``Python3TracebackLexer`` aliases defined further down are not listed here.
__all__ = ['PythonLexer', 'PythonConsoleLexer', 'PythonTracebackLexer',
           'Python2Lexer', 'Python2TracebackLexer',
           'CythonLexer', 'DgLexer', 'NumPyLexer']
23
24
class PythonLexer(RegexLexer):
    """
    For Python source code (version 3.x).

    .. versionchanged:: 2.5
       This is now the default ``PythonLexer``. It is still available as the
       alias ``Python3Lexer``.
    """

    name = 'Python'
    url = 'https://www.python.org'
    aliases = ['python', 'py', 'sage', 'python3', 'py3', 'bazel', 'starlark', 'pyi']
    filenames = [
        '*.py',
        '*.pyw',
        # Type stubs
        '*.pyi',
        # Jython
        '*.jy',
        # Sage
        '*.sage',
        # SCons
        '*.sc',
        'SConstruct',
        'SConscript',
        # Skylark/Starlark (used by Bazel, Buck, and Pants)
        '*.bzl',
        'BUCK',
        'BUILD',
        'BUILD.bazel',
        'WORKSPACE',
        # Twisted Application infrastructure
        '*.tac',
    ]
    mimetypes = ['text/x-python', 'application/x-python',
                 'text/x-python3', 'application/x-python3']
    version_added = '0.10'

    # Regex source for an identifier: one character from ``uni.xid_start``
    # followed by any number of characters from ``uni.xid_continue``.
    uni_name = f"[{uni.xid_start}][{uni.xid_continue}]*"

    def innerstring_rules(ttype):
        """Return the rule list for the body of a plain (non-f) string.

        Called while the class body is executed (it takes no ``self``) to
        build the ``strings-single`` / ``strings-double`` states.  Literal
        text is emitted as *ttype*; ``%``-style and ``str.format``-style
        placeholders are emitted as ``String.Interpol``.
        """
        return [
            # the old style '%s' % (...) string formatting (still valid in Py3)
            (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[E-GXc-giorsaux%]', String.Interpol),
            # the new style '{}'.format(...) string formatting
            (r'\{'
             r'((\w+)((\.\w+)|(\[[^\]]+\]))*)?'  # field name
             r'(\![sra])?'                       # conversion
             r'(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?'
             r'\}', String.Interpol),

            # backslashes, quotes and formatting signs must be parsed one at a time
            (r'[^\\\'"%{\n]+', ttype),
            (r'[\'"\\]', ttype),
            # unhandled string formatting sign
            (r'%|(\{{1,2})', ttype)
            # newlines are an error (use "nl" state)
        ]

    def fstring_rules(ttype):
        """Return the rule list for the body of an f-string.

        Called while the class body is executed to build the
        ``fstrings-single`` / ``fstrings-double`` states.  Literal text is
        emitted as *ttype*; ``{`` is emitted as ``String.Interpol`` and pushes
        the ``expr-inside-fstring`` state.
        """
        return [
            # Assuming that a '}' is the closing brace after format specifier.
            # Sadly, this means that we won't detect syntax error. But it's
            # more important to parse correct syntax correctly, than to
            # highlight invalid syntax.
            (r'\}', String.Interpol),
            (r'\{', String.Interpol, 'expr-inside-fstring'),
            # backslashes, quotes and formatting signs must be parsed one at a time
            (r'[^\\\'"{}\n]+', ttype),
            (r'[\'"\\]', ttype),
            # newlines are an error (use "nl" state)
        ]

    # State machine definition.  Within a state the rules are tried in the
    # order listed, so the ordering below is significant.
    tokens = {
        # Statement level: doc-style triple-quoted strings at the start of a
        # line, comments, line continuations and the statement keywords that
        # push a dedicated state; everything else is handled by 'expr'.
        'root': [
            (r'\n', Whitespace),
            (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")',
             bygroups(Whitespace, String.Affix, String.Doc)),
            (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')",
             bygroups(Whitespace, String.Affix, String.Doc)),
            (r'\A#!.+$', Comment.Hashbang),
            (r'#.*$', Comment.Single),
            (r'\\\n', Text),
            (r'\\', Text),
            include('keywords'),
            include('soft-keywords'),
            (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Whitespace), 'funcname'),
            (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Whitespace), 'classname'),
            (r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Whitespace),
             'fromimport'),
            (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Whitespace),
             'import'),
            include('expr'),
        ],
        # Expression level: string openers (longest prefixes first), numbers,
        # operators, punctuation and names.  Also included from the f-string
        # replacement-field states below.
        'expr': [
            # raw f-strings
            ('(?i)(rf|fr)(""")',
             bygroups(String.Affix, String.Double),
             combined('rfstringescape', 'tdqf')),
            ("(?i)(rf|fr)(''')",
             bygroups(String.Affix, String.Single),
             combined('rfstringescape', 'tsqf')),
            ('(?i)(rf|fr)(")',
             bygroups(String.Affix, String.Double),
             combined('rfstringescape', 'dqf')),
            ("(?i)(rf|fr)(')",
             bygroups(String.Affix, String.Single),
             combined('rfstringescape', 'sqf')),
            # non-raw f-strings
            ('([fF])(""")', bygroups(String.Affix, String.Double),
             combined('fstringescape', 'tdqf')),
            ("([fF])(''')", bygroups(String.Affix, String.Single),
             combined('fstringescape', 'tsqf')),
            ('([fF])(")', bygroups(String.Affix, String.Double),
             combined('fstringescape', 'dqf')),
            ("([fF])(')", bygroups(String.Affix, String.Single),
             combined('fstringescape', 'sqf')),
            # raw bytes and strings
            ('(?i)(rb|br|r)(""")',
             bygroups(String.Affix, String.Double), 'tdqs'),
            ("(?i)(rb|br|r)(''')",
             bygroups(String.Affix, String.Single), 'tsqs'),
            ('(?i)(rb|br|r)(")',
             bygroups(String.Affix, String.Double), 'dqs'),
            ("(?i)(rb|br|r)(')",
             bygroups(String.Affix, String.Single), 'sqs'),
            # non-raw strings
            ('([uU]?)(""")', bygroups(String.Affix, String.Double),
             combined('stringescape', 'tdqs')),
            ("([uU]?)(''')", bygroups(String.Affix, String.Single),
             combined('stringescape', 'tsqs')),
            ('([uU]?)(")', bygroups(String.Affix, String.Double),
             combined('stringescape', 'dqs')),
            ("([uU]?)(')", bygroups(String.Affix, String.Single),
             combined('stringescape', 'sqs')),
            # non-raw bytes
            ('([bB])(""")', bygroups(String.Affix, String.Double),
             combined('bytesescape', 'tdqs')),
            ("([bB])(''')", bygroups(String.Affix, String.Single),
             combined('bytesescape', 'tsqs')),
            ('([bB])(")', bygroups(String.Affix, String.Double),
             combined('bytesescape', 'dqs')),
            ("([bB])(')", bygroups(String.Affix, String.Single),
             combined('bytesescape', 'sqs')),

            (r'[^\S\n]+', Text),
            include('numbers'),
            (r'!=|==|<<|>>|:=|[-~+/*%=<>&^|.]', Operator),
            (r'[]{}:(),;[]', Punctuation),
            (r'(in|is|and|or|not)\b', Operator.Word),
            include('expr-keywords'),
            include('builtins'),
            include('magicfuncs'),
            include('magicvars'),
            include('name'),
        ],
        # Replacement field of an f-string; pushed by the '{' rule returned
        # from fstring_rules().  Popped on the closing '}' or on the ':' that
        # starts a format specifier.
        'expr-inside-fstring': [
            (r'[{([]', Punctuation, 'expr-inside-fstring-inner'),
            # without format specifier
            (r'(=\s*)?'         # debug (https://bugs.python.org/issue36817)
             r'(\![sraf])?'     # conversion
             r'\}', String.Interpol, '#pop'),
            # with format specifier
            # we'll catch the remaining '}' in the outer scope
            (r'(=\s*)?'         # debug (https://bugs.python.org/issue36817)
             r'(\![sraf])?'     # conversion
             r':', String.Interpol, '#pop'),
            (r'\s+', Whitespace),  # allow new lines
            include('expr'),
        ],
        # Bracketed sub-expression inside a replacement field: every opening
        # bracket pushes this state again and any closing bracket pops once.
        'expr-inside-fstring-inner': [
            (r'[{([]', Punctuation, 'expr-inside-fstring-inner'),
            (r'[])}]', Punctuation, '#pop'),
            (r'\s+', Whitespace),  # allow new lines
            include('expr'),
        ],
        'expr-keywords': [
            # Based on https://docs.python.org/3/reference/expressions.html
            (words((
                'async for', 'await', 'else', 'for', 'if', 'lambda',
                'yield', 'yield from'), suffix=r'\b'),
             Keyword),
            (words(('True', 'False', 'None'), suffix=r'\b'), Keyword.Constant),
        ],
        # Statement keywords (only included from 'root').
        'keywords': [
            (words((
                'assert', 'async', 'await', 'break', 'continue', 'del', 'elif',
                'else', 'except', 'finally', 'for', 'global', 'if', 'lambda',
                'pass', 'raise', 'nonlocal', 'return', 'try', 'while', 'yield',
                'yield from', 'as', 'with'), suffix=r'\b'),
             Keyword),
            (words(('True', 'False', 'None'), suffix=r'\b'), Keyword.Constant),
        ],
        'soft-keywords': [
            # `match`, `case` and `_` soft keywords
            (r'(^[ \t]*)'              # at beginning of line + possible indentation
             r'(match|case)\b'         # a possible keyword
             r'(?![ \t]*(?:'           # not followed by...
             r'[:,;=^&|@~)\]}]|(?:' +  # characters and keywords that mean this isn't
                                       # pattern matching (but None/True/False is ok)
             r'|'.join(k for k in keyword.kwlist if k[0].islower()) + r')\b))',
             bygroups(Text, Keyword), 'soft-keywords-inner'),
        ],
        'soft-keywords-inner': [
            # optional `_` keyword
            (r'(\s+)([^\n_]*)(_\b)', bygroups(Whitespace, using(this), Keyword)),
            default('#pop')
        ],
        # Builtin functions/types, pseudo-builtins and builtin exceptions.
        # The (?<!\.) prefix keeps attribute accesses such as ``x.list`` from
        # matching.
        'builtins': [
            (words((
                '__import__', 'abs', 'aiter', 'all', 'any', 'bin', 'bool', 'bytearray',
                'breakpoint', 'bytes', 'callable', 'chr', 'classmethod', 'compile',
                'complex', 'delattr', 'dict', 'dir', 'divmod', 'enumerate', 'eval',
                'filter', 'float', 'format', 'frozenset', 'getattr', 'globals',
                'hasattr', 'hash', 'hex', 'id', 'input', 'int', 'isinstance',
                'issubclass', 'iter', 'len', 'list', 'locals', 'map', 'max',
                'memoryview', 'min', 'next', 'object', 'oct', 'open', 'ord', 'pow',
                'print', 'property', 'range', 'repr', 'reversed', 'round', 'set',
                'setattr', 'slice', 'sorted', 'staticmethod', 'str', 'sum', 'super',
                'tuple', 'type', 'vars', 'zip'), prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Builtin),
            (r'(?<!\.)(self|Ellipsis|NotImplemented|cls)\b', Name.Builtin.Pseudo),
            (words((
                'ArithmeticError', 'AssertionError', 'AttributeError',
                'BaseException', 'BufferError', 'BytesWarning', 'DeprecationWarning',
                'EOFError', 'EnvironmentError', 'Exception', 'FloatingPointError',
                'FutureWarning', 'GeneratorExit', 'IOError', 'ImportError',
                'ImportWarning', 'IndentationError', 'IndexError', 'KeyError',
                'KeyboardInterrupt', 'LookupError', 'MemoryError', 'NameError',
                'NotImplementedError', 'OSError', 'OverflowError',
                'PendingDeprecationWarning', 'ReferenceError', 'ResourceWarning',
                'RuntimeError', 'RuntimeWarning', 'StopIteration',
                'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit',
                'TabError', 'TypeError', 'UnboundLocalError', 'UnicodeDecodeError',
                'UnicodeEncodeError', 'UnicodeError', 'UnicodeTranslateError',
                'UnicodeWarning', 'UserWarning', 'ValueError', 'VMSError',
                'Warning', 'WindowsError', 'ZeroDivisionError',
                # new builtin exceptions from PEP 3151
                'BlockingIOError', 'ChildProcessError', 'ConnectionError',
                'BrokenPipeError', 'ConnectionAbortedError', 'ConnectionRefusedError',
                'ConnectionResetError', 'FileExistsError', 'FileNotFoundError',
                'InterruptedError', 'IsADirectoryError', 'NotADirectoryError',
                'PermissionError', 'ProcessLookupError', 'TimeoutError',
                # others new in Python 3
                'StopAsyncIteration', 'ModuleNotFoundError', 'RecursionError',
                'EncodingWarning'),
                prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Exception),
        ],
        'magicfuncs': [
            (words((
                '__abs__', '__add__', '__aenter__', '__aexit__', '__aiter__',
                '__and__', '__anext__', '__await__', '__bool__', '__bytes__',
                '__call__', '__complex__', '__contains__', '__del__', '__delattr__',
                '__delete__', '__delitem__', '__dir__', '__divmod__', '__enter__',
                '__eq__', '__exit__', '__float__', '__floordiv__', '__format__',
                '__ge__', '__get__', '__getattr__', '__getattribute__',
                '__getitem__', '__gt__', '__hash__', '__iadd__', '__iand__',
                '__ifloordiv__', '__ilshift__', '__imatmul__', '__imod__',
                '__imul__', '__index__', '__init__', '__instancecheck__',
                '__int__', '__invert__', '__ior__', '__ipow__', '__irshift__',
                '__isub__', '__iter__', '__itruediv__', '__ixor__', '__le__',
                '__len__', '__length_hint__', '__lshift__', '__lt__', '__matmul__',
                '__missing__', '__mod__', '__mul__', '__ne__', '__neg__',
                '__new__', '__next__', '__or__', '__pos__', '__pow__',
                '__prepare__', '__radd__', '__rand__', '__rdivmod__', '__repr__',
                '__reversed__', '__rfloordiv__', '__rlshift__', '__rmatmul__',
                '__rmod__', '__rmul__', '__ror__', '__round__', '__rpow__',
                '__rrshift__', '__rshift__', '__rsub__', '__rtruediv__',
                '__rxor__', '__set__', '__setattr__', '__setitem__', '__str__',
                '__sub__', '__subclasscheck__', '__truediv__',
                '__xor__'), suffix=r'\b'),
             Name.Function.Magic),
        ],
        'magicvars': [
            (words((
                '__annotations__', '__bases__', '__class__', '__closure__',
                '__code__', '__defaults__', '__dict__', '__doc__', '__file__',
                '__func__', '__globals__', '__kwdefaults__', '__module__',
                '__mro__', '__name__', '__objclass__', '__qualname__',
                '__self__', '__slots__', '__weakref__'), suffix=r'\b'),
             Name.Variable.Magic),
        ],
        # Numeric literals; digits may be separated by single underscores.
        # Floats are tried before the integer forms.
        'numbers': [
            (r'(\d(?:_?\d)*\.(?:\d(?:_?\d)*)?|(?:\d(?:_?\d)*)?\.\d(?:_?\d)*)'
             r'([eE][+-]?\d(?:_?\d)*)?', Number.Float),
            (r'\d(?:_?\d)*[eE][+-]?\d(?:_?\d)*j?', Number.Float),
            (r'0[oO](?:_?[0-7])+', Number.Oct),
            (r'0[bB](?:_?[01])+', Number.Bin),
            (r'0[xX](?:_?[a-fA-F0-9])+', Number.Hex),
            (r'\d(?:_?\d)*', Number.Integer),
        ],
        'name': [
            (r'@' + uni_name, Name.Decorator),
            (r'@', Operator),  # new matrix multiplication operator
            (uni_name, Name),
        ],
        # Name following ``def``.  The included magic-function rule has no
        # '#pop', so after a magic name the state is left via default('#pop').
        'funcname': [
            include('magicfuncs'),
            (uni_name, Name.Function, '#pop'),
            default('#pop'),
        ],
        # Name following ``class``; the only rule pops on an identifier.
        'classname': [
            (uni_name, Name.Class, '#pop'),
        ],
        # Remainder of an ``import`` statement: dotted names, ``as`` and commas.
        'import': [
            (r'(\s+)(as)(\s+)', bygroups(Whitespace, Keyword, Whitespace)),
            (r'\.', Name.Namespace),
            (uni_name, Name.Namespace),
            (r'(\s*)(,)(\s*)', bygroups(Whitespace, Operator, Whitespace)),
            default('#pop')  # all else: go back
        ],
        # Module part of ``from ... import``; left once ``import`` is seen.
        'fromimport': [
            (r'(\s+)(import)\b', bygroups(Whitespace, Keyword.Namespace), '#pop'),
            (r'\.', Name.Namespace),
            # if None occurs here, it's "raise x from None", since None can
            # never be a module name
            (r'None\b', Keyword.Constant, '#pop'),
            (uni_name, Name.Namespace),
            default('#pop'),
        ],
        # Escape rule sets; these are merged with a string-body state through
        # combined() by the string openers in 'expr'.
        'rfstringescape': [
            (r'\{\{', String.Escape),
            (r'\}\}', String.Escape),
        ],
        'fstringescape': [
            include('rfstringescape'),
            include('stringescape'),
        ],
        'bytesescape': [
            (r'\\([\\abfnrtv"\']|\n|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        'stringescape': [
            (r'\\(N\{.*?\}|u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8})', String.Escape),
            include('bytesescape')
        ],
        # String body rules, generated once per quote style by the two helper
        # functions defined above.
        'fstrings-single': fstring_rules(String.Single),
        'fstrings-double': fstring_rules(String.Double),
        'strings-single': innerstring_rules(String.Single),
        'strings-double': innerstring_rules(String.Double),
        # String body states.  Each one pops on its closing quote:
        #   dq*/sq*   - single-line strings delimited by " / '
        #   tdq*/tsq* - triple-quoted strings delimited by """ / '''
        #   *f / *s   - use the f-string rules / the plain string rules
        'dqf': [
            (r'"', String.Double, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
            include('fstrings-double')
        ],
        'sqf': [
            (r"'", String.Single, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
            include('fstrings-single')
        ],
        'dqs': [
            (r'"', String.Double, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
            include('strings-double')
        ],
        'sqs': [
            (r"'", String.Single, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
            include('strings-single')
        ],
        'tdqf': [
            (r'"""', String.Double, '#pop'),
            include('fstrings-double'),
            (r'\n', String.Double)
        ],
        'tsqf': [
            (r"'''", String.Single, '#pop'),
            include('fstrings-single'),
            (r'\n', String.Single)
        ],
        'tdqs': [
            (r'"""', String.Double, '#pop'),
            include('strings-double'),
            (r'\n', String.Double)
        ],
        'tsqs': [
            (r"'''", String.Single, '#pop'),
            include('strings-single'),
            (r'\n', String.Single)
        ],
    }

    def analyse_text(text):
        """Guess whether *text* is Python 3 source.

        Returns the result of matching the shebang line against
        ``python``/``pythonw`` with an optional ``3`` or ``3.x`` suffix;
        when that is falsy, returns whether ``'import '`` occurs within the
        first 1000 characters of *text*.
        """
        return shebang_matches(text, r'pythonw?(3(\.\d)?)?') or \
            'import ' in text[:1000]
411
412
# Alias kept so that code referring to ``Python3Lexer`` continues to work;
# it is the very same class object as ``PythonLexer``.
Python3Lexer = PythonLexer
414
415
class Python2Lexer(RegexLexer):
    """
    For Python 2.x source code.

    .. versionchanged:: 2.5
       This class has been renamed from ``PythonLexer``. ``PythonLexer`` now
       refers to the Python 3 variant. File name patterns like ``*.py`` have
       been moved to Python 3 as well.
    """

    name = 'Python 2.x'
    url = 'https://www.python.org'
    aliases = ['python2', 'py2']
    filenames = []  # now taken over by PythonLexer (3.x)
    mimetypes = ['text/x-python2', 'application/x-python2']
    version_added = ''

    def innerstring_rules(ttype):
        """Return the rule list for the body of a string literal.

        Called while the class body is executed (it takes no ``self``) to
        build the ``strings-single`` / ``strings-double`` states.  Literal
        text is emitted as *ttype*; ``%``-style placeholders are emitted as
        ``String.Interpol``.
        """
        return [
            # the old style '%s' % (...) string formatting
            (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[E-GXc-giorsux%]', String.Interpol),
            # backslashes, quotes and formatting signs must be parsed one at a time
            (r'[^\\\'"%\n]+', ttype),
            (r'[\'"\\]', ttype),
            # unhandled string formatting sign
            (r'%', ttype),
            # newlines are an error (use "nl" state)
        ]

    # State machine definition.  Within a state the rules are tried in the
    # order listed, so the ordering below is significant.
    tokens = {
        # Single top-level state: doc-style triple-quoted strings at the start
        # of a line, comments, punctuation, operators, keywords, the statement
        # keywords that push a dedicated state, string openers, names and
        # numbers.
        'root': [
            (r'\n', Whitespace),
            (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")',
             bygroups(Whitespace, String.Affix, String.Doc)),
            (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')",
             bygroups(Whitespace, String.Affix, String.Doc)),
            (r'[^\S\n]+', Text),
            (r'\A#!.+$', Comment.Hashbang),
            (r'#.*$', Comment.Single),
            (r'[]{}:(),;[]', Punctuation),
            (r'\\\n', Text),
            (r'\\', Text),
            (r'(in|is|and|or|not)\b', Operator.Word),
            (r'!=|==|<<|>>|[-~+/*%=<>&^|.]', Operator),
            include('keywords'),
            (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Whitespace), 'funcname'),
            (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Whitespace), 'classname'),
            (r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Whitespace),
             'fromimport'),
            (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Whitespace),
             'import'),
            include('builtins'),
            include('magicfuncs'),
            include('magicvars'),
            include('backtick'),
            # raw strings (r, ur, br, ... prefixes): no escape state is combined
            ('([rR]|[uUbB][rR]|[rR][uUbB])(""")',
             bygroups(String.Affix, String.Double), 'tdqs'),
            ("([rR]|[uUbB][rR]|[rR][uUbB])(''')",
             bygroups(String.Affix, String.Single), 'tsqs'),
            ('([rR]|[uUbB][rR]|[rR][uUbB])(")',
             bygroups(String.Affix, String.Double), 'dqs'),
            ("([rR]|[uUbB][rR]|[rR][uUbB])(')",
             bygroups(String.Affix, String.Single), 'sqs'),
            # non-raw strings (optional u/b prefix): combined with 'stringescape'
            ('([uUbB]?)(""")', bygroups(String.Affix, String.Double),
             combined('stringescape', 'tdqs')),
            ("([uUbB]?)(''')", bygroups(String.Affix, String.Single),
             combined('stringescape', 'tsqs')),
            ('([uUbB]?)(")', bygroups(String.Affix, String.Double),
             combined('stringescape', 'dqs')),
            ("([uUbB]?)(')", bygroups(String.Affix, String.Single),
             combined('stringescape', 'sqs')),
            include('name'),
            include('numbers'),
        ],
        'keywords': [
            (words((
                'assert', 'break', 'continue', 'del', 'elif', 'else', 'except',
                'exec', 'finally', 'for', 'global', 'if', 'lambda', 'pass',
                'print', 'raise', 'return', 'try', 'while', 'yield',
                'yield from', 'as', 'with'), suffix=r'\b'),
             Keyword),
        ],
        # Builtin functions/types, pseudo-builtins and builtin exceptions.
        # The (?<!\.) prefix keeps attribute accesses such as ``x.list`` from
        # matching.
        'builtins': [
            (words((
                '__import__', 'abs', 'all', 'any', 'apply', 'basestring', 'bin',
                'bool', 'buffer', 'bytearray', 'bytes', 'callable', 'chr', 'classmethod',
                'cmp', 'coerce', 'compile', 'complex', 'delattr', 'dict', 'dir', 'divmod',
                'enumerate', 'eval', 'execfile', 'exit', 'file', 'filter', 'float',
                'frozenset', 'getattr', 'globals', 'hasattr', 'hash', 'hex', 'id',
                'input', 'int', 'intern', 'isinstance', 'issubclass', 'iter', 'len',
                'list', 'locals', 'long', 'map', 'max', 'min', 'next', 'object',
                'oct', 'open', 'ord', 'pow', 'property', 'range', 'raw_input', 'reduce',
                'reload', 'repr', 'reversed', 'round', 'set', 'setattr', 'slice',
                'sorted', 'staticmethod', 'str', 'sum', 'super', 'tuple', 'type',
                'unichr', 'unicode', 'vars', 'xrange', 'zip'),
                prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Builtin),
            (r'(?<!\.)(self|None|Ellipsis|NotImplemented|False|True|cls'
             r')\b', Name.Builtin.Pseudo),
            (words((
                'ArithmeticError', 'AssertionError', 'AttributeError',
                'BaseException', 'DeprecationWarning', 'EOFError', 'EnvironmentError',
                'Exception', 'FloatingPointError', 'FutureWarning', 'GeneratorExit',
                'IOError', 'ImportError', 'ImportWarning', 'IndentationError',
                'IndexError', 'KeyError', 'KeyboardInterrupt', 'LookupError',
                'MemoryError', 'NameError',
                'NotImplementedError', 'OSError', 'OverflowError', 'OverflowWarning',
                'PendingDeprecationWarning', 'ReferenceError',
                'RuntimeError', 'RuntimeWarning', 'StandardError', 'StopIteration',
                'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit',
                'TabError', 'TypeError', 'UnboundLocalError', 'UnicodeDecodeError',
                'UnicodeEncodeError', 'UnicodeError', 'UnicodeTranslateError',
                'UnicodeWarning', 'UserWarning', 'ValueError', 'VMSError', 'Warning',
                'WindowsError', 'ZeroDivisionError'), prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Exception),
        ],
        'magicfuncs': [
            (words((
                '__abs__', '__add__', '__and__', '__call__', '__cmp__', '__coerce__',
                '__complex__', '__contains__', '__del__', '__delattr__', '__delete__',
                '__delitem__', '__delslice__', '__div__', '__divmod__', '__enter__',
                '__eq__', '__exit__', '__float__', '__floordiv__', '__ge__', '__get__',
                '__getattr__', '__getattribute__', '__getitem__', '__getslice__', '__gt__',
                '__hash__', '__hex__', '__iadd__', '__iand__', '__idiv__', '__ifloordiv__',
                '__ilshift__', '__imod__', '__imul__', '__index__', '__init__',
                '__instancecheck__', '__int__', '__invert__', '__iop__', '__ior__',
                '__ipow__', '__irshift__', '__isub__', '__iter__', '__itruediv__',
                '__ixor__', '__le__', '__len__', '__long__', '__lshift__', '__lt__',
                '__missing__', '__mod__', '__mul__', '__ne__', '__neg__', '__new__',
                '__nonzero__', '__oct__', '__op__', '__or__', '__pos__', '__pow__',
                '__radd__', '__rand__', '__rcmp__', '__rdiv__', '__rdivmod__', '__repr__',
                '__reversed__', '__rfloordiv__', '__rlshift__', '__rmod__', '__rmul__',
                '__rop__', '__ror__', '__rpow__', '__rrshift__', '__rshift__', '__rsub__',
                '__rtruediv__', '__rxor__', '__set__', '__setattr__', '__setitem__',
                '__setslice__', '__str__', '__sub__', '__subclasscheck__', '__truediv__',
                '__unicode__', '__xor__'), suffix=r'\b'),
             Name.Function.Magic),
        ],
        'magicvars': [
            (words((
                '__bases__', '__class__', '__closure__', '__code__', '__defaults__',
                '__dict__', '__doc__', '__file__', '__func__', '__globals__',
                '__metaclass__', '__module__', '__mro__', '__name__', '__self__',
                '__slots__', '__weakref__'),
                suffix=r'\b'),
             Name.Variable.Magic),
        ],
        # Numeric literals; floats are tried before the integer forms, and a
        # trailing ``L`` selects Number.Integer.Long.
        'numbers': [
            (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?j?', Number.Float),
            (r'\d+[eE][+-]?[0-9]+j?', Number.Float),
            (r'0[0-7]+j?', Number.Oct),
            (r'0[bB][01]+', Number.Bin),
            (r'0[xX][a-fA-F0-9]+', Number.Hex),
            (r'\d+L', Number.Integer.Long),
            (r'\d+j?', Number.Integer)
        ],
        # Backtick-delimited text on a single line.
        'backtick': [
            ('`.*?`', String.Backtick),
        ],
        'name': [
            (r'@[\w.]+', Name.Decorator),
            (r'[a-zA-Z_]\w*', Name),
        ],
        # Name following ``def``.  The included magic-function rule has no
        # '#pop', so after a magic name the state is left via default('#pop').
        'funcname': [
            include('magicfuncs'),
            (r'[a-zA-Z_]\w*', Name.Function, '#pop'),
            default('#pop'),
        ],
        # Name following ``class``; the only rule pops on an identifier.
        'classname': [
            (r'[a-zA-Z_]\w*', Name.Class, '#pop')
        ],
        # Remainder of an ``import`` statement: dotted names, ``as`` and commas.
        'import': [
            (r'(?:[ \t]|\\\n)+', Text),
            (r'as\b', Keyword.Namespace),
            (r',', Operator),
            (r'[a-zA-Z_][\w.]*', Name.Namespace),
            default('#pop')  # all else: go back
        ],
        # Module part of ``from ... import``; left once ``import`` is seen.
        'fromimport': [
            (r'(?:[ \t]|\\\n)+', Text),
            (r'import\b', Keyword.Namespace, '#pop'),
            # if None occurs here, it's "raise x from None", since None can
            # never be a module name
            (r'None\b', Name.Builtin.Pseudo, '#pop'),
            # sadly, in "raise x from y" y will be highlighted as namespace too
            (r'[a-zA-Z_.][\w.]*', Name.Namespace),
            # anything else here also means "raise x from y" and is therefore
            # not an error
            default('#pop'),
        ],
        # Backslash escapes; merged with a string-body state through
        # combined() by the non-raw string openers in 'root'.
        'stringescape': [
            (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
             r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        # String body rules, generated once per quote style by
        # innerstring_rules() above.
        'strings-single': innerstring_rules(String.Single),
        'strings-double': innerstring_rules(String.Double),
        # String body states; each pops on its closing quote (dqs/sqs for
        # single-line strings, tdqs/tsqs for triple-quoted ones).
        'dqs': [
            (r'"', String.Double, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
            include('strings-double')
        ],
        'sqs': [
            (r"'", String.Single, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
            include('strings-single')
        ],
        'tdqs': [
            (r'"""', String.Double, '#pop'),
            include('strings-double'),
            (r'\n', String.Double)
        ],
        'tsqs': [
            (r"'''", String.Single, '#pop'),
            include('strings-single'),
            (r'\n', String.Single)
        ],
    }

    def analyse_text(text):
        """Guess whether *text* is Python 2 source.

        Returns the result of matching the shebang line against
        ``python2``/``pythonw2`` with an optional ``.x`` suffix.
        """
        return shebang_matches(text, r'pythonw?2(\.\d)?')
637
638
class _PythonConsoleLexerBase(RegexLexer):
    """Auxiliary lexer for `PythonConsoleLexer`.

    Code tokens are output as ``Token.Other.Code``, traceback tokens as
    ``Token.Other.Traceback``.
    """
    # NOTE: the docstring must be the first statement of the class body;
    # placed after the attributes it is a discarded expression and
    # ``__doc__`` stays ``None``.

    name = 'Python console session'
    aliases = ['pycon', 'python-console']
    mimetypes = ['text/x-python-doctest']

    tokens = {
        # Every rule consumes a whole line, so matching always starts at the
        # beginning of a line.
        'root': [
            (r'(>>> )(.*\n)', bygroups(Generic.Prompt, Other.Code), 'continuations'),
            # This happens, e.g., when tracebacks are embedded in documentation;
            # trailing whitespaces are often stripped in such contexts.
            (r'(>>>)(\n)', bygroups(Generic.Prompt, Whitespace)),
            (r'(\^C)?Traceback \(most recent call last\):\n', Other.Traceback, 'traceback'),
            # SyntaxError starts with this.  The interpreter indents the
            # "File" line by two spaces, so the pattern must do the same.
            (r'  File "[^"]+", line \d+', Other.Traceback, 'traceback'),
            (r'.*\n', Generic.Output),
        ],
        # Lines following a ``>>> `` prompt that start with ``... ``.
        'continuations': [
            (r'(\.\.\. )(.*\n)', bygroups(Generic.Prompt, Other.Code)),
            # A bare "..." whose trailing whitespace was stripped (same
            # situation as the bare ">>>" rule in 'root').
            (r'(\.\.\.)(\n)', bygroups(Generic.Prompt, Whitespace)),
            default('#pop'),
        ],
        'traceback': [
            # As soon as we see a traceback, consume everything until the next
            # >>> prompt.
            (r'(?=>>>( |$))', Text, '#pop'),
            (r'(KeyboardInterrupt)(\n)', bygroups(Name.Class, Whitespace)),
            (r'.*\n', Other.Traceback),
        ],
    }
674
675
class PythonConsoleLexer(DelegatingLexer):
    """
    For Python console output or doctests, such as:

    .. sourcecode:: pycon

        >>> a = 'foo'
        >>> print(a)
        foo
        >>> 1 / 0
        Traceback (most recent call last):
          File "<stdin>", line 1, in <module>
        ZeroDivisionError: integer division or modulo by zero

    Additional options:

    `python3`
        Use Python 3 lexer for code. Default is ``True``.

        .. versionadded:: 1.0
        .. versionchanged:: 2.5
           Now defaults to ``True``.
    """

    name = 'Python console session'
    aliases = ['pycon', 'python-console']
    mimetypes = ['text/x-python-doctest']
    url = 'https://python.org'
    version_added = ''

    def __init__(self, **options):
        # Select the (source lexer, traceback lexer) pair for the requested
        # Python version; the option is only read, not removed from *options*.
        if get_bool_opt(options, 'python3', True):
            code_lexer, traceback_lexer = PythonLexer, PythonTracebackLexer
        else:
            code_lexer, traceback_lexer = Python2Lexer, Python2TracebackLexer

        # Two auxiliary lexers are needed, so delegation is stacked: the
        # inner layer re-lexes ``Other.Code`` tokens, this class re-lexes
        # ``Other.Traceback`` tokens.
        # TODO: DelegatingLexer should support this directly, by accepting a
        # tuple of auxiliary lexers and a tuple of distinguishing tokens.
        # Then we wouldn't need this intermediary class.
        class _ReplaceInnerCode(DelegatingLexer):
            def __init__(self, **options):
                super().__init__(code_lexer, _PythonConsoleLexerBase,
                                 Other.Code, **options)

        super().__init__(traceback_lexer, _ReplaceInnerCode,
                         Other.Traceback, **options)
723
724
class PythonTracebackLexer(RegexLexer):
    """
    For Python 3.x tracebacks, with support for chained exceptions.

    .. versionchanged:: 2.5
       This is now the default ``PythonTracebackLexer``. It is still available
       as the alias ``Python3TracebackLexer``.
    """

    name = 'Python Traceback'
    aliases = ['pytb', 'py3tb']
    filenames = ['*.pytb', '*.py3tb']
    mimetypes = ['text/x-python-traceback', 'text/x-python3-traceback']
    url = 'https://python.org'
    version_added = '1.0'

    # The interpreter prints frame headers indented by two spaces
    # ('  File "...", line N, in f') and the quoted source line indented by
    # four; the patterns below must use exactly those indents, otherwise
    # header lines are not recognised and get lexed as source code.
    tokens = {
        'root': [
            (r'\n', Whitespace),
            (r'^(\^C)?Traceback \(most recent call last\):\n', Generic.Traceback, 'intb'),
            (r'^During handling of the above exception, another '
             r'exception occurred:\n\n', Generic.Traceback),
            (r'^The above exception was the direct cause of the '
             r'following exception:\n\n', Generic.Traceback),
            # A traceback that starts directly with a frame header; the
            # lookahead consumes nothing so 'intb' sees the header itself.
            (r'^(?=  File "[^"]+", line \d+)', Generic.Traceback, 'intb'),
            (r'^.*\n', Other),
        ],
        # Inside a traceback: frame headers, quoted source lines, and finally
        # the exception line, which pops back to 'root'.
        'intb': [
            (r'^(  File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Text, Name, Whitespace)),
            (r'^(  File )("[^"]+")(, line )(\d+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Whitespace)),
            # Quoted source line, highlighted with the Python lexer; it may be
            # followed by a marker line.
            (r'^(    )(.+)(\n)',
             bygroups(Whitespace, using(PythonLexer), Whitespace), 'markers'),
            (r'^([ \t]*)(\.\.\.)(\n)',
             bygroups(Whitespace, Comment, Whitespace)),  # for doctests...
            (r'^([^:]+)(: )(.+)(\n)',
             bygroups(Generic.Error, Text, Name, Whitespace), '#pop'),
            (r'^([a-zA-Z_][\w.]*)(:?\n)',
             bygroups(Generic.Error, Whitespace), '#pop'),
            default('#pop'),
        ],
        'markers': [
            # Either `PEP 657 <https://www.python.org/dev/peps/pep-0657/>`
            # error locations in Python 3.11+, or single-caret markers
            # for syntax errors before that.
            (r'^( {4,})([~^]+)(\n)',
             bygroups(Whitespace, Punctuation.Marker, Whitespace),
             '#pop'),
            default('#pop'),
        ],
    }
777
778
# Alias kept so that code referring to ``Python3TracebackLexer`` continues to
# work; it is the very same class object as ``PythonTracebackLexer``.
Python3TracebackLexer = PythonTracebackLexer
780
781
class Python2TracebackLexer(RegexLexer):
    """
    For Python tracebacks.

    .. versionchanged:: 2.5
       This class has been renamed from ``PythonTracebackLexer``.
       ``PythonTracebackLexer`` now refers to the Python 3 variant.
    """

    name = 'Python 2.x Traceback'
    aliases = ['py2tb']
    filenames = ['*.py2tb']
    mimetypes = ['text/x-python2-traceback']
    url = 'https://python.org'
    version_added = '0.7'

    # The interpreter prints frame headers indented by two spaces
    # ('  File "...", line N, in f') and the quoted source line indented by
    # four; the patterns below must use exactly those indents, otherwise
    # header lines are not recognised and get lexed as source code.
    tokens = {
        'root': [
            # Cover both (most recent call last) and (innermost last)
            # The optional ^C allows us to catch keyboard interrupt signals.
            (r'^(\^C)?(Traceback.*\n)',
             bygroups(Text, Generic.Traceback), 'intb'),
            # SyntaxError starts with this.  The lookahead consumes nothing so
            # 'intb' sees the frame header itself.
            (r'^(?=  File "[^"]+", line \d+)', Generic.Traceback, 'intb'),
            (r'^.*\n', Other),
        ],
        # Inside a traceback: frame headers, quoted source lines, and finally
        # the exception line, which pops back to 'root'.
        'intb': [
            (r'^(  File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Text, Name, Whitespace)),
            (r'^(  File )("[^"]+")(, line )(\d+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Whitespace)),
            # Quoted source line, highlighted with the Python 2 lexer; it may
            # be followed by a caret marker.
            (r'^(    )(.+)(\n)',
             bygroups(Text, using(Python2Lexer), Whitespace), 'marker'),
            (r'^([ \t]*)(\.\.\.)(\n)',
             bygroups(Text, Comment, Whitespace)),  # for doctests...
            (r'^([^:]+)(: )(.+)(\n)',
             bygroups(Generic.Error, Text, Name, Whitespace), '#pop'),
            (r'^([a-zA-Z_]\w*)(:?\n)',
             bygroups(Generic.Error, Whitespace), '#pop')
        ],
        'marker': [
            # For syntax errors.
            (r'( {4,})(\^)', bygroups(Text, Punctuation.Marker), '#pop'),
            default('#pop'),
        ],
    }
828
829
class CythonLexer(RegexLexer):
    """
    For Pyrex and Cython source code.

    The state machine below handles Python-style syntax plus the Cython
    additions visible in the rules: ``cdef``/``cpdef`` declarations,
    ``ctypedef``, ``cimport``, ``<type>`` casts and the ``DEF``/``IF``/
    ``ELIF``/``ELSE`` compile-time directives.
    """

    name = 'Cython'
    url = 'https://cython.org'
    aliases = ['cython', 'pyx', 'pyrex']
    filenames = ['*.pyx', '*.pxd', '*.pxi']
    mimetypes = ['text/x-cython', 'application/x-cython']
    version_added = '1.1'

    tokens = {
        'root': [
            (r'\n', Whitespace),
            # A triple-quoted string that starts a line is treated as a
            # docstring.
            (r'^(\s*)("""(?:.|\n)*?""")', bygroups(Whitespace, String.Doc)),
            (r"^(\s*)('''(?:.|\n)*?''')", bygroups(Whitespace, String.Doc)),
            (r'[^\S\n]+', Text),
            (r'#.*$', Comment),
            (r'[]{}:(),;[]', Punctuation),
            (r'\\\n', Whitespace),
            (r'\\', Text),
            (r'(in|is|and|or|not)\b', Operator.Word),
            # ``<type>`` casts
            (r'(<)([a-zA-Z0-9.?]+)(>)',
             bygroups(Punctuation, Keyword.Type, Punctuation)),
            (r'!=|==|<<|>>|[-~+/*%=<>&^|.?]', Operator),
            (r'(from)(\d+)(<=)(\s+)(<)(\d+)(:)',
             bygroups(Keyword, Number.Integer, Operator, Whitespace, Operator,
                      Name, Punctuation)),
            include('keywords'),
            # Definition keywords push a state that classifies the
            # identifier which follows them.
            (r'(def|property)(\s+)', bygroups(Keyword, Whitespace), 'funcname'),
            (r'(cp?def)(\s+)', bygroups(Keyword, Whitespace), 'cdef'),
            # (should actually start a block with only cdefs)
            (r'(cdef)(:)', bygroups(Keyword, Punctuation)),
            (r'(class|struct)(\s+)', bygroups(Keyword, Whitespace), 'classname'),
            (r'(from)(\s+)', bygroups(Keyword, Whitespace), 'fromimport'),
            (r'(c?import)(\s+)', bygroups(Keyword, Whitespace), 'import'),
            include('builtins'),
            include('backtick'),
            # Raw strings: entered without the 'stringescape' rules.
            ('(?:[rR]|[uU][rR]|[rR][uU])"""', String, 'tdqs'),
            ("(?:[rR]|[uU][rR]|[rR][uU])'''", String, 'tsqs'),
            ('(?:[rR]|[uU][rR]|[rR][uU])"', String, 'dqs'),
            ("(?:[rR]|[uU][rR]|[rR][uU])'", String, 'sqs'),
            # Ordinary strings: escape sequences are recognized.
            ('[uU]?"""', String, combined('stringescape', 'tdqs')),
            ("[uU]?'''", String, combined('stringescape', 'tsqs')),
            ('[uU]?"', String, combined('stringescape', 'dqs')),
            ("[uU]?'", String, combined('stringescape', 'sqs')),
            include('name'),
            include('numbers'),
        ],
        'keywords': [
            (words((
                'assert', 'async', 'await', 'break', 'by', 'continue', 'ctypedef', 'del', 'elif',
                'else', 'except', 'except?', 'exec', 'finally', 'for', 'fused', 'gil',
                'global', 'if', 'include', 'lambda', 'nogil', 'pass', 'print',
                'raise', 'return', 'try', 'while', 'yield', 'as', 'with'), suffix=r'\b'),
             Keyword),
            # Compile-time directives
            (r'(DEF|IF|ELIF|ELSE)\b', Comment.Preproc),
        ],
        'builtins': [
            # The ``(?<!\.)`` prefix keeps attribute accesses such as
            # ``obj.list`` from being highlighted as builtins.
            (words((
                '__import__', 'abs', 'all', 'any', 'apply', 'basestring', 'bin', 'bint',
                'bool', 'buffer', 'bytearray', 'bytes', 'callable', 'chr',
                'classmethod', 'cmp', 'coerce', 'compile', 'complex', 'delattr',
                'dict', 'dir', 'divmod', 'enumerate', 'eval', 'execfile', 'exit',
                'file', 'filter', 'float', 'frozenset', 'getattr', 'globals',
                'hasattr', 'hash', 'hex', 'id', 'input', 'int', 'intern', 'isinstance',
                'issubclass', 'iter', 'len', 'list', 'locals', 'long', 'map', 'max',
                'min', 'next', 'object', 'oct', 'open', 'ord', 'pow', 'property', 'Py_ssize_t',
                'range', 'raw_input', 'reduce', 'reload', 'repr', 'reversed',
                'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod',
                'str', 'sum', 'super', 'tuple', 'type', 'unichr', 'unicode', 'unsigned',
                'vars', 'xrange', 'zip'), prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Builtin),
            (r'(?<!\.)(self|None|Ellipsis|NotImplemented|False|True|NULL'
             r')\b', Name.Builtin.Pseudo),
            (words((
                'ArithmeticError', 'AssertionError', 'AttributeError',
                'BaseException', 'DeprecationWarning', 'EOFError', 'EnvironmentError',
                'Exception', 'FloatingPointError', 'FutureWarning', 'GeneratorExit',
                'IOError', 'ImportError', 'ImportWarning', 'IndentationError',
                'IndexError', 'KeyError', 'KeyboardInterrupt', 'LookupError',
                'MemoryError', 'NameError', 'NotImplemented', 'NotImplementedError',
                'OSError', 'OverflowError', 'OverflowWarning',
                'PendingDeprecationWarning', 'ReferenceError', 'RuntimeError',
                'RuntimeWarning', 'StandardError', 'StopIteration', 'SyntaxError',
                'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError',
                'TypeError', 'UnboundLocalError', 'UnicodeDecodeError',
                'UnicodeEncodeError', 'UnicodeError', 'UnicodeTranslateError',
                'UnicodeWarning', 'UserWarning', 'ValueError', 'Warning',
                'ZeroDivisionError'), prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Exception),
        ],
        'numbers': [
            # A float needs a decimal point or an exponent.  If a bare run
            # of digits were accepted here, it would win over every rule
            # below: all integers would be tagged as floats and the
            # octal/hex/long/integer rules could never match.
            (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
            (r'\d+[eE][+-]?[0-9]+', Number.Float),
            (r'0\d+', Number.Oct),
            (r'0[xX][a-fA-F0-9]+', Number.Hex),
            (r'\d+L', Number.Integer.Long),
            (r'\d+', Number.Integer)
        ],
        'backtick': [
            ('`.*?`', String.Backtick),
        ],
        'name': [
            (r'@\w+', Name.Decorator),
            (r'[a-zA-Z_]\w*', Name),
        ],
        'funcname': [
            (r'[a-zA-Z_]\w*', Name.Function, '#pop')
        ],
        'cdef': [
            (r'(public|readonly|extern|api|inline)\b', Keyword.Reserved),
            (r'(struct|enum|union|class)\b', Keyword),
            # An identifier followed by ``(``, ``:``, ``#``, ``=`` or the end
            # of the line is the declared name and ends the declaration.
            (r'([a-zA-Z_]\w*)(\s*)(?=[(:#=]|$)',
             bygroups(Name.Function, Whitespace), '#pop'),
            # An identifier followed by a comma is one of several declared
            # names; stay in this state for the next one.
            (r'([a-zA-Z_]\w*)(\s*)(,)',
             bygroups(Name.Function, Whitespace, Punctuation)),
            (r'from\b', Keyword, '#pop'),
            (r'as\b', Keyword),
            (r':', Punctuation, '#pop'),
            # Leave the state without consuming the quote so that 'root'
            # can lex the string.
            (r'(?=["\'])', Text, '#pop'),
            # Any other identifier is taken to be a type name.
            (r'[a-zA-Z_]\w*', Keyword.Type),
            (r'.', Text),
        ],
        'classname': [
            (r'[a-zA-Z_]\w*', Name.Class, '#pop')
        ],
        'import': [
            (r'(\s+)(as)(\s+)', bygroups(Whitespace, Keyword, Whitespace)),
            (r'[a-zA-Z_][\w.]*', Name.Namespace),
            (r'(\s*)(,)(\s*)', bygroups(Whitespace, Operator, Whitespace)),
            default('#pop')  # all else: go back
        ],
        'fromimport': [
            (r'(\s+)(c?import)\b', bygroups(Whitespace, Keyword), '#pop'),
            (r'[a-zA-Z_.][\w.]*', Name.Namespace),
            # ``cdef foo from "header"``, or ``for foo from 0 < i < 10``
            default('#pop'),
        ],
        'stringescape': [
            (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
             r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        'strings': [
            # %-style interpolation placeholders
            (r'%(\([a-zA-Z0-9]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[E-GXc-giorsux%]', String.Interpol),
            (r'[^\\\'"%\n]+', String),
            # quotes, percents and backslashes must be parsed one at a time
            (r'[\'"\\]', String),
            # unhandled string formatting sign
            (r'%', String)
            # newlines are an error (use "nl" state)
        ],
        'nl': [
            (r'\n', String)
        ],
        'dqs': [
            (r'"', String, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here again for raw strings
            include('strings')
        ],
        'sqs': [
            (r"'", String, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here again for raw strings
            include('strings')
        ],
        'tdqs': [
            (r'"""', String, '#pop'),
            include('strings'),
            include('nl')
        ],
        'tsqs': [
            (r"'''", String, '#pop'),
            include('strings'),
            include('nl')
        ],
    }
1007
1008
class DgLexer(RegexLexer):
    """
    Lexer for dg,
    a functional and object-oriented programming language
    running on the CPython 3 VM.
    """
    name = 'dg'
    aliases = ['dg']
    filenames = ['*.dg']
    mimetypes = ['text/x-dg']
    url = 'http://pyos.github.io/dg'
    version_added = '1.6'

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'#.*?$', Comment.Single),

            # Numeric literals.  The prefixed forms come first so that the
            # leading ``0`` is not consumed by the plain integer rule.
            (r'(?i)0b[01]+', Number.Bin),
            (r'(?i)0o[0-7]+', Number.Oct),
            (r'(?i)0x[0-9a-f]+', Number.Hex),
            (r'(?i)[+-]?[0-9]+\.[0-9]+(e[+-]?[0-9]+)?j?', Number.Float),
            (r'(?i)[+-]?[0-9]+e[+-]?\d+j?', Number.Float),
            (r'(?i)[+-]?[0-9]+j?', Number.Integer),

            # String openers.  Triple quotes are tried before single quotes
            # so that ``'''`` is not read as an empty string plus a quote.
            (r"(?i)(br|r?b?)'''", String, combined('stringescape', 'tsqs', 'string')),
            (r'(?i)(br|r?b?)"""', String, combined('stringescape', 'tdqs', 'string')),
            (r"(?i)(br|r?b?)'", String, combined('stringescape', 'sqs', 'string')),
            (r'(?i)(br|r?b?)"', String, combined('stringescape', 'dqs', 'string')),

            # A backtick-quoted name is lexed as an operator.
            (r"`\w+'*`", Operator),
            (r'\b(and|in|is|or|where)\b', Operator.Word),
            (r'[!$%&*+\-./:<-@\\^|~;,]+', Operator),

            # Builtin types.  Names may carry a trailing apostrophe, hence
            # the ``(?![\'\w])`` suffix instead of a plain ``\b``.
            (words((
                'bool', 'bytearray', 'bytes', 'classmethod', 'complex', 'dict', 'dict\'',
                'float', 'frozenset', 'int', 'list', 'list\'', 'memoryview', 'object',
                'property', 'range', 'set', 'set\'', 'slice', 'staticmethod', 'str',
                'super', 'tuple', 'tuple\'', 'type'),
                   prefix=r'(?<!\.)', suffix=r'(?![\'\w])'),
             Name.Builtin),
            # Builtin functions.  ``words()`` matches each entry literally,
            # so ``foldl``/``foldl1`` and ``scanl``/``scanl1`` are listed
            # individually rather than as a regex such as ``foldl1?``.
            (words((
                '__import__', 'abs', 'all', 'any', 'bin', 'bind', 'chr', 'cmp', 'compile',
                'complex', 'delattr', 'dir', 'divmod', 'drop', 'dropwhile', 'enumerate',
                'eval', 'exhaust', 'filter', 'flip', 'foldl', 'foldl1', 'format', 'fst',
                'getattr', 'globals', 'hasattr', 'hash', 'head', 'hex', 'id', 'init',
                'input', 'isinstance', 'issubclass', 'iter', 'iterate', 'last', 'len',
                'locals', 'map', 'max', 'min', 'next', 'oct', 'open', 'ord', 'pow',
                'print', 'repr', 'reversed', 'round', 'setattr', 'scanl', 'scanl1', 'snd',
                'sorted', 'sum', 'tail', 'take', 'takewhile', 'vars', 'zip'),
                   prefix=r'(?<!\.)', suffix=r'(?![\'\w])'),
             Name.Builtin),
            (r"(?<!\.)(self|Ellipsis|NotImplemented|None|True|False)(?!['\w])",
             Name.Builtin.Pseudo),

            # Exception names: anything capitalized that ends in
            # Error/Exception/Warning, plus a few fixed names.
            (r"(?<!\.)[A-Z]\w*(Error|Exception|Warning)'*(?!['\w])",
             Name.Exception),
            (r"(?<!\.)(Exception|GeneratorExit|KeyboardInterrupt|StopIteration|"
             r"SystemExit)(?!['\w])", Name.Exception),

            (r"(?<![\w.])(except|finally|for|if|import|not|otherwise|raise|"
             r"subclass|while|with|yield)(?!['\w])", Keyword.Reserved),

            # Identifiers: all-uppercase (and underscore) names are plain
            # names, other capitalized names are lexed as types, and
            # everything else is a plain name.
            (r"[A-Z_]+'*(?!['\w])", Name),
            (r"[A-Z]\w+'*(?!['\w])", Keyword.Type),
            (r"\w+'*", Name),

            (r'[()]', Punctuation),
            (r'.', Error),
        ],
        'stringescape': [
            (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
             r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        'string': [
            # %-style interpolation placeholders
            (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[E-GXc-giorsux%]', String.Interpol),
            (r'[^\\\'"%\n]+', String),
            # quotes, percents and backslashes must be parsed one at a time
            (r'[\'"\\]', String),
            # unhandled string formatting sign
            (r'%', String),
            (r'\n', String)
        ],
        # Terminator states.  Each one is combined with 'stringescape' and
        # 'string' by the opener rules in 'root'.
        'dqs': [
            (r'"', String, '#pop')
        ],
        'sqs': [
            (r"'", String, '#pop')
        ],
        'tdqs': [
            (r'"""', String, '#pop')
        ],
        'tsqs': [
            (r"'''", String, '#pop')
        ],
    }
1106
1107
class NumPyLexer(PythonLexer):
    """
    A Python lexer recognizing Numerical Python builtins.

    Tokens come from ``PythonLexer``; plain ``Name`` tokens whose text is
    listed in ``EXTRA_KEYWORDS`` are re-tagged as ``Keyword.Pseudo``.
    """

    name = 'NumPy'
    url = 'https://numpy.org/'
    aliases = ['numpy']
    version_added = '0.10'

    # override the mimetypes to not inherit them from python
    mimetypes = []
    filenames = []

    # Identifiers that get promoted from ``Name`` to ``Keyword.Pseudo``.
    EXTRA_KEYWORDS = {
        'abs', 'absolute', 'accumulate', 'add', 'alen', 'all', 'allclose',
        'alltrue', 'alterdot', 'amax', 'amin', 'angle', 'any', 'append',
        'apply_along_axis', 'apply_over_axes', 'arange', 'arccos', 'arccosh',
        'arcsin', 'arcsinh', 'arctan', 'arctan2', 'arctanh', 'argmax', 'argmin',
        'argsort', 'argwhere', 'around', 'array', 'array2string', 'array_equal',
        'array_equiv', 'array_repr', 'array_split', 'array_str', 'arrayrange',
        'asanyarray', 'asarray', 'asarray_chkfinite', 'ascontiguousarray',
        'asfarray', 'asfortranarray', 'asmatrix', 'asscalar', 'astype',
        'atleast_1d', 'atleast_2d', 'atleast_3d', 'average', 'bartlett',
        'base_repr', 'beta', 'binary_repr', 'bincount', 'binomial',
        'bitwise_and', 'bitwise_not', 'bitwise_or', 'bitwise_xor', 'blackman',
        'bmat', 'broadcast', 'byte_bounds', 'bytes', 'byteswap', 'c_',
        'can_cast', 'ceil', 'choose', 'clip', 'column_stack', 'common_type',
        'compare_chararrays', 'compress', 'concatenate', 'conj', 'conjugate',
        'convolve', 'copy', 'corrcoef', 'correlate', 'cos', 'cosh', 'cov',
        'cross', 'cumprod', 'cumproduct', 'cumsum', 'delete', 'deprecate',
        'diag', 'diagflat', 'diagonal', 'diff', 'digitize', 'disp', 'divide',
        'dot', 'dsplit', 'dstack', 'dtype', 'dump', 'dumps', 'ediff1d', 'empty',
        'empty_like', 'equal', 'exp', 'expand_dims', 'expm1', 'extract', 'eye',
        'fabs', 'fastCopyAndTranspose', 'fft', 'fftfreq', 'fftshift', 'fill',
        'finfo', 'fix', 'flat', 'flatnonzero', 'flatten', 'fliplr', 'flipud',
        'floor', 'floor_divide', 'fmod', 'frexp', 'fromarrays', 'frombuffer',
        'fromfile', 'fromfunction', 'fromiter', 'frompyfunc', 'fromstring',
        'generic', 'get_array_wrap', 'get_include', 'get_numarray_include',
        'get_numpy_include', 'get_printoptions', 'getbuffer', 'getbufsize',
        'geterr', 'geterrcall', 'geterrobj', 'getfield', 'gradient', 'greater',
        'greater_equal', 'gumbel', 'hamming', 'hanning', 'histogram',
        'histogram2d', 'histogramdd', 'hsplit', 'hstack', 'hypot', 'i0',
        'identity', 'ifft', 'imag', 'index_exp', 'indices', 'inf', 'info',
        'inner', 'insert', 'int_asbuffer', 'interp', 'intersect1d',
        'intersect1d_nu', 'inv', 'invert', 'iscomplex', 'iscomplexobj',
        'isfinite', 'isfortran', 'isinf', 'isnan', 'isneginf', 'isposinf',
        'isreal', 'isrealobj', 'isscalar', 'issctype', 'issubclass_',
        'issubdtype', 'issubsctype', 'item', 'itemset', 'iterable', 'ix_',
        'kaiser', 'kron', 'ldexp', 'left_shift', 'less', 'less_equal', 'lexsort',
        'linspace', 'load', 'loads', 'loadtxt', 'log', 'log10', 'log1p', 'log2',
        'logical_and', 'logical_not', 'logical_or', 'logical_xor', 'logspace',
        'lstsq', 'mat', 'matrix', 'max', 'maximum', 'maximum_sctype',
        'may_share_memory', 'mean', 'median', 'meshgrid', 'mgrid', 'min',
        'minimum', 'mintypecode', 'mod', 'modf', 'msort', 'multiply', 'nan',
        'nan_to_num', 'nanargmax', 'nanargmin', 'nanmax', 'nanmin', 'nansum',
        'ndenumerate', 'ndim', 'ndindex', 'negative', 'newaxis', 'newbuffer',
        'newbyteorder', 'nonzero', 'not_equal', 'obj2sctype', 'ogrid', 'ones',
        'ones_like', 'outer', 'permutation', 'piecewise', 'pinv', 'pkgload',
        'place', 'poisson', 'poly', 'poly1d', 'polyadd', 'polyder', 'polydiv',
        'polyfit', 'polyint', 'polymul', 'polysub', 'polyval', 'power', 'prod',
        'product', 'ptp', 'put', 'putmask', 'r_', 'randint', 'random_integers',
        'random_sample', 'ranf', 'rank', 'ravel', 'real', 'real_if_close',
        'recarray', 'reciprocal', 'reduce', 'remainder', 'repeat', 'require',
        'reshape', 'resize', 'restoredot', 'right_shift', 'rint', 'roll',
        'rollaxis', 'roots', 'rot90', 'round', 'round_', 'row_stack', 's_',
        'sample', 'savetxt', 'sctype2char', 'searchsorted', 'seed', 'select',
        'set_numeric_ops', 'set_printoptions', 'set_string_function',
        'setbufsize', 'setdiff1d', 'seterr', 'seterrcall', 'seterrobj',
        'setfield', 'setflags', 'setmember1d', 'setxor1d', 'shape',
        'show_config', 'shuffle', 'sign', 'signbit', 'sin', 'sinc', 'sinh',
        'size', 'slice', 'solve', 'sometrue', 'sort', 'sort_complex', 'source',
        'split', 'sqrt', 'square', 'squeeze', 'standard_normal', 'std',
        'subtract', 'sum', 'svd', 'swapaxes', 'take', 'tan', 'tanh', 'tensordot',
        'test', 'tile', 'tofile', 'tolist', 'tostring', 'trace', 'transpose',
        'trapz', 'tri', 'tril', 'trim_zeros', 'triu', 'true_divide', 'typeDict',
        'typename', 'uniform', 'union1d', 'unique', 'unique1d', 'unravel_index',
        'unwrap', 'vander', 'var', 'vdot', 'vectorize', 'view', 'vonmises',
        'vsplit', 'vstack', 'weibull', 'where', 'who', 'zeros', 'zeros_like'
    }

    def get_tokens_unprocessed(self, text):
        """
        Yield ``(index, tokentype, value)`` triples for *text*, re-tagging
        ``Name`` tokens found in ``EXTRA_KEYWORDS`` as ``Keyword.Pseudo``.
        """
        stream = PythonLexer.get_tokens_unprocessed(self, text)
        for pos, ttype, lexeme in stream:
            # Only exact ``Name`` tokens are promoted; subtypes are left
            # untouched.
            if ttype is Name and lexeme in self.EXTRA_KEYWORDS:
                ttype = Keyword.Pseudo
            yield pos, ttype, lexeme

    def analyse_text(text):
        """
        Report whether *text* looks like Python code that imports numpy.

        Only the first 1000 characters are searched for import statements.
        """
        head = text[:1000]
        looks_like_python = (
            shebang_matches(text, r'pythonw?(3(\.\d)?)?')
            or 'import ' in head
        )
        imports_numpy = 'import numpy' in head or 'from numpy import' in head
        return looks_like_python and imports_numpy