1"""
2 pygments.lexers.python
3 ~~~~~~~~~~~~~~~~~~~~~~
4
5 Lexers for Python and related languages.
6
7 :copyright: Copyright 2006-present by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11import keyword
12
13from pygments.lexer import DelegatingLexer, RegexLexer, include, \
14 bygroups, using, default, words, combined, this
15from pygments.util import get_bool_opt, shebang_matches
16from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
17 Number, Punctuation, Generic, Other, Error, Whitespace
18from pygments import unistring as uni
19
# Public lexer classes exported by this module.
__all__ = ['PythonLexer', 'PythonConsoleLexer', 'PythonTracebackLexer',
           'Python2Lexer', 'Python2TracebackLexer',
           'CythonLexer', 'DgLexer', 'NumPyLexer']
23
24
class PythonLexer(RegexLexer):
    """
    For Python source code (version 3.x).

    .. versionchanged:: 2.5
       This is now the default ``PythonLexer``. It is still available as the
       alias ``Python3Lexer``.
    """

    name = 'Python'
    url = 'https://www.python.org'
    aliases = ['python', 'py', 'sage', 'python3', 'py3', 'bazel', 'starlark', 'pyi']
    filenames = [
        '*.py',
        '*.pyw',
        # Type stubs
        '*.pyi',
        # Jython
        '*.jy',
        # Sage
        '*.sage',
        # SCons
        '*.sc',
        'SConstruct',
        'SConscript',
        # Skylark/Starlark (used by Bazel, Buck, and Pants)
        '*.bzl',
        'BUCK',
        'BUILD',
        'BUILD.bazel',
        'WORKSPACE',
        # Twisted Application infrastructure
        '*.tac',
        # Execubot level format
        '*.pye',
    ]
    mimetypes = ['text/x-python', 'application/x-python',
                 'text/x-python3', 'application/x-python3']
    version_added = '0.10'

    # Regex for an identifier: one character from ``uni.xid_start`` followed
    # by any number of characters from ``uni.xid_continue``.
    uni_name = f"[{uni.xid_start}][{uni.xid_continue}]*"

    def innerstring_rules(ttype):
        """Build the rule list for the body of plain (non-f) string literals.

        ``ttype`` is the token type given to ordinary string content;
        recognised ``%``-style and ``str.format``-style placeholders are
        emitted as ``String.Interpol``.  The function is called while the
        class body executes (see the ``strings-single`` / ``strings-double``
        states below), so it takes no ``self``.
        """
        return [
            # the old style '%s' % (...) string formatting (still valid in Py3)
            (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[E-GXc-giorsaux%]', String.Interpol),
            # the new style '{}'.format(...) string formatting
            (r'\{'
             r'((\w+)((\.\w+)|(\[[^\]]+\]))*)?'  # field name
             r'(\![sra])?'                       # conversion
             r'(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?'
             r'\}', String.Interpol),

            # backslashes, quotes and formatting signs must be parsed one at a time
            (r'[^\\\'"%{\n]+', ttype),
            (r'[\'"\\]', ttype),
            # unhandled string formatting sign
            (r'%|(\{{1,2})', ttype)
            # newlines are an error (use "nl" state)
        ]

    def fstring_rules(ttype):
        """Build the rule list for the body of f-string / t-string literals.

        ``ttype`` is the token type given to ordinary string content.  A
        ``{`` is emitted as ``String.Interpol`` and enters the
        ``expr-inside-fstring`` state; a ``}`` is emitted as
        ``String.Interpol``.  Like ``innerstring_rules`` this is called while
        the class body executes, so it takes no ``self``.
        """
        return [
            # Assuming that a '}' is the closing brace after format specifier.
            # Sadly, this means that we won't detect syntax error. But it's
            # more important to parse correct syntax correctly, than to
            # highlight invalid syntax.
            (r'\}', String.Interpol),
            (r'\{', String.Interpol, 'expr-inside-fstring'),
            # backslashes, quotes and formatting signs must be parsed one at a time
            (r'[^\\\'"{}\n]+', ttype),
            (r'[\'"\\]', ttype),
            # newlines are an error (use "nl" state)
        ]

    # Lexer state table: state name -> list of rules.
    tokens = {
        # Statement-level rules; everything not handled here falls through to
        # the 'expr' state included at the end.
        'root': [
            (r'\n', Whitespace),
            # A triple-quoted string at the start of a line (after optional
            # whitespace and up to two prefix letters) is tagged as a docstring.
            (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")',
             bygroups(Whitespace, String.Affix, String.Doc)),
            (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')",
             bygroups(Whitespace, String.Affix, String.Doc)),
            # A shebang is only recognised at the very start of the input (\A).
            (r'\A#!.+$', Comment.Hashbang),
            (r'#.*$', Comment.Single),
            # Backslash line continuation, then a stray backslash.
            (r'\\\n', Text),
            (r'\\', Text),
            include('keywords'),
            include('soft-keywords'),
            # `def` / `class` / `from` / `import` each switch to a dedicated
            # state that classifies the name(s) that follow.
            (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Whitespace), 'funcname'),
            (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Whitespace), 'classname'),
            (r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Whitespace),
             'fromimport'),
            (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Whitespace),
             'import'),
            include('expr'),
        ],
        # Expression-level rules: string openers, numbers, operators,
        # punctuation and names.  Also reused inside f-string replacement
        # fields.
        'expr': [
            # raw f-strings and t-strings
            ('(?i)(r[ft]|[ft]r)(""")',
             bygroups(String.Affix, String.Double),
             combined('rfstringescape', 'tdqf')),
            ("(?i)(r[ft]|[ft]r)(''')",
             bygroups(String.Affix, String.Single),
             combined('rfstringescape', 'tsqf')),
            ('(?i)(r[ft]|[ft]r)(")',
             bygroups(String.Affix, String.Double),
             combined('rfstringescape', 'dqf')),
            ("(?i)(r[ft]|[ft]r)(')",
             bygroups(String.Affix, String.Single),
             combined('rfstringescape', 'sqf')),
            # non-raw f-strings and t-strings
            ('([fFtT])(""")', bygroups(String.Affix, String.Double),
             combined('fstringescape', 'tdqf')),
            ("([fFtT])(''')", bygroups(String.Affix, String.Single),
             combined('fstringescape', 'tsqf')),
            ('([fFtT])(")', bygroups(String.Affix, String.Double),
             combined('fstringescape', 'dqf')),
            ("([fFtT])(')", bygroups(String.Affix, String.Single),
             combined('fstringescape', 'sqf')),
            # raw bytes and strings
            ('(?i)(rb|br|r)(""")',
             bygroups(String.Affix, String.Double), 'tdqs'),
            ("(?i)(rb|br|r)(''')",
             bygroups(String.Affix, String.Single), 'tsqs'),
            ('(?i)(rb|br|r)(")',
             bygroups(String.Affix, String.Double), 'dqs'),
            ("(?i)(rb|br|r)(')",
             bygroups(String.Affix, String.Single), 'sqs'),
            # non-raw strings
            ('([uU]?)(""")', bygroups(String.Affix, String.Double),
             combined('stringescape', 'tdqs')),
            ("([uU]?)(''')", bygroups(String.Affix, String.Single),
             combined('stringescape', 'tsqs')),
            ('([uU]?)(")', bygroups(String.Affix, String.Double),
             combined('stringescape', 'dqs')),
            ("([uU]?)(')", bygroups(String.Affix, String.Single),
             combined('stringescape', 'sqs')),
            # non-raw bytes
            ('([bB])(""")', bygroups(String.Affix, String.Double),
             combined('bytesescape', 'tdqs')),
            ("([bB])(''')", bygroups(String.Affix, String.Single),
             combined('bytesescape', 'tsqs')),
            ('([bB])(")', bygroups(String.Affix, String.Double),
             combined('bytesescape', 'dqs')),
            ("([bB])(')", bygroups(String.Affix, String.Single),
             combined('bytesescape', 'sqs')),

            # horizontal whitespace only; newlines are handled by the caller
            (r'[^\S\n]+', Text),
            include('numbers'),
            (r'!=|==|<<|>>|:=|[-~+/*%=<>&^|.]', Operator),
            (r'[]{}:(),;[]', Punctuation),
            (r'(in|is|and|or|not)\b', Operator.Word),
            include('expr-keywords'),
            include('builtins'),
            include('magicfuncs'),
            include('magicvars'),
            include('name'),
        ],
        # Replacement field of an f-/t-string.  Opening brackets push the
        # '-inner' state, where '}' is plain punctuation rather than the end
        # of the field.
        'expr-inside-fstring': [
            (r'[{([]', Punctuation, 'expr-inside-fstring-inner'),
            # without format specifier
            (r'(=\s*)?'         # debug (https://bugs.python.org/issue36817)
             r'(\![sraf])?'     # conversion
             r'\}', String.Interpol, '#pop'),
            # with format specifier
            # we'll catch the remaining '}' in the outer scope
            (r'(=\s*)?'         # debug (https://bugs.python.org/issue36817)
             r'(\![sraf])?'     # conversion
             r':', String.Interpol, '#pop'),
            (r'\s+', Whitespace),  # allow new lines
            include('expr'),
        ],
        # Bracketed sub-expression inside a replacement field; tracks nesting
        # by pushing itself on every opener and popping on every closer.
        'expr-inside-fstring-inner': [
            (r'[{([]', Punctuation, 'expr-inside-fstring-inner'),
            (r'[])}]', Punctuation, '#pop'),
            (r'\s+', Whitespace),  # allow new lines
            include('expr'),
        ],
        # Keywords that may appear inside an expression.
        'expr-keywords': [
            # Based on https://docs.python.org/3/reference/expressions.html
            (words((
                'async for', 'await', 'else', 'for', 'if', 'lambda',
                'yield', 'yield from'), suffix=r'\b'),
             Keyword),
            (words(('True', 'False', 'None'), suffix=r'\b'), Keyword.Constant),
        ],
        # Keywords recognised at statement level (see 'root').
        'keywords': [
            (words((
                'assert', 'async', 'await', 'break', 'continue', 'del', 'elif',
                'else', 'except', 'finally', 'for', 'global', 'if', 'lambda',
                'pass', 'raise', 'nonlocal', 'return', 'try', 'while', 'yield',
                'yield from', 'as', 'with'), suffix=r'\b'),
             Keyword),
            (words(('True', 'False', 'None'), suffix=r'\b'), Keyword.Constant),
        ],
        'soft-keywords': [
            # `match`, `case` and `_` soft keywords
            (r'(^[ \t]*)'              # at beginning of line + possible indentation
             r'(match|case)\b'         # a possible keyword
             r'(?![ \t]*(?:'           # not followed by...
             r'[:,;=^&|@~)\]}]|(?:' +  # characters and keywords that mean this isn't
                                       # pattern matching (but None/True/False is ok)
             r'|'.join(k for k in keyword.kwlist if k[0].islower()) + r')\b))',
             bygroups(Text, Keyword), 'soft-keywords-inner'),
        ],
        'soft-keywords-inner': [
            # optional `_` keyword
            (r'(\s+)([^\n_]*)(_\b)', bygroups(Whitespace, using(this), Keyword)),
            default('#pop')
        ],
        # Builtin functions/types, pseudo-builtins and builtin exceptions.
        # The (?<!\.) prefix keeps attribute accesses such as ``obj.list``
        # from being highlighted as builtins.
        'builtins': [
            (words((
                '__import__', 'abs', 'aiter', 'all', 'any', 'bin', 'bool', 'bytearray',
                'breakpoint', 'bytes', 'callable', 'chr', 'classmethod', 'compile',
                'complex', 'delattr', 'dict', 'dir', 'divmod', 'enumerate', 'eval',
                'filter', 'float', 'format', 'frozenset', 'getattr', 'globals',
                'hasattr', 'hash', 'hex', 'id', 'input', 'int', 'isinstance',
                'issubclass', 'iter', 'len', 'list', 'locals', 'map', 'max',
                'memoryview', 'min', 'next', 'object', 'oct', 'open', 'ord', 'pow',
                'print', 'property', 'range', 'repr', 'reversed', 'round', 'set',
                'setattr', 'slice', 'sorted', 'staticmethod', 'str', 'sum', 'super',
                'tuple', 'type', 'vars', 'zip'), prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Builtin),
            (r'(?<!\.)(self|Ellipsis|NotImplemented|cls)\b', Name.Builtin.Pseudo),
            (words((
                'ArithmeticError', 'AssertionError', 'AttributeError',
                'BaseException', 'BufferError', 'BytesWarning', 'DeprecationWarning',
                'EOFError', 'EnvironmentError', 'Exception', 'FloatingPointError',
                'FutureWarning', 'GeneratorExit', 'IOError', 'ImportError',
                'ImportWarning', 'IndentationError', 'IndexError', 'KeyError',
                'KeyboardInterrupt', 'LookupError', 'MemoryError', 'NameError',
                'NotImplementedError', 'OSError', 'OverflowError',
                'PendingDeprecationWarning', 'ReferenceError', 'ResourceWarning',
                'RuntimeError', 'RuntimeWarning', 'StopIteration',
                'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit',
                'TabError', 'TypeError', 'UnboundLocalError', 'UnicodeDecodeError',
                'UnicodeEncodeError', 'UnicodeError', 'UnicodeTranslateError',
                'UnicodeWarning', 'UserWarning', 'ValueError', 'VMSError',
                'Warning', 'WindowsError', 'ZeroDivisionError',
                # new builtin exceptions from PEP 3151
                'BlockingIOError', 'ChildProcessError', 'ConnectionError',
                'BrokenPipeError', 'ConnectionAbortedError', 'ConnectionRefusedError',
                'ConnectionResetError', 'FileExistsError', 'FileNotFoundError',
                'InterruptedError', 'IsADirectoryError', 'NotADirectoryError',
                'PermissionError', 'ProcessLookupError', 'TimeoutError',
                # others new in Python 3
                'StopAsyncIteration', 'ModuleNotFoundError', 'RecursionError',
                'EncodingWarning'),
                prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Exception),
        ],
        # Special ("dunder") method names.
        'magicfuncs': [
            (words((
                '__abs__', '__add__', '__aenter__', '__aexit__', '__aiter__',
                '__and__', '__anext__', '__await__', '__bool__', '__bytes__',
                '__call__', '__complex__', '__contains__', '__del__', '__delattr__',
                '__delete__', '__delitem__', '__dir__', '__divmod__', '__enter__',
                '__eq__', '__exit__', '__float__', '__floordiv__', '__format__',
                '__ge__', '__get__', '__getattr__', '__getattribute__',
                '__getitem__', '__gt__', '__hash__', '__iadd__', '__iand__',
                '__ifloordiv__', '__ilshift__', '__imatmul__', '__imod__',
                '__imul__', '__index__', '__init__', '__instancecheck__',
                '__int__', '__invert__', '__ior__', '__ipow__', '__irshift__',
                '__isub__', '__iter__', '__itruediv__', '__ixor__', '__le__',
                '__len__', '__length_hint__', '__lshift__', '__lt__', '__matmul__',
                '__missing__', '__mod__', '__mul__', '__ne__', '__neg__',
                '__new__', '__next__', '__or__', '__pos__', '__pow__',
                '__prepare__', '__radd__', '__rand__', '__rdivmod__', '__repr__',
                '__reversed__', '__rfloordiv__', '__rlshift__', '__rmatmul__',
                '__rmod__', '__rmul__', '__ror__', '__round__', '__rpow__',
                '__rrshift__', '__rshift__', '__rsub__', '__rtruediv__',
                '__rxor__', '__set__', '__setattr__', '__setitem__', '__str__',
                '__sub__', '__subclasscheck__', '__truediv__',
                '__xor__'), suffix=r'\b'),
             Name.Function.Magic),
        ],
        # Special ("dunder") attribute names.
        'magicvars': [
            (words((
                '__annotations__', '__bases__', '__class__', '__closure__',
                '__code__', '__defaults__', '__dict__', '__doc__', '__file__',
                '__func__', '__globals__', '__kwdefaults__', '__module__',
                '__mro__', '__name__', '__objclass__', '__qualname__',
                '__self__', '__slots__', '__weakref__'), suffix=r'\b'),
             Name.Variable.Magic),
        ],
        # Numeric literals; digit groups may be separated by single
        # underscores.  The plain integer rule comes last.
        'numbers': [
            (r'(\d(?:_?\d)*\.(?:\d(?:_?\d)*)?|(?:\d(?:_?\d)*)?\.\d(?:_?\d)*)'
             r'([eE][+-]?\d(?:_?\d)*)?', Number.Float),
            (r'\d(?:_?\d)*[eE][+-]?\d(?:_?\d)*j?', Number.Float),
            (r'0[oO](?:_?[0-7])+', Number.Oct),
            (r'0[bB](?:_?[01])+', Number.Bin),
            (r'0[xX](?:_?[a-fA-F0-9])+', Number.Hex),
            (r'\d(?:_?\d)*', Number.Integer),
        ],
        'name': [
            (r'@' + uni_name, Name.Decorator),
            (r'@', Operator),  # new matrix multiplication operator
            (uni_name, Name),
        ],
        # Entered after `def`: classifies the function name, then returns.
        'funcname': [
            include('magicfuncs'),
            (uni_name, Name.Function, '#pop'),
            default('#pop'),
        ],
        # Entered after `class`: classifies the class name, then returns.
        'classname': [
            (uni_name, Name.Class, '#pop'),
        ],
        # Entered after `import`: dotted module names, `as` and commas.
        'import': [
            (r'(\s+)(as)(\s+)', bygroups(Whitespace, Keyword, Whitespace)),
            (r'\.', Name.Namespace),
            (uni_name, Name.Namespace),
            (r'(\s*)(,)(\s*)', bygroups(Whitespace, Operator, Whitespace)),
            default('#pop')  # all else: go back
        ],
        # Entered after `from`: module path up to the `import` keyword.
        'fromimport': [
            (r'(\s+)(import)\b', bygroups(Whitespace, Keyword.Namespace), '#pop'),
            (r'\.', Name.Namespace),
            # if None occurs here, it's "raise x from None", since None can
            # never be a module name
            (r'None\b', Keyword.Constant, '#pop'),
            (uni_name, Name.Namespace),
            default('#pop'),
        ],
        # Escape states; combined with a string-body state by the string
        # opener rules in 'expr'.
        'rfstringescape': [
            (r'\{\{', String.Escape),
            (r'\}\}', String.Escape),
        ],
        'fstringescape': [
            include('rfstringescape'),
            include('stringescape'),
        ],
        'bytesescape': [
            (r'\\([\\abfnrtv"\']|\n|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        'stringescape': [
            (r'\\(N\{.*?\}|u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8})', String.Escape),
            include('bytesescape')
        ],
        # String-body rule lists produced by the two helper functions above.
        'fstrings-single': fstring_rules(String.Single),
        'fstrings-double': fstring_rules(String.Double),
        'strings-single': innerstring_rules(String.Single),
        'strings-double': innerstring_rules(String.Double),
        # Single-line string bodies: (d)ouble / (s)ingle (q)uoted,
        # (f)-string or plain (s)tring.  The closing quote pops the state.
        'dqf': [
            (r'"', String.Double, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
            include('fstrings-double')
        ],
        'sqf': [
            (r"'", String.Single, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
            include('fstrings-single')
        ],
        'dqs': [
            (r'"', String.Double, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
            include('strings-double')
        ],
        'sqs': [
            (r"'", String.Single, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
            include('strings-single')
        ],
        # Triple-quoted string bodies: same as above, but newlines are part
        # of the string and only the triple quote pops the state.
        'tdqf': [
            (r'"""', String.Double, '#pop'),
            include('fstrings-double'),
            (r'\n', String.Double)
        ],
        'tsqf': [
            (r"'''", String.Single, '#pop'),
            include('fstrings-single'),
            (r'\n', String.Single)
        ],
        'tdqs': [
            (r'"""', String.Double, '#pop'),
            include('strings-double'),
            (r'\n', String.Double)
        ],
        'tsqs': [
            (r"'''", String.Single, '#pop'),
            include('strings-single'),
            (r'\n', String.Single)
        ],
    }

    def analyse_text(text):
        """Guess whether *text* is Python 3 source.

        Returns a truthy value when the shebang line matches
        ``pythonw?(3(\\.\\d)?)?`` or when the string ``'import '`` occurs
        within the first 1000 characters of *text*.
        """
        return shebang_matches(text, r'pythonw?(3(\.\d)?)?') or \
            'import ' in text[:1000]
413
414
# Backwards-compatible alias: ``PythonLexer`` is the Python 3 lexer.
Python3Lexer = PythonLexer
416
417
class Python2Lexer(RegexLexer):
    """
    For Python 2.x source code.

    .. versionchanged:: 2.5
       This class has been renamed from ``PythonLexer``. ``PythonLexer`` now
       refers to the Python 3 variant. File name patterns like ``*.py`` have
       been moved to Python 3 as well.
    """

    name = 'Python 2.x'
    url = 'https://www.python.org'
    aliases = ['python2', 'py2']
    filenames = []  # now taken over by PythonLexer (3.x)
    mimetypes = ['text/x-python2', 'application/x-python2']
    version_added = ''

    def innerstring_rules(ttype):
        """Build the rule list for the body of a string literal.

        ``ttype`` is the token type given to ordinary string content;
        recognised ``%``-style placeholders are emitted as
        ``String.Interpol``.  The function is called while the class body
        executes (see the ``strings-single`` / ``strings-double`` states
        below), so it takes no ``self``.
        """
        return [
            # the old style '%s' % (...) string formatting
            (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[E-GXc-giorsux%]', String.Interpol),
            # backslashes, quotes and formatting signs must be parsed one at a time
            (r'[^\\\'"%\n]+', ttype),
            (r'[\'"\\]', ttype),
            # unhandled string formatting sign
            (r'%', ttype),
            # newlines are an error (use "nl" state)
        ]

    # Lexer state table: state name -> list of rules.
    tokens = {
        'root': [
            (r'\n', Whitespace),
            # A triple-quoted string at the start of a line (after optional
            # whitespace and up to two prefix letters) is tagged as a docstring.
            (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")',
             bygroups(Whitespace, String.Affix, String.Doc)),
            (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')",
             bygroups(Whitespace, String.Affix, String.Doc)),
            (r'[^\S\n]+', Text),
            # A shebang is only recognised at the very start of the input (\A).
            (r'\A#!.+$', Comment.Hashbang),
            (r'#.*$', Comment.Single),
            (r'[]{}:(),;[]', Punctuation),
            # Backslash line continuation, then a stray backslash.
            (r'\\\n', Text),
            (r'\\', Text),
            (r'(in|is|and|or|not)\b', Operator.Word),
            (r'!=|==|<<|>>|[-~+/*%=<>&^|.]', Operator),
            include('keywords'),
            # `def` / `class` / `from` / `import` each switch to a dedicated
            # state that classifies the name(s) that follow.
            (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Whitespace), 'funcname'),
            (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Whitespace), 'classname'),
            (r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Whitespace),
             'fromimport'),
            (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Whitespace),
             'import'),
            include('builtins'),
            include('magicfuncs'),
            include('magicvars'),
            include('backtick'),
            # raw strings (optionally combined with a u/b prefix): no
            # 'stringescape' state is combined in
            ('([rR]|[uUbB][rR]|[rR][uUbB])(""")',
             bygroups(String.Affix, String.Double), 'tdqs'),
            ("([rR]|[uUbB][rR]|[rR][uUbB])(''')",
             bygroups(String.Affix, String.Single), 'tsqs'),
            ('([rR]|[uUbB][rR]|[rR][uUbB])(")',
             bygroups(String.Affix, String.Double), 'dqs'),
            ("([rR]|[uUbB][rR]|[rR][uUbB])(')",
             bygroups(String.Affix, String.Single), 'sqs'),
            # non-raw strings
            ('([uUbB]?)(""")', bygroups(String.Affix, String.Double),
             combined('stringescape', 'tdqs')),
            ("([uUbB]?)(''')", bygroups(String.Affix, String.Single),
             combined('stringescape', 'tsqs')),
            ('([uUbB]?)(")', bygroups(String.Affix, String.Double),
             combined('stringescape', 'dqs')),
            ("([uUbB]?)(')", bygroups(String.Affix, String.Single),
             combined('stringescape', 'sqs')),
            include('name'),
            include('numbers'),
        ],
        'keywords': [
            (words((
                'assert', 'break', 'continue', 'del', 'elif', 'else', 'except',
                'exec', 'finally', 'for', 'global', 'if', 'lambda', 'pass',
                'print', 'raise', 'return', 'try', 'while', 'yield',
                'yield from', 'as', 'with'), suffix=r'\b'),
             Keyword),
        ],
        # Builtin functions/types, pseudo-builtins and builtin exceptions.
        # The (?<!\.) prefix keeps attribute accesses such as ``obj.list``
        # from being highlighted as builtins.
        'builtins': [
            (words((
                '__import__', 'abs', 'all', 'any', 'apply', 'basestring', 'bin',
                'bool', 'buffer', 'bytearray', 'bytes', 'callable', 'chr', 'classmethod',
                'cmp', 'coerce', 'compile', 'complex', 'delattr', 'dict', 'dir', 'divmod',
                'enumerate', 'eval', 'execfile', 'exit', 'file', 'filter', 'float',
                'frozenset', 'getattr', 'globals', 'hasattr', 'hash', 'hex', 'id',
                'input', 'int', 'intern', 'isinstance', 'issubclass', 'iter', 'len',
                'list', 'locals', 'long', 'map', 'max', 'min', 'next', 'object',
                'oct', 'open', 'ord', 'pow', 'property', 'range', 'raw_input', 'reduce',
                'reload', 'repr', 'reversed', 'round', 'set', 'setattr', 'slice',
                'sorted', 'staticmethod', 'str', 'sum', 'super', 'tuple', 'type',
                'unichr', 'unicode', 'vars', 'xrange', 'zip'),
                prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Builtin),
            (r'(?<!\.)(self|None|Ellipsis|NotImplemented|False|True|cls'
             r')\b', Name.Builtin.Pseudo),
            (words((
                'ArithmeticError', 'AssertionError', 'AttributeError',
                'BaseException', 'DeprecationWarning', 'EOFError', 'EnvironmentError',
                'Exception', 'FloatingPointError', 'FutureWarning', 'GeneratorExit',
                'IOError', 'ImportError', 'ImportWarning', 'IndentationError',
                'IndexError', 'KeyError', 'KeyboardInterrupt', 'LookupError',
                'MemoryError', 'NameError',
                'NotImplementedError', 'OSError', 'OverflowError', 'OverflowWarning',
                'PendingDeprecationWarning', 'ReferenceError',
                'RuntimeError', 'RuntimeWarning', 'StandardError', 'StopIteration',
                'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit',
                'TabError', 'TypeError', 'UnboundLocalError', 'UnicodeDecodeError',
                'UnicodeEncodeError', 'UnicodeError', 'UnicodeTranslateError',
                'UnicodeWarning', 'UserWarning', 'ValueError', 'VMSError', 'Warning',
                'WindowsError', 'ZeroDivisionError'), prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Exception),
        ],
        # Special ("dunder") method names.
        'magicfuncs': [
            (words((
                '__abs__', '__add__', '__and__', '__call__', '__cmp__', '__coerce__',
                '__complex__', '__contains__', '__del__', '__delattr__', '__delete__',
                '__delitem__', '__delslice__', '__div__', '__divmod__', '__enter__',
                '__eq__', '__exit__', '__float__', '__floordiv__', '__ge__', '__get__',
                '__getattr__', '__getattribute__', '__getitem__', '__getslice__', '__gt__',
                '__hash__', '__hex__', '__iadd__', '__iand__', '__idiv__', '__ifloordiv__',
                '__ilshift__', '__imod__', '__imul__', '__index__', '__init__',
                '__instancecheck__', '__int__', '__invert__', '__iop__', '__ior__',
                '__ipow__', '__irshift__', '__isub__', '__iter__', '__itruediv__',
                '__ixor__', '__le__', '__len__', '__long__', '__lshift__', '__lt__',
                '__missing__', '__mod__', '__mul__', '__ne__', '__neg__', '__new__',
                '__nonzero__', '__oct__', '__op__', '__or__', '__pos__', '__pow__',
                '__radd__', '__rand__', '__rcmp__', '__rdiv__', '__rdivmod__', '__repr__',
                '__reversed__', '__rfloordiv__', '__rlshift__', '__rmod__', '__rmul__',
                '__rop__', '__ror__', '__rpow__', '__rrshift__', '__rshift__', '__rsub__',
                '__rtruediv__', '__rxor__', '__set__', '__setattr__', '__setitem__',
                '__setslice__', '__str__', '__sub__', '__subclasscheck__', '__truediv__',
                '__unicode__', '__xor__'), suffix=r'\b'),
             Name.Function.Magic),
        ],
        # Special ("dunder") attribute names.
        'magicvars': [
            (words((
                '__bases__', '__class__', '__closure__', '__code__', '__defaults__',
                '__dict__', '__doc__', '__file__', '__func__', '__globals__',
                '__metaclass__', '__module__', '__mro__', '__name__', '__self__',
                '__slots__', '__weakref__'),
                suffix=r'\b'),
             Name.Variable.Magic),
        ],
        # Numeric literals, including the trailing ``L`` long suffix and the
        # leading-zero octal form.  The plain integer rule comes last.
        'numbers': [
            (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?j?', Number.Float),
            (r'\d+[eE][+-]?[0-9]+j?', Number.Float),
            (r'0[0-7]+j?', Number.Oct),
            (r'0[bB][01]+', Number.Bin),
            (r'0[xX][a-fA-F0-9]+', Number.Hex),
            (r'\d+L', Number.Integer.Long),
            (r'\d+j?', Number.Integer)
        ],
        # Backtick-quoted expression, matched non-greedily on a single line.
        'backtick': [
            ('`.*?`', String.Backtick),
        ],
        'name': [
            (r'@[\w.]+', Name.Decorator),
            (r'[a-zA-Z_]\w*', Name),
        ],
        # Entered after `def`: classifies the function name, then returns.
        'funcname': [
            include('magicfuncs'),
            (r'[a-zA-Z_]\w*', Name.Function, '#pop'),
            default('#pop'),
        ],
        # Entered after `class`: classifies the class name, then returns.
        'classname': [
            (r'[a-zA-Z_]\w*', Name.Class, '#pop')
        ],
        # Entered after `import`: dotted module names, `as` and commas.
        'import': [
            (r'(?:[ \t]|\\\n)+', Text),
            (r'as\b', Keyword.Namespace),
            (r',', Operator),
            (r'[a-zA-Z_][\w.]*', Name.Namespace),
            default('#pop')  # all else: go back
        ],
        # Entered after `from`: module path up to the `import` keyword.
        'fromimport': [
            (r'(?:[ \t]|\\\n)+', Text),
            (r'import\b', Keyword.Namespace, '#pop'),
            # if None occurs here, it's "raise x from None", since None can
            # never be a module name
            (r'None\b', Name.Builtin.Pseudo, '#pop'),
            # sadly, in "raise x from y" y will be highlighted as namespace too
            (r'[a-zA-Z_.][\w.]*', Name.Namespace),
            # anything else here also means "raise x from y" and is therefore
            # not an error
            default('#pop'),
        ],
        # Backslash escapes; combined with a string-body state by the
        # non-raw string opener rules in 'root'.
        'stringescape': [
            (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
             r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        # String-body rule lists produced by innerstring_rules() above.
        'strings-single': innerstring_rules(String.Single),
        'strings-double': innerstring_rules(String.Double),
        # Single-line string bodies; the closing quote pops the state.
        'dqs': [
            (r'"', String.Double, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
            include('strings-double')
        ],
        'sqs': [
            (r"'", String.Single, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
            include('strings-single')
        ],
        # Triple-quoted string bodies: newlines are part of the string and
        # only the triple quote pops the state.
        'tdqs': [
            (r'"""', String.Double, '#pop'),
            include('strings-double'),
            (r'\n', String.Double)
        ],
        'tsqs': [
            (r"'''", String.Single, '#pop'),
            include('strings-single'),
            (r'\n', String.Single)
        ],
    }

    def analyse_text(text):
        """Guess whether *text* is Python 2 source.

        Returns the result of matching the shebang line against
        ``pythonw?2(\\.\\d)?``.
        """
        return shebang_matches(text, r'pythonw?2(\.\d)?')
639
640
class _PythonConsoleLexerBase(RegexLexer):
    """Auxiliary lexer for `PythonConsoleLexer`.

    Code tokens are output as ``Token.Other.Code``, traceback tokens as
    ``Token.Other.Traceback``.
    """
    # NOTE: the docstring must be the first statement of the class body;
    # placed after the attributes it is a discarded string expression and
    # never becomes ``__doc__``.

    name = 'Python console session'
    aliases = ['pycon', 'python-console']
    mimetypes = ['text/x-python-doctest']

    tokens = {
        'root': [
            # A primary prompt followed by code; continuation lines may follow.
            (r'(>>> )(.*\n)', bygroups(Generic.Prompt, Other.Code), 'continuations'),
            # This happens, e.g., when tracebacks are embedded in documentation;
            # trailing whitespaces are often stripped in such contexts.
            (r'(>>>)(\n)', bygroups(Generic.Prompt, Whitespace)),
            (r'(\^C)?Traceback \(most recent call last\):\n', Other.Traceback, 'traceback'),
            # SyntaxError starts with this.  CPython indents the frame line
            # by exactly two spaces, so the pattern must carry both of them.
            (r'  File "[^"]+", line \d+', Other.Traceback, 'traceback'),
            # Anything else is program output.
            (r'.*\n', Generic.Output),
        ],
        'continuations': [
            (r'(\.\.\. )(.*\n)', bygroups(Generic.Prompt, Other.Code)),
            # See above.
            (r'(\.\.\.)(\n)', bygroups(Generic.Prompt, Whitespace)),
            default('#pop'),
        ],
        'traceback': [
            # As soon as we see a traceback, consume everything until the next
            # >>> prompt.
            (r'(?=>>>( |$))', Text, '#pop'),
            (r'(KeyboardInterrupt)(\n)', bygroups(Name.Class, Whitespace)),
            (r'.*\n', Other.Traceback),
        ],
    }
676
677
class PythonConsoleLexer(DelegatingLexer):
    """
    For Python console output or doctests, such as:

    .. sourcecode:: pycon

        >>> a = 'foo'
        >>> print(a)
        foo
        >>> 1 / 0
        Traceback (most recent call last):
          File "<stdin>", line 1, in <module>
        ZeroDivisionError: integer division or modulo by zero

    Additional options:

    `python3`
        Use Python 3 lexer for code. Default is ``True``.

        .. versionadded:: 1.0
        .. versionchanged:: 2.5
           Now defaults to ``True``.
    """

    name = 'Python console session'
    aliases = ['pycon', 'python-console']
    mimetypes = ['text/x-python-doctest']
    url = 'https://python.org'
    version_added = ''

    def __init__(self, **options):
        # Pick the lexer pair (source code, traceback) that matches the
        # requested Python major version.
        if get_bool_opt(options, 'python3', True):
            code_lexer, traceback_lexer = PythonLexer, PythonTracebackLexer
        else:
            code_lexer, traceback_lexer = Python2Lexer, Python2TracebackLexer

        # Two auxiliary lexers are needed, so DelegatingLexer is stacked
        # twice with different needle tokens.  TODO: DelegatingLexer should
        # support this directly, by accepting a tuple of auxiliary lexers and
        # a tuple of distinguishing tokens.  Then this intermediary class
        # would not be needed.
        class _ReplaceInnerCode(DelegatingLexer):
            def __init__(self, **options):
                # Inner layer: hand ``Other.Code`` spans to the code lexer.
                super().__init__(code_lexer, _PythonConsoleLexerBase,
                                 Other.Code, **options)

        # Outer layer: hand ``Other.Traceback`` spans to the traceback lexer.
        super().__init__(traceback_lexer, _ReplaceInnerCode,
                         Other.Traceback, **options)
725
726
class PythonTracebackLexer(RegexLexer):
    """
    For Python 3.x tracebacks, with support for chained exceptions.

    .. versionchanged:: 2.5
       This is now the default ``PythonTracebackLexer``. It is still available
       as the alias ``Python3TracebackLexer``.
    """

    name = 'Python Traceback'
    aliases = ['pytb', 'py3tb']
    filenames = ['*.pytb', '*.py3tb']
    mimetypes = ['text/x-python-traceback', 'text/x-python3-traceback']
    url = 'https://python.org'
    version_added = '1.0'

    # CPython's traceback layout is fixed: frame headers are indented by two
    # spaces ('  File "...", line N, in name') and the quoted source line by
    # four.  The patterns below spell that indentation out literally.
    tokens = {
        'root': [
            (r'\n', Whitespace),
            (r'^(\^C)?Traceback \(most recent call last\):\n', Generic.Traceback, 'intb'),
            # Separator lines printed between chained exceptions.
            (r'^During handling of the above exception, another '
             r'exception occurred:\n\n', Generic.Traceback),
            (r'^The above exception was the direct cause of the '
             r'following exception:\n\n', Generic.Traceback),
            # A bare frame header (no "Traceback" line) also starts a
            # traceback; the lookahead consumes nothing.
            (r'^(?=  File "[^"]+", line \d+)', Generic.Traceback, 'intb'),
            (r'^.*\n', Other),
        ],
        'intb': [
            # Frame header with and without the trailing ", in <name>".
            (r'^(  File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Text, Name, Whitespace)),
            (r'^(  File )("[^"]+")(, line )(\d+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Whitespace)),
            # Quoted source line, highlighted as Python; a marker line may
            # follow it.
            (r'^(    )(.+)(\n)',
             bygroups(Whitespace, using(PythonLexer), Whitespace), 'markers'),
            (r'^([ \t]*)(\.\.\.)(\n)',
             bygroups(Whitespace, Comment, Whitespace)),  # for doctests...
            # Final "ExceptionType: message" line, or a bare exception name.
            (r'^([^:]+)(: )(.+)(\n)',
             bygroups(Generic.Error, Text, Name, Whitespace), '#pop'),
            (r'^([a-zA-Z_][\w.]*)(:?\n)',
             bygroups(Generic.Error, Whitespace), '#pop'),
            default('#pop'),
        ],
        'markers': [
            # Either `PEP 657 <https://www.python.org/dev/peps/pep-0657/>`
            # error locations in Python 3.11+, or single-caret markers
            # for syntax errors before that.
            (r'^( {4,})([~^]+)(\n)',
             bygroups(Whitespace, Punctuation.Marker, Whitespace),
             '#pop'),
            default('#pop'),
        ],
    }
779
780
# Backwards-compatible alias: ``PythonTracebackLexer`` is the Python 3 variant.
Python3TracebackLexer = PythonTracebackLexer
782
783
class Python2TracebackLexer(RegexLexer):
    """
    For Python 2.x tracebacks.

    .. versionchanged:: 2.5
       This class has been renamed from ``PythonTracebackLexer``.
       ``PythonTracebackLexer`` now refers to the Python 3 variant.
    """

    name = 'Python 2.x Traceback'
    aliases = ['py2tb']
    filenames = ['*.py2tb']
    mimetypes = ['text/x-python2-traceback']
    url = 'https://python.org'
    version_added = '0.7'

    # CPython's traceback layout is fixed: frame headers are indented by two
    # spaces ('  File "...", line N, in name') and the quoted source line by
    # four.  The patterns below spell that indentation out literally.
    tokens = {
        'root': [
            # Cover both (most recent call last) and (innermost last)
            # The optional ^C allows us to catch keyboard interrupt signals.
            (r'^(\^C)?(Traceback.*\n)',
             bygroups(Text, Generic.Traceback), 'intb'),
            # SyntaxError starts with this.
            (r'^(?=  File "[^"]+", line \d+)', Generic.Traceback, 'intb'),
            (r'^.*\n', Other),
        ],
        'intb': [
            # Frame header with and without the trailing ", in <name>".
            (r'^(  File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Text, Name, Whitespace)),
            (r'^(  File )("[^"]+")(, line )(\d+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Whitespace)),
            # Quoted source line, highlighted as Python 2; a caret marker
            # may follow it.
            (r'^(    )(.+)(\n)',
             bygroups(Text, using(Python2Lexer), Whitespace), 'marker'),
            (r'^([ \t]*)(\.\.\.)(\n)',
             bygroups(Text, Comment, Whitespace)),  # for doctests...
            # Final "ExceptionType: message" line, or a bare exception name.
            (r'^([^:]+)(: )(.+)(\n)',
             bygroups(Generic.Error, Text, Name, Whitespace), '#pop'),
            (r'^([a-zA-Z_]\w*)(:?\n)',
             bygroups(Generic.Error, Whitespace), '#pop')
        ],
        'marker': [
            # For syntax errors.
            (r'( {4,})(\^)', bygroups(Text, Punctuation.Marker), '#pop'),
            default('#pop'),
        ],
    }
830
831
class CythonLexer(RegexLexer):
    """
    For Pyrex and Cython source code.
    """

    name = 'Cython'
    url = 'https://cython.org'
    aliases = ['cython', 'pyx', 'pyrex']
    filenames = ['*.pyx', '*.pxd', '*.pxi']
    mimetypes = ['text/x-cython', 'application/x-cython']
    version_added = '1.1'

    # State machine. Within a state the rules are tried in the order they are
    # listed, so the relative order of the entries below is significant.
    tokens = {
        'root': [
            (r'\n', Whitespace),
            # A triple-quoted string that starts a line (after optional
            # whitespace) is emitted as a docstring.
            (r'^(\s*)("""(?:.|\n)*?""")', bygroups(Whitespace, String.Doc)),
            (r"^(\s*)('''(?:.|\n)*?''')", bygroups(Whitespace, String.Doc)),
            (r'[^\S\n]+', Text),
            (r'#.*$', Comment),
            (r'[]{}:(),;[]', Punctuation),
            (r'\\\n', Whitespace),
            (r'\\', Text),
            (r'(in|is|and|or|not)\b', Operator.Word),
            # Angle-bracketed type name such as ``<int>``; tried before the
            # operator rule so ``<`` and ``>`` are not consumed one by one.
            (r'(<)([a-zA-Z0-9.?]+)(>)',
             bygroups(Punctuation, Keyword.Type, Punctuation)),
            (r'!=|==|<<|>>|[-~+/*%=<>&^|.?]', Operator),
            (r'(from)(\d+)(<=)(\s+)(<)(\d+)(:)',
             bygroups(Keyword, Number.Integer, Operator, Whitespace, Operator,
                      Name, Punctuation)),
            include('keywords'),
            # Definition keywords push a state that classifies the name(s)
            # following them.
            (r'(def|property)(\s+)', bygroups(Keyword, Whitespace), 'funcname'),
            (r'(cp?def)(\s+)', bygroups(Keyword, Whitespace), 'cdef'),
            # (should actually start a block with only cdefs)
            (r'(cdef)(:)', bygroups(Keyword, Punctuation)),
            (r'(class|cppclass|struct)(\s+)', bygroups(Keyword, Whitespace), 'classname'),
            (r'(from)(\s+)', bygroups(Keyword, Whitespace), 'fromimport'),
            (r'(c?import)(\s+)', bygroups(Keyword, Whitespace), 'import'),
            include('builtins'),
            include('backtick'),
            # Raw-prefixed strings enter the plain string states ...
            ('(?:[rR]|[uU][rR]|[rR][uU])"""', String, 'tdqs'),
            ("(?:[rR]|[uU][rR]|[rR][uU])'''", String, 'tsqs'),
            ('(?:[rR]|[uU][rR]|[rR][uU])"', String, 'dqs'),
            ("(?:[rR]|[uU][rR]|[rR][uU])'", String, 'sqs'),
            # ... all other strings additionally get the 'stringescape' rules.
            ('[uU]?"""', String, combined('stringescape', 'tdqs')),
            ("[uU]?'''", String, combined('stringescape', 'tsqs')),
            ('[uU]?"', String, combined('stringescape', 'dqs')),
            ("[uU]?'", String, combined('stringescape', 'sqs')),
            include('name'),
            include('numbers'),
        ],
        'keywords': [
            # NOTE(review): 'except?' is followed by the ``\b`` suffix, and a
            # word boundary after ``?`` only exists when a word character
            # comes next, so ``except? -1`` presumably lexes as the keyword
            # ``except`` plus a ``?`` operator -- confirm whether a dedicated
            # rule is wanted.
            (words((
                'assert', 'async', 'await', 'break', 'by', 'continue', 'ctypedef', 'del',
                'elif', 'else', 'except', 'except?', 'exec', 'finally', 'for', 'fused', 'gil',
                'global', 'if', 'include', 'lambda', 'namespace', 'new', 'noexcept', 'nogil',
                'pass', 'print', 'raise', 'return', 'try', 'while', 'yield', 'as', 'with'),
                suffix=r'\b'),
             Keyword),
            (words(('True', 'False', 'None', 'NULL'), suffix=r'\b'), Keyword.Constant),
            (r'(DEF|IF|ELIF|ELSE)\b', Comment.Preproc),
        ],
        'builtins': [
            # The ``(?<!\.)`` prefix keeps attribute accesses such as
            # ``obj.len`` from being highlighted as builtins.
            (words((
                '__import__', 'abs', 'all', 'any', 'apply', 'basestring', 'bin', 'bint',
                'bool', 'buffer', 'bytearray', 'bytes', 'callable', 'char', 'chr',
                'classmethod', 'cmp', 'coerce', 'compile', 'complex', 'delattr',
                'dict', 'dir', 'divmod', 'double', 'enumerate', 'eval', 'execfile', 'exit',
                'file', 'filter', 'float', 'frozenset', 'getattr', 'globals',
                'hasattr', 'hash', 'hex', 'id', 'input', 'int', 'intern', 'isinstance',
                'issubclass', 'iter', 'len', 'list', 'locals', 'long', 'map', 'max',
                'min', 'next', 'object', 'oct', 'open', 'ord', 'pow', 'property',
                'Py_ssize_t', 'range', 'raw_input', 'reduce', 'reload', 'repr', 'reversed',
                'round', 'set', 'setattr', 'size_t', 'slice', 'sorted', 'staticmethod',
                'ssize_t', 'str', 'sum', 'super', 'tuple', 'type', 'unichr', 'unicode',
                'unsigned', 'vars', 'xrange', 'zip'), prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Builtin),
            (r'(?<!\.)(self|cls|Ellipsis|NotImplemented)\b', Name.Builtin.Pseudo),
            (words((
                'ArithmeticError', 'AssertionError', 'AttributeError',
                'BaseException', 'DeprecationWarning', 'EOFError', 'EnvironmentError',
                'Exception', 'FloatingPointError', 'FutureWarning', 'GeneratorExit',
                'IOError', 'ImportError', 'ImportWarning', 'IndentationError',
                'IndexError', 'KeyError', 'KeyboardInterrupt', 'LookupError',
                'MemoryError', 'NameError', 'NotImplementedError',
                'OSError', 'OverflowError', 'OverflowWarning',
                'PendingDeprecationWarning', 'ReferenceError', 'RuntimeError',
                'RuntimeWarning', 'StandardError', 'StopIteration', 'SyntaxError',
                'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError',
                'TypeError', 'UnboundLocalError', 'UnicodeDecodeError',
                'UnicodeEncodeError', 'UnicodeError', 'UnicodeTranslateError',
                'UnicodeWarning', 'UserWarning', 'ValueError', 'Warning',
                'ZeroDivisionError'), prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Exception),
        ],
        'numbers': [
            # The float rules are tried first, so they must require a decimal
            # point or an exponent. A pattern that also accepts a bare digit
            # run would claim every integer as a float and leave the rules
            # below unreachable (``0x1F`` would lex as ``0`` followed by the
            # name ``x1F``).
            (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
            (r'\d+[eE][+-]?[0-9]+', Number.Float),
            (r'0[bB][01]+', Number.Bin),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0\d+', Number.Oct),
            (r'0[xX][a-fA-F0-9]+', Number.Hex),
            (r'\d+L', Number.Integer.Long),
            (r'\d+', Number.Integer)
        ],
        'backtick': [
            ('`.*?`', String.Backtick),
        ],
        'name': [
            (r'@\w+', Name.Decorator),
            (r'[a-zA-Z_]\w*', Name),
        ],
        'funcname': [
            (r'[a-zA-Z_]\w*', Name.Function, '#pop')
        ],
        # Entered after ``cdef``/``cpdef``: modifiers and type names are
        # consumed until the declared name (or a terminator) pops the state.
        'cdef': [
            (r"(public|readonly|extern|api|inline|packed)\b", Keyword.Reserved),
            (r"(struct|enum|union|class|cppclass)\b(\s+)([a-zA-Z_]\w*)",
             bygroups(Keyword, Whitespace, Name.Class), "#pop",),
            # An identifier directly before ``(`` is the function name.
            (r"([a-zA-Z_]\w*)(\s*)(?=\()", bygroups(Name.Function, Whitespace), "#pop"),
            # An identifier before ``:``, ``,``, ``=``, ``#`` or end of line
            # is a variable name.
            (r"([a-zA-Z_]\w*)(\s*)(?=[:,=#\n]|$)", bygroups(Name.Variable, Whitespace), "#pop"),
            (r"([a-zA-Z_]\w*)(\s*)(,)", bygroups(Name.Variable, Whitespace, Punctuation)),
            (r'from\b', Keyword, '#pop'),
            (r'as\b', Keyword),
            (r':', Punctuation, '#pop'),
            (r'(?=["\'])', Text, '#pop'),
            # Any other identifier seen here is treated as a type.
            (r'[a-zA-Z_]\w*', Keyword.Type),
            (r'.', Text),
        ],
        'classname': [
            (r'[a-zA-Z_]\w*', Name.Class, '#pop')
        ],
        'import': [
            (r'(\s+)(as)(\s+)', bygroups(Whitespace, Keyword, Whitespace)),
            (r'[a-zA-Z_][\w.]*', Name.Namespace),
            (r'(\s*)(,)(\s*)', bygroups(Whitespace, Operator, Whitespace)),
            default('#pop')  # all else: go back
        ],
        'fromimport': [
            (r'(\s+)(c?import)\b', bygroups(Whitespace, Keyword), '#pop'),
            (r'[a-zA-Z_.][\w.]*', Name.Namespace),
            # ``cdef foo from "header"``, or ``for foo from 0 < i < 10``
            default('#pop'),
        ],
        'stringescape': [
            (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
             r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        # Shared body rules for all string states.
        'strings': [
            (r'%(\([a-zA-Z0-9]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[E-GXc-giorsux%]', String.Interpol),
            (r'[^\\\'"%\n]+', String),
            # quotes, percents and backslashes must be parsed one at a time
            (r'[\'"\\]', String),
            # unhandled string formatting sign
            (r'%', String)
            # newlines are an error (use "nl" state)
        ],
        'nl': [
            (r'\n', String)
        ],
        'dqs': [
            (r'"', String, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here again for raw strings
            include('strings')
        ],
        'sqs': [
            (r"'", String, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here again for raw strings
            include('strings')
        ],
        'tdqs': [
            (r'"""', String, '#pop'),
            include('strings'),
            include('nl')
        ],
        'tsqs': [
            (r"'''", String, '#pop'),
            include('strings'),
            include('nl')
        ],
    }
1010
1011
class DgLexer(RegexLexer):
    """
    Lexer for dg,
    a functional and object-oriented programming language
    running on the CPython 3 VM.
    """
    name = 'dg'
    aliases = ['dg']
    filenames = ['*.dg']
    mimetypes = ['text/x-dg']
    url = 'http://pyos.github.io/dg'
    version_added = '1.6'

    # Rules within a state are tried in listing order. Identifiers may end
    # in one or more ``'`` characters, which is why most name rules use the
    # ``(?!['\w])`` lookahead instead of ``\b``.
    tokens = {
        'root': [
            (r'\s+', Text),
            (r'#.*?$', Comment.Single),

            # Numeric literals (case-insensitive prefixes and exponent).
            (r'(?i)0b[01]+', Number.Bin),
            (r'(?i)0o[0-7]+', Number.Oct),
            (r'(?i)0x[0-9a-f]+', Number.Hex),
            (r'(?i)[+-]?[0-9]+\.[0-9]+(e[+-]?[0-9]+)?j?', Number.Float),
            (r'(?i)[+-]?[0-9]+e[+-]?\d+j?', Number.Float),
            (r'(?i)[+-]?[0-9]+j?', Number.Integer),

            # String openers: each pushes a state combining the escape rules,
            # the matching terminator state and the shared string body.
            (r"(?i)(br|r?b?)'''", String, combined('stringescape', 'tsqs', 'string')),
            (r'(?i)(br|r?b?)"""', String, combined('stringescape', 'tdqs', 'string')),
            (r"(?i)(br|r?b?)'", String, combined('stringescape', 'sqs', 'string')),
            (r'(?i)(br|r?b?)"', String, combined('stringescape', 'dqs', 'string')),

            # Operators, including backtick-quoted names.
            (r"`\w+'*`", Operator),
            (r'\b(and|in|is|or|where)\b', Operator.Word),
            (r'[!$%&*+\-./:<-@\\^|~;,]+', Operator),

            # Builtin types.
            (words((
                'bool', 'bytearray', 'bytes', 'classmethod', 'complex', 'dict', 'dict\'',
                'float', 'frozenset', 'int', 'list', 'list\'', 'memoryview', 'object',
                'property', 'range', 'set', 'set\'', 'slice', 'staticmethod', 'str',
                'super', 'tuple', 'tuple\'', 'type'),
                prefix=r'(?<!\.)', suffix=r'(?![\'\w])'),
             Name.Builtin),
            # Builtin functions. ``words()`` takes literal strings, not
            # regex fragments, so ``foldl``/``foldl1`` and ``scanl``/``scanl1``
            # are listed individually; an entry such as 'foldl1?' would only
            # match the literal text ``foldl1?``.
            (words((
                '__import__', 'abs', 'all', 'any', 'bin', 'bind', 'chr', 'cmp', 'compile',
                'complex', 'delattr', 'dir', 'divmod', 'drop', 'dropwhile', 'enumerate',
                'eval', 'exhaust', 'filter', 'flip', 'foldl', 'foldl1', 'format', 'fst',
                'getattr', 'globals', 'hasattr', 'hash', 'head', 'hex', 'id', 'init',
                'input', 'isinstance', 'issubclass', 'iter', 'iterate', 'last', 'len',
                'locals', 'map', 'max', 'min', 'next', 'oct', 'open', 'ord', 'pow',
                'print', 'repr', 'reversed', 'round', 'setattr', 'scanl', 'scanl1', 'snd',
                'sorted', 'sum', 'tail', 'take', 'takewhile', 'vars', 'zip'),
                prefix=r'(?<!\.)', suffix=r'(?![\'\w])'),
             Name.Builtin),
            (r"(?<!\.)(self|Ellipsis|NotImplemented|None|True|False)(?!['\w])",
             Name.Builtin.Pseudo),

            # Exception names: anything capitalised ending in
            # Error/Exception/Warning, plus a few that do not follow that
            # naming scheme.
            (r"(?<!\.)[A-Z]\w*(Error|Exception|Warning)'*(?!['\w])",
             Name.Exception),
            (r"(?<!\.)(Exception|GeneratorExit|KeyboardInterrupt|StopIteration|"
             r"SystemExit)(?!['\w])", Name.Exception),

            (r"(?<![\w.])(except|finally|for|if|import|not|otherwise|raise|"
             r"subclass|while|with|yield)(?!['\w])", Keyword.Reserved),

            # Remaining identifiers: all-caps/underscore names are plain
            # names, other capitalised names are treated as types.
            (r"[A-Z_]+'*(?!['\w])", Name),
            (r"[A-Z]\w+'*(?!['\w])", Keyword.Type),
            (r"\w+'*", Name),

            (r'[()]', Punctuation),
            (r'.', Error),
        ],
        'stringescape': [
            (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
             r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        # Shared string body; listed last in every combined() call above so
        # the terminator rule of the specific quote state is tried first.
        'string': [
            (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[E-GXc-giorsux%]', String.Interpol),
            (r'[^\\\'"%\n]+', String),
            # quotes, percents and backslashes must be parsed one at a time
            (r'[\'"\\]', String),
            # unhandled string formatting sign
            (r'%', String),
            (r'\n', String)
        ],
        'dqs': [
            (r'"', String, '#pop')
        ],
        'sqs': [
            (r"'", String, '#pop')
        ],
        'tdqs': [
            (r'"""', String, '#pop')
        ],
        'tsqs': [
            (r"'''", String, '#pop')
        ],
    }
1109
1110
class NumPyLexer(PythonLexer):
    """
    Python lexer variant that additionally highlights NumPy names.

    Tokenisation is delegated to ``PythonLexer``; plain ``Name`` tokens
    whose text is listed in ``EXTRA_KEYWORDS`` are re-tagged afterwards.
    """

    name = 'NumPy'
    url = 'https://numpy.org/'
    aliases = ['numpy']
    version_added = '0.10'

    # Reset these so the Python mimetypes and filename patterns are not
    # inherited from the parent lexer.
    mimetypes = []
    filenames = []

    # Names that get_tokens_unprocessed() promotes to Keyword.Pseudo.
    EXTRA_KEYWORDS = {
        'abs', 'absolute', 'accumulate', 'add', 'alen', 'all', 'allclose',
        'alltrue', 'alterdot', 'amax', 'amin', 'angle', 'any', 'append',
        'apply_along_axis', 'apply_over_axes', 'arange', 'arccos', 'arccosh',
        'arcsin', 'arcsinh', 'arctan', 'arctan2', 'arctanh', 'argmax', 'argmin',
        'argsort', 'argwhere', 'around', 'array', 'array2string', 'array_equal',
        'array_equiv', 'array_repr', 'array_split', 'array_str', 'arrayrange',
        'asanyarray', 'asarray', 'asarray_chkfinite', 'ascontiguousarray',
        'asfarray', 'asfortranarray', 'asmatrix', 'asscalar', 'astype',
        'atleast_1d', 'atleast_2d', 'atleast_3d', 'average', 'bartlett',
        'base_repr', 'beta', 'binary_repr', 'bincount', 'binomial',
        'bitwise_and', 'bitwise_not', 'bitwise_or', 'bitwise_xor', 'blackman',
        'bmat', 'broadcast', 'byte_bounds', 'bytes', 'byteswap', 'c_',
        'can_cast', 'ceil', 'choose', 'clip', 'column_stack', 'common_type',
        'compare_chararrays', 'compress', 'concatenate', 'conj', 'conjugate',
        'convolve', 'copy', 'corrcoef', 'correlate', 'cos', 'cosh', 'cov',
        'cross', 'cumprod', 'cumproduct', 'cumsum', 'delete', 'deprecate',
        'diag', 'diagflat', 'diagonal', 'diff', 'digitize', 'disp', 'divide',
        'dot', 'dsplit', 'dstack', 'dtype', 'dump', 'dumps', 'ediff1d', 'empty',
        'empty_like', 'equal', 'exp', 'expand_dims', 'expm1', 'extract', 'eye',
        'fabs', 'fastCopyAndTranspose', 'fft', 'fftfreq', 'fftshift', 'fill',
        'finfo', 'fix', 'flat', 'flatnonzero', 'flatten', 'fliplr', 'flipud',
        'floor', 'floor_divide', 'fmod', 'frexp', 'fromarrays', 'frombuffer',
        'fromfile', 'fromfunction', 'fromiter', 'frompyfunc', 'fromstring',
        'generic', 'get_array_wrap', 'get_include', 'get_numarray_include',
        'get_numpy_include', 'get_printoptions', 'getbuffer', 'getbufsize',
        'geterr', 'geterrcall', 'geterrobj', 'getfield', 'gradient', 'greater',
        'greater_equal', 'gumbel', 'hamming', 'hanning', 'histogram',
        'histogram2d', 'histogramdd', 'hsplit', 'hstack', 'hypot', 'i0',
        'identity', 'ifft', 'imag', 'index_exp', 'indices', 'inf', 'info',
        'inner', 'insert', 'int_asbuffer', 'interp', 'intersect1d',
        'intersect1d_nu', 'inv', 'invert', 'iscomplex', 'iscomplexobj',
        'isfinite', 'isfortran', 'isinf', 'isnan', 'isneginf', 'isposinf',
        'isreal', 'isrealobj', 'isscalar', 'issctype', 'issubclass_',
        'issubdtype', 'issubsctype', 'item', 'itemset', 'iterable', 'ix_',
        'kaiser', 'kron', 'ldexp', 'left_shift', 'less', 'less_equal', 'lexsort',
        'linspace', 'load', 'loads', 'loadtxt', 'log', 'log10', 'log1p', 'log2',
        'logical_and', 'logical_not', 'logical_or', 'logical_xor', 'logspace',
        'lstsq', 'mat', 'matrix', 'max', 'maximum', 'maximum_sctype',
        'may_share_memory', 'mean', 'median', 'meshgrid', 'mgrid', 'min',
        'minimum', 'mintypecode', 'mod', 'modf', 'msort', 'multiply', 'nan',
        'nan_to_num', 'nanargmax', 'nanargmin', 'nanmax', 'nanmin', 'nansum',
        'ndenumerate', 'ndim', 'ndindex', 'negative', 'newaxis', 'newbuffer',
        'newbyteorder', 'nonzero', 'not_equal', 'obj2sctype', 'ogrid', 'ones',
        'ones_like', 'outer', 'permutation', 'piecewise', 'pinv', 'pkgload',
        'place', 'poisson', 'poly', 'poly1d', 'polyadd', 'polyder', 'polydiv',
        'polyfit', 'polyint', 'polymul', 'polysub', 'polyval', 'power', 'prod',
        'product', 'ptp', 'put', 'putmask', 'r_', 'randint', 'random_integers',
        'random_sample', 'ranf', 'rank', 'ravel', 'real', 'real_if_close',
        'recarray', 'reciprocal', 'reduce', 'remainder', 'repeat', 'require',
        'reshape', 'resize', 'restoredot', 'right_shift', 'rint', 'roll',
        'rollaxis', 'roots', 'rot90', 'round', 'round_', 'row_stack', 's_',
        'sample', 'savetxt', 'sctype2char', 'searchsorted', 'seed', 'select',
        'set_numeric_ops', 'set_printoptions', 'set_string_function',
        'setbufsize', 'setdiff1d', 'seterr', 'seterrcall', 'seterrobj',
        'setfield', 'setflags', 'setmember1d', 'setxor1d', 'shape',
        'show_config', 'shuffle', 'sign', 'signbit', 'sin', 'sinc', 'sinh',
        'size', 'slice', 'solve', 'sometrue', 'sort', 'sort_complex', 'source',
        'split', 'sqrt', 'square', 'squeeze', 'standard_normal', 'std',
        'subtract', 'sum', 'svd', 'swapaxes', 'take', 'tan', 'tanh', 'tensordot',
        'test', 'tile', 'tofile', 'tolist', 'tostring', 'trace', 'transpose',
        'trapz', 'tri', 'tril', 'trim_zeros', 'triu', 'true_divide', 'typeDict',
        'typename', 'uniform', 'union1d', 'unique', 'unique1d', 'unravel_index',
        'unwrap', 'vander', 'var', 'vdot', 'vectorize', 'view', 'vonmises',
        'vsplit', 'vstack', 'weibull', 'where', 'who', 'zeros', 'zeros_like'
    }

    def get_tokens_unprocessed(self, text):
        """
        Yield ``(index, token, value)`` triples as produced by
        ``PythonLexer``, except that a token which is exactly ``Name`` and
        whose value is in ``EXTRA_KEYWORDS`` is yielded as ``Keyword.Pseudo``.
        """
        python_tokens = PythonLexer.get_tokens_unprocessed(self, text)
        for index, token, value in python_tokens:
            # Identity check on purpose: only the bare ``Name`` token type
            # is re-tagged.
            if token is Name and value in self.EXTRA_KEYWORDS:
                token = Keyword.Pseudo
            yield index, token, value

    def analyse_text(text):
        """
        Return a truthy value when *text* has a Python shebang or an
        ``import`` within its first 1000 characters, and that same prefix
        also imports numpy.
        """
        head = text[:1000]
        looks_like_python = (
            shebang_matches(text, r'pythonw?(3(\.\d)?)?')
            or 'import ' in head
        )
        imports_numpy = 'import numpy' in head or 'from numpy import' in head
        return looks_like_python and imports_numpy