1"""
2 pygments.lexers.jvm
3 ~~~~~~~~~~~~~~~~~~~
4
5 Pygments lexers for JVM languages.
6
7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11import re
12
13from pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \
14 this, combined, default, words
15from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
16 Number, Punctuation, Whitespace
17from pygments.util import shebang_matches
18from pygments import unistring as uni
19
# Public lexer class names exported by this module.
__all__ = ['JavaLexer', 'ScalaLexer', 'GosuLexer', 'GosuTemplateLexer',
           'GroovyLexer', 'IokeLexer', 'ClojureLexer', 'ClojureScriptLexer',
           'KotlinLexer', 'XtendLexer', 'AspectJLexer', 'CeylonLexer',
           'PigLexer', 'GoloLexer', 'JasminLexer', 'SarlLexer']
24
25
class JavaLexer(RegexLexer):
    """
    For Java source code.

    The ``root`` state handles ordinary code.  The helper states are:

    * ``class`` -- entered after ``class``/``interface``/``record``; pops
      once the type name has been emitted as ``Name.Class``.
    * ``var`` -- entered after ``var``; pops after the variable name.
    * ``import`` -- entered after ``package``/``import``; pops after the
      dotted name has been emitted as ``Name.Namespace``.
    * ``string`` / ``multiline_string`` -- bodies of ``"..."`` literals and
      of text blocks opened by three double quotes.
    """

    name = 'Java'
    url = 'https://www.oracle.com/technetwork/java/'
    aliases = ['java']
    filenames = ['*.java']
    mimetypes = ['text/x-java']
    version_added = ''

    # DOTALL lets the multi-line comment rule span newlines; MULTILINE makes
    # ``^`` match at every line start (used by the record and label rules).
    flags = re.MULTILINE | re.DOTALL

    tokens = {
        'root': [
            # ``record`` is contextual: only treated as a declaration when it
            # follows optional modifiers at the start of a line.
            (r'(^\s*)((?:(?:public|private|protected|static|strictfp)(?:\s+))*)(record)\b',
             bygroups(Whitespace, using(this), Keyword.Declaration), 'class'),
            (r'[^\S\n]+', Whitespace),
            (r'(//.*?)(\n)', bygroups(Comment.Single, Whitespace)),
            (r'/\*.*?\*/', Comment.Multiline),
            # keywords: go before method names to avoid lexing "throw new XYZ"
            # as a method signature
            (r'(assert|break|case|catch|continue|default|do|else|finally|for|'
             r'if|goto|instanceof|new|return|switch|this|throw|try|while)\b',
             Keyword),
            # method names
            (r'((?:(?:[^\W\d]|\$)[\w.\[\]$<>?]*\s+)+?)'  # return arguments
             r'((?:[^\W\d]|\$)[\w$]*)'                  # method name
             r'(\s*)(\()',                              # signature start
             bygroups(using(this), Name.Function, Whitespace, Punctuation)),
            (r'@[^\W\d][\w.]*', Name.Decorator),
            (r'(abstract|const|enum|extends|final|implements|native|private|'
             r'protected|public|sealed|static|strictfp|super|synchronized|throws|'
             r'transient|volatile|yield)\b', Keyword.Declaration),
            (r'(boolean|byte|char|double|float|int|long|short|void)\b',
             Keyword.Type),
            (r'(package)(\s+)', bygroups(Keyword.Namespace, Whitespace), 'import'),
            (r'(true|false|null)\b', Keyword.Constant),
            (r'(class|interface)\b', Keyword.Declaration, 'class'),
            (r'(var)(\s+)', bygroups(Keyword.Declaration, Whitespace), 'var'),
            (r'(import(?:\s+static)?)(\s+)', bygroups(Keyword.Namespace, Whitespace),
             'import'),
            (r'"""\n', String, 'multiline_string'),
            (r'"', String, 'string'),
            (r"'\\.'|'[^\\]'|'\\u[0-9a-fA-F]{4}'", String.Char),
            (r'(\.)((?:[^\W\d]|\$)[\w$]*)', bygroups(Punctuation,
                                                     Name.Attribute)),
            (r'^(\s*)(default)(:)', bygroups(Whitespace, Keyword, Punctuation)),
            (r'^(\s*)((?:[^\W\d]|\$)[\w$]*)(:)', bygroups(Whitespace, Name.Label,
                                                          Punctuation)),
            (r'([^\W\d]|\$)[\w$]*', Name),
            (r'([0-9][0-9_]*\.([0-9][0-9_]*)?|'
             r'\.[0-9][0-9_]*)'
             r'([eE][+\-]?[0-9][0-9_]*)?[fFdD]?|'
             r'[0-9][eE][+\-]?[0-9][0-9_]*[fFdD]?|'
             r'[0-9]([eE][+\-]?[0-9][0-9_]*)?[fFdD]|'
             r'0[xX]([0-9a-fA-F][0-9a-fA-F_]*\.?|'
             r'([0-9a-fA-F][0-9a-fA-F_]*)?\.[0-9a-fA-F][0-9a-fA-F_]*)'
             r'[pP][+\-]?[0-9][0-9_]*[fFdD]?', Number.Float),
            (r'0[xX][0-9a-fA-F][0-9a-fA-F_]*[lL]?', Number.Hex),
            (r'0[bB][01][01_]*[lL]?', Number.Bin),
            (r'0[0-7_]+[lL]?', Number.Oct),
            # The alternation is grouped so the optional long suffix applies
            # to a bare ``0`` as well; otherwise ``0L`` would be split into
            # the integer ``0`` and the name ``L``.
            (r'(?:0|[1-9][0-9_]*)[lL]?', Number.Integer),
            (r'[~^*!%&\[\]<>|+=/?-]', Operator),
            (r'[{}();:.,]', Punctuation),
            (r'\n', Whitespace)
        ],
        'class': [
            (r'\s+', Text),
            (r'([^\W\d]|\$)[\w$]*', Name.Class, '#pop')
        ],
        'var': [
            (r'([^\W\d]|\$)[\w$]*', Name, '#pop')
        ],
        'import': [
            (r'[\w.]+\*?', Name.Namespace, '#pop')
        ],
        'multiline_string': [
            (r'"""', String, '#pop'),
            # a lone quote inside a text block does not terminate it
            (r'"', String),
            include('string')
        ],
        'string': [
            (r'[^\\"]+', String),
            (r'\\\\', String),  # Escaped backslash
            (r'\\"', String),  # Escaped quote
            (r'\\', String),  # Bare backslash
            (r'"', String, '#pop'),  # Closing quote
        ],
    }
117
118
class AspectJLexer(JavaLexer):
    """
    For AspectJ source code.

    Lexes the input with the Java rules and then re-tags the tokens that are
    AspectJ keywords, inter-type declaration labels or inter-type annotations.
    """

    name = 'AspectJ'
    url = 'http://www.eclipse.org/aspectj/'
    aliases = ['aspectj']
    filenames = ['*.aj']
    mimetypes = ['text/x-aspectj']
    version_added = '1.6'

    # Plain names that are promoted to keywords.
    aj_keywords = {
        'aspect', 'pointcut', 'privileged', 'call', 'execution',
        'initialization', 'preinitialization', 'handler', 'get', 'set',
        'staticinitialization', 'target', 'args', 'within', 'withincode',
        'cflow', 'cflowbelow', 'annotation', 'before', 'after', 'around',
        'proceed', 'throwing', 'returning', 'adviceexecution', 'declare',
        'parents', 'warning', 'error', 'soft', 'precedence', 'thisJoinPoint',
        'thisJoinPointStaticPart', 'thisEnclosingJoinPointStaticPart',
        'issingleton', 'perthis', 'pertarget', 'percflow', 'percflowbelow',
        'pertypewithin', 'lock', 'unlock', 'thisAspectInstance'
    }
    # Labels (including the trailing colon) that are promoted to keywords.
    aj_inter_type = {'parents:', 'warning:', 'error:', 'soft:', 'precedence:'}
    # Decorator tokens that are promoted to keywords.
    aj_inter_type_annotation = {'@type', '@method', '@constructor', '@field'}

    def get_tokens_unprocessed(self, text):
        """
        Yield ``(index, token, value)`` triples for *text*.

        Tokens come from ``JavaLexer.get_tokens_unprocessed``; those matching
        one of the AspectJ word sets are re-emitted as ``Keyword``, everything
        else is passed through unchanged.
        """
        for index, token, value in JavaLexer.get_tokens_unprocessed(self, text):
            if token is Name and value in self.aj_keywords:
                yield index, Keyword, value
            elif token is Name.Label and value in self.aj_inter_type:
                # Label that still carries its colon: split it, and give the
                # colon its own offset rather than reusing the label's index.
                yield index, Keyword, value[:-1]
                yield index + len(value) - 1, Operator, value[-1]
            elif token is Name.Label and value + ':' in self.aj_inter_type:
                # The Java label rule emits the colon as a separate token, so
                # the label value arrives here without it.
                yield index, Keyword, value
            elif token is Name.Decorator and value in self.aj_inter_type_annotation:
                yield index, Keyword, value
            else:
                yield index, token, value
156
157
class ScalaLexer(RegexLexer):
    """
    For Scala source code.

    The class attributes below are regex fragments that are interpolated
    into the rules of the ``tokens`` table.  ``root`` is assembled from small
    include-only groups (listed under "Includes"); the entries under
    "States" are pushed onto the stack by those rules.
    """

    name = 'Scala'
    url = 'http://www.scala-lang.org'
    aliases = ['scala']
    filenames = ['*.scala']
    mimetypes = ['text/x-scala']
    version_added = ''

    flags = re.MULTILINE | re.DOTALL

    # Character class of operator characters, extended with the Unicode
    # categories Sm and So.
    opchar = '[!#%&*\\-\\/:?@^' + uni.combine('Sm', 'So') + ']'
    # Character class of identifier letters: ``_``, ``$`` and the Unicode
    # categories Ll, Lu, Lo, Nl and Lt.
    letter = '[_\\$' + uni.combine('Ll', 'Lu', 'Lo', 'Nl', 'Lt') + ']'
    upperLetter = '[' + uni.combine('Lu', 'Lt') + ']'
    letterOrDigit = f'(?:{letter}|[0-9])'
    # Same as letterOrDigit, but with ``$`` removed from the letter class.
    letterOrDigitNoDollarSign = '(?:{}|[0-9])'.format(letter.replace('\\$', ''))
    alphaId = f'{letter}+'
    simpleInterpolatedVariable = f'{letter}{letterOrDigitNoDollarSign}*'
    # A letter followed by letters/digits; operator characters may follow
    # only directly after an underscore.
    idrest = f'{letter}{letterOrDigit}*(?:(?<=_){opchar}+)?'
    # Like idrest, but the first character must be an upper-case letter.
    idUpper = f'{upperLetter}{letterOrDigit}*(?:(?<=_){opchar}+)?'
    plainid = f'(?:{idrest}|{opchar}+)'
    backQuotedId = r'`[^`]+`'
    anyId = rf'(?:{plainid}|{backQuotedId})'
    # Negative lookahead rejecting ``//`` and ``/*``.
    notStartOfComment = r'(?!//|/\*)'
    # Lookahead: optional whitespace followed by ``//`` or an end of line.
    endOfLineMaybeWithComment = r'(?=\s*(//|$))'

    keywords = (
        'new', 'return', 'throw', 'classOf', 'isInstanceOf', 'asInstanceOf',
        'else', 'if', 'then', 'do', 'while', 'for', 'yield', 'match', 'case',
        'catch', 'finally', 'try'
    )

    operators = (
        '<%', '=:=', '<:<', '<%<', '>:', '<:', '=', '==', '!=', '<=', '>=',
        '<>', '<', '>', '<-', '←', '->', '→', '=>', '⇒', '?', '@', '|', '-',
        '+', '*', '%', '~', '\\'
    )

    storage_modifiers = (
        'private', 'protected', 'synchronized', '@volatile', 'abstract',
        'final', 'lazy', 'sealed', 'implicit', 'override', '@transient',
        '@native'
    )

    tokens = {
        # The order of the includes is significant: earlier groups win.
        'root': [
            include('whitespace'),
            include('comments'),
            include('script-header'),
            include('imports'),
            include('exports'),
            include('storage-modifiers'),
            include('annotations'),
            include('using'),
            include('declarations'),
            include('inheritance'),
            include('extension'),
            include('end'),
            include('constants'),
            include('strings'),
            include('symbols'),
            include('singleton-type'),
            include('inline'),
            include('quoted'),
            include('keywords'),
            include('operators'),
            include('punctuation'),
            include('names'),
        ],

        # Includes:
        'whitespace': [
            (r'\s+', Whitespace),
        ],
        'comments': [
            (r'//.*?\n', Comment.Single),
            (r'/\*', Comment.Multiline, 'comment'),
        ],
        'script-header': [
            (r'^#!([^\n]*)$', Comment.Hashbang),
        ],
        'imports': [
            (r'\b(import)(\s+)', bygroups(Keyword, Whitespace), 'import-path'),
        ],
        'exports': [
            (r'\b(export)(\s+)(given)(\s+)',
             bygroups(Keyword, Whitespace, Keyword, Whitespace), 'export-path'),
            (r'\b(export)(\s+)', bygroups(Keyword, Whitespace), 'export-path'),
        ],
        'storage-modifiers': [
            (words(storage_modifiers, prefix=r'\b', suffix=r'\b'), Keyword),
            # Only highlight soft modifiers if they are eventually followed by
            # the correct keyword. Note that soft modifiers can be followed by a
            # sequence of regular modifiers; [a-z\s]* skips those, and we just
            # check that the soft modifier is applied to a supported statement.
            (r'\b(transparent|opaque|infix|open|inline)\b(?=[a-z\s]*\b'
             r'(def|val|var|given|type|class|trait|object|enum)\b)', Keyword),
        ],
        'annotations': [
            (rf'@{idrest}', Name.Decorator),
        ],
        'using': [
            # using is a soft keyword, can only be used in the first position of
            # a parameter or argument list.
            (r'(\()(\s*)(using)(\s)', bygroups(Punctuation, Whitespace, Keyword, Whitespace)),
        ],
        'declarations': [
            (rf'\b(def)\b(\s*){notStartOfComment}({anyId})?',
             bygroups(Keyword, Whitespace, Name.Function)),
            (rf'\b(trait)\b(\s*){notStartOfComment}({anyId})?',
             bygroups(Keyword, Whitespace, Name.Class)),
            (rf'\b(?:(case)(\s+))?(class|object|enum)\b(\s*){notStartOfComment}({anyId})?',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Class)),
            # ``.type`` is excluded here; it is handled by 'singleton-type'.
            (rf'(?<!\.)\b(type)\b(\s*){notStartOfComment}({anyId})?',
             bygroups(Keyword, Whitespace, Name.Class)),
            (r'\b(val|var)\b', Keyword.Declaration),
            (rf'\b(package)(\s+)(object)\b(\s*){notStartOfComment}({anyId})?',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Namespace)),
            (r'\b(package)(\s+)', bygroups(Keyword, Whitespace), 'package'),
            (rf'\b(given)\b(\s*)({idUpper})',
             bygroups(Keyword, Whitespace, Name.Class)),
            (rf'\b(given)\b(\s*)({anyId})?',
             bygroups(Keyword, Whitespace, Name)),
        ],
        'inheritance': [
            (r'\b(extends|with|derives)\b(\s*)'
             rf'({idUpper}|{backQuotedId}|(?=\([^\)]+=>)|(?={plainid})|(?="))?',
             bygroups(Keyword, Whitespace, Name.Class)),
        ],
        'extension': [
            (r'\b(extension)(\s+)(?=[\[\(])', bygroups(Keyword, Whitespace)),
        ],
        'end': [
            # end is a soft keyword, should only be highlighted in certain cases
            (r'\b(end)(\s+)(if|while|for|match|new|extension|val|var)\b',
             bygroups(Keyword, Whitespace, Keyword)),
            (rf'\b(end)(\s+)({idUpper}){endOfLineMaybeWithComment}',
             bygroups(Keyword, Whitespace, Name.Class)),
            (rf'\b(end)(\s+)({backQuotedId}|{plainid})?{endOfLineMaybeWithComment}',
             bygroups(Keyword, Whitespace, Name.Namespace)),
        ],
        'punctuation': [
            (r'[{}()\[\];,.]', Punctuation),
            # a single colon only, never part of ``::``
            (r'(?<!:):(?!:)', Punctuation),
        ],
        'keywords': [
            (words(keywords, prefix=r'\b', suffix=r'\b'), Keyword),
        ],
        'operators': [
            (rf'({opchar}{{2,}})(\s+)', bygroups(Operator, Whitespace)),
            # a slash that does not start a comment
            (r'/(?![/*])', Operator),
            (words(operators), Operator),
            (rf'(?<!{opchar})(!|&&|\|\|)(?!{opchar})', Operator),
        ],
        'constants': [
            (r'\b(this|super)\b', Name.Builtin.Pseudo),
            (r'(true|false|null)\b', Keyword.Constant),
            (r'0[xX][0-9a-fA-F_]*', Number.Hex),
            (r'([0-9][0-9_]*\.[0-9][0-9_]*|\.[0-9][0-9_]*)'
             r'([eE][+-]?[0-9][0-9_]*)?[fFdD]?', Number.Float),
            (r'[0-9]+([eE][+-]?[0-9]+)?[fFdD]', Number.Float),
            (r'[0-9]+([eE][+-]?[0-9]+)[fFdD]?', Number.Float),
            (r'[0-9]+[lL]', Number.Integer.Long),
            (r'[0-9]+', Number.Integer),
            (r'""".*?"""(?!")', String),
            (r'"(\\\\|\\"|[^"])*"', String),
            (r"(')(\\.)(')", bygroups(String.Char, String.Escape, String.Char)),
            (r"'[^\\]'|'\\u[0-9a-fA-F]{4}'", String.Char),
        ],
        "strings": [
            # ``f``/``s`` prefixed strings are lexed statefully so that
            # interpolations can be highlighted; ``raw`` strings are not.
            (r'[fs]"""', String, 'interpolated-string-triple'),
            (r'[fs]"', String, 'interpolated-string'),
            (r'raw"(\\\\|\\"|[^"])*"', String),
        ],
        'symbols': [
            (rf"('{plainid})(?!')", String.Symbol),
        ],
        'singleton-type': [
            (r'(\.)(type)\b', bygroups(Punctuation, Keyword)),
        ],
        'inline': [
            # inline is a soft modifier, only highlighted if followed by if,
            # match or parameters.
            (rf'\b(inline)(?=\s+({plainid}|{backQuotedId})\s*:)',
             Keyword),
            (r'\b(inline)\b(?=(?:.(?!\b(?:val|def|given)\b))*\b(if|match)\b)',
             Keyword),
        ],
        'quoted': [
            # '{...} or ${...}
            (r"['$]\{(?!')", Punctuation),
            # '[...]
            (r"'\[(?!')", Punctuation),
        ],
        'names': [
            (idUpper, Name.Class),
            (anyId, Name),
        ],

        # States
        'comment': [
            (r'[^/*]+', Comment.Multiline),
            # block comments nest: each opener pushes, each closer pops
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'import-path': [
            # zero-width match: leave the state right after a newline,
            # semicolon or colon has been consumed
            (r'(?<=[\n;:])', Text, '#pop'),
            include('comments'),
            (r'\b(given)\b', Keyword),
            include('qualified-name'),
            (r'\{', Punctuation, 'import-path-curly-brace'),
        ],
        'import-path-curly-brace': [
            include('whitespace'),
            include('comments'),
            (r'\b(given)\b', Keyword),
            (r'=>', Operator),
            (r'\}', Punctuation, '#pop'),
            (r',', Punctuation),
            (r'[\[\]]', Punctuation),
            include('qualified-name'),
        ],
        'export-path': [
            # zero-width match, see 'import-path'
            (r'(?<=[\n;:])', Text, '#pop'),
            include('comments'),
            include('qualified-name'),
            (r'\{', Punctuation, 'export-path-curly-brace'),
        ],
        'export-path-curly-brace': [
            include('whitespace'),
            include('comments'),
            (r'=>', Operator),
            (r'\}', Punctuation, '#pop'),
            (r',', Punctuation),
            include('qualified-name'),
        ],
        'package': [
            (r'(?<=[\n;])', Text, '#pop'),
            (r':', Punctuation, '#pop'),
            include('comments'),
            include('qualified-name'),
        ],
        'interpolated-string-triple': [
            (r'"""(?!")', String, '#pop'),
            (r'"', String),
            include('interpolated-string-common'),
        ],
        'interpolated-string': [
            (r'"', String, '#pop'),
            include('interpolated-string-common'),
        ],
        'interpolated-string-brace': [
            # the ``}`` closing a ``${`` interpolation
            (r'\}', String.Interpol, '#pop'),
            (r'\{', Punctuation, 'interpolated-string-nested-brace'),
            include('root'),
        ],
        'interpolated-string-nested-brace': [
            # braces inside an interpolation are balanced via the stack
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
            include('root'),
        ],

        # Helpers
        'qualified-name': [
            (idUpper, Name.Class),
            (rf'({anyId})(\.)', bygroups(Name.Namespace, Punctuation)),
            (r'\.', Punctuation),
            (anyId, Name),
            (r'[^\S\n]+', Whitespace),
        ],
        'interpolated-string-common': [
            (r'[^"$\\]+', String),
            (r'\$\$', String.Escape),
            (rf'(\$)({simpleInterpolatedVariable})',
             bygroups(String.Interpol, Name)),
            (r'\$\{', String.Interpol, 'interpolated-string-brace'),
            (r'\\.', String),
        ],
    }
441
442
class GosuLexer(RegexLexer):
    """
    For Gosu source code.

    Besides ``root``, the table defines the states used for string
    templates: ``string`` (body of a double-quoted string),
    ``templateText`` (literal text with ``<% %>`` / ``${ }`` escapes),
    ``stringTemplate`` (code inside ``<% ... %>``) and
    ``stringTemplateShorthand`` (code inside ``${ ... }``).
    """

    name = 'Gosu'
    aliases = ['gosu']
    filenames = ['*.gs', '*.gsx', '*.gsp', '*.vark']
    mimetypes = ['text/x-gosu']
    url = 'https://gosu-lang.github.io'
    version_added = '1.5'

    flags = re.MULTILINE | re.DOTALL

    tokens = {
        'root': [
            # method names
            (r'^(\s*(?:[a-zA-Z_][\w.\[\]]*\s+)+?)'  # modifiers etc.
             r'([a-zA-Z_]\w*)'                    # method name
             r'(\s*)(\()',                        # signature start
             bygroups(using(this), Name.Function, Whitespace, Operator)),
            (r'[^\S\n]+', Whitespace),
            (r'//.*?\n', Comment.Single),
            (r'/\*.*?\*/', Comment.Multiline),
            (r'@[a-zA-Z_][\w.]*', Name.Decorator),
            (r'(in|as|typeof|statictypeof|typeis|typeas|if|else|foreach|for|'
             r'index|while|do|continue|break|return|try|catch|finally|this|'
             r'throw|new|switch|case|default|eval|super|outer|classpath|'
             r'using)\b', Keyword),
            (r'(var|delegate|construct|function|private|internal|protected|'
             r'public|abstract|override|final|static|extends|transient|'
             r'implements|represents|readonly)\b', Keyword.Declaration),
            (r'(property)(\s+)(get|set)?', bygroups(Keyword.Declaration, Whitespace, Keyword.Declaration)),
            (r'(boolean|byte|char|double|float|int|long|short|void|block)\b',
             Keyword.Type),
            (r'(package)(\s+)', bygroups(Keyword.Namespace, Whitespace)),
            (r'(true|false|null|NaN|Infinity)\b', Keyword.Constant),
            (r'(class|interface|enhancement|enum)(\s+)([a-zA-Z_]\w*)',
             bygroups(Keyword.Declaration, Whitespace, Name.Class)),
            (r'(uses)(\s+)([\w.]+\*?)',
             bygroups(Keyword.Namespace, Whitespace, Name.Namespace)),
            (r'"', String, 'string'),
            (r'(\??[.#])([a-zA-Z_]\w*)',
             bygroups(Operator, Name.Attribute)),
            (r'(:)([a-zA-Z_]\w*)',
             bygroups(Operator, Name.Attribute)),
            # Word operators must be tried before the generic name rule,
            # which would otherwise always consume them as plain names.
            (r'(and|or|not)\b', Operator),
            (r'[a-zA-Z_$]\w*', Name),
            (r'[\\~^*!%&\[\](){}<>|+=:;,./?-]', Operator),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'[0-9]+', Number.Integer),
            (r'\n', Whitespace)
        ],
        'templateText': [
            (r'(\\<)|(\\\$)', String),
            (r'(<%@\s+)(extends|params)',
             bygroups(Operator, Name.Decorator), 'stringTemplate'),
            (r'<%!--.*?--%>', Comment.Multiline),
            # Longest alternative first: with ``<%`` in front, ``<%=`` could
            # never match as a single token.
            (r'<%=|<%', Operator, 'stringTemplate'),
            (r'\$\{', Operator, 'stringTemplateShorthand'),
            (r'.', String)
        ],
        'string': [
            (r'"', String, '#pop'),
            include('templateText')
        ],
        'stringTemplate': [
            (r'"', String, 'string'),
            (r'%>', Operator, '#pop'),
            include('root')
        ],
        'stringTemplateShorthand': [
            (r'"', String, 'string'),
            # nested braces re-enter this state so that they stay balanced
            (r'\{', Operator, 'stringTemplateShorthand'),
            (r'\}', Operator, '#pop'),
            include('root')
        ],
    }
520
521
class GosuTemplateLexer(Lexer):
    """
    For Gosu templates.

    All work is delegated to a ``GosuLexer`` that is started in its
    ``templateText`` state instead of ``root``.
    """

    name = 'Gosu Template'
    aliases = ['gst']
    filenames = ['*.gst']
    mimetypes = ['text/x-gosu-template']
    url = 'https://gosu-lang.github.io'
    version_added = '1.5'

    def get_tokens_unprocessed(self, text):
        """Yield the tokens a fresh GosuLexer produces for *text*."""
        yield from GosuLexer().get_tokens_unprocessed(text, ['templateText'])
538
539
class GroovyLexer(RegexLexer):
    """
    For Groovy source code.

    ``root`` only deals with an optional leading shebang line and then
    hands over to ``base``, which contains the actual language rules.
    """

    name = 'Groovy'
    url = 'https://groovy-lang.org/'
    aliases = ['groovy']
    filenames = ['*.groovy','*.gradle']
    mimetypes = ['text/x-groovy']
    version_added = '1.5'

    flags = re.MULTILINE | re.DOTALL

    tokens = {
        'root': [
            # Groovy allows a file to start with a shebang
            (r'#!(.*?)$', Comment.Preproc, 'base'),
            # no shebang: switch to 'base' without consuming anything
            default('base'),
        ],
        'base': [
            (r'[^\S\n]+', Whitespace),
            (r'(//.*?)(\n)', bygroups(Comment.Single, Whitespace)),
            (r'/\*.*?\*/', Comment.Multiline),
            # keywords: go before method names to avoid lexing "throw new XYZ"
            # as a method signature
            (r'(assert|break|case|catch|continue|default|do|else|finally|for|'
             r'if|goto|instanceof|new|return|switch|this|throw|try|while|in|as)\b',
             Keyword),
            # method names
            (r'^(\s*(?:[a-zA-Z_][\w.\[\]]*\s+)+?)'  # return arguments
             r'('
             r'[a-zA-Z_]\w*'                        # method name
             r'|"(?:\\\\|\\[^\\]|[^"\\])*"'         # or double-quoted method name
             r"|'(?:\\\\|\\[^\\]|[^'\\])*'"         # or single-quoted method name
             r')'
             r'(\s*)(\()',                          # signature start
             bygroups(using(this), Name.Function, Whitespace, Operator)),
            (r'@[a-zA-Z_][\w.]*', Name.Decorator),
            (r'(abstract|const|enum|extends|final|implements|native|private|'
             r'protected|public|static|strictfp|super|synchronized|throws|'
             r'transient|volatile)\b', Keyword.Declaration),
            (r'(def|boolean|byte|char|double|float|int|long|short|void)\b',
             Keyword.Type),
            (r'(package)(\s+)', bygroups(Keyword.Namespace, Whitespace)),
            (r'(true|false|null)\b', Keyword.Constant),
            (r'(class|interface)(\s+)', bygroups(Keyword.Declaration, Whitespace),
             'class'),
            (r'(import)(\s+)', bygroups(Keyword.Namespace, Whitespace), 'import'),
            # triple-quoted strings are tried before the single-line forms
            (r'""".*?"""', String.Double),
            (r"'''.*?'''", String.Single),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
            # text delimited by ``$/`` and ``/$``
            (r'\$/((?!/\$).)*/\$', String),
            # text delimited by forward slashes
            (r'/(\\\\|\\[^\\]|[^/\\])*/', String),
            (r"'\\.'|'[^\\]'|'\\u[0-9a-fA-F]{4}'", String.Char),
            (r'(\.)([a-zA-Z_]\w*)', bygroups(Operator, Name.Attribute)),
            (r'[a-zA-Z_]\w*:', Name.Label),
            (r'[a-zA-Z_$]\w*', Name),
            (r'[~^*!%&\[\](){}<>|+=:;,./?-]', Operator),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+L?', Number.Integer),
            (r'\n', Whitespace)
        ],
        'class': [
            (r'[a-zA-Z_]\w*', Name.Class, '#pop')
        ],
        'import': [
            (r'[\w.]+\*?', Name.Namespace, '#pop')
        ],
    }

    def analyse_text(text):
        # Delegates to shebang_matches with the pattern 'groovy'.
        return shebang_matches(text, r'groovy')
615
616
class IokeLexer(RegexLexer):
    """
    For Ioke (a strongly typed, dynamic,
    prototype based programming language) source.

    The states defined before ``root`` lex the bodies of the various
    text and regexp literals; each of them pops when its closing
    delimiter is found.
    """
    name = 'Ioke'
    url = 'https://ioke.org/'
    filenames = ['*.ik']
    aliases = ['ioke', 'ik']
    mimetypes = ['text/x-iokesrc']
    version_added = '1.4'
    tokens = {
        # Shared by all literal states: escape sequences, plus ``#{`` which
        # opens an interpolated expression.
        'interpolatableText': [
            (r'(\\b|\\e|\\t|\\n|\\f|\\r|\\"|\\\\|\\#|\\\Z|\\u[0-9a-fA-F]{1,4}'
             r'|\\[0-3]?[0-7]?[0-7])', String.Escape),
            (r'#\{', Punctuation, 'textInterpolationRoot')
        ],

        # Body of a double-quoted text; ends at an unescaped quote.
        'text': [
            (r'(?<!\\)"', String, '#pop'),
            include('interpolatableText'),
            (r'[^"]', String)
        ],

        # Same as 'text', but emitted as String.Doc.
        'documentation': [
            (r'(?<!\\)"', String.Doc, '#pop'),
            include('interpolatableText'),
            (r'[^"]', String.Doc)
        ],

        # Code inside ``#{ ... }``: regular rules until the closing brace.
        'textInterpolationRoot': [
            (r'\}', Punctuation, '#pop'),
            include('root')
        ],

        # Body of a ``#/ ... /`` regexp; the closing slash may be followed
        # by flag characters.
        'slashRegexp': [
            (r'(?<!\\)/[im-psux]*', String.Regex, '#pop'),
            include('interpolatableText'),
            (r'\\/', String.Regex),
            (r'[^/]', String.Regex)
        ],

        # Body of a ``#r[ ... ]`` regexp.
        'squareRegexp': [
            (r'(?<!\\)][im-psux]*', String.Regex, '#pop'),
            include('interpolatableText'),
            (r'\\]', String.Regex),
            (r'[^\]]', String.Regex)
        ],

        # Body of a ``#[ ... ]`` text.
        'squareText': [
            (r'(?<!\\)]', String, '#pop'),
            include('interpolatableText'),
            (r'[^\]]', String)
        ],

        'root': [
            (r'\n', Whitespace),
            (r'\s+', Whitespace),

            # Comments
            (r';(.*?)\n', Comment),
            (r'\A#!(.*?)\n', Comment),

            # Regexps
            (r'#/', String.Regex, 'slashRegexp'),
            (r'#r\[', String.Regex, 'squareRegexp'),

            # Symbols
            (r':[\w!:?]+', String.Symbol),
            (r'[\w!:?]+:(?![\w!?])', String.Other),
            (r':"(\\\\|\\[^\\]|[^"\\])*"', String.Symbol),

            # Documentation
            # (a quote directly after the opening parenthesis of one of the
            # listed definition forms)
            (r'((?<=fn\()|(?<=fnx\()|(?<=method\()|(?<=macro\()|(?<=lecro\()'
             r'|(?<=syntax\()|(?<=dmacro\()|(?<=dlecro\()|(?<=dlecrox\()'
             r'|(?<=dsyntax\())(\s*)"', String.Doc, 'documentation'),

            # Text
            (r'"', String, 'text'),
            (r'#\[', String, 'squareText'),

            # Mimic
            (r'\w[\w!:?]+(?=\s*=.*mimic\s)', Name.Entity),

            # Assignment
            (r'[a-zA-Z_][\w!:?]*(?=[\s]*[+*/-]?=[^=].*($|\.))',
             Name.Variable),

            # keywords
            (r'(break|cond|continue|do|ensure|for|for:dict|for:set|if|let|'
             r'loop|p:for|p:for:dict|p:for:set|return|unless|until|while|'
             r'with)(?![\w!:?])', Keyword.Reserved),

            # Origin
            (r'(eval|mimic|print|println)(?![\w!:?])', Keyword),

            # Base
            (r'(cell\?|cellNames|cellOwner\?|cellOwner|cells|cell|'
             r'documentation|hash|identity|mimic|removeCell\!|undefineCell\!)'
             r'(?![\w!:?])', Keyword),

            # Ground
            (r'(stackTraceAsText)(?![\w!:?])', Keyword),

            # DefaultBehaviour Literals
            (r'(dict|list|message|set)(?![\w!:?])', Keyword.Reserved),

            # DefaultBehaviour Case
            (r'(case|case:and|case:else|case:nand|case:nor|case:not|case:or|'
             r'case:otherwise|case:xor)(?![\w!:?])', Keyword.Reserved),

            # DefaultBehaviour Reflection
            (r'(asText|become\!|derive|freeze\!|frozen\?|in\?|is\?|kind\?|'
             r'mimic\!|mimics|mimics\?|prependMimic\!|removeAllMimics\!|'
             r'removeMimic\!|same\?|send|thaw\!|uniqueHexId)'
             r'(?![\w!:?])', Keyword),

            # DefaultBehaviour Aspects
            (r'(after|around|before)(?![\w!:?])', Keyword.Reserved),

            # DefaultBehaviour
            (r'(kind|cellDescriptionDict|cellSummary|genSym|inspect|notice)'
             r'(?![\w!:?])', Keyword),
            (r'(use|destructuring)', Keyword.Reserved),

            # DefaultBehavior BaseBehavior
            (r'(cell\?|cellOwner\?|cellOwner|cellNames|cells|cell|'
             r'documentation|identity|removeCell!|undefineCell)'
             r'(?![\w!:?])', Keyword),

            # DefaultBehavior Internal
            (r'(internal:compositeRegexp|internal:concatenateText|'
             r'internal:createDecimal|internal:createNumber|'
             r'internal:createRegexp|internal:createText)'
             r'(?![\w!:?])', Keyword.Reserved),

            # DefaultBehaviour Conditions
            (r'(availableRestarts|bind|error\!|findRestart|handle|'
             r'invokeRestart|rescue|restart|signal\!|warn\!)'
             r'(?![\w!:?])', Keyword.Reserved),

            # constants
            (r'(nil|false|true)(?![\w!:?])', Name.Constant),

            # names
            (r'(Arity|Base|Call|Condition|DateTime|Aspects|Pointcut|'
             r'Assignment|BaseBehavior|Boolean|Case|AndCombiner|Else|'
             r'NAndCombiner|NOrCombiner|NotCombiner|OrCombiner|XOrCombiner|'
             r'Conditions|Definitions|FlowControl|Internal|Literals|'
             r'Reflection|DefaultMacro|DefaultMethod|DefaultSyntax|Dict|'
             r'FileSystem|Ground|Handler|Hook|IO|IokeGround|Struct|'
             r'LexicalBlock|LexicalMacro|List|Message|Method|Mixins|'
             r'NativeMethod|Number|Origin|Pair|Range|Reflector|Regexp Match|'
             r'Regexp|Rescue|Restart|Runtime|Sequence|Set|Symbol|'
             r'System|Text|Tuple)(?![\w!:?])', Name.Builtin),

            # functions
            # (not raw strings: the \u escapes are expanded by Python)
            ('(generateMatchMethod|aliasMethod|\u03bb|\u028E|fnx|fn|method|'
             'dmacro|dlecro|syntax|macro|dlecrox|lecrox|lecro|syntax)'
             '(?![\\w!:?])', Name.Function),

            # Numbers
            (r'-?0[xX][0-9a-fA-F]+', Number.Hex),
            (r'-?(\d+\.?\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
            (r'-?\d+', Number.Integer),

            (r'#\(', Punctuation),

            # Operators
            (r'(&&>>|\|\|>>|\*\*>>|:::|::|\.\.\.|===|\*\*>|\*\*=|&&>|&&=|'
             r'\|\|>|\|\|=|\->>|\+>>|!>>|<>>>|<>>|&>>|%>>|#>>|@>>|/>>|\*>>|'
             r'\?>>|\|>>|\^>>|~>>|\$>>|=>>|<<=|>>=|<=>|<\->|=~|!~|=>|\+\+|'
             r'\-\-|<=|>=|==|!=|&&|\.\.|\+=|\-=|\*=|\/=|%=|&=|\^=|\|=|<\-|'
             r'\+>|!>|<>|&>|%>|#>|\@>|\/>|\*>|\?>|\|>|\^>|~>|\$>|<\->|\->|'
             r'<<|>>|\*\*|\?\||\?&|\|\||>|<|\*|\/|%|\+|\-|&|\^|\||=|\$|!|~|'
             r'\?|#|\u2260|\u2218|\u2208|\u2209)', Operator),
            (r'(and|nand|or|xor|nor|return|import)(?![\w!?])',
             Operator),

            # Punctuation
            (r'(\`\`|\`|\'\'|\'|\.|\,|@@|@|\[|\]|\(|\)|\{|\})', Punctuation),

            # kinds
            (r'[A-Z][\w!:?]*', Name.Class),

            # default cellnames
            (r'[a-z_][\w!:?]*', Name)
        ]
    }
806
807
class ClojureLexer(RegexLexer):
    """
    Lexer for Clojure source code.

    Symbols are classified with three word lists (``special_forms``,
    ``declarations`` and ``builtins``); every other symbol is a function
    name when it directly follows an opening parenthesis and a variable
    otherwise.
    """
    name = 'Clojure'
    url = 'http://clojure.org/'
    aliases = ['clojure', 'clj']
    filenames = ['*.clj', '*.cljc']
    mimetypes = ['text/x-clojure', 'application/x-clojure']
    version_added = '0.11'

    special_forms = (
        '.', 'def', 'do', 'fn', 'if', 'let', 'new', 'quote', 'var', 'loop'
    )

    # It's safe to consider 'ns' a declaration thing because it defines a new
    # namespace.
    declarations = (
        'def-', 'defn', 'defn-', 'defmacro', 'defmulti', 'defmethod',
        'defstruct', 'defonce', 'declare', 'definline', 'definterface',
        'defprotocol', 'defrecord', 'deftype', 'defproject', 'ns'
    )

    builtins = (
        '*', '+', '-', '->', '/', '<', '<=', '=', '==', '>', '>=', '..',
        'accessor', 'agent', 'agent-errors', 'aget', 'alength', 'all-ns',
        'alter', 'and', 'append-child', 'apply', 'array-map', 'aset',
        'aset-boolean', 'aset-byte', 'aset-char', 'aset-double', 'aset-float',
        'aset-int', 'aset-long', 'aset-short', 'assert', 'assoc', 'await',
        'await-for', 'bean', 'binding', 'bit-and', 'bit-not', 'bit-or',
        'bit-shift-left', 'bit-shift-right', 'bit-xor', 'boolean', 'branch?',
        'butlast', 'byte', 'cast', 'char', 'children', 'class',
        'clear-agent-errors', 'comment', 'commute', 'comp', 'comparator',
        'complement', 'concat', 'conj', 'cons', 'constantly', 'cond', 'if-not',
        'construct-proxy', 'contains?', 'count', 'create-ns', 'create-struct',
        'cycle', 'dec', 'deref', 'difference', 'disj', 'dissoc', 'distinct',
        'doall', 'doc', 'dorun', 'doseq', 'dosync', 'dotimes', 'doto',
        'double', 'down', 'drop', 'drop-while', 'edit', 'end?', 'ensure',
        'eval', 'every?', 'false?', 'ffirst', 'file-seq', 'filter', 'find',
        'find-doc', 'find-ns', 'find-var', 'first', 'float', 'flush', 'for',
        'fnseq', 'frest', 'gensym', 'get-proxy-class', 'get',
        'hash-map', 'hash-set', 'identical?', 'identity', 'if-let', 'import',
        'in-ns', 'inc', 'index', 'insert-child', 'insert-left', 'insert-right',
        'inspect-table', 'inspect-tree', 'instance?', 'int', 'interleave',
        'intersection', 'into', 'into-array', 'iterate', 'join', 'key', 'keys',
        'keyword', 'keyword?', 'last', 'lazy-cat', 'lazy-cons', 'left',
        'lefts', 'line-seq', 'list*', 'list', 'load', 'load-file',
        'locking', 'long', 'loop', 'macroexpand', 'macroexpand-1',
        'make-array', 'make-node', 'map', 'map-invert', 'map?', 'mapcat',
        'max', 'max-key', 'memfn', 'merge', 'merge-with', 'meta', 'min',
        'min-key', 'name', 'namespace', 'neg?', 'new', 'newline', 'next',
        'nil?', 'node', 'not', 'not-any?', 'not-every?', 'not=', 'ns-imports',
        'ns-interns', 'ns-map', 'ns-name', 'ns-publics', 'ns-refers',
        'ns-resolve', 'ns-unmap', 'nth', 'nthrest', 'or', 'parse', 'partial',
        'path', 'peek', 'pop', 'pos?', 'pr', 'pr-str', 'print', 'print-str',
        'println', 'println-str', 'prn', 'prn-str', 'project', 'proxy',
        'proxy-mappings', 'quot', 'rand', 'rand-int', 'range', 're-find',
        're-groups', 're-matcher', 're-matches', 're-pattern', 're-seq',
        'read', 'read-line', 'reduce', 'ref', 'ref-set', 'refer', 'rem',
        'remove', 'remove-method', 'remove-ns', 'rename', 'rename-keys',
        'repeat', 'replace', 'replicate', 'resolve', 'rest', 'resultset-seq',
        'reverse', 'rfirst', 'right', 'rights', 'root', 'rrest', 'rseq',
        'second', 'select', 'select-keys', 'send', 'send-off', 'seq',
        'seq-zip', 'seq?', 'set', 'short', 'slurp', 'some', 'sort',
        'sort-by', 'sorted-map', 'sorted-map-by', 'sorted-set',
        'special-symbol?', 'split-at', 'split-with', 'str', 'string?',
        'struct', 'struct-map', 'subs', 'subvec', 'symbol', 'symbol?',
        'sync', 'take', 'take-nth', 'take-while', 'test', 'time', 'to-array',
        'to-array-2d', 'tree-seq', 'true?', 'union', 'up', 'update-proxy',
        'val', 'vals', 'var-get', 'var-set', 'var?', 'vector', 'vector-zip',
        'vector?', 'when', 'when-first', 'when-let', 'when-not',
        'with-local-vars', 'with-meta', 'with-open', 'with-out-str',
        'xml-seq', 'xml-zip', 'zero?', 'zipmap', 'zipper')

    # valid names for identifiers
    # well, names can only not consist fully of numbers
    # but this should be good enough for now

    # TODO / should divide keywords/symbols into namespace/rest
    # but that's hard, so just pretend / is part of the name
    valid_name = r'(?!#)[\w!$%*+<=>?/.#|-]+'

    tokens = {
        'root': [
            # the comments - always starting with semicolon
            # and going to the end of the line
            (r';.*$', Comment.Single),

            # whitespaces - usually not relevant
            (r',+', Text),
            (r'\s+', Whitespace),

            # numbers
            # Hex literals have to be tried first: the plain integer rule
            # would otherwise consume the leading ``0`` and leave the rest
            # to be lexed as a symbol.
            (r'-?0[xX][0-9a-fA-F]+', Number.Hex),
            (r'-?\d+\.\d+', Number.Float),
            (r'-?\d+/\d+', Number),
            (r'-?\d+', Number.Integer),

            # strings, symbols and characters
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
            (r"'" + valid_name, String.Symbol),
            (r"\\(.|[a-z]+)", String.Char),

            # keywords
            (r'::?#?' + valid_name, String.Symbol),

            # special operators
            (r'~@|[`\'#^~&@]', Operator),

            # highlight the special forms
            (words(special_forms, suffix=' '), Keyword),

            # Technically, only the special forms are 'keywords'. The problem
            # is that only treating them as keywords means that things like
            # 'defn' and 'ns' need to be highlighted as builtins. This is ugly
            # and weird for most styles. So, as a compromise we're going to
            # highlight them as Keyword.Declarations.
            (words(declarations, suffix=' '), Keyword.Declaration),

            # highlight the builtins
            (words(builtins, suffix=' '), Name.Builtin),

            # the remaining functions
            (r'(?<=\()' + valid_name, Name.Function),

            # find the remaining variables
            (valid_name, Name.Variable),

            # Clojure accepts vector notation
            (r'(\[|\])', Punctuation),

            # Clojure accepts map notation
            (r'(\{|\})', Punctuation),

            # the famous parentheses!
            (r'(\(|\))', Punctuation),
        ],
    }
946
947
class ClojureScriptLexer(ClojureLexer):
    """
    Lexer for ClojureScript source code.

    This subclass defines no ``tokens`` of its own, so every lexing rule
    comes from :class:`ClojureLexer`; only the registration metadata
    (name, aliases, filename patterns, MIME types) is overridden here.
    """
    # Registration metadata only.
    name = 'ClojureScript'
    aliases = ['clojurescript', 'cljs']
    filenames = ['*.cljs']
    mimetypes = ['text/x-clojurescript', 'application/x-clojurescript']
    url = 'http://clojure.org/clojurescript'
    version_added = '2.0'
958
959
class TeaLangLexer(RegexLexer):
    """
    For Tea source code. Only used within a
    TeaTemplateLexer.

    The lexer has three states: ``root`` (general code), ``template``
    (consumes the name that follows the ``template`` keyword) and
    ``import`` (consumes the dotted name that follows ``import``).

    .. versionadded:: 1.5
    """

    flags = re.MULTILINE | re.DOTALL

    tokens = {
        'root': [
            # method names
            (r'^(\s*(?:[a-zA-Z_][\w\.\[\]]*\s+)+?)'  # return arguments
             r'([a-zA-Z_]\w*)'                       # method name
             r'(\s*)(\()',                           # signature start
             bygroups(using(this), Name.Function, Whitespace, Operator)),
            (r'[^\S\n]+', Whitespace),
            (r'(//.*?)(\n)', bygroups(Comment.Single, Whitespace)),
            (r'/\*.*?\*/', Comment.Multiline),
            (r'@[a-zA-Z_][\w\.]*', Name.Decorator),
            (r'(and|break|else|foreach|if|in|not|or|reverse)\b',
             Keyword),
            (r'(as|call|define)\b', Keyword.Declaration),
            (r'(true|false|null)\b', Keyword.Constant),
            # The word operator has to be tried before the generic name
            # rules below; when it was listed only in the symbolic operator
            # rule, the Name rule always consumed ``isa`` first.
            (r'isa\b', Operator),
            (r'(template)(\s+)', bygroups(Keyword.Declaration, Whitespace), 'template'),
            (r'(import)(\s+)', bygroups(Keyword.Namespace, Whitespace), 'import'),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
            (r'(\.)([a-zA-Z_]\w*)', bygroups(Operator, Name.Attribute)),
            (r'[a-zA-Z_]\w*:', Name.Label),
            (r'[a-zA-Z_\$]\w*', Name),
            # Symbolic operators.  The hyphen is escaped so that it is a
            # literal '-' rather than an accidental '+'..'/' range; the set
            # of matched characters is unchanged.
            (r'([.]{3}|[.]{2}|[=#!<>+\-/%&;,.\*\\\(\)\[\]\{\}])', Operator),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+L?', Number.Integer),
            (r'\n', Whitespace)
        ],
        # Entered after ``template <ws>``: one identifier, then back to root.
        'template': [
            (r'[a-zA-Z_]\w*', Name.Class, '#pop')
        ],
        # Entered after ``import <ws>``: dotted name with optional ``*``.
        'import': [
            (r'[\w.]+\*?', Name.Namespace, '#pop')
        ],
    }
1005
1006
class CeylonLexer(RegexLexer):
    """
    For Ceylon source code.

    Block comments are handled by a dedicated ``comment`` state that
    pushes itself again on every ``/*`` it meets, so nested block
    comments are balanced.
    """

    name = 'Ceylon'
    aliases = ['ceylon']
    filenames = ['*.ceylon']
    mimetypes = ['text/x-ceylon']
    url = 'http://ceylon-lang.org/'
    version_added = '1.6'

    flags = re.MULTILINE | re.DOTALL

    #: optional Comment or Whitespace
    _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'

    tokens = {
        'root': [
            # -- method signatures: <return type...> <name> (
            (r'^(\s*(?:[a-zA-Z_][\w.\[\]]*\s+)+?)'  # return arguments
             r'([a-zA-Z_]\w*)'                      # method name
             r'(\s*)(\()',                          # signature start
             bygroups(using(this), Name.Function, Whitespace, Operator)),

            # -- whitespace and comments
            (r'[^\S\n]+', Whitespace),
            (r'(//.*?)(\n)', bygroups(Comment.Single, Whitespace)),
            (r'/\*', Comment.Multiline, 'comment'),

            # -- annotations (highlighted as decorators)
            (r'(shared|abstract|formal|default|actual|variable|deprecated|small|'
             r'late|literal|doc|by|see|throws|optional|license|tagged|final|native|'
             r'annotation|sealed)\b',
             Name.Decorator),

            # -- keywords
            (r'(break|case|catch|continue|else|finally|for|in|'
             r'if|return|switch|this|throw|try|while|is|exists|dynamic|'
             r'nonempty|then|outer|assert|let)\b',
             Keyword),
            (r'(abstracts|extends|satisfies|'
             r'super|given|of|out|assign)\b',
             Keyword.Declaration),
            (r'(function|value|void|new)\b', Keyword.Type),
            (r'(assembly|module|package)(\s+)',
             bygroups(Keyword.Namespace, Whitespace)),
            (r'(true|false|null)\b', Keyword.Constant),

            # -- declarations that switch state to read the following name
            (r'(class|interface|object|alias)(\s+)',
             bygroups(Keyword.Declaration, Whitespace),
             'class'),
            (r'(import)(\s+)',
             bygroups(Keyword.Namespace, Whitespace),
             'import'),

            # -- string and character literals
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
            (r"'\\.'|'[^\\]'|'\\\{#[0-9a-fA-F]{4}\}'", String.Char),

            # -- names
            (r'(\.)([a-z_]\w*)', bygroups(Operator, Name.Attribute)),
            (r'[a-zA-Z_]\w*:', Name.Label),
            (r'[a-zA-Z_]\w*', Name),

            # -- operators and punctuation
            (r'[~^*!%&\[\](){}<>|+=:;,./?-]', Operator),

            # -- floats (grouped-digit forms are tried before plain forms)
            (r'\d{1,3}(_\d{3})+\.\d{1,3}(_\d{3})+[kMGTPmunpf]?', Number.Float),
            (r'\d{1,3}(_\d{3})+\.[0-9]+([eE][+-]?[0-9]+)?[kMGTPmunpf]?',
             Number.Float),
            (r'[0-9][0-9]*\.\d{1,3}(_\d{3})+[kMGTPmunpf]?', Number.Float),
            (r'[0-9][0-9]*\.[0-9]+([eE][+-]?[0-9]+)?[kMGTPmunpf]?',
             Number.Float),

            # -- '#' hex and '$' binary literals
            (r'#([0-9a-fA-F]{4})(_[0-9a-fA-F]{4})+', Number.Hex),
            (r'#[0-9a-fA-F]+', Number.Hex),
            (r'\$([01]{4})(_[01]{4})+', Number.Bin),
            (r'\$[01]+', Number.Bin),

            # -- integers
            (r'\d{1,3}(_\d{3})+[kMGTP]?', Number.Integer),
            (r'[0-9]+[kMGTP]?', Number.Integer),

            (r'\n', Whitespace)
        ],
        # One identifier after class/interface/object/alias, then return.
        'class': [
            (r'[A-Za-z_]\w*', Name.Class, '#pop')
        ],
        # Dotted lower-case-initial name after ``import``, then return.
        'import': [
            (r'[a-z][\w.]*',
             Name.Namespace, '#pop')
        ],
        # Inside /* ... */; re-entered for each nested opener.
        'comment': [
            (r'[^*/]', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline)
        ],
    }
1084
1085
class KotlinLexer(RegexLexer):
    """
    For Kotlin source code.

    Besides ``root`` the lexer uses small helper states that read the
    identifier following a declaration keyword (``class``, ``variable``,
    ``function``, ``destructuring_assignment``, ``generic``), a
    ``modifiers`` state used to re-lex the modifier prefix of a
    declaration, and string states that support ``$name`` / ``${...}``
    interpolation with nested braces (``interpolation`` / ``scope``).
    """

    name = 'Kotlin'
    url = 'http://kotlinlang.org/'
    aliases = ['kotlin']
    filenames = ['*.kt', '*.kts']
    mimetypes = ['text/x-kotlin']
    version_added = '1.5'

    flags = re.MULTILINE | re.DOTALL

    # Plain identifier (optionally prefixed with '@').
    kt_name = ('@?[_' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl') + ']' +
               '[' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc', 'Cf',
                                 'Mn', 'Mc') + ']*')

    # Body of a backtick-quoted identifier: additionally allows spaces
    # and a set of punctuation characters.
    kt_space_name = ('@?[_' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl') + ']' +
                     '[' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc', 'Cf',
                                       'Mn', 'Mc', 'Zs')
                     + r'\'~!%^&*()+=|\[\]:;,.<>/\?-]*')

    # Either form, wrapped in one capturing group.
    kt_id = '(' + kt_name + '|`' + kt_space_name + '`)'

    modifiers = (r'actual|abstract|annotation|companion|const|crossinline|'
                 r'data|enum|expect|external|final|infix|inline|inner|'
                 r'internal|lateinit|noinline|open|operator|override|private|'
                 r'protected|public|sealed|suspend|tailrec|value')

    tokens = {
        'root': [
            # Whitespaces
            (r'[^\S\n]+', Whitespace),
            (r'\s+', Whitespace),
            (r'\\$', String.Escape),  # line continuation
            (r'\n', Whitespace),
            # Comments
            (r'(//.*?)(\n)', bygroups(Comment.Single, Whitespace)),
            (r'^(#!/.+?)(\n)', bygroups(Comment.Single, Whitespace)),  # shebang for kotlin scripts
            (r'/[*].*?[*]/', Comment.Multiline),
            # Keywords
            (r'as\?', Keyword),
            (r'(as|break|by|catch|constructor|continue|do|dynamic|else|finally|'
             r'get|for|if|init|[!]*in|[!]*is|out|reified|return|set|super|this|'
             r'throw|try|typealias|typeof|vararg|when|where|while)\b', Keyword),
            (r'it\b', Name.Builtin),
            # Built-in types
            (words(('Boolean?', 'Byte?', 'Char?', 'Double?', 'Float?',
                    'Int?', 'Long?', 'Short?', 'String?', 'Any?', 'Unit?')), Keyword.Type),
            (words(('Boolean', 'Byte', 'Char', 'Double', 'Float',
                    'Int', 'Long', 'Short', 'String', 'Any', 'Unit'), suffix=r'\b'), Keyword.Type),
            # Constants
            (r'(true|false|null)\b', Keyword.Constant),
            # Imports
            (r'(package|import)(\s+)(\S+)', bygroups(Keyword, Whitespace, Name.Namespace)),
            # Dot access
            (r'(\?\.)((?:[^\W\d]|\$)[\w$]*)', bygroups(Operator, Name.Attribute)),
            (r'(\.)((?:[^\W\d]|\$)[\w$]*)', bygroups(Punctuation, Name.Attribute)),
            # Annotations
            (r'@[^\W\d][\w.]*', Name.Decorator),
            # Labels
            (r'[^\W\d][\w.]+@', Name.Decorator),
            # Object expression
            (r'(object)(\s+)(:)(\s+)', bygroups(Keyword, Whitespace, Punctuation, Whitespace), 'class'),
            # Types
            (r'((?:(?:' + modifiers + r'|fun)\s+)*)(class|interface|object)(\s+)',
             bygroups(using(this, state='modifiers'), Keyword.Declaration, Whitespace), 'class'),
            # Variables
            (r'(var|val)(\s+)(\()', bygroups(Keyword.Declaration, Whitespace, Punctuation),
             'destructuring_assignment'),
            (r'((?:(?:' + modifiers + r')\s+)*)(var|val)(\s+)',
             bygroups(using(this, state='modifiers'), Keyword.Declaration, Whitespace), 'variable'),
            # Functions
            (r'((?:(?:' + modifiers + r')\s+)*)(fun)(\s+)',
             bygroups(using(this, state='modifiers'), Keyword.Declaration, Whitespace), 'function'),
            # Operators
            (r'::|!!|\?[:.]', Operator),
            (r'[~^*!%&\[\]<>|+=/?-]', Operator),
            # Punctuation
            (r'[{}();:.,]', Punctuation),
            # Strings
            (r'"""', String, 'multiline_string'),
            (r'"', String, 'string'),
            (r"'\\.'|'[^\\]'", String.Char),
            # Numbers
            # Hex is tried first: otherwise the decimal alternative consumes
            # the leading '0' and the rest is lexed as a name.  The decimal
            # form takes one or more digits, needs a digit after the dot
            # (so ``1.toString()`` and ``1..10`` keep their dots) and allows
            # an exponent with an optional sign.
            (r"0[xX][0-9a-fA-F]+[Ll]?|"
             r"[0-9]+(\.[0-9]+)?([eE][+-]?[0-9]+)?[flFL]?", Number),
            # Identifiers
            (r'' + kt_id + r'((\?[^.])?)', Name)  # additionally handle nullable types
        ],
        # Name following class/interface/object.
        'class': [
            (kt_id, Name.Class, '#pop')
        ],
        # Name following var/val.
        'variable': [
            (kt_id, Name.Variable, '#pop')
        ],
        # Contents of ``val (a, b: T) = ...`` up to the closing paren.
        'destructuring_assignment': [
            (r',', Punctuation),
            (r'\s+', Whitespace),
            (kt_id, Name.Variable),
            (r'(:)(\s+)(' + kt_id + ')', bygroups(Punctuation, Whitespace, Name)),
            (r'<', Operator, 'generic'),
            (r'\)', Punctuation, '#pop')
        ],
        # Name following ``fun``: optional type parameters, optional
        # ``Receiver.`` prefix, then the function name.
        'function': [
            (r'<', Operator, 'generic'),
            (r'' + kt_id + r'(\.)' + kt_id, bygroups(Name, Punctuation, Name.Function), '#pop'),
            (kt_id, Name.Function, '#pop')
        ],
        # Inside ``< ... >`` after ``fun`` or in a destructuring type.
        'generic': [
            (r'(>)(\s*)', bygroups(Operator, Whitespace), '#pop'),
            (r':', Punctuation),
            (r'(reified|out|in)\b', Keyword),
            (r',', Punctuation),
            (r'\s+', Whitespace),
            (kt_id, Name)
        ],
        # Used via ``using(this, state='modifiers')`` to re-lex the
        # modifier prefix captured by the declaration rules in root.
        'modifiers': [
            (r'\w+', Keyword.Declaration),
            (r'\s+', Whitespace),
            default('#pop')
        ],
        'string': [
            (r'"', String, '#pop'),
            include('string_common')
        ],
        'multiline_string': [
            (r'"""', String, '#pop'),
            (r'"', String),
            include('string_common')
        ],
        'string_common': [
            (r'\\\\', String),  # escaped backslash
            (r'\\"', String),  # escaped quote
            (r'\\', String),  # bare backslash
            (r'\$\{', String.Interpol, 'interpolation'),
            (r'(\$)(\w+)', bygroups(String.Interpol, Name)),
            (r'[^\\"$]+', String)
        ],
        # Inside ``${ ... }``; plain braces are tracked through 'scope'
        # so that only the matching '}' ends the interpolation.
        'interpolation': [
            (r'"', String),
            (r'\$\{', String.Interpol, 'interpolation'),
            (r'\{', Punctuation, 'scope'),
            (r'\}', String.Interpol, '#pop'),
            include('root')
        ],
        'scope': [
            (r'\{', Punctuation, 'scope'),
            (r'\}', Punctuation, '#pop'),
            include('root')
        ]
    }
1239
1240
class XtendLexer(RegexLexer):
    """
    For Xtend source code.

    In addition to ordinary quoted strings, a ``template`` state is
    entered on ``'''`` or on U+00BB and left again on ``'''`` or on
    U+00AB; every character in between is emitted as ``String``.
    """

    name = 'Xtend'
    aliases = ['xtend']
    filenames = ['*.xtend']
    mimetypes = ['text/x-xtend']
    url = 'https://www.eclipse.org/xtend/'
    version_added = '1.6'

    flags = re.MULTILINE | re.DOTALL

    tokens = {
        'root': [
            # -- method signatures: <return type...> <name> (
            (r'^(\s*(?:[a-zA-Z_][\w.\[\]]*\s+)+?)'  # return arguments
             r'([a-zA-Z_$][\w$]*)'                  # method name
             r'(\s*)(\()',                          # signature start
             bygroups(using(this), Name.Function, Whitespace, Operator)),

            # -- whitespace and comments
            (r'[^\S\n]+', Whitespace),
            (r'(//.*?)(\n)', bygroups(Comment.Single, Whitespace)),
            (r'/\*.*?\*/', Comment.Multiline),

            # -- annotations
            (r'@[a-zA-Z_][\w.]*', Name.Decorator),

            # -- keywords (the upper-case words are listed alongside the
            #    regular statement keywords)
            (r'(assert|break|case|catch|continue|default|do|else|finally|for|'
             r'if|goto|instanceof|new|return|switch|this|throw|try|while|IF|'
             r'ELSE|ELSEIF|ENDIF|FOR|ENDFOR|SEPARATOR|BEFORE|AFTER)\b',
             Keyword),
            (r'(def|abstract|const|enum|extends|final|implements|native|private|'
             r'protected|public|static|strictfp|super|synchronized|throws|'
             r'transient|volatile|val|var)\b',
             Keyword.Declaration),
            (r'(boolean|byte|char|double|float|int|long|short|void)\b',
             Keyword.Type),
            (r'(package)(\s+)', bygroups(Keyword.Namespace, Whitespace)),
            (r'(true|false|null)\b', Keyword.Constant),

            # -- declarations that switch state to read the following name
            (r'(class|interface)(\s+)',
             bygroups(Keyword.Declaration, Whitespace),
             'class'),
            (r'(import)(\s+)',
             bygroups(Keyword.Namespace, Whitespace),
             'import'),

            # -- template text openers, then ordinary strings
            (r"(''')", String, 'template'),
            (r'(\u00BB)', String, 'template'),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),

            # -- names
            (r'[a-zA-Z_]\w*:', Name.Label),
            (r'[a-zA-Z_$]\w*', Name),

            # -- operators and punctuation
            (r'[~^*!%&\[\](){}<>\|+=:;,./?-]', Operator),

            # -- numbers
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+L?', Number.Integer),

            (r'\n', Whitespace)
        ],
        # One identifier after class/interface, then return.
        'class': [
            (r'[a-zA-Z_]\w*', Name.Class, '#pop')
        ],
        # Dotted name with optional trailing '*' after import, then return.
        'import': [
            (r'[\w.]+\*?', Name.Namespace, '#pop')
        ],
        # Template text; ended by ''' or by U+00AB.
        'template': [
            (r"'''", String, '#pop'),
            (r'\u00AB', String, '#pop'),
            (r'.', String)
        ],
    }
1304
1305
class PigLexer(RegexLexer):
    """
    For Pig Latin source code.

    Matching is case-insensitive (``re.IGNORECASE``).  The word lists live
    in their own states (``keywords``, ``types``, ``builtins``, ``punct``,
    ``operators``) and are pulled into ``root`` with ``include``.
    """

    name = 'Pig'
    url = 'https://pig.apache.org/'
    aliases = ['pig']
    filenames = ['*.pig']
    mimetypes = ['text/x-pig']
    version_added = '2.0'

    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'--.*', Comment),
            (r'/\*[\w\W]*?\*/', Comment.Multiline),
            (r'\\$', String.Escape),
            (r'\\', Text),
            (r'\'(?:\\[ntbrf\\\']|\\u[0-9a-f]{4}|[^\'\\\n\r])*\'', String),
            include('keywords'),
            include('types'),
            include('builtins'),
            include('punct'),
            include('operators'),
            (r'[0-9]*\.[0-9]+(e[0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-f]+', Number.Hex),
            (r'[0-9]+L?', Number.Integer),
            (r'\n', Whitespace),
            (r'([a-z_]\w*)(\s*)(\()',
             bygroups(Name.Function, Whitespace, Punctuation)),
            (r'[()#:]', Text),
            (r'[^(:#\'")\s]+', Text),
            (r'\S+\s+', Text)   # TODO: make tests pass without \s+
        ],
        'keywords': [
            # 'exec' was previously misspelled 'exex', so the exec command
            # was never highlighted as a keyword.
            (r'(assert|and|any|all|arrange|as|asc|bag|by|cache|CASE|cat|cd|cp|'
             r'%declare|%default|define|dense|desc|describe|distinct|du|dump|'
             r'eval|exec|explain|filter|flatten|foreach|full|generate|group|'
             r'help|if|illustrate|import|inner|input|into|is|join|kill|left|'
             r'limit|load|ls|map|matches|mkdir|mv|not|null|onschema|or|order|'
             r'outer|output|parallel|pig|pwd|quit|register|returns|right|rm|'
             r'rmf|rollup|run|sample|set|ship|split|stderr|stdin|stdout|store|'
             r'stream|through|union|using|void)\b', Keyword)
        ],
        'builtins': [
            (r'(AVG|BinStorage|cogroup|CONCAT|copyFromLocal|copyToLocal|COUNT|'
             r'cross|DIFF|MAX|MIN|PigDump|PigStorage|SIZE|SUM|TextLoader|'
             r'TOKENIZE)\b', Name.Builtin)
        ],
        'types': [
            (r'(bytearray|BIGINTEGER|BIGDECIMAL|chararray|datetime|double|float|'
             r'int|long|tuple)\b', Keyword.Type)
        ],
        'punct': [
            (r'[;(){}\[\]]', Punctuation),
        ],
        'operators': [
            (r'[#=,./%+\-?]', Operator),
            (r'(eq|gt|lt|gte|lte|neq|matches)\b', Operator),
            (r'(==|<=|<|>=|>|!=)', Operator),
        ],
    }
1371
1372
class GoloLexer(RegexLexer):
    """
    For Golo source code.

    String literals are lexed in states built with ``combined()``: each
    quote style gets its own terminator state merged with the shared
    ``stringescape`` rules.
    """

    name = 'Golo'
    aliases = ['golo']
    filenames = ['*.golo']
    url = 'http://golo-lang.org/'
    version_added = '2.0'

    tokens = {
        'root': [
            (r'[^\S\n]+', Whitespace),

            # -- comments
            (r'#.*$', Comment),

            # -- operators
            (r'(\^|\.\.\.|:|\?:|->|==|!=|=|\+|\*|%|/|<=|<|>=|>|=|\.)',
             Operator),
            # a single '-' that is neither preceded nor followed by '-'
            (r'(?<=[^-])(-)(?=[^-])', Operator),

            # word operators, unless directly preceded by a backtick
            (r'(?<=[^`])(is|isnt|and|or|not|oftype|in|orIfNull)\b',
             Operator.Word),
            (r'[]{}|(),[]', Punctuation),

            # -- module / import and qualified names
            (r'(module|import)(\s+)',
             bygroups(Keyword.Namespace, Whitespace),
             'modname'),
            (r'\b([a-zA-Z_][\w$.]*)(::)',
             bygroups(Name.Namespace, Punctuation)),
            (r'\b([a-zA-Z_][\w$]*(?:\.[a-zA-Z_][\w$]*)+)\b', Name.Namespace),

            # -- declarations; each switches state to read the name
            (r'(let|var)(\s+)',
             bygroups(Keyword.Declaration, Whitespace),
             'varname'),
            (r'(struct)(\s+)',
             bygroups(Keyword.Declaration, Whitespace),
             'structname'),
            (r'(function)(\s+)',
             bygroups(Keyword.Declaration, Whitespace),
             'funcname'),

            # -- constants and keywords
            (r'(null|true|false)\b', Keyword.Constant),
            (r'(augment|pimp'
             r'|if|else|case|match|return'
             r'|case|when|then|otherwise'
             r'|while|for|foreach'
             r'|try|catch|finally|throw'
             r'|local'
             r'|continue|break)\b', Keyword),

            # -- builtins and calls
            (r'(map|array|list|set|vector|tuple)(\[)',
             bygroups(Name.Builtin, Punctuation)),
            (r'(print|println|readln|raise|fun'
             r'|asInterfaceInstance)\b', Name.Builtin),
            (r'(`?[a-zA-Z_][\w$]*)(\()',
             bygroups(Name.Function, Punctuation)),

            # -- numbers
            (r'-?[\d_]*\.[\d_]*([eE][+-]?\d[\d_]*)?F?', Number.Float),
            (r'0[0-7]+j?', Number.Oct),
            (r'0[xX][a-fA-F0-9]+', Number.Hex),
            (r'-?\d[\d_]*L', Number.Integer.Long),
            (r'-?\d[\d_]*', Number.Integer),

            # -- plain names and decorators
            (r'`?[a-zA-Z_][\w$]*', Name),
            (r'@[a-zA-Z_][\w$.]*', Name.Decorator),

            # -- strings and ---- delimited doc blocks
            (r'"""', String, combined('stringescape', 'triplestring')),
            (r'"', String, combined('stringescape', 'doublestring')),
            (r"'", String, combined('stringescape', 'singlestring')),
            (r'----((.|\n)*?)----', String.Doc)

        ],

        # -- single-name states entered from the declaration rules above
        'funcname': [
            (r'`?[a-zA-Z_][\w$]*', Name.Function, '#pop'),
        ],
        'modname': [
            (r'[a-zA-Z_][\w$.]*\*?', Name.Namespace, '#pop')
        ],
        'structname': [
            (r'`?[\w.]+\*?', Name.Class, '#pop')
        ],
        'varname': [
            (r'`?[a-zA-Z_][\w$]*', Name.Variable, '#pop'),
        ],

        # -- string building blocks
        'string': [
            (r'[^\\\'"\n]+', String),
            (r'[\'"\\]', String)
        ],
        'stringescape': [
            (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
             r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        'triplestring': [
            (r'"""', String, '#pop'),
            include('string'),
            (r'\n', String),
        ],
        'doublestring': [
            (r'"', String.Double, '#pop'),
            include('string'),
        ],
        'singlestring': [
            (r"'", String, '#pop'),
            include('string'),
        ],

        # Not referenced by any rule in this class.
        'operators': [
            (r'[#=,./%+\-?]', Operator),
            (r'(eq|gt|lt|gte|lte|neq|matches)\b', Operator),
            (r'(==|<=|<|>=|>|!=)', Operator),
        ],
    }
1484
1485
class JasminLexer(RegexLexer):
    """
    For Jasmin assembly code.

    Most directives and instructions switch into a small state that
    knows what kind of operand follows (class name, field, method
    descriptor, label, ...).  The shared ``default`` state holds the rules
    common to all of them and pops on a newline.
    """

    name = 'Jasmin'
    aliases = ['jasmin', 'jasminxt']
    filenames = ['*.j']
    url = 'http://jasmin.sourceforge.net/'
    version_added = '2.0'

    # Regex fragments shared by the rules below.
    _whitespace = r' \n\t\r'                             # chars for use inside [...]
    _ws = rf'(?:[{_whitespace}]+)'                       # a run of whitespace
    _separator = rf'{_whitespace}:='                     # whitespace plus ':' and '='
    _break = rf'(?=[{_separator}]|$)'                    # lookahead: separator or '$'
    _name = rf'[^{_separator}]+'                         # run of non-separator chars
    _unqualified_name = rf'(?:[^{_separator}.;\[/]+)'    # same, also excluding . ; [ /

    tokens = {
        'root': [
            (r'\n+', Whitespace),
            (r"'", String.Single, 'quote'),
            include('default'),
            (rf'({_name})([ \t\r]*)(:)',
             bygroups(Name.Label, Whitespace, Punctuation)),
            (_name, String.Other)
        ],
        'default': [
            (r'\n', Whitespace, '#pop'),
            (r"'", String.Single, ('#pop', 'quote')),
            (r'"', String.Double, 'string'),
            (r'=', Punctuation),
            (r':', Punctuation, 'label'),
            (_ws, Whitespace),
            (r';.*', Comment.Single),
            (rf'(\$[-+])?0x-?[\da-fA-F]+{_break}', Number.Hex),
            (rf'(\$[-+]|\+)?-?\d+{_break}', Number.Integer),
            (r'-?(\d+\.\d*|\.\d+)([eE][-+]?\d+)?[fFdD]?'
             rf'[\x00-\x08\x0b\x0c\x0e-\x1f]*{_break}', Number.Float),
            (rf'\${_name}', Name.Variable),

            # Directives
            (rf'\.annotation{_break}', Keyword.Reserved, 'annotation'),
            (r'(\.attribute|\.bytecode|\.debug|\.deprecated|\.enclosing|'
             r'\.interface|\.line|\.signature|\.source|\.stack|\.var|abstract|'
             r'annotation|bridge|class|default|enum|field|final|fpstrict|'
             r'interface|native|private|protected|public|signature|static|'
             rf'synchronized|synthetic|transient|varargs|volatile){_break}',
             Keyword.Reserved),
            (rf'\.catch{_break}', Keyword.Reserved, 'caught-exception'),
            (r'(\.class|\.implements|\.inner|\.super|inner|invisible|'
             rf'invisibleparam|outer|visible|visibleparam){_break}',
             Keyword.Reserved, 'class/convert-dots'),
            (rf'\.field{_break}', Keyword.Reserved,
             ('descriptor/convert-dots', 'field')),
            (rf'(\.end|\.limit|use){_break}', Keyword.Reserved,
             'no-verification'),
            (rf'\.method{_break}', Keyword.Reserved, 'method'),
            (rf'\.set{_break}', Keyword.Reserved, 'var'),
            (rf'\.throws{_break}', Keyword.Reserved, 'exception'),
            (rf'(from|offset|to|using){_break}', Keyword.Reserved, 'label'),
            (rf'is{_break}', Keyword.Reserved,
             ('descriptor/convert-dots', 'var')),
            (rf'(locals|stack){_break}', Keyword.Reserved, 'verification'),
            (rf'method{_break}', Keyword.Reserved, 'enclosing-method'),

            # Instructions
            (words((
                'aaload', 'aastore', 'aconst_null', 'aload', 'aload_0', 'aload_1', 'aload_2',
                'aload_3', 'aload_w', 'areturn', 'arraylength', 'astore', 'astore_0', 'astore_1',
                'astore_2', 'astore_3', 'astore_w', 'athrow', 'baload', 'bastore', 'bipush',
                'breakpoint', 'caload', 'castore', 'd2f', 'd2i', 'd2l', 'dadd', 'daload', 'dastore',
                'dcmpg', 'dcmpl', 'dconst_0', 'dconst_1', 'ddiv', 'dload', 'dload_0', 'dload_1',
                'dload_2', 'dload_3', 'dload_w', 'dmul', 'dneg', 'drem', 'dreturn', 'dstore', 'dstore_0',
                'dstore_1', 'dstore_2', 'dstore_3', 'dstore_w', 'dsub', 'dup', 'dup2', 'dup2_x1',
                'dup2_x2', 'dup_x1', 'dup_x2', 'f2d', 'f2i', 'f2l', 'fadd', 'faload', 'fastore', 'fcmpg',
                'fcmpl', 'fconst_0', 'fconst_1', 'fconst_2', 'fdiv', 'fload', 'fload_0', 'fload_1',
                'fload_2', 'fload_3', 'fload_w', 'fmul', 'fneg', 'frem', 'freturn', 'fstore', 'fstore_0',
                'fstore_1', 'fstore_2', 'fstore_3', 'fstore_w', 'fsub', 'i2b', 'i2c', 'i2d', 'i2f', 'i2l',
                'i2s', 'iadd', 'iaload', 'iand', 'iastore', 'iconst_0', 'iconst_1', 'iconst_2',
                'iconst_3', 'iconst_4', 'iconst_5', 'iconst_m1', 'idiv', 'iinc', 'iinc_w', 'iload',
                'iload_0', 'iload_1', 'iload_2', 'iload_3', 'iload_w', 'imul', 'ineg', 'int2byte',
                'int2char', 'int2short', 'ior', 'irem', 'ireturn', 'ishl', 'ishr', 'istore', 'istore_0',
                'istore_1', 'istore_2', 'istore_3', 'istore_w', 'isub', 'iushr', 'ixor', 'l2d', 'l2f',
                'l2i', 'ladd', 'laload', 'land', 'lastore', 'lcmp', 'lconst_0', 'lconst_1', 'ldc2_w',
                'ldiv', 'lload', 'lload_0', 'lload_1', 'lload_2', 'lload_3', 'lload_w', 'lmul', 'lneg',
                'lookupswitch', 'lor', 'lrem', 'lreturn', 'lshl', 'lshr', 'lstore', 'lstore_0',
                'lstore_1', 'lstore_2', 'lstore_3', 'lstore_w', 'lsub', 'lushr', 'lxor',
                'monitorenter', 'monitorexit', 'nop', 'pop', 'pop2', 'ret', 'ret_w', 'return', 'saload',
                'sastore', 'sipush', 'swap'), suffix=_break), Keyword.Reserved),
            (rf'(anewarray|checkcast|instanceof|ldc|ldc_w|new){_break}',
             Keyword.Reserved, 'class/no-dots'),
            (r'invoke(dynamic|interface|nonvirtual|special|'
             rf'static|virtual){_break}', Keyword.Reserved,
             'invocation'),
            (rf'(getfield|putfield){_break}', Keyword.Reserved,
             ('descriptor/no-dots', 'field')),
            (rf'(getstatic|putstatic){_break}', Keyword.Reserved,
             ('descriptor/no-dots', 'static')),
            (words((
                'goto', 'goto_w', 'if_acmpeq', 'if_acmpne', 'if_icmpeq',
                'if_icmpge', 'if_icmpgt', 'if_icmple', 'if_icmplt', 'if_icmpne',
                'ifeq', 'ifge', 'ifgt', 'ifle', 'iflt', 'ifne', 'ifnonnull',
                'ifnull', 'jsr', 'jsr_w'), suffix=_break),
             Keyword.Reserved, 'label'),
            (rf'(multianewarray|newarray){_break}', Keyword.Reserved,
             'descriptor/convert-dots'),
            (rf'tableswitch{_break}', Keyword.Reserved, 'table')
        ],
        # Single-quoted text: only \uXXXX escapes are recognised.
        'quote': [
            (r"'", String.Single, '#pop'),
            (r'\\u[\da-fA-F]{4}', String.Escape),
            (r"[^'\\]+", String.Single)
        ],
        # Double-quoted text with the usual escape set.
        'string': [
            (r'"', String.Double, '#pop'),
            (r'\\([nrtfb"\'\\]|u[\da-fA-F]{4}|[0-3]?[0-7]{1,2})',
             String.Escape),
            (r'[^"\\]+', String.Double)
        ],
        'annotation': [
            (r'\n', Whitespace, ('#pop', 'annotation-body')),
            (rf'default{_break}', Keyword.Reserved,
             ('#pop', 'annotation-default')),
            include('default')
        ],
        'annotation-body': [
            (r'\n+', Whitespace),
            (rf'\.end{_break}', Keyword.Reserved, '#pop'),
            include('default'),
            (_name, String.Other, ('annotation-items', 'descriptor/no-dots'))
        ],
        'annotation-default': [
            (r'\n+', Whitespace),
            (rf'\.end{_break}', Keyword.Reserved, '#pop'),
            include('default'),
            default(('annotation-items', 'descriptor/no-dots'))
        ],
        'annotation-items': [
            (r"'", String.Single, 'quote'),
            include('default'),
            (_name, String.Other)
        ],
        'caught-exception': [
            (rf'all{_break}', Keyword, '#pop'),
            include('exception')
        ],
        # Class names where both '/' and '.' separate package parts.
        'class/convert-dots': [
            include('default'),
            (rf'(L)((?:{_unqualified_name}[/.])*)({_name})(;)',
             bygroups(Keyword.Type, Name.Namespace, Name.Class, Punctuation),
             '#pop'),
            (rf'((?:{_unqualified_name}[/.])*)({_name})',
             bygroups(Name.Namespace, Name.Class), '#pop')
        ],
        # Class names where only '/' separates package parts.
        'class/no-dots': [
            include('default'),
            (r'\[+', Punctuation, ('#pop', 'descriptor/no-dots')),
            (rf'(L)((?:{_unqualified_name}/)*)({_name})(;)',
             bygroups(Keyword.Type, Name.Namespace, Name.Class, Punctuation),
             '#pop'),
            (rf'((?:{_unqualified_name}/)*)({_name})',
             bygroups(Name.Namespace, Name.Class), '#pop')
        ],
        'descriptor/convert-dots': [
            include('default'),
            (r'\[+', Punctuation),
            (rf'(L)((?:{_unqualified_name}[/.])*)({_name}?)(;)',
             bygroups(Keyword.Type, Name.Namespace, Name.Class, Punctuation),
             '#pop'),
            (rf'[^{_separator}\[)L]+', Keyword.Type, '#pop'),
            default('#pop')
        ],
        'descriptor/no-dots': [
            include('default'),
            (r'\[+', Punctuation),
            (rf'(L)((?:{_unqualified_name}/)*)({_name})(;)',
             bygroups(Keyword.Type, Name.Namespace, Name.Class, Punctuation),
             '#pop'),
            (rf'[^{_separator}\[)L]+', Keyword.Type, '#pop'),
            default('#pop')
        ],
        # Keeps pushing single-descriptor states until ')' is reached.
        'descriptors/convert-dots': [
            (r'\)', Punctuation, '#pop'),
            default('descriptor/convert-dots')
        ],
        'enclosing-method': [
            (_ws, Whitespace),
            (rf'(?=[^{_separator}]*\()', Text, ('#pop', 'invocation')),
            default(('#pop', 'class/convert-dots'))
        ],
        'exception': [
            include('default'),
            (rf'((?:{_unqualified_name}[/.])*)({_name})',
             bygroups(Name.Namespace, Name.Exception), '#pop')
        ],
        'field': [
            (rf'static{_break}', Keyword.Reserved, ('#pop', 'static')),
            include('default'),
            (rf'((?:{_unqualified_name}[/.](?=[^{_separator}]*[/.]))*)'
             rf'({_unqualified_name}[/.])?({_name})',
             bygroups(Name.Namespace, Name.Class, Name.Variable.Instance),
             '#pop')
        ],
        'invocation': [
            include('default'),
            (rf'((?:{_unqualified_name}[/.](?=[^{_separator}(]*[/.]))*)'
             rf'({_unqualified_name}[/.])?({_name})(\()',
             bygroups(Name.Namespace, Name.Class, Name.Function, Punctuation),
             ('#pop', 'descriptor/convert-dots', 'descriptors/convert-dots',
              'descriptor/convert-dots'))
        ],
        'label': [
            include('default'),
            (_name, Name.Label, '#pop')
        ],
        'method': [
            include('default'),
            (rf'({_name})(\()', bygroups(Name.Function, Punctuation),
             ('#pop', 'descriptor/convert-dots', 'descriptors/convert-dots',
              'descriptor/convert-dots'))
        ],
        'no-verification': [
            (rf'(locals|method|stack){_break}', Keyword.Reserved, '#pop'),
            include('default')
        ],
        'static': [
            include('default'),
            (rf'((?:{_unqualified_name}[/.](?=[^{_separator}]*[/.]))*)'
             rf'({_unqualified_name}[/.])?({_name})',
             bygroups(Name.Namespace, Name.Class, Name.Variable.Class), '#pop')
        ],
        'table': [
            (r'\n+', Whitespace),
            (rf'default{_break}', Keyword.Reserved, '#pop'),
            include('default'),
            (_name, Name.Label)
        ],
        'var': [
            include('default'),
            (_name, Name.Variable, '#pop')
        ],
        'verification': [
            include('default'),
            (rf'(Double|Float|Integer|Long|Null|Top|UninitializedThis){_break}',
             Keyword, '#pop'),
            (rf'Object{_break}', Keyword, ('#pop', 'class/no-dots')),
            (rf'Uninitialized{_break}', Keyword, ('#pop', 'label'))
        ]
    }

    def analyse_text(text):
        """Score how likely *text* is Jasmin source, capped at 1.0.

        A ``.class`` directive at the start of a line adds 0.5, plus 0.3
        more when some line also starts with a ``word_word`` token.  Any of
        the listed dot-directives at the start of a line adds 0.6.
        """
        def line_has(pattern):
            # Patterns are anchored with '^', so search in MULTILINE mode.
            return re.search(pattern, text, re.MULTILINE)

        score = 0
        if line_has(r'^\s*\.class\s'):
            score += 0.5
            if line_has(r'^\s*[a-z]+_[a-z]+\b'):
                score += 0.3
        if line_has(r'^\s*\.(attribute|bytecode|debug|deprecated|enclosing|'
                    r'inner|interface|limit|set|signature|stack)\b'):
            score += 0.6
        return min(score, 1.0)
1744
1745
class SarlLexer(RegexLexer):
    """
    For SARL source code.

    Uses a ``root`` state plus two one-shot states: ``class`` reads the
    name after a type-declaration keyword and ``import`` reads the dotted
    name after ``import``.
    """

    name = 'SARL'
    aliases = ['sarl']
    filenames = ['*.sarl']
    mimetypes = ['text/x-sarl']
    url = 'http://www.sarl.io'
    version_added = '2.4'

    flags = re.MULTILINE | re.DOTALL

    tokens = {
        'root': [
            # -- method signatures: <return type...> <name> (
            (r'^(\s*(?:[a-zA-Z_][\w.\[\]]*\s+)+?)'  # return arguments
             r'([a-zA-Z_$][\w$]*)'                  # method name
             r'(\s*)(\()',                          # signature start
             bygroups(using(this), Name.Function, Whitespace, Operator)),

            # -- whitespace and comments
            (r'[^\S\n]+', Whitespace),
            (r'(//.*?)(\n)', bygroups(Comment.Single, Whitespace)),
            (r'/\*.*?\*/', Comment.Multiline),

            # -- annotations
            (r'@[a-zA-Z_][\w.]*', Name.Decorator),

            # -- keywords
            (r'(as|break|case|catch|default|do|else|extends|extension|finally|'
             r'fires|for|if|implements|instanceof|new|on|requires|return|super|'
             r'switch|throw|throws|try|typeof|uses|while|with)\b',
             Keyword),
            (r'(abstract|def|dispatch|final|native|override|private|protected|'
             r'public|static|strictfp|synchronized|transient|val|var|volatile)\b',
             Keyword.Declaration),
            (r'(boolean|byte|char|double|float|int|long|short|void)\b',
             Keyword.Type),
            (r'(package)(\s+)', bygroups(Keyword.Namespace, Whitespace)),
            (r'(false|it|null|occurrence|this|true|void)\b', Keyword.Constant),

            # -- declarations that switch state to read the following name
            (r'(agent|annotation|artifact|behavior|capacity|class|enum|event|'
             r'interface|skill|space)(\s+)',
             bygroups(Keyword.Declaration, Whitespace),
             'class'),
            (r'(import)(\s+)',
             bygroups(Keyword.Namespace, Whitespace),
             'import'),

            # -- strings
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),

            # -- names
            (r'[a-zA-Z_]\w*:', Name.Label),
            (r'[a-zA-Z_$]\w*', Name),

            # -- operators and punctuation
            (r'[~^*!%&\[\](){}<>\|+=:;,./?-]', Operator),

            # -- numbers
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+L?', Number.Integer),

            (r'\n', Whitespace)
        ],
        # One identifier after a type-declaration keyword, then return.
        'class': [
            (r'[a-zA-Z_]\w*', Name.Class, '#pop')
        ],
        # Dotted name with optional trailing '*' after import, then return.
        'import': [
            (r'[\w.]+\*?', Name.Namespace, '#pop')
        ],
    }