Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/perl.py: 31%
136 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
1"""
2 pygments.lexers.perl
3 ~~~~~~~~~~~~~~~~~~~~
5 Lexers for Perl, Raku and related languages.
7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
11import re
13from pygments.lexer import RegexLexer, ExtendedRegexLexer, include, bygroups, \
14 using, this, default, words
15from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
16 Number, Punctuation, Whitespace
17from pygments.util import shebang_matches
19__all__ = ['PerlLexer', 'Perl6Lexer']
22class PerlLexer(RegexLexer):
23 """
24 For Perl source code.
25 """
27 name = 'Perl'
28 url = 'https://www.perl.org'
29 aliases = ['perl', 'pl']
30 filenames = ['*.pl', '*.pm', '*.t', '*.perl']
31 mimetypes = ['text/x-perl', 'application/x-perl']
33 flags = re.DOTALL | re.MULTILINE
34 # TODO: give this to a perl guy who knows how to parse perl...
tokens = {
    # Entered after the pattern half of a bracket-delimited substitution
    # (``s{...}``, ``s<...>``, ...) or after a bare ``m``: consume exactly one
    # delimited section plus trailing flags, then return to the caller state.
    'balanced-regex': [
        (r'/(\\\\|\\[^\\]|[^\\/])*/[egimosx]*', String.Regex, '#pop'),
        (r'!(\\\\|\\[^\\]|[^\\!])*![egimosx]*', String.Regex, '#pop'),
        (r'\\(\\\\|[^\\])*\\[egimosx]*', String.Regex, '#pop'),
        (r'\{(\\\\|\\[^\\]|[^\\}])*\}[egimosx]*', String.Regex, '#pop'),
        (r'<(\\\\|\\[^\\]|[^\\>])*>[egimosx]*', String.Regex, '#pop'),
        (r'\[(\\\\|\\[^\\]|[^\\\]])*\][egimosx]*', String.Regex, '#pop'),
        (r'\((\\\\|\\[^\\]|[^\\)])*\)[egimosx]*', String.Regex, '#pop'),
        (r'@(\\\\|\\[^\\]|[^\\@])*@[egimosx]*', String.Regex, '#pop'),
        (r'%(\\\\|\\[^\\]|[^\\%])*%[egimosx]*', String.Regex, '#pop'),
        (r'\$(\\\\|\\[^\\]|[^\\$])*\$[egimosx]*', String.Regex, '#pop'),
    ],
    'root': [
        (r'\A\#!.+?$', Comment.Hashbang),
        (r'\#.*?$', Comment.Single),
        (r'^=[a-zA-Z0-9]+\s+.*?\n=cut', Comment.Multiline),
        (words((
            'case', 'continue', 'do', 'else', 'elsif', 'for', 'foreach',
            'if', 'last', 'my', 'next', 'our', 'redo', 'reset', 'then',
            'unless', 'until', 'while', 'print', 'new', 'BEGIN',
            'CHECK', 'INIT', 'END', 'return'), suffix=r'\b'),
         Keyword),
        (r'(format)(\s+)(\w+)(\s*)(=)(\s*\n)',
         bygroups(Keyword, Whitespace, Name, Whitespace, Punctuation, Whitespace), 'format'),
        (r'(eq|lt|gt|le|ge|ne|not|and|or|cmp)\b', Operator.Word),
        # common delimiters
        (r's/(\\\\|\\[^\\]|[^\\/])*/(\\\\|\\[^\\]|[^\\/])*/[egimosx]*',
         String.Regex),
        (r's!(\\\\|\\!|[^!])*!(\\\\|\\!|[^!])*![egimosx]*', String.Regex),
        (r's\\(\\\\|[^\\])*\\(\\\\|[^\\])*\\[egimosx]*', String.Regex),
        (r's@(\\\\|\\[^\\]|[^\\@])*@(\\\\|\\[^\\]|[^\\@])*@[egimosx]*',
         String.Regex),
        (r's%(\\\\|\\[^\\]|[^\\%])*%(\\\\|\\[^\\]|[^\\%])*%[egimosx]*',
         String.Regex),
        # balanced delimiters
        (r's\{(\\\\|\\[^\\]|[^\\}])*\}\s*', String.Regex, 'balanced-regex'),
        (r's<(\\\\|\\[^\\]|[^\\>])*>\s*', String.Regex, 'balanced-regex'),
        (r's\[(\\\\|\\[^\\]|[^\\\]])*\]\s*', String.Regex,
         'balanced-regex'),
        (r's\((\\\\|\\[^\\]|[^\\)])*\)\s*', String.Regex,
         'balanced-regex'),

        (r'm?/(\\\\|\\[^\\]|[^\\/\n])*/[gcimosx]*', String.Regex),
        (r'm(?=[/!\\{<\[(@%$])', String.Regex, 'balanced-regex'),
        (r'((?<==~)|(?<=\())\s*/(\\\\|\\[^\\]|[^\\/])*/[gcimosx]*',
         String.Regex),
        (r'\s+', Whitespace),
        (words((
            'abs', 'accept', 'alarm', 'atan2', 'bind', 'binmode', 'bless', 'caller', 'chdir',
            'chmod', 'chomp', 'chop', 'chown', 'chr', 'chroot', 'close', 'closedir', 'connect',
            'continue', 'cos', 'crypt', 'dbmclose', 'dbmopen', 'defined', 'delete', 'die',
            'dump', 'each', 'endgrent', 'endhostent', 'endnetent', 'endprotoent',
            'endpwent', 'endservent', 'eof', 'eval', 'exec', 'exists', 'exit', 'exp', 'fcntl',
            'fileno', 'flock', 'fork', 'format', 'formline', 'getc', 'getgrent', 'getgrgid',
            'getgrnam', 'gethostbyaddr', 'gethostbyname', 'gethostent', 'getlogin',
            'getnetbyaddr', 'getnetbyname', 'getnetent', 'getpeername', 'getpgrp',
            'getppid', 'getpriority', 'getprotobyname', 'getprotobynumber',
            'getprotoent', 'getpwent', 'getpwnam', 'getpwuid', 'getservbyname',
            'getservbyport', 'getservent', 'getsockname', 'getsockopt', 'glob', 'gmtime',
            'goto', 'grep', 'hex', 'import', 'index', 'int', 'ioctl', 'join', 'keys', 'kill', 'last',
            'lc', 'lcfirst', 'length', 'link', 'listen', 'local', 'localtime', 'log', 'lstat',
            'map', 'mkdir', 'msgctl', 'msgget', 'msgrcv', 'msgsnd', 'my', 'next', 'oct', 'open',
            'opendir', 'ord', 'our', 'pack', 'pipe', 'pop', 'pos', 'printf',
            'prototype', 'push', 'quotemeta', 'rand', 'read', 'readdir',
            'readline', 'readlink', 'readpipe', 'recv', 'redo', 'ref', 'rename',
            'reverse', 'rewinddir', 'rindex', 'rmdir', 'scalar', 'seek', 'seekdir',
            'select', 'semctl', 'semget', 'semop', 'send', 'setgrent', 'sethostent', 'setnetent',
            'setpgrp', 'setpriority', 'setprotoent', 'setpwent', 'setservent',
            'setsockopt', 'shift', 'shmctl', 'shmget', 'shmread', 'shmwrite', 'shutdown',
            'sin', 'sleep', 'socket', 'socketpair', 'sort', 'splice', 'split', 'sprintf', 'sqrt',
            'srand', 'stat', 'study', 'substr', 'symlink', 'syscall', 'sysopen', 'sysread',
            'sysseek', 'system', 'syswrite', 'tell', 'telldir', 'tie', 'tied', 'time', 'times', 'tr',
            'truncate', 'uc', 'ucfirst', 'umask', 'undef', 'unlink', 'unpack', 'unshift', 'untie',
            'utime', 'values', 'vec', 'wait', 'waitpid', 'wantarray', 'warn', 'write'), suffix=r'\b'),
         Name.Builtin),
        (r'((__(DATA|DIE|WARN)__)|(STD(IN|OUT|ERR)))\b', Name.Builtin.Pseudo),
        (r'(<<)([\'"]?)([a-zA-Z_]\w*)(\2;?\n.*?\n)(\3)(\n)',
         bygroups(String, String, String.Delimiter, String, String.Delimiter, Whitespace)),
        (r'__END__', Comment.Preproc, 'end-part'),
        (r'\$\^[ADEFHILMOPSTWX]', Name.Variable.Global),
        (r"\$[\\\"\[\]'&`+*.,;=%~?@$!<>(^|/-](?!\w)", Name.Variable.Global),
        (r'[$@%#]+', Name.Variable, 'varname'),
        (r'0_?[0-7]+(_[0-7]+)*', Number.Oct),
        (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex),
        (r'0b[01]+(_[01]+)*', Number.Bin),
        (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?',
         Number.Float),
        (r'(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*', Number.Float),
        (r'\d+(_\d+)*', Number.Integer),
        (r"'(\\\\|\\[^\\]|[^'\\])*'", String),
        (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
        (r'`(\\\\|\\[^\\]|[^`\\])*`', String.Backtick),
        (r'<([^\s>]+)>', String.Regex),
        (r'(q|qq|qw|qr|qx)\{', String.Other, 'cb-string'),
        (r'(q|qq|qw|qr|qx)\(', String.Other, 'rb-string'),
        (r'(q|qq|qw|qr|qx)\[', String.Other, 'sb-string'),
        (r'(q|qq|qw|qr|qx)\<', String.Other, 'lt-string'),
        (r'(q|qq|qw|qr|qx)([\W_])(.|\n)*?\2', String.Other),
        (r'(package)(\s+)([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)',
         bygroups(Keyword, Whitespace, Name.Namespace)),
        (r'(use|require|no)(\s+)([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)',
         bygroups(Keyword, Whitespace, Name.Namespace)),
        (r'(sub)(\s+)', bygroups(Keyword, Whitespace), 'funcname'),
        (words((
            'no', 'package', 'require', 'use'), suffix=r'\b'),
         Keyword),
        (r'(\[\]|\*\*|::|<<|>>|>=|<=>|<=|={3}|!=|=~|'
         r'!~|&&?|\|\||\.{1,3})', Operator),
        (r'[-+/*%=<>&^|!\\~]=?', Operator),
        (r'[()\[\]:;,<>/?{}]', Punctuation),  # yes, there's no shortage
                                              # of punctuation in Perl!
        (r'(?=\w)', Name, 'name'),
    ],
    # Body of a ``format NAME =`` block: every line is emitted verbatim until
    # a line consisting of a single dot.
    'format': [
        (r'\.\n', String.Interpol, '#pop'),
        (r'[^\n]*\n', String.Interpol),
    ],
    # Entered right after a run of sigils; classifies what follows.
    'varname': [
        (r'\s+', Whitespace),
        (r'\{', Punctuation, '#pop'),    # hash syntax?
        (r'\)|,', Punctuation, '#pop'),  # argument specifier
        (r'\w+::', Name.Namespace),
        (r'[\w:]+', Name.Variable, '#pop'),
    ],
    # NOTE(review): this state is only entered through the ``(?=\w)``
    # lookahead in 'root', so ``[\w:]+`` always matches and the two rules
    # after it look unreachable. Left as-is to keep highlighting unchanged.
    'name': [
        (r'[a-zA-Z_]\w*(::[a-zA-Z_]\w*)*(::)?(?=\s*->)', Name.Namespace, '#pop'),
        (r'[a-zA-Z_]\w*(::[a-zA-Z_]\w*)*::', Name.Namespace, '#pop'),
        (r'[\w:]+', Name, '#pop'),
        (r'[A-Z_]+(?=\W)', Name.Constant, '#pop'),
        (r'(?=\W)', Text, '#pop'),
    ],
    'funcname': [
        (r'[a-zA-Z_]\w*[!?]?', Name.Function),
        (r'\s+', Whitespace),
        # argument declaration
        (r'(\([$@%]*\))(\s*)', bygroups(Punctuation, Whitespace)),
        (r';', Punctuation, '#pop'),
        (r'.*?\{', Punctuation, '#pop'),
    ],
    # Bracket-delimited q/qq/qw/qr/qx strings. Each state pushes itself on a
    # nested opener and pops on the closer. The catch-all class at the end of
    # every state must exclude the backslash; otherwise it swallows the
    # backslash of an escaped delimiter and the escape rule on the first line
    # never sees it (``q(foo\)bar)`` would end at the escaped paren).
    'cb-string': [
        (r'\\[{}\\]', String.Other),
        (r'\\', String.Other),
        (r'\{', String.Other, 'cb-string'),
        (r'\}', String.Other, '#pop'),
        (r'[^{}\\]+', String.Other)
    ],
    'rb-string': [
        (r'\\[()\\]', String.Other),
        (r'\\', String.Other),
        (r'\(', String.Other, 'rb-string'),
        (r'\)', String.Other, '#pop'),
        (r'[^()\\]+', String.Other)
    ],
    'sb-string': [
        (r'\\[\[\]\\]', String.Other),
        (r'\\', String.Other),
        (r'\[', String.Other, 'sb-string'),
        (r'\]', String.Other, '#pop'),
        (r'[^\[\]\\]+', String.Other)
    ],
    'lt-string': [
        (r'\\[<>\\]', String.Other),
        (r'\\', String.Other),
        (r'\<', String.Other, 'lt-string'),
        (r'\>', String.Other, '#pop'),
        (r'[^<>\\]+', String.Other)
    ],
    # Everything after ``__END__`` is emitted as one preprocessor comment.
    'end-part': [
        (r'.+', Comment.Preproc, '#pop')
    ]
}
def analyse_text(text):
    """Estimate how likely *text* is to be Perl source.

    Returns ``True`` when ``shebang_matches(text, r'perl')`` reports a
    match. Otherwise returns a score: 0.9 if a ``my``/``our`` keyword
    followed by whitespace and a sigil or ``(`` occurs anywhere in *text*
    (0 if not), halved when the text contains ``:=``.
    """
    if shebang_matches(text, r'perl'):
        return True

    # The leading \b keeps words that merely end in "my" or "our"
    # ("dummy $x", "your @list") from being counted as a declaration.
    result = 0.9 if re.search(r'\b(?:my|our)\s+[$@%(]', text) else 0

    if ':=' in text:
        # := is not valid Perl, but it appears in unicon, so we should
        # become less confident if we think we found Perl with :=
        result /= 2

    return result
225class Perl6Lexer(ExtendedRegexLexer):
226 """
227 For Raku (a.k.a. Perl 6) source code.
229 .. versionadded:: 2.0
230 """
232 name = 'Perl6'
233 url = 'https://www.raku.org'
234 aliases = ['perl6', 'pl6', 'raku']
235 filenames = ['*.pl', '*.pm', '*.nqp', '*.p6', '*.6pl', '*.p6l', '*.pl6',
236 '*.6pm', '*.p6m', '*.pm6', '*.t', '*.raku', '*.rakumod',
237 '*.rakutest', '*.rakudoc']
238 mimetypes = ['text/x-perl6', 'application/x-perl6']
239 flags = re.MULTILINE | re.DOTALL
241 PERL6_IDENTIFIER_RANGE = r"['\w:-]"
243 PERL6_KEYWORDS = (
244 #Phasers
245 'BEGIN','CATCH','CHECK','CLOSE','CONTROL','DOC','END','ENTER','FIRST',
246 'INIT','KEEP','LAST','LEAVE','NEXT','POST','PRE','QUIT','UNDO',
247 #Keywords
248 'anon','augment','but','class','constant','default','does','else',
249 'elsif','enum','for','gather','given','grammar','has','if','import',
250 'is','let','loop','made','make','method','module','multi','my','need',
251 'orwith','our','proceed','proto','repeat','require','return',
252 'return-rw','returns','role','rule','state','sub','submethod','subset',
253 'succeed','supersede','token','try','unit','unless','until','use',
254 'when','while','with','without',
255 #Traits
256 'export','native','repr','required','rw','symbol',
257 )
259 PERL6_BUILTINS = (
260 'ACCEPTS','abs','abs2rel','absolute','accept','accessed','acos',
261 'acosec','acosech','acosh','acotan','acotanh','acquire','act','action',
262 'actions','add','add_attribute','add_enum_value','add_fallback',
263 'add_method','add_parent','add_private_method','add_role','add_trustee',
264 'adverb','after','all','allocate','allof','allowed','alternative-names',
265 'annotations','antipair','antipairs','any','anyof','app_lifetime',
266 'append','arch','archname','args','arity','Array','asec','asech','asin',
267 'asinh','ASSIGN-KEY','ASSIGN-POS','assuming','ast','at','atan','atan2',
268 'atanh','AT-KEY','atomic-assign','atomic-dec-fetch','atomic-fetch',
269 'atomic-fetch-add','atomic-fetch-dec','atomic-fetch-inc',
270 'atomic-fetch-sub','atomic-inc-fetch','AT-POS','attributes','auth',
271 'await','backtrace','Bag','BagHash','bail-out','base','basename',
272 'base-repeating','batch','BIND-KEY','BIND-POS','bind-stderr',
273 'bind-stdin','bind-stdout','bind-udp','bits','bless','block','Bool',
274 'bool-only','bounds','break','Bridge','broken','BUILD','build-date',
275 'bytes','cache','callframe','calling-package','CALL-ME','callsame',
276 'callwith','can','cancel','candidates','cando','can-ok','canonpath',
277 'caps','caption','Capture','cas','catdir','categorize','categorize-list',
278 'catfile','catpath','cause','ceiling','cglobal','changed','Channel',
279 'chars','chdir','child','child-name','child-typename','chmod','chomp',
280 'chop','chr','chrs','chunks','cis','classify','classify-list','cleanup',
281 'clone','close','closed','close-stdin','cmp-ok','code','codes','collate',
282 'column','comb','combinations','command','comment','compiler','Complex',
283 'compose','compose_type','composer','condition','config',
284 'configure_destroy','configure_type_checking','conj','connect',
285 'constraints','construct','contains','contents','copy','cos','cosec',
286 'cosech','cosh','cotan','cotanh','count','count-only','cpu-cores',
287 'cpu-usage','CREATE','create_type','cross','cue','curdir','curupdir','d',
288 'Date','DateTime','day','daycount','day-of-month','day-of-week',
289 'day-of-year','days-in-month','declaration','decode','decoder','deepmap',
290 'default','defined','DEFINITE','delayed','DELETE-KEY','DELETE-POS',
291 'denominator','desc','DESTROY','destroyers','devnull','diag',
292 'did-you-mean','die','dies-ok','dir','dirname','dir-sep','DISTROnames',
293 'do','does','does-ok','done','done-testing','duckmap','dynamic','e',
294 'eager','earlier','elems','emit','enclosing','encode','encoder',
295 'encoding','end','ends-with','enum_from_value','enum_value_list',
296 'enum_values','enums','eof','EVAL','eval-dies-ok','EVALFILE',
297 'eval-lives-ok','exception','excludes-max','excludes-min','EXISTS-KEY',
298 'EXISTS-POS','exit','exitcode','exp','expected','explicitly-manage',
299 'expmod','extension','f','fail','fails-like','fc','feature','file',
300 'filename','find_method','find_method_qualified','finish','first','flat',
301 'flatmap','flip','floor','flunk','flush','fmt','format','formatter',
302 'freeze','from','from-list','from-loop','from-posix','full',
303 'full-barrier','get','get_value','getc','gist','got','grab','grabpairs',
304 'grep','handle','handled','handles','hardware','has_accessor','Hash',
305 'head','headers','hh-mm-ss','hidden','hides','hour','how','hyper','id',
306 'illegal','im','in','indent','index','indices','indir','infinite',
307 'infix','infix:<+>','infix:<->','install_method_cache','Instant',
308 'instead','Int','int-bounds','interval','in-timezone','invalid-str',
309 'invert','invocant','IO','IO::Notification.watch-path','is_trusted',
310 'is_type','isa','is-absolute','isa-ok','is-approx','is-deeply',
311 'is-hidden','is-initial-thread','is-int','is-lazy','is-leap-year',
312 'isNaN','isnt','is-prime','is-relative','is-routine','is-setting',
313 'is-win','item','iterator','join','keep','kept','KERNELnames','key',
314 'keyof','keys','kill','kv','kxxv','l','lang','last','lastcall','later',
315 'lazy','lc','leading','level','like','line','lines','link','List',
316 'listen','live','lives-ok','local','lock','log','log10','lookup','lsb',
317 'made','MAIN','make','Map','match','max','maxpairs','merge','message',
318 'method','method_table','methods','migrate','min','minmax','minpairs',
319 'minute','misplaced','Mix','MixHash','mkdir','mode','modified','month',
320 'move','mro','msb','multi','multiness','my','name','named','named_names',
321 'narrow','nativecast','native-descriptor','nativesizeof','new','new_type',
322 'new-from-daycount','new-from-pairs','next','nextcallee','next-handle',
323 'nextsame','nextwith','NFC','NFD','NFKC','NFKD','nl-in','nl-out',
324 'nodemap','nok','none','norm','not','note','now','nude','Num',
325 'numerator','Numeric','of','offset','offset-in-hours','offset-in-minutes',
326 'ok','old','on-close','one','on-switch','open','opened','operation',
327 'optional','ord','ords','orig','os-error','osname','out-buffer','pack',
328 'package','package-kind','package-name','packages','pair','pairs',
329 'pairup','parameter','params','parent','parent-name','parents','parse',
330 'parse-base','parsefile','parse-names','parts','pass','path','path-sep',
331 'payload','peer-host','peer-port','periods','perl','permutations','phaser',
332 'pick','pickpairs','pid','placeholder','plan','plus','polar','poll',
333 'polymod','pop','pos','positional','posix','postfix','postmatch',
334 'precomp-ext','precomp-target','pred','prefix','prematch','prepend',
335 'print','printf','print-nl','print-to','private','private_method_table',
336 'proc','produce','Promise','prompt','protect','pull-one','push',
337 'push-all','push-at-least','push-exactly','push-until-lazy','put',
338 'qualifier-type','quit','r','race','radix','rand','range','Rat','raw',
339 're','read','readchars','readonly','ready','Real','reallocate','reals',
340 'reason','rebless','receive','recv','redispatcher','redo','reduce',
341 'rel2abs','relative','release','rename','repeated','replacement',
342 'report','reserved','resolve','restore','result','resume','rethrow',
343 'reverse','right','rindex','rmdir','role','roles_to_compose','rolish',
344 'roll','rootdir','roots','rotate','rotor','round','roundrobin',
345 'routine-type','run','rwx','s','samecase','samemark','samewith','say',
346 'schedule-on','scheduler','scope','sec','sech','second','seek','self',
347 'send','Set','set_hidden','set_name','set_package','set_rw','set_value',
348 'SetHash','set-instruments','setup_finalization','shape','share','shell',
349 'shift','sibling','sigil','sign','signal','signals','signature','sin',
350 'sinh','sink','sink-all','skip','skip-at-least','skip-at-least-pull-one',
351 'skip-one','skip-rest','sleep','sleep-timer','sleep-until','Slip','slurp',
352 'slurp-rest','slurpy','snap','snapper','so','socket-host','socket-port',
353 'sort','source','source-package','spawn','SPEC','splice','split',
354 'splitdir','splitpath','sprintf','spurt','sqrt','squish','srand','stable',
355 'start','started','starts-with','status','stderr','stdout','Str',
356 'sub_signature','subbuf','subbuf-rw','subname','subparse','subst',
357 'subst-mutate','substr','substr-eq','substr-rw','subtest','succ','sum',
358 'Supply','symlink','t','tail','take','take-rw','tan','tanh','tap',
359 'target','target-name','tc','tclc','tell','then','throttle','throw',
360 'throws-like','timezone','tmpdir','to','today','todo','toggle','to-posix',
361 'total','trailing','trans','tree','trim','trim-leading','trim-trailing',
362 'truncate','truncated-to','trusts','try_acquire','trying','twigil','type',
363 'type_captures','typename','uc','udp','uncaught_handler','unimatch',
364 'uniname','uninames','uniparse','uniprop','uniprops','unique','unival',
365 'univals','unlike','unlink','unlock','unpack','unpolar','unshift',
366 'unwrap','updir','USAGE','use-ok','utc','val','value','values','VAR',
367 'variable','verbose-config','version','VMnames','volume','vow','w','wait',
368 'warn','watch','watch-path','week','weekday-of-month','week-number',
369 'week-year','WHAT','when','WHERE','WHEREFORE','WHICH','WHO',
370 'whole-second','WHY','wordcase','words','workaround','wrap','write',
371 'write-to','x','yada','year','yield','yyyy-mm-dd','z','zip','zip-latest',
373 )
375 PERL6_BUILTIN_CLASSES = (
376 #Booleans
377 'False','True',
378 #Classes
379 'Any','Array','Associative','AST','atomicint','Attribute','Backtrace',
380 'Backtrace::Frame','Bag','Baggy','BagHash','Blob','Block','Bool','Buf',
381 'Callable','CallFrame','Cancellation','Capture','CArray','Channel','Code',
382 'compiler','Complex','ComplexStr','Cool','CurrentThreadScheduler',
383 'Cursor','Date','Dateish','DateTime','Distro','Duration','Encoding',
384 'Exception','Failure','FatRat','Grammar','Hash','HyperWhatever','Instant',
385 'Int','int16','int32','int64','int8','IntStr','IO','IO::ArgFiles',
386 'IO::CatHandle','IO::Handle','IO::Notification','IO::Path',
387 'IO::Path::Cygwin','IO::Path::QNX','IO::Path::Unix','IO::Path::Win32',
388 'IO::Pipe','IO::Socket','IO::Socket::Async','IO::Socket::INET','IO::Spec',
389 'IO::Spec::Cygwin','IO::Spec::QNX','IO::Spec::Unix','IO::Spec::Win32',
390 'IO::Special','Iterable','Iterator','Junction','Kernel','Label','List',
391 'Lock','Lock::Async','long','longlong','Macro','Map','Match',
392 'Metamodel::AttributeContainer','Metamodel::C3MRO','Metamodel::ClassHOW',
393 'Metamodel::EnumHOW','Metamodel::Finalization','Metamodel::MethodContainer',
394 'Metamodel::MROBasedMethodDispatch','Metamodel::MultipleInheritance',
395 'Metamodel::Naming','Metamodel::Primitives','Metamodel::PrivateMethodContainer',
396 'Metamodel::RoleContainer','Metamodel::Trusting','Method','Mix','MixHash',
397 'Mixy','Mu','NFC','NFD','NFKC','NFKD','Nil','Num','num32','num64',
398 'Numeric','NumStr','ObjAt','Order','Pair','Parameter','Perl','Pod::Block',
399 'Pod::Block::Code','Pod::Block::Comment','Pod::Block::Declarator',
400 'Pod::Block::Named','Pod::Block::Para','Pod::Block::Table','Pod::Heading',
401 'Pod::Item','Pointer','Positional','PositionalBindFailover','Proc',
402 'Proc::Async','Promise','Proxy','PseudoStash','QuantHash','Range','Rat',
403 'Rational','RatStr','Real','Regex','Routine','Scalar','Scheduler',
404 'Semaphore','Seq','Set','SetHash','Setty','Signature','size_t','Slip',
405 'Stash','Str','StrDistance','Stringy','Sub','Submethod','Supplier',
406 'Supplier::Preserving','Supply','Systemic','Tap','Telemetry',
407 'Telemetry::Instrument::Thread','Telemetry::Instrument::Usage',
408 'Telemetry::Period','Telemetry::Sampler','Thread','ThreadPoolScheduler',
409 'UInt','uint16','uint32','uint64','uint8','Uni','utf8','Variable',
410 'Version','VM','Whatever','WhateverCode','WrapHandle'
411 )
413 PERL6_OPERATORS = (
414 'X', 'Z', 'after', 'also', 'and', 'andthen', 'before', 'cmp', 'div',
415 'eq', 'eqv', 'extra', 'ff', 'fff', 'ge', 'gt', 'le', 'leg', 'lt', 'm',
416 'mm', 'mod', 'ne', 'or', 'orelse', 'rx', 's', 'tr', 'x', 'xor', 'xx',
417 '++', '--', '**', '!', '+', '-', '~', '?', '|', '||', '+^', '~^', '?^',
418 '^', '*', '/', '%', '%%', '+&', '+<', '+>', '~&', '~<', '~>', '?&',
419 'gcd', 'lcm', '+', '-', '+|', '+^', '~|', '~^', '?|', '?^',
420 '~', '&', '^', 'but', 'does', '<=>', '..', '..^', '^..', '^..^',
421 '!=', '==', '<', '<=', '>', '>=', '~~', '===', '!eqv',
422 '&&', '||', '^^', '//', 'min', 'max', '??', '!!', 'ff', 'fff', 'so',
423 'not', '<==', '==>', '<<==', '==>>','unicmp',
424 )
426 # Perl 6 has a *lot* of possible bracketing characters
427 # this list was lifted from STD.pm6 (https://github.com/perl6/std)
428 PERL6_BRACKETS = {
429 '\u0028': '\u0029', '\u003c': '\u003e', '\u005b': '\u005d',
430 '\u007b': '\u007d', '\u00ab': '\u00bb', '\u0f3a': '\u0f3b',
431 '\u0f3c': '\u0f3d', '\u169b': '\u169c', '\u2018': '\u2019',
432 '\u201a': '\u2019', '\u201b': '\u2019', '\u201c': '\u201d',
433 '\u201e': '\u201d', '\u201f': '\u201d', '\u2039': '\u203a',
434 '\u2045': '\u2046', '\u207d': '\u207e', '\u208d': '\u208e',
435 '\u2208': '\u220b', '\u2209': '\u220c', '\u220a': '\u220d',
436 '\u2215': '\u29f5', '\u223c': '\u223d', '\u2243': '\u22cd',
437 '\u2252': '\u2253', '\u2254': '\u2255', '\u2264': '\u2265',
438 '\u2266': '\u2267', '\u2268': '\u2269', '\u226a': '\u226b',
439 '\u226e': '\u226f', '\u2270': '\u2271', '\u2272': '\u2273',
440 '\u2274': '\u2275', '\u2276': '\u2277', '\u2278': '\u2279',
441 '\u227a': '\u227b', '\u227c': '\u227d', '\u227e': '\u227f',
442 '\u2280': '\u2281', '\u2282': '\u2283', '\u2284': '\u2285',
443 '\u2286': '\u2287', '\u2288': '\u2289', '\u228a': '\u228b',
444 '\u228f': '\u2290', '\u2291': '\u2292', '\u2298': '\u29b8',
445 '\u22a2': '\u22a3', '\u22a6': '\u2ade', '\u22a8': '\u2ae4',
446 '\u22a9': '\u2ae3', '\u22ab': '\u2ae5', '\u22b0': '\u22b1',
447 '\u22b2': '\u22b3', '\u22b4': '\u22b5', '\u22b6': '\u22b7',
448 '\u22c9': '\u22ca', '\u22cb': '\u22cc', '\u22d0': '\u22d1',
449 '\u22d6': '\u22d7', '\u22d8': '\u22d9', '\u22da': '\u22db',
450 '\u22dc': '\u22dd', '\u22de': '\u22df', '\u22e0': '\u22e1',
451 '\u22e2': '\u22e3', '\u22e4': '\u22e5', '\u22e6': '\u22e7',
452 '\u22e8': '\u22e9', '\u22ea': '\u22eb', '\u22ec': '\u22ed',
453 '\u22f0': '\u22f1', '\u22f2': '\u22fa', '\u22f3': '\u22fb',
454 '\u22f4': '\u22fc', '\u22f6': '\u22fd', '\u22f7': '\u22fe',
455 '\u2308': '\u2309', '\u230a': '\u230b', '\u2329': '\u232a',
456 '\u23b4': '\u23b5', '\u2768': '\u2769', '\u276a': '\u276b',
457 '\u276c': '\u276d', '\u276e': '\u276f', '\u2770': '\u2771',
458 '\u2772': '\u2773', '\u2774': '\u2775', '\u27c3': '\u27c4',
459 '\u27c5': '\u27c6', '\u27d5': '\u27d6', '\u27dd': '\u27de',
460 '\u27e2': '\u27e3', '\u27e4': '\u27e5', '\u27e6': '\u27e7',
461 '\u27e8': '\u27e9', '\u27ea': '\u27eb', '\u2983': '\u2984',
462 '\u2985': '\u2986', '\u2987': '\u2988', '\u2989': '\u298a',
463 '\u298b': '\u298c', '\u298d': '\u298e', '\u298f': '\u2990',
464 '\u2991': '\u2992', '\u2993': '\u2994', '\u2995': '\u2996',
465 '\u2997': '\u2998', '\u29c0': '\u29c1', '\u29c4': '\u29c5',
466 '\u29cf': '\u29d0', '\u29d1': '\u29d2', '\u29d4': '\u29d5',
467 '\u29d8': '\u29d9', '\u29da': '\u29db', '\u29f8': '\u29f9',
468 '\u29fc': '\u29fd', '\u2a2b': '\u2a2c', '\u2a2d': '\u2a2e',
469 '\u2a34': '\u2a35', '\u2a3c': '\u2a3d', '\u2a64': '\u2a65',
470 '\u2a79': '\u2a7a', '\u2a7d': '\u2a7e', '\u2a7f': '\u2a80',
471 '\u2a81': '\u2a82', '\u2a83': '\u2a84', '\u2a8b': '\u2a8c',
472 '\u2a91': '\u2a92', '\u2a93': '\u2a94', '\u2a95': '\u2a96',
473 '\u2a97': '\u2a98', '\u2a99': '\u2a9a', '\u2a9b': '\u2a9c',
474 '\u2aa1': '\u2aa2', '\u2aa6': '\u2aa7', '\u2aa8': '\u2aa9',
475 '\u2aaa': '\u2aab', '\u2aac': '\u2aad', '\u2aaf': '\u2ab0',
476 '\u2ab3': '\u2ab4', '\u2abb': '\u2abc', '\u2abd': '\u2abe',
477 '\u2abf': '\u2ac0', '\u2ac1': '\u2ac2', '\u2ac3': '\u2ac4',
478 '\u2ac5': '\u2ac6', '\u2acd': '\u2ace', '\u2acf': '\u2ad0',
479 '\u2ad1': '\u2ad2', '\u2ad3': '\u2ad4', '\u2ad5': '\u2ad6',
480 '\u2aec': '\u2aed', '\u2af7': '\u2af8', '\u2af9': '\u2afa',
481 '\u2e02': '\u2e03', '\u2e04': '\u2e05', '\u2e09': '\u2e0a',
482 '\u2e0c': '\u2e0d', '\u2e1c': '\u2e1d', '\u2e20': '\u2e21',
483 '\u3008': '\u3009', '\u300a': '\u300b', '\u300c': '\u300d',
484 '\u300e': '\u300f', '\u3010': '\u3011', '\u3014': '\u3015',
485 '\u3016': '\u3017', '\u3018': '\u3019', '\u301a': '\u301b',
486 '\u301d': '\u301e', '\ufd3e': '\ufd3f', '\ufe17': '\ufe18',
487 '\ufe35': '\ufe36', '\ufe37': '\ufe38', '\ufe39': '\ufe3a',
488 '\ufe3b': '\ufe3c', '\ufe3d': '\ufe3e', '\ufe3f': '\ufe40',
489 '\ufe41': '\ufe42', '\ufe43': '\ufe44', '\ufe47': '\ufe48',
490 '\ufe59': '\ufe5a', '\ufe5b': '\ufe5c', '\ufe5d': '\ufe5e',
491 '\uff08': '\uff09', '\uff1c': '\uff1e', '\uff3b': '\uff3d',
492 '\uff5b': '\uff5d', '\uff5f': '\uff60', '\uff62': '\uff63',
493 }
495 def _build_word_match(words, boundary_regex_fragment=None, prefix='', suffix=''):
496 if boundary_regex_fragment is None:
497 return r'\b(' + prefix + r'|'.join(re.escape(x) for x in words) + \
498 suffix + r')\b'
499 else:
500 return r'(?<!' + boundary_regex_fragment + r')' + prefix + r'(' + \
501 r'|'.join(re.escape(x) for x in words) + r')' + suffix + r'(?!' + \
502 boundary_regex_fragment + r')'
def brackets_callback(token_class):
    """Create a lexer callback that emits one delimited construct as *token_class*.

    The returned callback expects a match carrying a ``delimiter`` named
    group (a run of one repeated character) and, optionally, an ``adverbs``
    group. It scans ``context.text`` for the end of the construct, yields a
    single ``(start, token_class, text)`` tuple covering it, and moves
    ``context.pos`` to the end of that token.
    """
    def callback(lexer, match, context):
        groups = match.groupdict()
        opening_chars = groups['delimiter']
        n_chars = len(opening_chars)
        adverbs = groups.get('adverbs')

        text = context.text
        delimiter_start = match.start('delimiter')
        body_start = delimiter_start + n_chars

        closer = Perl6Lexer.PERL6_BRACKETS.get(opening_chars[0])

        if closer is None:
            # Not a mirrored character: the construct ends at the next
            # occurrence of the same delimiter run.
            end_pos = text.find(opening_chars, body_start)
            if end_pos < 0:
                # No closer: highlight the rest of the text in this class.
                end_pos = len(text)
        else:
            # Mirrored bracket: look for the matching closer, keeping
            # nesting in mind.
            closing_chars = closer * n_chars
            nesting_level = 1
            search_pos = delimiter_start

            while nesting_level > 0:
                next_open_pos = text.find(opening_chars, search_pos + n_chars)
                next_close_pos = text.find(closing_chars, search_pos + n_chars)

                if next_close_pos == -1:
                    # Unbalanced input: run to the end of the text.
                    next_close_pos = len(text)
                    nesting_level = 0
                elif next_open_pos != -1 and next_open_pos < next_close_pos:
                    nesting_level += 1
                    search_pos = next_open_pos
                else:  # the closer comes first
                    nesting_level -= 1
                    search_pos = next_close_pos

            end_pos = next_close_pos

        # Ordinary case: the token ends right after the closing delimiter.
        token_end = end_pos + n_chars

        if adverbs is not None and re.search(r':to\b', adverbs):
            # Heredoc: the delimited text names a terminator, and the token
            # extends to the line that consists of that terminator.
            heredoc_terminator = text[body_start:end_pos]
            end_heredoc = re.search(r'^\s*' + re.escape(heredoc_terminator) +
                                    r'\s*$', text[end_pos:], re.MULTILINE)

            if end_heredoc:
                # end() is relative to text[end_pos:] and already lies past
                # the terminator. Adding n_chars here as well would swallow
                # that many characters of whatever follows the heredoc.
                token_end = end_pos + end_heredoc.end()
            else:
                token_end = len(text)

        # Never report a position beyond the input (unterminated constructs).
        token_end = min(token_end, len(text))

        yield match.start(), token_class, text[match.start():token_end]
        context.pos = token_end

    return callback
def opening_brace_callback(lexer, match, context):
    """Emit the matched text as ``Text`` and track brace depth inside tokens.

    Yields one ``(start, Text, text)`` tuple for the match and advances
    ``context.pos`` past it. When the state directly beneath the current one
    is ``'token'``, ``context.perl6_token_nesting_level`` is incremented so
    that the matching closing brace can later be told apart from nested ones.
    """
    state_stack = context.stack
    begin, finish = match.span()

    yield begin, Text, context.text[begin:finish]
    context.pos = finish

    # Code embedded in a token/regex block sits one level above 'token';
    # count the brace so we know when to return to the token rules.
    embedded_in_token = len(state_stack) > 2 and state_stack[-2] == 'token'
    if embedded_in_token:
        context.perl6_token_nesting_level += 1
def closing_brace_callback(lexer, match, context):
    """Emit the matched text as ``Text`` and unwind brace depth inside tokens.

    Yields one ``(start, Text, text)`` tuple for the match and advances
    ``context.pos`` past it. When the state directly beneath the current one
    is ``'token'``, ``context.perl6_token_nesting_level`` is decremented;
    once it reaches zero the current state is popped off the stack.
    """
    state_stack = context.stack
    begin, finish = match.span()

    yield begin, Text, context.text[begin:finish]
    context.pos = finish

    # Only braces seen one level above a 'token' state take part in the
    # nesting bookkeeping.
    if len(state_stack) <= 2 or state_stack[-2] != 'token':
        return

    context.perl6_token_nesting_level -= 1
    if context.perl6_token_nesting_level == 0:
        # This brace closes the embedded code block: back to the token rules.
        state_stack.pop()
def embedded_perl6_callback(lexer, match, context):
    """Emit the matched text as ``Text`` and switch to the ``'root'`` rules.

    Resets ``context.perl6_token_nesting_level`` to 1, yields one
    ``(start, Text, text)`` tuple for the match, advances ``context.pos``
    past it and pushes ``'root'`` onto the state stack.
    """
    # The brace being processed is the first open one.
    context.perl6_token_nesting_level = 1

    begin, finish = match.span()
    yield begin, Text, context.text[begin:finish]

    context.pos = finish
    context.stack.append('root')
593 # If you're modifying these rules, be careful if you need to process '{' or '}'
594 # characters. We have special logic for processing these characters (due to the fact
595 # that you can nest Perl 6 code in regex blocks), so if you need to process one of
596 # them, make sure you also process the corresponding one!
597 tokens = {
598 'common': [
599 (r'#[`|=](?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS) + r'])(?P=first_char)*)',
600 brackets_callback(Comment.Multiline)),
601 (r'#[^\n]*$', Comment.Single),
602 (r'^(\s*)=begin\s+(\w+)\b.*?^\1=end\s+\2', Comment.Multiline),
603 (r'^(\s*)=for.*?\n\s*?\n', Comment.Multiline),
604 (r'^=.*?\n\s*?\n', Comment.Multiline),
605 (r'(regex|token|rule)(\s*' + PERL6_IDENTIFIER_RANGE + '+:sym)',
606 bygroups(Keyword, Name), 'token-sym-brackets'),
607 (r'(regex|token|rule)(?!' + PERL6_IDENTIFIER_RANGE + r')(\s*' + PERL6_IDENTIFIER_RANGE + '+)?',
608 bygroups(Keyword, Name), 'pre-token'),
609 # deal with a special case in the Perl 6 grammar (role q { ... })
610 (r'(role)(\s+)(q)(\s*)', bygroups(Keyword, Whitespace, Name, Whitespace)),
611 (_build_word_match(PERL6_KEYWORDS, PERL6_IDENTIFIER_RANGE), Keyword),
612 (_build_word_match(PERL6_BUILTIN_CLASSES, PERL6_IDENTIFIER_RANGE, suffix='(?::[UD])?'),
613 Name.Builtin),
614 (_build_word_match(PERL6_BUILTINS, PERL6_IDENTIFIER_RANGE), Name.Builtin),
615 # copied from PerlLexer
616 (r'[$@%&][.^:?=!~]?' + PERL6_IDENTIFIER_RANGE + '+(?:<<.*?>>|<.*?>|«.*?»)*',
617 Name.Variable),
618 (r'\$[!/](?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable.Global),
619 (r'::\?\w+', Name.Variable.Global),
620 (r'[$@%&]\*' + PERL6_IDENTIFIER_RANGE + '+(?:<<.*?>>|<.*?>|«.*?»)*',
621 Name.Variable.Global),
622 (r'\$(?:<.*?>)+', Name.Variable),
623 (r'(?:q|qq|Q)[a-zA-Z]?\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^0-9a-zA-Z:\s])'
624 r'(?P=first_char)*)', brackets_callback(String)),
625 # copied from PerlLexer
626 (r'0_?[0-7]+(_[0-7]+)*', Number.Oct),
627 (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex),
628 (r'0b[01]+(_[01]+)*', Number.Bin),
629 (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?',
630 Number.Float),
631 (r'(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*', Number.Float),
632 (r'\d+(_\d+)*', Number.Integer),
633 (r'(?<=~~)\s*/(?:\\\\|\\/|.)*?/', String.Regex),
634 (r'(?<=[=(,])\s*/(?:\\\\|\\/|.)*?/', String.Regex),
635 (r'm\w+(?=\()', Name),
636 (r'(?:m|ms|rx)\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^\w:\s])'
637 r'(?P=first_char)*)', brackets_callback(String.Regex)),
638 (r'(?:s|ss|tr)\s*(?::[\w\s:]+)?\s*/(?:\\\\|\\/|.)*?/(?:\\\\|\\/|.)*?/',
639 String.Regex),
640 (r'<[^\s=].*?\S>', String),
641 (_build_word_match(PERL6_OPERATORS), Operator),
642 (r'\w' + PERL6_IDENTIFIER_RANGE + '*', Name),
643 (r"'(\\\\|\\[^\\]|[^'\\])*'", String),
644 (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
645 ],
646 'root': [
647 include('common'),
648 (r'\{', opening_brace_callback),
649 (r'\}', closing_brace_callback),
650 (r'.+?', Text),
651 ],
652 'pre-token': [
653 include('common'),
654 (r'\{', Text, ('#pop', 'token')),
655 (r'.+?', Text),
656 ],
657 'token-sym-brackets': [
658 (r'(?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS) + '])(?P=first_char)*)',
659 brackets_callback(Name), ('#pop', 'pre-token')),
660 default(('#pop', 'pre-token')),
661 ],
662 'token': [
663 (r'\}', Text, '#pop'),
664 (r'(?<=:)(?:my|our|state|constant|temp|let).*?;', using(this)),
665 # make sure that quotes in character classes aren't treated as strings
666 (r'<(?:[-!?+.]\s*)?\[.*?\]>', String.Regex),
667 # make sure that '#' characters in quotes aren't treated as comments
668 (r"(?<!\\)'(\\\\|\\[^\\]|[^'\\])*'", String.Regex),
669 (r'(?<!\\)"(\\\\|\\[^\\]|[^"\\])*"', String.Regex),
670 (r'#.*?$', Comment.Single),
671 (r'\{', embedded_perl6_callback),
672 ('.+?', String.Regex),
673 ],
674 }
def analyse_text(text):
    """Estimate how likely *text* is to be Raku (Perl 6) source.

    POD sections are removed first. Returns ``True`` when the shebang
    check matches, when a leading ``v6`` / ``use v6...;`` pragma is found,
    or when a leading package-like declaration is either scoped with
    ``my``/``our`` or accompanied by a ``my``/``our``/``has`` declaration
    elsewhere. Otherwise returns a score: 0.8 for a ``my``/``our``/``has``
    declaration, 0.05 for an unscoped leading package-like declaration,
    0 if neither; the score is halved when the text contains ``:=``.
    """
    def strip_pod(lines):
        """Return *lines* without POD blocks and their ``=directive`` lines."""
        in_pod = False
        stripped_lines = []

        for line in lines:
            if re.match(r'^=(?:end|cut)', line):
                in_pod = False
            elif re.match(r'^=\w+', line):
                in_pod = True
            elif not in_pod:
                stripped_lines.append(line)

        return stripped_lines

    # XXX handle block comments
    lines = strip_pod(text.splitlines())
    text = '\n'.join(lines)

    # 'raku' is included because the interpreter is commonly invoked under
    # that name and this lexer already lists it as an alias.
    if shebang_matches(text, r'perl6|raku|rakudo|niecza|pugs'):
        return True

    saw_perl_decl = False
    rating = 0  # numeric from the start: it may be divided below

    # check for my/our/has declarations; \b stops words that merely end in
    # one of the keywords ("dummy $x") from counting
    if re.search(r"\b(?:my|our|has)\s+(?:" + Perl6Lexer.PERL6_IDENTIFIER_RANGE +
                 r"+\s+)?[$@%&(]", text):
        rating = 0.8
        saw_perl_decl = True

    # Only the leading statements are inspected: the loop stops at the first
    # non-blank line that is neither a version pragma nor a declaration.
    for line in lines:
        line = re.sub('#.*', '', line)
        if re.match(r'^\s*$', line):
            continue

        # match v6; use v6; use v6.0; use v6.0.0; and the lettered language
        # versions such as use v6.c; / use v6.d;
        if re.match(r'^\s*(?:use\s+)?v6(?:\.\w+)*;', line):
            return True
        # match class, module, role, enum, grammar declarations
        class_decl = re.match(
            r'^\s*(?:(?P<scope>my|our)\s+)?(?:module|class|role|enum|grammar)',
            line)
        if class_decl:
            if saw_perl_decl or class_decl.group('scope') is not None:
                return True
            rating = 0.05
            continue
        break

    if ':=' in text:
        # := also appears in other languages (the PerlLexer heuristic cites
        # unicon), so be less confident when it is present
        rating /= 2

    return rating
def __init__(self, **options):
    """Initialise the lexer and set ``self.encoding``.

    All *options* are forwarded to the base initialiser. Afterwards
    ``self.encoding`` is set to the ``'encoding'`` option, or to
    ``'utf-8'`` when that option is absent.
    """
    super().__init__(**options)
    # Assigned after the base initialiser runs, so this value is the one
    # that remains on the instance.
    chosen_encoding = options.get('encoding', 'utf-8')
    self.encoding = chosen_encoding