1"""
2 pygments.lexers.perl
3 ~~~~~~~~~~~~~~~~~~~~
4
5 Lexers for Perl, Raku and related languages.
6
7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11import re
12
13from pygments.lexer import RegexLexer, ExtendedRegexLexer, include, bygroups, \
14 using, this, default, words
15from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
16 Number, Punctuation, Whitespace
17from pygments.util import shebang_matches
18
19__all__ = ['PerlLexer', 'Perl6Lexer']
20
21
22class PerlLexer(RegexLexer):
23 """
24 For Perl source code.
25 """
26
27 name = 'Perl'
28 url = 'https://www.perl.org'
29 aliases = ['perl', 'pl']
30 filenames = ['*.pl', '*.pm', '*.t', '*.perl']
31 mimetypes = ['text/x-perl', 'application/x-perl']
32 version_added = ''
33
34 flags = re.DOTALL | re.MULTILINE
35 # TODO: give this to a perl guy who knows how to parse perl...
36 tokens = {
37 'balanced-regex': [
38 (r'/(\\\\|\\[^\\]|[^\\/])*/[egimosx]*', String.Regex, '#pop'),
39 (r'!(\\\\|\\[^\\]|[^\\!])*![egimosx]*', String.Regex, '#pop'),
40 (r'\\(\\\\|[^\\])*\\[egimosx]*', String.Regex, '#pop'),
41 (r'\{(\\\\|\\[^\\]|[^\\}])*\}[egimosx]*', String.Regex, '#pop'),
42 (r'<(\\\\|\\[^\\]|[^\\>])*>[egimosx]*', String.Regex, '#pop'),
43 (r'\[(\\\\|\\[^\\]|[^\\\]])*\][egimosx]*', String.Regex, '#pop'),
44 (r'\((\\\\|\\[^\\]|[^\\)])*\)[egimosx]*', String.Regex, '#pop'),
45 (r'@(\\\\|\\[^\\]|[^\\@])*@[egimosx]*', String.Regex, '#pop'),
46 (r'%(\\\\|\\[^\\]|[^\\%])*%[egimosx]*', String.Regex, '#pop'),
47 (r'\$(\\\\|\\[^\\]|[^\\$])*\$[egimosx]*', String.Regex, '#pop'),
48 ],
49 'root': [
50 (r'\A\#!.+?$', Comment.Hashbang),
51 (r'\#.*?$', Comment.Single),
52 (r'^=[a-zA-Z0-9]+\s+.*?\n=cut', Comment.Multiline),
53 (words((
54 'case', 'continue', 'do', 'else', 'elsif', 'for', 'foreach',
55 'if', 'last', 'my', 'next', 'our', 'redo', 'reset', 'then',
56 'unless', 'until', 'while', 'print', 'new', 'BEGIN',
57 'CHECK', 'INIT', 'END', 'return'), suffix=r'\b'),
58 Keyword),
59 (r'(format)(\s+)(\w+)(\s*)(=)(\s*\n)',
60 bygroups(Keyword, Whitespace, Name, Whitespace, Punctuation, Whitespace), 'format'),
61 (r'(eq|lt|gt|le|ge|ne|not|and|or|cmp)\b', Operator.Word),
62 # common delimiters
63 (r's/(\\\\|\\[^\\]|[^\\/])*/(\\\\|\\[^\\]|[^\\/])*/[egimosx]*',
64 String.Regex),
65 (r's!(\\\\|\\!|[^!])*!(\\\\|\\!|[^!])*![egimosx]*', String.Regex),
66 (r's\\(\\\\|[^\\])*\\(\\\\|[^\\])*\\[egimosx]*', String.Regex),
67 (r's@(\\\\|\\[^\\]|[^\\@])*@(\\\\|\\[^\\]|[^\\@])*@[egimosx]*',
68 String.Regex),
69 (r's%(\\\\|\\[^\\]|[^\\%])*%(\\\\|\\[^\\]|[^\\%])*%[egimosx]*',
70 String.Regex),
71 # balanced delimiters
72 (r's\{(\\\\|\\[^\\]|[^\\}])*\}\s*', String.Regex, 'balanced-regex'),
73 (r's<(\\\\|\\[^\\]|[^\\>])*>\s*', String.Regex, 'balanced-regex'),
74 (r's\[(\\\\|\\[^\\]|[^\\\]])*\]\s*', String.Regex,
75 'balanced-regex'),
76 (r's\((\\\\|\\[^\\]|[^\\)])*\)\s*', String.Regex,
77 'balanced-regex'),
78
79 (r'm?/(\\\\|\\[^\\]|[^\\/\n])*/[gcimosx]*', String.Regex),
80 (r'm(?=[/!\\{<\[(@%$])', String.Regex, 'balanced-regex'),
81 (r'((?<==~)|(?<=\())\s*/(\\\\|\\[^\\]|[^\\/])*/[gcimosx]*',
82 String.Regex),
83 (r'\s+', Whitespace),
84 (words((
85 'abs', 'accept', 'alarm', 'atan2', 'bind', 'binmode', 'bless', 'caller', 'chdir',
86 'chmod', 'chomp', 'chop', 'chown', 'chr', 'chroot', 'close', 'closedir', 'connect',
87 'continue', 'cos', 'crypt', 'dbmclose', 'dbmopen', 'defined', 'delete', 'die',
88 'dump', 'each', 'endgrent', 'endhostent', 'endnetent', 'endprotoent',
89 'endpwent', 'endservent', 'eof', 'eval', 'exec', 'exists', 'exit', 'exp', 'fcntl',
90 'fileno', 'flock', 'fork', 'format', 'formline', 'getc', 'getgrent', 'getgrgid',
91 'getgrnam', 'gethostbyaddr', 'gethostbyname', 'gethostent', 'getlogin',
92 'getnetbyaddr', 'getnetbyname', 'getnetent', 'getpeername', 'getpgrp',
93 'getppid', 'getpriority', 'getprotobyname', 'getprotobynumber',
94 'getprotoent', 'getpwent', 'getpwnam', 'getpwuid', 'getservbyname',
95 'getservbyport', 'getservent', 'getsockname', 'getsockopt', 'glob', 'gmtime',
96 'goto', 'grep', 'hex', 'import', 'index', 'int', 'ioctl', 'join', 'keys', 'kill', 'last',
97 'lc', 'lcfirst', 'length', 'link', 'listen', 'local', 'localtime', 'log', 'lstat',
98 'map', 'mkdir', 'msgctl', 'msgget', 'msgrcv', 'msgsnd', 'my', 'next', 'oct', 'open',
99 'opendir', 'ord', 'our', 'pack', 'pipe', 'pop', 'pos', 'printf',
100 'prototype', 'push', 'quotemeta', 'rand', 'read', 'readdir',
101 'readline', 'readlink', 'readpipe', 'recv', 'redo', 'ref', 'rename',
102 'reverse', 'rewinddir', 'rindex', 'rmdir', 'scalar', 'seek', 'seekdir',
103 'select', 'semctl', 'semget', 'semop', 'send', 'setgrent', 'sethostent', 'setnetent',
104 'setpgrp', 'setpriority', 'setprotoent', 'setpwent', 'setservent',
105 'setsockopt', 'shift', 'shmctl', 'shmget', 'shmread', 'shmwrite', 'shutdown',
106 'sin', 'sleep', 'socket', 'socketpair', 'sort', 'splice', 'split', 'sprintf', 'sqrt',
107 'srand', 'stat', 'study', 'substr', 'symlink', 'syscall', 'sysopen', 'sysread',
108 'sysseek', 'system', 'syswrite', 'tell', 'telldir', 'tie', 'tied', 'time', 'times', 'tr',
109 'truncate', 'uc', 'ucfirst', 'umask', 'undef', 'unlink', 'unpack', 'unshift', 'untie',
110 'utime', 'values', 'vec', 'wait', 'waitpid', 'wantarray', 'warn', 'write'), suffix=r'\b'),
111 Name.Builtin),
112 (r'((__(DATA|DIE|WARN)__)|(STD(IN|OUT|ERR)))\b', Name.Builtin.Pseudo),
113 (r'(<<)([\'"]?)([a-zA-Z_]\w*)(\2;?\n.*?\n)(\3)(\n)',
114 bygroups(String, String, String.Delimiter, String, String.Delimiter, Whitespace)),
115 (r'__END__', Comment.Preproc, 'end-part'),
116 (r'\$\^[ADEFHILMOPSTWX]', Name.Variable.Global),
117 (r"\$[\\\"\[\]'&`+*.,;=%~?@$!<>(^|/-](?!\w)", Name.Variable.Global),
118 (r'[$@%#]+', Name.Variable, 'varname'),
119 (r'0_?[0-7]+(_[0-7]+)*', Number.Oct),
120 (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex),
121 (r'0b[01]+(_[01]+)*', Number.Bin),
122 (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?',
123 Number.Float),
124 (r'(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*', Number.Float),
125 (r'\d+(_\d+)*', Number.Integer),
126 (r"'(\\\\|\\[^\\]|[^'\\])*'", String),
127 (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
128 (r'`(\\\\|\\[^\\]|[^`\\])*`', String.Backtick),
129 (r'<([^\s>]+)>', String.Regex),
130 (r'(q|qq|qw|qr|qx)\{', String.Other, 'cb-string'),
131 (r'(q|qq|qw|qr|qx)\(', String.Other, 'rb-string'),
132 (r'(q|qq|qw|qr|qx)\[', String.Other, 'sb-string'),
133 (r'(q|qq|qw|qr|qx)\<', String.Other, 'lt-string'),
134 (r'(q|qq|qw|qr|qx)([\W_])(.|\n)*?\2', String.Other),
135 (r'(package)(\s+)([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)',
136 bygroups(Keyword, Whitespace, Name.Namespace)),
137 (r'(use|require|no)(\s+)([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)',
138 bygroups(Keyword, Whitespace, Name.Namespace)),
139 (r'(sub)(\s+)', bygroups(Keyword, Whitespace), 'funcname'),
140 (words((
141 'no', 'package', 'require', 'use'), suffix=r'\b'),
142 Keyword),
143 (r'(\[\]|\*\*|::|<<|>>|>=|<=>|<=|={3}|!=|=~|'
144 r'!~|&&?|\|\||\.{1,3})', Operator),
145 (r'[-+/*%=<>&^|!\\~]=?', Operator),
146 (r'[()\[\]:;,<>/?{}]', Punctuation), # yes, there's no shortage
147 # of punctuation in Perl!
148 (r'(?=\w)', Name, 'name'),
149 ],
150 'format': [
151 (r'\.\n', String.Interpol, '#pop'),
152 (r'[^\n]*\n', String.Interpol),
153 ],
154 'varname': [
155 (r'\s+', Whitespace),
156 (r'\{', Punctuation, '#pop'), # hash syntax?
157 (r'\)|,', Punctuation, '#pop'), # argument specifier
158 (r'\w+::', Name.Namespace),
159 (r'[\w:]+', Name.Variable, '#pop'),
160 ],
161 'name': [
162 (r'[a-zA-Z_]\w*(::[a-zA-Z_]\w*)*(::)?(?=\s*->)', Name.Namespace, '#pop'),
163 (r'[a-zA-Z_]\w*(::[a-zA-Z_]\w*)*::', Name.Namespace, '#pop'),
164 (r'[\w:]+', Name, '#pop'),
165 (r'[A-Z_]+(?=\W)', Name.Constant, '#pop'),
166 (r'(?=\W)', Text, '#pop'),
167 ],
168 'funcname': [
169 (r'[a-zA-Z_]\w*[!?]?', Name.Function),
170 (r'\s+', Whitespace),
171 # argument declaration
172 (r'(\([$@%]*\))(\s*)', bygroups(Punctuation, Whitespace)),
173 (r';', Punctuation, '#pop'),
174 (r'.*?\{', Punctuation, '#pop'),
175 ],
176 'cb-string': [
177 (r'\\[{}\\]', String.Other),
178 (r'\\', String.Other),
179 (r'\{', String.Other, 'cb-string'),
180 (r'\}', String.Other, '#pop'),
181 (r'[^{}\\]+', String.Other)
182 ],
183 'rb-string': [
184 (r'\\[()\\]', String.Other),
185 (r'\\', String.Other),
186 (r'\(', String.Other, 'rb-string'),
187 (r'\)', String.Other, '#pop'),
188 (r'[^()]+', String.Other)
189 ],
190 'sb-string': [
191 (r'\\[\[\]\\]', String.Other),
192 (r'\\', String.Other),
193 (r'\[', String.Other, 'sb-string'),
194 (r'\]', String.Other, '#pop'),
195 (r'[^\[\]]+', String.Other)
196 ],
197 'lt-string': [
198 (r'\\[<>\\]', String.Other),
199 (r'\\', String.Other),
200 (r'\<', String.Other, 'lt-string'),
201 (r'\>', String.Other, '#pop'),
202 (r'[^<>]+', String.Other)
203 ],
204 'end-part': [
205 (r'.+', Comment.Preproc, '#pop')
206 ]
207 }
208
def analyse_text(text):
    """
    Rate how likely it is that `text` is Perl source code.

    Returns ``True`` as soon as the shebang line matches ``perl``.
    Otherwise the score is 0.9 when a ``my``/``our`` declaration followed
    by a sigil or an opening parenthesis is found (0 when it is not), and
    that score is halved if ``:=`` occurs anywhere in the text.
    """
    if shebang_matches(text, r'perl'):
        return True

    declaration = re.search(r'(?:my|our)\s+[$@%(]', text)
    score = 0.9 if declaration else 0

    # ``:=`` is not valid Perl but does show up in Unicon, so finding it
    # lowers the confidence that this really is Perl.
    if ':=' in text:
        score = score / 2

    return score
224
225
226class Perl6Lexer(ExtendedRegexLexer):
227 """
228 For Raku (a.k.a. Perl 6) source code.
229 """
230
231 name = 'Perl6'
232 url = 'https://www.raku.org'
233 aliases = ['perl6', 'pl6', 'raku']
234 filenames = ['*.pl', '*.pm', '*.nqp', '*.p6', '*.6pl', '*.p6l', '*.pl6',
235 '*.6pm', '*.p6m', '*.pm6', '*.t', '*.raku', '*.rakumod',
236 '*.rakutest', '*.rakudoc']
237 mimetypes = ['text/x-perl6', 'application/x-perl6']
238 version_added = '2.0'
239 flags = re.MULTILINE | re.DOTALL
240
241 PERL6_IDENTIFIER_RANGE = r"['\w:-]"
242
243 PERL6_KEYWORDS = (
244 #Phasers
245 'BEGIN','CATCH','CHECK','CLOSE','CONTROL','DOC','END','ENTER','FIRST',
246 'INIT','KEEP','LAST','LEAVE','NEXT','POST','PRE','QUIT','UNDO',
247 #Keywords
248 'anon','augment','but','class','constant','default','does','else',
249 'elsif','enum','for','gather','given','grammar','has','if','import',
250 'is','let','loop','made','make','method','module','multi','my','need',
251 'orwith','our','proceed','proto','repeat','require','return',
252 'return-rw','returns','role','rule','state','sub','submethod','subset',
253 'succeed','supersede','token','try','unit','unless','until','use',
254 'when','while','with','without',
255 #Traits
256 'export','native','repr','required','rw','symbol',
257 )
258
259 PERL6_BUILTINS = (
260 'ACCEPTS','abs','abs2rel','absolute','accept','accessed','acos',
261 'acosec','acosech','acosh','acotan','acotanh','acquire','act','action',
262 'actions','add','add_attribute','add_enum_value','add_fallback',
263 'add_method','add_parent','add_private_method','add_role','add_trustee',
264 'adverb','after','all','allocate','allof','allowed','alternative-names',
265 'annotations','antipair','antipairs','any','anyof','app_lifetime',
266 'append','arch','archname','args','arity','Array','asec','asech','asin',
267 'asinh','ASSIGN-KEY','ASSIGN-POS','assuming','ast','at','atan','atan2',
268 'atanh','AT-KEY','atomic-assign','atomic-dec-fetch','atomic-fetch',
269 'atomic-fetch-add','atomic-fetch-dec','atomic-fetch-inc',
270 'atomic-fetch-sub','atomic-inc-fetch','AT-POS','attributes','auth',
271 'await','backtrace','Bag','BagHash','bail-out','base','basename',
272 'base-repeating','batch','BIND-KEY','BIND-POS','bind-stderr',
273 'bind-stdin','bind-stdout','bind-udp','bits','bless','block','Bool',
274 'bool-only','bounds','break','Bridge','broken','BUILD','build-date',
275 'bytes','cache','callframe','calling-package','CALL-ME','callsame',
276 'callwith','can','cancel','candidates','cando','can-ok','canonpath',
277 'caps','caption','Capture','cas','catdir','categorize','categorize-list',
278 'catfile','catpath','cause','ceiling','cglobal','changed','Channel',
279 'chars','chdir','child','child-name','child-typename','chmod','chomp',
280 'chop','chr','chrs','chunks','cis','classify','classify-list','cleanup',
281 'clone','close','closed','close-stdin','cmp-ok','code','codes','collate',
282 'column','comb','combinations','command','comment','compiler','Complex',
283 'compose','compose_type','composer','condition','config',
284 'configure_destroy','configure_type_checking','conj','connect',
285 'constraints','construct','contains','contents','copy','cos','cosec',
286 'cosech','cosh','cotan','cotanh','count','count-only','cpu-cores',
287 'cpu-usage','CREATE','create_type','cross','cue','curdir','curupdir','d',
288 'Date','DateTime','day','daycount','day-of-month','day-of-week',
289 'day-of-year','days-in-month','declaration','decode','decoder','deepmap',
290 'default','defined','DEFINITE','delayed','DELETE-KEY','DELETE-POS',
291 'denominator','desc','DESTROY','destroyers','devnull','diag',
292 'did-you-mean','die','dies-ok','dir','dirname','dir-sep','DISTROnames',
293 'do','does','does-ok','done','done-testing','duckmap','dynamic','e',
294 'eager','earlier','elems','emit','enclosing','encode','encoder',
295 'encoding','end','ends-with','enum_from_value','enum_value_list',
296 'enum_values','enums','eof','EVAL','eval-dies-ok','EVALFILE',
297 'eval-lives-ok','exception','excludes-max','excludes-min','EXISTS-KEY',
298 'EXISTS-POS','exit','exitcode','exp','expected','explicitly-manage',
299 'expmod','extension','f','fail','fails-like','fc','feature','file',
300 'filename','find_method','find_method_qualified','finish','first','flat',
301 'flatmap','flip','floor','flunk','flush','fmt','format','formatter',
302 'freeze','from','from-list','from-loop','from-posix','full',
303 'full-barrier','get','get_value','getc','gist','got','grab','grabpairs',
304 'grep','handle','handled','handles','hardware','has_accessor','Hash',
305 'head','headers','hh-mm-ss','hidden','hides','hour','how','hyper','id',
306 'illegal','im','in','indent','index','indices','indir','infinite',
307 'infix','infix:<+>','infix:<->','install_method_cache','Instant',
308 'instead','Int','int-bounds','interval','in-timezone','invalid-str',
309 'invert','invocant','IO','IO::Notification.watch-path','is_trusted',
310 'is_type','isa','is-absolute','isa-ok','is-approx','is-deeply',
311 'is-hidden','is-initial-thread','is-int','is-lazy','is-leap-year',
312 'isNaN','isnt','is-prime','is-relative','is-routine','is-setting',
313 'is-win','item','iterator','join','keep','kept','KERNELnames','key',
314 'keyof','keys','kill','kv','kxxv','l','lang','last','lastcall','later',
315 'lazy','lc','leading','level','like','line','lines','link','List',
316 'listen','live','lives-ok','local','lock','log','log10','lookup','lsb',
317 'made','MAIN','make','Map','match','max','maxpairs','merge','message',
318 'method','method_table','methods','migrate','min','minmax','minpairs',
319 'minute','misplaced','Mix','MixHash','mkdir','mode','modified','month',
320 'move','mro','msb','multi','multiness','my','name','named','named_names',
321 'narrow','nativecast','native-descriptor','nativesizeof','new','new_type',
322 'new-from-daycount','new-from-pairs','next','nextcallee','next-handle',
323 'nextsame','nextwith','NFC','NFD','NFKC','NFKD','nl-in','nl-out',
324 'nodemap','nok','none','norm','not','note','now','nude','Num',
325 'numerator','Numeric','of','offset','offset-in-hours','offset-in-minutes',
326 'ok','old','on-close','one','on-switch','open','opened','operation',
327 'optional','ord','ords','orig','os-error','osname','out-buffer','pack',
328 'package','package-kind','package-name','packages','pair','pairs',
329 'pairup','parameter','params','parent','parent-name','parents','parse',
330 'parse-base','parsefile','parse-names','parts','pass','path','path-sep',
331 'payload','peer-host','peer-port','periods','perl','permutations','phaser',
332 'pick','pickpairs','pid','placeholder','plan','plus','polar','poll',
333 'polymod','pop','pos','positional','posix','postfix','postmatch',
334 'precomp-ext','precomp-target','pred','prefix','prematch','prepend',
335 'print','printf','print-nl','print-to','private','private_method_table',
336 'proc','produce','Promise','prompt','protect','pull-one','push',
337 'push-all','push-at-least','push-exactly','push-until-lazy','put',
338 'qualifier-type','quit','r','race','radix','rand','range','Rat','raw',
339 're','read','readchars','readonly','ready','Real','reallocate','reals',
340 'reason','rebless','receive','recv','redispatcher','redo','reduce',
341 'rel2abs','relative','release','rename','repeated','replacement',
342 'report','reserved','resolve','restore','result','resume','rethrow',
343 'reverse','right','rindex','rmdir','role','roles_to_compose','rolish',
344 'roll','rootdir','roots','rotate','rotor','round','roundrobin',
345 'routine-type','run','rwx','s','samecase','samemark','samewith','say',
346 'schedule-on','scheduler','scope','sec','sech','second','seek','self',
347 'send','Set','set_hidden','set_name','set_package','set_rw','set_value',
348 'SetHash','set-instruments','setup_finalization','shape','share','shell',
349 'shift','sibling','sigil','sign','signal','signals','signature','sin',
350 'sinh','sink','sink-all','skip','skip-at-least','skip-at-least-pull-one',
351 'skip-one','skip-rest','sleep','sleep-timer','sleep-until','Slip','slurp',
352 'slurp-rest','slurpy','snap','snapper','so','socket-host','socket-port',
353 'sort','source','source-package','spawn','SPEC','splice','split',
354 'splitdir','splitpath','sprintf','spurt','sqrt','squish','srand','stable',
355 'start','started','starts-with','status','stderr','stdout','Str',
356 'sub_signature','subbuf','subbuf-rw','subname','subparse','subst',
357 'subst-mutate','substr','substr-eq','substr-rw','subtest','succ','sum',
358 'Supply','symlink','t','tail','take','take-rw','tan','tanh','tap',
359 'target','target-name','tc','tclc','tell','then','throttle','throw',
360 'throws-like','timezone','tmpdir','to','today','todo','toggle','to-posix',
361 'total','trailing','trans','tree','trim','trim-leading','trim-trailing',
362 'truncate','truncated-to','trusts','try_acquire','trying','twigil','type',
363 'type_captures','typename','uc','udp','uncaught_handler','unimatch',
364 'uniname','uninames','uniparse','uniprop','uniprops','unique','unival',
365 'univals','unlike','unlink','unlock','unpack','unpolar','unshift',
366 'unwrap','updir','USAGE','use-ok','utc','val','value','values','VAR',
367 'variable','verbose-config','version','VMnames','volume','vow','w','wait',
368 'warn','watch','watch-path','week','weekday-of-month','week-number',
369 'week-year','WHAT','when','WHERE','WHEREFORE','WHICH','WHO',
370 'whole-second','WHY','wordcase','words','workaround','wrap','write',
371 'write-to','x','yada','year','yield','yyyy-mm-dd','z','zip','zip-latest',
372
373 )
374
375 PERL6_BUILTIN_CLASSES = (
376 #Booleans
377 'False','True',
378 #Classes
379 'Any','Array','Associative','AST','atomicint','Attribute','Backtrace',
380 'Backtrace::Frame','Bag','Baggy','BagHash','Blob','Block','Bool','Buf',
381 'Callable','CallFrame','Cancellation','Capture','CArray','Channel','Code',
382 'compiler','Complex','ComplexStr','Cool','CurrentThreadScheduler',
383 'Cursor','Date','Dateish','DateTime','Distro','Duration','Encoding',
384 'Exception','Failure','FatRat','Grammar','Hash','HyperWhatever','Instant',
385 'Int','int16','int32','int64','int8','IntStr','IO','IO::ArgFiles',
386 'IO::CatHandle','IO::Handle','IO::Notification','IO::Path',
387 'IO::Path::Cygwin','IO::Path::QNX','IO::Path::Unix','IO::Path::Win32',
388 'IO::Pipe','IO::Socket','IO::Socket::Async','IO::Socket::INET','IO::Spec',
389 'IO::Spec::Cygwin','IO::Spec::QNX','IO::Spec::Unix','IO::Spec::Win32',
390 'IO::Special','Iterable','Iterator','Junction','Kernel','Label','List',
391 'Lock','Lock::Async','long','longlong','Macro','Map','Match',
392 'Metamodel::AttributeContainer','Metamodel::C3MRO','Metamodel::ClassHOW',
393 'Metamodel::EnumHOW','Metamodel::Finalization','Metamodel::MethodContainer',
394 'Metamodel::MROBasedMethodDispatch','Metamodel::MultipleInheritance',
395 'Metamodel::Naming','Metamodel::Primitives','Metamodel::PrivateMethodContainer',
396 'Metamodel::RoleContainer','Metamodel::Trusting','Method','Mix','MixHash',
397 'Mixy','Mu','NFC','NFD','NFKC','NFKD','Nil','Num','num32','num64',
398 'Numeric','NumStr','ObjAt','Order','Pair','Parameter','Perl','Pod::Block',
399 'Pod::Block::Code','Pod::Block::Comment','Pod::Block::Declarator',
400 'Pod::Block::Named','Pod::Block::Para','Pod::Block::Table','Pod::Heading',
401 'Pod::Item','Pointer','Positional','PositionalBindFailover','Proc',
402 'Proc::Async','Promise','Proxy','PseudoStash','QuantHash','Range','Rat',
403 'Rational','RatStr','Real','Regex','Routine','Scalar','Scheduler',
404 'Semaphore','Seq','Set','SetHash','Setty','Signature','size_t','Slip',
405 'Stash','Str','StrDistance','Stringy','Sub','Submethod','Supplier',
406 'Supplier::Preserving','Supply','Systemic','Tap','Telemetry',
407 'Telemetry::Instrument::Thread','Telemetry::Instrument::Usage',
408 'Telemetry::Period','Telemetry::Sampler','Thread','ThreadPoolScheduler',
409 'UInt','uint16','uint32','uint64','uint8','Uni','utf8','Variable',
410 'Version','VM','Whatever','WhateverCode','WrapHandle'
411 )
412
413 PERL6_OPERATORS = (
414 'X', 'Z', 'after', 'also', 'and', 'andthen', 'before', 'cmp', 'div',
415 'eq', 'eqv', 'extra', 'ff', 'fff', 'ge', 'gt', 'le', 'leg', 'lt', 'm',
416 'mm', 'mod', 'ne', 'or', 'orelse', 'rx', 's', 'tr', 'x', 'xor', 'xx',
417 '++', '--', '**', '!', '+', '-', '~', '?', '|', '||', '+^', '~^', '?^',
418 '^', '*', '/', '%', '%%', '+&', '+<', '+>', '~&', '~<', '~>', '?&',
419 'gcd', 'lcm', '+', '-', '+|', '+^', '~|', '~^', '?|', '?^',
420 '~', '&', '^', 'but', 'does', '<=>', '..', '..^', '^..', '^..^',
421 '!=', '==', '<', '<=', '>', '>=', '~~', '===', '!eqv',
422 '&&', '||', '^^', '//', 'min', 'max', '??', '!!', 'ff', 'fff', 'so',
423 'not', '<==', '==>', '<<==', '==>>','unicmp',
424 )
425
426 # Perl 6 has a *lot* of possible bracketing characters
427 # this list was lifted from STD.pm6 (https://github.com/perl6/std)
428 PERL6_BRACKETS = {
429 '\u0028': '\u0029', '\u003c': '\u003e', '\u005b': '\u005d',
430 '\u007b': '\u007d', '\u00ab': '\u00bb', '\u0f3a': '\u0f3b',
431 '\u0f3c': '\u0f3d', '\u169b': '\u169c', '\u2018': '\u2019',
432 '\u201a': '\u2019', '\u201b': '\u2019', '\u201c': '\u201d',
433 '\u201e': '\u201d', '\u201f': '\u201d', '\u2039': '\u203a',
434 '\u2045': '\u2046', '\u207d': '\u207e', '\u208d': '\u208e',
435 '\u2208': '\u220b', '\u2209': '\u220c', '\u220a': '\u220d',
436 '\u2215': '\u29f5', '\u223c': '\u223d', '\u2243': '\u22cd',
437 '\u2252': '\u2253', '\u2254': '\u2255', '\u2264': '\u2265',
438 '\u2266': '\u2267', '\u2268': '\u2269', '\u226a': '\u226b',
439 '\u226e': '\u226f', '\u2270': '\u2271', '\u2272': '\u2273',
440 '\u2274': '\u2275', '\u2276': '\u2277', '\u2278': '\u2279',
441 '\u227a': '\u227b', '\u227c': '\u227d', '\u227e': '\u227f',
442 '\u2280': '\u2281', '\u2282': '\u2283', '\u2284': '\u2285',
443 '\u2286': '\u2287', '\u2288': '\u2289', '\u228a': '\u228b',
444 '\u228f': '\u2290', '\u2291': '\u2292', '\u2298': '\u29b8',
445 '\u22a2': '\u22a3', '\u22a6': '\u2ade', '\u22a8': '\u2ae4',
446 '\u22a9': '\u2ae3', '\u22ab': '\u2ae5', '\u22b0': '\u22b1',
447 '\u22b2': '\u22b3', '\u22b4': '\u22b5', '\u22b6': '\u22b7',
448 '\u22c9': '\u22ca', '\u22cb': '\u22cc', '\u22d0': '\u22d1',
449 '\u22d6': '\u22d7', '\u22d8': '\u22d9', '\u22da': '\u22db',
450 '\u22dc': '\u22dd', '\u22de': '\u22df', '\u22e0': '\u22e1',
451 '\u22e2': '\u22e3', '\u22e4': '\u22e5', '\u22e6': '\u22e7',
452 '\u22e8': '\u22e9', '\u22ea': '\u22eb', '\u22ec': '\u22ed',
453 '\u22f0': '\u22f1', '\u22f2': '\u22fa', '\u22f3': '\u22fb',
454 '\u22f4': '\u22fc', '\u22f6': '\u22fd', '\u22f7': '\u22fe',
455 '\u2308': '\u2309', '\u230a': '\u230b', '\u2329': '\u232a',
456 '\u23b4': '\u23b5', '\u2768': '\u2769', '\u276a': '\u276b',
457 '\u276c': '\u276d', '\u276e': '\u276f', '\u2770': '\u2771',
458 '\u2772': '\u2773', '\u2774': '\u2775', '\u27c3': '\u27c4',
459 '\u27c5': '\u27c6', '\u27d5': '\u27d6', '\u27dd': '\u27de',
460 '\u27e2': '\u27e3', '\u27e4': '\u27e5', '\u27e6': '\u27e7',
461 '\u27e8': '\u27e9', '\u27ea': '\u27eb', '\u2983': '\u2984',
462 '\u2985': '\u2986', '\u2987': '\u2988', '\u2989': '\u298a',
463 '\u298b': '\u298c', '\u298d': '\u298e', '\u298f': '\u2990',
464 '\u2991': '\u2992', '\u2993': '\u2994', '\u2995': '\u2996',
465 '\u2997': '\u2998', '\u29c0': '\u29c1', '\u29c4': '\u29c5',
466 '\u29cf': '\u29d0', '\u29d1': '\u29d2', '\u29d4': '\u29d5',
467 '\u29d8': '\u29d9', '\u29da': '\u29db', '\u29f8': '\u29f9',
468 '\u29fc': '\u29fd', '\u2a2b': '\u2a2c', '\u2a2d': '\u2a2e',
469 '\u2a34': '\u2a35', '\u2a3c': '\u2a3d', '\u2a64': '\u2a65',
470 '\u2a79': '\u2a7a', '\u2a7d': '\u2a7e', '\u2a7f': '\u2a80',
471 '\u2a81': '\u2a82', '\u2a83': '\u2a84', '\u2a8b': '\u2a8c',
472 '\u2a91': '\u2a92', '\u2a93': '\u2a94', '\u2a95': '\u2a96',
473 '\u2a97': '\u2a98', '\u2a99': '\u2a9a', '\u2a9b': '\u2a9c',
474 '\u2aa1': '\u2aa2', '\u2aa6': '\u2aa7', '\u2aa8': '\u2aa9',
475 '\u2aaa': '\u2aab', '\u2aac': '\u2aad', '\u2aaf': '\u2ab0',
476 '\u2ab3': '\u2ab4', '\u2abb': '\u2abc', '\u2abd': '\u2abe',
477 '\u2abf': '\u2ac0', '\u2ac1': '\u2ac2', '\u2ac3': '\u2ac4',
478 '\u2ac5': '\u2ac6', '\u2acd': '\u2ace', '\u2acf': '\u2ad0',
479 '\u2ad1': '\u2ad2', '\u2ad3': '\u2ad4', '\u2ad5': '\u2ad6',
480 '\u2aec': '\u2aed', '\u2af7': '\u2af8', '\u2af9': '\u2afa',
481 '\u2e02': '\u2e03', '\u2e04': '\u2e05', '\u2e09': '\u2e0a',
482 '\u2e0c': '\u2e0d', '\u2e1c': '\u2e1d', '\u2e20': '\u2e21',
483 '\u3008': '\u3009', '\u300a': '\u300b', '\u300c': '\u300d',
484 '\u300e': '\u300f', '\u3010': '\u3011', '\u3014': '\u3015',
485 '\u3016': '\u3017', '\u3018': '\u3019', '\u301a': '\u301b',
486 '\u301d': '\u301e', '\ufd3e': '\ufd3f', '\ufe17': '\ufe18',
487 '\ufe35': '\ufe36', '\ufe37': '\ufe38', '\ufe39': '\ufe3a',
488 '\ufe3b': '\ufe3c', '\ufe3d': '\ufe3e', '\ufe3f': '\ufe40',
489 '\ufe41': '\ufe42', '\ufe43': '\ufe44', '\ufe47': '\ufe48',
490 '\ufe59': '\ufe5a', '\ufe5b': '\ufe5c', '\ufe5d': '\ufe5e',
491 '\uff08': '\uff09', '\uff1c': '\uff1e', '\uff3b': '\uff3d',
492 '\uff5b': '\uff5d', '\uff5f': '\uff60', '\uff62': '\uff63',
493 }
494
def _build_word_match(words, boundary_regex_fragment=None, prefix='', suffix=''):
    """
    Build a regex source string matching any one of `words` literally.

    `words` is an iterable of strings; each one is escaped with
    ``re.escape`` and the results are joined into an alternation, in the
    order given.

    When `boundary_regex_fragment` is ``None`` the match is delimited by
    ``\\b`` on both sides and the single capturing group spans `prefix`,
    the word and `suffix`.  Otherwise the fragment is used inside a
    negative lookbehind and a negative lookahead, the capturing group
    spans only the word, and `prefix`/`suffix` sit outside the group.

    `prefix` and `suffix` are regex fragments inserted verbatim before and
    after the alternation.
    """
    alternatives = r'|'.join(re.escape(word) for word in words)

    if boundary_regex_fragment is None:
        # The alternation needs its own (non-capturing) group: without it
        # ``|`` binds looser than concatenation, so `prefix` would attach
        # only to the first word and `suffix` only to the last one.
        return r'\b(' + prefix + r'(?:' + alternatives + r')' + suffix + r')\b'

    return (r'(?<!' + boundary_regex_fragment + r')' + prefix +
            r'(' + alternatives + r')' + suffix +
            r'(?!' + boundary_regex_fragment + r')')
503
def brackets_callback(token_class):
    """
    Create a lexer callback that emits one `token_class` token for a
    delimited construct.

    The triggering match must define a ``delimiter`` named group (one or
    more repetitions of the opening character) and may define an
    ``adverbs`` group.  The returned callback finds where the construct
    ends, yields everything from the start of the match up to and
    including the closing delimiter as a single token, and moves
    ``context.pos`` past it.
    """
    def callback(lexer, match, context):
        source = context.text
        named = match.groupdict()
        opener = named['delimiter']
        width = len(opener)
        adverbs = named.get('adverbs')
        token_start = match.start()
        delim_start = match.start('delimiter')

        mirror = Perl6Lexer.PERL6_BRACKETS.get(opener[0])

        if mirror is not None:
            # Mirrored bracket: look for the matching closer while keeping
            # track of nested opener/closer pairs.
            closer = mirror * width
            depth = 1
            cursor = delim_start

            while depth > 0:
                scan_from = cursor + width
                open_at = source.find(opener, scan_from)
                close_at = source.find(closer, scan_from)

                if close_at == -1:
                    # unterminated: run to the end of the text and stop
                    close_at = len(source)
                    depth = 0
                elif open_at != -1 and open_at < close_at:
                    depth += 1
                    cursor = open_at
                else:
                    depth -= 1
                    cursor = close_at

            end_pos = close_at
        else:
            # Not a mirrored character: the construct ends at the next
            # occurrence of the very same delimiter.
            end_pos = source.find(opener, delim_start + width)

        if end_pos < 0:
            # no closer found, so the rest of the text gets this class
            end_pos = len(source)

        if adverbs is not None and re.search(r':to\b', adverbs):
            # Heredoc: the delimited text names the terminator; extend the
            # token to the first later line consisting of that terminator.
            terminator = source[delim_start + width:end_pos]
            terminator_line = r'^\s*' + re.escape(terminator) + r'\s*$'
            found = re.search(terminator_line, source[end_pos:], re.MULTILINE)

            if found:
                end_pos += found.end()
            else:
                end_pos = len(source)

        stop = end_pos + width
        yield token_start, token_class, source[token_start:stop]
        context.pos = stop

    return callback
559
def opening_brace_callback(lexer, match, context):
    """
    Emit the matched opening brace as ``Text`` and advance the context.

    When the state one level below the top of the stack is ``'token'``,
    the brace nesting counter is incremented so that it is known later
    when to return to the token rules.
    """
    state_stack = context.stack
    start, end = match.span()

    yield start, Text, context.text[start:end]
    context.pos = end

    # only braces met one level below a token state are counted
    if len(state_stack) <= 2 or state_stack[-2] != 'token':
        return
    context.perl6_token_nesting_level += 1
572
def closing_brace_callback(lexer, match, context):
    """
    Emit the matched closing brace as ``Text`` and advance the context.

    When the state one level below the top of the stack is ``'token'``,
    the brace nesting counter is decremented; once it reaches zero the
    top state is popped, which returns the lexer to the token state.
    """
    state_stack = context.stack
    start, end = match.span()

    yield start, Text, context.text[start:end]
    context.pos = end

    # a free closing brace only matters one level below a token state
    below_token_state = len(state_stack) > 2 and state_stack[-2] == 'token'
    if not below_token_state:
        return

    context.perl6_token_nesting_level -= 1
    if context.perl6_token_nesting_level == 0:
        state_stack.pop()
586
def embedded_perl6_callback(lexer, match, context):
    """
    Handle a ``{`` that opens embedded code inside a token body.

    Resets the brace nesting counter to 1, emits the matched text as
    ``Text``, advances the context and pushes the ``'root'`` state.
    """
    context.perl6_token_nesting_level = 1

    start, end = match.span()
    yield start, Text, context.text[start:end]

    context.pos = end
    context.stack.append('root')
592
593 # If you're modifying these rules, be careful if you need to process '{' or '}'
594 # characters. We have special logic for processing these characters (due to the fact
595 # that you can nest Perl 6 code in regex blocks), so if you need to process one of
596 # them, make sure you also process the corresponding one!
597 tokens = {
598 'common': [
599 (r'#[`|=](?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS) + r'])(?P=first_char)*)',
600 brackets_callback(Comment.Multiline)),
601 (r'#[^\n]*$', Comment.Single),
602 (r'^(\s*)=begin\s+(\w+)\b.*?^\1=end\s+\2', Comment.Multiline),
603 (r'^(\s*)=for.*?\n\s*?\n', Comment.Multiline),
604 (r'^=.*?\n\s*?\n', Comment.Multiline),
605 (r'(regex|token|rule)(\s*' + PERL6_IDENTIFIER_RANGE + '+:sym)',
606 bygroups(Keyword, Name), 'token-sym-brackets'),
607 (r'(regex|token|rule)(?!' + PERL6_IDENTIFIER_RANGE + r')(\s*' + PERL6_IDENTIFIER_RANGE + '+)?',
608 bygroups(Keyword, Name), 'pre-token'),
609 # deal with a special case in the Perl 6 grammar (role q { ... })
610 (r'(role)(\s+)(q)(\s*)', bygroups(Keyword, Whitespace, Name, Whitespace)),
611 (_build_word_match(PERL6_KEYWORDS, PERL6_IDENTIFIER_RANGE), Keyword),
612 (_build_word_match(PERL6_BUILTIN_CLASSES, PERL6_IDENTIFIER_RANGE, suffix='(?::[UD])?'),
613 Name.Builtin),
614 (_build_word_match(PERL6_BUILTINS, PERL6_IDENTIFIER_RANGE), Name.Builtin),
615 # copied from PerlLexer
616 (r'[$@%&][.^:?=!~]?' + PERL6_IDENTIFIER_RANGE + '+(?:<<.*?>>|<.*?>|«.*?»)*',
617 Name.Variable),
618 (r'\$[!/](?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable.Global),
619 (r'::\?\w+', Name.Variable.Global),
620 (r'[$@%&]\*' + PERL6_IDENTIFIER_RANGE + '+(?:<<.*?>>|<.*?>|«.*?»)*',
621 Name.Variable.Global),
622 (r'\$(?:<.*?>)+', Name.Variable),
623 (r'(?:q|qq|Q)[a-zA-Z]?\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^0-9a-zA-Z:\s])'
624 r'(?P=first_char)*)', brackets_callback(String)),
625 # copied from PerlLexer
626 (r'0_?[0-7]+(_[0-7]+)*', Number.Oct),
627 (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex),
628 (r'0b[01]+(_[01]+)*', Number.Bin),
629 (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?',
630 Number.Float),
631 (r'(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*', Number.Float),
632 (r'\d+(_\d+)*', Number.Integer),
633 (r'(?<=~~)\s*/(?:\\\\|\\/|.)*?/', String.Regex),
634 (r'(?<=[=(,])\s*/(?:\\\\|\\/|.)*?/', String.Regex),
635 (r'm\w+(?=\()', Name),
636 (r'(?:m|ms|rx)\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^\w:\s])'
637 r'(?P=first_char)*)', brackets_callback(String.Regex)),
638 (r'(?:s|ss|tr)\s*(?::[\w\s:]+)?\s*/(?:\\\\|\\/|.)*?/(?:\\\\|\\/|.)*?/',
639 String.Regex),
640 (r'<[^\s=].*?\S>', String),
641 (_build_word_match(PERL6_OPERATORS), Operator),
642 (r'\w' + PERL6_IDENTIFIER_RANGE + '*', Name),
643 (r"'(\\\\|\\[^\\]|[^'\\])*'", String),
644 (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
645 ],
646 'root': [
647 include('common'),
648 (r'\{', opening_brace_callback),
649 (r'\}', closing_brace_callback),
650 (r'.+?', Text),
651 ],
652 'pre-token': [
653 include('common'),
654 (r'\{', Text, ('#pop', 'token')),
655 (r'.+?', Text),
656 ],
657 'token-sym-brackets': [
658 (r'(?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS) + '])(?P=first_char)*)',
659 brackets_callback(Name), ('#pop', 'pre-token')),
660 default(('#pop', 'pre-token')),
661 ],
662 'token': [
663 (r'\}', Text, '#pop'),
664 (r'(?<=:)(?:my|our|state|constant|temp|let).*?;', using(this)),
665 # make sure that quotes in character classes aren't treated as strings
666 (r'<(?:[-!?+.]\s*)?\[.*?\]>', String.Regex),
667 # make sure that '#' characters in quotes aren't treated as comments
668 (r"(?<!\\)'(\\\\|\\[^\\]|[^'\\])*'", String.Regex),
669 (r'(?<!\\)"(\\\\|\\[^\\]|[^"\\])*"', String.Regex),
670 (r'#.*?$', Comment.Single),
671 (r'\{', embedded_perl6_callback),
672 ('.+?', String.Regex),
673 ],
674 }
675
def analyse_text(text):
    """
    Rate how likely it is that `text` is Raku (Perl 6) source code.

    Pod sections are removed first.  ``True`` is returned for a matching
    shebang, for a leading ``v6`` / ``use v6`` statement, and for a
    leading package-like declaration (module, class, role, enum, grammar)
    that is either scoped with ``my``/``our`` or accompanied by a
    ``my``/``our``/``has`` declaration elsewhere in the text.  Otherwise a
    rating is returned: 0.8 when such a declaration was seen, 0.05 when
    only an unscoped package-like declaration leads the code, ``False``
    when neither applies.  A rating is halved if ``:=`` occurs in the
    text.
    """
    def strip_pod(lines):
        # Drop Pod directive lines and everything between a directive and
        # the next ``=end`` / ``=cut`` line.
        kept = []
        inside_pod = False

        for candidate in lines:
            if re.match(r'^=(?:end|cut)', candidate):
                inside_pod = False
                continue
            if re.match(r'^=\w+', candidate):
                inside_pod = True
                continue
            if not inside_pod:
                kept.append(candidate)

        return kept

    # XXX handle block comments
    lines = strip_pod(text.splitlines())
    text = '\n'.join(lines)

    if shebang_matches(text, r'perl6|rakudo|niecza|pugs'):
        return True

    # check for my/our/has declarations
    declaration_re = (r"(?:my|our|has)\s+(?:" + Perl6Lexer.PERL6_IDENTIFIER_RANGE +
                      r"+\s+)?[$@%&(]")
    saw_perl_decl = re.search(declaration_re, text) is not None
    rating = 0.8 if saw_perl_decl else False

    # Only the leading statements are inspected: the scan stops at the
    # first non-blank line that is not a declaration of interest.
    for line in lines:
        code = re.sub('#.*', '', line)
        if re.match(r'^\s*$', code):
            continue

        # match v6; use v6; use v6.0; use v6.0.0;
        if re.match(r'^\s*(?:use\s+)?v6(?:\.\d(?:\.\d)?)?;', code):
            return True

        # match class, module, role, enum, grammar declarations
        class_decl = re.match(
            r'^\s*(?:(?P<scope>my|our)\s+)?(?:module|class|role|enum|grammar)',
            code)
        if class_decl is None:
            break
        if saw_perl_decl or class_decl.group('scope') is not None:
            return True
        rating = 0.05

    if ':=' in text:
        # Same reasoning as in PerlLexer: ``:=`` lowers the confidence.
        rating /= 2

    return rating
730
def __init__(self, **options):
    """
    Initialise the lexer with `options`.

    After the base class has been initialised, ``self.encoding`` is set
    to the ``encoding`` option, falling back to ``'utf-8'`` when that
    option is not given.
    """
    encoding = options.get('encoding', 'utf-8')
    super().__init__(**options)
    self.encoding = encoding