Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/perl.py: 31%

136 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-01 06:54 +0000

1""" 

2 pygments.lexers.perl 

3 ~~~~~~~~~~~~~~~~~~~~ 

4 

5 Lexers for Perl, Raku and related languages. 

6 

7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11import re 

12 

13from pygments.lexer import RegexLexer, ExtendedRegexLexer, include, bygroups, \ 

14 using, this, default, words 

15from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ 

16 Number, Punctuation, Whitespace 

17from pygments.util import shebang_matches 

18 

19__all__ = ['PerlLexer', 'Perl6Lexer'] 

20 

21 

class PerlLexer(RegexLexer):
    """
    For Perl source code.

    The lexer is a plain state machine: ``root`` handles ordinary code and
    pushes helper states for formats, variable names, bare names, sub
    declarations, bracket-delimited quote-like strings and the text that
    follows ``__END__``.
    """

    name = 'Perl'
    url = 'https://www.perl.org'
    aliases = ['perl', 'pl']
    filenames = ['*.pl', '*.pm', '*.t', '*.perl']
    mimetypes = ['text/x-perl', 'application/x-perl']

    # DOTALL lets '.' cross newlines (POD blocks, heredocs); MULTILINE makes
    # '^' and '$' match at every line boundary.
    flags = re.DOTALL | re.MULTILINE
    # TODO: give this to a perl guy who knows how to parse perl...
    tokens = {
        # Second half of a substitution written with bracketing delimiters
        # (entered after ``s{...}`` etc.), or the body of ``m`` followed by
        # one of the delimiters below.  Every rule pops back to the caller.
        'balanced-regex': [
            (r'/(\\\\|\\[^\\]|[^\\/])*/[egimosx]*', String.Regex, '#pop'),
            (r'!(\\\\|\\[^\\]|[^\\!])*![egimosx]*', String.Regex, '#pop'),
            (r'\\(\\\\|[^\\])*\\[egimosx]*', String.Regex, '#pop'),
            (r'\{(\\\\|\\[^\\]|[^\\}])*\}[egimosx]*', String.Regex, '#pop'),
            (r'<(\\\\|\\[^\\]|[^\\>])*>[egimosx]*', String.Regex, '#pop'),
            (r'\[(\\\\|\\[^\\]|[^\\\]])*\][egimosx]*', String.Regex, '#pop'),
            (r'\((\\\\|\\[^\\]|[^\\)])*\)[egimosx]*', String.Regex, '#pop'),
            (r'@(\\\\|\\[^\\]|[^\\@])*@[egimosx]*', String.Regex, '#pop'),
            (r'%(\\\\|\\[^\\]|[^\\%])*%[egimosx]*', String.Regex, '#pop'),
            (r'\$(\\\\|\\[^\\]|[^\\$])*\$[egimosx]*', String.Regex, '#pop'),
        ],
        'root': [
            # \A restricts the hashbang rule to the very start of the input.
            (r'\A\#!.+?$', Comment.Hashbang),
            (r'\#.*?$', Comment.Single),
            # POD block: from a '=directive' line up to the next '=cut'.
            (r'^=[a-zA-Z0-9]+\s+.*?\n=cut', Comment.Multiline),
            (words((
                'case', 'continue', 'do', 'else', 'elsif', 'for', 'foreach',
                'if', 'last', 'my', 'next', 'our', 'redo', 'reset', 'then',
                'unless', 'until', 'while', 'print', 'new', 'BEGIN',
                'CHECK', 'INIT', 'END', 'return'), suffix=r'\b'),
             Keyword),
            (r'(format)(\s+)(\w+)(\s*)(=)(\s*\n)',
             bygroups(Keyword, Whitespace, Name, Whitespace, Punctuation, Whitespace), 'format'),
            (r'(eq|lt|gt|le|ge|ne|not|and|or|cmp)\b', Operator.Word),
            # common delimiters
            (r's/(\\\\|\\[^\\]|[^\\/])*/(\\\\|\\[^\\]|[^\\/])*/[egimosx]*',
                String.Regex),
            (r's!(\\\\|\\!|[^!])*!(\\\\|\\!|[^!])*![egimosx]*', String.Regex),
            (r's\\(\\\\|[^\\])*\\(\\\\|[^\\])*\\[egimosx]*', String.Regex),
            (r's@(\\\\|\\[^\\]|[^\\@])*@(\\\\|\\[^\\]|[^\\@])*@[egimosx]*',
                String.Regex),
            (r's%(\\\\|\\[^\\]|[^\\%])*%(\\\\|\\[^\\]|[^\\%])*%[egimosx]*',
                String.Regex),
            # balanced delimiters
            (r's\{(\\\\|\\[^\\]|[^\\}])*\}\s*', String.Regex, 'balanced-regex'),
            (r's<(\\\\|\\[^\\]|[^\\>])*>\s*', String.Regex, 'balanced-regex'),
            (r's\[(\\\\|\\[^\\]|[^\\\]])*\]\s*', String.Regex,
                'balanced-regex'),
            (r's\((\\\\|\\[^\\]|[^\\)])*\)\s*', String.Regex,
                'balanced-regex'),

            (r'm?/(\\\\|\\[^\\]|[^\\/\n])*/[gcimosx]*', String.Regex),
            (r'm(?=[/!\\{<\[(@%$])', String.Regex, 'balanced-regex'),
            (r'((?<==~)|(?<=\())\s*/(\\\\|\\[^\\]|[^\\/])*/[gcimosx]*',
                String.Regex),
            (r'\s+', Whitespace),
            (words((
                'abs', 'accept', 'alarm', 'atan2', 'bind', 'binmode', 'bless', 'caller', 'chdir',
                'chmod', 'chomp', 'chop', 'chown', 'chr', 'chroot', 'close', 'closedir', 'connect',
                'continue', 'cos', 'crypt', 'dbmclose', 'dbmopen', 'defined', 'delete', 'die',
                'dump', 'each', 'endgrent', 'endhostent', 'endnetent', 'endprotoent',
                'endpwent', 'endservent', 'eof', 'eval', 'exec', 'exists', 'exit', 'exp', 'fcntl',
                'fileno', 'flock', 'fork', 'format', 'formline', 'getc', 'getgrent', 'getgrgid',
                'getgrnam', 'gethostbyaddr', 'gethostbyname', 'gethostent', 'getlogin',
                'getnetbyaddr', 'getnetbyname', 'getnetent', 'getpeername', 'getpgrp',
                'getppid', 'getpriority', 'getprotobyname', 'getprotobynumber',
                'getprotoent', 'getpwent', 'getpwnam', 'getpwuid', 'getservbyname',
                'getservbyport', 'getservent', 'getsockname', 'getsockopt', 'glob', 'gmtime',
                'goto', 'grep', 'hex', 'import', 'index', 'int', 'ioctl', 'join', 'keys', 'kill', 'last',
                'lc', 'lcfirst', 'length', 'link', 'listen', 'local', 'localtime', 'log', 'lstat',
                'map', 'mkdir', 'msgctl', 'msgget', 'msgrcv', 'msgsnd', 'my', 'next', 'oct', 'open',
                'opendir', 'ord', 'our', 'pack', 'pipe', 'pop', 'pos', 'printf',
                'prototype', 'push', 'quotemeta', 'rand', 'read', 'readdir',
                'readline', 'readlink', 'readpipe', 'recv', 'redo', 'ref', 'rename',
                'reverse', 'rewinddir', 'rindex', 'rmdir', 'scalar', 'seek', 'seekdir',
                'select', 'semctl', 'semget', 'semop', 'send', 'setgrent', 'sethostent', 'setnetent',
                'setpgrp', 'setpriority', 'setprotoent', 'setpwent', 'setservent',
                'setsockopt', 'shift', 'shmctl', 'shmget', 'shmread', 'shmwrite', 'shutdown',
                'sin', 'sleep', 'socket', 'socketpair', 'sort', 'splice', 'split', 'sprintf', 'sqrt',
                'srand', 'stat', 'study', 'substr', 'symlink', 'syscall', 'sysopen', 'sysread',
                'sysseek', 'system', 'syswrite', 'tell', 'telldir', 'tie', 'tied', 'time', 'times', 'tr',
                'truncate', 'uc', 'ucfirst', 'umask', 'undef', 'unlink', 'unpack', 'unshift', 'untie',
                'utime', 'values', 'vec', 'wait', 'waitpid', 'wantarray', 'warn', 'write'), suffix=r'\b'),
             Name.Builtin),
            (r'((__(DATA|DIE|WARN)__)|(STD(IN|OUT|ERR)))\b', Name.Builtin.Pseudo),
            # Heredoc: '<<' + optionally quoted tag, body, then the same tag
            # alone on a line (\2 is the quote, \3 the tag).
            (r'(<<)([\'"]?)([a-zA-Z_]\w*)(\2;?\n.*?\n)(\3)(\n)',
             bygroups(String, String, String.Delimiter, String, String.Delimiter, Whitespace)),
            (r'__END__', Comment.Preproc, 'end-part'),
            (r'\$\^[ADEFHILMOPSTWX]', Name.Variable.Global),
            (r"\$[\\\"\[\]'&`+*.,;=%~?@$!<>(^|/-](?!\w)", Name.Variable.Global),
            (r'[$@%#]+', Name.Variable, 'varname'),
            (r'0_?[0-7]+(_[0-7]+)*', Number.Oct),
            (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex),
            (r'0b[01]+(_[01]+)*', Number.Bin),
            (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?',
             Number.Float),
            (r'(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*', Number.Float),
            (r'\d+(_\d+)*', Number.Integer),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
            (r'`(\\\\|\\[^\\]|[^`\\])*`', String.Backtick),
            (r'<([^\s>]+)>', String.Regex),
            # Quote-like operators with bracketing delimiters get their own
            # states so nested brackets can be tracked on the state stack.
            (r'(q|qq|qw|qr|qx)\{', String.Other, 'cb-string'),
            (r'(q|qq|qw|qr|qx)\(', String.Other, 'rb-string'),
            (r'(q|qq|qw|qr|qx)\[', String.Other, 'sb-string'),
            (r'(q|qq|qw|qr|qx)\<', String.Other, 'lt-string'),
            # Any other single delimiter: lazily scan to its next occurrence.
            (r'(q|qq|qw|qr|qx)([\W_])(.|\n)*?\2', String.Other),
            (r'(package)(\s+)([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)',
             bygroups(Keyword, Whitespace, Name.Namespace)),
            (r'(use|require|no)(\s+)([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)',
             bygroups(Keyword, Whitespace, Name.Namespace)),
            (r'(sub)(\s+)', bygroups(Keyword, Whitespace), 'funcname'),
            (words((
                'no', 'package', 'require', 'use'), suffix=r'\b'),
             Keyword),
            (r'(\[\]|\*\*|::|<<|>>|>=|<=>|<=|={3}|!=|=~|'
             r'!~|&&?|\|\||\.{1,3})', Operator),
            (r'[-+/*%=<>&^|!\\~]=?', Operator),
            (r'[()\[\]:;,<>/?{}]', Punctuation),  # yes, there's no shortage
                                                  # of punctuation in Perl!
            # Zero-width: hand any remaining word character to 'name'.
            (r'(?=\w)', Name, 'name'),
        ],
        # Body of a ``format NAME =`` block; a line holding only '.' ends it.
        'format': [
            (r'\.\n', String.Interpol, '#pop'),
            (r'[^\n]*\n', String.Interpol),
        ],
        'varname': [
            (r'\s+', Whitespace),
            (r'\{', Punctuation, '#pop'),    # hash syntax?
            (r'\)|,', Punctuation, '#pop'),  # argument specifier
            (r'\w+::', Name.Namespace),
            (r'[\w:]+', Name.Variable, '#pop'),
        ],
        'name': [
            (r'[a-zA-Z_]\w*(::[a-zA-Z_]\w*)*(::)?(?=\s*->)', Name.Namespace, '#pop'),
            (r'[a-zA-Z_]\w*(::[a-zA-Z_]\w*)*::', Name.Namespace, '#pop'),
            (r'[\w:]+', Name, '#pop'),
            # NOTE(review): the rule above already matches every string that
            # '[A-Z_]+' can match, so this Name.Constant rule looks
            # unreachable.  Left in place to keep token output unchanged.
            (r'[A-Z_]+(?=\W)', Name.Constant, '#pop'),
            (r'(?=\W)', Text, '#pop'),
        ],
        'funcname': [
            (r'[a-zA-Z_]\w*[!?]?', Name.Function),
            (r'\s+', Whitespace),
            # argument declaration
            (r'(\([$@%]*\))(\s*)', bygroups(Punctuation, Whitespace)),
            (r';', Punctuation, '#pop'),
            (r'.*?\{', Punctuation, '#pop'),
        ],
        # The four bracket-string states share one shape: escaped delimiter
        # or backslash, lone backslash, nested opener (push), closer (pop),
        # then a run of ordinary characters.  The final run must exclude the
        # backslash; otherwise it swallows a backslash in the middle of the
        # text and the escaped closer that follows ends the string early.
        'cb-string': [
            (r'\\[{}\\]', String.Other),
            (r'\\', String.Other),
            (r'\{', String.Other, 'cb-string'),
            (r'\}', String.Other, '#pop'),
            (r'[^{}\\]+', String.Other)
        ],
        'rb-string': [
            (r'\\[()\\]', String.Other),
            (r'\\', String.Other),
            (r'\(', String.Other, 'rb-string'),
            (r'\)', String.Other, '#pop'),
            (r'[^()\\]+', String.Other)
        ],
        'sb-string': [
            (r'\\[\[\]\\]', String.Other),
            (r'\\', String.Other),
            (r'\[', String.Other, 'sb-string'),
            (r'\]', String.Other, '#pop'),
            (r'[^\[\]\\]+', String.Other)
        ],
        'lt-string': [
            (r'\\[<>\\]', String.Other),
            (r'\\', String.Other),
            (r'\<', String.Other, 'lt-string'),
            (r'\>', String.Other, '#pop'),
            (r'[^<>\\]+', String.Other)
        ],
        # With DOTALL in effect '.+' takes everything after __END__.
        'end-part': [
            (r'.+', Comment.Preproc, '#pop')
        ]
    }

    def analyse_text(text):
        """
        Estimate how likely *text* is Perl.

        Returns ``True`` when the text starts with a shebang line matching
        ``perl``.  Otherwise returns a score: 0.9 if a ``my``/``our``
        declaration followed by a sigil or an opening parenthesis is found,
        halved when ``:=`` occurs anywhere in the text, and 0 if no
        declaration is found.
        """
        if shebang_matches(text, r'perl'):
            return True

        result = 0

        if re.search(r'(?:my|our)\s+[$@%(]', text):
            result += 0.9

        if ':=' in text:
            # := is not valid Perl, but it appears in unicon, so we should
            # become less confident if we think we found Perl with :=
            result /= 2

        return result

223 

224 

225class Perl6Lexer(ExtendedRegexLexer): 

226 """ 

227 For Raku (a.k.a. Perl 6) source code. 

228 

229 .. versionadded:: 2.0 

230 """ 

231 

232 name = 'Perl6' 

233 url = 'https://www.raku.org' 

234 aliases = ['perl6', 'pl6', 'raku'] 

235 filenames = ['*.pl', '*.pm', '*.nqp', '*.p6', '*.6pl', '*.p6l', '*.pl6', 

236 '*.6pm', '*.p6m', '*.pm6', '*.t', '*.raku', '*.rakumod', 

237 '*.rakutest', '*.rakudoc'] 

238 mimetypes = ['text/x-perl6', 'application/x-perl6'] 

239 flags = re.MULTILINE | re.DOTALL 

240 

241 PERL6_IDENTIFIER_RANGE = r"['\w:-]" 

242 

243 PERL6_KEYWORDS = ( 

244 #Phasers 

245 'BEGIN','CATCH','CHECK','CLOSE','CONTROL','DOC','END','ENTER','FIRST', 

246 'INIT','KEEP','LAST','LEAVE','NEXT','POST','PRE','QUIT','UNDO', 

247 #Keywords 

248 'anon','augment','but','class','constant','default','does','else', 

249 'elsif','enum','for','gather','given','grammar','has','if','import', 

250 'is','let','loop','made','make','method','module','multi','my','need', 

251 'orwith','our','proceed','proto','repeat','require','return', 

252 'return-rw','returns','role','rule','state','sub','submethod','subset', 

253 'succeed','supersede','token','try','unit','unless','until','use', 

254 'when','while','with','without', 

255 #Traits 

256 'export','native','repr','required','rw','symbol', 

257 ) 

258 

259 PERL6_BUILTINS = ( 

260 'ACCEPTS','abs','abs2rel','absolute','accept','accessed','acos', 

261 'acosec','acosech','acosh','acotan','acotanh','acquire','act','action', 

262 'actions','add','add_attribute','add_enum_value','add_fallback', 

263 'add_method','add_parent','add_private_method','add_role','add_trustee', 

264 'adverb','after','all','allocate','allof','allowed','alternative-names', 

265 'annotations','antipair','antipairs','any','anyof','app_lifetime', 

266 'append','arch','archname','args','arity','Array','asec','asech','asin', 

267 'asinh','ASSIGN-KEY','ASSIGN-POS','assuming','ast','at','atan','atan2', 

268 'atanh','AT-KEY','atomic-assign','atomic-dec-fetch','atomic-fetch', 

269 'atomic-fetch-add','atomic-fetch-dec','atomic-fetch-inc', 

270 'atomic-fetch-sub','atomic-inc-fetch','AT-POS','attributes','auth', 

271 'await','backtrace','Bag','BagHash','bail-out','base','basename', 

272 'base-repeating','batch','BIND-KEY','BIND-POS','bind-stderr', 

273 'bind-stdin','bind-stdout','bind-udp','bits','bless','block','Bool', 

274 'bool-only','bounds','break','Bridge','broken','BUILD','build-date', 

275 'bytes','cache','callframe','calling-package','CALL-ME','callsame', 

276 'callwith','can','cancel','candidates','cando','can-ok','canonpath', 

277 'caps','caption','Capture','cas','catdir','categorize','categorize-list', 

278 'catfile','catpath','cause','ceiling','cglobal','changed','Channel', 

279 'chars','chdir','child','child-name','child-typename','chmod','chomp', 

280 'chop','chr','chrs','chunks','cis','classify','classify-list','cleanup', 

281 'clone','close','closed','close-stdin','cmp-ok','code','codes','collate', 

282 'column','comb','combinations','command','comment','compiler','Complex', 

283 'compose','compose_type','composer','condition','config', 

284 'configure_destroy','configure_type_checking','conj','connect', 

285 'constraints','construct','contains','contents','copy','cos','cosec', 

286 'cosech','cosh','cotan','cotanh','count','count-only','cpu-cores', 

287 'cpu-usage','CREATE','create_type','cross','cue','curdir','curupdir','d', 

288 'Date','DateTime','day','daycount','day-of-month','day-of-week', 

289 'day-of-year','days-in-month','declaration','decode','decoder','deepmap', 

290 'default','defined','DEFINITE','delayed','DELETE-KEY','DELETE-POS', 

291 'denominator','desc','DESTROY','destroyers','devnull','diag', 

292 'did-you-mean','die','dies-ok','dir','dirname','dir-sep','DISTROnames', 

293 'do','does','does-ok','done','done-testing','duckmap','dynamic','e', 

294 'eager','earlier','elems','emit','enclosing','encode','encoder', 

295 'encoding','end','ends-with','enum_from_value','enum_value_list', 

296 'enum_values','enums','eof','EVAL','eval-dies-ok','EVALFILE', 

297 'eval-lives-ok','exception','excludes-max','excludes-min','EXISTS-KEY', 

298 'EXISTS-POS','exit','exitcode','exp','expected','explicitly-manage', 

299 'expmod','extension','f','fail','fails-like','fc','feature','file', 

300 'filename','find_method','find_method_qualified','finish','first','flat', 

301 'flatmap','flip','floor','flunk','flush','fmt','format','formatter', 

302 'freeze','from','from-list','from-loop','from-posix','full', 

303 'full-barrier','get','get_value','getc','gist','got','grab','grabpairs', 

304 'grep','handle','handled','handles','hardware','has_accessor','Hash', 

305 'head','headers','hh-mm-ss','hidden','hides','hour','how','hyper','id', 

306 'illegal','im','in','indent','index','indices','indir','infinite', 

307 'infix','infix:<+>','infix:<->','install_method_cache','Instant', 

308 'instead','Int','int-bounds','interval','in-timezone','invalid-str', 

309 'invert','invocant','IO','IO::Notification.watch-path','is_trusted', 

310 'is_type','isa','is-absolute','isa-ok','is-approx','is-deeply', 

311 'is-hidden','is-initial-thread','is-int','is-lazy','is-leap-year', 

312 'isNaN','isnt','is-prime','is-relative','is-routine','is-setting', 

313 'is-win','item','iterator','join','keep','kept','KERNELnames','key', 

314 'keyof','keys','kill','kv','kxxv','l','lang','last','lastcall','later', 

315 'lazy','lc','leading','level','like','line','lines','link','List', 

316 'listen','live','lives-ok','local','lock','log','log10','lookup','lsb', 

317 'made','MAIN','make','Map','match','max','maxpairs','merge','message', 

318 'method','method_table','methods','migrate','min','minmax','minpairs', 

319 'minute','misplaced','Mix','MixHash','mkdir','mode','modified','month', 

320 'move','mro','msb','multi','multiness','my','name','named','named_names', 

321 'narrow','nativecast','native-descriptor','nativesizeof','new','new_type', 

322 'new-from-daycount','new-from-pairs','next','nextcallee','next-handle', 

323 'nextsame','nextwith','NFC','NFD','NFKC','NFKD','nl-in','nl-out', 

324 'nodemap','nok','none','norm','not','note','now','nude','Num', 

325 'numerator','Numeric','of','offset','offset-in-hours','offset-in-minutes', 

326 'ok','old','on-close','one','on-switch','open','opened','operation', 

327 'optional','ord','ords','orig','os-error','osname','out-buffer','pack', 

328 'package','package-kind','package-name','packages','pair','pairs', 

329 'pairup','parameter','params','parent','parent-name','parents','parse', 

330 'parse-base','parsefile','parse-names','parts','pass','path','path-sep', 

331 'payload','peer-host','peer-port','periods','perl','permutations','phaser', 

332 'pick','pickpairs','pid','placeholder','plan','plus','polar','poll', 

333 'polymod','pop','pos','positional','posix','postfix','postmatch', 

334 'precomp-ext','precomp-target','pred','prefix','prematch','prepend', 

335 'print','printf','print-nl','print-to','private','private_method_table', 

336 'proc','produce','Promise','prompt','protect','pull-one','push', 

337 'push-all','push-at-least','push-exactly','push-until-lazy','put', 

338 'qualifier-type','quit','r','race','radix','rand','range','Rat','raw', 

339 're','read','readchars','readonly','ready','Real','reallocate','reals', 

340 'reason','rebless','receive','recv','redispatcher','redo','reduce', 

341 'rel2abs','relative','release','rename','repeated','replacement', 

342 'report','reserved','resolve','restore','result','resume','rethrow', 

343 'reverse','right','rindex','rmdir','role','roles_to_compose','rolish', 

344 'roll','rootdir','roots','rotate','rotor','round','roundrobin', 

345 'routine-type','run','rwx','s','samecase','samemark','samewith','say', 

346 'schedule-on','scheduler','scope','sec','sech','second','seek','self', 

347 'send','Set','set_hidden','set_name','set_package','set_rw','set_value', 

348 'SetHash','set-instruments','setup_finalization','shape','share','shell', 

349 'shift','sibling','sigil','sign','signal','signals','signature','sin', 

350 'sinh','sink','sink-all','skip','skip-at-least','skip-at-least-pull-one', 

351 'skip-one','skip-rest','sleep','sleep-timer','sleep-until','Slip','slurp', 

352 'slurp-rest','slurpy','snap','snapper','so','socket-host','socket-port', 

353 'sort','source','source-package','spawn','SPEC','splice','split', 

354 'splitdir','splitpath','sprintf','spurt','sqrt','squish','srand','stable', 

355 'start','started','starts-with','status','stderr','stdout','Str', 

356 'sub_signature','subbuf','subbuf-rw','subname','subparse','subst', 

357 'subst-mutate','substr','substr-eq','substr-rw','subtest','succ','sum', 

358 'Supply','symlink','t','tail','take','take-rw','tan','tanh','tap', 

359 'target','target-name','tc','tclc','tell','then','throttle','throw', 

360 'throws-like','timezone','tmpdir','to','today','todo','toggle','to-posix', 

361 'total','trailing','trans','tree','trim','trim-leading','trim-trailing', 

362 'truncate','truncated-to','trusts','try_acquire','trying','twigil','type', 

363 'type_captures','typename','uc','udp','uncaught_handler','unimatch', 

364 'uniname','uninames','uniparse','uniprop','uniprops','unique','unival', 

365 'univals','unlike','unlink','unlock','unpack','unpolar','unshift', 

366 'unwrap','updir','USAGE','use-ok','utc','val','value','values','VAR', 

367 'variable','verbose-config','version','VMnames','volume','vow','w','wait', 

368 'warn','watch','watch-path','week','weekday-of-month','week-number', 

369 'week-year','WHAT','when','WHERE','WHEREFORE','WHICH','WHO', 

370 'whole-second','WHY','wordcase','words','workaround','wrap','write', 

371 'write-to','x','yada','year','yield','yyyy-mm-dd','z','zip','zip-latest', 

372 

373 ) 

374 

375 PERL6_BUILTIN_CLASSES = ( 

376 #Booleans 

377 'False','True', 

378 #Classes 

379 'Any','Array','Associative','AST','atomicint','Attribute','Backtrace', 

380 'Backtrace::Frame','Bag','Baggy','BagHash','Blob','Block','Bool','Buf', 

381 'Callable','CallFrame','Cancellation','Capture','CArray','Channel','Code', 

382 'compiler','Complex','ComplexStr','Cool','CurrentThreadScheduler', 

383 'Cursor','Date','Dateish','DateTime','Distro','Duration','Encoding', 

384 'Exception','Failure','FatRat','Grammar','Hash','HyperWhatever','Instant', 

385 'Int','int16','int32','int64','int8','IntStr','IO','IO::ArgFiles', 

386 'IO::CatHandle','IO::Handle','IO::Notification','IO::Path', 

387 'IO::Path::Cygwin','IO::Path::QNX','IO::Path::Unix','IO::Path::Win32', 

388 'IO::Pipe','IO::Socket','IO::Socket::Async','IO::Socket::INET','IO::Spec', 

389 'IO::Spec::Cygwin','IO::Spec::QNX','IO::Spec::Unix','IO::Spec::Win32', 

390 'IO::Special','Iterable','Iterator','Junction','Kernel','Label','List', 

391 'Lock','Lock::Async','long','longlong','Macro','Map','Match', 

392 'Metamodel::AttributeContainer','Metamodel::C3MRO','Metamodel::ClassHOW', 

393 'Metamodel::EnumHOW','Metamodel::Finalization','Metamodel::MethodContainer', 

394 'Metamodel::MROBasedMethodDispatch','Metamodel::MultipleInheritance', 

395 'Metamodel::Naming','Metamodel::Primitives','Metamodel::PrivateMethodContainer', 

396 'Metamodel::RoleContainer','Metamodel::Trusting','Method','Mix','MixHash', 

397 'Mixy','Mu','NFC','NFD','NFKC','NFKD','Nil','Num','num32','num64', 

398 'Numeric','NumStr','ObjAt','Order','Pair','Parameter','Perl','Pod::Block', 

399 'Pod::Block::Code','Pod::Block::Comment','Pod::Block::Declarator', 

400 'Pod::Block::Named','Pod::Block::Para','Pod::Block::Table','Pod::Heading', 

401 'Pod::Item','Pointer','Positional','PositionalBindFailover','Proc', 

402 'Proc::Async','Promise','Proxy','PseudoStash','QuantHash','Range','Rat', 

403 'Rational','RatStr','Real','Regex','Routine','Scalar','Scheduler', 

404 'Semaphore','Seq','Set','SetHash','Setty','Signature','size_t','Slip', 

405 'Stash','Str','StrDistance','Stringy','Sub','Submethod','Supplier', 

406 'Supplier::Preserving','Supply','Systemic','Tap','Telemetry', 

407 'Telemetry::Instrument::Thread','Telemetry::Instrument::Usage', 

408 'Telemetry::Period','Telemetry::Sampler','Thread','ThreadPoolScheduler', 

409 'UInt','uint16','uint32','uint64','uint8','Uni','utf8','Variable', 

410 'Version','VM','Whatever','WhateverCode','WrapHandle' 

411 ) 

412 

413 PERL6_OPERATORS = ( 

414 'X', 'Z', 'after', 'also', 'and', 'andthen', 'before', 'cmp', 'div', 

415 'eq', 'eqv', 'extra', 'ff', 'fff', 'ge', 'gt', 'le', 'leg', 'lt', 'm', 

416 'mm', 'mod', 'ne', 'or', 'orelse', 'rx', 's', 'tr', 'x', 'xor', 'xx', 

417 '++', '--', '**', '!', '+', '-', '~', '?', '|', '||', '+^', '~^', '?^', 

418 '^', '*', '/', '%', '%%', '+&', '+<', '+>', '~&', '~<', '~>', '?&', 

419 'gcd', 'lcm', '+', '-', '+|', '+^', '~|', '~^', '?|', '?^', 

420 '~', '&', '^', 'but', 'does', '<=>', '..', '..^', '^..', '^..^', 

421 '!=', '==', '<', '<=', '>', '>=', '~~', '===', '!eqv', 

422 '&&', '||', '^^', '//', 'min', 'max', '??', '!!', 'ff', 'fff', 'so', 

423 'not', '<==', '==>', '<<==', '==>>','unicmp', 

424 ) 

425 

426 # Perl 6 has a *lot* of possible bracketing characters 

427 # this list was lifted from STD.pm6 (https://github.com/perl6/std) 

428 PERL6_BRACKETS = { 

429 '\u0028': '\u0029', '\u003c': '\u003e', '\u005b': '\u005d', 

430 '\u007b': '\u007d', '\u00ab': '\u00bb', '\u0f3a': '\u0f3b', 

431 '\u0f3c': '\u0f3d', '\u169b': '\u169c', '\u2018': '\u2019', 

432 '\u201a': '\u2019', '\u201b': '\u2019', '\u201c': '\u201d', 

433 '\u201e': '\u201d', '\u201f': '\u201d', '\u2039': '\u203a', 

434 '\u2045': '\u2046', '\u207d': '\u207e', '\u208d': '\u208e', 

435 '\u2208': '\u220b', '\u2209': '\u220c', '\u220a': '\u220d', 

436 '\u2215': '\u29f5', '\u223c': '\u223d', '\u2243': '\u22cd', 

437 '\u2252': '\u2253', '\u2254': '\u2255', '\u2264': '\u2265', 

438 '\u2266': '\u2267', '\u2268': '\u2269', '\u226a': '\u226b', 

439 '\u226e': '\u226f', '\u2270': '\u2271', '\u2272': '\u2273', 

440 '\u2274': '\u2275', '\u2276': '\u2277', '\u2278': '\u2279', 

441 '\u227a': '\u227b', '\u227c': '\u227d', '\u227e': '\u227f', 

442 '\u2280': '\u2281', '\u2282': '\u2283', '\u2284': '\u2285', 

443 '\u2286': '\u2287', '\u2288': '\u2289', '\u228a': '\u228b', 

444 '\u228f': '\u2290', '\u2291': '\u2292', '\u2298': '\u29b8', 

445 '\u22a2': '\u22a3', '\u22a6': '\u2ade', '\u22a8': '\u2ae4', 

446 '\u22a9': '\u2ae3', '\u22ab': '\u2ae5', '\u22b0': '\u22b1', 

447 '\u22b2': '\u22b3', '\u22b4': '\u22b5', '\u22b6': '\u22b7', 

448 '\u22c9': '\u22ca', '\u22cb': '\u22cc', '\u22d0': '\u22d1', 

449 '\u22d6': '\u22d7', '\u22d8': '\u22d9', '\u22da': '\u22db', 

450 '\u22dc': '\u22dd', '\u22de': '\u22df', '\u22e0': '\u22e1', 

451 '\u22e2': '\u22e3', '\u22e4': '\u22e5', '\u22e6': '\u22e7', 

452 '\u22e8': '\u22e9', '\u22ea': '\u22eb', '\u22ec': '\u22ed', 

453 '\u22f0': '\u22f1', '\u22f2': '\u22fa', '\u22f3': '\u22fb', 

454 '\u22f4': '\u22fc', '\u22f6': '\u22fd', '\u22f7': '\u22fe', 

455 '\u2308': '\u2309', '\u230a': '\u230b', '\u2329': '\u232a', 

456 '\u23b4': '\u23b5', '\u2768': '\u2769', '\u276a': '\u276b', 

457 '\u276c': '\u276d', '\u276e': '\u276f', '\u2770': '\u2771', 

458 '\u2772': '\u2773', '\u2774': '\u2775', '\u27c3': '\u27c4', 

459 '\u27c5': '\u27c6', '\u27d5': '\u27d6', '\u27dd': '\u27de', 

460 '\u27e2': '\u27e3', '\u27e4': '\u27e5', '\u27e6': '\u27e7', 

461 '\u27e8': '\u27e9', '\u27ea': '\u27eb', '\u2983': '\u2984', 

462 '\u2985': '\u2986', '\u2987': '\u2988', '\u2989': '\u298a', 

463 '\u298b': '\u298c', '\u298d': '\u298e', '\u298f': '\u2990', 

464 '\u2991': '\u2992', '\u2993': '\u2994', '\u2995': '\u2996', 

465 '\u2997': '\u2998', '\u29c0': '\u29c1', '\u29c4': '\u29c5', 

466 '\u29cf': '\u29d0', '\u29d1': '\u29d2', '\u29d4': '\u29d5', 

467 '\u29d8': '\u29d9', '\u29da': '\u29db', '\u29f8': '\u29f9', 

468 '\u29fc': '\u29fd', '\u2a2b': '\u2a2c', '\u2a2d': '\u2a2e', 

469 '\u2a34': '\u2a35', '\u2a3c': '\u2a3d', '\u2a64': '\u2a65', 

470 '\u2a79': '\u2a7a', '\u2a7d': '\u2a7e', '\u2a7f': '\u2a80', 

471 '\u2a81': '\u2a82', '\u2a83': '\u2a84', '\u2a8b': '\u2a8c', 

472 '\u2a91': '\u2a92', '\u2a93': '\u2a94', '\u2a95': '\u2a96', 

473 '\u2a97': '\u2a98', '\u2a99': '\u2a9a', '\u2a9b': '\u2a9c', 

474 '\u2aa1': '\u2aa2', '\u2aa6': '\u2aa7', '\u2aa8': '\u2aa9', 

475 '\u2aaa': '\u2aab', '\u2aac': '\u2aad', '\u2aaf': '\u2ab0', 

476 '\u2ab3': '\u2ab4', '\u2abb': '\u2abc', '\u2abd': '\u2abe', 

477 '\u2abf': '\u2ac0', '\u2ac1': '\u2ac2', '\u2ac3': '\u2ac4', 

478 '\u2ac5': '\u2ac6', '\u2acd': '\u2ace', '\u2acf': '\u2ad0', 

479 '\u2ad1': '\u2ad2', '\u2ad3': '\u2ad4', '\u2ad5': '\u2ad6', 

480 '\u2aec': '\u2aed', '\u2af7': '\u2af8', '\u2af9': '\u2afa', 

481 '\u2e02': '\u2e03', '\u2e04': '\u2e05', '\u2e09': '\u2e0a', 

482 '\u2e0c': '\u2e0d', '\u2e1c': '\u2e1d', '\u2e20': '\u2e21', 

483 '\u3008': '\u3009', '\u300a': '\u300b', '\u300c': '\u300d', 

484 '\u300e': '\u300f', '\u3010': '\u3011', '\u3014': '\u3015', 

485 '\u3016': '\u3017', '\u3018': '\u3019', '\u301a': '\u301b', 

486 '\u301d': '\u301e', '\ufd3e': '\ufd3f', '\ufe17': '\ufe18', 

487 '\ufe35': '\ufe36', '\ufe37': '\ufe38', '\ufe39': '\ufe3a', 

488 '\ufe3b': '\ufe3c', '\ufe3d': '\ufe3e', '\ufe3f': '\ufe40', 

489 '\ufe41': '\ufe42', '\ufe43': '\ufe44', '\ufe47': '\ufe48', 

490 '\ufe59': '\ufe5a', '\ufe5b': '\ufe5c', '\ufe5d': '\ufe5e', 

491 '\uff08': '\uff09', '\uff1c': '\uff1e', '\uff3b': '\uff3d', 

492 '\uff5b': '\uff5d', '\uff5f': '\uff60', '\uff62': '\uff63', 

493 } 

494 

def _build_word_match(words, boundary_regex_fragment=None, prefix='', suffix=''):
    """
    Build a regex source string that matches any one of *words* literally.

    Called as a plain function while the class body is being evaluated, to
    generate patterns for the token table.

    :param words: iterable of literal strings; each one is ``re.escape``d.
    :param boundary_regex_fragment: optional single-character regex (for
        example a character class).  When given, a match must be neither
        preceded nor followed by a character matching it, and the word alone
        is captured in group 1.  When ``None``, ``\\b`` word boundaries are
        used and group 1 spans prefix, word and suffix.
    :param prefix: regex fragment required before every word.
    :param suffix: regex fragment required after every word.
    :return: the assembled pattern as a string.
    """
    alternation = r'|'.join(re.escape(word) for word in words)

    if boundary_regex_fragment is None:
        # The alternation needs its own (non-capturing) group: '|' binds
        # looser than concatenation, so without it the prefix would attach
        # only to the first word and the suffix only to the last.
        return r'\b(' + prefix + r'(?:' + alternation + r')' + suffix + r')\b'

    return (r'(?<!' + boundary_regex_fragment + r')' + prefix +
            r'(' + alternation + r')' + suffix +
            r'(?!' + boundary_regex_fragment + r')')

503 

def brackets_callback(token_class):
    """
    Return a lexer callback that emits one *token_class* token spanning a
    delimited construct.

    The callback expects ``match`` to carry a named group ``delimiter`` (one
    or more repeated opening characters) and, optionally, ``adverbs``.  It
    reads ``context.text``, yields a single token from the start of the match
    through the closing delimiter, and sets ``context.pos`` just past it.
    """
    def callback(lexer, match, context):
        named = match.groupdict()
        opener = named['delimiter']
        width = len(opener)
        adverbs = named.get('adverbs')

        source = context.text
        token_start = match.start()
        body_start = match.start('delimiter') + width

        mirror = Perl6Lexer.PERL6_BRACKETS.get(opener[0])

        if mirror is None:
            # Not a mirrored character: the same delimiter closes the
            # construct, so its next occurrence is the end.
            stop = source.find(opener, body_start)
            if stop < 0:
                # No closer at all: highlight the rest of the text.
                stop = len(source)
        else:
            # Mirrored bracket: walk forward, counting nested openers,
            # until the matching closer is reached.
            closing = mirror * width
            depth = 1
            cursor = body_start
            while True:
                open_at = source.find(opener, cursor)
                stop = source.find(closing, cursor)
                if stop == -1:
                    # Unterminated: run to the end of the text.
                    stop = len(source)
                    break
                if open_at != -1 and open_at < stop:
                    depth += 1
                    cursor = open_at + width
                    continue
                depth -= 1
                if depth == 0:
                    break
                cursor = stop + width

        if adverbs is not None and re.search(r':to\b', adverbs):
            # Heredoc form: the delimited text is the terminator; extend the
            # token to the first later line consisting only of it.
            terminator = source[body_start:stop]
            tail = re.search(r'^\s*' + re.escape(terminator) + r'\s*$',
                             source[stop:], re.MULTILINE)
            stop = (stop + tail.end()) if tail else len(source)

        resume = stop + width
        yield token_start, token_class, source[token_start:resume]
        context.pos = resume

    return callback

559 

def opening_brace_callback(lexer, match, context):
    """
    Emit the matched text as ``Text`` and track brace depth inside a token.

    After yielding and advancing ``context.pos`` past the match, the brace
    nesting counter on *context* is incremented, but only when the state
    stack has more than two entries and the entry just below the top is
    ``'token'``.
    """
    states = context.stack
    begin, finish = match.span()

    yield begin, Text, context.text[begin:finish]
    context.pos = finish

    # Only braces seen one level below a 'token' state count towards the
    # nesting level that later decides when to return to the token rules.
    if len(states) <= 2 or states[-2] != 'token':
        return
    context.perl6_token_nesting_level += 1

572 

def closing_brace_callback(lexer, match, context):
    """
    Emit the matched text as ``Text`` and unwind brace depth inside a token.

    After yielding and advancing ``context.pos`` past the match, the brace
    nesting counter on *context* is decremented when the state stack has
    more than two entries and the entry just below the top is ``'token'``.
    When the counter reaches zero the top state is popped.
    """
    states = context.stack
    begin, finish = match.span()

    yield begin, Text, context.text[begin:finish]
    context.pos = finish

    # A free closing brace one level below a 'token' state may be the one
    # that ends the embedded code, so check the nesting level.
    if len(states) <= 2 or states[-2] != 'token':
        return

    context.perl6_token_nesting_level -= 1
    if context.perl6_token_nesting_level == 0:
        states.pop()

586 

def embedded_perl6_callback(lexer, match, context):
    """
    Start lexing an embedded code section.

    Resets the brace nesting counter on *context* to 1, emits the matched
    text as ``Text``, advances ``context.pos`` past the match and pushes the
    ``'root'`` state onto the state stack.
    """
    begin, finish = match.span()

    context.perl6_token_nesting_level = 1
    yield begin, Text, context.text[begin:finish]
    context.pos = finish
    context.stack.append('root')

592 

# If you're modifying these rules, be careful if you need to process '{' or '}'
# characters. We have special logic for processing these characters (due to the fact
# that you can nest Perl 6 code in regex blocks), so if you need to process one of
# them, make sure you also process the corresponding one!
#
# State overview (as wired up below):
#   'common'             - rules shared by 'root' and 'pre-token' via include()
#   'root'               - ordinary code; braces go through the brace callbacks
#   'pre-token'          - between a regex/token/rule keyword and its opening '{'
#   'token-sym-brackets' - optional bracketed part right after a ':sym' name
#   'token'              - body of a regex/token/rule; '{' enters embedded code
tokens = {
    'common': [
        # '#' followed by one of ` | = and an opening bracket (possibly repeated);
        # the rest of the construct is handled by brackets_callback
        # (NOTE(review): defined outside this section - presumably consumes up
        # to the matching closing delimiter; confirm there).
        (r'#[`|=](?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS) + r'])(?P=first_char)*)',
         brackets_callback(Comment.Multiline)),
        # ordinary '#' comment running to the end of the line
        (r'#[^\n]*$', Comment.Single),
        # "=begin NAME ... =end NAME" blocks; the =end must repeat the same
        # leading whitespace (\1) and the same name (\2)
        (r'^(\s*)=begin\s+(\w+)\b.*?^\1=end\s+\2', Comment.Multiline),
        # "=for" paragraphs and other lines starting with '=': both run up to
        # the next whitespace-only line
        (r'^(\s*)=for.*?\n\s*?\n', Comment.Multiline),
        (r'^=.*?\n\s*?\n', Comment.Multiline),
        # regex/token/rule declarations whose name ends in ':sym' may be followed
        # by a bracketed part, handled in 'token-sym-brackets'
        (r'(regex|token|rule)(\s*' + PERL6_IDENTIFIER_RANGE + '+:sym)',
         bygroups(Keyword, Name), 'token-sym-brackets'),
        # plain regex/token/rule declarations (name optional); the negative
        # lookahead keeps longer identifiers that merely start with the keyword
        # from matching
        (r'(regex|token|rule)(?!' + PERL6_IDENTIFIER_RANGE + r')(\s*' + PERL6_IDENTIFIER_RANGE + '+)?',
         bygroups(Keyword, Name), 'pre-token'),
        # deal with a special case in the Perl 6 grammar (role q { ... })
        (r'(role)(\s+)(q)(\s*)', bygroups(Keyword, Whitespace, Name, Whitespace)),
        # word lists; the patterns are built by _build_word_match (defined
        # outside this section)
        (_build_word_match(PERL6_KEYWORDS, PERL6_IDENTIFIER_RANGE), Keyword),
        (_build_word_match(PERL6_BUILTIN_CLASSES, PERL6_IDENTIFIER_RANGE, suffix='(?::[UD])?'),
         Name.Builtin),
        (_build_word_match(PERL6_BUILTINS, PERL6_IDENTIFIER_RANGE), Name.Builtin),
        # copied from PerlLexer
        # sigil, optional twigil, identifier, then any number of <...>, <<...>>
        # or «...» subscripts
        (r'[$@%&][.^:?=!~]?' + PERL6_IDENTIFIER_RANGE + '+(?:<<.*?>>|<.*?>|«.*?»)*',
         Name.Variable),
        # the special variables $! and $/ (with optional subscripts)
        (r'\$[!/](?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable.Global),
        # names of the form ::?WORD
        (r'::\?\w+', Name.Variable.Global),
        # variables with the '*' twigil
        (r'[$@%&]\*' + PERL6_IDENTIFIER_RANGE + '+(?:<<.*?>>|<.*?>|«.*?»)*',
         Name.Variable.Global),
        # '$' directly followed by one or more <...> groups
        (r'\$(?:<.*?>)+', Name.Variable),
        # q / qq / Q quoting constructs with optional adverbs and an arbitrary
        # (possibly repeated) non-alphanumeric delimiter
        (r'(?:q|qq|Q)[a-zA-Z]?\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^0-9a-zA-Z:\s])'
         r'(?P=first_char)*)', brackets_callback(String)),
        # copied from PerlLexer
        # numeric literals; the prefixed and float forms are tried before the
        # plain integer rule
        (r'0_?[0-7]+(_[0-7]+)*', Number.Oct),
        (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex),
        (r'0b[01]+(_[01]+)*', Number.Bin),
        (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?',
         Number.Float),
        (r'(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*', Number.Float),
        (r'\d+(_\d+)*', Number.Integer),
        # /.../ is only treated as a regex right after '~~' or after one of
        # '=', '(' or ','
        (r'(?<=~~)\s*/(?:\\\\|\\/|.)*?/', String.Regex),
        (r'(?<=[=(,])\s*/(?:\\\\|\\/|.)*?/', String.Regex),
        # an identifier starting with 'm' that is directly followed by '(' is a
        # plain name; this rule must stay ahead of the m/ms/rx rule below
        (r'm\w+(?=\()', Name),
        # m / ms / rx regexes with optional adverbs and arbitrary delimiter
        (r'(?:m|ms|rx)\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^\w:\s])'
         r'(?P=first_char)*)', brackets_callback(String.Regex)),
        # s/// , ss/// and tr/// with '/' as the delimiter
        (r'(?:s|ss|tr)\s*(?::[\w\s:]+)?\s*/(?:\\\\|\\/|.)*?/(?:\\\\|\\/|.)*?/',
         String.Regex),
        # <...> with no whitespace (or '=') right after '<' and no whitespace
        # right before '>'
        (r'<[^\s=].*?\S>', String),
        (_build_word_match(PERL6_OPERATORS), Operator),
        # any other identifier
        (r'\w' + PERL6_IDENTIFIER_RANGE + '*', Name),
        # single- and double-quoted strings with backslash escapes
        (r"'(\\\\|\\[^\\]|[^'\\])*'", String),
        (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
    ],
    'root': [
        include('common'),
        # braces are routed through callbacks so that nesting inside an
        # embedded code block (entered from the 'token' state) can be tracked
        (r'\{', opening_brace_callback),
        (r'\}', closing_brace_callback),
        # fallback: anything not matched above is emitted as Text
        (r'.+?', Text),
    ],
    'pre-token': [
        include('common'),
        # the opening brace of the regex/token/rule body replaces this state
        # with 'token'
        (r'\{', Text, ('#pop', 'token')),
        (r'.+?', Text),
    ],
    'token-sym-brackets': [
        # optional bracketed part after ':sym'; with or without it, this state
        # is replaced by 'pre-token'
        (r'(?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS) + '])(?P=first_char)*)',
         brackets_callback(Name), ('#pop', 'pre-token')),
        default(('#pop', 'pre-token')),
    ],
    'token': [
        # closing brace of the regex/token/rule body
        (r'\}', Text, '#pop'),
        # a declaration introduced by ':' and running up to ';' is lexed
        # recursively with this same lexer
        (r'(?<=:)(?:my|our|state|constant|temp|let).*?;', using(this)),
        # make sure that quotes in character classes aren't treated as strings
        (r'<(?:[-!?+.]\s*)?\[.*?\]>', String.Regex),
        # make sure that '#' characters in quotes aren't treated as comments
        (r"(?<!\\)'(\\\\|\\[^\\]|[^'\\])*'", String.Regex),
        (r'(?<!\\)"(\\\\|\\[^\\]|[^"\\])*"', String.Regex),
        (r'#.*?$', Comment.Single),
        # an opening brace starts an embedded code block; the callback resets
        # the nesting counter and pushes 'root'
        (r'\{', embedded_perl6_callback),
        # fallback: everything else inside the body is regex text
        ('.+?', String.Regex),
    ],
}

675 

def analyse_text(text):
    """Estimate how likely *text* is to be Perl 6 source.

    Returns ``True`` for a matching shebang line, a ``v6`` version
    statement, or a leading module/class/role/enum/grammar declaration
    that is scoped or accompanied by a my/our/has declaration.  Otherwise
    returns a numeric rating (``False`` when nothing was recognised),
    halved when the text contains ``:=``.
    """
    def strip_pod(lines):
        """Return *lines* without POD directive lines and POD block bodies."""
        kept = []
        inside_pod = False

        for candidate in lines:
            if re.match(r'^=(?:end|cut)', candidate):
                # a terminator closes the block and is itself dropped
                inside_pod = False
                continue
            if re.match(r'^=\w+', candidate):
                # any other directive opens (or continues) a POD block
                inside_pod = True
                continue
            if not inside_pod:
                kept.append(candidate)

        return kept

    # XXX handle block comments
    lines = strip_pod(text.splitlines())
    text = '\n'.join(lines)

    if shebang_matches(text, r'perl6|rakudo|niecza|pugs'):
        return True

    # check for my/our/has declarations
    declaration = re.search(
        r"(?:my|our|has)\s+(?:" + Perl6Lexer.PERL6_IDENTIFIER_RANGE +
        r"+\s+)?[$@%&(]", text)
    saw_perl_decl = declaration is not None
    rating = 0.8 if saw_perl_decl else False

    # Only the leading run of significant lines is inspected: the scan
    # stops at the first line that is neither blank/comment-only nor a
    # package-like declaration.
    for raw_line in lines:
        code = re.sub('#.*', '', raw_line)
        if re.match(r'^\s*$', code):
            continue

        # match v6; use v6; use v6.0; use v6.0.0;
        if re.match(r'^\s*(?:use\s+)?v6(?:\.\d(?:\.\d)?)?;', code):
            return True

        # match class, module, role, enum, grammar declarations
        class_decl = re.match(r'^\s*(?:(?P<scope>my|our)\s+)?(?:module|class|role|enum|grammar)', code)
        if class_decl is None:
            break
        if saw_perl_decl or class_decl.group('scope') is not None:
            return True
        # an unscoped declaration on its own is only a weak hint
        rating = 0.05

    if ':=' in text:
        # Same logic as above for PerlLexer
        rating /= 2

    return rating

730 

def __init__(self, **options):
    """Create the lexer, defaulting the input encoding to UTF-8.

    All *options* are forwarded unchanged to the base lexer constructor.
    Afterwards ``self.encoding`` is set so that this lexer decodes input
    as UTF-8 unless the caller asked for something else.

    The standard Pygments lexer options document ``inencoding`` as an
    override for ``encoding``; re-assigning ``self.encoding`` from
    ``encoding`` alone would discard that override, so ``inencoding`` is
    consulted first.
    """
    super().__init__(**options)
    # Precedence: explicit 'inencoding', then explicit 'encoding', then UTF-8.
    self.encoding = options.get('inencoding') or options.get('encoding', 'utf-8')

733 self.encoding = options.get('encoding', 'utf-8')