Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/erlang.py: 63%
158 statements
coverage.py v7.2.7, created at 2023-07-01 06:54 +0000

1""" 

2 pygments.lexers.erlang 

3 ~~~~~~~~~~~~~~~~~~~~~~ 

4 

5 Lexers for Erlang. 

6 

7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11import re 

12 

13from pygments.lexer import Lexer, RegexLexer, bygroups, words, do_insertions, \ 

14 include, default, line_re 

15from pygments.token import Comment, Operator, Keyword, Name, String, \ 

16 Number, Punctuation, Generic, Whitespace 

17 

18__all__ = ['ErlangLexer', 'ErlangShellLexer', 'ElixirConsoleLexer', 

19 'ElixirLexer'] 

20 

21 

22class ErlangLexer(RegexLexer): 

23 """ 

24 For the Erlang functional programming language. 

25 

26 .. versionadded:: 0.9 

27 """ 

28 

29 name = 'Erlang' 

30 url = 'https://www.erlang.org/' 

31 aliases = ['erlang'] 

32 filenames = ['*.erl', '*.hrl', '*.es', '*.escript'] 

33 mimetypes = ['text/x-erlang'] 

34 

35 keywords = ( 

36 'after', 'begin', 'case', 'catch', 'cond', 'end', 'fun', 'if', 

37 'let', 'of', 'query', 'receive', 'try', 'when', 

38 ) 

39 

40 builtins = ( # See erlang(3) man page 

41 'abs', 'append_element', 'apply', 'atom_to_list', 'binary_to_list', 

42 'bitstring_to_list', 'binary_to_term', 'bit_size', 'bump_reductions', 

43 'byte_size', 'cancel_timer', 'check_process_code', 'delete_module', 

44 'demonitor', 'disconnect_node', 'display', 'element', 'erase', 'exit', 

45 'float', 'float_to_list', 'fun_info', 'fun_to_list', 

46 'function_exported', 'garbage_collect', 'get', 'get_keys', 

47 'group_leader', 'hash', 'hd', 'integer_to_list', 'iolist_to_binary', 

48 'iolist_size', 'is_atom', 'is_binary', 'is_bitstring', 'is_boolean', 

49 'is_builtin', 'is_float', 'is_function', 'is_integer', 'is_list', 

50 'is_number', 'is_pid', 'is_port', 'is_process_alive', 'is_record', 

51 'is_reference', 'is_tuple', 'length', 'link', 'list_to_atom', 

52 'list_to_binary', 'list_to_bitstring', 'list_to_existing_atom', 

53 'list_to_float', 'list_to_integer', 'list_to_pid', 'list_to_tuple', 

54 'load_module', 'localtime_to_universaltime', 'make_tuple', 'md5', 

55 'md5_final', 'md5_update', 'memory', 'module_loaded', 'monitor', 

56 'monitor_node', 'node', 'nodes', 'open_port', 'phash', 'phash2', 

57 'pid_to_list', 'port_close', 'port_command', 'port_connect', 

58 'port_control', 'port_call', 'port_info', 'port_to_list', 

59 'process_display', 'process_flag', 'process_info', 'purge_module', 

60 'put', 'read_timer', 'ref_to_list', 'register', 'resume_process', 

61 'round', 'send', 'send_after', 'send_nosuspend', 'set_cookie', 

62 'setelement', 'size', 'spawn', 'spawn_link', 'spawn_monitor', 

63 'spawn_opt', 'split_binary', 'start_timer', 'statistics', 

64 'suspend_process', 'system_flag', 'system_info', 'system_monitor', 

65 'system_profile', 'term_to_binary', 'tl', 'trace', 'trace_delivered', 

66 'trace_info', 'trace_pattern', 'trunc', 'tuple_size', 'tuple_to_list', 

67 'universaltime_to_localtime', 'unlink', 'unregister', 'whereis' 

68 ) 

69 

70 operators = r'(\+\+?|--?|\*|/|<|>|/=|=:=|=/=|=<|>=|==?|<-|!|\?)' 

71 word_operators = ( 

72 'and', 'andalso', 'band', 'bnot', 'bor', 'bsl', 'bsr', 'bxor', 

73 'div', 'not', 'or', 'orelse', 'rem', 'xor' 

74 ) 

75 

76 atom_re = r"(?:[a-z]\w*|'[^\n']*[^\\]')" 

77 

78 variable_re = r'(?:[A-Z_]\w*)' 

79 

80 esc_char_re = r'[bdefnrstv\'"\\]' 

81 esc_octal_re = r'[0-7][0-7]?[0-7]?' 

82 esc_hex_re = r'(?:x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\})' 

83 esc_ctrl_re = r'\^[a-zA-Z]' 

84 escape_re = r'(?:\\(?:'+esc_char_re+r'|'+esc_octal_re+r'|'+esc_hex_re+r'|'+esc_ctrl_re+r'))' 

85 

86 macro_re = r'(?:'+variable_re+r'|'+atom_re+r')' 

87 

88 base_re = r'(?:[2-9]|[12][0-9]|3[0-6])' 

89 
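    # Illustrative inputs for the helper patterns above (made-up samples):
    # atom_re matches atoms such as ok or 'quoted atom', variable_re matches
    # variables such as Count or _Acc, and escape_re matches escapes such as
    # \n, \x{1F680} or \^G inside strings and $-char literals.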

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'(%.*)(\n)', bygroups(Comment, Whitespace)),
            (words(keywords, suffix=r'\b'), Keyword),
            (words(builtins, suffix=r'\b'), Name.Builtin),
            (words(word_operators, suffix=r'\b'), Operator.Word),
            (r'^-', Punctuation, 'directive'),
            (operators, Operator),
            (r'"', String, 'string'),
            (r'<<', Name.Label),
            (r'>>', Name.Label),
            ('(' + atom_re + ')(:)', bygroups(Name.Namespace, Punctuation)),
            ('(?:^|(?<=:))(' + atom_re + r')(\s*)(\()',
             bygroups(Name.Function, Whitespace, Punctuation)),
            (r'[+-]?' + base_re + r'#[0-9a-zA-Z]+', Number.Integer),
            (r'[+-]?\d+\.\d+', Number.Float),
            (r'[+-]?\d+', Number.Integer),
            (r'[]\[:_@\".{}()|;,]', Punctuation),
            (variable_re, Name.Variable),
            (atom_re, Name),
            (r'\?'+macro_re, Name.Constant),
            (r'\$(?:'+escape_re+r'|\\[ %]|[^\\])', String.Char),
            (r'#'+atom_re+r'(?:\.'+atom_re+r')?', Name.Label),

            # Erlang script shebang
            (r'\A#!.+\n', Comment.Hashbang),

            # EEP 43: Maps
            # http://www.erlang.org/eeps/eep-0043.html
            (r'#\{', Punctuation, 'map_key'),
        ],
        'string': [
            (escape_re, String.Escape),
            (r'"', String, '#pop'),
            (r'~[0-9.*]*[~#+BPWXb-ginpswx]', String.Interpol),
            (r'[^"\\~]+', String),
            (r'~', String),
        ],
        'directive': [
            (r'(define)(\s*)(\()('+macro_re+r')',
             bygroups(Name.Entity, Whitespace, Punctuation, Name.Constant), '#pop'),
            (r'(record)(\s*)(\()('+macro_re+r')',
             bygroups(Name.Entity, Whitespace, Punctuation, Name.Label), '#pop'),
            (atom_re, Name.Entity, '#pop'),
        ],
        'map_key': [
            include('root'),
            (r'=>', Punctuation, 'map_val'),
            (r':=', Punctuation, 'map_val'),
            (r'\}', Punctuation, '#pop'),
        ],
        'map_val': [
            include('root'),
            (r',', Punctuation, '#pop'),
            (r'(?=\})', Punctuation, '#pop'),
        ],
    }

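# Illustrative use of ErlangLexer through the standard Pygments API (the
# Erlang snippet below is made up for demonstration purposes):
#
#     from pygments import highlight
#     from pygments.formatters import TerminalFormatter
#     from pygments.lexers.erlang import ErlangLexer
#
#     code = "-module(demo).\nstart() -> io:format(\"hello~n\").\n"
#     print(highlight(code, ErlangLexer(), TerminalFormatter()))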

class ErlangShellLexer(Lexer):
    """
    Shell sessions in erl (for Erlang code).

    .. versionadded:: 1.1
    """
    name = 'Erlang erl session'
    aliases = ['erl']
    filenames = ['*.erl-sh']
    mimetypes = ['text/x-erl-shellsession']

    _prompt_re = re.compile(r'(?:\([\w@_.]+\))?\d+>(?=\s|\Z)')
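    # Examples matched by _prompt_re: "1>" and "(node@host)42>"; the lookahead
    # requires whitespace or end of input right after the ">".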

    def get_tokens_unprocessed(self, text):
        erlexer = ErlangLexer(**self.options)

        curcode = ''
        insertions = []
        for match in line_re.finditer(text):
            line = match.group()
            m = self._prompt_re.match(line)
            if m is not None:
                end = m.end()
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:end])]))
                curcode += line[end:]
            else:
                if curcode:
                    yield from do_insertions(insertions,
                                             erlexer.get_tokens_unprocessed(curcode))
                    curcode = ''
                    insertions = []
                if line.startswith('*'):
                    yield match.start(), Generic.Traceback, line
                else:
                    yield match.start(), Generic.Output, line
        if curcode:
            yield from do_insertions(insertions,
                                     erlexer.get_tokens_unprocessed(curcode))

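# A made-up erl transcript and how this lexer classifies it: lines matched by
# _prompt_re get a Generic.Prompt token and the rest of the line is lexed as
# Erlang; lines starting with "*" become Generic.Traceback; anything else is
# plain Generic.Output.
#
#     1> lists:seq(1, 3).
#     [1,2,3]
#     2> 1 +.
#     * 1:4: syntax error before: '.'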

def gen_elixir_string_rules(name, symbol, token):
    states = {}
    states['string_' + name] = [
        (r'[^#%s\\]+' % (symbol,), token),
        include('escapes'),
        (r'\\.', token),
        (r'(%s)' % (symbol,), bygroups(token), "#pop"),
        include('interpol')
    ]
    return states
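# For example, gen_elixir_string_rules('double', '"', String.Double), as used
# further down in ElixirLexer.tokens, yields one state named 'string_double'
# whose rules consume plain characters, escapes and "#{...}" interpolation,
# and pop on the closing '"'.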


def gen_elixir_sigstr_rules(term, term_class, token, interpol=True):
    if interpol:
        return [
            (r'[^#%s\\]+' % (term_class,), token),
            include('escapes'),
            (r'\\.', token),
            (r'%s[a-zA-Z]*' % (term,), token, '#pop'),
            include('interpol')
        ]
    else:
        return [
            (r'[^%s\\]+' % (term_class,), token),
            (r'\\.', token),
            (r'%s[a-zA-Z]*' % (term,), token, '#pop'),
        ]


class ElixirLexer(RegexLexer):
    """
    For the Elixir language.

    .. versionadded:: 1.5
    """

    name = 'Elixir'
    url = 'http://elixir-lang.org'
    aliases = ['elixir', 'ex', 'exs']
    filenames = ['*.ex', '*.eex', '*.exs', '*.leex']
    mimetypes = ['text/x-elixir']

    KEYWORD = ('fn', 'do', 'end', 'after', 'else', 'rescue', 'catch')
    KEYWORD_OPERATOR = ('not', 'and', 'or', 'when', 'in')
    BUILTIN = (
        'case', 'cond', 'for', 'if', 'unless', 'try', 'receive', 'raise',
        'quote', 'unquote', 'unquote_splicing', 'throw', 'super',
    )
    BUILTIN_DECLARATION = (
        'def', 'defp', 'defmodule', 'defprotocol', 'defmacro', 'defmacrop',
        'defdelegate', 'defexception', 'defstruct', 'defimpl', 'defcallback',
    )

    BUILTIN_NAMESPACE = ('import', 'require', 'use', 'alias')
    CONSTANT = ('nil', 'true', 'false')

    PSEUDO_VAR = ('_', '__MODULE__', '__DIR__', '__ENV__', '__CALLER__')

    OPERATORS3 = (
        '<<<', '>>>', '|||', '&&&', '^^^', '~~~', '===', '!==',
        '~>>', '<~>', '|~>', '<|>',
    )
    OPERATORS2 = (
        '==', '!=', '<=', '>=', '&&', '||', '<>', '++', '--', '|>', '=~',
        '->', '<-', '|', '.', '=', '~>', '<~',
    )
    OPERATORS1 = ('<', '>', '+', '-', '*', '/', '!', '^', '&')

    PUNCTUATION = (
        '\\\\', '<<', '>>', '=>', '(', ')', ':', ';', ',', '[', ']',
    )

    def get_tokens_unprocessed(self, text):
        for index, token, value in RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name:
                if value in self.KEYWORD:
                    yield index, Keyword, value
                elif value in self.KEYWORD_OPERATOR:
                    yield index, Operator.Word, value
                elif value in self.BUILTIN:
                    yield index, Keyword, value
                elif value in self.BUILTIN_DECLARATION:
                    yield index, Keyword.Declaration, value
                elif value in self.BUILTIN_NAMESPACE:
                    yield index, Keyword.Namespace, value
                elif value in self.CONSTANT:
                    yield index, Name.Constant, value
                elif value in self.PSEUDO_VAR:
                    yield index, Name.Builtin.Pseudo, value
                else:
                    yield index, token, value
            else:
                yield index, token, value
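    # As a concrete illustration: the regex rules below emit Name for plain
    # identifiers, and the override above reclassifies them, e.g. "defmodule"
    # becomes Keyword.Declaration, "import" Keyword.Namespace, "nil"
    # Name.Constant, and "__MODULE__" Name.Builtin.Pseudo; anything else is
    # passed through unchanged.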

    def gen_elixir_sigil_rules():
        # all valid sigil terminators (excluding heredocs)
        terminators = [
            (r'\{', r'\}', '}', 'cb'),
            (r'\[', r'\]', r'\]', 'sb'),
            (r'\(', r'\)', ')', 'pa'),
            ('<', '>', '>', 'ab'),
            ('/', '/', '/', 'slas'),
            (r'\|', r'\|', '|', 'pipe'),
            ('"', '"', '"', 'quot'),
            ("'", "'", "'", 'apos'),
        ]

        # heredocs have slightly different rules
        triquotes = [(r'"""', 'triquot'), (r"'''", 'triapos')]

        token = String.Other
        states = {'sigils': []}

        for term, name in triquotes:
            states['sigils'] += [
                (r'(~[a-z])(%s)' % (term,), bygroups(token, String.Heredoc),
                 (name + '-end', name + '-intp')),
                (r'(~[A-Z])(%s)' % (term,), bygroups(token, String.Heredoc),
                 (name + '-end', name + '-no-intp')),
            ]

            states[name + '-end'] = [
                (r'[a-zA-Z]+', token, '#pop'),
                default('#pop'),
            ]
            states[name + '-intp'] = [
                (r'^(\s*)(' + term + ')', bygroups(Whitespace, String.Heredoc), '#pop'),
                include('heredoc_interpol'),
            ]
            states[name + '-no-intp'] = [
                (r'^(\s*)(' + term + ')', bygroups(Whitespace, String.Heredoc), '#pop'),
                include('heredoc_no_interpol'),
            ]

        for lterm, rterm, rterm_class, name in terminators:
            states['sigils'] += [
                (r'~[a-z]' + lterm, token, name + '-intp'),
                (r'~[A-Z]' + lterm, token, name + '-no-intp'),
            ]
            states[name + '-intp'] = \
                gen_elixir_sigstr_rules(rterm, rterm_class, token)
            states[name + '-no-intp'] = \
                gen_elixir_sigstr_rules(rterm, rterm_class, token, interpol=False)

        return states
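    # Roughly, the generated 'sigils' rules mean that, e.g., "~s(hello #{x})"
    # enters the 'pa-intp' state (interpolation allowed), "~S(raw)" enters
    # 'pa-no-intp', and heredoc sigils such as ~s""" ... """ go through the
    # 'triquot-*' states.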

    op3_re = "|".join(re.escape(s) for s in OPERATORS3)
    op2_re = "|".join(re.escape(s) for s in OPERATORS2)
    op1_re = "|".join(re.escape(s) for s in OPERATORS1)
    ops_re = r'(?:%s|%s|%s)' % (op3_re, op2_re, op1_re)
    punctuation_re = "|".join(re.escape(s) for s in PUNCTUATION)
    alnum = r'\w'
    name_re = r'(?:\.\.\.|[a-z_]%s*[!?]?)' % alnum
    modname_re = r'[A-Z]%(alnum)s*(?:\.[A-Z]%(alnum)s*)*' % {'alnum': alnum}
    complex_name_re = r'(?:%s|%s|%s)' % (name_re, modname_re, ops_re)
    special_atom_re = r'(?:\.\.\.|<<>>|%\{\}|%|\{\})'

    long_hex_char_re = r'(\\x\{)([\da-fA-F]+)(\})'
    hex_char_re = r'(\\x[\da-fA-F]{1,2})'
    escape_char_re = r'(\\[abdefnrstv])'

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'#.*$', Comment.Single),

            # Various kinds of characters
            (r'(\?)' + long_hex_char_re,
             bygroups(String.Char,
                      String.Escape, Number.Hex, String.Escape)),
            (r'(\?)' + hex_char_re,
             bygroups(String.Char, String.Escape)),
            (r'(\?)' + escape_char_re,
             bygroups(String.Char, String.Escape)),
            (r'\?\\?.', String.Char),

            # '::' has to go before atoms
            (r':::', String.Symbol),
            (r'::', Operator),

            # atoms
            (r':' + special_atom_re, String.Symbol),
            (r':' + complex_name_re, String.Symbol),
            (r':"', String.Symbol, 'string_double_atom'),
            (r":'", String.Symbol, 'string_single_atom'),

            # [keywords: ...]
            (r'(%s|%s)(:)(?=\s|\n)' % (special_atom_re, complex_name_re),
             bygroups(String.Symbol, Punctuation)),

            # @attributes
            (r'@' + name_re, Name.Attribute),

            # identifiers
            (name_re, Name),
            (r'(%%?)(%s)' % (modname_re,), bygroups(Punctuation, Name.Class)),

            # operators and punctuation
            (op3_re, Operator),
            (op2_re, Operator),
            (punctuation_re, Punctuation),
            (r'&\d', Name.Entity),   # anon func arguments
            (op1_re, Operator),

            # numbers
            (r'0b[01]+', Number.Bin),
            (r'0o[0-7]+', Number.Oct),
            (r'0x[\da-fA-F]+', Number.Hex),
            (r'\d(_?\d)*\.\d(_?\d)*([eE][-+]?\d(_?\d)*)?', Number.Float),
            (r'\d(_?\d)*', Number.Integer),

            # strings and heredocs
            (r'(""")(\s*)', bygroups(String.Heredoc, Whitespace),
             'heredoc_double'),
            (r"(''')(\s*)$", bygroups(String.Heredoc, Whitespace),
             'heredoc_single'),
            (r'"', String.Double, 'string_double'),
            (r"'", String.Single, 'string_single'),

            include('sigils'),

            (r'%\{', Punctuation, 'map_key'),
            (r'\{', Punctuation, 'tuple'),
        ],
        'heredoc_double': [
            (r'^(\s*)(""")', bygroups(Whitespace, String.Heredoc), '#pop'),
            include('heredoc_interpol'),
        ],
        'heredoc_single': [
            (r"^\s*'''", String.Heredoc, '#pop'),
            include('heredoc_interpol'),
        ],
        'heredoc_interpol': [
            (r'[^#\\\n]+', String.Heredoc),
            include('escapes'),
            (r'\\.', String.Heredoc),
            (r'\n+', String.Heredoc),
            include('interpol'),
        ],
        'heredoc_no_interpol': [
            (r'[^\\\n]+', String.Heredoc),
            (r'\\.', String.Heredoc),
            (r'\n+', Whitespace),
        ],
        'escapes': [
            (long_hex_char_re,
             bygroups(String.Escape, Number.Hex, String.Escape)),
            (hex_char_re, String.Escape),
            (escape_char_re, String.Escape),
        ],
        'interpol': [
            (r'#\{', String.Interpol, 'interpol_string'),
        ],
        'interpol_string': [
            (r'\}', String.Interpol, "#pop"),
            include('root')
        ],
        'map_key': [
            include('root'),
            (r':', Punctuation, 'map_val'),
            (r'=>', Punctuation, 'map_val'),
            (r'\}', Punctuation, '#pop'),
        ],
        'map_val': [
            include('root'),
            (r',', Punctuation, '#pop'),
            (r'(?=\})', Punctuation, '#pop'),
        ],
        'tuple': [
            include('root'),
            (r'\}', Punctuation, '#pop'),
        ],
    }
    tokens.update(gen_elixir_string_rules('double', '"', String.Double))
    tokens.update(gen_elixir_string_rules('single', "'", String.Single))
    tokens.update(gen_elixir_string_rules('double_atom', '"', String.Symbol))
    tokens.update(gen_elixir_string_rules('single_atom', "'", String.Symbol))
    tokens.update(gen_elixir_sigil_rules())

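# Illustrative use of ElixirLexer via the standard Pygments API (the Elixir
# snippet below is invented for demonstration purposes):
#
#     from pygments import highlight
#     from pygments.formatters import HtmlFormatter
#     from pygments.lexers.erlang import ElixirLexer
#
#     code = 'defmodule Demo do\n  def hi(name), do: "hello #{name}"\nend\n'
#     print(highlight(code, ElixirLexer(), HtmlFormatter()))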

class ElixirConsoleLexer(Lexer):
    """
    For Elixir interactive console (iex) output like:

    .. sourcecode:: iex

        iex> [head | tail] = [1,2,3]
        [1,2,3]
        iex> head
        1
        iex> tail
        [2,3]
        iex> [head | tail]
        [1,2,3]
        iex> length [head | tail]
        3

    .. versionadded:: 1.5
    """

    name = 'Elixir iex session'
    aliases = ['iex']
    mimetypes = ['text/x-elixir-shellsession']

    _prompt_re = re.compile(r'(iex|\.{3})((?:\([\w@_.]+\))?\d+|\(\d+\))?> ')
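    # Examples matched by _prompt_re: "iex> ", "iex(1)> ", "iex(node@host)1> "
    # and the continuation prompt "...> ".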

    def get_tokens_unprocessed(self, text):
        exlexer = ElixirLexer(**self.options)

        curcode = ''
        in_error = False
        insertions = []
        for match in line_re.finditer(text):
            line = match.group()
            if line.startswith('** '):
                in_error = True
                insertions.append((len(curcode),
                                   [(0, Generic.Error, line[:-1])]))
                curcode += line[-1:]
            else:
                m = self._prompt_re.match(line)
                if m is not None:
                    in_error = False
                    end = m.end()
                    insertions.append((len(curcode),
                                       [(0, Generic.Prompt, line[:end])]))
                    curcode += line[end:]
                else:
                    if curcode:
                        yield from do_insertions(
                            insertions, exlexer.get_tokens_unprocessed(curcode))
                        curcode = ''
                        insertions = []
                    token = Generic.Error if in_error else Generic.Output
                    yield match.start(), token, line
        if curcode:
            yield from do_insertions(
                insertions, exlexer.get_tokens_unprocessed(curcode))
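
# A made-up iex fragment and how the method above classifies it: prompt lines
# matched by _prompt_re get Generic.Prompt and their remainder is lexed as
# Elixir; lines starting with "** " (and any output until the next prompt)
# become Generic.Error; everything else is Generic.Output.
#
#     iex(1)> 1 / 0
#     ** (ArithmeticError) bad argument in arithmetic expression
#     iex(2)> :ok
#     :ok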