1"""
2 pygments.lexers.erlang
3 ~~~~~~~~~~~~~~~~~~~~~~
4
5 Lexers for Erlang.
6
7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11import re
12
13from pygments.lexer import Lexer, RegexLexer, bygroups, words, do_insertions, \
14 include, default, line_re
15from pygments.token import Comment, Operator, Keyword, Name, String, \
16 Number, Punctuation, Generic, Whitespace
17
18__all__ = ['ErlangLexer', 'ErlangShellLexer', 'ElixirConsoleLexer',
19 'ElixirLexer']
20
21
class ErlangLexer(RegexLexer):
    """
    For the Erlang functional programming language.
    """

    name = 'Erlang'
    url = 'https://www.erlang.org/'
    aliases = ['erlang']
    filenames = ['*.erl', '*.hrl', '*.es', '*.escript']
    mimetypes = ['text/x-erlang']
    version_added = '0.9'

    keywords = (
        'after', 'begin', 'case', 'catch', 'cond', 'end', 'fun', 'if',
        'let', 'of', 'query', 'receive', 'try', 'when',
    )

    builtins = (  # See erlang(3) man page
        'abs', 'append_element', 'apply', 'atom_to_list', 'binary_to_list',
        'bitstring_to_list', 'binary_to_term', 'bit_size', 'bump_reductions',
        'byte_size', 'cancel_timer', 'check_process_code', 'delete_module',
        'demonitor', 'disconnect_node', 'display', 'element', 'erase', 'exit',
        'float', 'float_to_list', 'fun_info', 'fun_to_list',
        'function_exported', 'garbage_collect', 'get', 'get_keys',
        'group_leader', 'hash', 'hd', 'integer_to_list', 'iolist_to_binary',
        'iolist_size', 'is_atom', 'is_binary', 'is_bitstring', 'is_boolean',
        'is_builtin', 'is_float', 'is_function', 'is_integer', 'is_list',
        'is_number', 'is_pid', 'is_port', 'is_process_alive', 'is_record',
        'is_reference', 'is_tuple', 'length', 'link', 'list_to_atom',
        'list_to_binary', 'list_to_bitstring', 'list_to_existing_atom',
        'list_to_float', 'list_to_integer', 'list_to_pid', 'list_to_tuple',
        'load_module', 'localtime_to_universaltime', 'make_tuple', 'md5',
        'md5_final', 'md5_update', 'memory', 'module_loaded', 'monitor',
        'monitor_node', 'node', 'nodes', 'open_port', 'phash', 'phash2',
        'pid_to_list', 'port_close', 'port_command', 'port_connect',
        'port_control', 'port_call', 'port_info', 'port_to_list',
        'process_display', 'process_flag', 'process_info', 'purge_module',
        'put', 'read_timer', 'ref_to_list', 'register', 'resume_process',
        'round', 'send', 'send_after', 'send_nosuspend', 'set_cookie',
        'setelement', 'size', 'spawn', 'spawn_link', 'spawn_monitor',
        'spawn_opt', 'split_binary', 'start_timer', 'statistics',
        'suspend_process', 'system_flag', 'system_info', 'system_monitor',
        'system_profile', 'term_to_binary', 'tl', 'trace', 'trace_delivered',
        'trace_info', 'trace_pattern', 'trunc', 'tuple_size', 'tuple_to_list',
        'universaltime_to_localtime', 'unlink', 'unregister', 'whereis'
    )

    operators = r'(\+\+?|--?|\*|/|<|>|/=|=:=|=/=|=<|>=|==?|<-|!|\?)'
    word_operators = (
        'and', 'andalso', 'band', 'bnot', 'bor', 'bsl', 'bsr', 'bxor',
        'div', 'not', 'or', 'orelse', 'rem', 'xor'
    )

    atom_re = r"(?:[a-z]\w*|'[^\n']*[^\\]')"

    variable_re = r'(?:[A-Z_]\w*)'

    esc_char_re = r'[bdefnrstv\'"\\]'
    esc_octal_re = r'[0-7][0-7]?[0-7]?'
    esc_hex_re = r'(?:x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\})'
    esc_ctrl_re = r'\^[a-zA-Z]'
    escape_re = r'(?:\\(?:'+esc_char_re+r'|'+esc_octal_re+r'|'+esc_hex_re+r'|'+esc_ctrl_re+r'))'

    macro_re = r'(?:'+variable_re+r'|'+atom_re+r')'

    base_re = r'(?:[2-9]|[12][0-9]|3[0-6])'

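    # Examples of what the helper patterns above are meant to match:
    # atom_re     - plain and quoted atoms, e.g. ok or 'hello world'
    # variable_re - variables, e.g. Acc or _Ignored
    # escape_re   - escapes in strings/chars, e.g. \n, \x{263A} or \^G
    # macro_re    - names usable after ?, e.g. ?MODULE or ?LINE
    # base_re     - the radix prefix of based integers, e.g. 2#1010 or 16#ff
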
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'(%.*)(\n)', bygroups(Comment, Whitespace)),
            (words(keywords, suffix=r'\b'), Keyword),
            (words(builtins, suffix=r'\b'), Name.Builtin),
            (words(word_operators, suffix=r'\b'), Operator.Word),
            (r'^-', Punctuation, 'directive'),
            (operators, Operator),
            (r'"', String, 'string'),
            (r'<<', Name.Label),
            (r'>>', Name.Label),
            ('(' + atom_re + ')(:)', bygroups(Name.Namespace, Punctuation)),
            ('(?:^|(?<=:))(' + atom_re + r')(\s*)(\()',
             bygroups(Name.Function, Whitespace, Punctuation)),
            (r'[+-]?' + base_re + r'#[0-9a-zA-Z]+', Number.Integer),
            # The float rule must be tried before the plain integer rule,
            # otherwise "3.14" is lexed as integer, punctuation, integer.
            (r'[+-]?\d+\.\d+', Number.Float),
            (r'[+-]?\d+', Number.Integer),
            (r'[]\[:_@\".{}()|;,]', Punctuation),
            (variable_re, Name.Variable),
            (atom_re, Name),
            (r'\?'+macro_re, Name.Constant),
            (r'\$(?:'+escape_re+r'|\\[ %]|[^\\])', String.Char),
            (r'#'+atom_re+r'(?:\.'+atom_re+r')?', Name.Label),

            # Erlang script shebang
            (r'\A#!.+\n', Comment.Hashbang),

            # EEP 43: Maps
            # http://www.erlang.org/eeps/eep-0043.html
            (r'#\{', Punctuation, 'map_key'),
        ],
        'string': [
            (escape_re, String.Escape),
            (r'"', String, '#pop'),
            (r'~[0-9.*]*[~#+BPWXb-ginpswx]', String.Interpol),
            (r'[^"\\~]+', String),
            (r'~', String),
        ],
        'directive': [
            (r'(define)(\s*)(\()('+macro_re+r')',
             bygroups(Name.Entity, Whitespace, Punctuation, Name.Constant), '#pop'),
            (r'(record)(\s*)(\()('+macro_re+r')',
             bygroups(Name.Entity, Whitespace, Punctuation, Name.Label), '#pop'),
            (atom_re, Name.Entity, '#pop'),
        ],
        'map_key': [
            include('root'),
            (r'=>', Punctuation, 'map_val'),
            (r':=', Punctuation, 'map_val'),
            (r'\}', Punctuation, '#pop'),
        ],
        'map_val': [
            include('root'),
            (r',', Punctuation, '#pop'),
            (r'(?=\})', Punctuation, '#pop'),
        ],
    }

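# A minimal usage sketch for ErlangLexer (illustrative only; the Erlang
# snippet below is made up, and any other formatter could be substituted):
#
#     from pygments import highlight
#     from pygments.formatters import HtmlFormatter
#
#     code = 'fact(0) -> 1;\nfact(N) when N > 0 -> N * fact(N - 1).\n'
#     print(highlight(code, ErlangLexer(), HtmlFormatter()))
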
class ErlangShellLexer(Lexer):
    """
    Shell sessions in erl (for Erlang code).
    """
    name = 'Erlang erl session'
    aliases = ['erl']
    filenames = ['*.erl-sh']
    mimetypes = ['text/x-erl-shellsession']
    url = 'https://www.erlang.org/'
    version_added = '1.1'

    _prompt_re = re.compile(r'(?:\([\w@_.]+\))?\d+>(?=\s|\Z)')

    def get_tokens_unprocessed(self, text):
        erlexer = ErlangLexer(**self.options)

        curcode = ''
        insertions = []
        for match in line_re.finditer(text):
            line = match.group()
            m = self._prompt_re.match(line)
            if m is not None:
                end = m.end()
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:end])]))
                curcode += line[end:]
            else:
                if curcode:
                    yield from do_insertions(insertions,
                                             erlexer.get_tokens_unprocessed(curcode))
                    curcode = ''
                    insertions = []
                if line.startswith('*'):
                    yield match.start(), Generic.Traceback, line
                else:
                    yield match.start(), Generic.Output, line
        if curcode:
            yield from do_insertions(insertions,
                                     erlexer.get_tokens_unprocessed(curcode))
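
# ErlangShellLexer._prompt_re accepts both plain and distributed-node erl
# prompts, for example:
#
#     1> lists:seq(1, 3).
#     (demo@localhost)2> ok.
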

def gen_elixir_string_rules(name, symbol, token):
    states = {}
    states['string_' + name] = [
        (rf'[^#{symbol}\\]+', token),
        include('escapes'),
        (r'\\.', token),
        (rf'({symbol})', bygroups(token), "#pop"),
        include('interpol')
    ]
    return states
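
# For instance, gen_elixir_string_rules('double', '"', String.Double) yields a
# 'string_double' state that consumes literal text, backslash escapes and
# #{...} interpolation until the closing double quote pops the state.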


def gen_elixir_sigstr_rules(term, term_class, token, interpol=True):
    if interpol:
        return [
            (rf'[^#{term_class}\\]+', token),
            include('escapes'),
            (r'\\.', token),
            (rf'{term}[a-zA-Z]*', token, '#pop'),
            include('interpol')
        ]
    else:
        return [
            (rf'[^{term_class}\\]+', token),
            (r'\\.', token),
            (rf'{term}[a-zA-Z]*', token, '#pop'),
        ]
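
# The interpol flag mirrors Elixir's sigil convention: lowercase sigils such
# as ~s(...) interpolate #{...}, while uppercase ones such as ~S(...) do not,
# so the non-interpolating rule set skips the 'escapes' and 'interpol' states.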


class ElixirLexer(RegexLexer):
    """
    For the Elixir language.
    """

    name = 'Elixir'
    url = 'https://elixir-lang.org'
    aliases = ['elixir', 'ex', 'exs']
    filenames = ['*.ex', '*.eex', '*.exs', '*.leex']
    mimetypes = ['text/x-elixir']
    version_added = '1.5'

    KEYWORD = ('fn', 'do', 'end', 'after', 'else', 'rescue', 'catch')
    KEYWORD_OPERATOR = ('not', 'and', 'or', 'when', 'in')
    BUILTIN = (
        'case', 'cond', 'for', 'if', 'unless', 'try', 'receive', 'raise',
        'quote', 'unquote', 'unquote_splicing', 'throw', 'super',
    )
    BUILTIN_DECLARATION = (
        'def', 'defp', 'defmodule', 'defprotocol', 'defmacro', 'defmacrop',
        'defdelegate', 'defexception', 'defstruct', 'defimpl', 'defcallback',
    )

    BUILTIN_NAMESPACE = ('import', 'require', 'use', 'alias')
    CONSTANT = ('nil', 'true', 'false')

    PSEUDO_VAR = ('_', '__MODULE__', '__DIR__', '__ENV__', '__CALLER__')

    OPERATORS3 = (
        '<<<', '>>>', '|||', '&&&', '^^^', '~~~', '===', '!==',
        '~>>', '<~>', '|~>', '<|>',
    )
    OPERATORS2 = (
        '==', '!=', '<=', '>=', '&&', '||', '<>', '++', '--', '|>', '=~',
        '->', '<-', '|', '.', '=', '~>', '<~',
    )
    OPERATORS1 = ('<', '>', '+', '-', '*', '/', '!', '^', '&')

    PUNCTUATION = (
        '\\\\', '<<', '>>', '=>', '(', ')', ':', ';', ',', '[', ']',
    )

    def get_tokens_unprocessed(self, text):
        for index, token, value in RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name:
                if value in self.KEYWORD:
                    yield index, Keyword, value
                elif value in self.KEYWORD_OPERATOR:
                    yield index, Operator.Word, value
                elif value in self.BUILTIN:
                    yield index, Keyword, value
                elif value in self.BUILTIN_DECLARATION:
                    yield index, Keyword.Declaration, value
                elif value in self.BUILTIN_NAMESPACE:
                    yield index, Keyword.Namespace, value
                elif value in self.CONSTANT:
                    yield index, Name.Constant, value
                elif value in self.PSEUDO_VAR:
                    yield index, Name.Builtin.Pseudo, value
                else:
                    yield index, token, value
            else:
                yield index, token, value
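
    # Keywords, builtins and constants are all plain identifiers to the regex
    # grammar below, so the override above reclassifies Name tokens after the
    # fact instead of adding word-boundary rules for every reserved word.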
    def gen_elixir_sigil_rules():
        # all valid sigil terminators (excluding heredocs)
        terminators = [
            (r'\{', r'\}', '}', 'cb'),
            (r'\[', r'\]', r'\]', 'sb'),
            (r'\(', r'\)', ')', 'pa'),
            ('<', '>', '>', 'ab'),
            ('/', '/', '/', 'slas'),
            (r'\|', r'\|', '|', 'pipe'),
            ('"', '"', '"', 'quot'),
            ("'", "'", "'", 'apos'),
        ]

        # heredocs have slightly different rules
        triquotes = [(r'"""', 'triquot'), (r"'''", 'triapos')]

        token = String.Other
        states = {'sigils': []}

        for term, name in triquotes:
            states['sigils'] += [
                (rf'(~[a-z])({term})', bygroups(token, String.Heredoc),
                 (name + '-end', name + '-intp')),
                (rf'(~[A-Z])({term})', bygroups(token, String.Heredoc),
                 (name + '-end', name + '-no-intp')),
            ]

            states[name + '-end'] = [
                (r'[a-zA-Z]+', token, '#pop'),
                default('#pop'),
            ]
            states[name + '-intp'] = [
                (r'^(\s*)(' + term + ')', bygroups(Whitespace, String.Heredoc), '#pop'),
                include('heredoc_interpol'),
            ]
            states[name + '-no-intp'] = [
                (r'^(\s*)(' + term + ')', bygroups(Whitespace, String.Heredoc), '#pop'),
                include('heredoc_no_interpol'),
            ]

        for lterm, rterm, rterm_class, name in terminators:
            states['sigils'] += [
                (r'~[a-z]' + lterm, token, name + '-intp'),
                (r'~[A-Z]' + lterm, token, name + '-no-intp'),
            ]
            states[name + '-intp'] = \
                gen_elixir_sigstr_rules(rterm, rterm_class, token)
            states[name + '-no-intp'] = \
                gen_elixir_sigstr_rules(rterm, rterm_class, token, interpol=False)

        return states
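
    # The generated 'sigils' rules therefore cover forms such as ~r/foo/i,
    # ~w(one two three) and ~s"text with #{interpolation}", plus the heredoc
    # variants ~s""" ... """ and their non-interpolating uppercase twins.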

    op3_re = "|".join(re.escape(s) for s in OPERATORS3)
    op2_re = "|".join(re.escape(s) for s in OPERATORS2)
    op1_re = "|".join(re.escape(s) for s in OPERATORS1)
    ops_re = rf'(?:{op3_re}|{op2_re}|{op1_re})'
    punctuation_re = "|".join(re.escape(s) for s in PUNCTUATION)
    alnum = r'\w'
    name_re = rf'(?:\.\.\.|[a-z_]{alnum}*[!?]?)'
    modname_re = rf'[A-Z]{alnum}*(?:\.[A-Z]{alnum}*)*'
    complex_name_re = rf'(?:{name_re}|{modname_re}|{ops_re})'
    special_atom_re = r'(?:\.\.\.|<<>>|%\{\}|%|\{\})'

    long_hex_char_re = r'(\\x\{)([\da-fA-F]+)(\})'
    hex_char_re = r'(\\x[\da-fA-F]{1,2})'
    escape_char_re = r'(\\[abdefnrstv])'
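
    # The escape patterns above match Elixir escapes such as \n, \x26 and
    # \x{1F600}; prefixed with ?, the same forms appear as character literals
    # (?a, ?\n, ?\x{1F600}) in the root rules below.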

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'#.*$', Comment.Single),

            # Various kinds of characters
            (r'(\?)' + long_hex_char_re,
             bygroups(String.Char,
                      String.Escape, Number.Hex, String.Escape)),
            (r'(\?)' + hex_char_re,
             bygroups(String.Char, String.Escape)),
            (r'(\?)' + escape_char_re,
             bygroups(String.Char, String.Escape)),
            (r'\?\\?.', String.Char),

            # '::' has to go before atoms
            (r':::', String.Symbol),
            (r'::', Operator),

            # atoms
            (r':' + special_atom_re, String.Symbol),
            (r':' + complex_name_re, String.Symbol),
            (r':"', String.Symbol, 'string_double_atom'),
            (r":'", String.Symbol, 'string_single_atom'),

            # [keywords: ...]
            (rf'({special_atom_re}|{complex_name_re})(:)(?=\s|\n)',
             bygroups(String.Symbol, Punctuation)),

            # @attributes
            (r'@' + name_re, Name.Attribute),

            # identifiers
            (name_re, Name),
            (rf'(%?)({modname_re})', bygroups(Punctuation, Name.Class)),

            # operators and punctuation
            (op3_re, Operator),
            (op2_re, Operator),
            (punctuation_re, Punctuation),
            (r'&\d', Name.Entity),   # anon func arguments
            (op1_re, Operator),

            # numbers
            (r'0b[01]+', Number.Bin),
            (r'0o[0-7]+', Number.Oct),
            (r'0x[\da-fA-F]+', Number.Hex),
            (r'\d(_?\d)*\.\d(_?\d)*([eE][-+]?\d(_?\d)*)?', Number.Float),
            (r'\d(_?\d)*', Number.Integer),

            # strings and heredocs
            (r'(""")(\s*)', bygroups(String.Heredoc, Whitespace),
             'heredoc_double'),
            (r"(''')(\s*)$", bygroups(String.Heredoc, Whitespace),
             'heredoc_single'),
            (r'"', String.Double, 'string_double'),
            (r"'", String.Single, 'string_single'),

            include('sigils'),

            (r'%\{', Punctuation, 'map_key'),
            (r'\{', Punctuation, 'tuple'),
        ],
        'heredoc_double': [
            (r'^(\s*)(""")', bygroups(Whitespace, String.Heredoc), '#pop'),
            include('heredoc_interpol'),
        ],
        'heredoc_single': [
            (r"^\s*'''", String.Heredoc, '#pop'),
            include('heredoc_interpol'),
        ],
        'heredoc_interpol': [
            (r'[^#\\\n]+', String.Heredoc),
            include('escapes'),
            (r'\\.', String.Heredoc),
            (r'\n+', String.Heredoc),
            include('interpol'),
        ],
        'heredoc_no_interpol': [
            (r'[^\\\n]+', String.Heredoc),
            (r'\\.', String.Heredoc),
            (r'\n+', Whitespace),
        ],
        'escapes': [
            (long_hex_char_re,
             bygroups(String.Escape, Number.Hex, String.Escape)),
            (hex_char_re, String.Escape),
            (escape_char_re, String.Escape),
        ],
        'interpol': [
            (r'#\{', String.Interpol, 'interpol_string'),
        ],
        'interpol_string': [
            (r'\}', String.Interpol, "#pop"),
            include('root')
        ],
        'map_key': [
            include('root'),
            (r':', Punctuation, 'map_val'),
            (r'=>', Punctuation, 'map_val'),
            (r'\}', Punctuation, '#pop'),
        ],
        'map_val': [
            include('root'),
            (r',', Punctuation, '#pop'),
            (r'(?=\})', Punctuation, '#pop'),
        ],
        'tuple': [
            include('root'),
            (r'\}', Punctuation, '#pop'),
        ],
    }
    tokens.update(gen_elixir_string_rules('double', '"', String.Double))
    tokens.update(gen_elixir_string_rules('single', "'", String.Single))
    tokens.update(gen_elixir_string_rules('double_atom', '"', String.Symbol))
    tokens.update(gen_elixir_string_rules('single_atom', "'", String.Symbol))
    tokens.update(gen_elixir_sigil_rules())


class ElixirConsoleLexer(Lexer):
    """
    For Elixir interactive console (iex) output like:

    .. sourcecode:: iex

        iex> [head | tail] = [1,2,3]
        [1,2,3]
        iex> head
        1
        iex> tail
        [2,3]
        iex> [head | tail]
        [1,2,3]
        iex> length [head | tail]
        3
    """

    name = 'Elixir iex session'
    aliases = ['iex']
    mimetypes = ['text/x-elixir-shellsession']
    url = 'https://elixir-lang.org'
    version_added = '1.5'

    _prompt_re = re.compile(r'(iex|\.{3})((?:\([\w@_.]+\))?\d+|\(\d+\))?> ')

    def get_tokens_unprocessed(self, text):
        exlexer = ElixirLexer(**self.options)

        curcode = ''
        in_error = False
        insertions = []
        for match in line_re.finditer(text):
            line = match.group()
            if line.startswith('** '):
                in_error = True
                insertions.append((len(curcode),
                                   [(0, Generic.Error, line[:-1])]))
                curcode += line[-1:]
            else:
                m = self._prompt_re.match(line)
                if m is not None:
                    in_error = False
                    end = m.end()
                    insertions.append((len(curcode),
                                       [(0, Generic.Prompt, line[:end])]))
                    curcode += line[end:]
                else:
                    if curcode:
                        yield from do_insertions(
                            insertions, exlexer.get_tokens_unprocessed(curcode))
                        curcode = ''
                        insertions = []
                    token = Generic.Error if in_error else Generic.Output
                    yield match.start(), token, line
        if curcode:
            yield from do_insertions(
                insertions, exlexer.get_tokens_unprocessed(curcode))
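

# A minimal, illustrative self-check of the lexers in this module (assumes a
# normal Pygments installation; the sample snippets are made up):
if __name__ == '__main__':
    from pygments import highlight
    from pygments.formatters import TerminalFormatter

    erl_code = "fact(0) -> 1;\nfact(N) when N > 0 -> N * fact(N - 1).\n"
    ex_code = "defmodule Math do\n  def add(a, b), do: a + b\nend\n"
    print(highlight(erl_code, ErlangLexer(), TerminalFormatter()))
    print(highlight(ex_code, ElixirLexer(), TerminalFormatter()))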