1""" 
    2    pygments.lexers.erlang 
    3    ~~~~~~~~~~~~~~~~~~~~~~ 
    4 
    Lexers for Erlang and Elixir.

    :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import Lexer, RegexLexer, bygroups, words, do_insertions, \
    include, default, line_re
from pygments.token import Comment, Operator, Keyword, Name, String, \
    Number, Punctuation, Generic, Whitespace

__all__ = ['ErlangLexer', 'ErlangShellLexer', 'ElixirConsoleLexer',
           'ElixirLexer']


class ErlangLexer(RegexLexer):
    """
    For the Erlang functional programming language.
    """

    name = 'Erlang'
    url = 'https://www.erlang.org/'
    aliases = ['erlang']
    filenames = ['*.erl', '*.hrl', '*.es', '*.escript']
    mimetypes = ['text/x-erlang']
    version_added = '0.9'

    keywords = (
        'after', 'begin', 'case', 'catch', 'cond', 'end', 'fun', 'if',
        'let', 'of', 'query', 'receive', 'try', 'when',
    )

    builtins = (  # See erlang(3) man page
        'abs', 'append_element', 'apply', 'atom_to_list', 'binary_to_list',
        'bitstring_to_list', 'binary_to_term', 'bit_size', 'bump_reductions',
        'byte_size', 'cancel_timer', 'check_process_code', 'delete_module',
        'demonitor', 'disconnect_node', 'display', 'element', 'erase', 'exit',
        'float', 'float_to_list', 'fun_info', 'fun_to_list',
        'function_exported', 'garbage_collect', 'get', 'get_keys',
        'group_leader', 'hash', 'hd', 'integer_to_list', 'iolist_to_binary',
        'iolist_size', 'is_atom', 'is_binary', 'is_bitstring', 'is_boolean',
        'is_builtin', 'is_float', 'is_function', 'is_integer', 'is_list',
        'is_number', 'is_pid', 'is_port', 'is_process_alive', 'is_record',
        'is_reference', 'is_tuple', 'length', 'link', 'list_to_atom',
        'list_to_binary', 'list_to_bitstring', 'list_to_existing_atom',
        'list_to_float', 'list_to_integer', 'list_to_pid', 'list_to_tuple',
        'load_module', 'localtime_to_universaltime', 'make_tuple', 'md5',
        'md5_final', 'md5_update', 'memory', 'module_loaded', 'monitor',
        'monitor_node', 'node', 'nodes', 'open_port', 'phash', 'phash2',
        'pid_to_list', 'port_close', 'port_command', 'port_connect',
        'port_control', 'port_call', 'port_info', 'port_to_list',
        'process_display', 'process_flag', 'process_info', 'purge_module',
        'put', 'read_timer', 'ref_to_list', 'register', 'resume_process',
        'round', 'send', 'send_after', 'send_nosuspend', 'set_cookie',
        'setelement', 'size', 'spawn', 'spawn_link', 'spawn_monitor',
        'spawn_opt', 'split_binary', 'start_timer', 'statistics',
        'suspend_process', 'system_flag', 'system_info', 'system_monitor',
        'system_profile', 'term_to_binary', 'tl', 'trace', 'trace_delivered',
        'trace_info', 'trace_pattern', 'trunc', 'tuple_size', 'tuple_to_list',
        'universaltime_to_localtime', 'unlink', 'unregister', 'whereis'
    )

    operators = r'(\+\+?|--?|\*|/|<|>|/=|=:=|=/=|=<|>=|==?|<-|!|\?)'
    word_operators = (
        'and', 'andalso', 'band', 'bnot', 'bor', 'bsl', 'bsr', 'bxor',
        'div', 'not', 'or', 'orelse', 'rem', 'xor'
    )

    atom_re = r"(?:[a-z]\w*|'[^\n']*[^\\]')"

    variable_re = r'(?:[A-Z_]\w*)'

    esc_char_re = r'[bdefnrstv\'"\\]'
    esc_octal_re = r'[0-7][0-7]?[0-7]?'
    esc_hex_re = r'(?:x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\})'
    esc_ctrl_re = r'\^[a-zA-Z]'
    escape_re = r'(?:\\(?:'+esc_char_re+r'|'+esc_octal_re+r'|'+esc_hex_re+r'|'+esc_ctrl_re+r'))'

    macro_re = r'(?:'+variable_re+r'|'+atom_re+r')'

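    # Radix part of Erlang's Base#Value integer literals; valid bases are 2-36.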
    base_re = r'(?:[2-9]|[12][0-9]|3[0-6])'

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'(%.*)(\n)', bygroups(Comment, Whitespace)),
            (words(keywords, suffix=r'\b'), Keyword),
            (words(builtins, suffix=r'\b'), Name.Builtin),
            (words(word_operators, suffix=r'\b'), Operator.Word),
            (r'^-', Punctuation, 'directive'),
            (operators, Operator),
            (r'"', String, 'string'),
            (r'<<', Name.Label),
            (r'>>', Name.Label),
            ('(' + atom_re + ')(:)', bygroups(Name.Namespace, Punctuation)),
            ('(?:^|(?<=:))(' + atom_re + r')(\s*)(\()',
             bygroups(Name.Function, Whitespace, Punctuation)),
            (r'[+-]?' + base_re + r'#[0-9a-zA-Z]+', Number.Integer),
            # Floats must come before plain integers: the first matching rule wins.
            (r'[+-]?\d+\.\d+', Number.Float),
            (r'[+-]?\d+', Number.Integer),
            (r'[]\[:_@\".{}()|;,]', Punctuation),
            (variable_re, Name.Variable),
            (atom_re, Name),
            (r'\?'+macro_re, Name.Constant),
            (r'\$(?:'+escape_re+r'|\\[ %]|[^\\])', String.Char),
            (r'#'+atom_re+r'(?:\.'+atom_re+r')?', Name.Label),

            # Erlang script shebang
            (r'\A#!.+\n', Comment.Hashbang),

            # EEP 43: Maps
            # http://www.erlang.org/eeps/eep-0043.html
            (r'#\{', Punctuation, 'map_key'),
        ],
        'string': [
            (escape_re, String.Escape),
            (r'"', String, '#pop'),
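            # io:format/io_lib:format control sequences, e.g. ~p, ~s, ~10.2f.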
            (r'~[0-9.*]*[~#+BPWXb-ginpswx]', String.Interpol),
            (r'[^"\\~]+', String),
            (r'~', String),
        ],
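        # Module attributes such as -module(...), -define(...) and -record(...),
        # entered from the '^-' rule above.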
        'directive': [
            (r'(define)(\s*)(\()('+macro_re+r')',
             bygroups(Name.Entity, Whitespace, Punctuation, Name.Constant), '#pop'),
            (r'(record)(\s*)(\()('+macro_re+r')',
             bygroups(Name.Entity, Whitespace, Punctuation, Name.Label), '#pop'),
            (atom_re, Name.Entity, '#pop'),
        ],
        'map_key': [
            include('root'),
            (r'=>', Punctuation, 'map_val'),
            (r':=', Punctuation, 'map_val'),
            (r'\}', Punctuation, '#pop'),
        ],
        'map_val': [
            include('root'),
            (r',', Punctuation, '#pop'),
            (r'(?=\})', Punctuation, '#pop'),
        ],
    }


class ErlangShellLexer(Lexer):
    """
    Shell sessions in erl (for Erlang code).
    """
    name = 'Erlang erl session'
    aliases = ['erl']
    filenames = ['*.erl-sh']
    mimetypes = ['text/x-erl-shellsession']
    url = 'https://www.erlang.org/'
    version_added = '1.1'

    _prompt_re = re.compile(r'(?:\([\w@_.]+\))?\d+>(?=\s|\Z)')

    def get_tokens_unprocessed(self, text):
        erlexer = ErlangLexer(**self.options)

        curcode = ''
        insertions = []
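        # Accumulate prompt-prefixed lines into one buffer and splice the
        # prompts back in with do_insertions(), so the Erlang lexer sees
        # contiguous source code.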
        for match in line_re.finditer(text):
            line = match.group()
            m = self._prompt_re.match(line)
            if m is not None:
                end = m.end()
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:end])]))
                curcode += line[end:]
            else:
                if curcode:
                    yield from do_insertions(insertions,
                                             erlexer.get_tokens_unprocessed(curcode))
                    curcode = ''
                    insertions = []
                if line.startswith('*'):
                    yield match.start(), Generic.Traceback, line
                else:
                    yield match.start(), Generic.Output, line
        if curcode:
            yield from do_insertions(insertions,
                                     erlexer.get_tokens_unprocessed(curcode))


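# Build the lexer state for one quoted-string flavour: body characters,
# escapes and interpolation, terminated by `symbol`.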
def gen_elixir_string_rules(name, symbol, token):
    states = {}
    states['string_' + name] = [
        (rf'[^#{symbol}\\]+', token),
        include('escapes'),
        (r'\\.', token),
        (rf'({symbol})', bygroups(token), "#pop"),
        include('interpol')
    ]
    return states


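# Build the body rules for a sigil string delimited by `term`; escapes and
# interpolation are only generated when `interpol` is True.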
def gen_elixir_sigstr_rules(term, term_class, token, interpol=True):
    if interpol:
        return [
            (rf'[^#{term_class}\\]+', token),
            include('escapes'),
            (r'\\.', token),
            (rf'{term}[a-zA-Z]*', token, '#pop'),
            include('interpol')
        ]
    else:
        return [
            (rf'[^{term_class}\\]+', token),
            (r'\\.', token),
            (rf'{term}[a-zA-Z]*', token, '#pop'),
        ]


class ElixirLexer(RegexLexer):
    """
    For the Elixir language.
    """

    name = 'Elixir'
    url = 'https://elixir-lang.org'
    aliases = ['elixir', 'ex', 'exs']
    filenames = ['*.ex', '*.eex', '*.exs', '*.leex']
    mimetypes = ['text/x-elixir']
    version_added = '1.5'

    KEYWORD = ('fn', 'do', 'end', 'after', 'else', 'rescue', 'catch')
    KEYWORD_OPERATOR = ('not', 'and', 'or', 'when', 'in')
    BUILTIN = (
        'case', 'cond', 'for', 'if', 'unless', 'try', 'receive', 'raise',
        'quote', 'unquote', 'unquote_splicing', 'throw', 'super',
    )
    BUILTIN_DECLARATION = (
        'def', 'defp', 'defmodule', 'defprotocol', 'defmacro', 'defmacrop',
        'defdelegate', 'defexception', 'defstruct', 'defimpl', 'defcallback',
    )

    BUILTIN_NAMESPACE = ('import', 'require', 'use', 'alias')
    CONSTANT = ('nil', 'true', 'false')

    PSEUDO_VAR = ('_', '__MODULE__', '__DIR__', '__ENV__', '__CALLER__')

    OPERATORS3 = (
        '<<<', '>>>', '|||', '&&&', '^^^', '~~~', '===', '!==',
        '~>>', '<~>', '|~>', '<|>',
    )
    OPERATORS2 = (
        '==', '!=', '<=', '>=', '&&', '||', '<>', '++', '--', '|>', '=~',
        '->', '<-', '|', '.', '=', '~>', '<~',
    )
    OPERATORS1 = ('<', '>', '+', '-', '*', '/', '!', '^', '&')

    PUNCTUATION = (
        '\\\\', '<<', '>>', '=>', '(', ')', ':', ';', ',', '[', ']',
    )

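    # Re-tag plain Name tokens emitted by the regex rules according to the
    # keyword/builtin tables defined above.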
    def get_tokens_unprocessed(self, text):
        for index, token, value in RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name:
                if value in self.KEYWORD:
                    yield index, Keyword, value
                elif value in self.KEYWORD_OPERATOR:
                    yield index, Operator.Word, value
                elif value in self.BUILTIN:
                    yield index, Keyword, value
                elif value in self.BUILTIN_DECLARATION:
                    yield index, Keyword.Declaration, value
                elif value in self.BUILTIN_NAMESPACE:
                    yield index, Keyword.Namespace, value
                elif value in self.CONSTANT:
                    yield index, Name.Constant, value
                elif value in self.PSEUDO_VAR:
                    yield index, Name.Builtin.Pseudo, value
                else:
                    yield index, token, value
            else:
                yield index, token, value

    def gen_elixir_sigil_rules():
        # all valid sigil terminators (excluding heredocs)
        terminators = [
            (r'\{', r'\}', '}',   'cb'),
            (r'\[', r'\]', r'\]', 'sb'),
            (r'\(', r'\)', ')',   'pa'),
            ('<',   '>',   '>',   'ab'),
            ('/',   '/',   '/',   'slas'),
            (r'\|', r'\|', '|',   'pipe'),
            ('"',   '"',   '"',   'quot'),
            ("'",   "'",   "'",   'apos'),
        ]

        # heredocs have slightly different rules
        triquotes = [(r'"""', 'triquot'), (r"'''", 'triapos')]

        token = String.Other
        states = {'sigils': []}

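        # Lowercase sigils (~s, ~r, ...) get states with escapes and
        # interpolation; uppercase sigils (~S, ~R, ...) get literal states
        # without them, for the heredoc forms as well as the delimiters below.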
        for term, name in triquotes:
            states['sigils'] += [
                (rf'(~[a-z])({term})', bygroups(token, String.Heredoc),
                    (name + '-end', name + '-intp')),
                (rf'(~[A-Z])({term})', bygroups(token, String.Heredoc),
                    (name + '-end', name + '-no-intp')),
            ]

            states[name + '-end'] = [
                (r'[a-zA-Z]+', token, '#pop'),
                default('#pop'),
            ]
            states[name + '-intp'] = [
                (r'^(\s*)(' + term + ')', bygroups(Whitespace, String.Heredoc), '#pop'),
                include('heredoc_interpol'),
            ]
            states[name + '-no-intp'] = [
                (r'^(\s*)(' + term + ')', bygroups(Whitespace, String.Heredoc), '#pop'),
                include('heredoc_no_interpol'),
            ]

        for lterm, rterm, rterm_class, name in terminators:
            states['sigils'] += [
                (r'~[a-z]' + lterm, token, name + '-intp'),
                (r'~[A-Z]' + lterm, token, name + '-no-intp'),
            ]
            states[name + '-intp'] = \
                gen_elixir_sigstr_rules(rterm, rterm_class, token)
            states[name + '-no-intp'] = \
                gen_elixir_sigstr_rules(rterm, rterm_class, token, interpol=False)

        return states

    op3_re = "|".join(re.escape(s) for s in OPERATORS3)
    op2_re = "|".join(re.escape(s) for s in OPERATORS2)
    op1_re = "|".join(re.escape(s) for s in OPERATORS1)
    ops_re = rf'(?:{op3_re}|{op2_re}|{op1_re})'
    punctuation_re = "|".join(re.escape(s) for s in PUNCTUATION)
    alnum = r'\w'
    name_re = rf'(?:\.\.\.|[a-z_]{alnum}*[!?]?)'
    modname_re = rf'[A-Z]{alnum}*(?:\.[A-Z]{alnum}*)*'
    complex_name_re = rf'(?:{name_re}|{modname_re}|{ops_re})'
    special_atom_re = r'(?:\.\.\.|<<>>|%\{\}|%|\{\})'

    long_hex_char_re = r'(\\x\{)([\da-fA-F]+)(\})'
    hex_char_re = r'(\\x[\da-fA-F]{1,2})'
    escape_char_re = r'(\\[abdefnrstv])'

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'#.*$', Comment.Single),

            # Various kinds of characters
            (r'(\?)' + long_hex_char_re,
                bygroups(String.Char,
                         String.Escape, Number.Hex, String.Escape)),
            (r'(\?)' + hex_char_re,
                bygroups(String.Char, String.Escape)),
            (r'(\?)' + escape_char_re,
                bygroups(String.Char, String.Escape)),
            (r'\?\\?.', String.Char),

            # '::' has to go before atoms
            (r':::', String.Symbol),
            (r'::', Operator),

            # atoms
            (r':' + special_atom_re, String.Symbol),
            (r':' + complex_name_re, String.Symbol),
            (r':"', String.Symbol, 'string_double_atom'),
            (r":'", String.Symbol, 'string_single_atom'),

            # [keywords: ...]
            (rf'({special_atom_re}|{complex_name_re})(:)(?=\s|\n)',
                bygroups(String.Symbol, Punctuation)),

            # @attributes
            (r'@' + name_re, Name.Attribute),

            # identifiers
            (name_re, Name),
            (rf'(%?)({modname_re})', bygroups(Punctuation, Name.Class)),

            # operators and punctuation
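            # (longest operators first, so e.g. '<<<' is not split into '<<' + '<')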
            (op3_re, Operator),
            (op2_re, Operator),
            (punctuation_re, Punctuation),
            (r'&\d', Name.Entity),   # anon func arguments
            (op1_re, Operator),

            # numbers
            (r'0b[01]+', Number.Bin),
            (r'0o[0-7]+', Number.Oct),
            (r'0x[\da-fA-F]+', Number.Hex),
            (r'\d(_?\d)*\.\d(_?\d)*([eE][-+]?\d(_?\d)*)?', Number.Float),
            (r'\d(_?\d)*', Number.Integer),

            # strings and heredocs
            (r'(""")(\s*)', bygroups(String.Heredoc, Whitespace),
                'heredoc_double'),
            (r"(''')(\s*)$", bygroups(String.Heredoc, Whitespace),
                'heredoc_single'),
            (r'"', String.Double, 'string_double'),
            (r"'", String.Single, 'string_single'),

            include('sigils'),

            (r'%\{', Punctuation, 'map_key'),
            (r'\{', Punctuation, 'tuple'),
        ],
        'heredoc_double': [
            (r'^(\s*)(""")', bygroups(Whitespace, String.Heredoc), '#pop'),
            include('heredoc_interpol'),
        ],
        'heredoc_single': [
            (r"^\s*'''", String.Heredoc, '#pop'),
            include('heredoc_interpol'),
        ],
        'heredoc_interpol': [
            (r'[^#\\\n]+', String.Heredoc),
            include('escapes'),
            (r'\\.', String.Heredoc),
            (r'\n+', String.Heredoc),
            include('interpol'),
        ],
        'heredoc_no_interpol': [
            (r'[^\\\n]+', String.Heredoc),
            (r'\\.', String.Heredoc),
            (r'\n+', Whitespace),
        ],
        'escapes': [
            (long_hex_char_re,
                bygroups(String.Escape, Number.Hex, String.Escape)),
            (hex_char_re, String.Escape),
            (escape_char_re, String.Escape),
        ],
        'interpol': [
            (r'#\{', String.Interpol, 'interpol_string'),
        ],
        'interpol_string': [
            (r'\}', String.Interpol, "#pop"),
            include('root')
        ],
        'map_key': [
            include('root'),
            (r':', Punctuation, 'map_val'),
            (r'=>', Punctuation, 'map_val'),
            (r'\}', Punctuation, '#pop'),
        ],
        'map_val': [
            include('root'),
            (r',', Punctuation, '#pop'),
            (r'(?=\})', Punctuation, '#pop'),
        ],
        'tuple': [
            include('root'),
            (r'\}', Punctuation, '#pop'),
        ],
    }
    tokens.update(gen_elixir_string_rules('double', '"', String.Double))
    tokens.update(gen_elixir_string_rules('single', "'", String.Single))
    tokens.update(gen_elixir_string_rules('double_atom', '"', String.Symbol))
    tokens.update(gen_elixir_string_rules('single_atom', "'", String.Symbol))
    tokens.update(gen_elixir_sigil_rules())


class ElixirConsoleLexer(Lexer):
    """
    For Elixir interactive console (iex) output like:

    .. sourcecode:: iex

        iex> [head | tail] = [1,2,3]
        [1,2,3]
        iex> head
        1
        iex> tail
        [2,3]
        iex> [head | tail]
        [1,2,3]
        iex> length [head | tail]
        3
    """

    name = 'Elixir iex session'
    aliases = ['iex']
    mimetypes = ['text/x-elixir-shellsession']
    url = 'https://elixir-lang.org'
    version_added = '1.5'

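    # Prompt forms: 'iex> ', 'iex(1)> ', 'iex(node@host)1> ' and continuation '...> '.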
    _prompt_re = re.compile(r'(iex|\.{3})((?:\([\w@_.]+\))?\d+|\(\d+\))?> ')

    def get_tokens_unprocessed(self, text):
        exlexer = ElixirLexer(**self.options)

        curcode = ''
        in_error = False
        insertions = []
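        # Lines starting with '** ' open an error report; subsequent output
        # lines stay Generic.Error until the next prompt resets in_error.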
        for match in line_re.finditer(text):
            line = match.group()
            if line.startswith('** '):
                in_error = True
                insertions.append((len(curcode),
                                   [(0, Generic.Error, line[:-1])]))
                curcode += line[-1:]
            else:
                m = self._prompt_re.match(line)
                if m is not None:
                    in_error = False
                    end = m.end()
                    insertions.append((len(curcode),
                                       [(0, Generic.Prompt, line[:end])]))
                    curcode += line[end:]
                else:
                    if curcode:
                        yield from do_insertions(
                            insertions, exlexer.get_tokens_unprocessed(curcode))
                        curcode = ''
                        insertions = []
                    token = Generic.Error if in_error else Generic.Output
                    yield match.start(), token, line
        if curcode:
            yield from do_insertions(
                insertions, exlexer.get_tokens_unprocessed(curcode))