1""" 
    2    pygments.lexers.crystal 
    3    ~~~~~~~~~~~~~~~~~~~~~~~ 
    4 
    5    Lexer for Crystal. 
    6 
    7    :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS. 
    8    :license: BSD, see LICENSE for details. 
    9""" 
    10 
    11import re 
    12 
    13from pygments.lexer import ExtendedRegexLexer, include, bygroups, default, \ 
    14    words, line_re 
    15from pygments.token import Comment, Operator, Keyword, Name, String, Number, \ 
    16    Punctuation, Error, Whitespace 
    17 
    18__all__ = ['CrystalLexer'] 
    19 
    20 
    21CRYSTAL_OPERATORS = [ 
    22    '!=', '!~', '!', '%', '&&', '&', '**', '*', '+', '-', '/', '<=>', '<<', '<=', '<', 
    23    '===', '==', '=~', '=', '>=', '>>', '>', '[]=', '[]?', '[]', '^', '||', '|', '~' 
    24] 
    25 
    26 
class CrystalLexer(ExtendedRegexLexer):
    """
    For Crystal source code.

    Built on ``ExtendedRegexLexer`` because ``heredoc_callback`` needs the
    lexer context: it rewrites ``ctx.pos`` / ``ctx.end`` to lex the rest of
    a heredoc's opening line before consuming the heredoc body.
    """

    # Lexer metadata: display name, project URL, lookup aliases, filename
    # globs and MIME types.
    name = 'Crystal'
    url = 'https://crystal-lang.org'
    aliases = ['cr', 'crystal']
    filenames = ['*.cr']
    mimetypes = ['text/x-crystal']
    # NOTE(review): presumably the Pygments release that introduced this
    # lexer -- confirm against the changelog.
    version_added = '2.2'

    # DOTALL lets ``.`` match newlines; MULTILINE makes ``^``/``$`` match at
    # every line boundary.  Presumably applied to all patterns in ``tokens``.
    flags = re.DOTALL | re.MULTILINE
    40 
    def heredoc_callback(self, match, ctx):
        """Lex a heredoc opener, the rest of its line, and the heredoc bodies.

        ``match`` comes from one of the heredoc rules in ``tokens['root']``
        and has the groups: 1 = ``<<`` or ``<<-``, 2 = optional opening
        quote, 3 = heredoc name, 4 = closing quote, 5 = rest of the line.
        ``ctx`` is the lexer context; its ``pos`` and ``end`` attributes are
        rewritten here, and a ``heredocstack`` list is stored on it to track
        heredocs opened on the same line.

        Yields ``(index, tokentype, value)`` triples.
        """
        # okay, this is the hardest part of parsing Crystal...
        yield match.start(1), Operator, match.group(1)          # <<-?
        yield match.start(2), String.Heredoc, match.group(2)    # quote ", ', `
        yield match.start(3), String.Delimiter, match.group(3)  # heredoc name
        yield match.start(4), String.Heredoc, match.group(4)    # quote again

        # Each entry is (tolerant, name); ``tolerant`` is true for ``<<-``,
        # whose terminator line may be indented.
        heredocstack = ctx.__dict__.setdefault('heredocstack', [])
        outermost = not heredocstack
        heredocstack.append((match.group(1) == '<<-', match.group(3)))

        # Lex only the remainder of the opener line.
        ctx.pos = match.start(5)
        ctx.end = match.end(5)
        # this may find other heredocs, so limit the recursion depth
        if len(heredocstack) < 100:
            yield from self.get_tokens_unprocessed(context=ctx)
        else:
            yield ctx.pos, String.Heredoc, match.group(5)
        ctx.pos = match.end()

        if outermost:
            # this is the outer heredoc again, now we can process them all
            for tolerant, hdname in heredocstack:
                lines = []
                for line_match in line_re.finditer(ctx.text, ctx.pos):
                    if tolerant:
                        check = line_match.group().strip()
                    else:
                        check = line_match.group().rstrip()
                    if check == hdname:
                        for amatch in lines:
                            yield amatch.start(), String.Heredoc, amatch.group()
                        yield line_match.start(), String.Delimiter, line_match.group()
                        ctx.pos = line_match.end()
                        break
                    else:
                        lines.append(line_match)
                else:
                    # end of heredoc not found -- error!
                    for amatch in lines:
                        yield amatch.start(), Error, amatch.group()
                    # Move past the lines just reported.  Without this the
                    # position stays at the end of the opener line and the
                    # same text would be lexed and emitted a second time.
                    if lines:
                        ctx.pos = lines[-1].end()
            ctx.end = len(ctx.text)
            del heredocstack[:]
    87 
    def gen_crystalstrings_rules():
        """Build the lexer states for Crystal string-like literals.

        Returns a dict mapping state names to rule lists:

        * ``strings`` -- entry rules for symbols, chars, quoted strings,
          backticks and percent literals;
        * ``simple-string``, ``simple-sym``, ``simple-backtick`` -- bodies of
          quote-delimited literals;
        * for every percent-literal delimiter tag (``cb``, ``sb``, ``pa``,
          ``ab``, ``pi``): ``<tag>-intp-string``, ``<tag>-string`` and
          ``<tag>-regex``.
        """
        entry_rules = [
            (r'\:\w+[!?]?', String.Symbol),
            (words(CRYSTAL_OPERATORS, prefix=r'\:'), String.Symbol),
            (r":'(\\\\|\\[^\\]|[^'\\])*'", String.Symbol),
            # This allows arbitrary text after '\ for simplicity
            (r"'(\\\\|\\'|[^']|\\[^'\\]+)'", String.Char),
            (r':"', String.Symbol, 'simple-sym'),
            # Crystal doesn't have "symbol:"s but this simplifies function args
            (r'([a-zA-Z_]\w*)(:)(?!:)', bygroups(String.Symbol, Punctuation)),
            (r'"', String.Double, 'simple-string'),
            (r'(?<!\.)`', String.Backtick, 'simple-backtick'),
        ]
        states = {'strings': entry_rules}

        # double-quoted string and symbol
        quoted_kinds = (
            ('string', String.Double, '"'),
            ('sym', String.Symbol, '"'),
            ('backtick', String.Backtick, '`'),
        )
        for kind, token, closer in quoted_kinds:
            # Quoted symbols get escapes only; the others also interpolate.
            if kind == 'sym':
                escape_state = 'string-escaped'
            else:
                escape_state = 'string-intp-escaped'
            states['simple-' + kind] = [
                include(escape_state),
                (r'[^\\' + closer + '#]+', token),
                (r'[\\#]', token),
                (closer, token, '#pop'),
            ]

        # https://crystal-lang.org/docs/syntax_and_semantics/literals/string.html#percent-string-literals
        # Columns: opening regex, closing regex, character-class body, tag.
        percent_delimiters = (
            ('\\{', '\\}', '{}', 'cb'),
            ('\\[', '\\]', '\\[\\]', 'sb'),
            ('\\(', '\\)', '()', 'pa'),
            ('<', '>', '<>', 'ab'),
            ('\\|', '\\|', '\\|', 'pi'),
        )
        for opener, closer, charclass, tag in percent_delimiters:
            # Nesting is only tracked when opener and closer differ.
            nestable = opener != closer
            escaped_char = r'\\[\\' + charclass + ']'
            special_char = r'[\\#' + charclass + ']'
            ordinary_run = r'[^\\#' + charclass + ']+'

            # Interpolating percent strings: %(...) and %Q(...)
            intp_state = tag + '-intp-string'
            intp_rules = [(r'\\' + opener, String.Other)]
            if nestable:
                intp_rules.append((opener, String.Other, '#push'))
            intp_rules.extend([
                (closer, String.Other, '#pop'),
                include('string-intp-escaped'),
                (special_char, String.Other),
                (ordinary_run, String.Other),
            ])
            states[intp_state] = intp_rules
            entry_rules.append((r'%Q?' + opener, String.Other, intp_state))

            # Raw percent strings and percent arrays: %q, %w, %i
            raw_state = tag + '-string'
            raw_rules = [(escaped_char, String.Other)]
            if nestable:
                raw_rules.append((opener, String.Other, '#push'))
            raw_rules.extend([
                (closer, String.Other, '#pop'),
                (special_char, String.Other),
                (ordinary_run, String.Other),
            ])
            states[raw_state] = raw_rules
            # https://crystal-lang.org/docs/syntax_and_semantics/literals/array.html#percent-array-literals
            entry_rules.append((r'%[qwi]' + opener, String.Other, raw_state))

            # Percent regexes: %r(...) followed by optional flags
            regex_state = tag + '-regex'
            regex_rules = [(escaped_char, String.Regex)]
            if nestable:
                regex_rules.append((opener, String.Regex, '#push'))
            regex_rules.extend([
                (closer + '[imsx]*', String.Regex, '#pop'),
                include('string-intp'),
                (special_char, String.Regex),
                (ordinary_run, String.Regex),
            ])
            states[regex_state] = regex_rules
            entry_rules.append((r'%r' + opener, String.Regex, regex_state))

        return states
    159 
    # State table for the lexer.  Each state maps to an ordered list of
    # rules; the string-literal states are generated separately and merged
    # in by the ``tokens.update(...)`` call after the table.
    tokens = {
        'root': [
            (r'#.*?$', Comment.Single),
            # keywords
            (words('''
                abstract asm begin break case do else elsif end ensure extend if in
                include next of private protected require rescue return select self super
                then unless until when while with yield
            '''.split(), suffix=r'\b'), Keyword),
            (words('''
                previous_def forall out uninitialized __DIR__ __FILE__ __LINE__
                __END_LINE__
            '''.split(), prefix=r'(?<!\.)', suffix=r'\b'), Keyword.Pseudo),
            # https://crystal-lang.org/docs/syntax_and_semantics/is_a.html
            (r'\.(is_a\?|nil\?|responds_to\?|as\?|as\b)', Keyword.Pseudo),
            (words(['true', 'false', 'nil'], suffix=r'\b'), Keyword.Constant),
            # start of function, class and module names
            (r'(module|lib)(\s+)([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)',
             bygroups(Keyword, Whitespace, Name.Namespace)),
            (r'(def|fun|macro)(\s+)((?:[a-zA-Z_]\w*::)*)',
             bygroups(Keyword, Whitespace, Name.Namespace), 'funcname'),
            # NOTE(review): ``+-/`` inside the class below is a character
            # range, so it also admits ``,`` and ``.`` -- confirm whether
            # that is intended before tightening it to ``+\-/``.
            (r'def(?=[*%&^`~+-/\[<>=])', Keyword, 'funcname'),
            (r'(annotation|class|struct|union|type|alias|enum)(\s+)((?:[a-zA-Z_]\w*::)*)',
             bygroups(Keyword, Whitespace, Name.Namespace), 'classname'),
            # https://crystal-lang.org/api/toplevel.html
            (words('''
                instance_sizeof offsetof pointerof sizeof typeof
            '''.split(), prefix=r'(?<!\.)', suffix=r'\b'), Keyword.Pseudo),
            # macros
            (r'(?<!\.)(debugger\b|p!|pp!|record\b|spawn\b)', Name.Builtin.Pseudo),
            # builtins
            (words('''
                abort at_exit caller exit gets loop main p pp print printf puts
                raise rand read_line sleep spawn sprintf system
            '''.split(), prefix=r'(?<!\.)', suffix=r'\b'), Name.Builtin),
            # https://crystal-lang.org/api/Object.html#macro-summary
            (r'(?<!\.)(((class_)?((getter|property)\b[!?]?|setter\b))|'
             r'(def_(clone|equals|equals_and_hash|hash)|delegate|forward_missing_to)\b)',
             Name.Builtin.Pseudo),
            # normal heredocs
            (r'(?<!\w)(<<-?)(["`\']?)([a-zA-Z_]\w*)(\2)(.*?\n)',
             heredoc_callback),
            # empty string heredocs
            (r'(<<-?)("|\')()(\2)(.*?\n)', heredoc_callback),
            (r'__END__', Comment.Preproc, 'end-part'),
            # multiline regex (after keywords or assignments)
            (r'(?:^|(?<=[=<>~!:])|'
             r'(?<=(?:\s|;)when\s)|'
             r'(?<=(?:\s|;)or\s)|'
             r'(?<=(?:\s|;)and\s)|'
             r'(?<=\.index\s)|'
             r'(?<=\.scan\s)|'
             r'(?<=\.sub\s)|'
             r'(?<=\.sub!\s)|'
             r'(?<=\.gsub\s)|'
             r'(?<=\.gsub!\s)|'
             r'(?<=\.match\s)|'
             r'(?<=(?:\s|;)if\s)|'
             r'(?<=(?:\s|;)elsif\s)|'
             r'(?<=^when\s)|'
             r'(?<=^index\s)|'
             r'(?<=^scan\s)|'
             r'(?<=^sub\s)|'
             r'(?<=^gsub\s)|'
             r'(?<=^sub!\s)|'
             r'(?<=^gsub!\s)|'
             r'(?<=^match\s)|'
             r'(?<=^if\s)|'
             r'(?<=^elsif\s)'
             r')(\s*)(/)', bygroups(Whitespace, String.Regex), 'multiline-regex'),
            # multiline regex (in method calls or subscripts)
            (r'(?<=\(|,|\[)/', String.Regex, 'multiline-regex'),
            # multiline regex (this time the funny no whitespace rule)
            (r'(\s+)(/)(?![\s=])', bygroups(Whitespace, String.Regex),
             'multiline-regex'),
            # lex numbers and ignore following regular expressions which
            # are division operators in fact (grrrr. i hate that. any
            # better ideas?)
            # since pygments 0.7 we also eat a "?" operator after numbers
            # so that the char operator does not work. Chars are not allowed
            # there so that you can use the ternary operator.
            # stupid example:
            #   x>=0?n[x]:""
            (r'(0o[0-7]+(?:_[0-7]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?',
             bygroups(Number.Oct, Whitespace, Operator)),
            (r'(0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?',
             bygroups(Number.Hex, Whitespace, Operator)),
            (r'(0b[01]+(?:_[01]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?',
             bygroups(Number.Bin, Whitespace, Operator)),
            # 3 separate expressions for floats because any of the 3 optional
            # parts makes it a float
            (r'((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)(?:e[+-]?[0-9]+)?'
             r'(?:_?f[0-9]+)?)(\s*)([/?])?',
             bygroups(Number.Float, Whitespace, Operator)),
            (r'((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)?(?:e[+-]?[0-9]+)'
             r'(?:_?f[0-9]+)?)(\s*)([/?])?',
             bygroups(Number.Float, Whitespace, Operator)),
            (r'((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)?(?:e[+-]?[0-9]+)?'
             r'(?:_?f[0-9]+))(\s*)([/?])?',
             bygroups(Number.Float, Whitespace, Operator)),
            # The integer type suffix is shared by both alternatives so that
            # a literal zero may carry one too (``0u8``, ``0_i64``); the
            # trailing ``\b`` still rejects a zero followed by more digits.
            (r'((?:0|[1-9][\d]*(?:_\d+)*)(?:_?[iu][0-9]+)?)\b(\s*)([/?])?',
             bygroups(Number.Integer, Whitespace, Operator)),
            # Names
            (r'@@[a-zA-Z_]\w*', Name.Variable.Class),
            (r'@[a-zA-Z_]\w*', Name.Variable.Instance),
            (r'\$\w+', Name.Variable.Global),
            (r'\$[!@&`\'+~=/\\,;.<>_*$?:"^-]', Name.Variable.Global),
            (r'\$-[0adFiIlpvw]', Name.Variable.Global),
            (r'::', Operator),
            include('strings'),
            # https://crystal-lang.org/reference/syntax_and_semantics/literals/char.html
            (r'\?(\\[MC]-)*'  # modifiers
             r'(\\([\\abefnrtv#"\']|[0-7]{1,3}|x[a-fA-F0-9]{2}|u[a-fA-F0-9]{4}|u\{[a-fA-F0-9 ]+\})|\S)'
             r'(?!\w)',
             String.Char),
            (r'[A-Z][A-Z_]+\b(?!::|\.)', Name.Constant),
            # macro expansion
            (r'\{%', String.Interpol, 'in-macro-control'),
            (r'\{\{', String.Interpol, 'in-macro-expr'),
            # annotations
            (r'(@\[)(\s*)([A-Z]\w*(::[A-Z]\w*)*)',
             bygroups(Operator, Whitespace, Name.Decorator), 'in-annot'),
            # this is needed because Crystal attributes can look
            # like keywords (class) or like this: ` ?!?
            (words(CRYSTAL_OPERATORS, prefix=r'(\.|::)'),
             bygroups(Operator, Name.Operator)),
            (r'(\.|::)([a-zA-Z_]\w*[!?]?|[*%&^`~+\-/\[<>=])',
             bygroups(Operator, Name)),
            # Names can end with [!?] unless it's "!="
            (r'[a-zA-Z_]\w*(?:[!?](?!=))?', Name),
            (r'(\[|\]\??|\*\*|<=>?|>=|<<?|>>?|=~|===|'
             r'!~|&&?|\|\||\.{1,3})', Operator),
            (r'[-+/*%=<>&!^|~]=?', Operator),
            (r'[(){};,/?:\\]', Punctuation),
            (r'\s+', Whitespace)
        ],
        # Entered after def/fun/macro: take one (optionally receiver-
        # qualified) method name or operator, then return.
        'funcname': [
            (r'(?:([a-zA-Z_]\w*)(\.))?'
             r'([a-zA-Z_]\w*[!?]?|\*\*?|[-+]@?|'
             r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)',
             bygroups(Name.Class, Operator, Name.Function), '#pop'),
            default('#pop')
        ],
        # Entered after class/struct/enum/...: type names and a
        # parenthesised type name, then return on anything else.
        'classname': [
            (r'[A-Z_]\w*', Name.Class),
            (r'(\()(\s*)([A-Z_]\w*)(\s*)(\))',
             bygroups(Punctuation, Whitespace, Name.Class, Whitespace, Punctuation)),
            default('#pop')
        ],
        # Body of a ``#{...}`` interpolation; nested braces push and pop so
        # the closing brace is matched at the right depth.
        'in-intp': [
            (r'\{', String.Interpol, '#push'),
            (r'\}', String.Interpol, '#pop'),
            include('root'),
        ],
        'string-intp': [
            (r'#\{', String.Interpol, 'in-intp'),
        ],
        'string-escaped': [
            # https://crystal-lang.org/reference/syntax_and_semantics/literals/string.html
            (r'\\([\\abefnrtv#"\']|[0-7]{1,3}|x[a-fA-F0-9]{2}|u[a-fA-F0-9]{4}|u\{[a-fA-F0-9 ]+\})',
             String.Escape)
        ],
        'string-intp-escaped': [
            include('string-intp'),
            include('string-escaped'),
        ],
        'interpolated-regex': [
            include('string-intp'),
            (r'[\\#]', String.Regex),
            (r'[^\\#]+', String.Regex),
        ],
        'interpolated-string': [
            include('string-intp'),
            (r'[\\#]', String.Other),
            (r'[^\\#]+', String.Other),
        ],
        # Body of a ``/.../`` regex literal, up to the closing slash and any
        # trailing flag letters.
        'multiline-regex': [
            include('string-intp'),
            (r'\\\\', String.Regex),
            (r'\\/', String.Regex),
            (r'[\\#]', String.Regex),
            (r'[^\\/#]+', String.Regex),
            (r'/[imsx]*', String.Regex, '#pop'),
        ],
        # Everything after ``__END__``; with DOTALL in ``flags`` the ``.+``
        # also spans newlines.
        'end-part': [
            (r'.+', Comment.Preproc, '#pop')
        ],
        'in-macro-control': [
            (r'\{%', String.Interpol, '#push'),
            (r'%\}', String.Interpol, '#pop'),
            (r'(for|verbatim)\b', Keyword),
            include('root'),
        ],
        'in-macro-expr': [
            (r'\{\{', String.Interpol, '#push'),
            (r'\}\}', String.Interpol, '#pop'),
            include('root'),
        ],
        'in-annot': [
            (r'\[', Operator, '#push'),
            (r'\]', Operator, '#pop'),
            include('root'),
        ],
    }
    # Merge in the generated string states, including the 'strings' state
    # that 'root' includes.
    tokens.update(gen_crystalstrings_rules())