1""" 
    2    pygments.lexers.esoteric 
    3    ~~~~~~~~~~~~~~~~~~~~~~~~ 
    4 
    5    Lexers for esoteric languages. 
    6 
    7    :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS. 
    8    :license: BSD, see LICENSE for details. 
    9""" 
    10 
    11from pygments.lexer import RegexLexer, include, words, bygroups 
    12from pygments.token import Comment, Operator, Keyword, Name, String, Number, \ 
    13    Punctuation, Error, Whitespace 
    14 
    15__all__ = ['BrainfuckLexer', 'BefungeLexer', 'RedcodeLexer', 'CAmkESLexer', 
    16           'CapDLLexer', 'AheuiLexer'] 
    17 
    18 
    19class BrainfuckLexer(RegexLexer): 
    20    """ 
    21    Lexer for the esoteric BrainFuck language. 
    22    """ 
    23 
    24    name = 'Brainfuck' 
    25    url = 'http://www.muppetlabs.com/~breadbox/bf/' 
    26    aliases = ['brainfuck', 'bf'] 
    27    filenames = ['*.bf', '*.b'] 
    28    mimetypes = ['application/x-brainfuck'] 
    29    version_added = '' 
    30 
    31    tokens = { 
    32        'common': [ 
    33            # use different colors for different instruction types 
    34            (r'[.,]+', Name.Tag), 
    35            (r'[+-]+', Name.Builtin), 
    36            (r'[<>]+', Name.Variable), 
    37            (r'[^.,+\-<>\[\]]+', Comment), 
    38        ], 
    39        'root': [ 
    40            (r'\[', Keyword, 'loop'), 
    41            (r'\]', Error), 
    42            include('common'), 
    43        ], 
    44        'loop': [ 
    45            (r'\[', Keyword, '#push'), 
    46            (r'\]', Keyword, '#pop'), 
    47            include('common'), 
    48        ] 
    49    } 
    50 
    51    def analyse_text(text): 
    52        """It's safe to assume that a program which mostly consists of + - 
    53        and < > is brainfuck.""" 
    54        plus_minus_count = 0 
    55        greater_less_count = 0 
    56 
    57        range_to_check = max(256, len(text)) 
    58 
    59        for c in text[:range_to_check]: 
    60            if c == '+' or c == '-': 
    61                plus_minus_count += 1 
    62            if c == '<' or c == '>': 
    63                greater_less_count += 1 
    64 
    65        if plus_minus_count > (0.25 * range_to_check): 
    66            return 1.0 
    67        if greater_less_count > (0.25 * range_to_check): 
    68            return 1.0 
    69 
    70        result = 0 
    71        if '[-]' in text: 
    72            result += 0.5 
    73 
    74        return result 
    75 
    76 
class BefungeLexer(RegexLexer):
    """
    Lexer for the esoteric Befunge language.

    All rules live in the single ``root`` state.  Apart from double-quoted
    strings, ``'``-prefixed character literals and whitespace runs, every
    rule matches exactly one character and maps it to a token type.
    """
    name = 'Befunge'
    url = 'http://en.wikipedia.org/wiki/Befunge'
    aliases = ['befunge']
    filenames = ['*.befunge']
    mimetypes = ['application/x-befunge']
    version_added = '0.7'

    # Rules are tried top to bottom, so a character that appears in more
    # than one class is claimed by the first rule that lists it (for
    # example the lowercase hex digits a-f are always lexed as Number).
    tokens = {
        'root': [
            (r'[0-9a-f]', Number),
            (r'[+*/%!`-]', Operator),             # Traditional math
            (r'[<>^v?\[\]rxjk]', Name.Variable),  # Move, imperatives
            (r'[:\\$.,n]', Name.Builtin),         # Stack ops, imperatives
            (r'[|_mw]', Keyword),
            (r'[{}]', Name.Tag),                  # Befunge-98 stack ops
            # Non-greedy and without DOTALL, so a string ends at the next
            # double quote and never spans a line break.
            (r'".*?"', String.Double),            # Strings don't appear to allow escapes
            (r'\'.', String.Single),              # Single character
            (r'[#;]', Comment),                   # Trampoline... depends on direction hit
            (r'[pg&~=@iotsy]', Keyword),          # Misc
            (r'[()A-Z]', Comment),                # Fingerprints
            (r'\s+', Whitespace),                 # Whitespace doesn't matter
        ],
    }
    104 
    105 
    106class CAmkESLexer(RegexLexer): 
    107    """ 
    108    Basic lexer for the input language for the CAmkES component platform. 
    109    """ 
    110    name = 'CAmkES' 
    111    url = 'https://sel4.systems/CAmkES/' 
    112    aliases = ['camkes', 'idl4'] 
    113    filenames = ['*.camkes', '*.idl4'] 
    114    version_added = '2.1' 
    115 
    116    tokens = { 
    117        'root': [ 
    118            # C pre-processor directive 
    119            (r'^(\s*)(#.*)(\n)', bygroups(Whitespace, Comment.Preproc, 
    120                Whitespace)), 
    121 
    122            # Whitespace, comments 
    123            (r'\s+', Whitespace), 
    124            (r'/\*(.|\n)*?\*/', Comment), 
    125            (r'//.*$', Comment), 
    126 
    127            (r'[\[(){},.;\]]', Punctuation), 
    128            (r'[~!%^&*+=|?:<>/-]', Operator), 
    129 
    130            (words(('assembly', 'attribute', 'component', 'composition', 
    131                    'configuration', 'connection', 'connector', 'consumes', 
    132                    'control', 'dataport', 'Dataport', 'Dataports', 'emits', 
    133                    'event', 'Event', 'Events', 'export', 'from', 'group', 
    134                    'hardware', 'has', 'interface', 'Interface', 'maybe', 
    135                    'procedure', 'Procedure', 'Procedures', 'provides', 
    136                    'template', 'thread', 'threads', 'to', 'uses', 'with'), 
    137                   suffix=r'\b'), Keyword), 
    138 
    139            (words(('bool', 'boolean', 'Buf', 'char', 'character', 'double', 
    140                    'float', 'in', 'inout', 'int', 'int16_6', 'int32_t', 
    141                    'int64_t', 'int8_t', 'integer', 'mutex', 'out', 'real', 
    142                    'refin', 'semaphore', 'signed', 'string', 'struct', 
    143                    'uint16_t', 'uint32_t', 'uint64_t', 'uint8_t', 'uintptr_t', 
    144                    'unsigned', 'void'), 
    145                   suffix=r'\b'), Keyword.Type), 
    146 
    147            # Recognised attributes 
    148            (r'[a-zA-Z_]\w*_(priority|domain|buffer)', Keyword.Reserved), 
    149            (words(('dma_pool', 'from_access', 'to_access'), suffix=r'\b'), 
    150                Keyword.Reserved), 
    151 
    152            # CAmkES-level include 
    153            (r'(import)(\s+)((?:<[^>]*>|"[^"]*");)', 
    154                bygroups(Comment.Preproc, Whitespace, Comment.Preproc)), 
    155 
    156            # C-level include 
    157            (r'(include)(\s+)((?:<[^>]*>|"[^"]*");)', 
    158                bygroups(Comment.Preproc, Whitespace, Comment.Preproc)), 
    159 
    160            # Literals 
    161            (r'0[xX][\da-fA-F]+', Number.Hex), 
    162            (r'-?[\d]+', Number), 
    163            (r'-?[\d]+\.[\d]+', Number.Float), 
    164            (r'"[^"]*"', String), 
    165            (r'[Tt]rue|[Ff]alse', Name.Builtin), 
    166 
    167            # Identifiers 
    168            (r'[a-zA-Z_]\w*', Name), 
    169        ], 
    170    } 
    171 
    172 
class CapDLLexer(RegexLexer):
    """
    Basic lexer for CapDL.

    The source of the primary tool that reads such specifications is available
    at https://github.com/seL4/capdl/tree/master/capDL-tool. Note that this
    lexer only supports a subset of the grammar. For example, identifiers can
    shadow type names, but these instances are currently incorrectly
    highlighted as types. Supporting this would need a stateful lexer that is
    considered unnecessarily complex for now.
    """
    name = 'CapDL'
    url = 'https://ssrg.nicta.com.au/publications/nictaabstracts/Kuz_KLW_10.abstract.pml'
    aliases = ['capdl']
    filenames = ['*.cdl']
    version_added = '2.2'

    # Single-state lexer: rules are tried top to bottom, so the keyword
    # lists must stay ahead of the generic identifier rule at the end.
    tokens = {
        'root': [
            # C pre-processor directive
            (r'^(\s*)(#.*)(\n)',
                bygroups(Whitespace, Comment.Preproc, Whitespace)),

            # Whitespace, comments
            (r'\s+', Whitespace),
            (r'/\*(.|\n)*?\*/', Comment),
            # Line comments may start with either '//' or '--'.
            (r'(//|--).*$', Comment),

            (r'[<>\[(){},:;=\]]', Punctuation),
            (r'\.\.', Punctuation),

            (words(('arch', 'arm11', 'caps', 'child_of', 'ia32', 'irq', 'maps',
                    'objects'), suffix=r'\b'), Keyword),

            (words(('aep', 'asid_pool', 'cnode', 'ep', 'frame', 'io_device',
                    'io_ports', 'io_pt', 'notification', 'pd', 'pt', 'tcb',
                    'ut', 'vcpu'), suffix=r'\b'), Keyword.Type),

            # Properties
            (words(('asid', 'addr', 'badge', 'cached', 'dom', 'domainID', 'elf',
                    'fault_ep', 'G', 'guard', 'guard_size', 'init', 'ip',
                    'prio', 'sp', 'R', 'RG', 'RX', 'RW', 'RWG', 'RWX', 'W',
                    'WG', 'WX', 'level', 'masked', 'master_reply', 'paddr',
                    'ports', 'reply', 'uncached'), suffix=r'\b'),
             Keyword.Reserved),

            # Literals
            # Hex is tried first so that the leading '0' of '0x..' is not
            # consumed by the decimal rule below.
            (r'0[xX][\da-fA-F]+', Number.Hex),
            # Decimal number with optional fraction and optional 'k'/'M' suffix.
            (r'\d+(\.\d+)?(k|M)?', Number),
            # The following words are highlighted with the Number token type.
            (words(('bits',), suffix=r'\b'), Number),
            (words(('cspace', 'vspace', 'reply_slot', 'caller_slot',
                    'ipc_buffer_slot'), suffix=r'\b'), Number),

            # Identifiers
            # After the first character, '-', '@' and '.' are accepted too.
            (r'[a-zA-Z_][-@\.\w]*', Name),
        ],
    }
    230 
    231 
    232class RedcodeLexer(RegexLexer): 
    233    """ 
    234    A simple Redcode lexer based on ICWS'94. 
    235    Contributed by Adam Blinkinsop <blinks@acm.org>. 
    236    """ 
    237    name = 'Redcode' 
    238    aliases = ['redcode'] 
    239    filenames = ['*.cw'] 
    240    url = 'https://en.wikipedia.org/wiki/Core_War' 
    241    version_added = '0.8' 
    242 
    243    opcodes = ('DAT', 'MOV', 'ADD', 'SUB', 'MUL', 'DIV', 'MOD', 
    244               'JMP', 'JMZ', 'JMN', 'DJN', 'CMP', 'SLT', 'SPL', 
    245               'ORG', 'EQU', 'END') 
    246    modifiers = ('A', 'B', 'AB', 'BA', 'F', 'X', 'I') 
    247 
    248    tokens = { 
    249        'root': [ 
    250            # Whitespace: 
    251            (r'\s+', Whitespace), 
    252            (r';.*$', Comment.Single), 
    253            # Lexemes: 
    254            #  Identifiers 
    255            (r'\b({})\b'.format('|'.join(opcodes)), Name.Function), 
    256            (r'\b({})\b'.format('|'.join(modifiers)), Name.Decorator), 
    257            (r'[A-Za-z_]\w+', Name), 
    258            #  Operators 
    259            (r'[-+*/%]', Operator), 
    260            (r'[#$@<>]', Operator),  # mode 
    261            (r'[.,]', Punctuation),  # mode 
    262            #  Numbers 
    263            (r'[-+]?\d+', Number.Integer), 
    264        ], 
    265    } 
    266 
    267 
class AheuiLexer(RegexLexer):
    """
    Aheui is an esoteric language based on the Korean alphabet.

    The lexer has a single state with two rules: characters inside the
    Hangul ranges listed below are emitted as operators, and any other
    character (except a newline) is emitted as a comment.
    """

    name = 'Aheui'
    url = 'http://aheui.github.io/'
    aliases = ['aheui']
    filenames = ['*.aheui']
    version_added = ''

    tokens = {
        'root': [
            # One regex character class, split over adjacent string
            # literals for readability.  Each literal contributes several
            # ranges of Hangul syllables.
            # NOTE(review): presumably these ranges are exactly the
            # syllables that Aheui treats as instructions -- confirm
            # against the language specification.
            ('['
             '나-낳냐-냫너-넣녀-녛노-놓뇨-눟뉴-닇'
             '다-닿댜-댷더-덯뎌-뎧도-돟됴-둫듀-딓'
             '따-땋땨-떃떠-떻뗘-뗳또-똫뚀-뚷뜌-띟'
             '라-랗랴-럏러-렇려-렿로-롷료-뤃류-릫'
             '마-맣먀-먛머-멓며-몋모-뫃묘-뭏뮤-믷'
             '바-밯뱌-뱧버-벟벼-볗보-봏뵤-붛뷰-빃'
             '빠-빻뺘-뺳뻐-뻫뼈-뼣뽀-뽛뾰-뿧쀼-삏'
             '사-샇샤-샿서-섷셔-셯소-솧쇼-숳슈-싛'
             '싸-쌓쌰-썋써-쎃쎠-쎻쏘-쏳쑈-쑿쓔-씧'
             '자-잫쟈-쟣저-젛져-졓조-좋죠-줗쥬-즿'
             '차-챃챠-챻처-첳쳐-쳫초-촣쵸-춯츄-칗'
             '카-캏캬-컇커-컿켜-켷코-콯쿄-쿻큐-킣'
             '타-탛탸-턓터-텋텨-톃토-톻툐-퉇튜-틯'
             '파-팧퍄-퍟퍼-펗펴-폏포-퐇표-풓퓨-픻'
             '하-핳햐-햫허-헣혀-혛호-홓효-훟휴-힇'
             ']', Operator),
            # Fallback: any single character not matched above.
            ('.', Comment),
        ],
    }