1""" 
    2    pygments.lexers.stata 
    3    ~~~~~~~~~~~~~~~~~~~~~ 
    4 
    5    Lexer for Stata 
    6 
    7    :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS. 
    8    :license: BSD, see LICENSE for details. 
    9""" 
    10 
    11import re 
    12from pygments.lexer import RegexLexer, default, include, words 
    13from pygments.token import Comment, Keyword, Name, Number, \ 
    14    String, Text, Operator 
    15 
    16from pygments.lexers._stata_builtins import builtins_base, builtins_functions 
    17 
    18__all__ = ['StataLexer'] 
    19 
    20 
    21class StataLexer(RegexLexer): 
    22    """ 
    23    For Stata do files. 
    24    """ 
    25    # Syntax based on 
    26    # - http://fmwww.bc.edu/RePEc/bocode/s/synlightlist.ado 
    27    # - https://github.com/isagalaev/highlight.js/blob/master/src/languages/stata.js 
    28    # - https://github.com/jpitblado/vim-stata/blob/master/syntax/stata.vim 
    29 
    30    name = 'Stata' 
    31    url = 'http://www.stata.com/' 
    32    version_added = '2.2' 
    33    aliases   = ['stata', 'do'] 
    34    filenames = ['*.do', '*.ado'] 
    35    mimetypes = ['text/x-stata', 'text/stata', 'application/x-stata'] 
    36    flags     = re.MULTILINE | re.DOTALL 
    37 
    38    tokens = { 
    39        'root': [ 
    40            include('comments'), 
    41            include('strings'), 
    42            include('macros'), 
    43            include('numbers'), 
    44            include('keywords'), 
    45            include('operators'), 
    46            include('format'), 
    47            (r'.', Text), 
    48        ], 
    49        # Comments are a complicated beast in Stata because they can be 
    50        # nested and there are a few corner cases with that. See: 
    51        # - github.com/kylebarron/language-stata/issues/90 
    52        # - statalist.org/forums/forum/general-stata-discussion/general/1448244 
    53        'comments': [ 
    54            (r'(^//|(?<=\s)//)(?!/)', Comment.Single, 'comments-double-slash'), 
    55            (r'^\s*\*', Comment.Single, 'comments-star'), 
    56            (r'/\*', Comment.Multiline, 'comments-block'), 
    57            (r'(^///|(?<=\s)///)', Comment.Special, 'comments-triple-slash') 
    58        ], 
    59        'comments-block': [ 
    60            (r'/\*', Comment.Multiline, '#push'), 
    61            # this ends and restarts a comment block. but need to catch this so 
    62            # that it doesn\'t start _another_ level of comment blocks 
    63            (r'\*/\*', Comment.Multiline), 
    64            (r'(\*/\s+\*(?!/)[^\n]*)|(\*/)', Comment.Multiline, '#pop'), 
    65            # Match anything else as a character inside the comment 
    66            (r'.', Comment.Multiline), 
    67        ], 
    68        'comments-star': [ 
    69            (r'///.*?\n', Comment.Single, 
    70                ('#pop', 'comments-triple-slash')), 
    71            (r'(^//|(?<=\s)//)(?!/)', Comment.Single, 
    72                ('#pop', 'comments-double-slash')), 
    73            (r'/\*', Comment.Multiline, 'comments-block'), 
    74            (r'.(?=\n)', Comment.Single, '#pop'), 
    75            (r'.', Comment.Single), 
    76        ], 
    77        'comments-triple-slash': [ 
    78            (r'\n', Comment.Special, '#pop'), 
    79            # A // breaks out of a comment for the rest of the line 
    80            (r'//.*?(?=\n)', Comment.Single, '#pop'), 
    81            (r'.', Comment.Special), 
    82        ], 
    83        'comments-double-slash': [ 
    84            (r'\n', Text, '#pop'), 
    85            (r'.', Comment.Single), 
    86        ], 
    87        # `"compound string"' and regular "string"; note the former are 
    88        # nested. 
    89        'strings': [ 
    90            (r'`"', String, 'string-compound'), 
    91            (r'(?<!`)"', String, 'string-regular'), 
    92        ], 
    93        'string-compound': [ 
    94            (r'`"', String, '#push'), 
    95            (r'"\'', String, '#pop'), 
    96            (r'\\\\|\\"|\\\$|\\`|\\\n', String.Escape), 
    97            include('macros'), 
    98            (r'.', String) 
    99        ], 
    100        'string-regular': [ 
    101            (r'(")(?!\')|(?=\n)', String, '#pop'), 
    102            (r'\\\\|\\"|\\\$|\\`|\\\n', String.Escape), 
    103            include('macros'), 
    104            (r'.', String) 
    105        ], 
    106        # A local is usually 
    107        #     `\w{0,31}' 
    108        #     `:extended macro' 
    109        #     `=expression' 
    110        #     `[rsen](results)' 
    111        #     `(++--)scalar(++--)' 
    112        # 
    113        # However, there are all sorts of weird rules wrt edge 
    114        # cases. Instead of writing 27 exceptions, anything inside 
    115        # `' is a local. 
    116        # 
    117        # A global is more restricted, so we do follow rules. Note only 
    118        # locals explicitly enclosed ${} can be nested. 
    119        'macros': [ 
    120            (r'\$(\{|(?=[$`]))', Name.Variable.Global, 'macro-global-nested'), 
    121            (r'\$', Name.Variable.Global,  'macro-global-name'), 
    122            (r'`', Name.Variable, 'macro-local'), 
    123        ], 
    124        'macro-local': [ 
    125            (r'`', Name.Variable, '#push'), 
    126            (r"'", Name.Variable, '#pop'), 
    127            (r'\$(\{|(?=[$`]))', Name.Variable.Global, 'macro-global-nested'), 
    128            (r'\$', Name.Variable.Global, 'macro-global-name'), 
    129            (r'.', Name.Variable),  # fallback 
    130        ], 
    131        'macro-global-nested': [ 
    132            (r'\$(\{|(?=[$`]))', Name.Variable.Global, '#push'), 
    133            (r'\}', Name.Variable.Global, '#pop'), 
    134            (r'\$', Name.Variable.Global, 'macro-global-name'), 
    135            (r'`', Name.Variable, 'macro-local'), 
    136            (r'\w', Name.Variable.Global),  # fallback 
    137            default('#pop'), 
    138        ], 
    139        'macro-global-name': [ 
    140            (r'\$(\{|(?=[$`]))', Name.Variable.Global, 'macro-global-nested', '#pop'), 
    141            (r'\$', Name.Variable.Global, 'macro-global-name', '#pop'), 
    142            (r'`', Name.Variable, 'macro-local', '#pop'), 
    143            (r'\w{1,32}', Name.Variable.Global, '#pop'), 
    144        ], 
    145        # Built in functions and statements 
    146        'keywords': [ 
    147            (words(builtins_functions, prefix = r'\b', suffix = r'(?=\()'), 
    148             Name.Function), 
    149            (words(builtins_base, prefix = r'(^\s*|\s)', suffix = r'\b'), 
    150             Keyword), 
    151        ], 
    152        # http://www.stata.com/help.cgi?operators 
    153        'operators': [ 
    154            (r'-|==|<=|>=|<|>|&|!=', Operator), 
    155            (r'\*|\+|\^|/|!|~|==|~=', Operator) 
    156        ], 
    157        # Stata numbers 
    158        'numbers': [ 
    159            # decimal number 
    160            (r'\b[+-]?([0-9]+(\.[0-9]+)?|\.[0-9]+|\.)([eE][+-]?[0-9]+)?[i]?\b', 
    161             Number), 
    162        ], 
    163        # Stata formats 
    164        'format': [ 
    165            (r'%-?\d{1,2}(\.\d{1,2})?[gfe]c?', Name.Other), 
    166            (r'%(21x|16H|16L|8H|8L)', Name.Other), 
    167            (r'%-?(tc|tC|td|tw|tm|tq|th|ty|tg)\S{0,32}', Name.Other), 
    168            (r'%[-~]?\d{1,4}s', Name.Other), 
    169        ] 
    170    }