1""" 
    2    pygments.lexers.textedit 
    3    ~~~~~~~~~~~~~~~~~~~~~~~~ 
    4 
    5    Lexers for languages related to text processing. 
    6 
    7    :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS. 
    8    :license: BSD, see LICENSE for details. 
    9""" 
    10 
    11import re 
    12from bisect import bisect 
    13 
    14from pygments.lexer import RegexLexer, bygroups, default, include, this, using 
    15from pygments.lexers.python import PythonLexer 
    16from pygments.token import Comment, Keyword, Name, Number, Operator, \ 
    17    Punctuation, String, Text, Whitespace 
    18 
    19__all__ = ['AwkLexer', 'SedLexer', 'VimLexer'] 
    20 
    21 
    22class AwkLexer(RegexLexer): 
    23    """ 
    24    For Awk scripts. 
    25    """ 
    26 
    27    name = 'Awk' 
    28    aliases = ['awk', 'gawk', 'mawk', 'nawk'] 
    29    filenames = ['*.awk'] 
    30    mimetypes = ['application/x-awk'] 
    31    url = 'https://en.wikipedia.org/wiki/AWK' 
    32    version_added = '1.5' 
    33 
    34    tokens = { 
    35        'commentsandwhitespace': [ 
    36            (r'\s+', Text), 
    37            (r'#.*$', Comment.Single) 
    38        ], 
    39        'slashstartsregex': [ 
    40            include('commentsandwhitespace'), 
    41            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/' 
    42             r'\B', String.Regex, '#pop'), 
    43            (r'(?=/)', Text, ('#pop', 'badregex')), 
    44            default('#pop') 
    45        ], 
    46        'badregex': [ 
    47            (r'\n', Text, '#pop') 
    48        ], 
    49        'root': [ 
    50            (r'^(?=\s|/)', Text, 'slashstartsregex'), 
    51            include('commentsandwhitespace'), 
    52            (r'\+\+|--|\|\||&&|in\b|\$|!?~|\?|:|' 
    53             r'(\*\*|[-<>+*%\^/!=|])=?', Operator, 'slashstartsregex'), 
    54            (r'[{(\[;,]', Punctuation, 'slashstartsregex'), 
    55            (r'[})\].]', Punctuation), 
    56            (r'(break|continue|do|while|exit|for|if|else|' 
    57             r'return)\b', Keyword, 'slashstartsregex'), 
    58            (r'function\b', Keyword.Declaration, 'slashstartsregex'), 
    59            (r'(atan2|cos|exp|int|log|rand|sin|sqrt|srand|gensub|gsub|index|' 
    60             r'length|match|split|sprintf|sub|substr|tolower|toupper|close|' 
    61             r'fflush|getline|next|nextfile|print|printf|strftime|systime|' 
    62             r'delete|system)\b', Keyword.Reserved), 
    63            (r'(ARGC|ARGIND|ARGV|BEGIN|CONVFMT|ENVIRON|END|ERRNO|FIELDWIDTHS|' 
    64             r'FILENAME|FNR|FS|IGNORECASE|NF|NR|OFMT|OFS|ORFS|RLENGTH|RS|' 
    65             r'RSTART|RT|SUBSEP)\b', Name.Builtin), 
    66            (r'[$a-zA-Z_]\w*', Name.Other), 
    67            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), 
    68            (r'0x[0-9a-fA-F]+', Number.Hex), 
    69            (r'[0-9]+', Number.Integer), 
    70            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double), 
    71            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single), 
    72        ] 
    73    } 
    74 
    75 
class SedLexer(RegexLexer):
    """
    Lexer for Sed script files.
    """
    name = 'Sed'
    aliases = ['sed', 'gsed', 'ssed']
    filenames = ['*.sed', '*.[gs]sed']
    mimetypes = ['text/x-sed']
    url = 'https://en.wikipedia.org/wiki/Sed'
    version_added = ''
    flags = re.MULTILINE

    # Match the contents within delimiters such as /<contents>/, allowing
    # backslash-escaped characters (including the delimiter itself) and
    # backslash-continued lines.  Used below via a backreference (\2) so
    # the same pattern works with any user-chosen delimiter.
    _inside_delims = r'((?:(?:\\[^\n]|[^\\])*?\\\n)*?(?:\\.|[^\\])*?)'

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'#.*$', Comment.Single),
            # Numeric line addresses and the last-line address '$'.
            (r'[0-9]+', Number.Integer),
            (r'\$', Operator),
            # Block braces, command separators, address negation.
            (r'[{};,!]', Punctuation),
            # Single-letter commands that take no argument.
            (r'[dDFgGhHlnNpPqQxz=]', Keyword),
            # Commands taking a label/filename/text argument up to ';' or EOL.
            (r'([berRtTvwW:])([^;\n]*)', bygroups(Keyword, String.Single)),
            # a/c/i commands with (possibly backslash-continued) text.
            (r'([aci])((?:.*?\\\n)*(?:.*?[^\\]$))', bygroups(Keyword, String.Double)),
            # q/Q with an optional exit code.
            (r'([qQ])([0-9]*)', bygroups(Keyword, Number.Integer)),
            # /regex/ address with the default delimiter.
            (r'(/)' + _inside_delims + r'(/)', bygroups(Punctuation, String.Regex, Punctuation)),
            # \#regex# address with a custom delimiter (captured as \2).
            (r'(\\(.))' + _inside_delims + r'(\2)',
             bygroups(Punctuation, None, String.Regex, Punctuation)),
            # y/source/dest/ transliteration with an arbitrary delimiter.
            (r'(y)(.)' + _inside_delims + r'(\2)' + _inside_delims + r'(\2)',
             bygroups(Keyword, Punctuation, String.Single, Punctuation, String.Single, Punctuation)),
            # s/pattern/replacement/flags substitution with an arbitrary
            # delimiter; the trailing group matches the flag letters/count.
            (r'(s)(.)' + _inside_delims + r'(\2)' + _inside_delims + r'(\2)((?:[gpeIiMm]|[0-9])*)',
             bygroups(Keyword, Punctuation, String.Regex, Punctuation, String.Single, Punctuation,
                      Keyword))
        ]
    }
    112 
class VimLexer(RegexLexer):
    """
    Lexer for VimL script files.
    """
    name = 'VimL'
    aliases = ['vim']
    filenames = ['*.vim', '.vimrc', '.exrc', '.gvimrc',
                 '_vimrc', '_exrc', '_gvimrc', 'vimrc', 'gvimrc']
    mimetypes = ['text/x-vim']
    url = 'https://www.vim.org'
    version_added = '0.8'

    flags = re.MULTILINE

    # Matches any abbreviation of the ':python' ex command: py, pyt, pyth,
    # pytho, python.
    _python = r'py(?:t(?:h(?:o(?:n)?)?)?)?'

    tokens = {
        'root': [
            # ':python << MARKER ... MARKER' heredoc: the embedded code is
            # delegated to PythonLexer; \6 backreferences the end marker.
            (r'^([ \t:]*)(' + _python + r')([ \t]*)(<<)([ \t]*)(.*)((?:\n|.)*)(\6)',
             bygroups(using(this), Keyword, Text, Operator, Text, Text,
                      using(PythonLexer), Text)),
            # Single-line ':python <statement>'.
            (r'^([ \t:]*)(' + _python + r')([ \t])(.*)',
             bygroups(using(this), Keyword, Text, using(PythonLexer))),

            # A line whose first non-blank character is '"' is a comment.
            (r'^\s*".*', Comment),

            (r'[ \t]+', Text),
            # TODO: regexes can have other delims
            (r'/[^/\\\n]*(?:\\[\s\S][^/\\\n]*)*/', String.Regex),
            (r'"[^"\\\n]*(?:\\[\s\S][^"\\\n]*)*"', String.Double),
            (r"'[^\n']*(?:''[^\n']*)*'", String.Single),

            # VimL also uses '"' for trailing comments; this heuristic
            # excludes first characters that suggest a string or option
            # instead.
            (r'(?<=\s)"[^\-:.%#=*].*', Comment),
            (r'-?\d+', Number),
            (r'#[0-9a-f]{6}', Number.Hex),
            (r'^:', Punctuation),
            (r'[()<>+=!|,~-]', Punctuation),  # Inexact list.  Looks decent.
            (r'\b(let|if|else|endif|elseif|fun|function|endfunction)\b',
             Keyword),
            (r'\b(NONE|bold|italic|underline|dark|light)\b', Name.Builtin),
            (r'\b\w+\b', Name.Other),  # These are postprocessed below
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        # The builtin tables are large, so they are imported lazily on
        # first instantiation rather than at module import time.
        from pygments.lexers._vim_builtins import auto, command, option
        self._cmd = command   # ex commands
        self._opt = option    # option names
        self._aut = auto      # autocommand events

        RegexLexer.__init__(self, **options)

    def is_in(self, w, mapping):
        r"""
        Return True if *w* names an entry of *mapping*, possibly
        abbreviated.

        It's kind of difficult to decide if something might be a keyword
        in VimL because it allows you to abbreviate them.  In fact,
        'ab[breviate]' is a good example.  :ab, :abbre, or :abbreviate are
        valid ways to call it so rather than making really awful regexps
        like::

            \bab(?:b(?:r(?:e(?:v(?:i(?:a(?:t(?:e)?)?)?)?)?)?)?)?\b

        we match `\b\w+\b` and then call is_in() on those tokens.  See
        `scripts/get_vimkw.py` for how the lists are extracted.

        *mapping* is a sorted list of ``(abbreviation, full_name)`` pairs,
        so a binary search narrows the candidates to the two entries
        around the insertion point.
        """
        p = bisect(mapping, (w,))
        # An entry matches when w starts with the entry's minimal
        # abbreviation AND the entry's full name starts with w.
        if p > 0:
            if mapping[p-1][0] == w[:len(mapping[p-1][0])] and \
               mapping[p-1][1][:len(w)] == w:
                return True
        if p < len(mapping):
            return mapping[p][0] == w[:len(mapping[p][0])] and \
                mapping[p][1][:len(w)] == w
        return False

    def get_tokens_unprocessed(self, text):
        """
        Postprocess the generic ``Name.Other`` tokens produced by the
        regex table: reclassify them as Keyword (ex commands) or
        Name.Builtin (options/autocommand events) via table lookup,
        falling back to plain Text.
        """
        # TODO: builtins are only subsequent tokens on lines
        #       and 'keywords' only happen at the beginning except
        #       for :au ones
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name.Other:
                if self.is_in(value, self._cmd):
                    yield index, Keyword, value
                elif self.is_in(value, self._opt) or \
                        self.is_in(value, self._aut):
                    yield index, Name.Builtin, value
                else:
                    yield index, Text, value
            else:
                yield index, token, value