1"""
2 pygments.lexers.textedit
3 ~~~~~~~~~~~~~~~~~~~~~~~~
4
5 Lexers for languages related to text processing.
6
7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11import re
12from bisect import bisect
13
14from pygments.lexer import RegexLexer, bygroups, default, include, this, using
15from pygments.lexers.python import PythonLexer
16from pygments.token import Comment, Keyword, Name, Number, Operator, \
17 Punctuation, String, Text, Whitespace
18
19__all__ = ['AwkLexer', 'SedLexer', 'VimLexer']
20
21
class AwkLexer(RegexLexer):
    """
    For Awk scripts.
    """

    name = 'Awk'
    aliases = ['awk', 'gawk', 'mawk', 'nawk']
    filenames = ['*.awk']
    mimetypes = ['application/x-awk']
    url = 'https://en.wikipedia.org/wiki/AWK'
    version_added = '1.5'

    tokens = {
        'commentsandwhitespace': [
            (r'\s+', Text),
            (r'#.*$', Comment.Single)
        ],
        # Entered after tokens (operators, keywords, open brackets, ...)
        # that may legally be followed by a regex literal: a '/' in that
        # position starts a regex rather than a division operator.
        'slashstartsregex': [
            include('commentsandwhitespace'),
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'\B', String.Regex, '#pop'),
            # A '/' that did not parse as a complete regex literal.
            (r'(?=/)', Text, ('#pop', 'badregex')),
            default('#pop')
        ],
        'badregex': [
            (r'\n', Text, '#pop')
        ],
        'root': [
            (r'^(?=\s|/)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            (r'\+\+|--|\|\||&&|in\b|\$|!?~|\?|:|'
             r'(\*\*|[-<>+*%\^/!=|])=?', Operator, 'slashstartsregex'),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            (r'[})\].]', Punctuation),
            (r'(break|continue|do|while|exit|for|if|else|'
             r'return)\b', Keyword, 'slashstartsregex'),
            (r'function\b', Keyword.Declaration, 'slashstartsregex'),
            # Built-in functions and statements (POSIX awk plus gawk
            # extensions such as gensub/strftime/systime).
            (r'(atan2|cos|exp|int|log|rand|sin|sqrt|srand|gensub|gsub|index|'
             r'length|match|split|sprintf|sub|substr|tolower|toupper|close|'
             r'fflush|getline|next|nextfile|print|printf|strftime|systime|'
             r'delete|system)\b', Keyword.Reserved),
            # Built-in variables of POSIX awk and gawk.  'ORS' (the output
            # record separator) replaces the earlier 'ORFS', which is not
            # an awk variable.
            (r'(ARGC|ARGIND|ARGV|BEGIN|CONVFMT|ENVIRON|END|ERRNO|FIELDWIDTHS|'
             r'FILENAME|FNR|FS|IGNORECASE|NF|NR|OFMT|OFS|ORS|RLENGTH|RS|'
             r'RSTART|RT|SUBSEP)\b', Name.Builtin),
            (r'[$a-zA-Z_]\w*', Name.Other),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
        ]
    }
74
75
class SedLexer(RegexLexer):
    """
    Lexer for Sed script files.
    """
    name = 'Sed'
    aliases = ['sed', 'gsed', 'ssed']
    filenames = ['*.sed', '*.[gs]sed']
    mimetypes = ['text/x-sed']
    url = 'https://en.wikipedia.org/wiki/Sed'
    version_added = ''
    flags = re.MULTILINE

    # Match the contents within delimiters such as /<contents>/
    _inside_delims = r'((?:(?:\\[^\n]|[^\\])*?\\\n)*?(?:\\.|[^\\])*?)'

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'#.*$', Comment.Single),
            # Line addresses: numeric, last-line '$'.
            (r'[0-9]+', Number.Integer),
            (r'\$', Operator),
            # Block/grouping and command separators, address negation '!'.
            (r'[{};,!]', Punctuation),
            # Single-letter commands that take no argument text.
            (r'[dDFgGhHlnNpPqQxz=]', Keyword),
            # Commands followed by a filename, label or similar argument
            # running to end of line (e.g. 'b label', 'w file', ':label').
            (r'([berRtTvwW:])([^;\n]*)', bygroups(Keyword, String.Single)),
            # a/c/i commands with (possibly backslash-continued) text.
            (r'([aci])((?:.*?\\\n)*(?:.*?[^\\]$))', bygroups(Keyword, String.Double)),
            # q/Q with an optional exit code.
            (r'([qQ])([0-9]*)', bygroups(Keyword, Number.Integer)),
            # A plain /regex/ address.
            (r'(/)' + _inside_delims + r'(/)', bygroups(Punctuation, String.Regex, Punctuation)),
            # An address with a custom delimiter: \%regex%
            (r'(\\(.))' + _inside_delims + r'(\2)',
             bygroups(Punctuation, None, String.Regex, Punctuation)),
            # y/source/dest/ transliteration (any delimiter via \2).
            (r'(y)(.)' + _inside_delims + r'(\2)' + _inside_delims + r'(\2)',
             bygroups(Keyword, Punctuation, String.Single, Punctuation, String.Single, Punctuation)),
            # s/pattern/replacement/flags substitution (any delimiter).
            (r'(s)(.)' + _inside_delims + r'(\2)' + _inside_delims + r'(\2)((?:[gpeIiMm]|[0-9])*)',
             bygroups(Keyword, Punctuation, String.Regex, Punctuation, String.Single, Punctuation,
                      Keyword))
        ]
    }
112
class VimLexer(RegexLexer):
    """
    Lexer for VimL script files.
    """
    name = 'VimL'
    aliases = ['vim']
    filenames = ['*.vim', '.vimrc', '.exrc', '.gvimrc',
                 '_vimrc', '_exrc', '_gvimrc', 'vimrc', 'gvimrc']
    mimetypes = ['text/x-vim']
    url = 'https://www.vim.org'
    version_added = '0.8'

    flags = re.MULTILINE

    # Every legal abbreviation of the ':python' command.
    _python = r'py(?:t(?:h(?:o(?:n)?)?)?)?'

    tokens = {
        'root': [
            # Embedded Python, heredoc form (':py << MARKER ... MARKER')
            # and single-line form (':py <statement>').
            (r'^([ \t:]*)(' + _python + r')([ \t]*)(<<)([ \t]*)(.*)((?:\n|.)*)(\6)',
             bygroups(using(this), Keyword, Text, Operator, Text, Text,
                      using(PythonLexer), Text)),
            (r'^([ \t:]*)(' + _python + r')([ \t])(.*)',
             bygroups(using(this), Keyword, Text, using(PythonLexer))),

            # A line that is nothing but a comment.
            (r'^\s*".*', Comment),

            (r'[ \t]+', Text),
            # TODO: regexes can have other delims
            (r'/[^/\\\n]*(?:\\[\s\S][^/\\\n]*)*/', String.Regex),
            (r'"[^"\\\n]*(?:\\[\s\S][^"\\\n]*)*"', String.Double),
            (r"'[^\n']*(?:''[^\n']*)*'", String.Single),

            # Trailing comments also start with a doublequote, which is
            # why the string rules above must come first.
            (r'(?<=\s)"[^\-:.%#=*].*', Comment),
            (r'-?\d+', Number),
            (r'#[0-9a-f]{6}', Number.Hex),
            (r'^:', Punctuation),
            (r'[()<>+=!|,~-]', Punctuation),  # Inexact list. Looks decent.
            (r'\b(let|if|else|endif|elseif|fun|function|endfunction)\b',
             Keyword),
            (r'\b(NONE|bold|italic|underline|dark|light)\b', Name.Builtin),
            (r'\b\w+\b', Name.Other),  # Reclassified in get_tokens_unprocessed
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        # Deferred import: the builtin tables are large and only needed
        # once a VimLexer is actually instantiated.
        from pygments.lexers._vim_builtins import auto, command, option
        self._aut = auto
        self._cmd = command
        self._opt = option
        super().__init__(**options)

    def is_in(self, w, mapping):
        """
        Return True if ``w`` can be an abbreviation of an entry in
        ``mapping``, a sorted list of ``(min_prefix, full_name)`` pairs.

        VimL lets commands be abbreviated down to a minimal prefix, e.g.
        ':ab', ':abbre' and ':abbreviate' all invoke 'ab[breviate]', so
        instead of matching monster regexps like
        ``\\bab(?:b(?:r(?:e(?:v(?:i(?:a(?:t(?:e)?)?)?)?)?)?)?)?\\b``
        the lexer matches plain words and classifies them here.  ``w``
        matches an entry when it starts with the minimal prefix and the
        full name starts with ``w``.  See ``scripts/get_vimkw.py`` for how
        the tables are generated.
        """
        pos = bisect(mapping, (w,))
        # Only the entries adjacent to the insertion point can match.
        neighbours = []
        if pos > 0:
            neighbours.append(mapping[pos - 1])
        if pos < len(mapping):
            neighbours.append(mapping[pos])
        return any(w.startswith(entry[0]) and entry[1].startswith(w)
                   for entry in neighbours)

    def get_tokens_unprocessed(self, text):
        # TODO: builtins are only subsequent tokens on lines
        # and 'keywords' only happen at the beginning except
        # for :au ones
        stream = RegexLexer.get_tokens_unprocessed(self, text)
        for index, token, value in stream:
            if token is not Name.Other:
                yield index, token, value
            elif self.is_in(value, self._cmd):
                yield index, Keyword, value
            elif self.is_in(value, self._opt) or self.is_in(value, self._aut):
                yield index, Name.Builtin, value
            else:
                yield index, Text, value