1"""
2 pygments.lexers.textedit
3 ~~~~~~~~~~~~~~~~~~~~~~~~
5 Lexers for languages related to text processing.
7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""

import re
from bisect import bisect

from pygments.lexer import RegexLexer, bygroups, default, include, this, using
from pygments.lexers.python import PythonLexer
from pygments.token import Comment, Keyword, Name, Number, Operator, \
    Punctuation, String, Text, Whitespace

__all__ = ['AwkLexer', 'SedLexer', 'VimLexer']


class AwkLexer(RegexLexer):
    """
    For Awk scripts.

    .. versionadded:: 1.5
    """

    name = 'Awk'
    aliases = ['awk', 'gawk', 'mawk', 'nawk']
    filenames = ['*.awk']
    mimetypes = ['application/x-awk']

    tokens = {
        'commentsandwhitespace': [
            (r'\s+', Text),
            (r'#.*$', Comment.Single)
        ],
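        # Note (inferred from the rules below): '/' in Awk can begin either a
        # division operator or a regex literal, so the 'slashstartsregex'
        # state is pushed after tokens that may legally be followed by a regex
        # (operators, opening punctuation, most keywords); only then is a
        # leading '/' lexed as String.Regex.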
        'slashstartsregex': [
            include('commentsandwhitespace'),
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'\B', String.Regex, '#pop'),
            (r'(?=/)', Text, ('#pop', 'badregex')),
            default('#pop')
        ],
        'badregex': [
            (r'\n', Text, '#pop')
        ],
        'root': [
            (r'^(?=\s|/)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            (r'\+\+|--|\|\||&&|in\b|\$|!?~|'
             r'(\*\*|[-<>+*%\^/!=|])=?', Operator, 'slashstartsregex'),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            (r'[})\].]', Punctuation),
            (r'(break|continue|do|while|exit|for|if|else|'
             r'return)\b', Keyword, 'slashstartsregex'),
            (r'function\b', Keyword.Declaration, 'slashstartsregex'),
            (r'(atan2|cos|exp|int|log|rand|sin|sqrt|srand|gensub|gsub|index|'
             r'length|match|split|sprintf|sub|substr|tolower|toupper|close|'
             r'fflush|getline|next|nextfile|print|printf|strftime|systime|'
             r'delete|system)\b', Keyword.Reserved),
            (r'(ARGC|ARGIND|ARGV|BEGIN|CONVFMT|ENVIRON|END|ERRNO|FIELDWIDTHS|'
             r'FILENAME|FNR|FS|IGNORECASE|NF|NR|OFMT|OFS|ORFS|RLENGTH|RS|'
             r'RSTART|RT|SUBSEP)\b', Name.Builtin),
            (r'[$a-zA-Z_]\w*', Name.Other),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
        ]
    }


class SedLexer(RegexLexer):
    """
    Lexer for Sed script files.
    """
    name = 'Sed'
    aliases = ['sed', 'gsed', 'ssed']
    filenames = ['*.sed', '*.[gs]sed']
    mimetypes = ['text/x-sed']
    flags = re.MULTILINE

    # Match the contents within delimiters such as /<contents>/
    _inside_delims = r'((?:(?:\\[^\n]|[^\\])*?\\\n)*?(?:\\.|[^\\])*?)'
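    # For example, in 's/foo/bar/' the two occurrences of this group capture
    # 'foo' and 'bar' respectively; escaped characters and backslash-continued
    # lines are allowed inside the delimiters.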

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'#.*$', Comment.Single),
            (r'[0-9]+', Number.Integer),
            (r'\$', Operator),
            (r'[{};,!]', Punctuation),
            (r'[dDFgGhHlnNpPqQxz=]', Keyword),
            (r'([berRtTvwW:])([^;\n]*)', bygroups(Keyword, String.Single)),
            (r'([aci])((?:.*?\\\n)*(?:.*?[^\\]$))', bygroups(Keyword, String.Double)),
            (r'([qQ])([0-9]*)', bygroups(Keyword, Number.Integer)),
            (r'(/)' + _inside_delims + r'(/)', bygroups(Punctuation, String.Regex, Punctuation)),
            (r'(\\(.))' + _inside_delims + r'(\2)',
             bygroups(Punctuation, None, String.Regex, Punctuation)),
            (r'(y)(.)' + _inside_delims + r'(\2)' + _inside_delims + r'(\2)',
             bygroups(Keyword, Punctuation, String.Single, Punctuation, String.Single, Punctuation)),
            (r'(s)(.)' + _inside_delims + r'(\2)' + _inside_delims + r'(\2)((?:[gpeIiMm]|[0-9])*)',
             bygroups(Keyword, Punctuation, String.Regex, Punctuation, String.Single, Punctuation,
                      Keyword))
        ]
    }


class VimLexer(RegexLexer):
    """
    Lexer for VimL script files.

    .. versionadded:: 0.8
    """
    name = 'VimL'
    aliases = ['vim']
    filenames = ['*.vim', '.vimrc', '.exrc', '.gvimrc',
                 '_vimrc', '_exrc', '_gvimrc', 'vimrc', 'gvimrc']
    mimetypes = ['text/x-vim']
    flags = re.MULTILINE

    _python = r'py(?:t(?:h(?:o(?:n)?)?)?)?'
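    # Matches the abbreviated forms of the ':python' command:
    # 'py', 'pyt', 'pyth', 'pytho' and 'python'.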

    tokens = {
        'root': [
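            # The first two rules hand embedded Python off to PythonLexer:
            # ':python << EOF ... EOF' heredoc blocks and ':python <stmt>'
            # one-liners; the leading range/colon part is re-lexed with this
            # lexer via using(this).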
            (r'^([ \t:]*)(' + _python + r')([ \t]*)(<<)([ \t]*)(.*)((?:\n|.)*)(\6)',
             bygroups(using(this), Keyword, Text, Operator, Text, Text,
                      using(PythonLexer), Text)),
            (r'^([ \t:]*)(' + _python + r')([ \t])(.*)',
             bygroups(using(this), Keyword, Text, using(PythonLexer))),

            (r'^\s*".*', Comment),

            (r'[ \t]+', Text),
            # TODO: regexes can have other delims
            (r'/[^/\\\n]*(?:\\[\s\S][^/\\\n]*)*/', String.Regex),
            (r'"[^"\\\n]*(?:\\[\s\S][^"\\\n]*)*"', String.Double),
            (r"'[^\n']*(?:''[^\n']*)*'", String.Single),

            # Who decided that doublequote was a good comment character??
            (r'(?<=\s)"[^\-:.%#=*].*', Comment),
            (r'-?\d+', Number),
            (r'#[0-9a-f]{6}', Number.Hex),
            (r'^:', Punctuation),
            (r'[()<>+=!|,~-]', Punctuation),  # Inexact list.  Looks decent.
            (r'\b(let|if|else|endif|elseif|fun|function|endfunction)\b',
             Keyword),
            (r'\b(NONE|bold|italic|underline|dark|light)\b', Name.Builtin),
            (r'\b\w+\b', Name.Other),  # These are postprocessed below
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        from pygments.lexers._vim_builtins import auto, command, option
        self._cmd = command
        self._opt = option
        self._aut = auto

        RegexLexer.__init__(self, **options)

    def is_in(self, w, mapping):
        r"""
        It's kind of difficult to decide if something might be a keyword
        in VimL because it allows you to abbreviate them.  In fact,
        'ab[breviate]' is a good example.  :ab, :abbre, or :abbreviate are
        valid ways to call it so rather than making really awful regexps
        like::

            \bab(?:b(?:r(?:e(?:v(?:i(?:a(?:t(?:e)?)?)?)?)?)?)?)?\b

        we match `\b\w+\b` and then call is_in() on those tokens.  See
        `scripts/get_vimkw.py` for how the lists are extracted.
        """
        p = bisect(mapping, (w,))
        if p > 0:
            if mapping[p-1][0] == w[:len(mapping[p-1][0])] and \
                    mapping[p-1][1][:len(w)] == w:
                return True
        if p < len(mapping):
            return mapping[p][0] == w[:len(mapping[p][0])] and \
                   mapping[p][1][:len(w)] == w
        return False

    def get_tokens_unprocessed(self, text):
        # TODO: builtins are only subsequent tokens on lines
        #       and 'keywords' only happen at the beginning except
        #       for :au ones
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name.Other:
                if self.is_in(value, self._cmd):
                    yield index, Keyword, value
                elif self.is_in(value, self._opt) or \
                        self.is_in(value, self._aut):
                    yield index, Name.Builtin, value
                else:
                    yield index, Text, value
            else:
                yield index, token, value
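

if __name__ == '__main__':
    # Minimal usage sketch: run each lexer over a tiny illustrative snippet
    # and print ANSI-highlighted output.  pygments.highlight and
    # TerminalFormatter are the standard public API; the sample code strings
    # below are made up for demonstration only.
    from pygments import highlight
    from pygments.formatters import TerminalFormatter

    samples = [
        (AwkLexer(), 'BEGIN { FS = ":" } { print $1 }'),
        (SedLexer(), 's/foo/bar/g'),
        (VimLexer(), 'let g:answer = 42 " a trailing comment'),
    ]
    for lexer, code in samples:
        print(lexer.name)
        print(highlight(code, lexer, TerminalFormatter()))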