Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pygments/lexers/textedit.py: 66% (58 statements)

"""
    pygments.lexers.textedit
    ~~~~~~~~~~~~~~~~~~~~~~~~

    Lexers for languages related to text processing.

    :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re
from bisect import bisect

from pygments.lexer import RegexLexer, bygroups, default, include, this, using
from pygments.lexers.python import PythonLexer
from pygments.token import Comment, Keyword, Name, Number, Operator, \
    Punctuation, String, Text, Whitespace

__all__ = ['AwkLexer', 'SedLexer', 'VimLexer']


class AwkLexer(RegexLexer):
    """
    For Awk scripts.
    """

    name = 'Awk'
    aliases = ['awk', 'gawk', 'mawk', 'nawk']
    filenames = ['*.awk']
    mimetypes = ['application/x-awk']
    url = 'https://en.wikipedia.org/wiki/AWK'
    version_added = '1.5'

    tokens = {
        'commentsandwhitespace': [
            (r'\s+', Text),
            (r'#.*$', Comment.Single)
        ],
        'slashstartsregex': [
            include('commentsandwhitespace'),
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'\B', String.Regex, '#pop'),
            (r'(?=/)', Text, ('#pop', 'badregex')),
            default('#pop')
        ],
        'badregex': [
            (r'\n', Text, '#pop')
        ],
        'root': [
            (r'^(?=\s|/)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            (r'\+\+|--|\|\||&&|in\b|\$|!?~|\?|:|'
             r'(\*\*|[-<>+*%\^/!=|])=?', Operator, 'slashstartsregex'),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            (r'[})\].]', Punctuation),
            (r'(break|continue|do|while|exit|for|if|else|'
             r'return)\b', Keyword, 'slashstartsregex'),
            (r'function\b', Keyword.Declaration, 'slashstartsregex'),
            (r'(atan2|cos|exp|int|log|rand|sin|sqrt|srand|gensub|gsub|index|'
             r'length|match|split|sprintf|sub|substr|tolower|toupper|close|'
             r'fflush|getline|next|nextfile|print|printf|strftime|systime|'
             r'delete|system)\b', Keyword.Reserved),
            (r'(ARGC|ARGIND|ARGV|BEGIN|CONVFMT|ENVIRON|END|ERRNO|FIELDWIDTHS|'
             r'FILENAME|FNR|FS|IGNORECASE|NF|NR|OFMT|OFS|ORFS|RLENGTH|RS|'
             r'RSTART|RT|SUBSEP)\b', Name.Builtin),
            (r'[$a-zA-Z_]\w*', Name.Other),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
        ]
    }
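
# A minimal usage sketch for the lexer above (an illustrative addition, not
# part of the upstream module).  The sample one-liner and the choice of
# TerminalFormatter are assumptions; any formatter would do:
#
#     from pygments import highlight
#     from pygments.formatters import TerminalFormatter
#
#     awk_src = '$3 ~ /error/ { hits++ } END { print hits }'
#     print(highlight(awk_src, AwkLexer(), TerminalFormatter()))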


class SedLexer(RegexLexer):
    """
    Lexer for Sed script files.
    """
    name = 'Sed'
    aliases = ['sed', 'gsed', 'ssed']
    filenames = ['*.sed', '*.[gs]sed']
    mimetypes = ['text/x-sed']
    url = 'https://en.wikipedia.org/wiki/Sed'
    version_added = ''
    flags = re.MULTILINE

    # Match the contents within delimiters such as /<contents>/
    _inside_delims = r'((?:(?:\\[^\n]|[^\\])*?\\\n)*?(?:\\.|[^\\])*?)'
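    # e.g. in 's/fo\/o/bar/' this group lazily captures 'fo\/o': the '\\.'
    # alternative lets escaped delimiters through, and the '\\\n' branch lets
    # the captured text continue across a backslash-escaped newline.
    # (The sample sed command is an illustrative assumption.)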

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'#.*$', Comment.Single),
            (r'[0-9]+', Number.Integer),
            (r'\$', Operator),
            (r'[{};,!]', Punctuation),
            (r'[dDFgGhHlnNpPqQxz=]', Keyword),
            (r'([berRtTvwW:])([^;\n]*)', bygroups(Keyword, String.Single)),
            (r'([aci])((?:.*?\\\n)*(?:.*?[^\\]$))', bygroups(Keyword, String.Double)),
            (r'([qQ])([0-9]*)', bygroups(Keyword, Number.Integer)),
            (r'(/)' + _inside_delims + r'(/)', bygroups(Punctuation, String.Regex, Punctuation)),
            (r'(\\(.))' + _inside_delims + r'(\2)',
             bygroups(Punctuation, None, String.Regex, Punctuation)),
            (r'(y)(.)' + _inside_delims + r'(\2)' + _inside_delims + r'(\2)',
             bygroups(Keyword, Punctuation, String.Single, Punctuation, String.Single, Punctuation)),
            (r'(s)(.)' + _inside_delims + r'(\2)' + _inside_delims + r'(\2)((?:[gpeIiMm]|[0-9])*)',
             bygroups(Keyword, Punctuation, String.Regex, Punctuation, String.Single, Punctuation,
                      Keyword))
        ]
    }
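
# A quick sketch of how the substitution rule above tokenizes (illustrative
# addition, not part of the upstream module; the sample script is an
# assumption):
#
#     >>> toks = list(SedLexer().get_tokens('s/foo/bar/g'))
#     >>> (Keyword, 's') in toks and (String.Regex, 'foo') in toks
#     True
#
# i.e. 's' becomes a Keyword, 'foo' a String.Regex, 'bar' a String.Single,
# the delimiters Punctuation, and the trailing 'g' flag a Keyword.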


class VimLexer(RegexLexer):
    """
    Lexer for VimL script files.
    """
    name = 'VimL'
    aliases = ['vim']
    filenames = ['*.vim', '.vimrc', '.exrc', '.gvimrc',
                 '_vimrc', '_exrc', '_gvimrc', 'vimrc', 'gvimrc']
    mimetypes = ['text/x-vim']
    url = 'https://www.vim.org'
    version_added = '0.8'

    flags = re.MULTILINE

    _python = r'py(?:t(?:h(?:o(?:n)?)?)?)?'
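    # The pattern above accepts every Vim-style abbreviation of the command
    # name: 'py', 'pyt', 'pyth', 'pytho' and 'python'.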

    tokens = {
        'root': [
            (r'^([ \t:]*)(' + _python + r')([ \t]*)(<<)([ \t]*)(.*)((?:\n|.)*)(\6)',
             bygroups(using(this), Keyword, Text, Operator, Text, Text,
                      using(PythonLexer), Text)),
            (r'^([ \t:]*)(' + _python + r')([ \t])(.*)',
             bygroups(using(this), Keyword, Text, using(PythonLexer))),

            (r'^\s*".*', Comment),

            (r'[ \t]+', Text),
            # TODO: regexes can have other delims
            (r'/[^/\\\n]*(?:\\[\s\S][^/\\\n]*)*/', String.Regex),
            (r'"[^"\\\n]*(?:\\[\s\S][^"\\\n]*)*"', String.Double),
            (r"'[^\n']*(?:''[^\n']*)*'", String.Single),

            # Who decided that doublequote was a good comment character??
            (r'(?<=\s)"[^\-:.%#=*].*', Comment),
            (r'-?\d+', Number),
            (r'#[0-9a-f]{6}', Number.Hex),
            (r'^:', Punctuation),
            (r'[()<>+=!|,~-]', Punctuation),  # Inexact list.  Looks decent.
            (r'\b(let|if|else|endif|elseif|fun|function|endfunction)\b',
             Keyword),
            (r'\b(NONE|bold|italic|underline|dark|light)\b', Name.Builtin),
            (r'\b\w+\b', Name.Other),  # These are postprocessed below
            (r'.', Text),
        ],
    }
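
    # Rough shape of what the first 'root' rule above matches (the snippet is
    # an illustrative assumption): a heredoc such as
    #
    #     python << EOF
    #     print("hello from vim")
    #     EOF
    #
    # where group 6 captures the terminator ('EOF'), group 7 is handed to
    # PythonLexer, and the back-reference \6 closes the block.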

    def __init__(self, **options):
        from pygments.lexers._vim_builtins import auto, command, option
        self._cmd = command
        self._opt = option
        self._aut = auto

        RegexLexer.__init__(self, **options)

    def is_in(self, w, mapping):
        r"""
        It's kind of difficult to decide if something might be a keyword
        in VimL because it allows you to abbreviate them.  In fact,
        'ab[breviate]' is a good example.  :ab, :abbre, or :abbreviate are
        valid ways to call it, so rather than making really awful regexps
        like::

            \bab(?:b(?:r(?:e(?:v(?:i(?:a(?:t(?:e)?)?)?)?)?)?)?)?\b

        we match `\b\w+\b` and then call is_in() on those tokens.  See
        `scripts/get_vimkw.py` for how the lists are extracted.
        """
        p = bisect(mapping, (w,))
        if p > 0:
            if mapping[p-1][0] == w[:len(mapping[p-1][0])] and \
               mapping[p-1][1][:len(w)] == w:
                return True
        if p < len(mapping):
            return mapping[p][0] == w[:len(mapping[p][0])] and \
                mapping[p][1][:len(w)] == w
        return False
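
    # Worked example of the prefix test above, assuming the command table
    # holds ('ab', 'abbreviate') as the docstring's 'ab[breviate]' suggests:
    # for w = 'abbre', bisect lands just past that entry, 'ab' is a prefix of
    # 'abbre' and 'abbre' is a prefix of 'abbreviate', so is_in() returns
    # True.  A word that is not a prefix of any full command name fails both
    # checks and returns False.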

    def get_tokens_unprocessed(self, text):
        # TODO: builtins are only subsequent tokens on lines
        # and 'keywords' only happen at the beginning except
        # for :au ones
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name.Other:
                if self.is_in(value, self._cmd):
                    yield index, Keyword, value
                elif self.is_in(value, self._opt) or \
                        self.is_in(value, self._aut):
                    yield index, Name.Builtin, value
                else:
                    yield index, Text, value
            else:
                yield index, token, value
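

# Illustrative self-test (an addition, not part of the upstream module): run
# each lexer over a tiny sample and dump its token stream.  The sample
# snippets are assumptions chosen only to exercise a few of the rules above.
if __name__ == '__main__':
    _samples = [
        (AwkLexer(), '{ print $1 }'),
        (SedLexer(), 's/foo/bar/g'),
        (VimLexer(), 'set number'),
    ]
    for _lexer, _src in _samples:
        print(f'-- {_lexer.name} --')
        for _ttype, _value in _lexer.get_tokens(_src):
            print(f'{str(_ttype):<30} {_value!r}')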