Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/textedit.py: 61%

51 statements  

coverage.py v7.2.7, created at 2023-07-01 06:54 +0000

1""" 

2 pygments.lexers.textedit 

3 ~~~~~~~~~~~~~~~~~~~~~~~~ 

4 

5 Lexers for languages related to text processing. 

6 

7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11import re 

12from bisect import bisect 

13 

14from pygments.lexer import RegexLexer, bygroups, default, include, this, using 

15from pygments.lexers.python import PythonLexer 

16from pygments.token import Comment, Keyword, Name, Number, Operator, \ 

17 Punctuation, String, Text, Whitespace 

18 

19__all__ = ['AwkLexer', 'SedLexer', 'VimLexer'] 

20 

21 

class AwkLexer(RegexLexer):
    """
    For Awk scripts.

    .. versionadded:: 1.5
    """

    name = 'Awk'
    aliases = ['awk', 'gawk', 'mawk', 'nawk']
    filenames = ['*.awk']
    mimetypes = ['application/x-awk']

    tokens = {
        'commentsandwhitespace': [
            (r'\s+', Text),
            (r'#.*$', Comment.Single)
        ],
        'slashstartsregex': [
            include('commentsandwhitespace'),
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'\B', String.Regex, '#pop'),
            (r'(?=/)', Text, ('#pop', 'badregex')),
            default('#pop')
        ],
        'badregex': [
            (r'\n', Text, '#pop')
        ],
        'root': [
            (r'^(?=\s|/)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            (r'\+\+|--|\|\||&&|in\b|\$|!?~|'
             r'(\*\*|[-<>+*%\^/!=|])=?', Operator, 'slashstartsregex'),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            (r'[})\].]', Punctuation),
            (r'(break|continue|do|while|exit|for|if|else|'
             r'return)\b', Keyword, 'slashstartsregex'),
            (r'function\b', Keyword.Declaration, 'slashstartsregex'),
            (r'(atan2|cos|exp|int|log|rand|sin|sqrt|srand|gensub|gsub|index|'
             r'length|match|split|sprintf|sub|substr|tolower|toupper|close|'
             r'fflush|getline|next|nextfile|print|printf|strftime|systime|'
             r'delete|system)\b', Keyword.Reserved),
            (r'(ARGC|ARGIND|ARGV|BEGIN|CONVFMT|ENVIRON|END|ERRNO|FIELDWIDTHS|'
             r'FILENAME|FNR|FS|IGNORECASE|NF|NR|OFMT|OFS|ORFS|RLENGTH|RS|'
             r'RSTART|RT|SUBSEP)\b', Name.Builtin),
            (r'[$a-zA-Z_]\w*', Name.Other),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
        ]
    }
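
# Usage sketch (illustrative addition, not part of the original module):
# AwkLexer plugs into the standard Pygments API like any other lexer, e.g.
#
#     from pygments import highlight
#     from pygments.formatters import TerminalFormatter
#     from pygments.lexers.textedit import AwkLexer
#
#     print(highlight('{ print $1, length($0) }', AwkLexer(), TerminalFormatter()))

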

class SedLexer(RegexLexer):
    """
    Lexer for Sed script files.
    """
    name = 'Sed'
    aliases = ['sed', 'gsed', 'ssed']
    filenames = ['*.sed', '*.[gs]sed']
    mimetypes = ['text/x-sed']
    flags = re.MULTILINE

    # Match the contents within delimiters such as /<contents>/
    _inside_delims = r'((?:(?:\\[^\n]|[^\\])*?\\\n)*?(?:\\.|[^\\])*?)'
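    # For example (an illustrative note, not in the original source), in
    # 's/foo/bar/' this pattern captures 'foo' and 'bar' non-greedily, while
    # still allowing escaped delimiters and backslash-newline continuations
    # inside the delimited text.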


    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'#.*$', Comment.Single),
            (r'[0-9]+', Number.Integer),
            (r'\$', Operator),
            (r'[{};,!]', Punctuation),
            (r'[dDFgGhHlnNpPqQxz=]', Keyword),
            (r'([berRtTvwW:])([^;\n]*)', bygroups(Keyword, String.Single)),
            (r'([aci])((?:.*?\\\n)*(?:.*?[^\\]$))', bygroups(Keyword, String.Double)),
            (r'([qQ])([0-9]*)', bygroups(Keyword, Number.Integer)),
            (r'(/)' + _inside_delims + r'(/)', bygroups(Punctuation, String.Regex, Punctuation)),
            (r'(\\(.))' + _inside_delims + r'(\2)',
             bygroups(Punctuation, None, String.Regex, Punctuation)),
            (r'(y)(.)' + _inside_delims + r'(\2)' + _inside_delims + r'(\2)',
             bygroups(Keyword, Punctuation, String.Single, Punctuation, String.Single, Punctuation)),
            (r'(s)(.)' + _inside_delims + r'(\2)' + _inside_delims + r'(\2)((?:[gpeIiMm]|[0-9])*)',
             bygroups(Keyword, Punctuation, String.Regex, Punctuation, String.Single, Punctuation,
                      Keyword))
        ]
    }
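
# Tokenization sketch (illustrative addition, not part of the original
# module): for the sed command 's/foo/bar/g' the final rule above yields
# roughly Keyword 's', Punctuation '/', String.Regex 'foo', Punctuation '/',
# String.Single 'bar', Punctuation '/', Keyword 'g', e.g.
#
#     from pygments.lexers.textedit import SedLexer
#     print(list(SedLexer().get_tokens('s/foo/bar/g')))

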

class VimLexer(RegexLexer):
    """
    Lexer for VimL script files.

    .. versionadded:: 0.8
    """
    name = 'VimL'
    aliases = ['vim']
    filenames = ['*.vim', '.vimrc', '.exrc', '.gvimrc',
                 '_vimrc', '_exrc', '_gvimrc', 'vimrc', 'gvimrc']
    mimetypes = ['text/x-vim']
    flags = re.MULTILINE

    _python = r'py(?:t(?:h(?:o(?:n)?)?)?)?'
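    # Descriptive note (added, not in the original source): _python accepts
    # every leading abbreviation of the ':python' command, i.e. 'py', 'pyt',
    # 'pyth', 'pytho' and 'python'.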


    tokens = {
        'root': [
            (r'^([ \t:]*)(' + _python + r')([ \t]*)(<<)([ \t]*)(.*)((?:\n|.)*)(\6)',
             bygroups(using(this), Keyword, Text, Operator, Text, Text,
                      using(PythonLexer), Text)),
            (r'^([ \t:]*)(' + _python + r')([ \t])(.*)',
             bygroups(using(this), Keyword, Text, using(PythonLexer))),

            (r'^\s*".*', Comment),

            (r'[ \t]+', Text),
            # TODO: regexes can have other delims
            (r'/[^/\\\n]*(?:\\[\s\S][^/\\\n]*)*/', String.Regex),
            (r'"[^"\\\n]*(?:\\[\s\S][^"\\\n]*)*"', String.Double),
            (r"'[^\n']*(?:''[^\n']*)*'", String.Single),

            # Who decided that doublequote was a good comment character??
            (r'(?<=\s)"[^\-:.%#=*].*', Comment),
            (r'-?\d+', Number),
            (r'#[0-9a-f]{6}', Number.Hex),
            (r'^:', Punctuation),
            (r'[()<>+=!|,~-]', Punctuation),  # Inexact list.  Looks decent.
            (r'\b(let|if|else|endif|elseif|fun|function|endfunction)\b',
             Keyword),
            (r'\b(NONE|bold|italic|underline|dark|light)\b', Name.Builtin),
            (r'\b\w+\b', Name.Other),  # These are postprocessed below
            (r'.', Text),
        ],
    }


    def __init__(self, **options):
        # auto, command and option are sorted lists of (abbreviation,
        # full-name) pairs; see scripts/get_vimkw.py and is_in() below.
        from pygments.lexers._vim_builtins import auto, command, option
        self._cmd = command
        self._opt = option
        self._aut = auto

        RegexLexer.__init__(self, **options)


    def is_in(self, w, mapping):
        r"""
        It's kind of difficult to decide if something might be a keyword
        in VimL because it allows you to abbreviate them.  In fact,
        'ab[breviate]' is a good example.  :ab, :abbre, or :abbreviate are
        valid ways to call it so rather than making really awful regexps
        like::

            \bab(?:b(?:r(?:e(?:v(?:i(?:a(?:t(?:e)?)?)?)?)?)?)?)?\b

        we match `\b\w+\b` and then call is_in() on those tokens.  See
        `scripts/get_vimkw.py` for how the lists are extracted.
        """
        p = bisect(mapping, (w,))
        if p > 0:
            if mapping[p-1][0] == w[:len(mapping[p-1][0])] and \
               mapping[p-1][1][:len(w)] == w:
                return True
        if p < len(mapping):
            return mapping[p][0] == w[:len(mapping[p][0])] and \
                mapping[p][1][:len(w)] == w
        return False
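
    # Worked example (illustrative addition, not from the original source):
    # with mapping = [('ab', 'abbreviate'), ('fu', 'function')], the word
    # 'abbre' passes because 'ab' is a prefix of 'abbre' and 'abbre' is a
    # prefix of 'abbreviate'; 'abbrev8' fails the second check and returns
    # False.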


    def get_tokens_unprocessed(self, text):
        # TODO: builtins are only subsequent tokens on lines
        #       and 'keywords' only happen at the beginning except
        #       for :au ones
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name.Other:
                if self.is_in(value, self._cmd):
                    yield index, Keyword, value
                elif self.is_in(value, self._opt) or \
                        self.is_in(value, self._aut):
                    yield index, Name.Builtin, value
                else:
                    yield index, Text, value
            else:
                yield index, token, value
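

# Usage sketch (illustrative addition, not part of the original module): the
# Name.Other post-processing above is what lets abbreviated commands such as
# 'endf' (for ':endfunction') come out as Token.Keyword, e.g.
#
#     from pygments.lexers.textedit import VimLexer
#     for tok, val in VimLexer().get_tokens('fun! Greet()\n  echo "hi"\nendf'):
#         print(tok, repr(val))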