Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/textedit.py: 61%

51 statements  

coverage.py v7.2.7, created at 2023-07-01 06:54 +0000

1""" 

2 pygments.lexers.textedit 

3 ~~~~~~~~~~~~~~~~~~~~~~~~ 

4 

5 Lexers for languages related to text processing. 

6 

7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11import re 

12from bisect import bisect 

13 

14from pygments.lexer import RegexLexer, bygroups, default, include, this, using 

15from pygments.lexers.python import PythonLexer 

16from pygments.token import Comment, Keyword, Name, Number, Operator, \ 

17 Punctuation, String, Text, Whitespace 

18 

19__all__ = ['AwkLexer', 'SedLexer', 'VimLexer'] 

20 

21 

class AwkLexer(RegexLexer):
    """
    For Awk scripts.

    .. versionadded:: 1.5
    """

    name = 'Awk'
    aliases = ['awk', 'gawk', 'mawk', 'nawk']
    filenames = ['*.awk']
    mimetypes = ['application/x-awk']

    tokens = {
        'commentsandwhitespace': [
            (r'\s+', Text),
            (r'#.*$', Comment.Single)
        ],
        'slashstartsregex': [
            include('commentsandwhitespace'),
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'\B', String.Regex, '#pop'),
            (r'(?=/)', Text, ('#pop', 'badregex')),
            default('#pop')
        ],
        'badregex': [
            (r'\n', Text, '#pop')
        ],
        'root': [
            (r'^(?=\s|/)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            (r'\+\+|--|\|\||&&|in\b|\$|!?~|'
             r'(\*\*|[-<>+*%\^/!=|])=?', Operator, 'slashstartsregex'),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            (r'[})\].]', Punctuation),
            (r'(break|continue|do|while|exit|for|if|else|'
             r'return)\b', Keyword, 'slashstartsregex'),
            (r'function\b', Keyword.Declaration, 'slashstartsregex'),
            (r'(atan2|cos|exp|int|log|rand|sin|sqrt|srand|gensub|gsub|index|'
             r'length|match|split|sprintf|sub|substr|tolower|toupper|close|'
             r'fflush|getline|next|nextfile|print|printf|strftime|systime|'
             r'delete|system)\b', Keyword.Reserved),
            (r'(ARGC|ARGIND|ARGV|BEGIN|CONVFMT|ENVIRON|END|ERRNO|FIELDWIDTHS|'
             r'FILENAME|FNR|FS|IGNORECASE|NF|NR|OFMT|OFS|ORFS|RLENGTH|RS|'
             r'RSTART|RT|SUBSEP)\b', Name.Builtin),
            (r'[$a-zA-Z_]\w*', Name.Other),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
        ]
    }
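
# Usage sketch (illustrative addition, not part of the original module):
# AwkLexer plugs into the standard Pygments API like any other lexer, e.g.
#
#     from pygments import highlight
#     from pygments.formatters import TerminalFormatter
#     from pygments.lexers.textedit import AwkLexer
#
#     print(highlight('{ print $1, length($0) }', AwkLexer(), TerminalFormatter()))

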

class SedLexer(RegexLexer):
    """
    Lexer for Sed script files.
    """
    name = 'Sed'
    aliases = ['sed', 'gsed', 'ssed']
    filenames = ['*.sed', '*.[gs]sed']
    mimetypes = ['text/x-sed']
    flags = re.MULTILINE

    # Match the contents within delimiters such as /<contents>/
    _inside_delims = r'((?:(?:\\[^\n]|[^\\])*?\\\n)*?(?:\\.|[^\\])*?)'
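    # For example (an illustrative note, not in the original source), in
    # 's/foo/bar/' this pattern captures 'foo' and 'bar' non-greedily, while
    # still allowing escaped delimiters and backslash-newline continuations
    # inside the delimited text.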


    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'#.*$', Comment.Single),
            (r'[0-9]+', Number.Integer),
            (r'\$', Operator),
            (r'[{};,!]', Punctuation),
            (r'[dDFgGhHlnNpPqQxz=]', Keyword),
            (r'([berRtTvwW:])([^;\n]*)', bygroups(Keyword, String.Single)),
            (r'([aci])((?:.*?\\\n)*(?:.*?[^\\]$))', bygroups(Keyword, String.Double)),
            (r'([qQ])([0-9]*)', bygroups(Keyword, Number.Integer)),
            (r'(/)' + _inside_delims + r'(/)', bygroups(Punctuation, String.Regex, Punctuation)),
            (r'(\\(.))' + _inside_delims + r'(\2)',
             bygroups(Punctuation, None, String.Regex, Punctuation)),
            (r'(y)(.)' + _inside_delims + r'(\2)' + _inside_delims + r'(\2)',
             bygroups(Keyword, Punctuation, String.Single, Punctuation, String.Single, Punctuation)),
            (r'(s)(.)' + _inside_delims + r'(\2)' + _inside_delims + r'(\2)((?:[gpeIiMm]|[0-9])*)',
             bygroups(Keyword, Punctuation, String.Regex, Punctuation, String.Single, Punctuation,
                      Keyword))
        ]
    }
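
# Tokenization sketch (illustrative addition, not part of the original
# module): for the sed command 's/foo/bar/g' the final rule above yields
# roughly Keyword 's', Punctuation '/', String.Regex 'foo', Punctuation '/',
# String.Single 'bar', Punctuation '/', Keyword 'g', e.g.
#
#     from pygments.lexers.textedit import SedLexer
#     print(list(SedLexer().get_tokens('s/foo/bar/g')))

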

class VimLexer(RegexLexer):
    """
    Lexer for VimL script files.

    .. versionadded:: 0.8
    """
    name = 'VimL'
    aliases = ['vim']
    filenames = ['*.vim', '.vimrc', '.exrc', '.gvimrc',
                 '_vimrc', '_exrc', '_gvimrc', 'vimrc', 'gvimrc']
    mimetypes = ['text/x-vim']
    flags = re.MULTILINE

    _python = r'py(?:t(?:h(?:o(?:n)?)?)?)?'
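    # Descriptive note (added, not in the original source): _python accepts
    # every leading abbreviation of the ':python' command, i.e. 'py', 'pyt',
    # 'pyth', 'pytho' and 'python'.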


    tokens = {
        'root': [
            (r'^([ \t:]*)(' + _python + r')([ \t]*)(<<)([ \t]*)(.*)((?:\n|.)*)(\6)',
             bygroups(using(this), Keyword, Text, Operator, Text, Text,
                      using(PythonLexer), Text)),
            (r'^([ \t:]*)(' + _python + r')([ \t])(.*)',
             bygroups(using(this), Keyword, Text, using(PythonLexer))),

            (r'^\s*".*', Comment),

            (r'[ \t]+', Text),
            # TODO: regexes can have other delims
            (r'/[^/\\\n]*(?:\\[\s\S][^/\\\n]*)*/', String.Regex),
            (r'"[^"\\\n]*(?:\\[\s\S][^"\\\n]*)*"', String.Double),
            (r"'[^\n']*(?:''[^\n']*)*'", String.Single),

            # Who decided that doublequote was a good comment character??
            (r'(?<=\s)"[^\-:.%#=*].*', Comment),
            (r'-?\d+', Number),
            (r'#[0-9a-f]{6}', Number.Hex),
            (r'^:', Punctuation),
            (r'[()<>+=!|,~-]', Punctuation),  # Inexact list.  Looks decent.
            (r'\b(let|if|else|endif|elseif|fun|function|endfunction)\b',
             Keyword),
            (r'\b(NONE|bold|italic|underline|dark|light)\b', Name.Builtin),
            (r'\b\w+\b', Name.Other),  # These are postprocessed below
            (r'.', Text),
        ],
    }


    def __init__(self, **options):
        # auto, command and option are sorted lists of (abbreviation,
        # full-name) pairs; see scripts/get_vimkw.py and is_in() below.
        from pygments.lexers._vim_builtins import auto, command, option
        self._cmd = command
        self._opt = option
        self._aut = auto

        RegexLexer.__init__(self, **options)


    def is_in(self, w, mapping):
        r"""
        It's kind of difficult to decide if something might be a keyword
        in VimL because it allows you to abbreviate them.  In fact,
        'ab[breviate]' is a good example.  :ab, :abbre, or :abbreviate are
        valid ways to call it so rather than making really awful regexps
        like::

            \bab(?:b(?:r(?:e(?:v(?:i(?:a(?:t(?:e)?)?)?)?)?)?)?)?\b

        we match `\b\w+\b` and then call is_in() on those tokens.  See
        `scripts/get_vimkw.py` for how the lists are extracted.
        """
        p = bisect(mapping, (w,))
        if p > 0:
            if mapping[p-1][0] == w[:len(mapping[p-1][0])] and \
               mapping[p-1][1][:len(w)] == w:
                return True
        if p < len(mapping):
            return mapping[p][0] == w[:len(mapping[p][0])] and \
                mapping[p][1][:len(w)] == w
        return False
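
    # Worked example (illustrative addition, not from the original source):
    # with mapping = [('ab', 'abbreviate'), ('fu', 'function')], the word
    # 'abbre' passes because 'ab' is a prefix of 'abbre' and 'abbre' is a
    # prefix of 'abbreviate'; 'abbrev8' fails the second check and returns
    # False.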


    def get_tokens_unprocessed(self, text):
        # TODO: builtins are only subsequent tokens on lines
        #       and 'keywords' only happen at the beginning except
        #       for :au ones
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name.Other:
                if self.is_in(value, self._cmd):
                    yield index, Keyword, value
                elif self.is_in(value, self._opt) or \
                        self.is_in(value, self._aut):
                    yield index, Name.Builtin, value
                else:
                    yield index, Text, value
            else:
                yield index, token, value
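

# Usage sketch (illustrative addition, not part of the original module): the
# Name.Other post-processing above is what lets abbreviated commands such as
# 'endf' (for ':endfunction') come out as Token.Keyword, e.g.
#
#     from pygments.lexers.textedit import VimLexer
#     for tok, val in VimLexer().get_tokens('fun! Greet()\n  echo "hi"\nendf'):
#         print(tok, repr(val))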