1"""
2 pygments.lexers.textedit
3 ~~~~~~~~~~~~~~~~~~~~~~~~
4
5 Lexers for languages related to text processing.
6
7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11import re
12from bisect import bisect
13
14from pygments.lexer import RegexLexer, bygroups, default, include, this, using
15from pygments.lexers.python import PythonLexer
16from pygments.token import Comment, Keyword, Name, Number, Operator, \
17 Punctuation, String, Text, Whitespace
18
19__all__ = ['AwkLexer', 'SedLexer', 'VimLexer']
20
21
class AwkLexer(RegexLexer):
    """
    For Awk scripts.
    """

    name = 'Awk'
    aliases = ['awk', 'gawk', 'mawk', 'nawk']
    filenames = ['*.awk']
    mimetypes = ['application/x-awk']
    url = 'https://en.wikipedia.org/wiki/AWK'
    version_added = '1.5'

    tokens = {
        'commentsandwhitespace': [
            (r'\s+', Text),
            (r'#.*$', Comment.Single)
        ],
        # Entered after tokens (operators, keywords, open brackets, ...)
        # that may legally be followed by a regex literal: a '/' in that
        # position starts a regex rather than a division operator.
        'slashstartsregex': [
            include('commentsandwhitespace'),
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'\B', String.Regex, '#pop'),
            # A '/' that did not parse as a complete regex literal.
            (r'(?=/)', Text, ('#pop', 'badregex')),
            default('#pop')
        ],
        'badregex': [
            (r'\n', Text, '#pop')
        ],
        'root': [
            (r'^(?=\s|/)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            (r'\+\+|--|\|\||&&|in\b|\$|!?~|\?|:|'
             r'(\*\*|[-<>+*%\^/!=|])=?', Operator, 'slashstartsregex'),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            (r'[})\].]', Punctuation),
            (r'(break|continue|do|while|exit|for|if|else|'
             r'return)\b', Keyword, 'slashstartsregex'),
            (r'function\b', Keyword.Declaration, 'slashstartsregex'),
            # Built-in functions and statements (POSIX awk plus gawk
            # extensions such as gensub/strftime/systime).
            (r'(atan2|cos|exp|int|log|rand|sin|sqrt|srand|gensub|gsub|index|'
             r'length|match|split|sprintf|sub|substr|tolower|toupper|close|'
             r'fflush|getline|next|nextfile|print|printf|strftime|systime|'
             r'delete|system)\b', Keyword.Reserved),
            # Built-in variables of POSIX awk and gawk.  'ORS' (the output
            # record separator) replaces the earlier 'ORFS', which is not
            # an awk variable.
            (r'(ARGC|ARGIND|ARGV|BEGIN|CONVFMT|ENVIRON|END|ERRNO|FIELDWIDTHS|'
             r'FILENAME|FNR|FS|IGNORECASE|NF|NR|OFMT|OFS|ORS|RLENGTH|RS|'
             r'RSTART|RT|SUBSEP)\b', Name.Builtin),
            (r'[$a-zA-Z_]\w*', Name.Other),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
        ]
    }
74
75
class SedLexer(RegexLexer):
    """
    Lexer for Sed script files.
    """
    name = 'Sed'
    aliases = ['sed', 'gsed', 'ssed']
    filenames = ['*.sed', '*.[gs]sed']
    mimetypes = ['text/x-sed']
    url = 'https://en.wikipedia.org/wiki/Sed'
    version_added = ''
    flags = re.MULTILINE

    # Match the contents within delimiters such as /<contents>/
    _inside_delims = r'((?:(?:\\[^\n]|[^\\])*?\\\n)*?(?:\\.|[^\\])*?)'

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'#.*$', Comment.Single),
            # Line addresses: numeric, last-line '$'.
            (r'[0-9]+', Number.Integer),
            (r'\$', Operator),
            # Block/grouping and command separators, address negation '!'.
            (r'[{};,!]', Punctuation),
            # Single-letter commands that take no argument text.
            (r'[dDFgGhHlnNpPqQxz=]', Keyword),
            # Commands followed by a filename, label or similar argument
            # running to end of line (e.g. 'b label', 'w file', ':label').
            (r'([berRtTvwW:])([^;\n]*)', bygroups(Keyword, String.Single)),
            # a/c/i commands with (possibly backslash-continued) text.
            (r'([aci])((?:.*?\\\n)*(?:.*?[^\\]$))', bygroups(Keyword, String.Double)),
            # q/Q with an optional exit code.
            (r'([qQ])([0-9]*)', bygroups(Keyword, Number.Integer)),
            # A plain /regex/ address.
            (r'(/)' + _inside_delims + r'(/)', bygroups(Punctuation, String.Regex, Punctuation)),
            # An address with a custom delimiter: \%regex%
            (r'(\\(.))' + _inside_delims + r'(\2)',
             bygroups(Punctuation, None, String.Regex, Punctuation)),
            # y/source/dest/ transliteration (any delimiter via \2).
            (r'(y)(.)' + _inside_delims + r'(\2)' + _inside_delims + r'(\2)',
             bygroups(Keyword, Punctuation, String.Single, Punctuation, String.Single, Punctuation)),
            # s/pattern/replacement/flags substitution (any delimiter).
            (r'(s)(.)' + _inside_delims + r'(\2)' + _inside_delims + r'(\2)((?:[gpeIiMm]|[0-9])*)',
             bygroups(Keyword, Punctuation, String.Regex, Punctuation, String.Single, Punctuation,
                      Keyword))
        ]
    }
112
class VimLexer(RegexLexer):
    """
    Lexer for VimL script files.
    """
    name = 'VimL'
    aliases = ['vim']
    filenames = ['*.vim', '.vimrc', '.exrc', '.gvimrc',
                 '_vimrc', '_exrc', '_gvimrc', 'vimrc', 'gvimrc']
    mimetypes = ['text/x-vim']
    url = 'https://www.vim.org'
    version_added = '0.8'

    flags = re.MULTILINE

    # Every legal abbreviation of the ':python' command.
    _python = r'py(?:t(?:h(?:o(?:n)?)?)?)?'

    tokens = {
        'root': [
            # Embedded Python, heredoc form (':py << MARKER ... MARKER')
            # and single-line form (':py <statement>').
            (r'^([ \t:]*)(' + _python + r')([ \t]*)(<<)([ \t]*)(.*)((?:\n|.)*)(\6)',
             bygroups(using(this), Keyword, Text, Operator, Text, Text,
                      using(PythonLexer), Text)),
            (r'^([ \t:]*)(' + _python + r')([ \t])(.*)',
             bygroups(using(this), Keyword, Text, using(PythonLexer))),

            # A line that is nothing but a comment.
            (r'^\s*".*', Comment),

            (r'[ \t]+', Text),
            # TODO: regexes can have other delims
            (r'/[^/\\\n]*(?:\\[\s\S][^/\\\n]*)*/', String.Regex),
            (r'"[^"\\\n]*(?:\\[\s\S][^"\\\n]*)*"', String.Double),
            (r"'[^\n']*(?:''[^\n']*)*'", String.Single),

            # Trailing comments also start with a doublequote, which is
            # why the string rules above must come first.
            (r'(?<=\s)"[^\-:.%#=*].*', Comment),
            (r'-?\d+', Number),
            (r'#[0-9a-f]{6}', Number.Hex),
            (r'^:', Punctuation),
            (r'[()<>+=!|,~-]', Punctuation),  # Inexact list. Looks decent.
            (r'\b(let|if|else|endif|elseif|fun|function|endfunction)\b',
             Keyword),
            (r'\b(NONE|bold|italic|underline|dark|light)\b', Name.Builtin),
            (r'\b\w+\b', Name.Other),  # Reclassified in get_tokens_unprocessed
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        # Deferred import: the builtin tables are large and only needed
        # once a VimLexer is actually instantiated.
        from pygments.lexers._vim_builtins import auto, command, option
        self._aut = auto
        self._cmd = command
        self._opt = option
        super().__init__(**options)

    def is_in(self, w, mapping):
        """
        Return True if ``w`` can be an abbreviation of an entry in
        ``mapping``, a sorted list of ``(min_prefix, full_name)`` pairs.

        VimL lets commands be abbreviated down to a minimal prefix, e.g.
        ':ab', ':abbre' and ':abbreviate' all invoke 'ab[breviate]', so
        instead of matching monster regexps like
        ``\\bab(?:b(?:r(?:e(?:v(?:i(?:a(?:t(?:e)?)?)?)?)?)?)?)?\\b``
        the lexer matches plain words and classifies them here.  ``w``
        matches an entry when it starts with the minimal prefix and the
        full name starts with ``w``.  See ``scripts/get_vimkw.py`` for how
        the tables are generated.
        """
        pos = bisect(mapping, (w,))
        # Only the entries adjacent to the insertion point can match.
        neighbours = []
        if pos > 0:
            neighbours.append(mapping[pos - 1])
        if pos < len(mapping):
            neighbours.append(mapping[pos])
        return any(w.startswith(entry[0]) and entry[1].startswith(w)
                   for entry in neighbours)

    def get_tokens_unprocessed(self, text):
        # TODO: builtins are only subsequent tokens on lines
        # and 'keywords' only happen at the beginning except
        # for :au ones
        stream = RegexLexer.get_tokens_unprocessed(self, text)
        for index, token, value in stream:
            if token is not Name.Other:
                yield index, token, value
            elif self.is_in(value, self._cmd):
                yield index, Keyword, value
            elif self.is_in(value, self._opt) or self.is_in(value, self._aut):
                yield index, Name.Builtin, value
            else:
                yield index, Text, value