Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/util.py: 20%

1"""

2 pygments.util

3 ~~~~~~~~~~~~~

5 Utility functions.

8 :license: BSD, see LICENSE for details.

9"""

11import re

12from io import TextIOWrapper

# Separators that may appear between the components of a shebang line:
# forward slash, backslash and space.
split_path_re = re.compile(r'[/\\ ]')

# Captures the leading part of a ``<!DOCTYPE ...>`` declaration: the root
# element name, optionally followed by a keyword and a quoted identifier.
doctype_lookup_re = re.compile(r'''
    <!DOCTYPE\s+(
     [a-zA-Z_][a-zA-Z0-9]*
     (?: \s+      # optional in HTML5
     [a-zA-Z_][a-zA-Z0-9]*\s+
     "[^"]*")?
     )
     [^>]*>
''', re.VERBOSE | re.MULTILINE | re.DOTALL)

# Loose check for an opening tag that is followed, somewhere later, by a
# closing tag.
tag_re = re.compile(
    r'<(.+?)(\s.*?)?>.*?</.+?>',
    re.IGNORECASE | re.DOTALL | re.MULTILINE,
)

# An XML declaration, possibly preceded by whitespace.
xml_decl_re = re.compile(r'\s*<\?xml[^>]*\?>', re.IGNORECASE)

class ClassNotFound(ValueError):
    """Signals that a lookup function could not find a matching class."""

class OptionError(Exception):
    """
    Raised by the option processing functions when an argument has the
    wrong type or an unacceptable value.
    """

def get_choice_opt(options, optname, allowed, default=None, normcase=False):
    """
    Return the value of `optname` in `options` if it is one of `allowed`.

    :param options: dictionary of option names to values.
    :param optname: key to look up in `options`.
    :param allowed: container of acceptable values.
    :param default: value used when `optname` is absent from `options`.
    :param normcase: if true, lower-case a string value before checking it
        against `allowed` (and return the lower-cased form).
    :raises OptionError: if the (possibly lower-cased) value is not in
        `allowed`.
    """
    value = options.get(optname, default)
    # Only text can be case-normalised.  Any other value (for example a
    # missing option whose default is ``None``) goes straight to the
    # membership check, so the caller gets an OptionError instead of an
    # AttributeError from calling ``.lower()`` on it.
    if normcase and isinstance(value, (str, bytes)):
        value = value.lower()
    if value not in allowed:
        raise OptionError('Value for option %s must be one of %s' %
                          (optname, ', '.join(map(str, allowed))))
    return value

def get_bool_opt(options, optname, default=None):
    """
    Look up `optname` in `options` (falling back to `default`) and coerce
    the result to a Boolean.

    Booleans are returned unchanged and other integers are converted with
    ``bool()``.  Strings are accepted so that values can come from the
    command line: ``1``, ``yes``, ``true`` and ``on`` mean ``True``;
    ``0``, ``no``, ``false`` and ``off`` mean ``False`` (matched
    case-insensitively).

    Any other type, or any other string, raises an `OptionError`.
    """
    value = options.get(optname, default)
    if isinstance(value, bool):
        return value
    if isinstance(value, int):
        return bool(value)
    if not isinstance(value, str):
        raise OptionError('Invalid type %r for option %s; use '
                          '1/0, yes/no, true/false, on/off' % (
                              value, optname))

    lowered = value.lower()
    if lowered in ('1', 'yes', 'true', 'on'):
        return True
    if lowered in ('0', 'no', 'false', 'off'):
        return False
    raise OptionError('Invalid value %r for option %s; use '
                      '1/0, yes/no, true/false, on/off' % (
                          value, optname))

def get_int_opt(options, optname, default=None):
    """Like :func:`get_bool_opt`, but convert the value with ``int()``.

    Raises an `OptionError` when ``int()`` rejects the value, with a message
    that distinguishes a wrong type from a wrong value.
    """
    raw = options.get(optname, default)
    try:
        return int(raw)
    except (TypeError, ValueError) as err:
        # Choose the message according to which failure int() reported.
        if isinstance(err, TypeError):
            template = ('Invalid type %r for option %s; you '
                        'must give an integer value')
        else:
            template = ('Invalid value %r for option %s; you '
                        'must give an integer value')
        raise OptionError(template % (raw, optname))

def get_list_opt(options, optname, default=None):
    """
    Fetch `optname` from `options` (or `default`) as a list.

    A string is split at whitespace; a list or tuple is copied into a new
    list.  Any other type raises an `OptionError`.
    """
    value = options.get(optname, default)
    if isinstance(value, (list, tuple)):
        return list(value)
    if isinstance(value, str):
        return value.split()
    raise OptionError('Invalid type %r for option %s; you '
                      'must give a list value' % (
                          value, optname))

114

115

def docstring_headline(obj):
    """Return the first paragraph of ``obj.__doc__`` joined onto one line.

    Lines are taken up to the first blank line, stripped, and joined with
    single spaces.  An empty string is returned when there is no docstring.
    """
    doc = obj.__doc__
    if not doc:
        return ''
    pieces = []
    for raw_line in doc.strip().splitlines():
        stripped = raw_line.strip()
        if not stripped:
            # The first blank line ends the headline paragraph.
            break
        pieces.append(stripped)
    return ' '.join(pieces)

126

127

def make_analysator(f):
    """Wrap `f` as a static text analyser that always returns a float.

    The wrapper returns ``0.0`` if `f` raises, returns a falsy value, or
    returns something ``float()`` cannot convert; otherwise the result is
    clamped to the range 0.0 to 1.0.  The wrapper keeps the docstring of `f`.
    """
    def text_analyse(text):
        try:
            score = f(text)
        except Exception:
            # A failing analyser counts as "no match" rather than an error.
            return 0.0
        if score:
            try:
                return min(1.0, max(0.0, float(score)))
            except (ValueError, TypeError):
                pass
        return 0.0

    text_analyse.__doc__ = f.__doc__
    return staticmethod(text_analyse)

143

144

def shebang_matches(text, regex):
    r"""Check if the given regular expression matches the last part of the
    shebang if one exists.

        >>> from pygments.util import shebang_matches
        >>> shebang_matches('#!/usr/bin/env python', r'python(2\.\d)?')
        True
        >>> shebang_matches('#!/usr/bin/python2.4', r'python(2\.\d)?')
        True
        >>> shebang_matches('#!/usr/bin/python-ruby', r'python(2\.\d)?')
        False
        >>> shebang_matches('#!/usr/bin/python/ruby', r'python(2\.\d)?')
        False
        >>> shebang_matches('#!/usr/bin/startsomethingwith python',
        ...                 r'python(2\.\d)?')
        True

    It also checks for common windows executable file extensions::

        >>> shebang_matches('#!C:\\Python2.4\\Python.exe', r'python(2\.\d)?')
        True

    Parameters (``'-f'`` or ``'--foo'`` are ignored so ``'perl'`` does
    the same as ``'perl -e'``)

    Note that this method automatically searches the whole string (eg:
    the regular expression is wrapped in ``'^$'``).  The expression is
    grouped before it is wrapped, so a top-level alternation is anchored
    as a whole::

        >>> shebang_matches('#!/usr/bin/perlish', r'perl|raku')
        False

    :param text: the text whose first line may be a shebang.
    :param regex: regular expression source (a string) for the interpreter
        name; matched case-insensitively.
    :return: ``True`` if the first line is a shebang whose last
        non-option component matches `regex`, otherwise ``False``.
    """
    # Only the first line can be a shebang; it is compared in lower case.
    first_line = text.partition('\n')[0].lower()
    if not first_line.startswith('#!'):
        return False

    # Split on path separators and spaces, dropping empty pieces and
    # command-line options such as ``-f`` or ``--foo``.
    candidates = [part
                  for part in split_path_re.split(first_line[2:].strip())
                  if part and not part.startswith('-')]
    if not candidates:
        return False

    # Group the caller's expression so that an alternation like ``a|b``
    # cannot escape the ``^...$`` anchors or the optional extension suffix.
    pattern = re.compile(r'^(?:%s)(\.(exe|cmd|bat|bin))?$' % regex,
                         re.IGNORECASE)
    return pattern.search(candidates[-1]) is not None

188

189

def doctype_matches(text, regex):
    """Check if the doctype matches a regular expression (if present).

    Only the first part of a DOCTYPE is compared, e.g.
    'html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"'.  The comparison is
    case-insensitive and anchored at the start of that part.  Returns
    ``False`` when `text` contains no DOCTYPE declaration.
    """
    found = doctype_lookup_re.search(text)
    if found is None:
        return False
    declared = found.group(1).strip()
    return re.match(regex, declared, re.I) is not None

201

202

def html_doctype_matches(text):
    """Return whether `text` carries a DOCTYPE that starts with ``html``."""
    is_html = doctype_matches(text, r'html')
    return is_html

206

207

# Results of the tag scan, keyed by ``hash(text)``.
_looks_like_xml_cache = {}


def looks_like_xml(text):
    """Return whether `text` has an XML declaration, a doctype, or tags.

    Only the outcome of the tag scan (over the first 1000 characters) is
    cached; a hit on the XML declaration or the doctype returns ``True``
    without being stored.
    """
    if xml_decl_re.match(text):
        return True

    cache_key = hash(text)
    if cache_key in _looks_like_xml_cache:
        return _looks_like_xml_cache[cache_key]

    if doctype_lookup_re.search(text) is not None:
        return True

    has_tags = tag_re.search(text[:1000]) is not None
    _looks_like_xml_cache[cache_key] = has_tags
    return has_tags

225

226

def surrogatepair(c):
    """Split a code point wider than 16 bits into its UTF-16 surrogate
    pair, returned as a ``(high, low)`` tuple of integers.
    """
    # Formula taken from example D28 of:
    # http://www.unicode.org/book/ch03.pdf
    high = 0xd7c0 + (c >> 10)
    low = 0xdc00 + (c & 0x3ff)
    return (high, low)

234

235

def format_lines(var_name, seq, raw=False, indent_level=0):
    """Render `seq` as the source text of a tuple assigned to `var_name`.

    Each item goes on its own line followed by a comma.  With ``raw`` set,
    items are written verbatim; otherwise each string is written as a
    single-quoted literal.  `indent_level` is counted in units of four
    spaces.
    """
    outer = ' ' * indent_level * 4
    inner = ' ' * (indent_level + 1) * 4
    out = [outer + var_name + ' = (']
    if raw:
        # Items are expected to be preformatted reprs of, say, tuples.
        out.extend(inner + item + ',' for item in seq)
    else:
        for item in seq:
            # Appending a double quote makes repr() choose single quotes;
            # the added character is then cut back out of the result.
            quoted = repr(item + '"')
            out.append(inner + quoted[:-2] + quoted[-1] + ',')
    out.append(outer + ')')
    return '\n'.join(out)

253

254

def duplicates_removed(it, already_seen=()):
    """
    Return the items of the iterable `it` as a list, keeping only the first
    occurrence of each and skipping anything found in `already_seen`.

    Order is preserved.
    """
    unique = []
    emitted = set()
    for item in it:
        if item not in emitted and item not in already_seen:
            unique.append(item)
            emitted.add(item)
    return unique

269

270

class Future:
    """Base class for work that is deferred until it is needed.

    Handled specially in RegexLexerMeta, to support regex string
    construction at first use.
    """

    def get(self):
        """Produce the deferred value; subclasses must override this."""
        raise NotImplementedError()

279

280

def guess_decode(text):
    """Decode *text* with guessed encoding.

    First try UTF-8; this should fail for non-UTF-8 encodings.
    Then try the preferred locale encoding.
    Fall back to latin-1, which always works.

    :param text: the bytes to decode.
    :return: a ``(decoded_text, encoding_name)`` tuple naming the encoding
        that was actually used.
    """
    try:
        return text.decode('utf-8'), 'utf-8'
    except UnicodeDecodeError:
        pass

    try:
        import locale
        prefencoding = locale.getpreferredencoding()
        # Pass the locale encoding explicitly: a bare ``decode()`` defaults
        # to UTF-8, which has already failed above.
        return text.decode(prefencoding), prefencoding
    except (UnicodeDecodeError, LookupError):
        # LookupError covers a locale encoding name Python has no codec for.
        return text.decode('latin1'), 'latin1'

300

301

def guess_decode_from_terminal(text, term):
    """Decode *text* that was read from the terminal *term*.

    If *term* has a non-empty ``encoding`` attribute, that encoding is tried
    first.  When it is missing, or decoding with it fails, the work is
    handed to :func:`guess_decode`.

    Returns a ``(decoded_text, encoding_name)`` tuple.
    """
    term_encoding = getattr(term, 'encoding', None)
    if term_encoding:
        try:
            decoded = text.decode(term_encoding)
        except UnicodeDecodeError:
            pass
        else:
            return decoded, term_encoding
    return guess_decode(text)

317

318

def terminal_encoding(term):
    """Return the encoding to assume for the given *term*.

    Uses the ``encoding`` attribute of *term* when it is present and
    non-empty, otherwise the preferred encoding of the locale.
    """
    declared = getattr(term, 'encoding', None)
    if declared:
        return declared
    import locale
    return locale.getpreferredencoding()

325

326

class UnclosingTextIOWrapper(TextIOWrapper):
    """A TextIOWrapper whose ``close()`` leaves the underlying buffer open."""

    def close(self):
        # Only flush pending output; the underlying buffer is not closed.
        self.flush()