Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pygments/util.py: 48%

1"""

2 pygments.util

3 ~~~~~~~~~~~~~

5 Utility functions.

8 :license: BSD, see LICENSE for details.

9"""

11import re

12from io import TextIOWrapper

13import html

# Separators used to break a shebang line into pieces: '/', '\' or a space.
split_path_re = re.compile(r'[/\\ ]')
# Finds a ``<!DOCTYPE ...>`` declaration.  Group 1 captures the root name,
# optionally followed by a keyword and a double-quoted string.
doctype_lookup_re = re.compile(r'''
    <!DOCTYPE\s+(
     [a-zA-Z_][a-zA-Z0-9]*
     (?: \s+ # optional in HTML5
     [a-zA-Z_][a-zA-Z0-9]*\s+
     "[^"]*")?
     )
     [^>]*>
''', re.DOTALL | re.MULTILINE | re.VERBOSE)
# An opening tag (with optional attributes) followed later by some closing
# tag; the two tag names are not required to be equal.
tag_re = re.compile(r'<(.+?)(\s.*?)?>.*?</.+?>',
                    re.IGNORECASE | re.DOTALL | re.MULTILINE)
# An ``<?xml ... ?>`` declaration, optionally preceded by whitespace.
xml_decl_re = re.compile(r'\s*<\?xml[^>]*\?>', re.I)

class ClassNotFound(ValueError):
    """Signals that a lookup function could not find a matching class."""

class OptionError(Exception):
    """
    Error raised by the option processing functions when an option's
    value has the wrong type or is not an acceptable value.
    """

def get_choice_opt(options, optname, allowed, default=None, normcase=False):
    """
    Return the value of `optname` from `options` if it is one of `allowed`.

    :param options: dictionary mapping option names to values.
    :param optname: key to look up in `options`.
    :param allowed: container of acceptable values.
    :param default: value used when `optname` is absent from `options`.
    :param normcase: if true, lower-case a string value before checking it.
    :return: the (possibly lower-cased) value.
    :raises OptionError: if the value is not in `allowed`.
    """
    value = options.get(optname, default)
    # Only strings can be case-normalised.  A non-string value (for example
    # the default ``None`` of an absent option) must reach the membership
    # check below instead of failing with ``AttributeError`` on ``.lower()``.
    if normcase and isinstance(value, str):
        value = value.lower()
    if value not in allowed:
        choices = ', '.join(map(str, allowed))
        raise OptionError(f'Value for option {optname} must be one of {choices}')
    return value

def get_bool_opt(options, optname, default=None):
    """
    Look up `optname` in `options` (falling back to `default`) and return
    it as a Boolean.

    Real Booleans are returned unchanged and other integers are converted
    with ``bool()``.  Strings are accepted so that values coming from the
    command line can be used: ``1``, ``yes``, ``true`` and ``on`` mean
    ``True``; ``0``, ``no``, ``false`` and ``off`` mean ``False``.  The
    comparison ignores case.

    An `OptionError` is raised for any other string and for values of any
    other type.
    """
    value = options.get(optname, default)
    if isinstance(value, bool):
        return value
    if isinstance(value, int):
        return bool(value)
    if not isinstance(value, str):
        raise OptionError(f'Invalid type {value!r} for option {optname}; use '
                          '1/0, yes/no, true/false, on/off')
    lowered = value.lower()
    if lowered in ('1', 'yes', 'true', 'on'):
        return True
    if lowered in ('0', 'no', 'false', 'off'):
        return False
    raise OptionError(f'Invalid value {value!r} for option {optname}; use '
                      '1/0, yes/no, true/false, on/off')

def get_int_opt(options, optname, default=None):
    """
    Look up `optname` in `options` (falling back to `default`) and return
    it converted with ``int()``.

    An `OptionError` is raised when ``int()`` rejects the value with a
    ``TypeError`` or a ``ValueError``.
    """
    raw = options.get(optname, default)
    try:
        number = int(raw)
    except TypeError:
        raise OptionError(f'Invalid type {raw!r} for option {optname}; you '
                          'must give an integer value')
    except ValueError:
        raise OptionError(f'Invalid value {raw!r} for option {optname}; you '
                          'must give an integer value')
    else:
        return number

def get_list_opt(options, optname, default=None):
    """
    Look up `optname` in `options` (falling back to `default`) and return
    it as a list.

    A list or tuple is copied into a new list; a string is split at
    whitespace.  Any other type raises an `OptionError`.
    """
    value = options.get(optname, default)
    if isinstance(value, (list, tuple)):
        return list(value)
    if isinstance(value, str):
        return value.split()
    raise OptionError(f'Invalid type {value!r} for option {optname}; you '
                      'must give a list value')

109

110

def docstring_headline(obj):
    """
    Return the first paragraph of `obj`'s docstring as a single line.

    The docstring is stripped, its lines are read up to the first blank
    line, and the stripped lines are joined with single spaces.  An empty
    string is returned when `obj` has no docstring.
    """
    doc = obj.__doc__
    if not doc:
        return ''
    pieces = []
    for raw_line in doc.strip().splitlines():
        stripped = raw_line.strip()
        if not stripped:
            # A blank line ends the headline paragraph.
            break
        pieces.append(stripped)
    return ' '.join(pieces)

121

122

def make_analysator(f):
    """
    Wrap `f` in a ``staticmethod`` that scores a text as a float.

    The wrapper returns ``0.0`` when `f` raises an exception, when its
    result is falsy, or when ``float()`` rejects the result with a
    ``ValueError`` or ``TypeError``.  Any other result is converted to a
    float and clamped to the range 0.0 to 1.0.  The wrapper takes over the
    docstring of `f`.
    """
    def text_analyse(text):
        try:
            score = f(text)
        except Exception:
            return 0.0
        if not score:
            return 0.0
        try:
            as_float = float(score)
        except (ValueError, TypeError):
            return 0.0
        return min(1.0, max(0.0, as_float))
    text_analyse.__doc__ = f.__doc__
    return staticmethod(text_analyse)

138

139

def shebang_matches(text, regex):
    r"""Check if the given regular expression matches the last part of the
    shebang if one exists.

        >>> from pygments.util import shebang_matches
        >>> shebang_matches('#!/usr/bin/env python', r'python(2\.\d)?')
        True
        >>> shebang_matches('#!/usr/bin/python2.4', r'python(2\.\d)?')
        True
        >>> shebang_matches('#!/usr/bin/python-ruby', r'python(2\.\d)?')
        False
        >>> shebang_matches('#!/usr/bin/python/ruby', r'python(2\.\d)?')
        False
        >>> shebang_matches('#!/usr/bin/startsomethingwith python',
        ...                 r'python(2\.\d)?')
        True

    It also checks for common windows executable file extensions::

        >>> shebang_matches('#!C:\\Python2.4\\Python.exe', r'python(2\.\d)?')
        True

    Parameters (``'-f'`` or ``'--foo'`` are ignored so ``'perl'`` does
    the same as ``'perl -e'``)

    Note that this method automatically searches the whole string (eg:
    the regular expression is wrapped in ``'^$'``).  This also holds for a
    pattern with a top-level alternation::

        >>> shebang_matches('#!/usr/bin/pythonista', r'python|perl')
        False
        >>> shebang_matches('#!/usr/bin/perl', r'python|perl')
        True

    Only the first line of `text` is inspected, and it is lower-cased
    before matching.  Returns ``False`` when that line is not a shebang.
    """
    first_line = text.partition('\n')[0].lower()
    if not first_line.startswith('#!'):
        return False
    # Split the interpreter line at path separators and spaces, dropping
    # empty pieces and command-line switches; the last remaining piece is
    # the program name.
    parts = [x for x in split_path_re.split(first_line[2:].strip())
             if x and not x.startswith('-')]
    if not parts:
        return False
    # Group the caller's pattern so that ``^`` and the extension/``$`` suffix
    # apply to the whole pattern, not just to the first and last branch of a
    # top-level alternation.
    pattern = re.compile(rf'^(?:{regex})(\.(exe|cmd|bat|bin))?$',
                         re.IGNORECASE)
    return pattern.search(parts[-1]) is not None

183

184

def doctype_matches(text, regex):
    """Tell whether the DOCTYPE found in `text` matches `regex`.

    Returns ``False`` when `text` contains no DOCTYPE declaration.

    Only the first part of the DOCTYPE is tested, for example
    'html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"'.  `regex` is matched
    case-insensitively against the start of that part.
    """
    found = doctype_lookup_re.search(text)
    if found is None:
        return False
    declared = found.group(1).strip()
    return re.match(regex, declared, re.I) is not None

196

197

def html_doctype_matches(text):
    """Tell whether `text` has a DOCTYPE that starts with ``html``."""
    return doctype_matches(text, 'html')

201

202

# Memo of `looks_like_xml` results, keyed by ``hash(text)`` so that the
# analysed texts themselves are not kept alive by the cache.
_looks_like_xml_cache = {}
# Maximum number of memoised results; the memo is emptied when it is full so
# that it cannot grow without bound in a long-running process.
_LOOKS_LIKE_XML_CACHE_MAX = 1024


def looks_like_xml(text):
    """Check if a doctype exists or if we have some tags.

    Returns ``True`` when `text` starts with an XML declaration, contains a
    DOCTYPE declaration, or has an opening tag followed by a closing tag
    within its first 1000 characters; ``False`` otherwise.

    Results of the doctype and tag searches are memoised per text.
    """
    if xml_decl_re.match(text):
        return True
    key = hash(text)
    try:
        return _looks_like_xml_cache[key]
    except KeyError:
        pass
    if doctype_lookup_re.search(text) is not None:
        rv = True
    else:
        # Tags are only looked for near the start of the text.
        rv = tag_re.search(text[:1000]) is not None
    if len(_looks_like_xml_cache) >= _LOOKS_LIKE_XML_CACHE_MAX:
        _looks_like_xml_cache.clear()
    _looks_like_xml_cache[key] = rv
    return rv

220

221

def surrogatepair(c):
    """Split a character code that needs more than 16 bits into its pair
    of 16 bit surrogates, returned as ``(high, low)``.
    """
    # From example D28 of:
    # http://www.unicode.org/book/ch03.pdf
    # 0xd7c0 is 0xd800 - (0x10000 >> 10): the offset of 0x10000 is folded
    # into the base of the high surrogate.
    high = 0xd7c0 + (c >> 10)
    low = 0xdc00 + (c & 0x3ff)
    return (high, low)

229

230

def format_lines(var_name, seq, raw=False, indent_level=0):
    """Render `seq` as the source text of a tuple assigned to `var_name`.

    Each item goes on its own line followed by a comma.  With `raw` set,
    the items are inserted verbatim; otherwise each item is written as a
    single-quoted string literal.  `indent_level` counts units of four
    spaces put in front of the assignment.
    """
    outer = ' ' * indent_level * 4
    inner = ' ' * (indent_level + 1) * 4
    out = [outer + var_name + ' = (']
    if raw:
        # These should be preformatted reprs of, say, tuples.
        out.extend(inner + item + ',' for item in seq)
    else:
        for item in seq:
            # Appending a double quote makes repr() choose single quotes;
            # the extra character is then cut out again.
            quoted = repr(item + '"')
            out.append(inner + quoted[:-2] + quoted[-1] + ',')
    out.append(outer + ')')
    return '\n'.join(out)

248

249

def duplicates_removed(it, already_seen=()):
    """
    Return the items of the iterable `it` as a list, keeping only the first
    occurrence of each item and leaving out items found in `already_seen`.

    The original order is kept.
    """
    unique = []
    seen = set()
    for item in it:
        if item not in seen and item not in already_seen:
            unique.append(item)
            seen.add(item)
    return unique

264

265

class Future:
    """Base class for work that is deferred until it is needed.

    Handled specially in RegexLexerMeta, to support regex string
    construction at first use.
    """
    def get(self):
        """Produce the deferred value; this base version is not implemented."""
        raise NotImplementedError

274

275

def guess_decode(text):
    """Decode the bytes *text* and return ``(decoded_text, encoding_used)``.

    The encodings are tried in this order:

    1. UTF-8, which should fail for non-UTF-8 input;
    2. the preferred locale encoding;
    3. latin-1, which always works.
    """
    try:
        return text.decode('utf-8'), 'utf-8'
    except UnicodeDecodeError:
        pass
    try:
        import locale
        prefencoding = locale.getpreferredencoding()
        return text.decode(prefencoding), prefencoding
    except (UnicodeDecodeError, LookupError):
        return text.decode('latin1'), 'latin1'

295

296

def guess_decode_from_terminal(text, term):
    """Decode *text* coming from terminal *term*.

    First try the terminal encoding, if given.
    Then try UTF-8.  Then try the preferred locale encoding.
    Fall back to latin-1, which always works.

    Returns a ``(decoded_text, encoding_used)`` tuple.  A terminal encoding
    that cannot decode *text*, or whose name is not a known codec, is
    skipped and the remaining guesses are tried.
    """
    encoding = getattr(term, 'encoding', None)
    if encoding:
        try:
            return text.decode(encoding), encoding
        except (UnicodeDecodeError, LookupError):
            # LookupError: the terminal reports an encoding name Python does
            # not know.  Treat it like undecodable input and keep guessing,
            # as guess_decode() does for the locale encoding.
            pass
    return guess_decode(text)

312

313

def terminal_encoding(term):
    """Return the encoding to assume for *term*.

    This is the ``encoding`` attribute of *term* when it has a non-empty
    one, and the preferred locale encoding otherwise.
    """
    encoding = getattr(term, 'encoding', None)
    if not encoding:
        import locale
        encoding = locale.getpreferredencoding()
    return encoding

320

321

class UnclosingTextIOWrapper(TextIOWrapper):
    """A ``TextIOWrapper`` whose ``close()`` does not close the underlying
    buffer, so that destroying the wrapper leaves the buffer usable.
    """

    def close(self):
        """Flush pending output instead of closing."""
        self.flush()

326

def html_escape(string, quote=True):
    """Return `string` escaped for use in HTML, or an empty string when
    `string` is `None`.

    `html.escape` cannot handle `None` because it calls `replace` on its
    argument, so that case is dealt with here first.

    The optional flag `quote` is true by default, which also escapes
    double and single quotes.
    See https://docs.python.org/3/library/html.html#html.escape for more details.
    """
    if string is None:
        return ''
    return html.escape(string, quote=quote)

341 return ''