Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pygments/util.py: 23%

1"""

2 pygments.util

3 ~~~~~~~~~~~~~

5 Utility functions.

8 :license: BSD, see LICENSE for details.

9"""

11import re

12from io import TextIOWrapper

# Separators between the components of a shebang line: slash, backslash
# or a space.
split_path_re = re.compile(r'[/\\ ]')

# Captures the first part of a DOCTYPE declaration in group 1: the root
# element name and, when present, a keyword followed by a quoted identifier.
doctype_lookup_re = re.compile(
    r'''
    <!DOCTYPE\s+(
     [a-zA-Z_][a-zA-Z0-9]*
     (?: \s+      # optional in HTML5
     [a-zA-Z_][a-zA-Z0-9]*\s+
     "[^"]*")?
     )
     [^>]*>
''',
    re.DOTALL | re.MULTILINE | re.VERBOSE,
)

# An opening tag (optionally with attributes) followed later by some
# closing tag.
tag_re = re.compile(
    r'<(.+?)(\s.*?)?>.*?</.+?>',
    re.IGNORECASE | re.DOTALL | re.MULTILINE,
)

# An XML declaration, possibly preceded by whitespace.
xml_decl_re = re.compile(r'\s*<\?xml[^>]*\?>', re.I)

class ClassNotFound(ValueError):
    """Signals that a lookup function could not find a matching class."""

class OptionError(Exception):
    """
    Error raised by the option processing functions when an argument has
    the wrong type or an unacceptable value.
    """

def get_choice_opt(options, optname, allowed, default=None, normcase=False):
    """
    Return the value stored under `optname` in `options` if it is one of
    `allowed`.

    `default` is used when the key is absent.  With `normcase` set, string
    values are lower-cased before being checked (and returned).

    Raises `OptionError` if the resulting value is not in `allowed`.
    """
    value = options.get(optname, default)
    # Only strings can be case-normalised.  Anything else (for instance a
    # ``None`` default) is validated as-is below instead of failing with
    # an AttributeError on ``.lower()``.
    if normcase and isinstance(value, str):
        value = value.lower()
    if value not in allowed:
        raise OptionError('Value for option {} must be one of {}'.format(
            optname, ', '.join(map(str, allowed))))
    return value

def get_bool_opt(options, optname, default=None):
    """
    Look up `optname` in `options` (falling back to `default`) and coerce
    the result to a Boolean.

    Booleans are returned unchanged and integers are converted with
    ``bool()``.  Strings are accepted so that values coming from the
    command line work: ``1``, ``yes``, ``true`` and ``on`` mean ``True``;
    ``0``, ``no``, ``false`` and ``off`` mean ``False`` (compared
    case-insensitively).

    Raises `OptionError` for any other type or any other string.
    """
    value = options.get(optname, default)
    if isinstance(value, bool):
        return value
    if isinstance(value, int):
        return bool(value)
    if not isinstance(value, str):
        raise OptionError(f'Invalid type {value!r} for option {optname}; use '
                          '1/0, yes/no, true/false, on/off')

    lowered = value.lower()
    if lowered in ('1', 'yes', 'true', 'on'):
        return True
    if lowered in ('0', 'no', 'false', 'off'):
        return False
    raise OptionError(f'Invalid value {value!r} for option {optname}; use '
                      '1/0, yes/no, true/false, on/off')

def get_int_opt(options, optname, default=None):
    """
    Fetch `optname` from `options` (or `default` if absent) and convert it
    with ``int()``.

    Raises `OptionError` when the value has a type ``int()`` rejects or is
    a string that does not spell an integer.
    """
    value = options.get(optname, default)
    try:
        number = int(value)
    except TypeError:
        raise OptionError(f'Invalid type {value!r} for option {optname}; you '
                          'must give an integer value')
    except ValueError:
        raise OptionError(f'Invalid value {value!r} for option {optname}; you '
                          'must give an integer value')
    else:
        return number

def get_list_opt(options, optname, default=None):
    """
    Return the option `optname` from `options` as a list.

    A string value is split on whitespace; a list or tuple is copied into
    a new list.  Any other type raises `OptionError`.
    """
    value = options.get(optname, default)
    if isinstance(value, (list, tuple)):
        return list(value)
    if isinstance(value, str):
        return value.split()
    raise OptionError(f'Invalid type {value!r} for option {optname}; you '
                      'must give a list value')

108

109

def docstring_headline(obj):
    """
    Return the first paragraph of `obj`'s docstring collapsed onto a single
    line, or an empty string if `obj` has no docstring.
    """
    doc = obj.__doc__
    if not doc:
        return ''
    pieces = []
    for raw_line in doc.strip().splitlines():
        stripped = raw_line.strip()
        if not stripped:
            # A blank line ends the headline paragraph.
            break
        pieces.append(stripped)
    return ' '.join(pieces)

120

121

def make_analysator(f):
    """
    Wrap `f` in a static text analyser that always returns a float
    between 0.0 and 1.0.

    The wrapper returns 0.0 when `f` raises, returns a falsy value, or
    returns something that cannot be converted to a float.
    """
    def text_analyse(text):
        try:
            score = f(text)
        except Exception:
            return 0.0
        if not score:
            return 0.0
        try:
            as_float = float(score)
        except (ValueError, TypeError):
            return 0.0
        # Clamp into the closed interval [0.0, 1.0].
        return min(1.0, max(0.0, as_float))

    text_analyse.__doc__ = f.__doc__
    return staticmethod(text_analyse)

137

138

def shebang_matches(text, regex):
    r"""Tell whether the last component of the shebang line, if `text`
    starts with one, matches the given regular expression.

    >>> from pygments.util import shebang_matches
    >>> shebang_matches('#!/usr/bin/env python', r'python(2\.\d)?')
    True
    >>> shebang_matches('#!/usr/bin/python2.4', r'python(2\.\d)?')
    True
    >>> shebang_matches('#!/usr/bin/python-ruby', r'python(2\.\d)?')
    False
    >>> shebang_matches('#!/usr/bin/python/ruby', r'python(2\.\d)?')
    False
    >>> shebang_matches('#!/usr/bin/startsomethingwith python',
    ...                 r'python(2\.\d)?')
    True

    Common windows executable file extensions are accepted as well::

        >>> shebang_matches('#!C:\\Python2.4\\Python.exe', r'python(2\.\d)?')
        True

    Components starting with ``-`` (parameters such as ``'-f'`` or
    ``'--foo'``) are skipped, so ``'perl'`` behaves the same as
    ``'perl -e'``.

    The expression is anchored with ``'^'`` and ``'$'``, so it has to
    match the whole component.
    """
    first_line = text.partition('\n')[0].lower()
    if not first_line.startswith('#!'):
        return False

    # Split what follows "#!" on path separators and spaces, dropping empty
    # pieces and option-like pieces.
    candidates = [part
                  for part in split_path_re.split(first_line[2:].strip())
                  if part and not part.startswith('-')]
    if not candidates:
        return False

    pattern = re.compile(rf'^{regex}(\.(exe|cmd|bat|bin))?$', re.IGNORECASE)
    return pattern.search(candidates[-1]) is not None

182

183

def doctype_matches(text, regex):
    """Tell whether `text` contains a DOCTYPE whose first part matches
    `regex` (case-insensitively).

    Only the first part of the DOCTYPE is checked,
    eg: 'html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"'.
    Returns False when no DOCTYPE is found.
    """
    found = doctype_lookup_re.search(text)
    if found is None:
        return False
    pattern = re.compile(regex, re.I)
    return pattern.match(found.group(1).strip()) is not None

195

196

def html_doctype_matches(text):
    """Tell whether `text` carries a DOCTYPE that starts with ``html``."""
    return doctype_matches(text, r'html')

200

201

# Maps hash(text) to the verdict computed for that text.
_looks_like_xml_cache = {}

# Upper bound on the number of cached verdicts.  Without a bound the cache
# would grow for as long as the process keeps seeing new texts.
_LOOKS_LIKE_XML_CACHE_LIMIT = 1024


def looks_like_xml(text):
    """Check if a doctype exists or if we have some tags.

    Returns True if `text` starts with an XML declaration, contains a
    DOCTYPE, or has something that looks like a tag pair within its first
    1000 characters.  Verdicts for texts without an XML declaration are
    cached, keyed on ``hash(text)``.
    """
    if xml_decl_re.match(text):
        return True
    key = hash(text)
    try:
        return _looks_like_xml_cache[key]
    except KeyError:
        pass
    # Cache the DOCTYPE verdict as well, so the search over the whole text
    # is not repeated for the same input.
    rv = (doctype_lookup_re.search(text) is not None
          or tag_re.search(text[:1000]) is not None)
    if len(_looks_like_xml_cache) >= _LOOKS_LIKE_XML_CACHE_LIMIT:
        # Cheap eviction: start over rather than tracking recency.
        _looks_like_xml_cache.clear()
    _looks_like_xml_cache[key] = rv
    return rv

219

220

def surrogatepair(c):
    """Split a unicode character code that needs more than 16 bits into
    its pair of 16 bit surrogates, returned as ``(high, low)``.
    """
    # From example D28 of:
    # http://www.unicode.org/book/ch03.pdf
    high = 0xd7c0 + (c >> 10)
    low = 0xdc00 + (c & 0x3ff)
    return (high, low)

228

229

def format_lines(var_name, seq, raw=False, indent_level=0):
    """Render `seq` as source text assigning a tuple to `var_name`.

    Each item goes on its own line followed by a comma, indented one level
    (four spaces) deeper than the assignment, which is itself indented by
    `indent_level` levels.  With `raw` set, the items are inserted as they
    are; otherwise each item is written as a single-quoted string literal.
    """
    outer = ' ' * indent_level * 4
    inner = ' ' * (indent_level + 1) * 4
    out = [outer + var_name + ' = (']
    if raw:
        # These should be preformatted reprs of, say, tuples.
        out.extend(inner + item + ',' for item in seq)
    else:
        for item in seq:
            # Appending a double quote makes repr() choose single quotes;
            # the extra character is then cut out again.
            quoted = repr(item + '"')
            out.append(inner + quoted[:-2] + quoted[-1] + ',')
    out.append(outer + ')')
    return '\n'.join(out)

247

248

def duplicates_removed(it, already_seen=()):
    """
    Build a list of the items of the iterable `it`, keeping only the first
    occurrence of each and skipping anything found in `already_seen`.

    The original order is kept.
    """
    unique = []
    taken = set()
    for item in it:
        if item not in taken and item not in already_seen:
            unique.append(item)
            taken.add(item)
    return unique

263

264

class Future:
    """Base class for deferring some work.

    RegexLexerMeta handles instances specially, so that regex strings can
    be constructed at first use.
    """

    def get(self):
        # Subclasses supply the deferred computation.
        raise NotImplementedError

273

274

def guess_decode(text):
    """Decode *text* with guessed encoding.

    First try UTF-8; this should fail for non-UTF-8 encodings.
    Then try the preferred locale encoding.
    Fall back to latin-1, which always works.

    Returns a ``(decoded_text, encoding_name)`` tuple; the decoded text is
    always the result of decoding with the returned encoding.
    """
    try:
        return text.decode('utf-8'), 'utf-8'
    except UnicodeDecodeError:
        pass
    try:
        import locale
        prefencoding = locale.getpreferredencoding()
        # The encoding must be passed explicitly: a bare ``decode()`` would
        # just retry UTF-8, which is already known to fail here.
        return text.decode(prefencoding), prefencoding
    except (UnicodeDecodeError, LookupError):
        return text.decode('latin1'), 'latin1'

294

295

def guess_decode_from_terminal(text, term):
    """Decode *text* coming from terminal *term*.

    First try the terminal encoding, if given.
    Then try UTF-8.  Then try the preferred locale encoding.
    Fall back to latin-1, which always works.

    Returns a ``(decoded_text, encoding_name)`` tuple.
    """
    encoding = getattr(term, 'encoding', None)
    if encoding:
        try:
            return text.decode(encoding), encoding
        except (UnicodeDecodeError, LookupError):
            # Either the bytes are not valid in the terminal encoding or
            # the encoding name is unknown; guess instead.
            pass
    return guess_decode(text)

311

312

def terminal_encoding(term):
    """Give our best guess of the encoding used by *term*: its own
    ``encoding`` attribute when set, else the preferred locale encoding.
    """
    encoding = getattr(term, 'encoding', None)
    if encoding:
        return encoding
    import locale
    return locale.getpreferredencoding()

319

320

class UnclosingTextIOWrapper(TextIOWrapper):
    """A TextIOWrapper whose ``close()`` only flushes."""

    # Don't close underlying buffer on destruction.
    def close(self):
        self.flush()