Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pygments/util.py: 48%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2 pygments.util
3 ~~~~~~~~~~~~~
5 Utility functions.
7 :copyright: Copyright 2006-present by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
11import re
12from io import TextIOWrapper
13import html
# Separator set used to break a path-like string into components:
# a forward slash, a backslash or a space.
split_path_re = re.compile(r'[/\\ ]')
# Finds a ``<!DOCTYPE ...>`` declaration.  Group 1 captures the leading name
# and, when present, a following keyword plus a double-quoted string.
doctype_lookup_re = re.compile(r'''
    <!DOCTYPE\s+(
     [a-zA-Z_][a-zA-Z0-9]*
     (?: \s+      # optional in HTML5
     [a-zA-Z_][a-zA-Z0-9]*\s+
     "[^"]*")?
     )
     [^>]*>
''', re.DOTALL | re.MULTILINE | re.VERBOSE)
# Matches an opening tag (with optional attributes), any content, and a
# closing tag.  The closing tag's name is not required to equal the opening
# tag's name.
tag_re = re.compile(r'<(.+?)(\s.*?)?>.*?</.+?>',
                    re.IGNORECASE | re.DOTALL | re.MULTILINE)
# Matches an ``<?xml ... ?>`` declaration, optionally preceded by whitespace,
# case-insensitively.
xml_decl_re = re.compile(r'\s*<\?xml[^>]*\?>', re.I)
class ClassNotFound(ValueError):
    """Signals that a lookup function could not find a matching class."""
class OptionError(Exception):
    """
    Error raised by every option processing function when the argument
    it is given has an incorrect type or value.
    """
def get_choice_opt(options, optname, allowed, default=None, normcase=False):
    """
    Return the value of `optname` from `options` after checking it against
    the sequence `allowed`.

    If the key is absent, `default` is used (and checked) instead.  When
    `normcase` is true, string values are lower-cased before the check and
    the lower-cased value is what gets returned.

    Raises an `OptionError` if the (possibly normalized) value is not in
    `allowed`.
    """
    string = options.get(optname, default)
    # Only strings can be case-normalized.  Anything else (for example the
    # ``None`` default) is left untouched so that the membership test below
    # decides its fate, rather than an ``AttributeError`` leaking out of
    # ``.lower()``.
    if normcase and isinstance(string, str):
        string = string.lower()
    if string not in allowed:
        raise OptionError('Value for option {} must be one of {}'.format(optname, ', '.join(map(str, allowed))))
    return string
def get_bool_opt(options, optname, default=None):
    """
    Look up `optname` in `options` (falling back to `default`) and return
    it as a Boolean.

    Real Booleans are returned as they are and integers are converted with
    ``bool()``.  Strings are accepted too, so that Boolean values can be
    passed on the command line: ``1``, ``yes``, ``true`` and ``on`` mean
    ``True``, while ``0``, ``no``, ``false`` and ``off`` mean ``False``
    (matched case-insensitively).

    An `OptionError` is raised for any other type or any other string.
    """
    value = options.get(optname, default)
    if isinstance(value, bool):
        return value
    if isinstance(value, int):
        return bool(value)
    if not isinstance(value, str):
        raise OptionError(f'Invalid type {value!r} for option {optname}; use '
                          '1/0, yes/no, true/false, on/off')

    lowered = value.lower()
    if lowered in ('1', 'yes', 'true', 'on'):
        return True
    if lowered in ('0', 'no', 'false', 'off'):
        return False
    raise OptionError(f'Invalid value {value!r} for option {optname}; use '
                      '1/0, yes/no, true/false, on/off')
def get_int_opt(options, optname, default=None):
    """
    Look up `optname` in `options` (falling back to `default`) and return
    it converted with ``int()``.

    Raises an `OptionError` if the conversion fails with a ``TypeError``
    or a ``ValueError``.
    """
    raw = options.get(optname, default)
    try:
        return int(raw)
    except (TypeError, ValueError) as err:
        # The message names what was wrong: the type or the value.
        problem = 'type' if isinstance(err, TypeError) else 'value'
        raise OptionError(f'Invalid {problem} {raw!r} for option {optname}; you '
                          'must give an integer value')
def get_list_opt(options, optname, default=None):
    """
    Look up `optname` in `options` (falling back to `default`) and return
    it as a list.

    A string is split at whitespace; a list or tuple is copied into a new
    list.  Any other type raises an `OptionError`.
    """
    value = options.get(optname, default)
    if isinstance(value, (list, tuple)):
        return list(value)
    if isinstance(value, str):
        return value.split()
    raise OptionError(f'Invalid type {value!r} for option {optname}; you '
                      'must give a list value')
def docstring_headline(obj):
    """
    Return the first paragraph of the docstring of `obj` as a single line.

    The lines up to the first blank line are stripped and joined with
    single spaces.  An empty string is returned if `obj` has no docstring.
    """
    doc = obj.__doc__
    if not doc:
        return ''
    headline = []
    for raw_line in doc.strip().splitlines():
        stripped = raw_line.strip()
        if not stripped:
            # A blank line ends the headline paragraph.
            break
        headline.append(stripped)
    return ' '.join(headline)
def make_analysator(f):
    """
    Wrap `f` into a static text analyser function that returns floats.

    The wrapper returns ``0.0`` if `f` raises an exception, returns a
    falsy value, or returns something ``float()`` rejects with a
    ``ValueError`` or ``TypeError``.  Otherwise the result is converted to
    a float and clamped to the range 0.0 to 1.0.  The docstring of `f` is
    copied to the wrapper, which is returned as a ``staticmethod``.
    """
    def text_analyse(text):
        try:
            score = f(text)
        except Exception:
            return 0.0
        if not score:
            return 0.0
        try:
            as_float = float(score)
        except (ValueError, TypeError):
            return 0.0
        return min(1.0, max(0.0, as_float))

    text_analyse.__doc__ = f.__doc__
    return staticmethod(text_analyse)
def shebang_matches(text, regex):
    r"""Tell whether `regex` matches the last part of the shebang line of
    `text`, if there is one.

        >>> from pygments.util import shebang_matches
        >>> shebang_matches('#!/usr/bin/env python', r'python(2\.\d)?')
        True
        >>> shebang_matches('#!/usr/bin/python2.4', r'python(2\.\d)?')
        True
        >>> shebang_matches('#!/usr/bin/python-ruby', r'python(2\.\d)?')
        False
        >>> shebang_matches('#!/usr/bin/python/ruby', r'python(2\.\d)?')
        False
        >>> shebang_matches('#!/usr/bin/startsomethingwith python',
        ...                 r'python(2\.\d)?')
        True

    Common windows executable file extensions are accepted as well::

        >>> shebang_matches('#!C:\\Python2.4\\Python.exe', r'python(2\.\d)?')
        True

    Parameters (``'-f'`` or ``'--foo'``) are skipped, so ``'perl'`` gives
    the same result as ``'perl -e'``.

    The expression has to match the whole component: it is wrapped in
    ``'^$'`` before being applied, and it is applied case-insensitively.
    """
    # Only the first line can hold a shebang; it is compared in lower case.
    first_line = text.partition('\n')[0].lower()
    if not first_line.startswith('#!'):
        return False

    # Split the rest of the line at path separators and spaces, dropping
    # empty pieces and anything that looks like a command line option.
    pieces = [piece for piece in split_path_re.split(first_line[2:].strip())
              if piece and not piece.startswith('-')]
    if not pieces:
        return False

    pattern = re.compile(rf'^{regex}(\.(exe|cmd|bat|bin))?$', re.IGNORECASE)
    return pattern.search(pieces[-1]) is not None
def doctype_matches(text, regex):
    """Tell whether the doctype found in `text` matches `regex`.

    ``False`` is returned when `text` contains no doctype.  Only the first
    part of a DOCTYPE is compared, e.g.
    'html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"', and the comparison
    is case-insensitive.
    """
    found = doctype_lookup_re.search(text)
    if found is None:
        return False
    declared = found.group(1).strip()
    pattern = re.compile(regex, re.I)
    return pattern.match(declared) is not None
def html_doctype_matches(text):
    """Tell whether `text` appears to carry an HTML doctype."""
    is_html = doctype_matches(text, r'html')
    return is_html
# Results of the tag heuristic in `looks_like_xml`, keyed by ``hash(text)``.
_looks_like_xml_cache = {}


def looks_like_xml(text):
    """Tell whether `text` has an XML declaration, a doctype or some tags.

    Only the outcome of the tag search, which inspects the first 1000
    characters, is stored in the cache.
    """
    if xml_decl_re.match(text):
        return True

    key = hash(text)
    if key in _looks_like_xml_cache:
        return _looks_like_xml_cache[key]

    if doctype_lookup_re.search(text) is not None:
        return True

    has_tags = tag_re.search(text[:1000]) is not None
    _looks_like_xml_cache[key] = has_tags
    return has_tags
def surrogatepair(c):
    """Split the unicode character code `c`, which needs more than 16 bits,
    into its pair of 16 bit surrogates, returned as ``(high, low)``.
    """
    # From example D28 of:
    # http://www.unicode.org/book/ch03.pdf
    high = 0xd7c0 + (c >> 10)
    low = 0xdc00 + (c & 0x3ff)
    return (high, low)
def format_lines(var_name, seq, raw=False, indent_level=0):
    """Format the strings in `seq` as the source of a tuple assignment.

    The result is ``var_name = (`` on one line, one indented line per item
    ending in a comma, and a closing ``)``.  Every indentation level is
    four spaces wide.  With `raw` set, the items are inserted unchanged;
    otherwise each one is written as a single-quoted string literal.
    """
    base_indent = ' ' * indent_level * 4
    inner_indent = ' ' * (indent_level + 1) * 4
    opening = base_indent + var_name + ' = ('

    if raw:
        # These should be preformatted reprs of, say, tuples.
        body = [inner_indent + item + ',' for item in seq]
    else:
        body = []
        for item in seq:
            # Appending a double quote forces repr() to use single quotes;
            # the extra character is cut out of the result again.
            quoted = repr(item + '"')
            body.append(inner_indent + quoted[:-2] + quoted[-1] + ',')

    closing = base_indent + ')'
    return '\n'.join([opening, *body, closing])
def duplicates_removed(it, already_seen=()):
    """
    Return the items of the iterable `it` as a list without duplicates.

    Items contained in `already_seen` are left out as well.  The order of
    first occurrence is preserved.
    """
    unique = []
    emitted = set()
    for item in it:
        if item not in emitted and item not in already_seen:
            unique.append(item)
            emitted.add(item)
    return unique
class Future:
    """Generic base class for work that is deferred.

    RegexLexerMeta handles it specially, so that regex strings can be
    constructed at first use.
    """

    def get(self):
        """Produce the deferred result; not implemented in this class."""
        raise NotImplementedError
def guess_decode(text):
    """Decode *text* with a guessed encoding.

    UTF-8 is tried first; this should fail for non-UTF-8 encodings.
    Next comes the preferred locale encoding.
    The last resort is latin-1, which always works.

    Returns a tuple of the decoded text and the encoding name used.
    """
    try:
        decoded = text.decode('utf-8')
    except UnicodeDecodeError:
        pass
    else:
        return decoded, 'utf-8'

    try:
        import locale
        prefencoding = locale.getpreferredencoding()
        return text.decode(prefencoding), prefencoding
    except (UnicodeDecodeError, LookupError):
        return text.decode('latin1'), 'latin1'
def guess_decode_from_terminal(text, term):
    """Decode *text* that comes from the terminal *term*.

    The encoding of the terminal is tried first, if it has one.  When
    that is missing or cannot decode the text, the guessing is left to
    `guess_decode`: UTF-8, then the preferred locale encoding, then
    latin-1, which always works.

    Returns a tuple of the decoded text and the encoding name used.
    """
    encoding = getattr(term, 'encoding', None)
    if encoding:
        try:
            return text.decode(encoding), encoding
        except UnicodeDecodeError:
            pass
    return guess_decode(text)
def terminal_encoding(term):
    """Return the best guess of the encoding used by the given *term*.

    That is the ``encoding`` attribute of the terminal if it has a truthy
    one, and the preferred locale encoding otherwise.
    """
    encoding = getattr(term, 'encoding', None)
    if not encoding:
        import locale
        return locale.getpreferredencoding()
    return term.encoding
class UnclosingTextIOWrapper(TextIOWrapper):
    """A `TextIOWrapper` that leaves the underlying buffer open."""

    def close(self):
        # Only flush, so that the underlying buffer is not closed when
        # the wrapper is closed or destroyed.
        self.flush()
def html_escape(string, quote=True):
    """Return an escaped, safe version of `string`, or an empty string
    if it is `None`.

    `None` has to be checked for first, because `html.escape` does not
    support `NoneType`: it uses the built-in `replace` function on
    `string`.

    The optional flag `quote` is true by default, in which case double
    and single quotes are escaped as well.
    See https://docs.python.org/3/library/html.html#html.escape for more details.
    """
    if string is None:
        return ''
    return html.escape(string, quote=quote)