Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pygments/util.py: 23%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2 pygments.util
3 ~~~~~~~~~~~~~
5 Utility functions.
7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
11import re
12from io import TextIOWrapper
# Separators used to break a shebang line into pieces: a forward slash,
# a backslash, or a space.
split_path_re = re.compile(r'[/\\ ]')
# Finds a ``<!DOCTYPE ...>`` declaration.  Group 1 captures the leading name
# plus, when present, the following keyword and double-quoted string.
# Compiled in VERBOSE mode, so whitespace and the ``#`` comment inside the
# pattern are not part of the match.
doctype_lookup_re = re.compile(r'''
 <!DOCTYPE\s+(
 [a-zA-Z_][a-zA-Z0-9]*
 (?: \s+ # optional in HTML5
 [a-zA-Z_][a-zA-Z0-9]*\s+
 "[^"]*")?
 )
 [^>]*>
''', re.DOTALL | re.MULTILINE | re.VERBOSE)
# Loose match for an opening tag, arbitrary content, and a closing tag.
# The closing tag's name is not required to equal the opening tag's name.
tag_re = re.compile(r'<(.+?)(\s.*?)?>.*?</.+?>',
                    re.IGNORECASE | re.DOTALL | re.MULTILINE)
# An XML declaration (``<?xml ... ?>``), optionally preceded by whitespace.
xml_decl_re = re.compile(r'\s*<\?xml[^>]*\?>', re.I)
class ClassNotFound(ValueError):
    """Signal that a lookup function could not find a matching class."""
class OptionError(Exception):
    """
    Error raised by the option processing functions when an argument
    has an unacceptable type or value.
    """
def get_choice_opt(options, optname, allowed, default=None, normcase=False):
    """
    Return the value of option `optname`, checked against `allowed`.

    The value is read from the dictionary `options`, falling back to
    `default` when the key is absent.  If `normcase` is true, string
    values are lowercased before the membership test.

    Raises `OptionError` if the (possibly lowercased) value is not
    contained in `allowed`.
    """
    value = options.get(optname, default)
    # Only strings can be case-normalised.  Anything else (notably the
    # ``None`` default of a missing option) goes to the membership test
    # unchanged, so a bad value is reported as an OptionError instead of
    # crashing with AttributeError on ``.lower()``.
    if normcase and isinstance(value, str):
        value = value.lower()
    if value not in allowed:
        raise OptionError('Value for option {} must be one of {}'.format(
            optname, ', '.join(map(str, allowed))))
    return value
def get_bool_opt(options, optname, default=None):
    """
    Read option `optname` from the dictionary `options` as a Boolean.

    The value looked up is `options.get(optname, default)`.  Booleans are
    returned unchanged and other integers are converted with ``bool()``.
    Strings are accepted so that values coming from the command line can
    be used: ``1``, ``yes``, ``true`` and ``on`` mean ``True``; ``0``,
    ``no``, ``false`` and ``off`` mean ``False`` (compared
    case-insensitively).

    Raises `OptionError` for any other string and for values of any
    other type.
    """
    value = options.get(optname, default)
    if isinstance(value, bool):
        return value
    if isinstance(value, int):
        return bool(value)
    if not isinstance(value, str):
        raise OptionError(f'Invalid type {value!r} for option {optname}; use '
                          '1/0, yes/no, true/false, on/off')
    lowered = value.lower()
    if lowered in ('1', 'yes', 'true', 'on'):
        return True
    if lowered in ('0', 'no', 'false', 'off'):
        return False
    raise OptionError(f'Invalid value {value!r} for option {optname}; use '
                      '1/0, yes/no, true/false, on/off')
def get_int_opt(options, optname, default=None):
    """
    Read option `optname` from `options` (or `default`) as an integer.

    The value is converted with ``int()``.  Raises `OptionError` if the
    value has a type ``int()`` rejects or is text it cannot parse.
    """
    raw = options.get(optname, default)
    try:
        number = int(raw)
    except TypeError:
        raise OptionError(f'Invalid type {raw!r} for option {optname}; you '
                          'must give an integer value')
    except ValueError:
        raise OptionError(f'Invalid value {raw!r} for option {optname}; you '
                          'must give an integer value')
    else:
        return number
def get_list_opt(options, optname, default=None):
    """
    Read option `optname` from `options` (or `default`) as a list.

    A list or tuple is copied into a new list; a string is split at
    whitespace.  Any other type raises `OptionError`.
    """
    value = options.get(optname, default)
    if isinstance(value, (list, tuple)):
        return list(value)
    if not isinstance(value, str):
        raise OptionError(f'Invalid type {value!r} for option {optname}; you '
                          'must give a list value')
    return value.split()
def docstring_headline(obj):
    """
    Return the first paragraph of ``obj.__doc__`` as a single line.

    Lines up to the first blank line are stripped and joined with single
    spaces.  An empty or missing docstring gives ``''``.
    """
    doc = obj.__doc__
    if not doc:
        return ''
    pieces = []
    for raw_line in doc.strip().splitlines():
        stripped = raw_line.strip()
        if not stripped:
            # A blank line ends the headline paragraph.
            break
        pieces.append(stripped)
    return ' '.join(pieces)
def make_analysator(f):
    """
    Wrap *f* as a static text analyser that always returns a float.

    The wrapper returns ``0.0`` when *f* raises, returns a falsy value, or
    returns something ``float()`` cannot convert; otherwise the result is
    clamped to the range 0.0 to 1.0.  The wrapper takes over the docstring
    of *f* and is returned as a ``staticmethod`` object.
    """
    def text_analyse(text):
        try:
            rv = f(text)
        except Exception:
            # Any failure inside the analyser counts as "no match".
            return 0.0
        if not rv:
            return 0.0
        try:
            score = float(rv)
        except (ValueError, TypeError):
            return 0.0
        return min(1.0, max(0.0, score))

    text_analyse.__doc__ = f.__doc__
    return staticmethod(text_analyse)
def shebang_matches(text, regex):
    r"""Tell whether the interpreter named on a shebang line matches `regex`.

    Only the first line of `text` is inspected, and only if it starts with
    ``#!``.  The last path component or argument that does not start with
    ``-`` is the one tested.

        >>> from pygments.util import shebang_matches
        >>> shebang_matches('#!/usr/bin/env python', r'python(2\.\d)?')
        True
        >>> shebang_matches('#!/usr/bin/python2.4', r'python(2\.\d)?')
        True
        >>> shebang_matches('#!/usr/bin/python-ruby', r'python(2\.\d)?')
        False
        >>> shebang_matches('#!/usr/bin/python/ruby', r'python(2\.\d)?')
        False
        >>> shebang_matches('#!/usr/bin/startsomethingwith python',
        ...                 r'python(2\.\d)?')
        True

    Common Windows executable file extensions are accepted as well::

        >>> shebang_matches('#!C:\\Python2.4\\Python.exe', r'python(2\.\d)?')
        True

    Parameters (``'-f'`` or ``'--foo'``) are skipped, so ``'perl'`` gives
    the same result as ``'perl -e'``.

    The expression is wrapped in ``'^'`` and ``'$'``, so it has to match
    the whole candidate string; matching ignores case.
    """
    first_line = text.partition('\n')[0].lower()
    if not first_line.startswith('#!'):
        return False
    # Split what follows "#!" on path separators and spaces, dropping
    # empty pieces and option-like arguments.
    candidates = [part
                  for part in split_path_re.split(first_line[2:].strip())
                  if part and not part.startswith('-')]
    if not candidates:
        return False
    pattern = re.compile(rf'^{regex}(\.(exe|cmd|bat|bin))?$', re.IGNORECASE)
    return pattern.search(candidates[-1]) is not None
def doctype_matches(text, regex):
    """Tell whether the DOCTYPE found in `text` matches `regex`.

    Returns ``False`` when no DOCTYPE declaration is found.  Only the first
    part of the DOCTYPE is tested, e.g.
    'html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"', and the match is
    anchored at its start and ignores case.
    """
    found = doctype_lookup_re.search(text)
    if found is None:
        return False
    declared = found.group(1).strip()
    return re.match(regex, declared, re.I) is not None
def html_doctype_matches(text):
    """Tell whether `text` appears to carry an HTML doctype."""
    html_pattern = r'html'
    return doctype_matches(text, html_pattern)
# Results of the tag search in looks_like_xml(), keyed by hash(text).
_looks_like_xml_cache = {}


def looks_like_xml(text):
    """Tell whether `text` has an XML declaration, a doctype, or some tags.

    Only the result of the tag search (run on the first 1000 characters)
    is stored in the module-level cache.
    """
    if xml_decl_re.match(text):
        return True
    key = hash(text)
    if key in _looks_like_xml_cache:
        return _looks_like_xml_cache[key]
    if doctype_lookup_re.search(text) is not None:
        return True
    has_tags = tag_re.search(text[:1000]) is not None
    _looks_like_xml_cache[key] = has_tags
    return has_tags
def surrogatepair(c):
    """Split a code point wider than 16 bits into its two 16-bit surrogates.

    Returns a ``(high, low)`` tuple of integers.
    """
    # From example D28 of:
    # http://www.unicode.org/book/ch03.pdf
    high = 0xd7c0 + (c >> 10)
    low = 0xdc00 + (c & 0x3ff)
    return (high, low)
def format_lines(var_name, seq, raw=False, indent_level=0):
    """Render `seq` as source text assigning a tuple to `var_name`.

    Each item goes on its own line, followed by a comma, one indentation
    step (four spaces) deeper than the assignment.  With `raw` set the
    items are inserted verbatim; otherwise each item is written as a
    single-quoted string literal.
    """
    outer = ' ' * indent_level * 4
    inner = ' ' * (indent_level + 1) * 4
    header = outer + var_name + ' = ('
    if raw:
        # These should be preformatted reprs of, say, tuples.
        body = [inner + item + ',' for item in seq]
    else:
        body = []
        for item in seq:
            # Appending a double quote makes repr() pick single quotes;
            # the extra character is then cut out again.
            quoted = repr(item + '"')
            body.append(inner + quoted[:-2] + quoted[-1] + ',')
    return '\n'.join([header, *body, outer + ')'])
def duplicates_removed(it, already_seen=()):
    """
    Return the items of the iterable `it` as a list without duplicates.

    Items contained in `already_seen` are left out as well.  Order is
    preserved.
    """
    unique = []
    emitted = set()
    for item in it:
        if item not in emitted and item not in already_seen:
            unique.append(item)
            emitted.add(item)
    return unique
class Future:
    """Base class for work that is deferred until it is needed.

    RegexLexerMeta handles instances specially, to support building regex
    strings at first use.
    """

    def get(self):
        """Produce the deferred value; subclasses must override this."""
        raise NotImplementedError()
def guess_decode(text):
    """Decode the bytes *text* with a guessed encoding.

    First try UTF-8; this should fail for non-UTF-8 encodings.
    Then try the preferred locale encoding.
    Fall back to latin-1, which always works.

    Returns a ``(decoded_text, encoding_name)`` tuple, where
    ``encoding_name`` is the encoding that was actually used.
    """
    try:
        return text.decode('utf-8'), 'utf-8'
    except UnicodeDecodeError:
        pass
    try:
        import locale
        prefencoding = locale.getpreferredencoding()
        # The encoding must be passed explicitly: a bare ``decode()``
        # defaults to UTF-8, which has already failed above, so the locale
        # encoding would never be tried.
        return text.decode(prefencoding), prefencoding
    except (UnicodeDecodeError, LookupError):
        # LookupError covers a locale encoding that Python has no codec for.
        return text.decode('latin1'), 'latin1'
def guess_decode_from_terminal(text, term):
    """Decode the bytes *text* that came from terminal *term*.

    The terminal's own encoding is tried first, if it has one.  If that
    is missing or fails to decode the text, the work is handed to
    ``guess_decode``.

    Returns a ``(decoded_text, encoding_name)`` tuple.
    """
    if getattr(term, 'encoding', None):
        try:
            decoded = text.decode(term.encoding)
        except UnicodeDecodeError:
            # Not valid in the terminal encoding; use the generic guess.
            decoded = None
        if decoded is not None:
            return decoded, term.encoding
    return guess_decode(text)
def terminal_encoding(term):
    """Return the most plausible encoding for the terminal *term*.

    The terminal's ``encoding`` attribute is used when it is set and
    non-empty; otherwise the preferred locale encoding is returned.
    """
    if not getattr(term, 'encoding', None):
        import locale
        return locale.getpreferredencoding()
    return term.encoding
class UnclosingTextIOWrapper(TextIOWrapper):
    """A ``TextIOWrapper`` whose ``close()`` only flushes."""

    def close(self):
        # Don't close underlying buffer on destruction.
        self.flush()