Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pip/_vendor/pygments/util.py: 21%
159 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:48 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:48 +0000
1"""
2 pygments.util
3 ~~~~~~~~~~~~~
5 Utility functions.
7 :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
11import re
12from io import TextIOWrapper
# Separators that may appear between the components of a shebang line:
# forward slash, backslash, or a space.
split_path_re = re.compile(r'[/\\ ]')

# Captures the leading part of a DOCTYPE declaration: the root element name,
# optionally followed by a keyword and a double-quoted identifier.
doctype_lookup_re = re.compile(r'''
    <!DOCTYPE\s+(
     [a-zA-Z_][a-zA-Z0-9]*
     (?: \s+      # optional in HTML5
      [a-zA-Z_][a-zA-Z0-9]*\s+
      "[^"]*")?
     )
     [^>]*>
''', re.DOTALL | re.MULTILINE | re.VERBOSE)

# Matches an opening tag, arbitrary content, and some closing tag.
tag_re = re.compile(r'<(.+?)(\s.*?)?>.*?</.+?>',
                    re.IGNORECASE | re.DOTALL | re.MULTILINE)

# Matches an XML declaration at the start of the text (leading whitespace
# allowed).
xml_decl_re = re.compile(r'\s*<\?xml[^>]*\?>', re.I)
class ClassNotFound(ValueError):
    """Raised if one of the lookup functions didn't find a matching class."""
class OptionError(Exception):
    """Raised by the ``get_*_opt`` helpers in this module when an option has
    an invalid type or value.
    """
def get_choice_opt(options, optname, allowed, default=None, normcase=False):
    """Return the value of option `optname` if it is one of `allowed`.

    :param options: mapping of option names to values.
    :param optname: key to look up in `options`.
    :param allowed: container of acceptable values.
    :param default: value used when `optname` is missing from `options`.
    :param normcase: if true, string values are lowercased before the
        membership check (and the lowercased value is returned).
    :raises OptionError: if the (possibly lowercased) value is not in
        `allowed`.
    """
    value = options.get(optname, default)
    # Only strings can be case-normalised; other values (including a missing
    # option whose default is None) go straight to the membership check
    # instead of failing with AttributeError on ``.lower()``.
    if normcase and isinstance(value, str):
        value = value.lower()
    if value not in allowed:
        raise OptionError('Value for option %s must be one of %s' %
                          (optname, ', '.join(map(str, allowed))))
    return value
def get_bool_opt(options, optname, default=None):
    """Interpret option `optname` as a boolean.

    Booleans are returned unchanged and integers are converted with
    ``bool()``.  Strings are matched case-insensitively: ``1``, ``yes``,
    ``true`` and ``on`` give ``True``; ``0``, ``no``, ``false`` and ``off``
    give ``False``.

    :param options: mapping of option names to values.
    :param optname: key to look up in `options`.
    :param default: value used when `optname` is missing from `options`.
    :raises OptionError: if the value is neither bool, int nor str, or is a
        string that is not one of the recognised words.
    """
    value = options.get(optname, default)
    if isinstance(value, bool):
        return value
    if isinstance(value, int):
        return bool(value)
    if not isinstance(value, str):
        raise OptionError('Invalid type %r for option %s; use '
                          '1/0, yes/no, true/false, on/off' % (
                              value, optname))
    lowered = value.lower()
    if lowered in ('1', 'yes', 'true', 'on'):
        return True
    if lowered in ('0', 'no', 'false', 'off'):
        return False
    raise OptionError('Invalid value %r for option %s; use '
                      '1/0, yes/no, true/false, on/off' % (
                          value, optname))
def get_int_opt(options, optname, default=None):
    """Return option `optname` converted with ``int()``.

    :param options: mapping of option names to values.
    :param optname: key to look up in `options`.
    :param default: value used when `optname` is missing from `options`.
    :raises OptionError: if ``int()`` rejects the value, either because of
        its type (``TypeError``) or its content (``ValueError``).
    """
    value = options.get(optname, default)
    try:
        return int(value)
    except TypeError as err:
        raise OptionError('Invalid type %r for option %s; you '
                          'must give an integer value' % (
                              value, optname)) from err
    except ValueError as err:
        raise OptionError('Invalid value %r for option %s; you '
                          'must give an integer value' % (
                              value, optname)) from err
def get_list_opt(options, optname, default=None):
    """Return option `optname` as a list.

    A string is split on whitespace; a list or tuple is copied into a new
    list.

    :param options: mapping of option names to values.
    :param optname: key to look up in `options`.
    :param default: value used when `optname` is missing from `options`.
    :raises OptionError: if the value is not a str, list or tuple.
    """
    val = options.get(optname, default)
    if isinstance(val, str):
        return val.split()
    if isinstance(val, (list, tuple)):
        return list(val)
    raise OptionError('Invalid type %r for option %s; you '
                      'must give a list value' % (
                          val, optname))
def docstring_headline(obj):
    """Return the first paragraph of ``obj.__doc__`` as a single line.

    The lines up to the first blank line are stripped and joined with single
    spaces.  Returns ``''`` when `obj` has no (or an empty) docstring.
    """
    if not obj.__doc__:
        return ''
    headline = []
    for line in obj.__doc__.strip().splitlines():
        stripped = line.strip()
        if not stripped:
            # A blank line ends the first paragraph.
            break
        headline.append(stripped)
    return ' '.join(headline)
def make_analysator(f):
    """Return a static text analyser function that returns float values.

    The wrapper calls ``f(text)`` and always returns a float between 0.0 and
    1.0: any exception raised by `f`, a falsy result, or a result that cannot
    be converted to float gives 0.0; other results are clamped into the
    range.  The wrapper carries `f`'s docstring and is returned wrapped in
    ``staticmethod``.
    """
    def text_analyse(text):
        try:
            rv = f(text)
        except Exception:
            # Deliberate best-effort: a failing analyser just scores 0.0.
            return 0.0
        if not rv:
            return 0.0
        try:
            return min(1.0, max(0.0, float(rv)))
        except (ValueError, TypeError):
            return 0.0
    text_analyse.__doc__ = f.__doc__
    return staticmethod(text_analyse)
def shebang_matches(text, regex):
    r"""Check if the given regular expression matches the last part of the
    shebang if one exists.

        >>> from pygments.util import shebang_matches
        >>> shebang_matches('#!/usr/bin/env python', r'python(2\.\d)?')
        True
        >>> shebang_matches('#!/usr/bin/python2.4', r'python(2\.\d)?')
        True
        >>> shebang_matches('#!/usr/bin/python-ruby', r'python(2\.\d)?')
        False
        >>> shebang_matches('#!/usr/bin/python/ruby', r'python(2\.\d)?')
        False
        >>> shebang_matches('#!/usr/bin/startsomethingwith python',
        ...                 r'python(2\.\d)?')
        True

    It also checks for common windows executable file extensions::

        >>> shebang_matches('#!C:\\Python2.4\\Python.exe', r'python(2\.\d)?')
        True

    Parameters (``'-f'`` or ``'--foo'`` are ignored so ``'perl'`` does
    the same as ``'perl -e'``)

    Note that this method automatically searches the whole string (eg:
    the regular expression is wrapped in ``'^$'``)
    """
    # Only the first line can hold a shebang; it is compared lowercased.
    index = text.find('\n')
    if index >= 0:
        first_line = text[:index].lower()
    else:
        first_line = text.lower()
    if first_line.startswith('#!'):
        try:
            # Last path/argument component that is not an option flag.
            found = [x for x in split_path_re.split(first_line[2:].strip())
                     if x and not x.startswith('-')][-1]
        except IndexError:
            # Nothing but separators/options after the '#!'.
            return False
        regex = re.compile(r'^%s(\.(exe|cmd|bat|bin))?$' % regex, re.IGNORECASE)
        if regex.search(found) is not None:
            return True
    return False
def doctype_matches(text, regex):
    """Check if the doctype matches a regular expression (if present).

    Note that this method only checks the first part of a DOCTYPE.
    eg: 'html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"'

    Returns ``False`` when `text` contains no DOCTYPE declaration.  `regex`
    is matched case-insensitively, anchored at the start of the captured
    doctype.
    """
    m = doctype_lookup_re.search(text)
    if m is None:
        return False
    doctype = m.group(1)
    return re.compile(regex, re.I).match(doctype.strip()) is not None
def html_doctype_matches(text):
    """Check if the file looks like it has a html doctype."""
    return doctype_matches(text, r'html')
# Memoises the tag-search result of looks_like_xml(), keyed by hash(text).
_looks_like_xml_cache = {}


def looks_like_xml(text):
    """Check if a doctype exists or if we have some tags.

    Returns ``True`` when `text` starts with an XML declaration or contains
    a DOCTYPE; otherwise reports whether the first 1000 characters contain
    something shaped like an opening and closing tag.  Only that last result
    is cached.
    """
    if xml_decl_re.match(text):
        return True
    key = hash(text)
    try:
        return _looks_like_xml_cache[key]
    except KeyError:
        m = doctype_lookup_re.search(text)
        if m is not None:
            return True
        # Limit the comparatively expensive tag search to the start of text.
        rv = tag_re.search(text[:1000]) is not None
        _looks_like_xml_cache[key] = rv
        return rv
def surrogatepair(c):
    """Given a unicode character code with length greater than 16 bits,
    return the two 16 bit surrogate pair.

    The result is a ``(high, low)`` tuple of integers.
    """
    # From example D28 of:
    # http://www.unicode.org/book/ch03.pdf
    # 0xd7c0 is 0xd800 - (0x10000 >> 10), folding the offset subtraction in.
    return (0xd7c0 + (c >> 10), (0xdc00 + (c & 0x3ff)))
def format_lines(var_name, seq, raw=False, indent_level=0):
    """Formats a sequence of strings for output.

    Builds source text of the form ``var_name = (`` / one item per line /
    ``)``.

    :param var_name: name placed on the left of the assignment.
    :param seq: iterable of strings.
    :param raw: if true, items are emitted verbatim; otherwise each item is
        written as a single-quoted string literal.
    :param indent_level: number of 4-space indents before the assignment;
        items get one level more.
    :returns: the lines joined with newlines (no trailing newline).
    """
    base_indent = ' ' * indent_level * 4
    inner_indent = ' ' * (indent_level + 1) * 4
    lines = [base_indent + var_name + ' = (']
    if raw:
        # These should be preformatted reprs of, say, tuples.
        for item in seq:
            lines.append(inner_indent + item + ',')
    else:
        for item in seq:
            # Force use of single quotes: appending a double quote makes
            # repr() pick single quotes; the added character is cut out again.
            r = repr(item + '"')
            lines.append(inner_indent + r[:-2] + r[-1] + ',')
    lines.append(base_indent + ')')
    return '\n'.join(lines)
def duplicates_removed(it, already_seen=()):
    """
    Returns a list with duplicates removed from the iterable `it`.

    Order is preserved.  Items contained in `already_seen` are dropped as
    well.  Items must be hashable.
    """
    lst = []
    seen = set()
    for item in it:
        if item in seen or item in already_seen:
            continue
        lst.append(item)
        seen.add(item)
    return lst
class Future:
    """Generic class to defer some work.

    Handled specially in RegexLexerMeta, to support regex string construction at
    first use.
    """

    def get(self):
        """Return the deferred value; subclasses must override this."""
        raise NotImplementedError
def guess_decode(text):
    """Decode *text* with guessed encoding.

    First try UTF-8; this should fail for non-UTF-8 encodings.
    Then try the preferred locale encoding.
    Fall back to latin-1, which always works.

    :param text: the bytes to decode.
    :returns: a ``(decoded_text, encoding_name)`` tuple.
    """
    try:
        return text.decode('utf-8'), 'utf-8'
    except UnicodeDecodeError:
        pass
    try:
        import locale
        prefencoding = locale.getpreferredencoding()
        # Pass the encoding explicitly: a bare ``decode()`` would just retry
        # UTF-8, which has already failed, while reporting `prefencoding`.
        return text.decode(prefencoding), prefencoding
    except (UnicodeDecodeError, LookupError):
        # LookupError covers a locale encoding name Python does not know.
        return text.decode('latin1'), 'latin1'
def guess_decode_from_terminal(text, term):
    """Decode *text* coming from terminal *term*.

    First try the terminal encoding, if given.
    Then try UTF-8.  Then try the preferred locale encoding.
    Fall back to latin-1, which always works.

    :param text: the bytes to decode.
    :param term: object whose ``encoding`` attribute (if present and truthy)
        names the terminal encoding.
    :returns: a ``(decoded_text, encoding_name)`` tuple.
    """
    if getattr(term, 'encoding', None):
        try:
            text = text.decode(term.encoding)
        except (UnicodeDecodeError, LookupError):
            # Wrong or unknown terminal encoding: fall through to guessing.
            pass
        else:
            return text, term.encoding
    return guess_decode(text)
def terminal_encoding(term):
    """Return our best guess of encoding for the given *term*.

    Uses ``term.encoding`` when that attribute exists and is truthy,
    otherwise the locale's preferred encoding.
    """
    if getattr(term, 'encoding', None):
        return term.encoding
    import locale
    return locale.getpreferredencoding()
class UnclosingTextIOWrapper(TextIOWrapper):
    """A ``TextIOWrapper`` whose ``close()`` only flushes."""

    # Don't close underlying buffer on destruction.
    def close(self):
        self.flush()