Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pip/_vendor/pygments/util.py: 21%

159 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-07 06:48 +0000

1""" 

2 pygments.util 

3 ~~~~~~~~~~~~~ 

4 

5 Utility functions. 

6 

7 :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11import re 

12from io import TextIOWrapper 

13 

14 

# Separator used to break a shebang command line into pieces: a forward
# slash, a backslash or a space.
split_path_re = re.compile(r'[/\\ ]')
# Finds a ``<!DOCTYPE ...>`` declaration.  Group 1 holds the first name and,
# when present, the following keyword plus its double-quoted string.
doctype_lookup_re = re.compile(r'''
 <!DOCTYPE\s+(
 [a-zA-Z_][a-zA-Z0-9]*
 (?: \s+ # optional in HTML5
 [a-zA-Z_][a-zA-Z0-9]*\s+
 "[^"]*")?
 )
 [^>]*>
''', re.DOTALL | re.MULTILINE | re.VERBOSE)
# Finds something shaped like ``<name ...> ... </name>``; the closing tag is
# not required to carry the same name as the opening one.
tag_re = re.compile(r'<(.+?)(\s.*?)?>.*?</.+?>',
                    re.IGNORECASE | re.DOTALL | re.MULTILINE)
# Matches an ``<?xml ... ?>`` declaration, allowing leading whitespace.
xml_decl_re = re.compile(r'\s*<\?xml[^>]*\?>', re.I)

28 

29 

class ClassNotFound(ValueError):
    """Signals that a lookup function found no class matching the request."""

32 

33 

class OptionError(Exception):
    """Raised by the option helpers when an option value cannot be used."""

36 

37 

def get_choice_opt(options, optname, allowed, default=None, normcase=False):
    """Return the value of option *optname* after checking it against *allowed*.

    :param options: mapping of option names to values.
    :param optname: name of the option to look up.
    :param allowed: container of acceptable values.
    :param default: value used when *optname* is missing from *options*.
    :param normcase: if true, lower-case the value before checking it.
    :returns: the (possibly lower-cased) option value.
    :raises OptionError: if the value is not one of *allowed*.
    """
    value = options.get(optname, default)
    if normcase:
        # Values without a ``lower()`` method (for example the ``None``
        # default) are checked as they are, so they produce the regular
        # OptionError below rather than an unrelated AttributeError.
        try:
            value = value.lower()
        except AttributeError:
            pass
    if value not in allowed:
        raise OptionError('Value for option %s must be one of %s' %
                          (optname, ', '.join(map(str, allowed))))
    return value

46 

47 

def get_bool_opt(options, optname, default=None):
    """Interpret option *optname* as a boolean.

    Booleans and integers are converted with ``bool()``.  Strings are
    compared case-insensitively against ``1/yes/true/on`` and
    ``0/no/false/off``.

    :raises OptionError: for any other type, or for an unrecognised string.
    """
    value = options.get(optname, default)
    # ``bool`` is a subclass of ``int``, so one check covers both.
    if isinstance(value, int):
        return bool(value)
    if not isinstance(value, str):
        raise OptionError('Invalid type %r for option %s; use '
                          '1/0, yes/no, true/false, on/off' % (
                              value, optname))
    lowered = value.lower()
    if lowered in ('1', 'yes', 'true', 'on'):
        return True
    if lowered in ('0', 'no', 'false', 'off'):
        return False
    raise OptionError('Invalid value %r for option %s; use '
                      '1/0, yes/no, true/false, on/off' % (
                          value, optname))

66 

67 

def get_int_opt(options, optname, default=None):
    """Return option *optname* converted with ``int()``.

    :raises OptionError: if ``int()`` rejects the value, either because of
        its type or because of its content.
    """
    raw = options.get(optname, default)
    try:
        number = int(raw)
    except TypeError:
        raise OptionError('Invalid type %r for option %s; you '
                          'must give an integer value' % (
                              raw, optname))
    except ValueError:
        raise OptionError('Invalid value %r for option %s; you '
                          'must give an integer value' % (
                              raw, optname))
    else:
        return number

80 

81 

def get_list_opt(options, optname, default=None):
    """Return option *optname* as a new list.

    A string is split on whitespace; a list or tuple is copied.

    :raises OptionError: if the value is of any other type.
    """
    value = options.get(optname, default)
    if isinstance(value, (list, tuple)):
        return list(value)
    if isinstance(value, str):
        return value.split()
    raise OptionError('Invalid type %r for option %s; you '
                      'must give a list value' % (
                          value, optname))

92 

93 

def docstring_headline(obj):
    """Return the first paragraph of *obj*'s docstring as a single line.

    Lines up to the first blank one are stripped and joined with single
    spaces.  An empty string is returned when *obj* has no docstring.
    """
    doc = obj.__doc__
    if not doc:
        return ''
    headline = []
    for raw_line in doc.strip().splitlines():
        stripped = raw_line.strip()
        if not stripped:
            # A blank line ends the headline paragraph.
            break
        headline.append(stripped)
    return ' '.join(headline)

104 

105 

def make_analysator(f):
    """Wrap *f* as a static text-analysis function that returns a float.

    The wrapper returns ``0.0`` when *f* raises an ``Exception``, returns a
    falsy value, or returns something ``float()`` rejects.  Any other result
    is converted to float and clamped to the range 0.0 - 1.0.
    """
    def text_analyse(text):
        try:
            raw_score = f(text)
        except Exception:
            return 0.0
        if not raw_score:
            return 0.0
        try:
            score = float(raw_score)
        except (ValueError, TypeError):
            return 0.0
        return min(1.0, max(0.0, score))
    text_analyse.__doc__ = f.__doc__
    return staticmethod(text_analyse)

121 

122 

def shebang_matches(text, regex):
    r"""Check if the given regular expression matches the last part of the
    shebang if one exists.

        >>> from pygments.util import shebang_matches
        >>> shebang_matches('#!/usr/bin/env python', r'python(2\.\d)?')
        True
        >>> shebang_matches('#!/usr/bin/python2.4', r'python(2\.\d)?')
        True
        >>> shebang_matches('#!/usr/bin/python-ruby', r'python(2\.\d)?')
        False
        >>> shebang_matches('#!/usr/bin/python/ruby', r'python(2\.\d)?')
        False
        >>> shebang_matches('#!/usr/bin/startsomethingwith python',
        ...                 r'python(2\.\d)?')
        True

    It also checks for common windows executable file extensions::

        >>> shebang_matches('#!C:\\Python2.4\\Python.exe', r'python(2\.\d)?')
        True

    Parameters (``'-f'`` or ``'--foo'`` are ignored so ``'perl'`` does
    the same as ``'perl -e'``)

    Note that this method automatically searches the whole string (eg:
    the regular expression is wrapped in ``'^$'``)

    :param text: the text whose first line may be a shebang.
    :param regex: pattern source matched, case-insensitively, against the
        last path/argument component of the shebang line.
    :returns: ``True`` on a match; ``False`` otherwise, including when
        there is no shebang line.
    """
    first_line = text.partition('\n')[0].lower()
    if not first_line.startswith('#!'):
        return False
    # Drop empty pieces and option-like arguments; the last piece left is
    # the interpreter name to test.
    candidates = [part
                  for part in split_path_re.split(first_line[2:].strip())
                  if part and not part.startswith('-')]
    if not candidates:
        return False
    # The caller's pattern is grouped so that a top-level alternation such
    # as ``a|b`` stays inside the ``^...$`` anchors instead of splitting
    # them between its branches.
    pattern = re.compile(r'^(?:%s)(\.(exe|cmd|bat|bin))?$' % regex,
                         re.IGNORECASE)
    return pattern.search(candidates[-1]) is not None

166 

167 

def doctype_matches(text, regex):
    """Tell whether the DOCTYPE found in *text* matches *regex*.

    Returns ``False`` when *text* contains no DOCTYPE declaration.  The
    match is case-insensitive and anchored at the start of the captured
    part of the declaration,
    eg: 'html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"'
    """
    found = doctype_lookup_re.search(text)
    if found is None:
        return False
    declared = found.group(1).strip()
    return re.match(regex, declared, re.I) is not None

179 

180 

def html_doctype_matches(text):
    """Tell whether *text* declares a doctype that begins with ``html``."""
    is_html = doctype_matches(text, r'html')
    return is_html

184 

185 

# Maps hash(text) to the result of the tag search in looks_like_xml().
_looks_like_xml_cache = {}


def looks_like_xml(text):
    """Tell whether *text* has an XML declaration, a doctype, or some tags."""
    if xml_decl_re.match(text):
        return True
    cache_key = hash(text)
    if cache_key in _looks_like_xml_cache:
        return _looks_like_xml_cache[cache_key]
    if doctype_lookup_re.search(text) is not None:
        # A doctype hit is returned directly and is not cached.
        return True
    # Only the first 1000 characters are scanned for tags.
    has_tags = tag_re.search(text[:1000]) is not None
    _looks_like_xml_cache[cache_key] = has_tags
    return has_tags

203 

204 

def surrogatepair(c):
    """Split code point *c*, which needs more than 16 bits, into the
    two 16 bit values of its surrogate pair, returned as a tuple.
    """
    # From example D28 of:
    # http://www.unicode.org/book/ch03.pdf
    high = 0xd7c0 + (c >> 10)
    low = 0xdc00 + (c & 0x3ff)
    return (high, low)

212 

213 

def format_lines(var_name, seq, raw=False, indent_level=0):
    """Render *seq* as source text assigning a tuple to *var_name*.

    One item is written per line, indented four spaces per level.  With
    *raw* true the items are inserted as they are; otherwise each one is
    written as a single-quoted string literal.
    """
    outer_pad = ' ' * indent_level * 4
    inner_pad = ' ' * (indent_level + 1) * 4
    out = [outer_pad + var_name + ' = (']
    if raw:
        # These should be preformatted reprs of, say, tuples.
        out.extend(inner_pad + item + ',' for item in seq)
    else:
        for item in seq:
            # Appending a double quote makes repr() pick single quotes;
            # the slicing then removes that extra character again.
            quoted = repr(item + '"')
            out.append(inner_pad + quoted[:-2] + quoted[-1] + ',')
    out.append(outer_pad + ')')
    return '\n'.join(out)

231 

232 

def duplicates_removed(it, already_seen=()):
    """
    Return the items of the iterable `it` as a list, keeping only the
    first occurrence of each and skipping anything in `already_seen`.

    Order is preserved.
    """
    unique = []
    emitted = set()
    for item in it:
        if item not in emitted and item not in already_seen:
            unique.append(item)
            emitted.add(item)
    return unique

247 

248 

class Future:
    """Base class for a piece of work that is carried out later.

    Handled specially in RegexLexerMeta, to support regex string
    construction at first use.
    """

    def get(self):
        """Produce the deferred result; this base version is unimplemented."""
        raise NotImplementedError()

257 

258 

def guess_decode(text):
    """Decode *text* with guessed encoding.

    First try UTF-8; this should fail for non-UTF-8 encodings.
    Then try the preferred locale encoding.
    Fall back to latin-1, which always works.

    :param text: the bytes to decode.
    :returns: a ``(decoded_text, encoding_name)`` tuple.
    """
    try:
        return text.decode('utf-8'), 'utf-8'
    except UnicodeDecodeError:
        pass
    try:
        import locale
        prefencoding = locale.getpreferredencoding()
        # Name the locale encoding explicitly: a bare ``decode()`` would
        # only retry UTF-8, which has just failed.
        return text.decode(prefencoding), prefencoding
    except (UnicodeDecodeError, LookupError):
        return text.decode('latin1'), 'latin1'

278 

279 

def guess_decode_from_terminal(text, term):
    """Decode *text* coming from terminal *term*.

    First try the terminal encoding, if given.
    Then try UTF-8.  Then try the preferred locale encoding.
    Fall back to latin-1, which always works.

    :param text: the bytes to decode.
    :param term: object whose ``encoding`` attribute, if present and
        non-empty, names the encoding to try first.
    :returns: a ``(decoded_text, encoding_name)`` tuple.
    """
    encoding = getattr(term, 'encoding', None)
    if encoding:
        try:
            decoded = text.decode(encoding)
        except (UnicodeDecodeError, LookupError):
            # The terminal encoding is wrong for this data, or is not a
            # known codec name: move on to guessing, as guess_decode()
            # does for an unusable locale encoding.
            pass
        else:
            return decoded, encoding
    return guess_decode(text)

295 

296 

def terminal_encoding(term):
    """Pick the encoding to use for *term*.

    The terminal's own ``encoding`` attribute is used when it is set;
    otherwise the preferred locale encoding is returned.
    """
    if not getattr(term, 'encoding', None):
        import locale
        return locale.getpreferredencoding()
    return term.encoding

303 

304 

class UnclosingTextIOWrapper(TextIOWrapper):
    """A ``TextIOWrapper`` whose ``close()`` flushes instead of closing."""

    def close(self):
        # Don't close underlying buffer on destruction.
        self.flush()