Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/util.py: 20%

158 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-20 06:09 +0000

1""" 

2 pygments.util 

3 ~~~~~~~~~~~~~ 

4 

5 Utility functions. 

6 

7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11import re 

12from io import TextIOWrapper 

13 

14 

# Separators used to break a shebang command line into pieces:
# forward slash, backslash, or a space.
split_path_re = re.compile(r'[/\\ ]')

# Finds a DOCTYPE declaration.  Group 1 captures the root element name and,
# when present, the following keyword plus its double-quoted identifier.
doctype_lookup_re = re.compile(
    r'''
    <!DOCTYPE\s+(
     [a-zA-Z_][a-zA-Z0-9]*
     (?: \s+      # optional in HTML5
     [a-zA-Z_][a-zA-Z0-9]*\s+
     "[^"]*")?
     )
     [^>]*>
''',
    re.VERBOSE | re.MULTILINE | re.DOTALL,
)

# Matches an opening tag (with optional attributes), any content, and some
# closing tag.  The closing tag name is not required to equal the opening one.
tag_re = re.compile(
    r'<(.+?)(\s.*?)?>.*?</.+?>',
    re.MULTILINE | re.DOTALL | re.IGNORECASE,
)

# Matches an XML declaration, allowing leading whitespace before it.
xml_decl_re = re.compile(r'\s*<\?xml[^>]*\?>', re.IGNORECASE)

28 

29 

class ClassNotFound(ValueError):
    """Signals that a lookup function could not find a matching class."""

32 

33 

class OptionError(Exception):
    """
    Raised by the option processing functions when the type or the
    value of an argument is not correct.
    """

39 

def get_choice_opt(options, optname, allowed, default=None, normcase=False):
    """
    Return the value of `optname` from `options` if it is one of `allowed`.

    If the key is absent, `default` is used in its place.  When `normcase`
    is true and the value is a string, it is lower-cased before the check
    (and the lower-cased form is what gets returned).  Non-string values,
    such as a ``None`` default, are checked against `allowed` unchanged.

    Raises `OptionError` if the resulting value is not in `allowed`.
    """
    value = options.get(optname, default)
    # Only strings can be case-normalised; calling ``.lower()`` on anything
    # else (e.g. the default ``None``) would raise AttributeError before the
    # membership check ever had the chance to report a proper OptionError.
    if normcase and isinstance(value, str):
        value = value.lower()
    if value not in allowed:
        raise OptionError('Value for option %s must be one of %s' %
                          (optname, ', '.join(map(str, allowed))))
    return value

52 

53 

def get_bool_opt(options, optname, default=None):
    """
    Look up `optname` in `options` (falling back to `default`) and
    return it as a Boolean.

    Booleans are returned as they are and integers are converted with
    ``bool()``.  Strings are accepted too, so that values coming from the
    command line can be used: ``1``, ``yes``, ``true`` and ``on`` mean
    ``True``; ``0``, ``no``, ``false`` and ``off`` mean ``False``
    (matched case-insensitively).

    Any other type, or a string outside those two sets, raises an
    `OptionError`.
    """
    value = options.get(optname, default)
    # bool must be tested before int because bool is a subclass of int.
    if isinstance(value, bool):
        return value
    if isinstance(value, int):
        return bool(value)
    if not isinstance(value, str):
        raise OptionError('Invalid type %r for option %s; use '
                          '1/0, yes/no, true/false, on/off' % (
                              value, optname))
    lowered = value.lower()
    if lowered in ('1', 'yes', 'true', 'on'):
        return True
    if lowered in ('0', 'no', 'false', 'off'):
        return False
    raise OptionError('Invalid value %r for option %s; use '
                      '1/0, yes/no, true/false, on/off' % (
                          value, optname))

83 

84 

def get_int_opt(options, optname, default=None):
    """
    Look up `optname` in `options` (falling back to `default`) and
    return it converted with ``int()``.

    Raises `OptionError` if the conversion fails, with a message that
    distinguishes an unsuitable type from an unparsable value.
    """
    raw = options.get(optname, default)
    try:
        number = int(raw)
    except TypeError:
        # int() rejects the object's type outright (e.g. None or a list).
        raise OptionError('Invalid type %r for option %s; you '
                          'must give an integer value' % (
                              raw, optname))
    except ValueError:
        # The type is acceptable but the content is not an integer.
        raise OptionError('Invalid value %r for option %s; you '
                          'must give an integer value' % (
                              raw, optname))
    else:
        return number

98 

def get_list_opt(options, optname, default=None):
    """
    Look up `optname` in `options` (falling back to `default`) and
    return it as a list.

    A list or tuple is copied into a new list; a string is split at
    whitespace.  Any other type raises an `OptionError`.
    """
    value = options.get(optname, default)
    if isinstance(value, (list, tuple)):
        return list(value)
    if isinstance(value, str):
        return value.split()
    raise OptionError('Invalid type %r for option %s; you '
                      'must give a list value' % (
                          value, optname))

114 

115 

def docstring_headline(obj):
    """
    Return the first paragraph of ``obj.__doc__`` as a single line.

    The lines up to the first blank line are stripped and joined with
    single spaces.  An empty string is returned if there is no docstring.
    """
    doc = obj.__doc__
    if not doc:
        return ''
    pieces = []
    for raw_line in doc.strip().splitlines():
        stripped = raw_line.strip()
        if not stripped:
            # A blank line ends the headline paragraph.
            break
        pieces.append(stripped)
    return ' '.join(pieces)

126 

127 

def make_analysator(f):
    """
    Wrap `f` into a static text analyser that always returns a float.

    The wrapper returns ``0.0`` if `f` raises, returns a falsy value, or
    returns something ``float()`` cannot convert; otherwise the result is
    clamped to the range ``0.0``..``1.0``.  The docstring of `f` is copied
    onto the wrapper, which is returned as a ``staticmethod`` object.
    """
    def text_analyse(text):
        try:
            score = f(text)
        except Exception:
            # A failing analyser counts as "no match".
            return 0.0
        if score:
            try:
                return min(1.0, max(0.0, float(score)))
            except (ValueError, TypeError):
                pass
        return 0.0

    text_analyse.__doc__ = f.__doc__
    return staticmethod(text_analyse)

143 

144 

def shebang_matches(text, regex):
    r"""Tell whether the interpreter named in the shebang line matches `regex`.

    Only the first line of `text` is looked at, and only if it starts with
    ``#!``.  The line is lower-cased and split at slashes, backslashes and
    spaces; the last piece that does not start with ``-`` is then tested.

    >>> from pygments.util import shebang_matches
    >>> shebang_matches('#!/usr/bin/env python', r'python(2\.\d)?')
    True
    >>> shebang_matches('#!/usr/bin/python2.4', r'python(2\.\d)?')
    True
    >>> shebang_matches('#!/usr/bin/python-ruby', r'python(2\.\d)?')
    False
    >>> shebang_matches('#!/usr/bin/python/ruby', r'python(2\.\d)?')
    False
    >>> shebang_matches('#!/usr/bin/startsomethingwith python',
    ...                 r'python(2\.\d)?')
    True

    Common Windows executable extensions are accepted as well::

    >>> shebang_matches('#!C:\\Python2.4\\Python.exe', r'python(2\.\d)?')
    True

    Parameters (``'-f'`` or ``'--foo'``) are ignored, so ``'perl'`` behaves
    the same as ``'perl -e'``.

    Note that the regular expression is wrapped in ``'^$'``, so it has to
    match the whole piece.
    """
    first_line = text.split('\n', 1)[0].lower()
    if not first_line.startswith('#!'):
        return False
    candidates = [piece
                  for piece in split_path_re.split(first_line[2:].strip())
                  if piece and not piece.startswith('-')]
    if not candidates:
        return False
    # Anchor the caller's pattern and allow an optional executable suffix.
    pattern = re.compile(r'^%s(\.(exe|cmd|bat|bin))?$' % regex, re.IGNORECASE)
    return pattern.search(candidates[-1]) is not None

188 

189 

def doctype_matches(text, regex):
    """Tell whether `text` contains a DOCTYPE whose start matches `regex`.

    Returns ``False`` if no DOCTYPE is found.  Only the first part of the
    declaration is tested, e.g.
    'html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"', and the match is
    case-insensitive and anchored at its beginning.
    """
    found = doctype_lookup_re.search(text)
    if found is None:
        return False
    head = found.group(1).strip()
    return re.match(regex, head, re.IGNORECASE) is not None

201 

202 

def html_doctype_matches(text):
    """Tell whether `text` carries a DOCTYPE that starts with ``html``."""
    html_pattern = r'html'
    return doctype_matches(text, html_pattern)

206 

207 

# Remembers the outcome of the tag search, keyed by ``hash(text)`` rather
# than by the text itself.
_looks_like_xml_cache = {}


def looks_like_xml(text):
    """Tell whether `text` has an XML declaration, a DOCTYPE or some tags.

    Only the tag search (limited to the first 1000 characters) is cached;
    a hit on the XML declaration or on a DOCTYPE returns ``True`` without
    storing anything.
    """
    if xml_decl_re.match(text):
        return True
    cache_key = hash(text)
    if cache_key in _looks_like_xml_cache:
        return _looks_like_xml_cache[cache_key]
    if doctype_lookup_re.search(text) is not None:
        return True
    has_tags = tag_re.search(text[:1000]) is not None
    _looks_like_xml_cache[cache_key] = has_tags
    return has_tags

225 

226 

def surrogatepair(c):
    """Split a code point that needs more than 16 bits into its two
    16 bit surrogates, returned as a ``(high, low)`` tuple.
    """
    # From example D28 of:
    # http://www.unicode.org/book/ch03.pdf
    # 0xd7c0 is 0xd800 - (0x10000 >> 10), which folds the subtraction of
    # the 0x10000 offset into the high surrogate base.
    high = 0xd7c0 + (c >> 10)
    low = 0xdc00 + (c & 0x3ff)
    return (high, low)

234 

235 

def format_lines(var_name, seq, raw=False, indent_level=0):
    """Render `seq` as source text of the form ``var_name = (...)``.

    Each item goes on its own line followed by a comma.  With `raw` set,
    the items are taken to be preformatted text (say, reprs of tuples) and
    are inserted as they are; otherwise every item is written as a
    single-quoted string literal.  Each `indent_level` adds four spaces.
    """
    outer = ' ' * indent_level * 4
    inner = ' ' * (indent_level + 1) * 4
    out = [outer + var_name + ' = (']
    if raw:
        out.extend(inner + item + ',' for item in seq)
    else:
        for item in seq:
            # Appending a double quote makes repr() choose single quotes;
            # the helper character is then cut out again.
            quoted = repr(item + '"')
            out.append(inner + quoted[:-2] + quoted[-1] + ',')
    out.append(outer + ')')
    return '\n'.join(out)

253 

254 

def duplicates_removed(it, already_seen=()):
    """
    Return the items of the iterable `it` as a list, keeping only the
    first occurrence of each and skipping anything in `already_seen`.

    The original order is kept.
    """
    encountered = set()
    unique = []
    for item in it:
        if item not in encountered and item not in already_seen:
            unique.append(item)
            encountered.add(item)
    return unique

269 

270 

class Future:
    """Base class for deferring some piece of work.

    RegexLexerMeta handles instances specially, so that regex strings can
    be constructed at first use.
    """

    def get(self):
        """Produce the deferred value; must be overridden by subclasses."""
        raise NotImplementedError

279 

280 

def guess_decode(text):
    """Decode *text* with guessed encoding.

    First try UTF-8; this should fail for non-UTF-8 encodings.
    Then try the preferred locale encoding.
    Fall back to latin-1, which always works.

    Returns a ``(decoded_text, encoding_name)`` tuple, where the name is
    the encoding that was actually used for decoding.
    """
    try:
        return text.decode('utf-8'), 'utf-8'
    except UnicodeDecodeError:
        pass
    try:
        import locale
        prefencoding = locale.getpreferredencoding()
        # The locale encoding has to be passed explicitly: a bare
        # ``decode()`` uses UTF-8, which has just failed above, so the
        # locale step would never succeed.
        return text.decode(prefencoding), prefencoding
    except (UnicodeDecodeError, LookupError):
        # LookupError covers a locale encoding Python has no codec for.
        return text.decode('latin1'), 'latin1'

300 

301 

def guess_decode_from_terminal(text, term):
    """Decode *text* coming from terminal *term*.

    First try the terminal encoding, if given.
    Then try UTF-8.  Then try the preferred locale encoding.
    Fall back to latin-1, which always works.

    Returns a ``(decoded_text, encoding_name)`` tuple.  *term* may be any
    object; an ``encoding`` attribute that is missing or empty is ignored.
    """
    encoding = getattr(term, 'encoding', None)
    if encoding:
        try:
            return text.decode(encoding), encoding
        except (UnicodeDecodeError, LookupError):
            # Either the bytes are not valid in the terminal encoding or
            # the terminal reports an encoding name Python has no codec
            # for; in both cases fall through to the generic guessing.
            pass
    return guess_decode(text)

317 

318 

def terminal_encoding(term):
    """Return our best guess of the encoding used by the given *term*.

    A non-empty ``encoding`` attribute of *term* is used if there is one;
    otherwise the preferred locale encoding is returned.
    """
    declared = getattr(term, 'encoding', None)
    if not declared:
        import locale
        return locale.getpreferredencoding()
    return declared

325 

326 

class UnclosingTextIOWrapper(TextIOWrapper):
    """A ``TextIOWrapper`` whose ``close()`` only flushes pending output."""

    def close(self):
        # Don't close the underlying buffer (this also applies on
        # destruction): just write out whatever is still pending.
        self.flush()