Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pygments/util.py: 48%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

164 statements  

1""" 

2 pygments.util 

3 ~~~~~~~~~~~~~ 

4 

5 Utility functions. 

6 

7 :copyright: Copyright 2006-present by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11import re 

12from io import TextIOWrapper 

13import html 

14 

15 

# Separators used to cut a shebang line into pieces: forward slash,
# backslash or a space.
split_path_re = re.compile(r'[/\\ ]')
# Finds a ``<!DOCTYPE ...>`` declaration.  Group 1 captures the root name
# and, when present, a following ``KEYWORD "quoted string"`` pair.
doctype_lookup_re = re.compile(r'''
 <!DOCTYPE\s+(
 [a-zA-Z_][a-zA-Z0-9]*
 (?: \s+ # optional in HTML5
 [a-zA-Z_][a-zA-Z0-9]*\s+
 "[^"]*")?
 )
 [^>]*>
''', re.DOTALL | re.MULTILINE | re.VERBOSE)
# Matches an opening tag (with optional attributes), any content, and a
# closing tag; the closing tag name is not required to equal the opening one.
tag_re = re.compile(r'<(.+?)(\s.*?)?>.*?</.+?>',
                    re.IGNORECASE | re.DOTALL | re.MULTILINE)
# Matches an ``<?xml ... ?>`` declaration, allowing leading whitespace.
xml_decl_re = re.compile(r'\s*<\?xml[^>]*\?>', re.I)

29 

30 

class ClassNotFound(ValueError):
    """Signals that a lookup function could not find a matching class.

    It derives from :class:`ValueError`, so handlers written for
    ``ValueError`` also catch it.
    """

33 

34 

class OptionError(Exception):
    """
    Error raised by the option processing functions when an option's
    type or value is not acceptable.
    """

40 

def get_choice_opt(options, optname, allowed, default=None, normcase=False):
    """
    Return the value of `optname` from `options` if it is one of `allowed`.

    :param options: dictionary of option values.
    :param optname: key to look up in `options`.
    :param allowed: container of accepted values.
    :param default: value used when `optname` is absent from `options`.
    :param normcase: if true, string values are lower-cased before they
        are checked against `allowed` (and the lower-cased form is returned).
    :raises OptionError: if the (possibly lower-cased) value is not in
        `allowed`.
    """
    value = options.get(optname, default)
    # Only strings can be case-normalised.  Anything else (for instance the
    # ``None`` default) goes straight to the membership check, so a bad value
    # is reported as an OptionError rather than an AttributeError.
    if normcase and isinstance(value, str):
        value = value.lower()
    if value not in allowed:
        raise OptionError('Value for option {} must be one of {}'.format(
            optname, ', '.join(map(str, allowed))))
    return value

52 

53 

def get_bool_opt(options, optname, default=None):
    """
    Look up `optname` in `options` (falling back to `default`) and
    interpret the result as a Boolean.

    ``bool`` values are returned unchanged and other integers are converted
    with ``bool()``.  Strings are accepted so that Booleans can be given on
    the command line: ``1``, ``yes``, ``true`` and ``on`` mean ``True``;
    ``0``, ``no``, ``false`` and ``off`` mean ``False`` (matched
    case-insensitively).

    An `OptionError` is raised for any other string and for values that are
    neither Booleans, integers nor strings.
    """
    value = options.get(optname, default)
    if isinstance(value, bool):
        return value
    if isinstance(value, int):
        return bool(value)
    if not isinstance(value, str):
        raise OptionError(f'Invalid type {value!r} for option {optname}; use '
                          '1/0, yes/no, true/false, on/off')
    lowered = value.lower()
    if lowered in ('1', 'yes', 'true', 'on'):
        return True
    if lowered in ('0', 'no', 'false', 'off'):
        return False
    raise OptionError(f'Invalid value {value!r} for option {optname}; use '
                      '1/0, yes/no, true/false, on/off')

81 

82 

def get_int_opt(options, optname, default=None):
    """As :func:`get_bool_opt`, but interpret the value as an integer.

    The value of `optname` in `options` (or `default` if the key is absent)
    is passed to ``int()`` and the result is returned.

    :raises OptionError: if ``int()`` rejects the value, either because of
        its type or because of its content.  The original ``TypeError`` or
        ``ValueError`` is attached as the cause.
    """
    value = options.get(optname, default)
    try:
        return int(value)
    except TypeError as err:
        # Chain explicitly so the traceback shows the conversion failure as
        # the direct cause of the OptionError.
        raise OptionError(f'Invalid type {value!r} for option {optname}; you '
                          'must give an integer value') from err
    except ValueError as err:
        raise OptionError(f'Invalid value {value!r} for option {optname}; you '
                          'must give an integer value') from err

94 

def get_list_opt(options, optname, default=None):
    """
    Look up `optname` in `options` (falling back to `default`) and return
    it as a list.

    A string is split at whitespace; a list or tuple is copied into a new
    list.  Any other value raises an `OptionError`.
    """
    value = options.get(optname, default)
    if isinstance(value, str):
        return value.split()
    if isinstance(value, (list, tuple)):
        return list(value)
    raise OptionError(f'Invalid type {value!r} for option {optname}; you '
                      'must give a list value')

109 

110 

def docstring_headline(obj):
    """Return the first paragraph of ``obj.__doc__`` joined onto one line.

    Lines are taken from the start of the stripped docstring up to the first
    blank line; each is stripped and the pieces are joined with single
    spaces.  An empty string is returned when there is no docstring.
    """
    doc = obj.__doc__
    if not doc:
        return ''
    headline = []
    for raw_line in doc.strip().splitlines():
        piece = raw_line.strip()
        if not piece:
            # A blank line ends the headline paragraph.
            break
        headline.append(piece)
    return ' '.join(headline)

121 

122 

def make_analysator(f):
    """Wrap `f` as a static text analyser that always returns a float.

    The returned ``staticmethod`` calls ``f(text)`` and yields ``0.0`` if
    `f` raises, returns a falsy value, or returns something ``float()``
    cannot convert; otherwise the result is clamped to ``0.0``..``1.0``.
    The wrapper's docstring is copied from `f`.
    """
    def text_analyse(text):
        try:
            score = f(text)
        except Exception:
            # Any failure inside the analyser counts as "no match".
            return 0.0
        if score:
            try:
                return min(1.0, max(0.0, float(score)))
            except (ValueError, TypeError):
                return 0.0
        return 0.0

    text_analyse.__doc__ = f.__doc__
    return staticmethod(text_analyse)

138 

139 

def shebang_matches(text, regex):
    r"""Tell whether `regex` matches the last component of the shebang line,
    if `text` starts with one.

    >>> from pygments.util import shebang_matches
    >>> shebang_matches('#!/usr/bin/env python', r'python(2\.\d)?')
    True
    >>> shebang_matches('#!/usr/bin/python2.4', r'python(2\.\d)?')
    True
    >>> shebang_matches('#!/usr/bin/python-ruby', r'python(2\.\d)?')
    False
    >>> shebang_matches('#!/usr/bin/python/ruby', r'python(2\.\d)?')
    False
    >>> shebang_matches('#!/usr/bin/startsomethingwith python',
    ...                 r'python(2\.\d)?')
    True

    Common Windows executable file extensions are accepted as well::

        >>> shebang_matches('#!C:\\Python2.4\\Python.exe', r'python(2\.\d)?')
        True

    Parameters (``'-f'`` or ``'--foo'``) are ignored, so ``'perl'`` gives
    the same result as ``'perl -e'``.

    Note that the whole component has to match: the regular expression is
    wrapped in ``'^$'``.
    """
    # Only the first line matters, and it is compared in lower case.
    first_line = text.partition('\n')[0].lower()
    if not first_line.startswith('#!'):
        return False
    # Keep the non-empty pieces that do not look like command-line options.
    candidates = [part
                  for part in split_path_re.split(first_line[2:].strip())
                  if part and not part.startswith('-')]
    if not candidates:
        return False
    pattern = re.compile(rf'^{regex}(\.(exe|cmd|bat|bin))?$', re.IGNORECASE)
    return pattern.search(candidates[-1]) is not None

183 

184 

def doctype_matches(text, regex):
    """Tell whether the DOCTYPE found in `text` matches `regex`.

    Returns ``False`` when `text` contains no DOCTYPE declaration.  Only
    the first part of the DOCTYPE is checked,
    eg: 'html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"'.
    The match is anchored at the start and is case-insensitive.
    """
    found = doctype_lookup_re.search(text)
    if found is None:
        return False
    declared = found.group(1).strip()
    return re.compile(regex, re.I).match(declared) is not None

196 

197 

def html_doctype_matches(text):
    """Tell whether `text` appears to carry an HTML doctype.

    This is :func:`doctype_matches` applied with the pattern ``html``.
    """
    is_html = doctype_matches(text, r'html')
    return is_html

201 

202 

# Remembers results of the tag heuristic, keyed by ``hash(text)`` rather
# than by the text itself.
_looks_like_xml_cache = {}


def looks_like_xml(text):
    """Tell whether `text` looks like XML.

    An XML declaration at the start or a DOCTYPE anywhere in the text is
    enough.  Otherwise the first 1000 characters are searched for an
    opening/closing tag pair, and only that result is cached.
    """
    if xml_decl_re.match(text):
        return True
    key = hash(text)
    if key in _looks_like_xml_cache:
        return _looks_like_xml_cache[key]
    if doctype_lookup_re.search(text) is not None:
        # DOCTYPE hits are returned directly and are not cached.
        return True
    has_tags = tag_re.search(text[:1000]) is not None
    _looks_like_xml_cache[key] = has_tags
    return has_tags

220 

221 

def surrogatepair(c):
    """Split a character code wider than 16 bits into its pair of 16 bit
    surrogates, returned as a ``(high, low)`` tuple.
    """
    # From example D28 of:
    # http://www.unicode.org/book/ch03.pdf
    # 0xd7c0 is 0xd800 - (0x10000 >> 10), so the 0x10000 offset is already
    # folded into the constant for the high surrogate.
    high = 0xd7c0 + (c >> 10)
    low = 0xdc00 + (c & 0x3ff)
    return (high, low)

229 

230 

def format_lines(var_name, seq, raw=False, indent_level=0):
    """Render `seq` as source text for a tuple assigned to `var_name`.

    One item is emitted per line, followed by a comma, indented four spaces
    deeper than the assignment, which is itself indented by `indent_level`
    steps of four spaces.  With `raw` set the items are written as they
    are; otherwise each one is written as a single-quoted string literal.
    """
    outer = ' ' * indent_level * 4
    inner = ' ' * (indent_level + 1) * 4
    out = [outer + var_name + ' = (']
    if raw:
        # These should be preformatted reprs of, say, tuples.
        out.extend(inner + item + ',' for item in seq)
    else:
        for item in seq:
            # Appending a double quote makes repr() choose single quotes;
            # the extra character is then cut out of the result again.
            quoted = repr(item + '"')
            out.append(inner + quoted[:-2] + quoted[-1] + ',')
    out.append(outer + ')')
    return '\n'.join(out)

248 

249 

def duplicates_removed(it, already_seen=()):
    """
    Build a list of the items of the iterable `it` with repeats dropped.

    Items that occur in `already_seen` are left out as well.  The first
    occurrence of each remaining item is kept, in its original order.
    """
    unique = []
    emitted = set()
    for item in it:
        if item not in emitted and item not in already_seen:
            unique.append(item)
            emitted.add(item)
    return unique

264 

265 

class Future:
    """Base class for a piece of work that is computed later.

    RegexLexerMeta treats instances specially, so that regex strings can be
    built when they are first used.
    """

    def get(self):
        """Produce the deferred value; this base version is not implemented."""
        raise NotImplementedError

274 

275 

def guess_decode(text):
    """Decode the bytes in *text*, guessing the encoding.

    UTF-8 is tried first, since decoding should fail for data in another
    encoding.  The preferred locale encoding comes next, and latin-1, which
    always works, is the last resort.

    Returns a ``(decoded_text, encoding_name)`` tuple.
    """
    try:
        return text.decode('utf-8'), 'utf-8'
    except UnicodeDecodeError:
        pass
    try:
        import locale
        prefencoding = locale.getpreferredencoding()
        return text.decode(prefencoding), prefencoding
    except (UnicodeDecodeError, LookupError):
        return text.decode('latin1'), 'latin1'

295 

296 

def guess_decode_from_terminal(text, term):
    """Decode *text* coming from terminal *term*.

    First try the terminal encoding, if given.
    Then try UTF-8.  Then try the preferred locale encoding.
    Fall back to latin-1, which always works.

    :param text: the bytes to decode.
    :param term: object whose optional ``encoding`` attribute names the
        terminal encoding; a missing or falsy attribute skips that step.
    :returns: a ``(decoded_text, encoding_name)`` tuple.
    """
    encoding = getattr(term, 'encoding', None)
    if encoding:
        try:
            return text.decode(encoding), encoding
        except (UnicodeDecodeError, LookupError):
            # LookupError: the terminal reports an encoding name Python
            # does not know.  Treat it like undecodable data and fall
            # through to the guessing chain, as guess_decode() does for the
            # locale encoding.
            pass
    return guess_decode(text)

312 

313 

def terminal_encoding(term):
    """Give our best guess of the encoding used by *term*.

    A truthy ``encoding`` attribute on *term* is returned as is; otherwise
    the preferred locale encoding is used.
    """
    encoding = getattr(term, 'encoding', None)
    if not encoding:
        import locale
        return locale.getpreferredencoding()
    return encoding

320 

321 

class UnclosingTextIOWrapper(TextIOWrapper):
    """A ``TextIOWrapper`` whose ``close()`` only flushes."""

    def close(self):
        # Don't close underlying buffer on destruction.
        self.flush()

326 

def html_escape(string, quote=True):
    """Escape `string` for safe use in HTML; ``None`` gives an empty string.

    `html.escape` calls ``replace`` on its argument and so cannot handle
    `NoneType`, which is why ``None`` is checked for up front.

    The optional flag `quote` is true by default, in which case double and
    single quotes are escaped as well.
    See https://docs.python.org/3/library/html.html#html.escape for more details.
    """
    if string is None:
        return ''
    return html.escape(string, quote=quote)