Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pygments/util.py: 23%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

159 statements  

1""" 

2 pygments.util 

3 ~~~~~~~~~~~~~ 

4 

5 Utility functions. 

6 

7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11import re 

12from io import TextIOWrapper 

13 

14 

# Separators used to break a shebang command line into pieces:
# forward slash, backslash and space.
split_path_re = re.compile(r'[/\\ ]')

# Finds a DOCTYPE declaration.  Group 1 captures the root name plus, when
# present, one further keyword followed by a double-quoted string.
doctype_lookup_re = re.compile(
    r'''
    <!DOCTYPE\s+(
        [a-zA-Z_][a-zA-Z0-9]*
        (?: \s+                      # optional in HTML5
            [a-zA-Z_][a-zA-Z0-9]*\s+
            "[^"]*")?
    )
    [^>]*>
    ''',
    re.VERBOSE | re.MULTILINE | re.DOTALL,
)

# Finds an opening tag (with optional attributes) that is later followed
# by some closing tag; the two tag names are not required to agree.
tag_re = re.compile(
    r'<(.+?)(\s.*?)?>.*?</.+?>',
    re.MULTILINE | re.DOTALL | re.IGNORECASE,
)

# Matches an XML declaration at the start of a text, allowing leading
# whitespace; case-insensitive.
xml_decl_re = re.compile(r'\s*<\?xml[^>]*\?>', re.IGNORECASE)

28 

29 

class ClassNotFound(ValueError):
    """Signals that a lookup function could not find a matching class."""

32 

33 

class OptionError(Exception):
    """
    Error raised by the option processing functions when the type or
    the value of an option argument is not acceptable.
    """

39 

def get_choice_opt(options, optname, allowed, default=None, normcase=False):
    """
    Return the value of `optname` from `options` if it is one of `allowed`.

    The value is looked up with ``options.get(optname, default)``.  When
    `normcase` is true and the value is a string, it is lower-cased before
    the membership test (and the lower-cased form is what gets returned).
    Non-string values are validated as they are.

    Raises `OptionError` if the (possibly normalised) value is not in
    `allowed`.
    """
    value = options.get(optname, default)
    # Only strings can be case-normalised.  Calling ``.lower()`` on anything
    # else (including the default ``None``) would raise AttributeError and
    # bypass the OptionError this function is supposed to produce.
    if normcase and isinstance(value, str):
        value = value.lower()
    if value not in allowed:
        choices = ', '.join(map(str, allowed))
        raise OptionError(f'Value for option {optname} must be one of {choices}')
    return value

51 

52 

def get_bool_opt(options, optname, default=None):
    """
    Fetch ``options.get(optname, default)`` and coerce it to a Boolean.

    Booleans are returned unchanged and other integers go through
    ``bool()``.  Strings are accepted too, so that values coming from the
    command line work: ``1``, ``yes``, ``true`` and ``on`` mean ``True``;
    ``0``, ``no``, ``false`` and ``off`` mean ``False`` (compared
    case-insensitively).

    Any other type, or a string outside those two sets, raises an
    `OptionError`.
    """
    value = options.get(optname, default)
    if isinstance(value, bool):
        return value
    if isinstance(value, int):
        return bool(value)
    if not isinstance(value, str):
        raise OptionError(f'Invalid type {value!r} for option {optname}; use '
                          '1/0, yes/no, true/false, on/off')

    lowered = value.lower()
    if lowered in ('1', 'yes', 'true', 'on'):
        return True
    if lowered in ('0', 'no', 'false', 'off'):
        return False
    raise OptionError(f'Invalid value {value!r} for option {optname}; use '
                      '1/0, yes/no, true/false, on/off')

80 

81 

def get_int_opt(options, optname, default=None):
    """
    Fetch ``options.get(optname, default)`` and convert it with ``int()``.

    Raises `OptionError` when ``int()`` rejects the value, with a message
    that distinguishes a bad type from a bad value.
    """
    value = options.get(optname, default)
    try:
        number = int(value)
    except TypeError:
        raise OptionError(f'Invalid type {value!r} for option {optname}; you '
                          'must give an integer value')
    except ValueError:
        raise OptionError(f'Invalid value {value!r} for option {optname}; you '
                          'must give an integer value')
    else:
        return number

93 

def get_list_opt(options, optname, default=None):
    """
    Fetch ``options.get(optname, default)`` as a list.

    A list or tuple is copied into a new list; a string is split at
    whitespace.  Anything else raises `OptionError`.
    """
    value = options.get(optname, default)
    if isinstance(value, (list, tuple)):
        return list(value)
    if isinstance(value, str):
        return value.split()
    raise OptionError(f'Invalid type {value!r} for option {optname}; you '
                      'must give a list value')

108 

109 

def docstring_headline(obj):
    """
    Return the first paragraph of ``obj.__doc__`` as a single line.

    The lines up to the first blank line are stripped and joined with
    single spaces.  An empty or missing docstring yields ``''``.
    """
    doc = obj.__doc__
    if not doc:
        return ''
    headline = []
    for raw_line in doc.strip().splitlines():
        piece = raw_line.strip()
        if not piece:
            # A blank line ends the headline paragraph.
            break
        headline.append(piece)
    return ' '.join(headline)

120 

121 

def make_analysator(f):
    """
    Wrap `f` into a static text-analysis function that always returns a float.

    The wrapper returns ``0.0`` when `f` raises, returns something falsy,
    or returns something ``float()`` cannot convert; otherwise the result
    is clamped to the range 0.0 to 1.0.  The docstring of `f` is copied
    onto the wrapper, which is returned as a ``staticmethod``.
    """
    def text_analyse(text):
        try:
            score = f(text)
        except Exception:
            return 0.0
        if not score:
            return 0.0
        try:
            as_float = float(score)
        except (ValueError, TypeError):
            return 0.0
        return min(1.0, max(0.0, as_float))

    text_analyse.__doc__ = f.__doc__
    return staticmethod(text_analyse)

137 

138 

def shebang_matches(text, regex):
    r"""Tell whether the shebang line of `text`, if any, names a program
    matching `regex`.

    Only the first line is looked at, lower-cased.  It is split at ``/``,
    ``\`` and spaces, and the last piece that is non-empty and does not
    start with ``-`` is tested.

    >>> from pygments.util import shebang_matches
    >>> shebang_matches('#!/usr/bin/env python', r'python(2\.\d)?')
    True
    >>> shebang_matches('#!/usr/bin/python2.4', r'python(2\.\d)?')
    True
    >>> shebang_matches('#!/usr/bin/python-ruby', r'python(2\.\d)?')
    False
    >>> shebang_matches('#!/usr/bin/python/ruby', r'python(2\.\d)?')
    False
    >>> shebang_matches('#!/usr/bin/startsomethingwith python',
    ...                 r'python(2\.\d)?')
    True

    Common Windows executable extensions (``.exe``, ``.cmd``, ``.bat``,
    ``.bin``) are accepted after the match::

    >>> shebang_matches('#!C:\\Python2.4\\Python.exe', r'python(2\.\d)?')
    True

    Pieces that look like parameters (``'-f'`` or ``'--foo'``) are skipped,
    so ``'perl'`` behaves the same as ``'perl -e'``.

    The regular expression is anchored on both sides (it is wrapped in
    ``'^...$'``), so it has to match the whole piece.
    """
    first_line = text.partition('\n')[0].lower()
    if not first_line.startswith('#!'):
        return False

    command = first_line[2:].strip()
    candidates = [piece for piece in split_path_re.split(command)
                  if piece and not piece.startswith('-')]
    if not candidates:
        return False

    pattern = re.compile(rf'^{regex}(\.(exe|cmd|bat|bin))?$', re.IGNORECASE)
    return pattern.search(candidates[-1]) is not None

182 

183 

def doctype_matches(text, regex):
    """Tell whether `text` has a DOCTYPE whose first part matches `regex`.

    Returns ``False`` when no DOCTYPE is found.  Only the first part of the
    declaration is tested, e.g.
    'html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"', and the match is
    case-insensitive and anchored at the start of that part.
    """
    found = doctype_lookup_re.search(text)
    if found is None:
        return False
    doctype = found.group(1).strip()
    pattern = re.compile(regex, re.I)
    return pattern.match(doctype) is not None

195 

196 

def html_doctype_matches(text):
    """Tell whether `text` appears to carry an HTML doctype."""
    html_pattern = r'html'
    return doctype_matches(text, html_pattern)

200 

201 

# Remembers the tag-search verdict per ``hash(text)``.
_looks_like_xml_cache = {}


def looks_like_xml(text):
    """Tell whether `text` has an XML declaration, a DOCTYPE, or some tags.

    Only the tag search (run on the first 1000 characters) is cached;
    XML-declaration and DOCTYPE hits return ``True`` without touching
    the cache.
    """
    if xml_decl_re.match(text):
        return True

    cache_key = hash(text)
    if cache_key in _looks_like_xml_cache:
        return _looks_like_xml_cache[cache_key]

    if doctype_lookup_re.search(text) is not None:
        return True

    has_tags = tag_re.search(text[:1000]) is not None
    _looks_like_xml_cache[cache_key] = has_tags
    return has_tags

219 

220 

def surrogatepair(c):
    """Split a character code wider than 16 bits into its two 16-bit
    surrogates, returned as a ``(high, low)`` tuple.
    """
    # From example D28 of:
    # http://www.unicode.org/book/ch03.pdf
    high = 0xd7c0 + (c >> 10)
    low = 0xdc00 + (c & 0x3ff)
    return (high, low)

228 

229 

def format_lines(var_name, seq, raw=False, indent_level=0):
    """Render `seq` as the source text of a tuple assigned to `var_name`.

    Each item goes on its own line, one indent step (four spaces) deeper
    than the assignment, which itself sits at `indent_level` steps.  With
    `raw` set, items are emitted verbatim; otherwise each item is written
    as a single-quoted string literal.
    """
    outer = ' ' * indent_level * 4
    inner = ' ' * (indent_level + 1) * 4

    out = [outer + var_name + ' = (']
    if raw:
        # These should be preformatted reprs of, say, tuples.
        out.extend(inner + item + ',' for item in seq)
    else:
        for item in seq:
            # Appending a double quote makes repr() choose single quotes;
            # the extra character is then cut back out of the result.
            quoted = repr(item + '"')
            out.append(inner + quoted[:-2] + quoted[-1] + ',')
    out.append(outer + ')')
    return '\n'.join(out)

247 

248 

def duplicates_removed(it, already_seen=()):
    """
    Build a list of the items of `it` with repeats dropped.

    Items found in `already_seen` are dropped as well.  The first
    occurrence of each remaining item is kept, in its original order.
    """
    unique = []
    emitted = set()
    for item in it:
        if item not in emitted and item not in already_seen:
            unique.append(item)
            emitted.add(item)
    return unique

263 

264 

class Future:
    """Base class for work that is deferred until later.

    RegexLexerMeta handles instances specially, so that regex strings can
    be constructed at first use.
    """

    def get(self):
        """Placeholder; this base implementation always raises."""
        raise NotImplementedError

273 

274 

def guess_decode(text):
    """Decode the bytes *text* with a guessed encoding.

    The attempts, in order:

    1. UTF-8; this should fail for non-UTF-8 encodings.
    2. The preferred locale encoding, as reported by
       ``locale.getpreferredencoding()``.
    3. latin-1, which always works.

    Returns a ``(decoded_text, encoding_name)`` tuple naming the encoding
    that succeeded.
    """
    try:
        return text.decode('utf-8'), 'utf-8'
    except UnicodeDecodeError:
        pass

    import locale
    try:
        prefencoding = locale.getpreferredencoding()
        # Pass the encoding explicitly: a bare ``decode()`` would merely
        # retry UTF-8, which has just failed, so the locale encoding would
        # never actually be tried.
        return text.decode(prefencoding), prefencoding
    except (UnicodeDecodeError, LookupError):
        # LookupError covers a locale encoding Python has no codec for.
        return text.decode('latin1'), 'latin1'

294 

295 

def guess_decode_from_terminal(text, term):
    """Decode the bytes *text* that came from terminal *term*.

    If *term* has a non-empty ``encoding`` attribute, that encoding is
    tried first.  When it is missing or the decode fails, the work is
    handed to :func:`guess_decode`.

    Returns a ``(decoded_text, encoding_name)`` tuple.
    """
    encoding = getattr(term, 'encoding', None)
    if encoding:
        try:
            return text.decode(encoding), encoding
        except UnicodeDecodeError:
            # Fall through to the generic guessing below.
            pass
    return guess_decode(text)

311 

312 

def terminal_encoding(term):
    """Give the best guess of the encoding used by *term*.

    That is the terminal's own ``encoding`` attribute when it is set and
    non-empty, otherwise the preferred locale encoding.
    """
    encoding = getattr(term, 'encoding', None)
    if encoding:
        return encoding
    import locale
    return locale.getpreferredencoding()

319 

320 

class UnclosingTextIOWrapper(TextIOWrapper):
    """Text wrapper whose ``close()`` only flushes.

    This keeps the underlying buffer from being closed when the wrapper
    is destroyed.
    """

    def close(self):
        # Flush pending output, but deliberately skip the real close.
        self.flush()