Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/babel/util.py: 32%

125 statements  

1""" 

2babel.util 

3~~~~~~~~~~ 

4 

5Various utility classes and functions. 

6 

7:copyright: (c) 2013-2025 by the Babel Team. 

8:license: BSD, see LICENSE for more details. 

9""" 

10 

11from __future__ import annotations 

12 

13import codecs 

14import datetime 

15import os 

16import re 

17import textwrap 

18import warnings 

19from collections.abc import Generator, Iterable 

20from typing import IO, Any, TypeVar 

21 

22from babel import dates, localtime 

23 

24missing = object() 

25 

26_T = TypeVar("_T") 

27 


def distinct(iterable: Iterable[_T]) -> Generator[_T, None, None]:
    """Yield all items in an iterable collection that are distinct.

    Unlike when using sets for a similar effect, the original ordering of the
    items in the collection is preserved by this function.

    >>> print(list(distinct([1, 2, 1, 3, 4, 4])))
    [1, 2, 3, 4]
    >>> print(list(distinct('foobar')))
    ['f', 'o', 'b', 'a', 'r']

    :param iterable: the iterable collection providing the data
    """
    seen = set()
    for item in iter(iterable):
        if item not in seen:
            yield item
            seen.add(item)
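

# A minimal usage sketch (not part of babel.util): distinct() is a generator,
# so it works on arbitrary iterables and yields items lazily as they are first
# seen. The `_demo_distinct` helper below is hypothetical, for illustration only.
def _demo_distinct() -> None:
    locales = iter(["de", "fr", "de", "en", "fr"])  # any iterable, not just a list
    assert list(distinct(locales)) == ["de", "fr", "en"]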


# Regexp to match python magic encoding line
PYTHON_MAGIC_COMMENT_re = re.compile(
    rb'[ \t\f]* \# .* coding[=:][ \t]*([-\w.]+)',
    flags=re.VERBOSE,
)
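

# A quick sketch (not part of babel.util) of what the magic-comment regex
# accepts; because of re.VERBOSE, the literal spaces inside the pattern are
# ignored. The `_demo_magic_comment_re` helper is hypothetical, for
# illustration only.
def _demo_magic_comment_re() -> None:
    m = PYTHON_MAGIC_COMMENT_re.match(b'# -*- coding: utf-8 -*-\n')
    assert m is not None and m.group(1) == b'utf-8'
    m = PYTHON_MAGIC_COMMENT_re.match(b'#coding=latin-1\n')
    assert m is not None and m.group(1) == b'latin-1'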


def parse_encoding(fp: IO[bytes]) -> str | None:
    """Deduce the encoding of a source file from its magic comment.

    It does this in the same way as the `Python interpreter`__ does.

    .. __: https://docs.python.org/3.4/reference/lexical_analysis.html#encoding-declarations

    The ``fp`` argument should be a seekable file object.

    (From Jeff Dairiki)
    """
    pos = fp.tell()
    fp.seek(0)
    try:
        line1 = fp.readline()
        has_bom = line1.startswith(codecs.BOM_UTF8)
        if has_bom:
            line1 = line1[len(codecs.BOM_UTF8) :]

        m = PYTHON_MAGIC_COMMENT_re.match(line1)
        if not m:
            try:
                import ast

                ast.parse(line1.decode('latin-1'))
            except (ImportError, SyntaxError, UnicodeEncodeError):
                # Either it's a real syntax error, in which case the source is
                # not valid python source, or line2 is a continuation of line1,
                # in which case we don't want to scan line2 for a magic
                # comment.
                pass
            else:
                line2 = fp.readline()
                m = PYTHON_MAGIC_COMMENT_re.match(line2)

        if has_bom:
            if m:
                magic_comment_encoding = m.group(1).decode('latin-1')
                if magic_comment_encoding != 'utf-8':
                    raise SyntaxError(f"encoding problem: {magic_comment_encoding} with BOM")
            return 'utf-8'
        elif m:
            return m.group(1).decode('latin-1')
        else:
            return None
    finally:
        fp.seek(pos)
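

# A minimal usage sketch (not part of babel.util) of parse_encoding(); an
# io.BytesIO object stands in for a real source file, and the
# `_demo_parse_encoding` helper name is hypothetical.
def _demo_parse_encoding() -> None:
    import io

    src = io.BytesIO(b'# -*- coding: iso-8859-15 -*-\nmessage = "hi"\n')
    assert parse_encoding(src) == 'iso-8859-15'
    # Files without a magic comment (and without a BOM) yield None.
    assert parse_encoding(io.BytesIO(b'print("hello")\n')) is None
    # The original file position is restored afterwards.
    assert src.tell() == 0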


PYTHON_FUTURE_IMPORT_re = re.compile(r'from\s+__future__\s+import\s+\(*(.+)\)*')


def parse_future_flags(fp: IO[bytes], encoding: str = 'latin-1') -> int:
    """Parse the compiler flags set by :mod:`__future__` imports in the given
    Python code.
    """
    import __future__

    pos = fp.tell()
    fp.seek(0)
    flags = 0
    try:
        body = fp.read().decode(encoding)

        # Fix up the source to be (hopefully) parsable by regexps.
        # This will likely do untoward things if the source code itself is broken.

        # (1) Fix `import (\n...` to be `import (...`.
        body = re.sub(r'import\s*\([\r\n]+', 'import (', body)
        # (2) Join line-ending commas with the next line.
        body = re.sub(r',\s*[\r\n]+', ', ', body)
        # (3) Remove backslash line continuations.
        body = re.sub(r'\\\s*[\r\n]+', ' ', body)

        for m in PYTHON_FUTURE_IMPORT_re.finditer(body):
            names = [x.strip().strip('()') for x in m.group(1).split(',')]
            for name in names:
                feature = getattr(__future__, name, None)
                if feature:
                    flags |= feature.compiler_flag
    finally:
        fp.seek(pos)
    return flags
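

# A sketch (not part of babel.util) showing how the returned flags can be fed
# back into compile() so a file is compiled under the same __future__ features.
# The `_demo_parse_future_flags` helper is hypothetical, for illustration only.
def _demo_parse_future_flags() -> None:
    import __future__
    import io

    src = b'from __future__ import annotations\nx: "SomeName" = None\n'
    flags = parse_future_flags(io.BytesIO(src))
    assert flags & __future__.annotations.compiler_flag
    compile(src, '<demo>', 'exec', flags=flags, dont_inherit=True)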


def pathmatch(pattern: str, filename: str) -> bool:
    """Extended pathname pattern matching.

    This function is similar to what is provided by the ``fnmatch`` module in
    the Python standard library, but:

     * can match complete (relative or absolute) path names, and not just file
       names, and
     * also supports a convenience pattern ("**") to match files at any
       directory level.

    Examples:

    >>> pathmatch('**.py', 'bar.py')
    True
    >>> pathmatch('**.py', 'foo/bar/baz.py')
    True
    >>> pathmatch('**.py', 'templates/index.html')
    False

    >>> pathmatch('./foo/**.py', 'foo/bar/baz.py')
    True
    >>> pathmatch('./foo/**.py', 'bar/baz.py')
    False

    >>> pathmatch('^foo/**.py', 'foo/bar/baz.py')
    True
    >>> pathmatch('^foo/**.py', 'bar/baz.py')
    False

    >>> pathmatch('**/templates/*.html', 'templates/index.html')
    True
    >>> pathmatch('**/templates/*.html', 'templates/foo/bar.html')
    False

    :param pattern: the glob pattern
    :param filename: the path name of the file to match against
    """
    symbols = {
        '?': '[^/]',
        '?/': '[^/]/',
        '*': '[^/]+',
        '*/': '[^/]+/',
        '**/': '(?:.+/)*?',
        '**': '(?:.+/)*?[^/]+',
    }

    if pattern.startswith('^'):
        buf = ['^']
        pattern = pattern[1:]
    elif pattern.startswith('./'):
        buf = ['^']
        pattern = pattern[2:]
    else:
        buf = []

    for idx, part in enumerate(re.split('([?*]+/?)', pattern)):
        if idx % 2:
            buf.append(symbols[part])
        elif part:
            buf.append(re.escape(part))
    match = re.match(f"{''.join(buf)}$", filename.replace(os.sep, "/"))
    return match is not None
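

# A small sketch (not part of babel.util): patterns always use '/', while the
# filename may use the platform separator, which pathmatch() normalizes before
# matching. The `_demo_pathmatch` helper is hypothetical, for illustration only.
def _demo_pathmatch() -> None:
    native = os.path.join('foo', 'bar', 'baz.py')  # e.g. 'foo\\bar\\baz.py' on Windows
    assert pathmatch('foo/**.py', native)          # '**' crosses directory levels
    assert not pathmatch('foo/*.py', native)       # '*' does not cross '/'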


class TextWrapper(textwrap.TextWrapper):
    wordsep_re = re.compile(
        r'(\s+|'  # any whitespace
        r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))',  # em-dash
    )

    # e.g. '\u2068foo bar.py\u2069:42'
    _enclosed_filename_re = re.compile(r'(\u2068[^\u2068]+?\u2069(?::-?\d+)?)')

    def _split(self, text):
        """Split the text into indivisible chunks while ensuring that file names
        containing spaces are not broken up.
        """
        enclosed_filename_start = '\u2068'
        if enclosed_filename_start not in text:
            # There are no file names containing spaces; fall back to the
            # default implementation.
            return super()._split(text)

        chunks = []
        for chunk in re.split(self._enclosed_filename_re, text):
            if chunk.startswith(enclosed_filename_start):
                chunks.append(chunk)
            else:
                chunks.extend(super()._split(chunk))
        return [c for c in chunks if c]
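

# A sketch (not part of babel.util) of the behaviour _split() enables: a file
# name wrapped in U+2068/U+2069 isolate characters is kept as one chunk, so it
# is never broken across lines even though it contains a space. The
# `_demo_textwrapper` helper is hypothetical, for illustration only.
def _demo_textwrapper() -> None:
    wrapper = TextWrapper(width=20, break_long_words=False)
    text = '#: \u2068my file.py\u2069:42 \u2068other.py\u2069:7'
    assert wrapper.wrap(text) == ['#: \u2068my file.py\u2069:42', '\u2068other.py\u2069:7']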


def wraptext(
    text: str,
    width: int = 70,
    initial_indent: str = '',
    subsequent_indent: str = '',
) -> list[str]:
    """Simple wrapper around the ``textwrap.wrap`` function in the standard
    library. This version does not wrap lines on hyphens in words. It also
    does not wrap PO file locations containing spaces.

    :param text: the text to wrap
    :param width: the maximum line width
    :param initial_indent: string that will be prepended to the first line of
                           wrapped output
    :param subsequent_indent: string that will be prepended to all lines save
                              the first of wrapped output
    """
    warnings.warn(
        "`babel.util.wraptext` is deprecated and will be removed in a future version of Babel. "
        "If you need this functionality, use the `babel.util.TextWrapper` class directly.",
        DeprecationWarning,
        stacklevel=2,
    )
    return TextWrapper(
        width=width,
        initial_indent=initial_indent,
        subsequent_indent=subsequent_indent,
        break_long_words=False,
    ).wrap(text)
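

# As the deprecation message suggests, an equivalent call without the warning
# constructs TextWrapper directly; a rough sketch (the `_wrap_without_warning`
# helper is hypothetical, not part of Babel's API):
def _wrap_without_warning(text: str, width: int = 70) -> list[str]:
    return TextWrapper(width=width, break_long_words=False).wrap(text)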


# TODO (Babel 3.x): Remove this re-export
odict = dict


class FixedOffsetTimezone(datetime.tzinfo):
    """
    Fixed offset in minutes east from UTC.

    DEPRECATED: Use the standard library `datetime.timezone` instead.
    """

    # TODO (Babel 3.x): Remove this class

    def __init__(self, offset: float, name: str | None = None) -> None:
        warnings.warn(
            "`FixedOffsetTimezone` is deprecated and will be removed in a future version of Babel. "
            "Use the standard library `datetime.timezone` class.",
            DeprecationWarning,
            stacklevel=2,
        )
        self._offset = datetime.timedelta(minutes=offset)
        if name is None:
            name = 'Etc/GMT%+d' % offset
        self.zone = name

    def __str__(self) -> str:
        return self.zone

    def __repr__(self) -> str:
        return f'<FixedOffset "{self.zone}" {self._offset}>'

    def utcoffset(self, dt: datetime.datetime) -> datetime.timedelta:
        return self._offset

    def tzname(self, dt: datetime.datetime) -> str:
        return self.zone

    def dst(self, dt: datetime.datetime) -> datetime.timedelta:
        return ZERO
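

# A sketch (not part of babel.util) of the replacement the deprecation notice
# points to: datetime.timezone covers the same use case. The
# `_demo_fixed_offset_replacement` helper is hypothetical, for illustration only.
def _demo_fixed_offset_replacement() -> None:
    # Instead of FixedOffsetTimezone(90, 'custom'), use the stdlib class:
    tz = datetime.timezone(datetime.timedelta(minutes=90), 'custom')
    dt = datetime.datetime(2024, 1, 1, 12, 0, tzinfo=tz)
    assert dt.utcoffset() == datetime.timedelta(minutes=90)
    assert dt.tzname() == 'custom'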


# Export the localtime functionality here because that's
# where it was in the past.
# TODO(3.0): remove these aliases
UTC = dates.UTC
LOCALTZ = dates.LOCALTZ
get_localzone = localtime.get_localzone
STDOFFSET = localtime.STDOFFSET
DSTOFFSET = localtime.DSTOFFSET
DSTDIFF = localtime.DSTDIFF
ZERO = localtime.ZERO


def _cmp(a: Any, b: Any):
    return (a > b) - (a < b)
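

# A sketch (not part of babel.util): _cmp() reproduces the three-way result of
# Python 2's removed cmp() built-in. The `_demo_cmp` helper is hypothetical.
def _demo_cmp() -> None:
    assert _cmp(1, 2) == -1
    assert _cmp(2, 2) == 0
    assert _cmp(3, 2) == 1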