Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/babel/util.py: 32%
1"""
2babel.util
3~~~~~~~~~~
5Various utility classes and functions.
7:copyright: (c) 2013-2025 by the Babel Team.
8:license: BSD, see LICENSE for more details.
9"""
11from __future__ import annotations
13import codecs
14import datetime
15import os
16import re
17import textwrap
18import warnings
19from collections.abc import Generator, Iterable
20from typing import IO, Any, TypeVar
22from babel import dates, localtime
24missing = object()
26_T = TypeVar("_T")
29def distinct(iterable: Iterable[_T]) -> Generator[_T, None, None]:
30 """Yield all items in an iterable collection that are distinct.
32 Unlike when using sets for a similar effect, the original ordering of the
33 items in the collection is preserved by this function.
35 >>> print(list(distinct([1, 2, 1, 3, 4, 4])))
36 [1, 2, 3, 4]
37 >>> print(list(distinct('foobar')))
38 ['f', 'o', 'b', 'a', 'r']
40 :param iterable: the iterable collection providing the data
41 """
42 seen = set()
43 for item in iter(iterable):
44 if item not in seen:
45 yield item
46 seen.add(item)
# Regexp to match python magic encoding line
PYTHON_MAGIC_COMMENT_re = re.compile(
    rb'[ \t\f]* \# .* coding[=:][ \t]*([-\w.]+)',
    flags=re.VERBOSE,
)


def parse_encoding(fp: IO[bytes]) -> str | None:
    """Deduce the encoding of a source file from magic comment.

    It does this in the same way as the `Python interpreter`__

    .. __: https://docs.python.org/3.4/reference/lexical_analysis.html#encoding-declarations

    The ``fp`` argument should be a seekable file object.

    (From Jeff Dairiki)
    """
    pos = fp.tell()
    fp.seek(0)
    try:
        line1 = fp.readline()
        has_bom = line1.startswith(codecs.BOM_UTF8)
        if has_bom:
            line1 = line1[len(codecs.BOM_UTF8):]

        m = PYTHON_MAGIC_COMMENT_re.match(line1)
        if not m:
            try:
                import ast

                ast.parse(line1.decode('latin-1'))
            except (ImportError, SyntaxError, UnicodeEncodeError):
                # Either it's a real syntax error, in which case the source is
                # not valid python source, or line2 is a continuation of line1,
                # in which case we don't want to scan line2 for a magic
                # comment.
                pass
            else:
                line2 = fp.readline()
                m = PYTHON_MAGIC_COMMENT_re.match(line2)

        if has_bom:
            if m:
                magic_comment_encoding = m.group(1).decode('latin-1')
                if magic_comment_encoding != 'utf-8':
                    raise SyntaxError(f"encoding problem: {magic_comment_encoding} with BOM")
            return 'utf-8'
        elif m:
            return m.group(1).decode('latin-1')
        else:
            return None
    finally:
        fp.seek(pos)
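

# Illustrative sketch, not part of babel.util: a typical caller opens the file
# in binary mode, lets parse_encoding() inspect the magic comment, and falls
# back to UTF-8 when no declaration is found. The helper name is hypothetical.
def _example_read_source(filename: str) -> str:
    with open(filename, 'rb') as fp:
        encoding = parse_encoding(fp) or 'utf-8'
        return fp.read().decode(encoding)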

PYTHON_FUTURE_IMPORT_re = re.compile(r'from\s+__future__\s+import\s+\(*(.+)\)*')


def parse_future_flags(fp: IO[bytes], encoding: str = 'latin-1') -> int:
    """Parse the compiler flags set by :mod:`__future__` imports in the given
    Python code.
    """
    import __future__

    pos = fp.tell()
    fp.seek(0)
    flags = 0
    try:
        body = fp.read().decode(encoding)

        # Fix up the source to be (hopefully) parsable by regexps.
        # This will likely do untoward things if the source code itself is broken.

        # (1) Fix `import (\n...` to be `import (...`.
        body = re.sub(r'import\s*\([\r\n]+', 'import (', body)
        # (2) Join line-ending commas with the next line.
        body = re.sub(r',\s*[\r\n]+', ', ', body)
        # (3) Remove backslash line continuations.
        body = re.sub(r'\\\s*[\r\n]+', ' ', body)

        for m in PYTHON_FUTURE_IMPORT_re.finditer(body):
            names = [x.strip().strip('()') for x in m.group(1).split(',')]
            for name in names:
                feature = getattr(__future__, name, None)
                if feature:
                    flags |= feature.compiler_flag
    finally:
        fp.seek(pos)
    return flags
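

# Illustrative sketch, not part of babel.util: the returned bit mask can be
# passed to the built-in compile() (with dont_inherit=True) so the code is
# compiled under the same __future__ semantics it declares. The helper name
# is hypothetical.
def _example_compile_with_future_flags(filename: str):
    with open(filename, 'rb') as fp:
        flags = parse_future_flags(fp)
        source = fp.read()
    return compile(source, filename, 'exec', flags=flags, dont_inherit=True)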

def pathmatch(pattern: str, filename: str) -> bool:
    """Extended pathname pattern matching.

    This function is similar to what is provided by the ``fnmatch`` module in
    the Python standard library, but:

    * can match complete (relative or absolute) path names, and not just file
      names, and
    * also supports a convenience pattern ("**") to match files at any
      directory level.

    Examples:

    >>> pathmatch('**.py', 'bar.py')
    True
    >>> pathmatch('**.py', 'foo/bar/baz.py')
    True
    >>> pathmatch('**.py', 'templates/index.html')
    False

    >>> pathmatch('./foo/**.py', 'foo/bar/baz.py')
    True
    >>> pathmatch('./foo/**.py', 'bar/baz.py')
    False

    >>> pathmatch('^foo/**.py', 'foo/bar/baz.py')
    True
    >>> pathmatch('^foo/**.py', 'bar/baz.py')
    False

    >>> pathmatch('**/templates/*.html', 'templates/index.html')
    True
    >>> pathmatch('**/templates/*.html', 'templates/foo/bar.html')
    False

    :param pattern: the glob pattern
    :param filename: the path name of the file to match against
    """
    symbols = {
        '?': '[^/]',
        '?/': '[^/]/',
        '*': '[^/]+',
        '*/': '[^/]+/',
        '**/': '(?:.+/)*?',
        '**': '(?:.+/)*?[^/]+',
    }

    if pattern.startswith('^'):
        buf = ['^']
        pattern = pattern[1:]
    elif pattern.startswith('./'):
        buf = ['^']
        pattern = pattern[2:]
    else:
        buf = []

    for idx, part in enumerate(re.split('([?*]+/?)', pattern)):
        if idx % 2:
            buf.append(symbols[part])
        elif part:
            buf.append(re.escape(part))
    match = re.match(f"{''.join(buf)}$", filename.replace(os.sep, "/"))
    return match is not None

class TextWrapper(textwrap.TextWrapper):
    wordsep_re = re.compile(
        r'(\s+|'                                  # any whitespace
        r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))',   # em-dash
    )

    # e.g. '\u2068foo bar.py\u2069:42'
    _enclosed_filename_re = re.compile(r'(\u2068[^\u2068]+?\u2069(?::-?\d+)?)')

    def _split(self, text):
        """Split the text into indivisible chunks while ensuring that file names
        containing spaces are not broken up.
        """
        enclosed_filename_start = '\u2068'
        if enclosed_filename_start not in text:
            # There are no file names containing spaces; fall back to the default implementation.
            return super()._split(text)

        chunks = []
        for chunk in re.split(self._enclosed_filename_re, text):
            if chunk.startswith(enclosed_filename_start):
                chunks.append(chunk)
            else:
                chunks.extend(super()._split(chunk))
        return [c for c in chunks if c]
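

# Illustrative sketch, not part of babel.util: file names that contain spaces
# are isolated with U+2068/U+2069 (as in PO location comments), so this wrapper
# never splits them across lines. The sample file names below are made up.
def _example_wrap_locations() -> list[str]:
    wrapper = TextWrapper(width=40, break_long_words=False)
    text = '\u2068my module.py\u2069:10 \u2068other file.py\u2069:20 plain.py:30'
    return wrapper.wrap(text)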

def wraptext(
    text: str,
    width: int = 70,
    initial_indent: str = '',
    subsequent_indent: str = '',
) -> list[str]:
    """Simple wrapper around the ``textwrap.wrap`` function in the standard
    library. This version does not wrap lines on hyphens in words. It also
    does not wrap PO file locations containing spaces.

    :param text: the text to wrap
    :param width: the maximum line width
    :param initial_indent: string that will be prepended to the first line of
                           wrapped output
    :param subsequent_indent: string that will be prepended to all lines save
                              the first of wrapped output
    """
    warnings.warn(
        "`babel.util.wraptext` is deprecated and will be removed in a future version of Babel. "
        "If you need this functionality, use the `babel.util.TextWrapper` class directly.",
        DeprecationWarning,
        stacklevel=2,
    )
    return TextWrapper(
        width=width,
        initial_indent=initial_indent,
        subsequent_indent=subsequent_indent,
        break_long_words=False,
    ).wrap(text)

# TODO (Babel 3.x): Remove this re-export
odict = dict


class FixedOffsetTimezone(datetime.tzinfo):
    """
    Fixed offset in minutes east from UTC.

    DEPRECATED: Use the standard library `datetime.timezone` instead.
    """

    # TODO (Babel 3.x): Remove this class

    def __init__(self, offset: float, name: str | None = None) -> None:
        warnings.warn(
            "`FixedOffsetTimezone` is deprecated and will be removed in a future version of Babel. "
            "Use the standard library `datetime.timezone` class.",
            DeprecationWarning,
            stacklevel=2,
        )
        self._offset = datetime.timedelta(minutes=offset)
        if name is None:
            name = 'Etc/GMT%+d' % offset
        self.zone = name

    def __str__(self) -> str:
        return self.zone

    def __repr__(self) -> str:
        return f'<FixedOffset "{self.zone}" {self._offset}>'

    def utcoffset(self, dt: datetime.datetime) -> datetime.timedelta:
        return self._offset

    def tzname(self, dt: datetime.datetime) -> str:
        return self.zone

    def dst(self, dt: datetime.datetime) -> datetime.timedelta:
        return ZERO
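

# Illustrative sketch, not part of babel.util: the standard-library equivalent
# the deprecation warning above points to, producing a fixed offset given in
# minutes east of UTC. The helper name is hypothetical.
def _example_fixed_offset(minutes: float) -> datetime.timezone:
    return datetime.timezone(datetime.timedelta(minutes=minutes))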

# Export the localtime functionality here because that's
# where it was in the past.
# TODO(3.0): remove these aliases
UTC = dates.UTC
LOCALTZ = dates.LOCALTZ
get_localzone = localtime.get_localzone
STDOFFSET = localtime.STDOFFSET
DSTOFFSET = localtime.DSTOFFSET
DSTDIFF = localtime.DSTDIFF
ZERO = localtime.ZERO


def _cmp(a: Any, b: Any):
    return (a > b) - (a < b)