Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/babel/util.py: 32%
1"""
2 babel.util
3 ~~~~~~~~~~
5 Various utility classes and functions.
7 :copyright: (c) 2013-2025 by the Babel Team.
8 :license: BSD, see LICENSE for more details.
9"""
10from __future__ import annotations
12import codecs
13import datetime
14import os
15import re
16import textwrap
17import warnings
18from collections.abc import Generator, Iterable
19from typing import IO, Any, TypeVar
21from babel import dates, localtime
23missing = object()
25_T = TypeVar("_T")


def distinct(iterable: Iterable[_T]) -> Generator[_T, None, None]:
    """Yield all items in an iterable collection that are distinct.

    Unlike when using sets for a similar effect, the original ordering of the
    items in the collection is preserved by this function.

    >>> print(list(distinct([1, 2, 1, 3, 4, 4])))
    [1, 2, 3, 4]
    >>> print(list(distinct('foobar')))
    ['f', 'o', 'b', 'a', 'r']

    :param iterable: the iterable collection providing the data
    """
    seen = set()
    for item in iter(iterable):
        if item not in seen:
            yield item
            seen.add(item)


# Regexp to match python magic encoding line
PYTHON_MAGIC_COMMENT_re = re.compile(
    br'[ \t\f]* \# .* coding[=:][ \t]*([-\w.]+)', re.VERBOSE)


def parse_encoding(fp: IO[bytes]) -> str | None:
    """Deduce the encoding of a source file from its magic comment.

    It does this in the same way as the `Python interpreter`__.

    .. __: https://docs.python.org/3.4/reference/lexical_analysis.html#encoding-declarations

    The ``fp`` argument should be a seekable file object.

    (From Jeff Dairiki)
    """
    pos = fp.tell()
    fp.seek(0)
    try:
        line1 = fp.readline()
        has_bom = line1.startswith(codecs.BOM_UTF8)
        if has_bom:
            line1 = line1[len(codecs.BOM_UTF8):]

        m = PYTHON_MAGIC_COMMENT_re.match(line1)
        if not m:
            try:
                import ast
                ast.parse(line1.decode('latin-1'))
            except (ImportError, SyntaxError, UnicodeEncodeError):
                # Either it's a real syntax error, in which case the source is
                # not valid python source, or line2 is a continuation of line1,
                # in which case we don't want to scan line2 for a magic
                # comment.
                pass
            else:
                line2 = fp.readline()
                m = PYTHON_MAGIC_COMMENT_re.match(line2)

        if has_bom:
            if m:
                magic_comment_encoding = m.group(1).decode('latin-1')
                if magic_comment_encoding != 'utf-8':
                    raise SyntaxError(f"encoding problem: {magic_comment_encoding} with BOM")
            return 'utf-8'
        elif m:
            return m.group(1).decode('latin-1')
        else:
            return None
    finally:
        fp.seek(pos)
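

# Illustrative usage sketch: the magic comment is read from a seekable binary
# stream, so an in-memory ``io.BytesIO`` works just as well as a real file
# opened in ``'rb'`` mode.
#
#     >>> import io
#     >>> parse_encoding(io.BytesIO(b"# -*- coding: latin-1 -*-\n"))
#     'latin-1'
#     >>> parse_encoding(io.BytesIO(b"print('hello')\n")) is None
#     True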


PYTHON_FUTURE_IMPORT_re = re.compile(
    r'from\s+__future__\s+import\s+\(*(.+)\)*')


def parse_future_flags(fp: IO[bytes], encoding: str = 'latin-1') -> int:
    """Parse the compiler flags set by :mod:`__future__` imports in the given
    Python code.
    """
    import __future__
    pos = fp.tell()
    fp.seek(0)
    flags = 0
    try:
        body = fp.read().decode(encoding)

        # Fix up the source to be (hopefully) parsable by regexps.
        # This will likely do untoward things if the source code itself is broken.

        # (1) Fix `import (\n...` to be `import (...`.
        body = re.sub(r'import\s*\([\r\n]+', 'import (', body)
        # (2) Join line-ending commas with the next line.
        body = re.sub(r',\s*[\r\n]+', ', ', body)
        # (3) Remove backslash line continuations.
        body = re.sub(r'\\\s*[\r\n]+', ' ', body)

        for m in PYTHON_FUTURE_IMPORT_re.finditer(body):
            names = [x.strip().strip('()') for x in m.group(1).split(',')]
            for name in names:
                feature = getattr(__future__, name, None)
                if feature:
                    flags |= feature.compiler_flag
    finally:
        fp.seek(pos)
    return flags
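

# Illustrative usage sketch: the returned bitmask is made of ``__future__``
# compiler flags, so it can be OR-ed into the ``flags`` argument of
# ``compile()`` when re-compiling the inspected source.
#
#     >>> import io, __future__
#     >>> src = b"from __future__ import annotations\n"
#     >>> parse_future_flags(io.BytesIO(src)) == __future__.annotations.compiler_flag
#     True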


def pathmatch(pattern: str, filename: str) -> bool:
    """Extended pathname pattern matching.

    This function is similar to what is provided by the ``fnmatch`` module in
    the Python standard library, but:

     * can match complete (relative or absolute) path names, and not just file
       names, and
     * also supports a convenience pattern ("**") to match files at any
       directory level.

    Examples:

    >>> pathmatch('**.py', 'bar.py')
    True
    >>> pathmatch('**.py', 'foo/bar/baz.py')
    True
    >>> pathmatch('**.py', 'templates/index.html')
    False

    >>> pathmatch('./foo/**.py', 'foo/bar/baz.py')
    True
    >>> pathmatch('./foo/**.py', 'bar/baz.py')
    False

    >>> pathmatch('^foo/**.py', 'foo/bar/baz.py')
    True
    >>> pathmatch('^foo/**.py', 'bar/baz.py')
    False

    >>> pathmatch('**/templates/*.html', 'templates/index.html')
    True
    >>> pathmatch('**/templates/*.html', 'templates/foo/bar.html')
    False

    :param pattern: the glob pattern
    :param filename: the path name of the file to match against
    """
    symbols = {
        '?': '[^/]',
        '?/': '[^/]/',
        '*': '[^/]+',
        '*/': '[^/]+/',
        '**/': '(?:.+/)*?',
        '**': '(?:.+/)*?[^/]+',
    }

    if pattern.startswith('^'):
        buf = ['^']
        pattern = pattern[1:]
    elif pattern.startswith('./'):
        buf = ['^']
        pattern = pattern[2:]
    else:
        buf = []

    for idx, part in enumerate(re.split('([?*]+/?)', pattern)):
        if idx % 2:
            buf.append(symbols[part])
        elif part:
            buf.append(re.escape(part))
    match = re.match(f"{''.join(buf)}$", filename.replace(os.sep, "/"))
    return match is not None


class TextWrapper(textwrap.TextWrapper):
    wordsep_re = re.compile(
        r'(\s+|'                                  # any whitespace
        r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))',   # em-dash
    )

    # e.g. '\u2068foo bar.py\u2069:42'
    _enclosed_filename_re = re.compile(r'(\u2068[^\u2068]+?\u2069(?::-?\d+)?)')

    def _split(self, text):
        """Split the text into indivisible chunks while ensuring that file names
        containing spaces are not broken up.
        """
        enclosed_filename_start = '\u2068'
        if enclosed_filename_start not in text:
            # There are no file names containing spaces; fall back to the default implementation.
            return super()._split(text)

        chunks = []
        for chunk in re.split(self._enclosed_filename_re, text):
            if chunk.startswith(enclosed_filename_start):
                chunks.append(chunk)
            else:
                chunks.extend(super()._split(chunk))
        return [c for c in chunks if c]
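

# Illustrative sketch: a file name wrapped in U+2068/U+2069 isolate marks is
# kept as a single indivisible chunk, so wrapping never splits it at the
# embedded space (assuming ``break_long_words=False`` as used below).
#
#     >>> tw = TextWrapper(width=20, break_long_words=False)
#     >>> '\u2068my file.py\u2069:1' in tw._split('see \u2068my file.py\u2069:1 please')
#     True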


def wraptext(text: str, width: int = 70, initial_indent: str = '', subsequent_indent: str = '') -> list[str]:
    """Simple wrapper around the ``textwrap.wrap`` function in the standard
    library. This version does not wrap lines on hyphens in words. It also
    does not wrap PO file locations containing spaces.

    :param text: the text to wrap
    :param width: the maximum line width
    :param initial_indent: string that will be prepended to the first line of
                           wrapped output
    :param subsequent_indent: string that will be prepended to all lines save
                              the first of wrapped output
    """
    warnings.warn(
        "`babel.util.wraptext` is deprecated and will be removed in a future version of Babel. "
        "If you need this functionality, use the `babel.util.TextWrapper` class directly.",
        DeprecationWarning,
        stacklevel=2,
    )
    wrapper = TextWrapper(width=width, initial_indent=initial_indent,
                          subsequent_indent=subsequent_indent,
                          break_long_words=False)
    return wrapper.wrap(text)


# TODO (Babel 3.x): Remove this re-export
odict = dict


class FixedOffsetTimezone(datetime.tzinfo):
    """
    Fixed offset in minutes east from UTC.

    DEPRECATED: Use the standard library `datetime.timezone` instead.
    """
    # TODO (Babel 3.x): Remove this class

    def __init__(self, offset: float, name: str | None = None) -> None:
        warnings.warn(
            "`FixedOffsetTimezone` is deprecated and will be removed in a future version of Babel. "
            "Use the standard library `datetime.timezone` class.",
            DeprecationWarning,
            stacklevel=2,
        )
        self._offset = datetime.timedelta(minutes=offset)
        if name is None:
            name = 'Etc/GMT%+d' % offset
        self.zone = name

    def __str__(self) -> str:
        return self.zone

    def __repr__(self) -> str:
        return f'<FixedOffset "{self.zone}" {self._offset}>'

    def utcoffset(self, dt: datetime.datetime) -> datetime.timedelta:
        return self._offset

    def tzname(self, dt: datetime.datetime) -> str:
        return self.zone

    def dst(self, dt: datetime.datetime) -> datetime.timedelta:
        return ZERO
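

# Illustrative replacement sketch, per the deprecation notice above: a fixed
# one-hour offset built with the standard library instead of this class.
#
#     >>> import datetime
#     >>> datetime.timezone(datetime.timedelta(minutes=60))
#     datetime.timezone(datetime.timedelta(seconds=3600))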


# Export the localtime functionality here because that's
# where it was in the past.
# TODO(3.0): remove these aliases
UTC = dates.UTC
LOCALTZ = dates.LOCALTZ
get_localzone = localtime.get_localzone
STDOFFSET = localtime.STDOFFSET
DSTOFFSET = localtime.DSTOFFSET
DSTDIFF = localtime.DSTDIFF
ZERO = localtime.ZERO


def _cmp(a: Any, b: Any):
    # Equivalent of Python 2's built-in ``cmp``: returns -1, 0 or 1.
    return (a > b) - (a < b)