Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/babel/util.py: 32%
1"""
2babel.util
3~~~~~~~~~~
5Various utility classes and functions.
7:copyright: (c) 2013-2025 by the Babel Team.
8:license: BSD, see LICENSE for more details.
9"""
11from __future__ import annotations
13import codecs
14import datetime
15import os
16import re
17import textwrap
18import warnings
19from collections.abc import Generator, Iterable
20from typing import IO, Any, TypeVar
22from babel import dates, localtime
24missing = object()
26_T = TypeVar("_T")
29def distinct(iterable: Iterable[_T]) -> Generator[_T, None, None]:
30 """Yield all items in an iterable collection that are distinct.
32 Unlike when using sets for a similar effect, the original ordering of the
33 items in the collection is preserved by this function.
35 >>> print(list(distinct([1, 2, 1, 3, 4, 4])))
36 [1, 2, 3, 4]
37 >>> print(list(distinct('foobar')))
38 ['f', 'o', 'b', 'a', 'r']
40 :param iterable: the iterable collection providing the data
41 """
42 seen = set()
43 for item in iter(iterable):
44 if item not in seen:
45 yield item
46 seen.add(item)
# Regexp to match python magic encoding line
PYTHON_MAGIC_COMMENT_re = re.compile(
    rb'[ \t\f]* \# .* coding[=:][ \t]*([-\w.]+)',
    flags=re.VERBOSE,
)


def parse_encoding(fp: IO[bytes]) -> str | None:
    """Deduce the encoding of a source file from magic comment.

    It does this in the same way as the `Python interpreter`__

    .. __: https://docs.python.org/3.4/reference/lexical_analysis.html#encoding-declarations

    The ``fp`` argument should be a seekable file object.

    (From Jeff Dairiki)
    """
    pos = fp.tell()
    fp.seek(0)
    try:
        line1 = fp.readline()
        has_bom = line1.startswith(codecs.BOM_UTF8)
        if has_bom:
            line1 = line1[len(codecs.BOM_UTF8):]

        m = PYTHON_MAGIC_COMMENT_re.match(line1)
        if not m:
            try:
                import ast

                ast.parse(line1.decode('latin-1'))
            except (ImportError, SyntaxError, UnicodeEncodeError):
                # Either it's a real syntax error, in which case the source is
                # not valid python source, or line2 is a continuation of line1,
                # in which case we don't want to scan line2 for a magic
                # comment.
                pass
            else:
                line2 = fp.readline()
                m = PYTHON_MAGIC_COMMENT_re.match(line2)

        if has_bom:
            if m:
                magic_comment_encoding = m.group(1).decode('latin-1')
                if magic_comment_encoding != 'utf-8':
                    raise SyntaxError(f"encoding problem: {magic_comment_encoding} with BOM")
            return 'utf-8'
        elif m:
            return m.group(1).decode('latin-1')
        else:
            return None
    finally:
        fp.seek(pos)
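

# Illustrative sketch, not part of babel.util: a typical caller opens the file
# in binary mode, lets parse_encoding() inspect the magic comment, and falls
# back to UTF-8 when no declaration is found. The helper name is hypothetical.
def _example_read_source(filename: str) -> str:
    with open(filename, 'rb') as fp:
        encoding = parse_encoding(fp) or 'utf-8'
        return fp.read().decode(encoding)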

PYTHON_FUTURE_IMPORT_re = re.compile(r'from\s+__future__\s+import\s+\(*(.+)\)*')


def parse_future_flags(fp: IO[bytes], encoding: str = 'latin-1') -> int:
    """Parse the compiler flags set by :mod:`__future__` imports in the given
    Python code.
    """
    import __future__

    pos = fp.tell()
    fp.seek(0)
    flags = 0
    try:
        body = fp.read().decode(encoding)

        # Fix up the source to be (hopefully) parsable by regexps.
        # This will likely do untoward things if the source code itself is broken.

        # (1) Fix `import (\n...` to be `import (...`.
        body = re.sub(r'import\s*\([\r\n]+', 'import (', body)
        # (2) Join line-ending commas with the next line.
        body = re.sub(r',\s*[\r\n]+', ', ', body)
        # (3) Remove backslash line continuations.
        body = re.sub(r'\\\s*[\r\n]+', ' ', body)

        for m in PYTHON_FUTURE_IMPORT_re.finditer(body):
            names = [x.strip().strip('()') for x in m.group(1).split(',')]
            for name in names:
                feature = getattr(__future__, name, None)
                if feature:
                    flags |= feature.compiler_flag
    finally:
        fp.seek(pos)
    return flags
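

# Illustrative sketch, not part of babel.util: the returned bit mask can be
# passed to the built-in compile() (with dont_inherit=True) so the code is
# compiled under the same __future__ semantics it declares. The helper name
# is hypothetical.
def _example_compile_with_future_flags(filename: str):
    with open(filename, 'rb') as fp:
        flags = parse_future_flags(fp)
        source = fp.read()
    return compile(source, filename, 'exec', flags=flags, dont_inherit=True)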

def pathmatch(pattern: str, filename: str) -> bool:
    """Extended pathname pattern matching.

    This function is similar to what is provided by the ``fnmatch`` module in
    the Python standard library, but:

    * can match complete (relative or absolute) path names, and not just file
      names, and
    * also supports a convenience pattern ("**") to match files at any
      directory level.

    Examples:

    >>> pathmatch('**.py', 'bar.py')
    True
    >>> pathmatch('**.py', 'foo/bar/baz.py')
    True
    >>> pathmatch('**.py', 'templates/index.html')
    False

    >>> pathmatch('./foo/**.py', 'foo/bar/baz.py')
    True
    >>> pathmatch('./foo/**.py', 'bar/baz.py')
    False

    >>> pathmatch('^foo/**.py', 'foo/bar/baz.py')
    True
    >>> pathmatch('^foo/**.py', 'bar/baz.py')
    False

    >>> pathmatch('**/templates/*.html', 'templates/index.html')
    True
    >>> pathmatch('**/templates/*.html', 'templates/foo/bar.html')
    False

    :param pattern: the glob pattern
    :param filename: the path name of the file to match against
    """
    symbols = {
        '?': '[^/]',
        '?/': '[^/]/',
        '*': '[^/]+',
        '*/': '[^/]+/',
        '**/': '(?:.+/)*?',
        '**': '(?:.+/)*?[^/]+',
    }

    if pattern.startswith('^'):
        buf = ['^']
        pattern = pattern[1:]
    elif pattern.startswith('./'):
        buf = ['^']
        pattern = pattern[2:]
    else:
        buf = []

    for idx, part in enumerate(re.split('([?*]+/?)', pattern)):
        if idx % 2:
            buf.append(symbols[part])
        elif part:
            buf.append(re.escape(part))
    match = re.match(f"{''.join(buf)}$", filename.replace(os.sep, "/"))
    return match is not None

class TextWrapper(textwrap.TextWrapper):
    wordsep_re = re.compile(
        r'(\s+|'                                  # any whitespace
        r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))',   # em-dash
    )

    # e.g. '\u2068foo bar.py\u2069:42'
    _enclosed_filename_re = re.compile(r'(\u2068[^\u2068]+?\u2069(?::-?\d+)?)')

    def _split(self, text):
        """Split the text into indivisible chunks while ensuring that file names
        containing spaces are not broken up.
        """
        enclosed_filename_start = '\u2068'
        if enclosed_filename_start not in text:
            # There are no file names containing spaces; fall back to the default implementation.
            return super()._split(text)

        chunks = []
        for chunk in re.split(self._enclosed_filename_re, text):
            if chunk.startswith(enclosed_filename_start):
                chunks.append(chunk)
            else:
                chunks.extend(super()._split(chunk))
        return [c for c in chunks if c]
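

# Illustrative sketch, not part of babel.util: file names that contain spaces
# are isolated with U+2068/U+2069 (as in PO location comments), so this wrapper
# never splits them across lines. The sample file names below are made up.
def _example_wrap_locations() -> list[str]:
    wrapper = TextWrapper(width=40, break_long_words=False)
    text = '\u2068my module.py\u2069:10 \u2068other file.py\u2069:20 plain.py:30'
    return wrapper.wrap(text)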

def wraptext(
    text: str,
    width: int = 70,
    initial_indent: str = '',
    subsequent_indent: str = '',
) -> list[str]:
    """Simple wrapper around the ``textwrap.wrap`` function in the standard
    library. This version does not wrap lines on hyphens in words. It also
    does not wrap PO file locations containing spaces.

    :param text: the text to wrap
    :param width: the maximum line width
    :param initial_indent: string that will be prepended to the first line of
                           wrapped output
    :param subsequent_indent: string that will be prepended to all lines save
                              the first of wrapped output
    """
    warnings.warn(
        "`babel.util.wraptext` is deprecated and will be removed in a future version of Babel. "
        "If you need this functionality, use the `babel.util.TextWrapper` class directly.",
        DeprecationWarning,
        stacklevel=2,
    )
    return TextWrapper(
        width=width,
        initial_indent=initial_indent,
        subsequent_indent=subsequent_indent,
        break_long_words=False,
    ).wrap(text)

# TODO (Babel 3.x): Remove this re-export
odict = dict


class FixedOffsetTimezone(datetime.tzinfo):
    """
    Fixed offset in minutes east from UTC.

    DEPRECATED: Use the standard library `datetime.timezone` instead.
    """

    # TODO (Babel 3.x): Remove this class

    def __init__(self, offset: float, name: str | None = None) -> None:
        warnings.warn(
            "`FixedOffsetTimezone` is deprecated and will be removed in a future version of Babel. "
            "Use the standard library `datetime.timezone` class.",
            DeprecationWarning,
            stacklevel=2,
        )
        self._offset = datetime.timedelta(minutes=offset)
        if name is None:
            name = 'Etc/GMT%+d' % offset
        self.zone = name

    def __str__(self) -> str:
        return self.zone

    def __repr__(self) -> str:
        return f'<FixedOffset "{self.zone}" {self._offset}>'

    def utcoffset(self, dt: datetime.datetime) -> datetime.timedelta:
        return self._offset

    def tzname(self, dt: datetime.datetime) -> str:
        return self.zone

    def dst(self, dt: datetime.datetime) -> datetime.timedelta:
        return ZERO
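

# Illustrative sketch, not part of babel.util: the standard-library equivalent
# the deprecation warning above points to, producing a fixed offset given in
# minutes east of UTC. The helper name is hypothetical.
def _example_fixed_offset(minutes: float) -> datetime.timezone:
    return datetime.timezone(datetime.timedelta(minutes=minutes))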

# Export the localtime functionality here because that's
# where it was in the past.
# TODO(3.0): remove these aliases
UTC = dates.UTC
LOCALTZ = dates.LOCALTZ
get_localzone = localtime.get_localzone
STDOFFSET = localtime.STDOFFSET
DSTOFFSET = localtime.DSTOFFSET
DSTDIFF = localtime.DSTDIFF
ZERO = localtime.ZERO


def _cmp(a: Any, b: Any):
    return (a > b) - (a < b)