Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/babel/util.py: 38%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2 babel.util
3 ~~~~~~~~~~
5 Various utility classes and functions.
7 :copyright: (c) 2013-2024 by the Babel Team.
8 :license: BSD, see LICENSE for more details.
9"""
10from __future__ import annotations
12import codecs
13import collections
14import datetime
15import os
16import re
17import textwrap
18from collections.abc import Generator, Iterable
19from typing import IO, Any, TypeVar
21from babel import dates, localtime
# Sentinel object for "no value supplied" in APIs where ``None`` is itself a
# meaningful value (compare against it with ``is``).
missing = object()

# Generic type variable for the typed helpers below (e.g. :func:`distinct`).
_T = TypeVar("_T")
def distinct(iterable: Iterable[_T]) -> Generator[_T, None, None]:
    """Yield every item of an iterable exactly once, in first-seen order.

    Unlike converting to a set for the same effect, the original ordering
    of the items is preserved.

    >>> print(list(distinct([1, 2, 1, 3, 4, 4])))
    [1, 2, 3, 4]
    >>> print(list(distinct('foobar')))
    ['f', 'o', 'b', 'a', 'r']

    :param iterable: the iterable collection providing the data
    """
    emitted = set()
    for element in iterable:
        if element in emitted:
            continue
        emitted.add(element)
        yield element
# Regexp to match python magic encoding line
PYTHON_MAGIC_COMMENT_re = re.compile(
    br'[ \t\f]* \# .* coding[=:][ \t]*([-\w.]+)', re.VERBOSE)


def parse_encoding(fp: IO[bytes]) -> str | None:
    """Deduce the encoding of a source file from its magic comment.

    It does this in the same way as the `Python interpreter`__

    .. __: https://docs.python.org/3.4/reference/lexical_analysis.html#encoding-declarations

    The ``fp`` argument should be a seekable file object.

    (From Jeff Dairiki)

    :param fp: seekable binary file object; its position is restored on exit
    :return: the declared encoding name, ``'utf-8'`` when a UTF-8 BOM is
             present, or ``None`` if no declaration was found
    :raises SyntaxError: if a magic comment declares a non-UTF-8 encoding
                         while a UTF-8 BOM is present
    """
    saved_pos = fp.tell()
    fp.seek(0)
    try:
        first = fp.readline()
        bom_present = first.startswith(codecs.BOM_UTF8)
        if bom_present:
            first = first[len(codecs.BOM_UTF8):]

        match = PYTHON_MAGIC_COMMENT_re.match(first)
        if match is None:
            # Only inspect line 2 when line 1 parses on its own.  Otherwise
            # either the source is not valid Python, or line 2 is a
            # continuation of line 1 — in both cases it must not be scanned
            # for a magic comment.
            try:
                import ast
                ast.parse(first.decode('latin-1'))
            except (ImportError, SyntaxError, UnicodeEncodeError):
                pass
            else:
                match = PYTHON_MAGIC_COMMENT_re.match(fp.readline())

        if bom_present:
            # A BOM pins the encoding to UTF-8; a conflicting magic comment
            # is an error, just as in CPython itself.
            if match:
                magic_comment_encoding = match.group(1).decode('latin-1')
                if magic_comment_encoding != 'utf-8':
                    raise SyntaxError(f"encoding problem: {magic_comment_encoding} with BOM")
            return 'utf-8'
        if match:
            return match.group(1).decode('latin-1')
        return None
    finally:
        fp.seek(saved_pos)
PYTHON_FUTURE_IMPORT_re = re.compile(
    r'from\s+__future__\s+import\s+\(*(.+)\)*')


def parse_future_flags(fp: IO[bytes], encoding: str = 'latin-1') -> int:
    """Parse the compiler flags by :mod:`__future__` from the given Python
    code.

    :param fp: seekable binary file object containing Python source;
               its position is restored on exit
    :param encoding: encoding used to decode the source
    :return: bitwise OR of the ``compiler_flag`` values of every recognized
             ``__future__`` feature imported by the source
    """
    import __future__
    saved_pos = fp.tell()
    fp.seek(0)
    result = 0
    try:
        source = fp.read().decode(encoding)

        # Fix up the source to be (hopefully) parsable by regexpen.
        # This will likely do untoward things if the source code itself is broken.

        # (1) Fix `import (\n...` to be `import (...`.
        source = re.sub(r'import\s*\([\r\n]+', 'import (', source)
        # (2) Join line-ending commas with the next line.
        source = re.sub(r',\s*[\r\n]+', ', ', source)
        # (3) Remove backslash line continuations.
        source = re.sub(r'\\\s*[\r\n]+', ' ', source)

        for found in PYTHON_FUTURE_IMPORT_re.finditer(source):
            for raw_name in found.group(1).split(','):
                # Unknown names simply contribute nothing to the flags.
                feature = getattr(__future__, raw_name.strip().strip('()'), None)
                if feature:
                    result |= feature.compiler_flag
    finally:
        fp.seek(saved_pos)
    return result
def pathmatch(pattern: str, filename: str) -> bool:
    """Extended pathname pattern matching.

    This function is similar to what is provided by the ``fnmatch`` module in
    the Python standard library, but:

     * can match complete (relative or absolute) path names, and not just file
       names, and
     * also supports a convenience pattern ("**") to match files at any
       directory level.

    Examples:

    >>> pathmatch('**.py', 'bar.py')
    True
    >>> pathmatch('**.py', 'foo/bar/baz.py')
    True
    >>> pathmatch('**.py', 'templates/index.html')
    False

    >>> pathmatch('./foo/**.py', 'foo/bar/baz.py')
    True
    >>> pathmatch('./foo/**.py', 'bar/baz.py')
    False

    >>> pathmatch('^foo/**.py', 'foo/bar/baz.py')
    True
    >>> pathmatch('^foo/**.py', 'bar/baz.py')
    False

    >>> pathmatch('**/templates/*.html', 'templates/index.html')
    True
    >>> pathmatch('**/templates/*.html', 'templates/foo/bar.html')
    False

    :param pattern: the glob pattern
    :param filename: the path name of the file to match against
    """
    # Translation table: glob token -> regular-expression fragment.
    glob_to_re = {
        '?': '[^/]',
        '?/': '[^/]/',
        '*': '[^/]+',
        '*/': '[^/]+/',
        '**/': '(?:.+/)*?',
        '**': '(?:.+/)*?[^/]+',
    }

    # A leading '^' or './' anchors the pattern at the start of the path.
    if pattern.startswith('^'):
        pattern, fragments = pattern[1:], ['^']
    elif pattern.startswith('./'):
        pattern, fragments = pattern[2:], ['^']
    else:
        fragments = []

    # re.split with a capturing group alternates literal text (even indices)
    # and glob tokens (odd indices).
    for index, piece in enumerate(re.split('([?*]+/?)', pattern)):
        if index % 2:
            fragments.append(glob_to_re[piece])
        elif piece:
            fragments.append(re.escape(piece))

    normalized = filename.replace(os.sep, "/")
    return re.match(f"{''.join(fragments)}$", normalized) is not None
class TextWrapper(textwrap.TextWrapper):
    """``textwrap.TextWrapper`` that never breaks on single in-word hyphens."""

    # Break only on whitespace, or on runs of two or more dashes following a
    # word character (an em-dash) — never on a lone hyphen inside a word.
    wordsep_re = re.compile(
        r'(\s+|' # any whitespace
        r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))', # em-dash
    )


def wraptext(text: str, width: int = 70, initial_indent: str = '', subsequent_indent: str = '') -> list[str]:
    """Wrap *text* like :func:`textwrap.wrap` in the standard library, but
    without breaking lines on hyphens inside words.

    :param text: the text to wrap
    :param width: the maximum line width
    :param initial_indent: string that will be prepended to the first line of
                           wrapped output
    :param subsequent_indent: string that will be prepended to all lines save
                              the first of wrapped output
    """
    return TextWrapper(
        width=width,
        initial_indent=initial_indent,
        subsequent_indent=subsequent_indent,
        break_long_words=False,
    ).wrap(text)
# TODO (Babel 3.x): Remove this re-export
# Kept solely for backwards compatibility with code that imports
# ``babel.util.odict``; plain dicts have preserved insertion order since 3.7.
odict = collections.OrderedDict
class FixedOffsetTimezone(datetime.tzinfo):
    """Fixed offset in minutes east from UTC."""

    def __init__(self, offset: float, name: str | None = None) -> None:
        self._offset = datetime.timedelta(minutes=offset)
        # Fall back to a POSIX-style label built from the raw minute offset.
        self.zone = name if name is not None else 'Etc/GMT%+d' % offset

    def __str__(self) -> str:
        return self.zone

    def __repr__(self) -> str:
        return f'<FixedOffset "{self.zone}" {self._offset}>'

    def utcoffset(self, dt: datetime.datetime) -> datetime.timedelta:
        return self._offset

    def tzname(self, dt: datetime.datetime) -> str:
        return self.zone

    def dst(self, dt: datetime.datetime) -> datetime.timedelta:
        # Fixed-offset zones never observe daylight saving time.
        return ZERO
# Export the localtime functionality here because that's
# where it was in the past.
# TODO(3.0): remove these aliases
# NOTE(review): pure re-exports from ``babel.dates`` / ``babel.localtime``;
# presumably external code still imports them from here — verify before removal.
UTC = dates.UTC
LOCALTZ = dates.LOCALTZ
get_localzone = localtime.get_localzone
STDOFFSET = localtime.STDOFFSET
DSTOFFSET = localtime.DSTOFFSET
DSTDIFF = localtime.DSTDIFF
ZERO = localtime.ZERO
268def _cmp(a: Any, b: Any):
269 return (a > b) - (a < b)