Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pathlib_abc/_glob.py: 18%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""Filename globbing utility."""
3import contextlib
4import os
5import re
6from pathlib_abc import _fnmatch as fnmatch
7import functools
8import itertools
9import operator
10import stat
11import sys
14__all__ = ["glob", "iglob", "escape", "translate"]
def glob(pathname, *, root_dir=None, dir_fd=None, recursive=False,
         include_hidden=False):
    """Collect every path that matches *pathname* into a list.

    This is the eager counterpart of iglob(): all arguments are forwarded
    unchanged and the resulting iterator is drained into a list.

    Shell-style wildcards (as in fnmatch) are understood. Unlike fnmatch,
    names beginning with a dot are not matched by '*' and '?' unless
    `include_hidden` is true, in which case '*', '?' and '**' may match
    hidden directories as well.

    When `recursive` is true, the pattern '**' matches any files and zero
    or more directories and subdirectories.
    """
    matches = iglob(
        pathname,
        root_dir=root_dir,
        dir_fd=dir_fd,
        recursive=recursive,
        include_hidden=include_hidden,
    )
    return list(matches)
def iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False,
          include_hidden=False):
    """Lazily yield the paths that match a pathname pattern.

    Shell-style wildcards (as in fnmatch) are understood. Unlike fnmatch,
    names beginning with a dot are special-cased and are not matched by
    '*' and '?' patterns.

    When `recursive` is true, the pattern '**' matches any files and zero
    or more directories and subdirectories.

    Two audit events are raised before any work is done: "glob.glob" and
    "glob.glob/2" (the latter also carries `root_dir` and `dir_fd`).
    """
    sys.audit("glob.glob", pathname, recursive)
    sys.audit("glob.glob/2", pathname, recursive, root_dir, dir_fd)

    # An absent root is represented by an empty str/bytes of the same type
    # as the pattern, so later joins stay type-consistent.
    root_dir = pathname[:0] if root_dir is None else os.fspath(root_dir)

    matches = _iglob(pathname, root_dir, dir_fd, recursive, False,
                     include_hidden=include_hidden)

    if not pathname or recursive and _isrecursive(pathname[:2]):
        # The first result may be an empty string; drop it, but put any
        # non-empty first result back in front of the remaining ones.
        try:
            first = next(matches)
        except StopIteration:
            return matches
        if first:
            matches = itertools.chain((first,), matches)
    return matches
def _iglob(pathname, root_dir, dir_fd, recursive, dironly,
           include_hidden=False):
    """Generator doing the actual work behind iglob().

    The pattern is split into a directory part and a final component.
    Directory parts that themselves contain wildcards are expanded by a
    recursive call (with `dironly` set), and the final component is then
    expanded inside each resulting directory by _glob0, _glob1 or _glob2.
    """
    head, tail = os.path.split(pathname)

    if not has_magic(pathname):
        # Purely literal pattern: it matches itself if it exists.
        assert not dironly
        if tail:
            present = _lexists(_join(root_dir, pathname), dir_fd)
        else:
            # Patterns ending with a slash should match only directories.
            present = _isdir(_join(root_dir, head), dir_fd)
        if present:
            yield pathname
        return

    deep = recursive and _isrecursive(tail)

    if not head:
        # No directory part: expand the single component under root_dir.
        expand = _glob2 if deep else _glob1
        yield from expand(root_dir, tail, dir_fd, dironly,
                          include_hidden=include_hidden)
        return

    # `os.path.split()` returns the argument itself as a dirname if it is a
    # drive or UNC path. Prevent an infinite recursion if a drive or UNC path
    # contains magic characters (i.e. r'\\?\C:').
    if head != pathname and has_magic(head):
        parents = _iglob(head, root_dir, dir_fd, recursive, True,
                         include_hidden=include_hidden)
    else:
        parents = [head]

    if not has_magic(tail):
        expand = _glob0
    elif deep:
        expand = _glob2
    else:
        expand = _glob1

    for parent in parents:
        found = expand(_join(root_dir, parent), tail, dir_fd, dironly,
                       include_hidden=include_hidden)
        for name in found:
            yield os.path.join(parent, name)
# These two helper functions non-recursively glob inside a literal directory.
# They return a list of basenames. _glob1 accepts a pattern while _glob0
# takes a literal basename (so it only has to check for its existence).
def _glob1(dirname, pattern, dir_fd, dironly, include_hidden=False):
    """Match *pattern* against the entries of a single literal directory.

    Dot-prefixed entries are dropped before matching unless
    `include_hidden` is true or the pattern itself starts with a dot.
    The result is whatever fnmatch.filter() returns for the candidates.
    """
    candidates = _listdir(dirname, dir_fd, dironly)
    keep_hidden = include_hidden or _ishidden(pattern)
    if not keep_hidden:
        candidates = (name for name in candidates if not _ishidden(name))
    return fnmatch.filter(candidates, pattern)
def _glob0(dirname, basename, dir_fd, dironly, include_hidden=False):
    """Check a literal *basename* inside a literal directory.

    Returns ``[basename]`` when the target exists and ``[]`` otherwise.
    `dironly` and `include_hidden` are not used here; they are accepted so
    the signature is interchangeable with _glob1 and _glob2.
    """
    if basename:
        found = _lexists(_join(dirname, basename), dir_fd)
    else:
        # `os.path.split()` returns an empty basename for paths ending with a
        # directory separator. 'q*x/' should match only directories.
        found = _isdir(dirname, dir_fd)
    return [basename] if found else []
# This helper function recursively yields relative pathnames inside a literal
# directory.
def _glob2(dirname, pattern, dir_fd, dironly, include_hidden=False):
    """Expand a '**' component beneath a literal directory.

    An empty string of the pattern's type is yielded first when *dirname*
    is empty or names a directory (the "zero directories" match); every
    relative path produced by _rlistdir() follows.
    """
    assert _isrecursive(pattern)
    root_is_dir = not dirname or _isdir(dirname, dir_fd)
    if root_is_dir:
        yield pattern[:0]
    descendants = _rlistdir(dirname, dir_fd, dironly,
                            include_hidden=include_hidden)
    yield from descendants
# If dironly is false, yields all file names inside a directory.
# If dironly is true, yields only directory names.
137def _iterdir(dirname, dir_fd, dironly):
138 try:
139 fd = None
140 fsencode = None
141 if dir_fd is not None:
142 if dirname:
143 fd = arg = os.open(dirname, _dir_open_flags, dir_fd=dir_fd)
144 else:
145 arg = dir_fd
146 if isinstance(dirname, bytes):
147 fsencode = os.fsencode
148 elif dirname:
149 arg = dirname
150 elif isinstance(dirname, bytes):
151 arg = bytes(os.curdir, 'ASCII')
152 else:
153 arg = os.curdir
154 try:
155 with os.scandir(arg) as it:
156 for entry in it:
157 try:
158 if not dironly or entry.is_dir():
159 if fsencode is not None:
160 yield fsencode(entry.name)
161 else:
162 yield entry.name
163 except OSError:
164 pass
165 finally:
166 if fd is not None:
167 os.close(fd)
168 except OSError:
169 return
def _listdir(dirname, dir_fd, dironly):
    """Return everything _iterdir() yields as a list.

    The generator is explicitly closed afterwards so that its cleanup
    code runs before this function returns.
    """
    entries = _iterdir(dirname, dir_fd, dironly)
    with contextlib.closing(entries):
        return list(entries)
# Recursively yields relative pathnames inside a literal directory.
def _rlistdir(dirname, dir_fd, dironly, include_hidden=False):
    """Walk *dirname* depth-first, yielding paths relative to it.

    Each listed name is yielded before the names found beneath it.
    Dot-prefixed names are skipped (and not descended into) unless
    `include_hidden` is true.
    """
    for name in _listdir(dirname, dir_fd, dironly):
        if not include_hidden and _ishidden(name):
            continue
        yield name
        child = _join(dirname, name) if dirname else name
        nested = _rlistdir(child, dir_fd, dironly,
                           include_hidden=include_hidden)
        for sub in nested:
            yield _join(name, sub)
187def _lexists(pathname, dir_fd):
188 # Same as os.path.lexists(), but with dir_fd
189 if dir_fd is None:
190 return os.path.lexists(pathname)
191 try:
192 os.lstat(pathname, dir_fd=dir_fd)
193 except (OSError, ValueError):
194 return False
195 else:
196 return True
198def _isdir(pathname, dir_fd):
199 # Same as os.path.isdir(), but with dir_fd
200 if dir_fd is None:
201 return os.path.isdir(pathname)
202 try:
203 st = os.stat(pathname, dir_fd=dir_fd)
204 except (OSError, ValueError):
205 return False
206 else:
207 return stat.S_ISDIR(st.st_mode)
209def _join(dirname, basename):
210 # It is common if dirname or basename is empty
211 if not dirname or not basename:
212 return dirname or basename
213 return os.path.join(dirname, basename)
# Patterns that capture a single glob metacharacter: '*', '?' or '['.
# One is compiled for str input and one for bytes input.
magic_check = re.compile(r'([*?[])')
magic_check_bytes = re.compile(rb'([*?[])')
def has_magic(s):
    """Return True if *s* (str or bytes) contains '*', '?' or '['."""
    checker = magic_check_bytes if isinstance(s, bytes) else magic_check
    return checker.search(s) is not None
225def _ishidden(path):
226 return path[0] in ('.', b'.'[0])
228def _isrecursive(pattern):
229 if isinstance(pattern, bytes):
230 return pattern == b'**'
231 else:
232 return pattern == '**'
def escape(pathname):
    """Escape all special characters in *pathname*.

    Each of '*', '?' and '[' is wrapped in square brackets so that it is
    matched literally. The drive part (as split off by
    os.path.splitdrive()) is left untouched, because metacharacters do
    not work there and must not be escaped.
    """
    drive, tail = os.path.splitdrive(pathname)
    if isinstance(tail, bytes):
        escaped = magic_check_bytes.sub(br'[\1]', tail)
    else:
        escaped = magic_check.sub(r'[\1]', tail)
    return drive + escaped
247_special_parts = ('', '.', '..')
248_dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0)
249_no_recurse_symlinks = object()
def translate(pat, *, recursive=False, include_hidden=False, seps=None):
    """Convert a pathname pattern with shell wildcards into a regex string.

    The pattern is split on the separators and translated one segment at
    a time. A lone '*' segment matches exactly one path segment. If
    `recursive` is true, a '**' segment matches any number of path
    segments. Any other segment is handed to fnmatch._translate().

    If `include_hidden` is true, wildcards can match path segments
    beginning with a dot ('.').

    `seps` is a sequence of separator characters used both to split the
    pattern and to match separators in paths. When it is not given,
    os.path.sep and os.path.altsep (where available) are used.
    """
    if not seps:
        seps = (os.path.sep, os.path.altsep) if os.path.altsep else os.path.sep

    sep_chars = ''.join(re.escape(sep) for sep in seps)
    sep_rx = f'[{sep_chars}]' if len(seps) > 1 else sep_chars
    non_sep_rx = f'[^{sep_chars}]'

    # Regex building blocks: "one"/"any" number of segments, in "inner"
    # position (followed by a separator) or "final" position.
    if include_hidden:
        final_one = f'{non_sep_rx}+'
        inner_one = f'{final_one}{sep_rx}'
        inner_any = f'(?:.+{sep_rx})?'
        final_any = '.*'
    else:
        final_one = f'[^{sep_chars}.]{non_sep_rx}*'
        inner_one = f'{final_one}{sep_rx}'
        inner_any = f'(?:{inner_one})*'
        final_any = f'{inner_any}(?:{final_one})?'

    segments = re.split(sep_rx, pat)
    final_idx = len(segments) - 1
    pieces = []
    for idx, segment in enumerate(segments):
        is_final = idx == final_idx
        if segment == '*':
            pieces.append(final_one if is_final else inner_one)
            continue
        if recursive and segment == '**':
            if is_final:
                pieces.append(final_any)
            elif segments[idx + 1] != '**':
                # Only the last '**' of a run contributes to the regex.
                pieces.append(inner_any)
            continue
        if segment:
            if not include_hidden and segment[0] in '*?':
                # A leading wildcard must not match a leading dot.
                pieces.append(r'(?!\.)')
            pieces.extend(
                fnmatch._translate(segment, f'{non_sep_rx}*', non_sep_rx)[0])
        if not is_final:
            pieces.append(sep_rx)

    body = ''.join(pieces)
    return fr'(?s:{body})\Z'
@functools.lru_cache(maxsize=512)
def _compile_pattern(pat, seps, case_sensitive, recursive=True):
    """Return the bound ``match`` method of the regex compiled for *pat*.

    The glob pattern is translated with hidden files included, and is
    compiled with re.IGNORECASE when `case_sensitive` is false. Results
    are memoized, so every argument must be hashable.
    """
    regex = translate(pat, recursive=recursive, include_hidden=True, seps=seps)
    flags = 0 if case_sensitive else re.IGNORECASE
    compiled = re.compile(regex, flags=flags)
    return compiled.match
316class _GlobberBase:
317 """Abstract class providing shell-style pattern matching and globbing.
318 """
320 def __init__(self, sep, case_sensitive, case_pedantic=False, recursive=False):
321 self.sep = sep
322 self.case_sensitive = case_sensitive
323 self.case_pedantic = case_pedantic
324 self.recursive = recursive
326 # Abstract methods
328 @staticmethod
329 def lexists(path):
330 """Implements os.path.lexists().
331 """
332 raise NotImplementedError
334 @staticmethod
335 def scandir(path):
336 """Like os.scandir(), but generates (entry, name, path) tuples.
337 """
338 raise NotImplementedError
340 @staticmethod
341 def concat_path(path, text):
342 """Implements path concatenation.
343 """
344 raise NotImplementedError
346 @staticmethod
347 def stringify_path(path):
348 """Converts the path to a string object
349 """
350 raise NotImplementedError
352 # High-level methods
354 def compile(self, pat, altsep=None):
355 seps = (self.sep, altsep) if altsep else self.sep
356 return _compile_pattern(pat, seps, self.case_sensitive, self.recursive)
358 def selector(self, parts):
359 """Returns a function that selects from a given path, walking and
360 filtering according to the glob-style pattern parts in *parts*.
361 """
362 if not parts:
363 return self.select_exists
364 part = parts.pop()
365 if self.recursive and part == '**':
366 selector = self.recursive_selector
367 elif part in _special_parts:
368 selector = self.special_selector
369 elif not self.case_pedantic and magic_check.search(part) is None:
370 selector = self.literal_selector
371 else:
372 selector = self.wildcard_selector
373 return selector(part, parts)
375 def special_selector(self, part, parts):
376 """Returns a function that selects special children of the given path.
377 """
378 if parts:
379 part += self.sep
380 select_next = self.selector(parts)
382 def select_special(path, exists=False):
383 path = self.concat_path(path, part)
384 return select_next(path, exists)
385 return select_special
387 def literal_selector(self, part, parts):
388 """Returns a function that selects a literal descendant of a path.
389 """
391 # Optimization: consume and join any subsequent literal parts here,
392 # rather than leaving them for the next selector. This reduces the
393 # number of string concatenation operations.
394 while parts and magic_check.search(parts[-1]) is None:
395 part += self.sep + parts.pop()
396 if parts:
397 part += self.sep
399 select_next = self.selector(parts)
401 def select_literal(path, exists=False):
402 path = self.concat_path(path, part)
403 return select_next(path, exists=False)
404 return select_literal
406 def wildcard_selector(self, part, parts):
407 """Returns a function that selects direct children of a given path,
408 filtering by pattern.
409 """
411 match = None if part == '*' else self.compile(part)
412 dir_only = bool(parts)
413 if dir_only:
414 select_next = self.selector(parts)
416 def select_wildcard(path, exists=False):
417 try:
418 entries = self.scandir(path)
419 except OSError:
420 pass
421 else:
422 for entry, entry_name, entry_path in entries:
423 if match is None or match(entry_name):
424 if dir_only:
425 try:
426 if not entry.is_dir():
427 continue
428 except OSError:
429 continue
430 entry_path = self.concat_path(entry_path, self.sep)
431 yield from select_next(entry_path, exists=True)
432 else:
433 yield entry_path
434 return select_wildcard
436 def recursive_selector(self, part, parts):
437 """Returns a function that selects a given path and all its children,
438 recursively, filtering by pattern.
439 """
440 # Optimization: consume following '**' parts, which have no effect.
441 while parts and parts[-1] == '**':
442 parts.pop()
444 # Optimization: consume and join any following non-special parts here,
445 # rather than leaving them for the next selector. They're used to
446 # build a regular expression, which we use to filter the results of
447 # the recursive walk. As a result, non-special pattern segments
448 # following a '**' wildcard don't require additional filesystem access
449 # to expand.
450 follow_symlinks = self.recursive is not _no_recurse_symlinks
451 if follow_symlinks:
452 while parts and parts[-1] not in _special_parts:
453 part += self.sep + parts.pop()
455 match = None if part == '**' else self.compile(part)
456 dir_only = bool(parts)
457 select_next = self.selector(parts)
459 def select_recursive(path, exists=False):
460 path_str = self.stringify_path(path)
461 match_pos = len(path_str)
462 if match is None or match(path_str, match_pos):
463 yield from select_next(path, exists)
464 stack = [path]
465 while stack:
466 yield from select_recursive_step(stack, match_pos)
468 def select_recursive_step(stack, match_pos):
469 path = stack.pop()
470 try:
471 entries = self.scandir(path)
472 except OSError:
473 pass
474 else:
475 for entry, _entry_name, entry_path in entries:
476 is_dir = False
477 try:
478 if entry.is_dir(follow_symlinks=follow_symlinks):
479 is_dir = True
480 except OSError:
481 pass
483 if is_dir or not dir_only:
484 entry_path_str = self.stringify_path(entry_path)
485 if dir_only:
486 entry_path = self.concat_path(entry_path, self.sep)
487 if match is None or match(entry_path_str, match_pos):
488 if dir_only:
489 yield from select_next(entry_path, exists=True)
490 else:
491 # Optimization: directly yield the path if this is
492 # last pattern part.
493 yield entry_path
494 if is_dir:
495 stack.append(entry_path)
497 return select_recursive
499 def select_exists(self, path, exists=False):
500 """Yields the given path, if it exists.
501 """
502 if exists:
503 # Optimization: this path is already known to exist, e.g. because
504 # it was returned from os.scandir(), so we skip calling lstat().
505 yield path
506 elif self.lexists(path):
507 yield path
class _StringGlobber(_GlobberBase):
    """Globber for paths represented as plain strings.

    Existence checks use os.path.lexists(), concatenation is ordinary
    `+`, and a path is its own string form.
    """

    lexists = staticmethod(os.path.lexists)
    concat_path = operator.add

    @staticmethod
    def scandir(path):
        """Return a generator of (entry, name, path) tuples for *path*."""
        # We must close the scandir() object before proceeding to
        # avoid exhausting file descriptors when globbing deep trees.
        with os.scandir(path) as scanner:
            snapshot = list(scanner)
        return ((item, item.name, item.path) for item in snapshot)

    @staticmethod
    def stringify_path(path):
        """Return *path* unchanged: it is already a string."""
        return path