Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pathlib_abc/_glob.py: 18%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

325 statements  

1"""Filename globbing utility.""" 

2 

3import contextlib 

4import os 

5import re 

6from pathlib_abc import _fnmatch as fnmatch 

7import functools 

8import itertools 

9import operator 

10import stat 

11import sys 

12 

13 

# Names exported by `from <this module> import *`.
__all__ = ["glob", "iglob", "escape", "translate"]

15 

def glob(pathname, *, root_dir=None, dir_fd=None, recursive=False,
         include_hidden=False):
    """Return a list of paths matching a pathname pattern.

    This is the eager counterpart of iglob(): every argument is forwarded
    to iglob() unchanged and the resulting iterator is collected into a
    list.

    The pattern may contain simple shell-style wildcards a la
    fnmatch. Unlike fnmatch, filenames starting with a
    dot are special cases that are not matched by '*' and '?'
    patterns by default.

    If `include_hidden` is true, the patterns '*', '?', '**' will match
    hidden directories.

    If `recursive` is true, the pattern '**' will match any files and
    zero or more directories and subdirectories.
    """
    matches = iglob(pathname, root_dir=root_dir, dir_fd=dir_fd,
                    recursive=recursive, include_hidden=include_hidden)
    return list(matches)

33 

def iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False,
          include_hidden=False):
    """Return an iterator which yields the paths matching a pathname pattern.

    The pattern may contain simple shell-style wildcards a la
    fnmatch. However, unlike fnmatch, filenames starting with a
    dot are special cases that are not matched by '*' and '?'
    patterns unless `include_hidden` is true.

    If `root_dir` is given it is converted with os.fspath() and used as the
    directory the pattern is resolved against; `dir_fd` is passed through
    to the filesystem helpers.

    If `recursive` is true, the pattern '**' will match any files and
    zero or more directories and subdirectories.

    Two audit events are raised before any work is done: "glob.glob" and
    "glob.glob/2".
    """
    sys.audit("glob.glob", pathname, recursive)
    sys.audit("glob.glob/2", pathname, recursive, root_dir, dir_fd)
    # With no explicit root, use an empty prefix of the pattern so that the
    # root has the same type (str or bytes) as the pattern itself.
    root_dir = pathname[:0] if root_dir is None else os.fspath(root_dir)
    results = _iglob(pathname, root_dir, dir_fd, recursive, False,
                     include_hidden=include_hidden)
    if not pathname or recursive and _isrecursive(pathname[:2]):
        # The first result may be an empty string; drop it, but put a
        # non-empty first result back in front of the iterator.
        first = next(results, None)
        if first:
            results = itertools.chain((first,), results)
    return results

62 

def _iglob(pathname, root_dir, dir_fd, recursive, dironly,
           include_hidden=False):
    """Yield the paths matching *pathname*, resolved against *root_dir*.

    The pattern is split into a directory part and a final component; the
    directory part is expanded first (recursively, directories only) and
    the final component is then matched inside every resulting directory.
    """
    dirname, basename = os.path.split(pathname)

    # No wildcard anywhere: this is a plain existence check.
    if not has_magic(pathname):
        assert not dironly
        if basename:
            found = _lexists(_join(root_dir, pathname), dir_fd)
        else:
            # Patterns ending with a slash should match only directories
            found = _isdir(_join(root_dir, dirname), dir_fd)
        if found:
            yield pathname
        return

    wants_recursion = recursive and _isrecursive(basename)

    # Pattern without a directory part: match directly inside root_dir.
    if not dirname:
        matcher = _glob2 if wants_recursion else _glob1
        yield from matcher(root_dir, basename, dir_fd, dironly,
                           include_hidden=include_hidden)
        return

    # `os.path.split()` returns the argument itself as a dirname if it is a
    # drive or UNC path.  Prevent an infinite recursion if a drive or UNC path
    # contains magic characters (i.e. r'\\?\C:').
    if dirname != pathname and has_magic(dirname):
        parents = _iglob(dirname, root_dir, dir_fd, recursive, True,
                         include_hidden=include_hidden)
    else:
        parents = [dirname]

    # Pick the helper for the last component: recursive walk, wildcard
    # match, or literal existence check.
    if wants_recursion:
        matcher = _glob2
    elif has_magic(basename):
        matcher = _glob1
    else:
        matcher = _glob0

    for parent in parents:
        found_names = matcher(_join(root_dir, parent), basename, dir_fd,
                              dironly, include_hidden=include_hidden)
        for name in found_names:
            yield os.path.join(parent, name)

103 

104# These 2 helper functions non-recursively glob inside a literal directory. 

105# They return a list of basenames. _glob1 accepts a pattern while _glob0 

106# takes a literal basename (so it only has to check for its existence). 

107 

def _glob1(dirname, pattern, dir_fd, dironly, include_hidden=False):
    """Return the entries of *dirname* whose names match *pattern*.

    Names that _ishidden() reports as hidden are dropped before matching,
    unless *include_hidden* is true or the pattern itself is hidden.
    """
    candidates = _listdir(dirname, dir_fd, dironly)
    if not include_hidden and not _ishidden(pattern):
        candidates = (name for name in candidates if not _ishidden(name))
    return fnmatch.filter(candidates, pattern)

113 

def _glob0(dirname, basename, dir_fd, dironly, include_hidden=False):
    """Return [basename] if the literal *basename* exists in *dirname*.

    Returns an empty list otherwise.  *dironly* and *include_hidden* are
    accepted for signature compatibility with the other helpers and are
    not used here.
    """
    if basename:
        present = _lexists(_join(dirname, basename), dir_fd)
    else:
        # `os.path.split()` returns an empty basename for paths ending with a
        # directory separator.  'q*x/' should match only directories.
        present = _isdir(dirname, dir_fd)
    return [basename] if present else []

124 

125# This helper function recursively yields relative pathnames inside a literal 

126# directory. 

127 

def _glob2(dirname, pattern, dir_fd, dironly, include_hidden=False):
    """Yield an empty name and then every relative path below *dirname*.

    The empty name (same type as *pattern*) stands for *dirname* itself and
    is only produced when *dirname* is empty or is a directory.
    """
    assert _isrecursive(pattern)
    root_matches = (not dirname) or _isdir(dirname, dir_fd)
    if root_matches:
        yield pattern[:0]
    yield from _rlistdir(dirname, dir_fd, dironly,
                         include_hidden=include_hidden)

134 

135# If dironly is false, yields all file names inside a directory. 

136# If dironly is true, yields only directory names. 

137def _iterdir(dirname, dir_fd, dironly): 

138 try: 

139 fd = None 

140 fsencode = None 

141 if dir_fd is not None: 

142 if dirname: 

143 fd = arg = os.open(dirname, _dir_open_flags, dir_fd=dir_fd) 

144 else: 

145 arg = dir_fd 

146 if isinstance(dirname, bytes): 

147 fsencode = os.fsencode 

148 elif dirname: 

149 arg = dirname 

150 elif isinstance(dirname, bytes): 

151 arg = bytes(os.curdir, 'ASCII') 

152 else: 

153 arg = os.curdir 

154 try: 

155 with os.scandir(arg) as it: 

156 for entry in it: 

157 try: 

158 if not dironly or entry.is_dir(): 

159 if fsencode is not None: 

160 yield fsencode(entry.name) 

161 else: 

162 yield entry.name 

163 except OSError: 

164 pass 

165 finally: 

166 if fd is not None: 

167 os.close(fd) 

168 except OSError: 

169 return 

170 

def _listdir(dirname, dir_fd, dironly):
    """Return the names produced by _iterdir() as a list.

    The underlying generator is always closed before returning.
    """
    names = _iterdir(dirname, dir_fd, dironly)
    try:
        return list(names)
    finally:
        names.close()

174 

175# Recursively yields relative pathnames inside a literal directory. 

def _rlistdir(dirname, dir_fd, dironly, include_hidden=False):
    """Yield every name below *dirname*, relative to it, depth first.

    Hidden names are skipped (together with everything below them) unless
    *include_hidden* is true.
    """
    for name in _listdir(dirname, dir_fd, dironly):
        if not include_hidden and _ishidden(name):
            continue
        yield name
        child = _join(dirname, name) if dirname else name
        descendants = _rlistdir(child, dir_fd, dironly,
                                include_hidden=include_hidden)
        for relative in descendants:
            yield _join(name, relative)

185 

186 

187def _lexists(pathname, dir_fd): 

188 # Same as os.path.lexists(), but with dir_fd 

189 if dir_fd is None: 

190 return os.path.lexists(pathname) 

191 try: 

192 os.lstat(pathname, dir_fd=dir_fd) 

193 except (OSError, ValueError): 

194 return False 

195 else: 

196 return True 

197 

198def _isdir(pathname, dir_fd): 

199 # Same as os.path.isdir(), but with dir_fd 

200 if dir_fd is None: 

201 return os.path.isdir(pathname) 

202 try: 

203 st = os.stat(pathname, dir_fd=dir_fd) 

204 except (OSError, ValueError): 

205 return False 

206 else: 

207 return stat.S_ISDIR(st.st_mode) 

208 

209def _join(dirname, basename): 

210 # It is common if dirname or basename is empty 

211 if not dirname or not basename: 

212 return dirname or basename 

213 return os.path.join(dirname, basename) 

214 

# Patterns that find any glob metacharacter ('*', '?' or '['), for str and
# bytes input respectively.  The capture group is used by escape().
magic_check = re.compile('([*?[])')
magic_check_bytes = re.compile(b'([*?[])')


def has_magic(s):
    """Return True if *s* (str or bytes) contains '*', '?' or '['."""
    checker = magic_check_bytes if isinstance(s, bytes) else magic_check
    return checker.search(s) is not None

224 

225def _ishidden(path): 

226 return path[0] in ('.', b'.'[0]) 

227 

228def _isrecursive(pattern): 

229 if isinstance(pattern, bytes): 

230 return pattern == b'**' 

231 else: 

232 return pattern == '**' 

233 

def escape(pathname):
    """Escape all special characters.

    Each of '*', '?' and '[' is wrapped in square brackets.  Works on both
    str and bytes.
    """
    # Metacharacters do not work in the drive part and shouldn't be escaped.
    drive, rest = os.path.splitdrive(pathname)
    if isinstance(rest, bytes):
        escaped = magic_check_bytes.sub(br'[\1]', rest)
    else:
        escaped = magic_check.sub(r'[\1]', rest)
    return drive + escaped

245 

246 

# Pattern parts that _GlobberBase.selector() routes to special_selector().
_special_parts = ('', '.', '..')
# Flags for os.open() in _iterdir(); O_DIRECTORY contributes 0 when the os
# module does not define it.
_dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0)
# Sentinel for the `recursive` setting of a globber: recursive_selector()
# compares against it by identity and, when it is set, does not follow
# symlinks during the recursive walk.
_no_recurse_symlinks = object()

250 

251 

def translate(pat, *, recursive=False, include_hidden=False, seps=None):
    """Translate a pathname with shell wildcards to a regular expression.

    If `recursive` is true, the pattern segment '**' will match any number of
    path segments.

    If `include_hidden` is true, wildcards can match path segments beginning
    with a dot ('.').

    If a sequence of separator characters is given to `seps`, they will be
    used to split the pattern into segments and match path separators. If not
    given, os.path.sep and os.path.altsep (where available) are used.
    """
    if not seps:
        seps = (os.path.sep, os.path.altsep) if os.path.altsep else os.path.sep
    sep_chars = ''.join(re.escape(sep) for sep in seps)
    sep_rx = f'[{sep_chars}]' if len(seps) > 1 else sep_chars
    non_sep_rx = f'[^{sep_chars}]'

    # Regex fragments for a bare '*' segment and for a '**' segment, in
    # both "followed by more segments" and "last segment" flavours.
    if include_hidden:
        final_segment_rx = f'{non_sep_rx}+'
        segment_rx = f'{final_segment_rx}{sep_rx}'
        segments_rx = f'(?:.+{sep_rx})?'
        final_segments_rx = '.*'
    else:
        final_segment_rx = f'[^{sep_chars}.]{non_sep_rx}*'
        segment_rx = f'{final_segment_rx}{sep_rx}'
        segments_rx = f'(?:{segment_rx})*'
        final_segments_rx = f'{segments_rx}(?:{final_segment_rx})?'

    pieces = []
    segments = re.split(sep_rx, pat)
    final_idx = len(segments) - 1
    for idx, segment in enumerate(segments):
        is_final = idx == final_idx
        if segment == '*':
            pieces.append(final_segment_rx if is_final else segment_rx)
        elif recursive and segment == '**':
            if is_final:
                pieces.append(final_segments_rx)
            elif segments[idx + 1] != '**':
                # A run of consecutive '**' segments only contributes once,
                # through its last member.
                pieces.append(segments_rx)
        else:
            if segment:
                if not include_hidden and segment[0] in '*?':
                    # A leading wildcard must not match a leading dot.
                    pieces.append(r'(?!\.)')
                pieces.extend(
                    fnmatch._translate(segment, f'{non_sep_rx}*', non_sep_rx)[0])
            if not is_final:
                pieces.append(sep_rx)
    body = ''.join(pieces)
    return fr'(?s:{body})\Z'

305 

306 

@functools.lru_cache(maxsize=512)
def _compile_pattern(pat, seps, case_sensitive, recursive=True):
    """Return the bound ``match`` method of the regex compiled from *pat*.

    The glob pattern is translated with hidden names included; matching
    ignores case unless *case_sensitive* is true.  Results are memoized.
    """
    regex = translate(pat, recursive=recursive, include_hidden=True, seps=seps)
    if case_sensitive:
        flags = 0
    else:
        flags = re.IGNORECASE
    compiled = re.compile(regex, flags=flags)
    return compiled.match

314 

315 

class _GlobberBase:
    """Abstract class providing shell-style pattern matching and globbing.

    Subclasses provide the four primitives lexists(), scandir(),
    concat_path() and stringify_path().  selector() turns a list of pattern
    parts into a chain of "select" functions; each one takes a path plus an
    `exists` hint and produces the matching paths.
    """

    def __init__(self, sep, case_sensitive, case_pedantic=False, recursive=False):
        # Separator text appended/inserted when pattern parts are joined.
        self.sep = sep
        # Forwarded to _compile_pattern() to choose case-(in)sensitive regexes.
        self.case_sensitive = case_sensitive
        # When true, selector() never uses literal_selector(); parts without
        # wildcards are then handled by wildcard_selector() instead.
        self.case_pedantic = case_pedantic
        # Truthy enables '**' handling.  The _no_recurse_symlinks sentinel
        # additionally stops the recursive walk from following symlinks.
        self.recursive = recursive

    # Abstract methods

    @staticmethod
    def lexists(path):
        """Implements os.path.lexists().
        """
        raise NotImplementedError

    @staticmethod
    def scandir(path):
        """Like os.scandir(), but generates (entry, name, path) tuples.
        """
        raise NotImplementedError

    @staticmethod
    def concat_path(path, text):
        """Implements path concatenation.
        """
        raise NotImplementedError

    @staticmethod
    def stringify_path(path):
        """Converts the path to a string object
        """
        raise NotImplementedError

    # High-level methods

    def compile(self, pat, altsep=None):
        """Returns a cached regex match function for the glob pattern *pat*.

        The globber's separator (plus *altsep*, if given) is used to split
        the pattern; case sensitivity and the recursive setting come from
        this instance.
        """
        seps = (self.sep, altsep) if altsep else self.sep
        return _compile_pattern(pat, seps, self.case_sensitive, self.recursive)

    def selector(self, parts):
        """Returns a function that selects from a given path, walking and
        filtering according to the glob-style pattern parts in *parts*.

        *parts* is consumed destructively from its end with pop(), so the
        next part to process is the last list element (callers presumably
        pass the parts in reverse order -- confirm against the caller).
        """
        if not parts:
            return self.select_exists
        part = parts.pop()
        if self.recursive and part == '**':
            selector = self.recursive_selector
        elif part in _special_parts:
            selector = self.special_selector
        elif not self.case_pedantic and magic_check.search(part) is None:
            selector = self.literal_selector
        else:
            selector = self.wildcard_selector
        return selector(part, parts)

    def special_selector(self, part, parts):
        """Returns a function that selects special children of the given path.
        """
        # A separator is only appended when more parts follow.
        if parts:
            part += self.sep
        select_next = self.selector(parts)

        def select_special(path, exists=False):
            # The `exists` hint is passed through unchanged.
            path = self.concat_path(path, part)
            return select_next(path, exists)
        return select_special

    def literal_selector(self, part, parts):
        """Returns a function that selects a literal descendant of a path.
        """

        # Optimization: consume and join any subsequent literal parts here,
        # rather than leaving them for the next selector. This reduces the
        # number of string concatenation operations.
        while parts and magic_check.search(parts[-1]) is None:
            part += self.sep + parts.pop()
        if parts:
            part += self.sep

        select_next = self.selector(parts)

        def select_literal(path, exists=False):
            path = self.concat_path(path, part)
            # The joined literal path has not been checked, so the `exists`
            # hint is reset for the next selector.
            return select_next(path, exists=False)
        return select_literal

    def wildcard_selector(self, part, parts):
        """Returns a function that selects direct children of a given path,
        filtering by pattern.
        """

        # A bare '*' matches every entry name, so no regex is needed.
        match = None if part == '*' else self.compile(part)
        # When more parts follow, only directories can lead to a match.
        dir_only = bool(parts)
        if dir_only:
            select_next = self.selector(parts)

        def select_wildcard(path, exists=False):
            try:
                entries = self.scandir(path)
            except OSError:
                # A path that cannot be scanned yields nothing.
                pass
            else:
                for entry, entry_name, entry_path in entries:
                    if match is None or match(entry_name):
                        if dir_only:
                            try:
                                if not entry.is_dir():
                                    continue
                            except OSError:
                                continue
                            entry_path = self.concat_path(entry_path, self.sep)
                            # The entry came from scandir(), so it is known
                            # to exist.
                            yield from select_next(entry_path, exists=True)
                        else:
                            yield entry_path
        return select_wildcard

    def recursive_selector(self, part, parts):
        """Returns a function that selects a given path and all its children,
        recursively, filtering by pattern.
        """
        # Optimization: consume following '**' parts, which have no effect.
        while parts and parts[-1] == '**':
            parts.pop()

        # Optimization: consume and join any following non-special parts here,
        # rather than leaving them for the next selector. They're used to
        # build a regular expression, which we use to filter the results of
        # the recursive walk. As a result, non-special pattern segments
        # following a '**' wildcard don't require additional filesystem access
        # to expand.
        follow_symlinks = self.recursive is not _no_recurse_symlinks
        if follow_symlinks:
            while parts and parts[-1] not in _special_parts:
                part += self.sep + parts.pop()

        # If nothing was joined onto '**', every walked path is accepted.
        match = None if part == '**' else self.compile(part)
        dir_only = bool(parts)
        select_next = self.selector(parts)

        def select_recursive(path, exists=False):
            path_str = self.stringify_path(path)
            # Regex matching starts after the starting path's own text, so
            # only the portion below it is tested against the pattern.
            match_pos = len(path_str)
            if match is None or match(path_str, match_pos):
                yield from select_next(path, exists)
            # Iterative walk using an explicit stack of directories.
            stack = [path]
            while stack:
                yield from select_recursive_step(stack, match_pos)

        def select_recursive_step(stack, match_pos):
            path = stack.pop()
            try:
                entries = self.scandir(path)
            except OSError:
                pass
            else:
                for entry, _entry_name, entry_path in entries:
                    is_dir = False
                    try:
                        if entry.is_dir(follow_symlinks=follow_symlinks):
                            is_dir = True
                    except OSError:
                        # Entries that cannot be examined count as
                        # non-directories.
                        pass

                    if is_dir or not dir_only:
                        # Take the string form before a separator is added.
                        entry_path_str = self.stringify_path(entry_path)
                        if dir_only:
                            entry_path = self.concat_path(entry_path, self.sep)
                        if match is None or match(entry_path_str, match_pos):
                            if dir_only:
                                yield from select_next(entry_path, exists=True)
                            else:
                                # Optimization: directly yield the path if this is
                                # last pattern part.
                                yield entry_path
                        if is_dir:
                            # Queue the directory so its children are visited.
                            stack.append(entry_path)

        return select_recursive

    def select_exists(self, path, exists=False):
        """Yields the given path, if it exists.
        """
        if exists:
            # Optimization: this path is already known to exist, e.g. because
            # it was returned from os.scandir(), so we skip calling lstat().
            yield path
        elif self.lexists(path):
            yield path

508 

509 

class _StringGlobber(_GlobberBase):
    """Globber whose paths are plain strings handled through `os`.
    """
    lexists = staticmethod(os.path.lexists)
    concat_path = operator.add

    @staticmethod
    def scandir(path):
        """Return a generator of (entry, name, path) for *path*'s entries.
        """
        # We must close the scandir() object before proceeding to
        # avoid exhausting file descriptors when globbing deep trees.
        with os.scandir(path) as handle:
            snapshot = list(handle)
        return ((item, item.name, item.path) for item in snapshot)

    @staticmethod
    def stringify_path(path):
        """Return *path* unchanged.
        """
        # Already a string.
        return path