Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/zipp/__init__.py: 53%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

184 statements  

1""" 

2A Path-like interface for zipfiles. 

3 

4This codebase is shared between zipfile.Path in the stdlib 

5and zipp in PyPI. See 

6https://github.com/python/importlib_metadata/wiki/Development-Methodology 

7for more detail. 

8""" 

9 

10import functools 

11import io 

12import itertools 

13import pathlib 

14import posixpath 

15import re 

16import stat 

17import sys 

18import zipfile 

19 

20from ._functools import save_method_args 

21from .compat.py310 import text_encoding 

22from .glob import Translator 

23 

24__all__ = ['Path'] 

25 

26 

def _parents(path):
    """
    Return an iterator over the parents of a path whose elements
    are separated by posixpath.sep, nearest parent first. The
    path itself is not part of the result.

    >>> list(_parents('b/d'))
    ['b']
    >>> list(_parents('/b/d/'))
    ['/b']
    >>> list(_parents('b/d/f/'))
    ['b/d', 'b']
    >>> list(_parents('b'))
    []
    >>> list(_parents(''))
    []
    """
    lineage = _ancestry(path)
    # Skip the first element produced, which is the path itself.
    return itertools.islice(lineage, 1, None)

44 

45 

46def _ancestry(path): 

47 """ 

48 Given a path with elements separated by 

49 posixpath.sep, generate all elements of that path. 

50 

51 >>> list(_ancestry('b/d')) 

52 ['b/d', 'b'] 

53 >>> list(_ancestry('/b/d/')) 

54 ['/b/d', '/b'] 

55 >>> list(_ancestry('b/d/f/')) 

56 ['b/d/f', 'b/d', 'b'] 

57 >>> list(_ancestry('b')) 

58 ['b'] 

59 >>> list(_ancestry('')) 

60 [] 

61 

62 Multiple separators are treated like a single. 

63 

64 >>> list(_ancestry('//b//d///f//')) 

65 ['//b//d///f', '//b//d', '//b'] 

66 """ 

67 path = path.rstrip(posixpath.sep) 

68 while path.rstrip(posixpath.sep): 

69 yield path 

70 path, tail = posixpath.split(path) 

71 

72 

73_dedupe = dict.fromkeys 

74"""Deduplicate an iterable in original order""" 

75 

76 

77def _difference(minuend, subtrahend): 

78 """ 

79 Return items in minuend not in subtrahend, retaining order 

80 with O(1) lookup. 

81 """ 

82 return itertools.filterfalse(set(subtrahend).__contains__, minuend) 

83 

84 

class InitializedState:
    """
    Mix-in to save the initialization state for pickling.

    The pickled state is the pair ``(args, kwargs)`` that was passed
    to ``__init__``; unpickling replays that call.
    """

    @save_method_args
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def __getstate__(self):
        """Return the saved ``(args, kwargs)`` of the ``__init__`` call."""
        saved = self._saved___init__
        return saved.args, saved.kwargs

    def __setstate__(self, state):
        """Re-initialize the instance from a saved ``(args, kwargs)`` pair."""
        args, kwargs = state
        # Go through this class's decorated __init__ rather than
        # super().__init__ so that the arguments are saved on the restored
        # instance as well (presumably by save_method_args, which is what
        # __getstate__ relies on); otherwise pickling an unpickled object
        # would fail for lack of ``_saved___init__``.
        InitializedState.__init__(self, *args, **kwargs)

100 

101 

class CompleteDirs(InitializedState, zipfile.ZipFile):
    """
    A ZipFile subclass that ensures that implied directories
    are always included in the namelist.

    >>> list(CompleteDirs._implied_dirs(['foo/bar.txt', 'foo/bar/baz.txt']))
    ['foo/', 'foo/bar/']
    >>> list(CompleteDirs._implied_dirs(['foo/bar.txt', 'foo/bar/baz.txt', 'foo/bar/']))
    ['foo/']
    """

    @staticmethod
    def _implied_dirs(names):
        # Every parent of every name, spelled as a directory entry.
        candidates = (
            parent + posixpath.sep
            for name in names
            for parent in _parents(name)
        )
        # Keep only directories not already listed, without repeats.
        return _dedupe(_difference(candidates, names))

    def namelist(self):
        explicit = super().namelist()
        return [*explicit, *self._implied_dirs(explicit)]

    def _name_set(self):
        return {*self.namelist()}

    def resolve_dir(self, name):
        """
        Return ``name`` with a trailing slash appended when only the
        slash-terminated form is a known entry; otherwise return
        ``name`` unchanged.
        """
        known = self._name_set()
        as_dir = name + '/'
        if name in known or as_dir not in known:
            return name
        return as_dir

    def getinfo(self, name):
        """
        Like ZipFile.getinfo, but synthesize a ZipInfo for
        directories that are only implied by their children.
        """
        try:
            return super().getinfo(name)
        except KeyError:
            implied = name.endswith('/') and name in self._name_set()
            if not implied:
                raise
            return zipfile.ZipInfo(filename=name)

    @classmethod
    def make(cls, source):
        """
        Given a source (filename or zipfile), return an
        appropriate CompleteDirs subclass.
        """
        if isinstance(source, CompleteDirs):
            return source

        if not isinstance(source, zipfile.ZipFile):
            return cls(source)

        # Only allow for FastLookup when supplied zipfile is read-only
        target = cls if 'r' in source.mode else CompleteDirs

        # Re-class the caller's object in place rather than copying it.
        source.__class__ = target
        return source

    @classmethod
    def inject(cls, zf: zipfile.ZipFile) -> zipfile.ZipFile:
        """
        Write an empty entry into the writable zip file zf for each
        directory that is implied by its children but not present,
        then return zf.
        """
        for implied in cls._implied_dirs(zf.namelist()):
            zf.writestr(implied, b"")
        return zf

175 

176 

class FastLookup(CompleteDirs):
    """
    ZipFile subclass to ensure implicit
    dirs exist and are resolved rapidly.

    The name list and the name set are each computed on first use
    and then cached on the instance.
    """

    @functools.cached_property
    def _namelist(self):
        # Computed once per instance by the parent implementation.
        return super().namelist()

    @functools.cached_property
    def _name_set_prop(self):
        # Computed once per instance by the parent implementation.
        return super()._name_set()

    def namelist(self):
        return self._namelist

    def _name_set(self):
        return self._name_set_prop

196 

197 

def _extract_text_encoding(encoding=None, *args, **kwargs):
    """
    Separate the encoding (first positional or keyword argument) from
    the remaining arguments and pass it through text_encoding.

    Returns a ``(encoding, args, kwargs)`` triple.
    """
    # compute stack level so that the caller of the caller sees any warning.
    stack_level = 3
    # PyPy no longer special cased after 7.3.19 (or maybe 7.3.18)
    # See jaraco/zipp#143
    on_pypy = sys.implementation.name == 'pypy'
    if on_pypy and sys.pypy_version_info < (7, 3, 19):
        stack_level += 1
    return text_encoding(encoding, stack_level), args, kwargs

206 

207 

class Path:
    """
    A :class:`importlib.resources.abc.Traversable` interface for zip files.

    Implements many of the features users enjoy from
    :class:`pathlib.Path`.

    Consider a zip file with this structure::

        .
        ├── a.txt
        └── b
            ├── c.txt
            └── d
                └── e.txt

    >>> data = io.BytesIO()
    >>> zf = zipfile.ZipFile(data, 'w')
    >>> zf.writestr('a.txt', 'content of a')
    >>> zf.writestr('b/c.txt', 'content of c')
    >>> zf.writestr('b/d/e.txt', 'content of e')
    >>> zf.filename = 'mem/abcde.zip'

    Path accepts the zipfile object itself or a filename

    >>> path = Path(zf)

    From there, several path operations are available.

    Directory iteration (including the zip file itself):

    >>> a, b = path.iterdir()
    >>> a
    Path('mem/abcde.zip', 'a.txt')
    >>> b
    Path('mem/abcde.zip', 'b/')

    name property:

    >>> b.name
    'b'

    join with divide operator:

    >>> c = b / 'c.txt'
    >>> c
    Path('mem/abcde.zip', 'b/c.txt')
    >>> c.name
    'c.txt'

    Read text:

    >>> c.read_text(encoding='utf-8')
    'content of c'

    existence:

    >>> c.exists()
    True
    >>> (b / 'missing.txt').exists()
    False

    Coercion to string:

    >>> import os
    >>> str(c).replace(os.sep, posixpath.sep)
    'mem/abcde.zip/b/c.txt'

    At the root, ``name``, ``filename``, and ``parent``
    resolve to the zipfile.

    >>> str(path)
    'mem/abcde.zip/'
    >>> path.name
    'abcde.zip'
    >>> path.filename == pathlib.Path('mem/abcde.zip')
    True
    >>> str(path.parent)
    'mem'

    If the zipfile has no filename, such attributes are not
    valid and accessing them will raise an Exception.

    >>> zf.filename = None
    >>> path.name
    Traceback (most recent call last):
    ...
    TypeError: ...

    >>> path.filename
    Traceback (most recent call last):
    ...
    TypeError: ...

    >>> path.parent
    Traceback (most recent call last):
    ...
    TypeError: ...

    # workaround python/cpython#106763
    >>> pass
    """

    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

    def __init__(self, root, at=""):
        """
        Construct a Path from a ZipFile or filename.

        Note: When the source is an existing ZipFile object,
        its type (__class__) will be mutated to a
        specialized type. If the caller wishes to retain the
        original type, the caller should either create a
        separate ZipFile object or pass a filename.
        """
        self.root = FastLookup.make(root)
        self.at = at

    def __eq__(self, other):
        """
        >>> Path(zipfile.ZipFile(io.BytesIO(), 'w')) == 'foo'
        False
        """
        if self.__class__ is not other.__class__:
            return NotImplemented
        return (self.root, self.at) == (other.root, other.at)

    def __hash__(self):
        return hash((self.root, self.at))

    def open(self, mode='r', *args, pwd=None, **kwargs):
        """
        Open this entry as text or binary following the semantics
        of ``pathlib.Path.open()`` by passing arguments through
        to io.TextIOWrapper().

        Raises IsADirectoryError if this path is a directory,
        FileNotFoundError when reading an entry that does not exist,
        and ValueError if encoding arguments accompany a binary mode.
        """
        if self.is_dir():
            raise IsADirectoryError(self)
        zip_mode = mode[0]
        if zip_mode == 'r' and not self.exists():
            raise FileNotFoundError(self)
        binary = 'b' in mode
        # Reject invalid arguments before opening the member so that no
        # handle is left open (and, in write mode, no empty entry is
        # committed to the archive) when the call fails.
        if binary and (args or kwargs):
            raise ValueError("encoding args invalid for binary operation")
        stream = self.root.open(self.at, zip_mode, pwd=pwd)
        if binary:
            return stream
        # Text mode:
        encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
        return io.TextIOWrapper(stream, encoding, *args, **kwargs)

    def _base(self):
        # Within the archive use the entry's own path; at the root fall
        # back to the path of the zip file itself.
        return pathlib.PurePosixPath(self.at) if self.at else self.filename

    @property
    def name(self):
        return self._base().name

    @property
    def suffix(self):
        return self._base().suffix

    @property
    def suffixes(self):
        return self._base().suffixes

    @property
    def stem(self):
        return self._base().stem

    @property
    def filename(self):
        """The zip file's filename joined with this entry's path."""
        return pathlib.Path(self.root.filename).joinpath(self.at)

    def read_text(self, *args, **kwargs):
        """Open this entry in text mode and return its full contents."""
        encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
        with self.open('r', encoding, *args, **kwargs) as strm:
            return strm.read()

    def read_bytes(self):
        """Open this entry in binary mode and return its full contents."""
        with self.open('rb') as strm:
            return strm.read()

    def _is_child(self, path):
        # Compare with trailing slashes removed so that directory entries
        # and file entries are treated alike.
        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")

    def _next(self, at):
        return self.__class__(self.root, at)

    def is_dir(self):
        """True at the root or when the entry path ends with a slash."""
        return not self.at or self.at.endswith("/")

    def is_file(self):
        return self.exists() and not self.is_dir()

    def exists(self):
        return self.at in self.root._name_set()

    def iterdir(self):
        """
        Return an iterator over the direct children of this directory.

        Raises ValueError if this path is not a directory.
        """
        if not self.is_dir():
            raise ValueError("Can't listdir a file")
        subs = map(self._next, self.root.namelist())
        return filter(self._is_child, subs)

    def match(self, path_pattern):
        return pathlib.PurePosixPath(self.at).match(path_pattern)

    def is_symlink(self):
        """
        Return whether this path is a symlink.
        """
        info = self.root.getinfo(self.at)
        # The file mode is read from the upper 16 bits of external_attr.
        mode = info.external_attr >> 16
        return stat.S_ISLNK(mode)

    def glob(self, pattern):
        """
        Return an iterator of Paths for the names in the archive that
        fully match this path followed by the translated pattern.

        Raises ValueError if the pattern is empty.
        """
        if not pattern:
            raise ValueError(f"Unacceptable pattern: {pattern!r}")

        prefix = re.escape(self.at)
        tr = Translator(seps='/')
        matches = re.compile(prefix + tr.translate(pattern)).fullmatch
        return map(self._next, filter(matches, self.root.namelist()))

    def rglob(self, pattern):
        """Glob for the pattern with a ``**/`` prefix."""
        return self.glob(f'**/{pattern}')

    def relative_to(self, other, *extra):
        """
        Return, as a string, the posix relative path from ``other``
        (joined with any ``extra`` segments) to this path.
        """
        return posixpath.relpath(str(self), str(other.joinpath(*extra)))

    def __str__(self):
        return posixpath.join(self.root.filename, self.at)

    def __repr__(self):
        return self.__repr.format(self=self)

    def joinpath(self, *other):
        """
        Return the Path for this entry joined with the given segments,
        resolved to a directory entry when the archive has one.
        """
        joined = posixpath.join(self.at, *other)
        return self._next(self.root.resolve_dir(joined))

    __truediv__ = joinpath

    @property
    def parent(self):
        """
        The containing directory as a Path, or, at the root, the
        parent of the zip file's own filename.
        """
        if not self.at:
            return self.filename.parent
        parent_at = posixpath.dirname(self.at.rstrip('/'))
        if parent_at:
            parent_at += '/'
        return self._next(parent_at)
222 └── e.txt 

223 

224 >>> data = io.BytesIO() 

225 >>> zf = zipfile.ZipFile(data, 'w') 

226 >>> zf.writestr('a.txt', 'content of a') 

227 >>> zf.writestr('b/c.txt', 'content of c') 

228 >>> zf.writestr('b/d/e.txt', 'content of e') 

229 >>> zf.filename = 'mem/abcde.zip' 

230 

231 Path accepts the zipfile object itself or a filename 

232 

233 >>> path = Path(zf) 

234 

235 From there, several path operations are available. 

236 

237 Directory iteration (including the zip file itself): 

238 

239 >>> a, b = path.iterdir() 

240 >>> a 

241 Path('mem/abcde.zip', 'a.txt') 

242 >>> b 

243 Path('mem/abcde.zip', 'b/') 

244 

245 name property: 

246 

247 >>> b.name 

248 'b' 

249 

250 join with divide operator: 

251 

252 >>> c = b / 'c.txt' 

253 >>> c 

254 Path('mem/abcde.zip', 'b/c.txt') 

255 >>> c.name 

256 'c.txt' 

257 

258 Read text: 

259 

260 >>> c.read_text(encoding='utf-8') 

261 'content of c' 

262 

263 existence: 

264 

265 >>> c.exists() 

266 True 

267 >>> (b / 'missing.txt').exists() 

268 False 

269 

270 Coercion to string: 

271 

272 >>> import os 

273 >>> str(c).replace(os.sep, posixpath.sep) 

274 'mem/abcde.zip/b/c.txt' 

275 

276 At the root, ``name``, ``filename``, and ``parent`` 

277 resolve to the zipfile. 

278 

279 >>> str(path) 

280 'mem/abcde.zip/' 

281 >>> path.name 

282 'abcde.zip' 

283 >>> path.filename == pathlib.Path('mem/abcde.zip') 

284 True 

285 >>> str(path.parent) 

286 'mem' 

287 

288 If the zipfile has no filename, such attributes are not 

289 valid and accessing them will raise an Exception. 

290 

291 >>> zf.filename = None 

292 >>> path.name 

293 Traceback (most recent call last): 

294 ... 

295 TypeError: ... 

296 

297 >>> path.filename 

298 Traceback (most recent call last): 

299 ... 

300 TypeError: ... 

301 

302 >>> path.parent 

303 Traceback (most recent call last): 

304 ... 

305 TypeError: ... 

306 

307 # workaround python/cpython#106763 

308 >>> pass 

309 """ 

310 

311 __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})" 

312 

313 def __init__(self, root, at=""): 

314 """ 

315 Construct a Path from a ZipFile or filename. 

316 

317 Note: When the source is an existing ZipFile object, 

318 its type (__class__) will be mutated to a 

319 specialized type. If the caller wishes to retain the 

320 original type, the caller should either create a 

321 separate ZipFile object or pass a filename. 

322 """ 

323 self.root = FastLookup.make(root) 

324 self.at = at 

325 

326 def __eq__(self, other): 

327 """ 

328 >>> Path(zipfile.ZipFile(io.BytesIO(), 'w')) == 'foo' 

329 False 

330 """ 

331 if self.__class__ is not other.__class__: 

332 return NotImplemented 

333 return (self.root, self.at) == (other.root, other.at) 

334 

335 def __hash__(self): 

336 return hash((self.root, self.at)) 

337 

338 def open(self, mode='r', *args, pwd=None, **kwargs): 

339 """ 

340 Open this entry as text or binary following the semantics 

341 of ``pathlib.Path.open()`` by passing arguments through 

342 to io.TextIOWrapper(). 

343 """ 

344 if self.is_dir(): 

345 raise IsADirectoryError(self) 

346 zip_mode = mode[0] 

347 if zip_mode == 'r' and not self.exists(): 

348 raise FileNotFoundError(self) 

349 stream = self.root.open(self.at, zip_mode, pwd=pwd) 

350 if 'b' in mode: 

351 if args or kwargs: 

352 raise ValueError("encoding args invalid for binary operation") 

353 return stream 

354 # Text mode: 

355 encoding, args, kwargs = _extract_text_encoding(*args, **kwargs) 

356 return io.TextIOWrapper(stream, encoding, *args, **kwargs) 

357 

358 def _base(self): 

359 return pathlib.PurePosixPath(self.at) if self.at else self.filename 

360 

361 @property 

362 def name(self): 

363 return self._base().name 

364 

365 @property 

366 def suffix(self): 

367 return self._base().suffix 

368 

369 @property 

370 def suffixes(self): 

371 return self._base().suffixes 

372 

373 @property 

374 def stem(self): 

375 return self._base().stem 

376 

377 @property 

378 def filename(self): 

379 return pathlib.Path(self.root.filename).joinpath(self.at) 

380 

381 def read_text(self, *args, **kwargs): 

382 encoding, args, kwargs = _extract_text_encoding(*args, **kwargs) 

383 with self.open('r', encoding, *args, **kwargs) as strm: 

384 return strm.read() 

385 

386 def read_bytes(self): 

387 with self.open('rb') as strm: 

388 return strm.read() 

389 

390 def _is_child(self, path): 

391 return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/") 

392 

393 def _next(self, at): 

394 return self.__class__(self.root, at) 

395 

396 def is_dir(self): 

397 return not self.at or self.at.endswith("/") 

398 

399 def is_file(self): 

400 return self.exists() and not self.is_dir() 

401 

402 def exists(self): 

403 return self.at in self.root._name_set() 

404 

405 def iterdir(self): 

406 if not self.is_dir(): 

407 raise ValueError("Can't listdir a file") 

408 subs = map(self._next, self.root.namelist()) 

409 return filter(self._is_child, subs) 

410 

411 def match(self, path_pattern): 

412 return pathlib.PurePosixPath(self.at).match(path_pattern) 

413 

414 def is_symlink(self): 

415 """ 

416 Return whether this path is a symlink. 

417 """ 

418 info = self.root.getinfo(self.at) 

419 mode = info.external_attr >> 16 

420 return stat.S_ISLNK(mode) 

421 

422 def glob(self, pattern): 

423 if not pattern: 

424 raise ValueError(f"Unacceptable pattern: {pattern!r}") 

425 

426 prefix = re.escape(self.at) 

427 tr = Translator(seps='/') 

428 matches = re.compile(prefix + tr.translate(pattern)).fullmatch 

429 return map(self._next, filter(matches, self.root.namelist())) 

430 

431 def rglob(self, pattern): 

432 return self.glob(f'**/{pattern}') 

433 

434 def relative_to(self, other, *extra): 

435 return posixpath.relpath(str(self), str(other.joinpath(*extra))) 

436 

437 def __str__(self): 

438 return posixpath.join(self.root.filename, self.at) 

439 

440 def __repr__(self): 

441 return self.__repr.format(self=self) 

442 

443 def joinpath(self, *other): 

444 next = posixpath.join(self.at, *other) 

445 return self._next(self.root.resolve_dir(next)) 

446 

447 __truediv__ = joinpath 

448 

449 @property 

450 def parent(self): 

451 if not self.at: 

452 return self.filename.parent 

453 parent_at = posixpath.dirname(self.at.rstrip('/')) 

454 if parent_at: 

455 parent_at += '/' 

456 return self._next(parent_at)