Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/zipp/__init__.py: 53%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

182 statements  

1""" 

2A Path-like interface for zipfiles. 

3 

4This codebase is shared between zipfile.Path in the stdlib 

5and zipp in PyPI. See 

6https://github.com/python/importlib_metadata/wiki/Development-Methodology 

7for more detail. 

8""" 

9 

10import functools 

11import io 

12import itertools 

13import pathlib 

14import posixpath 

15import re 

16import stat 

17import sys 

18import zipfile 

19 

20from .compat.py310 import text_encoding 

21from .glob import Translator 

22 

23from ._functools import save_method_args 

24 

25 

# Public API of this module: only ``Path`` is exported by a star-import.
__all__ = ["Path"]

27 

28 

def _parents(path):
    """
    Generate every proper ancestor of *path*, nearest first, where the
    elements of *path* are separated by posixpath.sep. The path itself
    is not included.

    >>> list(_parents('b/d'))
    ['b']
    >>> list(_parents('/b/d/'))
    ['/b']
    >>> list(_parents('b/d/f/'))
    ['b/d', 'b']
    >>> list(_parents('b'))
    []
    >>> list(_parents(''))
    []
    """
    # The ancestry starts with the path itself; skip that first element.
    lineage = _ancestry(path)
    return itertools.islice(lineage, 1, None)

46 

47 

48def _ancestry(path): 

49 """ 

50 Given a path with elements separated by 

51 posixpath.sep, generate all elements of that path. 

52 

53 >>> list(_ancestry('b/d')) 

54 ['b/d', 'b'] 

55 >>> list(_ancestry('/b/d/')) 

56 ['/b/d', '/b'] 

57 >>> list(_ancestry('b/d/f/')) 

58 ['b/d/f', 'b/d', 'b'] 

59 >>> list(_ancestry('b')) 

60 ['b'] 

61 >>> list(_ancestry('')) 

62 [] 

63 

64 Multiple separators are treated like a single. 

65 

66 >>> list(_ancestry('//b//d///f//')) 

67 ['//b//d///f', '//b//d', '//b'] 

68 """ 

69 path = path.rstrip(posixpath.sep) 

70 while path.rstrip(posixpath.sep): 

71 yield path 

72 path, tail = posixpath.split(path) 

73 

74 

75_dedupe = dict.fromkeys 

76"""Deduplicate an iterable in original order""" 

77 

78 

79def _difference(minuend, subtrahend): 

80 """ 

81 Return items in minuend not in subtrahend, retaining order 

82 with O(1) lookup. 

83 """ 

84 return itertools.filterfalse(set(subtrahend).__contains__, minuend) 

85 

86 

class InitializedState:
    """
    Mix-in to save the initialization state for pickling.

    The pickled state is the pair ``(args, kwargs)`` that was passed to
    ``__init__``; unpickling re-initializes the object with those same
    arguments.
    """

    @save_method_args
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def __getstate__(self):
        """Return the ``(args, kwargs)`` recorded for ``__init__``."""
        return self._saved___init__.args, self._saved___init__.kwargs

    def __setstate__(self, state):
        """Re-initialize the instance from a pickled ``(args, kwargs)`` pair."""
        args, kwargs = state
        # Go through this class's own (decorated) __init__ rather than
        # straight to the next class in the MRO. Presumably the
        # save_method_args wrapper is what populates ``_saved___init__``
        # (verify against ._functools); bypassing it would leave a restored
        # instance without that attribute, so pickling it a second time
        # would fail in __getstate__.
        InitializedState.__init__(self, *args, **kwargs)

102 

103 

class CompleteDirs(InitializedState, zipfile.ZipFile):
    """
    ZipFile variant whose namelist also reports the directories that
    are only implied by the entries stored beneath them.

    >>> list(CompleteDirs._implied_dirs(['foo/bar.txt', 'foo/bar/baz.txt']))
    ['foo/', 'foo/bar/']
    >>> list(CompleteDirs._implied_dirs(['foo/bar.txt', 'foo/bar/baz.txt', 'foo/bar/']))
    ['foo/']
    """

    @staticmethod
    def _implied_dirs(names):
        # Every ancestor of every entry, spelled as a directory name.
        candidates = (
            ancestor + posixpath.sep
            for entry in names
            for ancestor in _parents(entry)
        )
        # Discard the ones already present explicitly, then collapse the
        # repeats while keeping first-seen order.
        missing = _difference(candidates, names)
        return _dedupe(missing)

    def namelist(self):
        """Return the archive's names followed by any implied directories."""
        explicit = super().namelist()
        return [*explicit, *self._implied_dirs(explicit)]

    def _name_set(self):
        return {*self.namelist()}

    def resolve_dir(self, name):
        """
        Return *name* with a trailing slash when it is known only as a
        directory; otherwise return *name* unchanged.
        """
        known = self._name_set()
        as_dir = name + '/'
        if name in known or as_dir not in known:
            return name
        return as_dir

    def getinfo(self, name):
        """
        Like ``ZipFile.getinfo``, but also answer for implied
        directories by fabricating a ZipInfo for them.
        """
        try:
            return super().getinfo(name)
        except KeyError:
            is_implied_dir = name.endswith('/') and name in self._name_set()
            if not is_implied_dir:
                raise
            return zipfile.ZipInfo(filename=name)

    @classmethod
    def make(cls, source):
        """
        Turn *source* (a filename or a zipfile) into an appropriate
        CompleteDirs subclass instance.

        An existing ZipFile object is converted in place by reassigning
        its ``__class__``.
        """
        if isinstance(source, CompleteDirs):
            return source

        if not isinstance(source, zipfile.ZipFile):
            return cls(source)

        # Only allow for FastLookup when supplied zipfile is read-only
        target = cls if 'r' in source.mode else CompleteDirs
        source.__class__ = target
        return source

    @classmethod
    def inject(cls, zf: zipfile.ZipFile) -> zipfile.ZipFile:
        """
        Write an empty entry into the writable zip file *zf* for each
        directory implied by its current members, then return *zf*.
        """
        for implied in cls._implied_dirs(zf.namelist()):
            zf.writestr(implied, b"")
        return zf

177 

178 

class FastLookup(CompleteDirs):
    """
    CompleteDirs variant that computes the name list and the name set
    once per instance and serves every later request from that cache,
    so implicit dirs are resolved rapidly.
    """

    # Cached backing values, each computed on first access.

    @functools.cached_property
    def _namelist(self):
        return super().namelist()

    @functools.cached_property
    def _name_set_prop(self):
        return super()._name_set()

    # Method-shaped accessors expected by callers of CompleteDirs.

    def namelist(self):
        return self._namelist

    def _name_set(self):
        return self._name_set_prop

198 

199 

def _extract_text_encoding(encoding=None, *args, **kwargs):
    """
    Peel the encoding off the front of the arguments and pass it through
    ``text_encoding``; return ``(encoding, args, kwargs)`` with the
    remaining arguments untouched.
    """
    # Choose the stack level so that the caller of the caller sees any
    # warning; one level deeper is used on PyPy.
    stack_level = 4 if sys.implementation.name == 'pypy' else 3
    resolved = text_encoding(encoding, stack_level)
    return resolved, args, kwargs

205 

206 

class Path:
    """
    A :class:`importlib.resources.abc.Traversable` interface for zip files.

    Implements many of the features users enjoy from
    :class:`pathlib.Path`.

    Consider a zip file with this structure::

        .
        ├── a.txt
        └── b
            ├── c.txt
            └── d
                └── e.txt

    >>> data = io.BytesIO()
    >>> zf = zipfile.ZipFile(data, 'w')
    >>> zf.writestr('a.txt', 'content of a')
    >>> zf.writestr('b/c.txt', 'content of c')
    >>> zf.writestr('b/d/e.txt', 'content of e')
    >>> zf.filename = 'mem/abcde.zip'

    Path accepts the zipfile object itself or a filename

    >>> path = Path(zf)

    From there, several path operations are available.

    Directory iteration (including the zip file itself):

    >>> a, b = path.iterdir()
    >>> a
    Path('mem/abcde.zip', 'a.txt')
    >>> b
    Path('mem/abcde.zip', 'b/')

    name property:

    >>> b.name
    'b'

    join with divide operator:

    >>> c = b / 'c.txt'
    >>> c
    Path('mem/abcde.zip', 'b/c.txt')
    >>> c.name
    'c.txt'

    Read text:

    >>> c.read_text(encoding='utf-8')
    'content of c'

    existence:

    >>> c.exists()
    True
    >>> (b / 'missing.txt').exists()
    False

    Coercion to string:

    >>> import os
    >>> str(c).replace(os.sep, posixpath.sep)
    'mem/abcde.zip/b/c.txt'

    At the root, ``name``, ``filename``, and ``parent``
    resolve to the zipfile.

    >>> str(path)
    'mem/abcde.zip/'
    >>> path.name
    'abcde.zip'
    >>> path.filename == pathlib.Path('mem/abcde.zip')
    True
    >>> str(path.parent)
    'mem'

    If the zipfile has no filename, such attributes are not
    valid and accessing them will raise an Exception.

    >>> zf.filename = None
    >>> path.name
    Traceback (most recent call last):
    ...
    TypeError: ...

    >>> path.filename
    Traceback (most recent call last):
    ...
    TypeError: ...

    >>> path.parent
    Traceback (most recent call last):
    ...
    TypeError: ...

    # workaround python/cpython#106763
    >>> pass
    """

    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

    def __init__(self, root, at=""):
        """
        Construct a Path from a ZipFile or filename.

        Note: When the source is an existing ZipFile object,
        its type (__class__) will be mutated to a
        specialized type. If the caller wishes to retain the
        original type, the caller should either create a
        separate ZipFile object or pass a filename.
        """
        self.root = FastLookup.make(root)
        self.at = at

    def __eq__(self, other):
        """
        Two paths are equal when they are of the same class and share
        the same root and location; other types are not comparable.

        >>> Path(zipfile.ZipFile(io.BytesIO(), 'w')) == 'foo'
        False
        """
        if self.__class__ is not other.__class__:
            return NotImplemented
        return (self.root, self.at) == (other.root, other.at)

    def __hash__(self):
        return hash((self.root, self.at))

    def open(self, mode='r', *args, pwd=None, **kwargs):
        """
        Open this entry as text or binary following the semantics
        of ``pathlib.Path.open()`` by passing arguments through
        to io.TextIOWrapper().

        Raises IsADirectoryError for a directory, FileNotFoundError
        when reading an entry that does not exist, and ValueError when
        encoding arguments accompany a binary mode.
        """
        if self.is_dir():
            raise IsADirectoryError(self)
        zip_mode = mode[0]
        if zip_mode == 'r' and not self.exists():
            raise FileNotFoundError(self)
        binary = 'b' in mode
        # Validate before opening the member: raising after the stream
        # exists would abandon an open handle and, for mode 'w', leave
        # the archive with a pending (or spuriously written) entry.
        if binary and (args or kwargs):
            raise ValueError("encoding args invalid for binary operation")
        stream = self.root.open(self.at, zip_mode, pwd=pwd)
        if binary:
            return stream
        # Text mode:
        encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
        return io.TextIOWrapper(stream, encoding, *args, **kwargs)

    def _base(self):
        # At the root (empty ``at``) the zip file's own filename stands in.
        return pathlib.PurePosixPath(self.at or self.root.filename)

    @property
    def name(self):
        """Final path component (of the zip file itself at the root)."""
        return self._base().name

    @property
    def suffix(self):
        """File extension of the final component."""
        return self._base().suffix

    @property
    def suffixes(self):
        """List of the final component's file extensions."""
        return self._base().suffixes

    @property
    def stem(self):
        """Final path component without its suffix."""
        return self._base().stem

    @property
    def filename(self):
        """The zip file's filename joined with this entry's location."""
        return pathlib.Path(self.root.filename).joinpath(self.at)

    def read_text(self, *args, **kwargs):
        """Open the entry in text mode and return its entire content."""
        encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
        with self.open('r', encoding, *args, **kwargs) as strm:
            return strm.read()

    def read_bytes(self):
        """Open the entry in binary mode and return its entire content."""
        with self.open('rb') as strm:
            return strm.read()

    def _is_child(self, path):
        # A direct child's parent directory is exactly this location;
        # trailing slashes are ignored on both sides.
        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")

    def _next(self, at):
        # Build a sibling object on the same archive at another location.
        return self.__class__(self.root, at)

    def is_dir(self):
        """
        Return whether the location denotes a directory: the root
        (empty location) or any location ending in a slash.
        """
        return not self.at or self.at.endswith("/")

    def is_file(self):
        """Return whether the entry exists and is not a directory."""
        return self.exists() and not self.is_dir()

    def exists(self):
        """Return whether the location is among the archive's names."""
        return self.at in self.root._name_set()

    def iterdir(self):
        """
        Return an iterator over the direct children of this directory.

        Raises ValueError when this path is not a directory.
        """
        if not self.is_dir():
            raise ValueError("Can't listdir a file")
        subs = map(self._next, self.root.namelist())
        return filter(self._is_child, subs)

    def match(self, path_pattern):
        """Match the location against *path_pattern* as a PurePosixPath."""
        return pathlib.PurePosixPath(self.at).match(path_pattern)

    def is_symlink(self):
        """
        Return whether this path is a symlink.
        """
        info = self.root.getinfo(self.at)
        # The high 16 bits of external_attr are interpreted as the mode.
        mode = info.external_attr >> 16
        return stat.S_ISLNK(mode)

    def glob(self, pattern):
        """
        Return an iterator of paths below this one whose names fully
        match *pattern*.

        Raises ValueError for an empty pattern.
        """
        if not pattern:
            raise ValueError(f"Unacceptable pattern: {pattern!r}")

        # Anchor the translated pattern at this path's own location.
        prefix = re.escape(self.at)
        tr = Translator(seps='/')
        matches = re.compile(prefix + tr.translate(pattern)).fullmatch
        return map(self._next, filter(matches, self.root.namelist()))

    def rglob(self, pattern):
        """Glob for *pattern* at any depth below this path."""
        return self.glob(f'**/{pattern}')

    def relative_to(self, other, *extra):
        """
        Return, as a string, this path relative to *other* joined
        with *extra*.
        """
        return posixpath.relpath(str(self), str(other.joinpath(*extra)))

    def __str__(self):
        return posixpath.join(self.root.filename, self.at)

    def __repr__(self):
        return self.__repr.format(self=self)

    def joinpath(self, *other):
        """
        Return the path formed by joining *other* onto this location,
        resolved to its directory form when it names a directory.
        """
        joined = posixpath.join(self.at, *other)
        return self._next(self.root.resolve_dir(joined))

    __truediv__ = joinpath

    @property
    def parent(self):
        """
        The containing directory. At the root this is the parent of the
        zip file's own filename rather than a location in the archive.
        """
        if not self.at:
            return self.filename.parent
        parent_at = posixpath.dirname(self.at.rstrip('/'))
        if parent_at:
            parent_at += '/'
        return self._next(parent_at)