Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/zipp/__init__.py: 59%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

182 statements  

1""" 

2A Path-like interface for zipfiles. 

3 

4This codebase is shared between zipfile.Path in the stdlib 

5and zipp in PyPI. See 

6https://github.com/python/importlib_metadata/wiki/Development-Methodology 

7for more detail. 

8""" 

9 

10import functools 

11import io 

12import itertools 

13import pathlib 

14import posixpath 

15import re 

16import stat 

17import zipfile 

18 

19from ._functools import none_as, save_method_args 

20from .compat.py310 import text_encoding 

21from .glob import Translator 

22 

23__all__ = ['Path'] 

24 

25 

def _parents(path):
    """
    Return an iterator over the proper ancestors of a path whose
    elements are separated by posixpath.sep, nearest ancestor first.
    The path itself is never part of the result.

    >>> list(_parents('b/d'))
    ['b']
    >>> list(_parents('/b/d/'))
    ['/b']
    >>> list(_parents('b/d/f/'))
    ['b/d', 'b']
    >>> list(_parents('b'))
    []
    >>> list(_parents(''))
    []
    """
    lineage = _ancestry(path)
    # The lineage starts with the path itself; skip that first item.
    return itertools.islice(lineage, 1, None)

43 

44 

45def _ancestry(path): 

46 """ 

47 Given a path with elements separated by 

48 posixpath.sep, generate all elements of that path. 

49 

50 >>> list(_ancestry('b/d')) 

51 ['b/d', 'b'] 

52 >>> list(_ancestry('/b/d/')) 

53 ['/b/d', '/b'] 

54 >>> list(_ancestry('b/d/f/')) 

55 ['b/d/f', 'b/d', 'b'] 

56 >>> list(_ancestry('b')) 

57 ['b'] 

58 >>> list(_ancestry('')) 

59 [] 

60 

61 Multiple separators are treated like a single. 

62 

63 >>> list(_ancestry('//b//d///f//')) 

64 ['//b//d///f', '//b//d', '//b'] 

65 """ 

66 path = path.rstrip(posixpath.sep) 

67 while path.rstrip(posixpath.sep): 

68 yield path 

69 path, tail = posixpath.split(path) 

70 

71 

72_dedupe = dict.fromkeys 

73"""Deduplicate an iterable in original order""" 

74 

75 

76def _difference(minuend, subtrahend): 

77 """ 

78 Return items in minuend not in subtrahend, retaining order 

79 with O(1) lookup. 

80 """ 

81 return itertools.filterfalse(set(subtrahend).__contains__, minuend) 

82 

83 

class InitializedState:
    """
    Mix-in that pickles an object as the arguments its initializer
    was called with, and restores it by re-running the initializer.
    """

    @save_method_args
    def __init__(self, *args, **kwargs):
        # NOTE(review): save_method_args presumably records this call's
        # arguments on ``self._saved___init__`` -- confirm in ._functools.
        super().__init__(*args, **kwargs)

    def __getstate__(self):
        """Return the ``(args, kwargs)`` pair saved from ``__init__``."""
        saved = self._saved___init__
        return saved.args, saved.kwargs

    def __setstate__(self, state):
        """Re-initialize the next class in the MRO from ``(args, kwargs)``."""
        positional, keywords = state
        super().__init__(*positional, **keywords)

99 

100 

class CompleteDirs(InitializedState, zipfile.ZipFile):
    """
    ZipFile variant whose name listing also reports directories that
    are only implied by the paths of their children.

    >>> list(CompleteDirs._implied_dirs(['foo/bar.txt', 'foo/bar/baz.txt']))
    ['foo/', 'foo/bar/']
    >>> list(CompleteDirs._implied_dirs(['foo/bar.txt', 'foo/bar/baz.txt', 'foo/bar/']))
    ['foo/']
    """

    @staticmethod
    def _implied_dirs(names):
        """
        Return the directory names (with trailing separator) that are
        ancestors of some entry in names but are not themselves in
        names, without duplicates and in first-seen order.
        """
        ancestors = itertools.chain.from_iterable(map(_parents, names))
        dir_names = (ancestor + posixpath.sep for ancestor in ancestors)
        missing = _difference(dir_names, names)
        return _dedupe(missing)

    def namelist(self):
        """Return the archive's names followed by its implied directories."""
        listed = super().namelist()
        implied = list(self._implied_dirs(listed))
        return listed + implied

    def _name_set(self):
        """Return the names from ``namelist()`` as a set."""
        return {*self.namelist()}

    def resolve_dir(self, name):
        """
        Return name with a trailing slash when only the slash-suffixed
        form is a known entry; otherwise return name unchanged.
        """
        known = self._name_set()
        as_dir = name + '/'
        if name in known or as_dir not in known:
            return name
        return as_dir

    def getinfo(self, name):
        """
        Like ``ZipFile.getinfo``, but answer with a fresh ZipInfo for a
        directory that is only implied; any other unknown name still
        raises KeyError.
        """
        try:
            return super().getinfo(name)
        except KeyError:
            implied = name.endswith('/') and name in self._name_set()
            if not implied:
                raise
            return zipfile.ZipInfo(filename=name)

    @classmethod
    def make(cls, source):
        """
        Turn source (a filename or a zipfile) into a CompleteDirs
        instance.

        An existing CompleteDirs is returned as is. Any other ZipFile
        has its ``__class__`` reassigned in place. Anything else is
        passed to the cls constructor.
        """
        if isinstance(source, CompleteDirs):
            return source

        if isinstance(source, zipfile.ZipFile):
            # Only allow for FastLookup when supplied zipfile is read-only
            target = cls if 'r' in source.mode else CompleteDirs
            source.__class__ = target
            return source

        return cls(source)

    @classmethod
    def inject(cls, zf: zipfile.ZipFile) -> zipfile.ZipFile:
        """
        Write an empty entry into the writable zip file zf for every
        directory implied by its existing names, then return zf.
        """
        missing = cls._implied_dirs(zf.namelist())
        for directory in missing:
            zf.writestr(directory, b"")
        return zf

174 

175 

class FastLookup(CompleteDirs):
    """
    CompleteDirs variant that computes the name list and the name set
    once, on first use, and serves the cached values afterwards.
    """

    @functools.cached_property
    def _namelist(self):
        # Computed on first access, then stored on the instance.
        return super().namelist()

    @functools.cached_property
    def _name_set_prop(self):
        # Computed on first access, then stored on the instance.
        return super()._name_set()

    def namelist(self):
        """Return the cached name list."""
        return self._namelist

    def _name_set(self):
        """Return the cached set of names."""
        return self._name_set_prop

195 

196 

def _extract_text_encoding(encoding=None, *args, **kwargs):
    """
    Separate the encoding (first positional argument or keyword) from
    the remaining arguments.

    Returns ``(resolved_encoding, args, kwargs)`` where the encoding
    has been passed through ``text_encoding``.
    """
    # NOTE(review): the stack level of 3 presumably makes any warning
    # from text_encoding point at the caller of the Path method that
    # called this helper -- confirm against .compat.py310.
    resolved = text_encoding(encoding, 3)
    return resolved, args, kwargs

200 

201 

class Path:
    """
    A :class:`importlib.resources.abc.Traversable` interface for zip files.

    Implements many of the features users enjoy from
    :class:`pathlib.Path`.

    Consider a zip file with this structure::

        .
        ├── a.txt
        └── b
            ├── c.txt
            └── d
                └── e.txt

    >>> data = io.BytesIO()
    >>> zf = zipfile.ZipFile(data, 'w')
    >>> zf.writestr('a.txt', 'content of a')
    >>> zf.writestr('b/c.txt', 'content of c')
    >>> zf.writestr('b/d/e.txt', 'content of e')
    >>> zf.filename = 'mem/abcde.zip'

    Path accepts the zipfile object itself or a filename

    >>> path = Path(zf)

    From there, several path operations are available.

    Directory iteration (including the zip file itself):

    >>> a, b = path.iterdir()
    >>> a
    Path('mem/abcde.zip', 'a.txt')
    >>> b
    Path('mem/abcde.zip', 'b/')

    name property:

    >>> b.name
    'b'

    join with divide operator:

    >>> c = b / 'c.txt'
    >>> c
    Path('mem/abcde.zip', 'b/c.txt')
    >>> c.name
    'c.txt'

    Read text:

    >>> c.read_text(encoding='utf-8')
    'content of c'

    existence:

    >>> c.exists()
    True
    >>> (b / 'missing.txt').exists()
    False

    Coercion to string:

    >>> import os
    >>> str(c).replace(os.sep, posixpath.sep)
    'mem/abcde.zip/b/c.txt'

    At the root, ``name``, ``filename``, and ``parent``
    resolve to the zipfile.

    >>> str(path)
    'mem/abcde.zip'
    >>> path.name
    'abcde.zip'
    >>> path.filename == pathlib.Path('mem/abcde.zip')
    True
    >>> str(path.parent)
    'mem'

    If the zipfile has no filename, such attributes are not
    valid and accessing them will raise an Exception.

    >>> zf.filename = None
    >>> path.name
    Traceback (most recent call last):
    ...
    TypeError: ...

    >>> path.filename
    Traceback (most recent call last):
    ...
    TypeError: ...

    >>> path.parent
    Traceback (most recent call last):
    ...
    TypeError: ...

    # workaround python/cpython#106763
    >>> pass
    """

    # Template used by __repr__; name-mangled to stay private to this class.
    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

    def __init__(self, root, at=""):
        """
        Construct a Path from a ZipFile or filename.

        Note: When the source is an existing ZipFile object,
        its type (__class__) will be mutated to a
        specialized type. If the caller wishes to retain the
        original type, the caller should either create a
        separate ZipFile object or pass a filename.

        ``at`` is the location inside the archive; the empty string
        denotes the archive root.
        """
        self.root = FastLookup.make(root)
        self.at = at

    def __eq__(self, other):
        """
        Two paths are equal when they are of the same class and share
        the same root object and the same location.

        >>> Path(zipfile.ZipFile(io.BytesIO(), 'w')) == 'foo'
        False
        """
        if self.__class__ is not other.__class__:
            return NotImplemented
        return (self.root, self.at) == (other.root, other.at)

    def __hash__(self):
        # Hash the same pair that __eq__ compares.
        return hash((self.root, self.at))

    def open(self, mode='r', *args, pwd=None, **kwargs):
        """
        Open this entry as text or binary following the semantics
        of ``pathlib.Path.open()`` by passing arguments through
        to io.TextIOWrapper().

        Only the first character of ``mode`` is passed to the
        underlying zip file; a ``'b'`` anywhere in ``mode`` selects
        binary access, for which no encoding arguments are accepted.

        Raises IsADirectoryError for a directory, FileNotFoundError
        when reading an entry that does not exist, and ValueError when
        encoding arguments accompany a binary mode.

        All argument validation happens before the archive member is
        opened, so a rejected call never leaves a member stream open
        (or, in write mode, a stray empty member) behind.
        """
        if self.is_dir():
            raise IsADirectoryError(self)
        zip_mode = mode[0]
        if zip_mode == 'r' and not self.exists():
            raise FileNotFoundError(self)
        if 'b' in mode:
            if args or kwargs:
                raise ValueError("encoding args invalid for binary operation")
            return self.root.open(self.at, zip_mode, pwd=pwd)
        # Text mode:
        encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
        stream = self.root.open(self.at, zip_mode, pwd=pwd)
        try:
            return io.TextIOWrapper(stream, encoding, *args, **kwargs)
        except BaseException:
            # The wrapper never took ownership; release the member stream.
            stream.close()
            raise

    def _base(self):
        """
        Return the path object that name-like properties are read
        from: the in-archive location when there is one, otherwise
        the zip file's own filename.
        """
        return pathlib.PurePosixPath(self.at) if self.at else self.filename

    @property
    def name(self):
        """Final path component, as reported by ``_base()``."""
        return self._base().name

    @property
    def suffix(self):
        """File extension of the final component, as reported by ``_base()``."""
        return self._base().suffix

    @property
    def suffixes(self):
        """List of extensions of the final component, as reported by ``_base()``."""
        return self._base().suffixes

    @property
    def stem(self):
        """Final component without its suffix, as reported by ``_base()``."""
        return self._base().stem

    @property
    def filename(self):
        """The zip file's filename joined with this path's location, as a pathlib.Path."""
        return pathlib.Path(self.root.filename).joinpath(self.at)

    def read_text(self, *args, **kwargs):
        """
        Return the decoded contents of this entry. Arguments are the
        text-mode arguments accepted by ``open()``.
        """
        encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
        with self.open('r', encoding, *args, **kwargs) as strm:
            return strm.read()

    def read_bytes(self):
        """Return the raw contents of this entry."""
        with self.open('rb') as strm:
            return strm.read()

    def _is_child(self, path):
        """Return whether path is located directly inside this path."""
        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")

    def _next(self, at):
        """Return a path of the same class and root at location ``at``."""
        return self.__class__(self.root, at)

    def is_dir(self):
        """A path is a directory when it is the root or ends with a slash."""
        return not self.at or self.at.endswith("/")

    def is_file(self):
        """Return whether this path exists and is not a directory."""
        return self.exists() and not self.is_dir()

    def exists(self):
        """Return whether this location is among the root's known names."""
        return self.at in self.root._name_set()

    def iterdir(self):
        """
        Return an iterator over the direct children of this directory.

        Raises ValueError when this path is not a directory.
        """
        if not self.is_dir():
            raise ValueError("Can't listdir a file")
        subs = map(self._next, self.root.namelist())
        return filter(self._is_child, subs)

    def match(self, path_pattern):
        """Match the in-archive location using ``PurePosixPath.match``."""
        return pathlib.PurePosixPath(self.at).match(path_pattern)

    def is_symlink(self):
        """
        Return whether this path is a symlink.

        The file mode is read from the upper 16 bits of the entry's
        ``external_attr``. Whatever ``self.root.getinfo`` raises for
        this location propagates to the caller.
        """
        info = self.root.getinfo(self.at)
        mode = info.external_attr >> 16
        return stat.S_ISLNK(mode)

    def glob(self, pattern):
        """
        Return an iterator over the paths whose names fully match
        ``pattern``, interpreted relative to this location.

        Raises ValueError for an empty pattern.
        """
        if not pattern:
            raise ValueError(f"Unacceptable pattern: {pattern!r}")

        # Anchor the translated pattern at this location, taken literally.
        prefix = re.escape(self.at)
        tr = Translator(seps='/')
        matches = re.compile(prefix + tr.translate(pattern)).fullmatch
        return map(self._next, filter(matches, self.root.namelist()))

    def rglob(self, pattern):
        """Like ``glob`` with ``**/`` prepended to the pattern."""
        return self.glob(f'**/{pattern}')

    def relative_to(self, other, *extra):
        """
        Return, as a string, this path relative to ``other`` joined
        with ``extra``, computed by ``posixpath.relpath`` on the string
        forms of both paths.
        """
        return posixpath.relpath(str(self), str(other.joinpath(*extra)))

    def __str__(self):
        # NOTE(review): none_as presumably substitutes ':zipfile:' when
        # the zip file has no filename -- confirm in ._functools.
        root = none_as(self.root.filename, ':zipfile:')
        return posixpath.join(root, self.at) if self.at else root

    def __repr__(self):
        return self.__repr.format(self=self)

    def joinpath(self, *other):
        """
        Return the path formed by joining ``other`` onto this location.
        The result is passed through the root's ``resolve_dir``, which
        may add a trailing slash for a directory.
        """
        joined = posixpath.join(self.at, *other)
        return self._next(self.root.resolve_dir(joined))

    __truediv__ = joinpath

    @property
    def parent(self):
        """
        The containing directory. At the archive root this is the
        parent of the zip file's filename (a pathlib path); elsewhere
        it is another path within the same archive.
        """
        if not self.at:
            return self.filename.parent
        parent_at = posixpath.dirname(self.at.rstrip('/'))
        if parent_at:
            parent_at += '/'
        return self._next(parent_at)