Coverage for /pythoncovmergedfiles/medio/medio/usr/lib/python3.9/zipfile.py: 26%

1495 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-20 07:00 +0000

1""" 

2Read and write ZIP files. 

3 

4XXX references to utf-8 need further investigation. 

5""" 

6import binascii 

7import importlib.util 

8import io 

9import itertools 

10import os 

11import posixpath 

12import shutil 

13import stat 

14import struct 

15import sys 

16import threading 

17import time 

18import contextlib 

19 

20try: 

21 import zlib # We may need its compression method 

22 crc32 = zlib.crc32 

23except ImportError: 

24 zlib = None 

25 crc32 = binascii.crc32 

26 

27try: 

28 import bz2 # We may need its compression method 

29except ImportError: 

30 bz2 = None 

31 

32try: 

33 import lzma # We may need its compression method 

34except ImportError: 

35 lzma = None 

36 

# Public API exported by "from zipfile import *".
__all__ = ["BadZipFile", "BadZipfile", "error",
           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
           "Path"]

41 

class BadZipFile(Exception):
    """Raised when a file is not a valid ZIP archive or is corrupted."""
    pass

44 

45 

class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """

51 

error = BadZipfile = BadZipFile # Pre-3.2 compatibility names


# Thresholds above which the ZIP64 extensions become necessary:
# classic ZIP records store sizes/offsets in 4-byte fields and counts
# in 2-byte fields.
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

# "Version needed to extract" values written into archive headers.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63

# Below are some formats and associated data for reading/writing headers using
# the struct module. The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Indices into the unpacked end-of-central-directory tuple/list.
_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Indices into the unpacked local file header tuple.
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Indices into the unpacked ZIP64 end-of-central-directory record.
_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9

# Optional "data descriptor" record signature (written after file data
# when bit 3 of the general-purpose flags is set).
_DD_SIGNATURE = 0x08074b50

# Pre-compiled (id, length) header layout shared by all extra fields.
_EXTRA_FIELD_STRUCT = struct.Struct('<HH')

170def _strip_extra(extra, xids): 

171 # Remove Extra Fields with specified IDs. 

172 unpack = _EXTRA_FIELD_STRUCT.unpack 

173 modified = False 

174 buffer = [] 

175 start = i = 0 

176 while i + 4 <= len(extra): 

177 xid, xlen = unpack(extra[i : i + 4]) 

178 j = i + 4 + xlen 

179 if xid in xids: 

180 if i != start: 

181 buffer.append(extra[start : i]) 

182 start = j 

183 modified = True 

184 i = j 

185 if not modified: 

186 return extra 

187 return b''.join(buffer) 

188 

def _check_zipfile(fp):
    """Return True if *fp* contains a readable end-of-central-directory
    record (i.e. looks like a ZIP archive), False otherwise."""
    try:
        endrec = _EndRecData(fp)
    except OSError:
        return False
    # _EndRecData returns a populated list on success, None on failure.
    return bool(endrec)

196 

def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.
    """
    try:
        # Duck-typed: anything with a read() method is treated as an
        # already-open binary stream.
        if hasattr(filename, "read"):
            return _check_zipfile(fp=filename)
        with open(filename, "rb") as fp:
            return _check_zipfile(fp)
    except OSError:
        # Unreadable/nonexistent path is simply "not a zipfile".
        return False

212 

def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin:   seekable binary file positioned anywhere.
    offset: negative offset (from end of file) of the classic end-of-
            central-directory record already located by the caller.
    endrec: mutable list in _ECD_* layout; updated in place on success.

    Returns endrec (possibly unmodified if no valid ZIP64 record exists).
    """
    try:
        # The ZIP64 locator sits immediately before the classic EOCD.
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        # No locator signature: this is a plain (non-ZIP64) archive.
        return endrec

    if diskno != 0 or disks > 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec

254 

255 

def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        # File shorter than one EOCD record: cannot be a ZIP file.
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
        data[0:4] == stringEndArchive and
        data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = struct.unpack(structEndArchive, data)
        endrec=list(endrec)

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    # rfind: take the LAST occurrence, closest to the true end record.
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # Zip file is corrupted.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None

315 

316 

class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    # __slots__ keeps per-entry memory low; archives can hold many members.
    __slots__ = (
        'orig_filename',    # original name as stored in the archive
        'filename',         # normalized name (null-stripped, '/'-separated)
        'date_time',        # (year, month, day, hour, min, sec)
        'compress_type',    # ZIP_STORED / ZIP_DEFLATED / ...
        '_compresslevel',   # compressor level, or None for default
        'comment',          # per-member comment (bytes)
        'extra',            # raw extra-field bytes
        'create_system',    # 0 = Windows/DOS, 3 = Unix
        'create_version',   # "version made by"
        'extract_version',  # "version needed to extract"
        'reserved',         # must be zero
        'flag_bits',        # general-purpose bit flags
        'volume',           # volume number of file header
        'internal_attr',
        'external_attr',    # high 16 bits carry Unix mode on create_system 3
        'header_offset',    # byte offset of local file header (set by ZipFile)
        'CRC',              # CRC-32 of uncompressed data (set by ZipFile)
        'compress_size',
        'file_size',
        '_raw_time',        # raw DOS time field, used for decryption check
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        # DOS timestamps encode the year in 7 bits relative to 1980.
        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        self.compress_size = 0          # Size of the compressed file
        self.file_size = 0              # Size of the uncompressed file
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file

    def __repr__(self):
        """Debug representation; omits fields that hold default values."""
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            # High word is the Unix mode when create_system == 3.
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object."""
        # Pack the timestamp into the 16-bit DOS date/time fields
        # (2-second resolution for seconds).
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            # Caller left the decision to us: use ZIP64 only when needed.
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            # Append the ZIP64 extra field (header id 1) with 64-bit sizes.
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded_filename, flag_bits); sets the UTF-8 flag (0x800)
        when the name does not fit in ASCII."""
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                data = extra[4:ln+4]
                # ZIP64 extension (large files and/or large archives)
                # Each 64-bit value is present only when the corresponding
                # 32-bit central-directory field is saturated (all ones).
                try:
                    if self.file_size in (0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF):
                        field = "File size"
                        self.file_size, = unpack('<Q', data[:8])
                        data = data[8:]
                    if self.compress_size == 0xFFFF_FFFF:
                        field = "Compress size"
                        self.compress_size, = unpack('<Q', data[:8])
                        data = data[8:]
                    if self.header_offset == 0xFFFF_FFFF:
                        field = "Header offset"
                        self.header_offset, = unpack('<Q', data[:8])
                except struct.error:
                    raise BadZipFile(f"Corrupt zip64 extra field. "
                                     f"{field} not found.") from None

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # With strict_timestamps disabled, clamp out-of-range mtimes into
        # the representable DOS range instead of raising in __init__.
        if not strict_timestamps and date_time[0] < 1980:
            date_time = (1980, 1, 1, 0, 0, 0)
        elif not strict_timestamps and date_time[0] > 2107:
            date_time = (2107, 12, 31, 23, 59, 59)
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            # Directory members are marked by a trailing slash.
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16      # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        return self.filename[-1] == '/'

530 

531 

532# ZIP encryption uses the CRC32 one-byte primitive for scrambling some 

533# internal keys. We noticed that a direct implementation is faster than 

534# relying on binascii.crc32(). 

535 

536_crctable = None 

537def _gen_crc(crc): 

538 for j in range(8): 

539 if crc & 1: 

540 crc = (crc >> 1) ^ 0xEDB88320 

541 else: 

542 crc >>= 1 

543 return crc 

544 

545# ZIP supports a password-based form of encryption. Even though known 

546# plaintext attacks have been found against it, it is still useful 

547# to be able to get data out of such a file. 

548# 

549# Usage: 

550# zd = _ZipDecrypter(mypwd) 

551# plain_bytes = zd(cypher_bytes) 

552 

def _ZipDecrypter(pwd):
    """Return a decrypting closure for traditional PKWARE encryption.

    pwd is the password as a bytes object.  The returned callable maps
    cipher bytes to plain bytes, mutating three rolling key values as it
    consumes data, so it must be fed the stream strictly in order.
    """
    # Initial key values mandated by the PKWARE specification.
    key0 = 305419896
    key1 = 591751049
    key2 = 878082192

    # Build the shared CRC table on first use.
    global _crctable
    if _crctable is None:
        _crctable = list(map(_gen_crc, range(256)))
    crctable = _crctable

    def crc32(ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]

    def update_keys(c):
        # Fold one plaintext byte into the three rolling keys.
        nonlocal key0, key1, key2
        key0 = crc32(c, key0)
        key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
        key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
        key2 = crc32(key1 >> 24, key2)

    # Prime the key state with the password bytes.
    for p in pwd:
        update_keys(p)

    def decrypter(data):
        """Decrypt a bytes object."""
        result = bytearray()
        append = result.append
        for c in data:
            k = key2 | 2
            c ^= ((k * (k^1)) >> 8) & 0xFF
            # Keys are updated with the *decrypted* byte.
            update_keys(c)
            append(c)
        return bytes(result)

    return decrypter

589 

590 

class LZMACompressor:
    """Compressor producing the ZIP LZMA member format: a 4-byte version/
    property-size header, the encoded LZMA1 filter properties, then the
    raw LZMA1 stream.  The underlying compressor is created lazily on the
    first compress()/flush() call."""

    def __init__(self):
        self._comp = None

    def _init(self):
        """Create the raw LZMA1 compressor and return the ZIP LZMA header."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filters = [lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)]
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=filters)
        # Header: LZMA SDK version 9.4 followed by the property blob size.
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        if self._comp is not None:
            return self._comp.compress(data)
        # First call: emit the header before any compressed bytes.
        return self._init() + self._comp.compress(data)

    def flush(self):
        if self._comp is not None:
            return self._comp.flush()
        # Flushing an untouched compressor still emits a valid header.
        return self._init() + self._comp.flush()

612 

613 

class LZMADecompressor:
    """Decompressor for the ZIP LZMA member format.

    Buffers input until the 4-byte header plus the filter-properties blob
    have arrived, then switches to a raw LZMA1 decompressor for the rest
    of the stream.
    """

    def __init__(self):
        self._decomp = None        # created once the properties are known
        self._unconsumed = b''     # header bytes accumulated so far
        self.eof = False

    def decompress(self, data):
        if self._decomp is None:
            # Still reading the header: <BBH> version pair + property size.
            self._unconsumed += data
            if len(self._unconsumed) <= 4:
                return b''
            props_len, = struct.unpack('<H', self._unconsumed[2:4])
            if len(self._unconsumed) <= 4 + props_len:
                return b''

            props = self._unconsumed[4:4 + props_len]
            filters = [lzma._decode_filter_properties(lzma.FILTER_LZMA1,
                                                      props)]
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=filters)
            # Everything past the header is compressed payload.
            data = self._unconsumed[4 + props_len:]
            del self._unconsumed

        out = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return out

640 

641 

# Human-readable names for the compression-method ids defined by the ZIP
# specification (APPNOTE section 4.4.5).  Used for repr() output and
# NotImplementedError messages; presence here does NOT imply support.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}

661 

def _check_compression(compression):
    """Validate that *compression* is a supported method whose backing
    module was successfully imported; raise otherwise."""
    if compression == ZIP_STORED:
        return
    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
        return
    if compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
        return
    if compression == ZIP_LZMA:
        if not lzma:
            raise RuntimeError(
                "Compression requires the (missing) lzma module")
        return
    raise NotImplementedError("That compression method is not supported")

679 

680 

def _get_compressor(compress_type, compresslevel=None):
    """Return a fresh compressor object for *compress_type*, or None when
    the data is stored uncompressed (or the type is unknown)."""
    if compress_type == ZIP_DEFLATED:
        # -15: raw deflate stream without zlib header/trailer.
        if compresslevel is None:
            return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        return zlib.compressobj(compresslevel, zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        if compresslevel is None:
            return bz2.BZ2Compressor()
        return bz2.BZ2Compressor(compresslevel)
    if compress_type == ZIP_LZMA:
        # compresslevel is ignored for ZIP_LZMA
        return LZMACompressor()
    return None

695 

696 

def _get_decompressor(compress_type):
    """Return a new decompressor for *compress_type* (None for stored
    data); raise if the method is unsupported or its module is missing."""
    _check_compression(compress_type)
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        # -15: raw deflate stream without zlib header/trailer.
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()
    # Defensive tail: _check_compression normally raises before we get here.
    descr = compressor_names.get(compress_type)
    if descr:
        raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
    raise NotImplementedError("compression type %d" % (compress_type,))

713 

714 

715class _SharedFile: 

716 def __init__(self, file, pos, close, lock, writing): 

717 self._file = file 

718 self._pos = pos 

719 self._close = close 

720 self._lock = lock 

721 self._writing = writing 

722 self.seekable = file.seekable 

723 self.tell = file.tell 

724 

725 def seek(self, offset, whence=0): 

726 with self._lock: 

727 if self._writing(): 

728 raise ValueError("Can't reposition in the ZIP file while " 

729 "there is an open writing handle on it. " 

730 "Close the writing handle before trying to read.") 

731 self._file.seek(offset, whence) 

732 self._pos = self._file.tell() 

733 return self._pos 

734 

735 def read(self, n=-1): 

736 with self._lock: 

737 if self._writing(): 

738 raise ValueError("Can't read from the ZIP file while there " 

739 "is an open writing handle on it. " 

740 "Close the writing handle before trying to read.") 

741 self._file.seek(self._pos) 

742 data = self._file.read(n) 

743 self._pos = self._file.tell() 

744 return data 

745 

746 def close(self): 

747 if self._file is not None: 

748 fileobj = self._file 

749 self._file = None 

750 self._close(fileobj) 

751 

752# Provide the tell method for unseekable stream 

753class _Tellable: 

754 def __init__(self, fp): 

755 self.fp = fp 

756 self.offset = 0 

757 

758 def write(self, data): 

759 n = self.fp.write(data) 

760 self.offset += n 

761 return n 

762 

763 def tell(self): 

764 return self.offset 

765 

766 def flush(self): 

767 self.fp.flush() 

768 

769 def close(self): 

770 self.fp.close() 

771 

772 

class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    # NOTE(review): '-' binds tighter than '<<', so this evaluates to
    # 1 << 30, not (1 << 31) - 1.  It only caps a per-call chunk size,
    # so the effect is a smaller chunk, not a correctness issue — confirm
    # before "fixing".
    MAX_N = 1 << 31 - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Chunk size to read during seek
    MAX_SEEK_READ = 1 << 24

786 

    def __init__(self, fileobj, mode, zipinfo, pwd=None,
                 close_fileobj=False):
        """Wrap *fileobj* (positioned at the member's compressed data) for
        reading one archive member described by *zipinfo*.

        pwd, if given, is the password bytes for traditional PKWARE
        decryption; close_fileobj controls whether close() also closes
        the underlying file object.
        """
        self._fileobj = fileobj
        self._pwd = pwd
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size   # compressed bytes left
        self._left = zipinfo.file_size                # plain bytes left

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''    # already-decompressed, not yet consumed
        self._offset = 0          # read position within _readbuffer

        self.newlines = None

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            # No reference CRC available; integrity checking is skipped.
            self._expected_crc = None

        self._seekable = False
        try:
            if fileobj.seekable():
                # Remember the starting state so seek() can rewind and
                # re-read from the beginning of the member.
                self._orig_compress_start = fileobj.tell()
                self._orig_compress_size = zipinfo.compress_size
                self._orig_file_size = zipinfo.file_size
                # NOTE(review): if _expected_crc is None, _running_crc was
                # never set and this line raises AttributeError, which the
                # handler below swallows — leaving the stream unseekable.
                self._orig_start_crc = self._running_crc
                self._seekable = True
        except AttributeError:
            pass

        self._decrypter = None
        if pwd:
            if zipinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zipinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zipinfo.CRC >> 24) & 0xff
            h = self._init_decrypter()
            if h != check_byte:
                raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)

836 

837 

    def _init_decrypter(self):
        """Create the decrypter, consume the 12-byte encryption header and
        return its final byte for the caller's password check."""
        self._decrypter = _ZipDecrypter(self._pwd)
        # The first 12 bytes in the cypher stream is an encryption header
        #  used to strengthen the algorithm. The first 11 bytes are
        #  completely random, while the 12th contains the MSB of the CRC,
        #  or the MSB of the file time depending on the header type
        #  and is used to check the correctness of the password.
        header = self._fileobj.read(12)
        self._compress_left -= 12
        return self._decrypter(header)[11]

848 

849 def __repr__(self): 

850 result = ['<%s.%s' % (self.__class__.__module__, 

851 self.__class__.__qualname__)] 

852 if not self.closed: 

853 result.append(' name=%r mode=%r' % (self.name, self.mode)) 

854 if self._compress_type != ZIP_STORED: 

855 result.append(' compress_type=%s' % 

856 compressor_names.get(self._compress_type, 

857 self._compress_type)) 

858 else: 

859 result.append(' [closed]') 

860 result.append('>') 

861 return ''.join(result) 

862 

863 def readline(self, limit=-1): 

864 """Read and return a line from the stream. 

865 

866 If limit is specified, at most limit bytes will be read. 

867 """ 

868 

869 if limit < 0: 

870 # Shortcut common case - newline found in buffer. 

871 i = self._readbuffer.find(b'\n', self._offset) + 1 

872 if i > 0: 

873 line = self._readbuffer[self._offset: i] 

874 self._offset = i 

875 return line 

876 

877 return io.BufferedIOBase.readline(self, limit) 

878 

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            # Not enough buffered: pull more data, then push it back so the
            # logical position is unchanged.
            chunk = self.read(n)
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                # Chunk fits inside the already-consumed prefix: just move
                # the offset back over it.
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

891 

    def readable(self):
        """Always True for an open handle; raises once closed."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

896 

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        if self.closed:
            raise ValueError("read from closed file.")
        if n is None or n < 0:
            # Read-to-EOF: drain the buffer, then pull everything left.
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # Entire request is satisfied from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        # Consume the rest of the buffer, then fetch the remainder.
        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Overshoot: keep the surplus buffered for the next read.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

931 

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

941 

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        if n is None or n < 0:
            # Unbounded: return buffered data plus at most one fetched chunk.
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # Satisfied entirely from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            # Fetch at most one non-empty chunk; surplus stays buffered.
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

978 

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            # EOF when the deflate stream says so, or when all input is
            # consumed with nothing left in the unconsumed tail.
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            # bzip2 / lzma decompressors take no max-length argument.
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

1014 

1015 def _read2(self, n): 

1016 if self._compress_left <= 0: 

1017 return b'' 

1018 

1019 n = max(n, self.MIN_READ_SIZE) 

1020 n = min(n, self._compress_left) 

1021 

1022 data = self._fileobj.read(n) 

1023 self._compress_left -= len(data) 

1024 if not data: 

1025 raise EOFError 

1026 

1027 if self._decrypter is not None: 

1028 data = self._decrypter(data) 

1029 return data 

1030 

    def close(self):
        """Close this member stream; release the underlying file if owned."""
        try:
            # Only close the shared file object when this stream owns it.
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            # Always mark the BufferedIOBase itself as closed.
            super().close()

1037 

1038 def seekable(self): 

1039 if self.closed: 

1040 raise ValueError("I/O operation on closed file.") 

1041 return self._seekable 

1042 

    def seek(self, offset, whence=0):
        """Seek within the decompressed stream and return the new position.

        Backward seeks rewind to the member start and re-read forward,
        since compressed streams cannot be repositioned arbitrarily.
        """
        if self.closed:
            raise ValueError("seek on closed file.")
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0: # Seek from start of file
            new_pos = offset
        elif whence == 1: # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2: # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        # Clamp the target position to [0, uncompressed size].
        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            # to the member start: restore CRC, counters, decompressor and
            # (if encrypted) the decrypter, then fall through to read forward.
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos
            if self._decrypter is not None:
                self._init_decrypter()

        # Read (and discard) forward in bounded chunks until the target.
        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()

1092 

1093 def tell(self): 

1094 if self.closed: 

1095 raise ValueError("tell on closed file.") 

1096 if not self._seekable: 

1097 raise io.UnsupportedOperation("underlying stream is not seekable") 

1098 filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset 

1099 return filepos 

1100 

1101 

class _ZipWriteFile(io.BufferedIOBase):
    """Writable stream returned by ZipFile.open(..., mode='w').

    Compresses data as it is written and, on close(), fixes up the member's
    header (or appends a data descriptor) and registers the entry with the
    owning ZipFile.
    """

    def __init__(self, zf, zinfo, zip64):
        self._zinfo = zinfo        # ZipInfo describing the member being written
        self._zip64 = zip64        # whether the local header uses ZIP64 fields
        self._zipfile = zf         # owning ZipFile
        self._compressor = _get_compressor(zinfo.compress_type,
                                           zinfo._compresslevel)
        self._file_size = 0        # uncompressed bytes written so far
        self._compress_size = 0    # compressed bytes emitted so far
        self._crc = 0              # running CRC-32 of the uncompressed data

    @property
    def _fileobj(self):
        # Indirect through the ZipFile so the current fp is always used.
        return self._zipfile.fp

    def writable(self):
        return True

    def write(self, data):
        """Write *data*, compressing if configured; return the input length."""
        if self.closed:
            raise ValueError('I/O operation on closed file.')
        nbytes = len(data)
        self._file_size += nbytes
        self._crc = crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)
            self._compress_size += len(data)
        self._fileobj.write(data)
        # Report uncompressed bytes accepted, per the io protocol.
        return nbytes

    def close(self):
        """Flush the compressor, finalize sizes/CRC and register the entry."""
        if self.closed:
            return
        try:
            super().close()
            # Flush any data from the compressor, and update header info
            if self._compressor:
                buf = self._compressor.flush()
                self._compress_size += len(buf)
                self._fileobj.write(buf)
                self._zinfo.compress_size = self._compress_size
            else:
                self._zinfo.compress_size = self._file_size
            self._zinfo.CRC = self._crc
            self._zinfo.file_size = self._file_size

            # Write updated header info
            if self._zinfo.flag_bits & 0x08:
                # Write CRC and file sizes after the file data
                # (data-descriptor mode, used when the output is unseekable).
                fmt = '<LLQQ' if self._zip64 else '<LLLL'
                self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
                    self._zinfo.compress_size, self._zinfo.file_size))
                self._zipfile.start_dir = self._fileobj.tell()
            else:
                if not self._zip64:
                    if self._file_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'File size unexpectedly exceeded ZIP64 limit')
                    if self._compress_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'Compressed size unexpectedly exceeded ZIP64 limit')
                # Seek backwards and write file header (which will now include
                # correct CRC and file sizes)

                # Preserve current position in file
                self._zipfile.start_dir = self._fileobj.tell()
                self._fileobj.seek(self._zinfo.header_offset)
                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
                self._fileobj.seek(self._zipfile.start_dir)

            # Successfully written: Add file to our caches
            self._zipfile.filelist.append(self._zinfo)
            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
        finally:
            # Allow the ZipFile to accept reads/writes again even on error.
            self._zipfile._writing = False

1177 

1178 

1179 

1180class ZipFile: 

1181 """ Class with methods to open, read, write, close, list zip files. 

1182 

1183 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True, 

1184 compresslevel=None) 

1185 

1186 file: Either the path to the file, or a file-like object. 

1187 If it is a path, the file will be opened and closed by ZipFile. 

1188 mode: The mode can be either read 'r', write 'w', exclusive create 'x', 

1189 or append 'a'. 

1190 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib), 

1191 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma). 

1192 allowZip64: if True ZipFile will create files with ZIP64 extensions when 

1193 needed, otherwise it will raise an exception when this would 

1194 be necessary. 

1195 compresslevel: None (default for the given compression type) or an integer 

1196 specifying the level to pass to the compressor. 

1197 When using ZIP_STORED or ZIP_LZMA this keyword has no effect. 

1198 When using ZIP_DEFLATED integers 0 through 9 are accepted. 

1199 When using ZIP_BZIP2 integers 1 through 9 are accepted. 

1200 

1201 """ 

1202 

1203 fp = None # Set here since __del__ checks it 

1204 _windows_illegal_name_trans_table = None 

1205 

    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
                 compresslevel=None, *, strict_timestamps=True):
        """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
        or append 'a'."""
        if mode not in ('r', 'w', 'x', 'a'):
            raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

        _check_compression(compression)

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.compresslevel = compresslevel
        self.mode = mode
        self.pwd = None
        self._comment = b''
        self._strict_timestamps = strict_timestamps

        # Check if we were passed a file-like object
        if isinstance(file, os.PathLike):
            file = os.fspath(file)
        if isinstance(file, str):
            # No, it's a filename
            self._filePassed = 0
            self.filename = file
            # Besides mapping ZipFile modes to io.open() modes, this dict
            # chains fallbacks: 'a' tries 'r+b', then 'w+b', then 'wb'.
            modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                        'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
            filemode = modeDict[mode]
            while True:
                try:
                    self.fp = io.open(file, filemode)
                except OSError:
                    # Follow the fallback chain until no entry remains.
                    if filemode in modeDict:
                        filemode = modeDict[filemode]
                        continue
                    raise
                break
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)
        self._fileRefCnt = 1
        self._lock = threading.RLock()
        self._seekable = True
        self._writing = False

        try:
            if mode == 'r':
                self._RealGetContents()
            elif mode in ('w', 'x'):
                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                try:
                    self.start_dir = self.fp.tell()
                except (AttributeError, OSError):
                    # Wrap objects lacking tell() so offsets can be tracked.
                    self.fp = _Tellable(self.fp)
                    self.start_dir = 0
                    self._seekable = False
                else:
                    # Some file-like objects can provide tell() but not seek()
                    try:
                        self.fp.seek(self.start_dir)
                    except (AttributeError, OSError):
                        self._seekable = False
            elif mode == 'a':
                try:
                    # See if file is a zip file
                    self._RealGetContents()
                    # seek to start of directory and overwrite
                    self.fp.seek(self.start_dir)
                except BadZipFile:
                    # file is not a zip file, just append
                    self.fp.seek(0, 2)

                    # set the modified flag so central directory gets written
                    # even if no files are added to the archive
                    self._didModify = True
                    self.start_dir = self.fp.tell()
            else:
                raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
        except:
            # On any setup failure, release the file before re-raising.
            fp = self.fp
            self.fp = None
            self._fpclose(fp)
            raise

1295 

    def __enter__(self):
        """Support the context-manager protocol: return the archive itself."""
        return self

1298 

    def __exit__(self, type, value, traceback):
        """Close the archive on context exit; exceptions propagate."""
        self.close()

1301 

1302 def __repr__(self): 

1303 result = ['<%s.%s' % (self.__class__.__module__, 

1304 self.__class__.__qualname__)] 

1305 if self.fp is not None: 

1306 if self._filePassed: 

1307 result.append(' file=%r' % self.fp) 

1308 elif self.filename is not None: 

1309 result.append(' filename=%r' % self.filename) 

1310 result.append(' mode=%r' % self.mode) 

1311 else: 

1312 result.append(' [closed]') 

1313 result.append('>') 

1314 return ''.join(result) 

1315 

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file."""
        fp = self.fp
        try:
            endrec = _EndRecData(fp)
        except OSError:
            raise BadZipFile("File is not a zip file")
        if not endrec:
            raise BadZipFile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
        self._comment = endrec[_ECD_COMMENT]    # archive comment

        # "concat" is zero, unless zip was concatenated to another file
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = concat + offset_cd
            print("given, inferred, offset", offset_cd, inferred, concat)
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        # Slurp the whole central directory and parse it from memory.
        data = fp.read(size_cd)
        fp = io.BytesIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if len(centdir) != sizeCentralDir:
                raise BadZipFile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if centdir[_CD_SIGNATURE] != stringCentralDir:
                raise BadZipFile("Bad magic number for central directory")
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            flags = centdir[5]
            if flags & 0x800:
                # UTF-8 file names extension
                filename = filename.decode('utf-8')
            else:
                # Historical ZIP filename encoding
                filename = filename.decode('cp437')
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
             x.flag_bits, x.compress_type, t, d,
             x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            if x.extract_version > MAX_EXTRACT_VERSION:
                raise NotImplementedError("zip file version %.1f" %
                                          (x.extract_version / 10))
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            # May adjust sizes/offsets from a ZIP64 extra field.
            x._decodeExtra()
            x.header_offset = x.header_offset + concat
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)

1392 

1393 

1394 def namelist(self): 

1395 """Return a list of file names in the archive.""" 

1396 return [data.filename for data in self.filelist] 

1397 

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        # NOTE: this is the live internal list, not a copy.
        return self.filelist

1402 

1403 def printdir(self, file=None): 

1404 """Print a table of contents for the zip file.""" 

1405 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"), 

1406 file=file) 

1407 for zinfo in self.filelist: 

1408 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6] 

1409 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size), 

1410 file=file) 

1411 

1412 def testzip(self): 

1413 """Read all the files and check the CRC.""" 

1414 chunk_size = 2 ** 20 

1415 for zinfo in self.filelist: 

1416 try: 

1417 # Read by chunks, to avoid an OverflowError or a 

1418 # MemoryError with very large embedded files. 

1419 with self.open(zinfo.filename, "r") as f: 

1420 while f.read(chunk_size): # Check CRC-32 

1421 pass 

1422 except BadZipFile: 

1423 return zinfo.filename 

1424 

1425 def getinfo(self, name): 

1426 """Return the instance of ZipInfo given 'name'.""" 

1427 info = self.NameToInfo.get(name) 

1428 if info is None: 

1429 raise KeyError( 

1430 'There is no item named %r in the archive' % name) 

1431 

1432 return info 

1433 

1434 def setpassword(self, pwd): 

1435 """Set default password for encrypted files.""" 

1436 if pwd and not isinstance(pwd, bytes): 

1437 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__) 

1438 if pwd: 

1439 self.pwd = pwd 

1440 else: 

1441 self.pwd = None 

1442 

    @property
    def comment(self):
        """The comment text associated with the ZIP file (bytes)."""
        return self._comment

1447 

    @comment.setter
    def comment(self, comment):
        """Set the archive comment; over-long comments are truncated
        (with a warning) to the format's 16-bit length limit."""
        if not isinstance(comment, bytes):
            raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
        # check for valid comment length
        if len(comment) > ZIP_MAX_COMMENT:
            import warnings
            warnings.warn('Archive comment is too long; truncating to %d bytes'
                          % ZIP_MAX_COMMENT, stacklevel=2)
            comment = comment[:ZIP_MAX_COMMENT]
        self._comment = comment
        # Ensure close() rewrites the end record with the new comment.
        self._didModify = True

1460 

    def read(self, name, pwd=None):
        """Return file bytes for name.  'pwd' is the password for an
        encrypted member."""
        with self.open(name, "r", pwd) as fp:
            return fp.read()

1465 

    def open(self, name, mode="r", pwd=None, *, force_zip64=False):
        """Return file-like object for 'name'.

        name is a string for the file name within the ZIP file, or a ZipInfo
        object.

        mode should be 'r' to read a file already in the ZIP file, or 'w' to
        write to a file newly added to the archive.

        pwd is the password to decrypt files (only used for reading).

        When writing, if the file size is not known in advance but may exceed
        2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
        files.  If the size is known in advance, it is best to pass a ZipInfo
        instance for name, with zinfo.file_size set.
        """
        if mode not in {"r", "w"}:
            raise ValueError('open() requires mode "r" or "w"')
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if pwd and (mode == "w"):
            raise ValueError("pwd is only supported for reading files")
        if not self.fp:
            raise ValueError(
                "Attempt to use ZIP archive that was already closed")

        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        elif mode == 'w':
            zinfo = ZipInfo(name)
            zinfo.compress_type = self.compression
            zinfo._compresslevel = self.compresslevel
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        if mode == 'w':
            return self._open_to_write(zinfo, force_zip64=force_zip64)

        if self._writing:
            raise ValueError("Can't read from the ZIP file while there "
                    "is an open writing handle on it. "
                    "Close the writing handle before trying to read.")

        # Open for reading:
        # _SharedFile tracks its own offset so several members can be read
        # concurrently from one underlying file object.
        self._fileRefCnt += 1
        zef_file = _SharedFile(self.fp, zinfo.header_offset,
                               self._fpclose, self._lock, lambda: self._writing)
        try:
            # Skip the file header:
            fheader = zef_file.read(sizeFileHeader)
            if len(fheader) != sizeFileHeader:
                raise BadZipFile("Truncated file header")
            fheader = struct.unpack(structFileHeader, fheader)
            if fheader[_FH_SIGNATURE] != stringFileHeader:
                raise BadZipFile("Bad magic number for file header")

            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            if zinfo.flag_bits & 0x20:
                # Zip 2.7: compressed patched data
                raise NotImplementedError("compressed patched data (flag bit 5)")

            if zinfo.flag_bits & 0x40:
                # strong encryption
                raise NotImplementedError("strong encryption (flag bit 6)")

            if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & 0x800:
                # UTF-8 filename
                fname_str = fname.decode("utf-8")
            else:
                fname_str = fname.decode("cp437")

            # The local header must agree with the central directory entry.
            if fname_str != zinfo.orig_filename:
                raise BadZipFile(
                    'File name in directory %r and header %r differ.'
                    % (zinfo.orig_filename, fname))

            # check for encrypted flag & handle password
            is_encrypted = zinfo.flag_bits & 0x1
            if is_encrypted:
                if not pwd:
                    pwd = self.pwd
                if not pwd:
                    raise RuntimeError("File %r is encrypted, password "
                                       "required for extraction" % name)
            else:
                pwd = None

            return ZipExtFile(zef_file, mode, zinfo, pwd, True)
        except:
            # Release our reference on any failure before re-raising.
            zef_file.close()
            raise

1563 

    def _open_to_write(self, zinfo, force_zip64=False):
        """Write the local header for *zinfo* and return a _ZipWriteFile
        through which the member's data is streamed into the archive."""
        if force_zip64 and not self._allowZip64:
            raise ValueError(
                "force_zip64 is True, but allowZip64 was False when opening "
                "the ZIP file."
            )
        if self._writing:
            raise ValueError("Can't write to the ZIP file while there is "
                             "another write handle open on it. "
                             "Close the first handle before opening another.")

        # Size and CRC are overwritten with correct data after processing the file
        zinfo.compress_size = 0
        zinfo.CRC = 0

        zinfo.flag_bits = 0x00
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02
        if not self._seekable:
            # Cannot rewrite the header later: emit a data descriptor instead.
            zinfo.flag_bits |= 0x08

        if not zinfo.external_attr:
            zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------

        # Compressed size can be larger than uncompressed size
        # (hence the 1.05 safety factor when deciding on ZIP64).
        zip64 = self._allowZip64 and \
                (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)

        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()

        self._writecheck(zinfo)
        self._didModify = True

        self.fp.write(zinfo.FileHeader(zip64))

        self._writing = True
        return _ZipWriteFile(self, zinfo, zip64)

1604 

1605 def extract(self, member, path=None, pwd=None): 

1606 """Extract a member from the archive to the current working directory, 

1607 using its full name. Its file information is extracted as accurately 

1608 as possible. `member' may be a filename or a ZipInfo object. You can 

1609 specify a different directory using `path'. 

1610 """ 

1611 if path is None: 

1612 path = os.getcwd() 

1613 else: 

1614 path = os.fspath(path) 

1615 

1616 return self._extract_member(member, path, pwd) 

1617 

1618 def extractall(self, path=None, members=None, pwd=None): 

1619 """Extract all members from the archive to the current working 

1620 directory. `path' specifies a different directory to extract to. 

1621 `members' is optional and must be a subset of the list returned 

1622 by namelist(). 

1623 """ 

1624 if members is None: 

1625 members = self.namelist() 

1626 

1627 if path is None: 

1628 path = os.getcwd() 

1629 else: 

1630 path = os.fspath(path) 

1631 

1632 for zipinfo in members: 

1633 self._extract_member(zipinfo, path, pwd) 

1634 

1635 @classmethod 

1636 def _sanitize_windows_name(cls, arcname, pathsep): 

1637 """Replace bad characters and remove trailing dots from parts.""" 

1638 table = cls._windows_illegal_name_trans_table 

1639 if not table: 

1640 illegal = ':<>|"?*' 

1641 table = str.maketrans(illegal, '_' * len(illegal)) 

1642 cls._windows_illegal_name_trans_table = table 

1643 arcname = arcname.translate(table) 

1644 # remove trailing dots 

1645 arcname = (x.rstrip('.') for x in arcname.split(pathsep)) 

1646 # rejoin, removing empty parts. 

1647 arcname = pathsep.join(x for x in arcname if x) 

1648 return arcname 

1649 

    def _extract_member(self, member, targetpath, pwd):
        """Extract the ZipInfo object 'member' to a physical
           file on the path targetpath.

        The archive name is sanitized so extraction cannot escape
        targetpath (absolute paths, drive letters and '..' are stripped).
        """
        if not isinstance(member, ZipInfo):
            member = self.getinfo(member)

        # build the destination pathname, replacing
        # forward slashes to platform specific separators.
        arcname = member.filename.replace('/', os.path.sep)

        if os.path.altsep:
            arcname = arcname.replace(os.path.altsep, os.path.sep)
        # interpret absolute pathname as relative, remove drive letter or
        # UNC path, redundant separators, "." and ".." components.
        arcname = os.path.splitdrive(arcname)[1]
        invalid_path_parts = ('', os.path.curdir, os.path.pardir)
        arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                                   if x not in invalid_path_parts)
        if os.path.sep == '\\':
            # filter illegal characters on Windows
            arcname = self._sanitize_windows_name(arcname, os.path.sep)

        targetpath = os.path.join(targetpath, arcname)
        targetpath = os.path.normpath(targetpath)

        # Create all upper directories if necessary.
        upperdirs = os.path.dirname(targetpath)
        if upperdirs and not os.path.exists(upperdirs):
            os.makedirs(upperdirs)

        if member.is_dir():
            # Directory entries carry no data; just ensure the dir exists.
            if not os.path.isdir(targetpath):
                os.mkdir(targetpath)
            return targetpath

        with self.open(member, pwd=pwd) as source, \
             open(targetpath, "wb") as target:
            shutil.copyfileobj(source, target)

        return targetpath

1691 

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive."""
        if zinfo.filename in self.NameToInfo:
            # Duplicates are legal in the format, so only warn.
            import warnings
            warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
        if self.mode not in ('w', 'x', 'a'):
            raise ValueError("write() requires mode 'w', 'x', or 'a'")
        if not self.fp:
            raise ValueError(
                "Attempt to write ZIP archive that was already closed")
        _check_compression(zinfo.compress_type)
        if not self._allowZip64:
            # With ZIP64 disabled, enforce the classic format limits up front.
            requires_zip64 = None
            if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
                requires_zip64 = "Files count"
            elif zinfo.file_size > ZIP64_LIMIT:
                requires_zip64 = "Filesize"
            elif zinfo.header_offset > ZIP64_LIMIT:
                requires_zip64 = "Zipfile size"
            if requires_zip64:
                raise LargeZipFile(requires_zip64 +
                                   " would require ZIP64 extensions")

1714 

    def write(self, filename, arcname=None,
              compress_type=None, compresslevel=None):
        """Put the bytes from filename into the archive under the name
        arcname."""
        if not self.fp:
            raise ValueError(
                "Attempt to write to ZIP archive that was already closed")
        if self._writing:
            raise ValueError(
                "Can't write to ZIP archive while an open writing handle exists"
            )

        # Build the ZipInfo (name, timestamps, attributes) from the OS file.
        zinfo = ZipInfo.from_file(filename, arcname,
                                  strict_timestamps=self._strict_timestamps)

        if zinfo.is_dir():
            # Directory entries carry no data, so no compression settings.
            zinfo.compress_size = 0
            zinfo.CRC = 0
        else:
            if compress_type is not None:
                zinfo.compress_type = compress_type
            else:
                zinfo.compress_type = self.compression

            if compresslevel is not None:
                zinfo._compresslevel = compresslevel
            else:
                zinfo._compresslevel = self.compresslevel

        if zinfo.is_dir():
            # Directories are written inline (header only, no data stream).
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                zinfo.header_offset = self.fp.tell()    # Start of header bytes
                if zinfo.compress_type == ZIP_LZMA:
                    # Compressed data includes an end-of-stream (EOS) marker
                    zinfo.flag_bits |= 0x02

                self._writecheck(zinfo)
                self._didModify = True

                self.filelist.append(zinfo)
                self.NameToInfo[zinfo.filename] = zinfo
                self.fp.write(zinfo.FileHeader(False))
                self.start_dir = self.fp.tell()
        else:
            # Regular files are streamed through the write handle.
            with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
                shutil.copyfileobj(src, dest, 1024*8)

1763 

    def writestr(self, zinfo_or_arcname, data,
                 compress_type=None, compresslevel=None):
        """Write a file into the archive.  The contents is 'data', which
        may be either a 'str' or a 'bytes' instance; if it is a 'str',
        it is encoded as UTF-8 first.
        'zinfo_or_arcname' is either a ZipInfo instance or
        the name of the file in the archive."""
        if isinstance(data, str):
            data = data.encode("utf-8")
        if not isinstance(zinfo_or_arcname, ZipInfo):
            # Synthesize a ZipInfo stamped with the current local time.
            zinfo = ZipInfo(filename=zinfo_or_arcname,
                            date_time=time.localtime(time.time())[:6])
            zinfo.compress_type = self.compression
            zinfo._compresslevel = self.compresslevel
            if zinfo.filename[-1] == '/':
                zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
                zinfo.external_attr |= 0x10           # MS-DOS directory flag
            else:
                zinfo.external_attr = 0o600 << 16     # ?rw-------
        else:
            zinfo = zinfo_or_arcname

        if not self.fp:
            raise ValueError(
                "Attempt to write to ZIP archive that was already closed")
        if self._writing:
            raise ValueError(
                "Can't write to ZIP archive while an open writing handle exists."
            )

        # Explicit arguments override whatever the ZipInfo carries.
        if compress_type is not None:
            zinfo.compress_type = compress_type

        if compresslevel is not None:
            zinfo._compresslevel = compresslevel

        zinfo.file_size = len(data)            # Uncompressed size
        with self._lock:
            with self.open(zinfo, mode='w') as dest:
                dest.write(data)

1804 

    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        # close() is a no-op when self.fp is already None.
        self.close()

1808 

    def close(self):
        """Close the file, and for mode 'w', 'x' and 'a' write the ending
        records."""
        if self.fp is None:
            return

        if self._writing:
            raise ValueError("Can't close the ZIP file while there is "
                             "an open writing handle on it. "
                             "Close the writing handle before closing the zip.")

        try:
            if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
                with self._lock:
                    if self._seekable:
                        self.fp.seek(self.start_dir)
                    self._write_end_record()
        finally:
            # Release the file object even if writing the records failed.
            fp = self.fp
            self.fp = None
            self._fpclose(fp)

1830 

    def _write_end_record(self):
        """Write the central directory and the end-of-archive record(s),
        emitting ZIP64 structures when the format's limits are exceeded."""
        for zinfo in self.filelist:         # write central directory
            dt = zinfo.date_time
            # Pack the timestamp into MS-DOS date/time fields.
            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
            extra = []
            if zinfo.file_size > ZIP64_LIMIT \
               or zinfo.compress_size > ZIP64_LIMIT:
                # Sizes go into a ZIP64 extra field; headers get sentinels.
                extra.append(zinfo.file_size)
                extra.append(zinfo.compress_size)
                file_size = 0xffffffff
                compress_size = 0xffffffff
            else:
                file_size = zinfo.file_size
                compress_size = zinfo.compress_size

            if zinfo.header_offset > ZIP64_LIMIT:
                extra.append(zinfo.header_offset)
                header_offset = 0xffffffff
            else:
                header_offset = zinfo.header_offset

            extra_data = zinfo.extra
            min_version = 0
            if extra:
                # Append a ZIP64 field to the extra's
                extra_data = _strip_extra(extra_data, (1,))
                extra_data = struct.pack(
                    '<HH' + 'Q'*len(extra),
                    1, 8*len(extra), *extra) + extra_data

                min_version = ZIP64_VERSION

            if zinfo.compress_type == ZIP_BZIP2:
                min_version = max(BZIP2_VERSION, min_version)
            elif zinfo.compress_type == ZIP_LZMA:
                min_version = max(LZMA_VERSION, min_version)

            extract_version = max(min_version, zinfo.extract_version)
            create_version = max(min_version, zinfo.create_version)
            filename, flag_bits = zinfo._encodeFilenameFlags()
            centdir = struct.pack(structCentralDir,
                                  stringCentralDir, create_version,
                                  zinfo.create_system, extract_version, zinfo.reserved,
                                  flag_bits, zinfo.compress_type, dostime, dosdate,
                                  zinfo.CRC, compress_size, file_size,
                                  len(filename), len(extra_data), len(zinfo.comment),
                                  0, zinfo.internal_attr, zinfo.external_attr,
                                  header_offset)
            self.fp.write(centdir)
            self.fp.write(filename)
            self.fp.write(extra_data)
            self.fp.write(zinfo.comment)

        pos2 = self.fp.tell()
        # Write end-of-zip-archive record
        centDirCount = len(self.filelist)
        centDirSize = pos2 - self.start_dir
        centDirOffset = self.start_dir
        requires_zip64 = None
        if centDirCount > ZIP_FILECOUNT_LIMIT:
            requires_zip64 = "Files count"
        elif centDirOffset > ZIP64_LIMIT:
            requires_zip64 = "Central directory offset"
        elif centDirSize > ZIP64_LIMIT:
            requires_zip64 = "Central directory size"
        if requires_zip64:
            # Need to write the ZIP64 end-of-archive records
            if not self._allowZip64:
                raise LargeZipFile(requires_zip64 +
                                   " would require ZIP64 extensions")
            zip64endrec = struct.pack(
                structEndArchive64, stringEndArchive64,
                44, 45, 45, 0, 0, centDirCount, centDirCount,
                centDirSize, centDirOffset)
            self.fp.write(zip64endrec)

            zip64locrec = struct.pack(
                structEndArchive64Locator,
                stringEndArchive64Locator, 0, pos2, 1)
            self.fp.write(zip64locrec)
            # The classic end record then carries capped sentinel values.
            centDirCount = min(centDirCount, 0xFFFF)
            centDirSize = min(centDirSize, 0xFFFFFFFF)
            centDirOffset = min(centDirOffset, 0xFFFFFFFF)

        endrec = struct.pack(structEndArchive, stringEndArchive,
                             0, 0, centDirCount, centDirCount,
                             centDirSize, centDirOffset, len(self._comment))
        self.fp.write(endrec)
        self.fp.write(self._comment)
        if self.mode == "a":
            # Drop any stale bytes left over from the pre-append archive.
            self.fp.truncate()
        self.fp.flush()

1924 

1925 def _fpclose(self, fp): 

1926 assert self._fileRefCnt > 0 

1927 self._fileRefCnt -= 1 

1928 if not self._fileRefCnt and not self._filePassed: 

1929 fp.close() 

1930 

1931 

class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        # optimize: -1 reuses existing bytecode / the interpreter's current
        # optimization level; 0, 1 or 2 force that specific level.
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive. If pathname is a plain
        directory, listdir *.py and enter all modules. Else, pathname
        must be a Python *.py file and the module will be put into the
        archive. Added modules are always module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.
        If filterfunc(pathname) is given, it is called with every argument.
        When it is False, the file or directory is skipped.
        """
        pathname = os.fspath(pathname)
        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s %r skipped by filterfunc' % (label, pathname))
            return
        dir, name = os.path.split(pathname)
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in", pathname, "as", basename)
                # [0:-3] strips the ".py" suffix before bytecode lookup.
                fname, arcname = self._get_codename(initname[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
                dirlist = sorted(os.listdir(pathname))
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    root, ext = os.path.splitext(filename)
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename,
                                         filterfunc=filterfunc)  # Recursive call
                    elif ext == ".py":
                        if filterfunc and not filterfunc(path):
                            if self.debug:
                                print('file %r skipped by filterfunc' % path)
                            continue
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory", pathname)
                for filename in sorted(os.listdir(pathname)):
                    path = os.path.join(pathname, filename)
                    root, ext = os.path.splitext(filename)
                    if ext == ".py":
                        if filterfunc and not filterfunc(path):
                            if self.debug:
                                print('file %r skipped by filterfunc' % path)
                            continue
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
        else:
            # A single file: it must be Python source.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary. For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).
        """
        def _compile(file, optimize=-1):
            # Byte-compile *file* in place; return False (printing the
            # compiler error) so the caller can fall back to shipping
            # the .py source instead.
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        # PEP 3147 __pycache__ locations for each optimization level.
        pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
        pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
        pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
        if self._optimize == -1:
            # legacy mode: use whatever file is present
            # Each candidate is used only if it is at least as new as the
            # source (st_mtime comparison).
            if (os.path.isfile(file_pyc) and
                  os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
                # Use .pyc file.
                arcname = fname = file_pyc
            elif (os.path.isfile(pycache_opt0) and
                  os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
                # file name in the archive.
                fname = pycache_opt0
                arcname = file_pyc
            elif (os.path.isfile(pycache_opt1) and
                  os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
                # file name in the archive.
                fname = pycache_opt1
                arcname = file_pyc
            elif (os.path.isfile(pycache_opt2) and
                  os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
                # file name in the archive.
                fname = pycache_opt2
                arcname = file_pyc
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    if sys.flags.optimize == 0:
                        fname = pycache_opt0
                    elif sys.flags.optimize == 1:
                        fname = pycache_opt1
                    else:
                        fname = pycache_opt2
                    arcname = file_pyc
                else:
                    # Compilation failed: ship the source file itself.
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname = pycache_opt0
                arcname = file_pyc
            else:
                arcname = file_pyc
                if self._optimize == 1:
                    fname = pycache_opt1
                elif self._optimize == 2:
                    fname = pycache_opt2
                else:
                    msg = "invalid value for 'optimize': {!r}".format(self._optimize)
                    raise ValueError(msg)
            # Recompile when the cached bytecode is missing or stale.
            if not (os.path.isfile(fname) and
                    os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py
        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)

2104 

2105 

def _parents(path):
    """
    Given a path with elements separated by
    posixpath.sep, generate all parents of that path.

    >>> list(_parents('b/d'))
    ['b']
    >>> list(_parents('/b/d/'))
    ['/b']
    >>> list(_parents('b/d/f/'))
    ['b/d', 'b']
    >>> list(_parents('b'))
    []
    >>> list(_parents(''))
    []
    """
    ancestors = _ancestry(path)
    # The first element of the ancestry is the path itself; skip it and
    # return the remaining iterator of proper parents.
    next(ancestors, None)
    return ancestors

2123 

2124 

2125def _ancestry(path): 

2126 """ 

2127 Given a path with elements separated by 

2128 posixpath.sep, generate all elements of that path 

2129 

2130 >>> list(_ancestry('b/d')) 

2131 ['b/d', 'b'] 

2132 >>> list(_ancestry('/b/d/')) 

2133 ['/b/d', '/b'] 

2134 >>> list(_ancestry('b/d/f/')) 

2135 ['b/d/f', 'b/d', 'b'] 

2136 >>> list(_ancestry('b')) 

2137 ['b'] 

2138 >>> list(_ancestry('')) 

2139 [] 

2140 """ 

2141 path = path.rstrip(posixpath.sep) 

2142 while path and path != posixpath.sep: 

2143 yield path 

2144 path, tail = posixpath.split(path) 

2145 

2146 

# dict preserves insertion order, so dict.fromkeys collapses duplicates
# while keeping first-seen order; callers iterate over the resulting keys.
_dedupe = dict.fromkeys
"""Deduplicate an iterable in original order"""

2149 

2150 

2151def _difference(minuend, subtrahend): 

2152 """ 

2153 Return items in minuend not in subtrahend, retaining order 

2154 with O(1) lookup. 

2155 """ 

2156 return itertools.filterfalse(set(subtrahend).__contains__, minuend) 

2157 

2158 

class CompleteDirs(ZipFile):
    """
    A ZipFile subclass that ensures that implied directories
    are always included in the namelist.
    """

    @staticmethod
    def _implied_dirs(names):
        # Directories that contain entries but lack an explicit entry of
        # their own; rendered with a trailing slash, deduplicated in
        # first-seen order.
        parents = itertools.chain.from_iterable(map(_parents, names))
        as_dirs = (p + posixpath.sep for p in parents)
        return _dedupe(_difference(as_dirs, names))

    def namelist(self):
        # Real entries first, then any directories only implied by them.
        names = super(CompleteDirs, self).namelist()
        return names + list(self._implied_dirs(names))

    def _name_set(self):
        # Set form of namelist() for O(1) membership tests.
        return set(self.namelist())

    def resolve_dir(self, name):
        """
        If the name represents a directory, return that name
        as a directory (with the trailing slash).
        """
        names = self._name_set()
        dirname = name + '/'
        # Rewrite only when the bare name is absent but the slashed
        # directory form exists in the archive.
        dir_match = name not in names and dirname in names
        return dirname if dir_match else name

    @classmethod
    def make(cls, source):
        """
        Given a source (filename or zipfile), return an
        appropriate CompleteDirs subclass.
        """
        if isinstance(source, CompleteDirs):
            return source

        if not isinstance(source, ZipFile):
            return cls(source)

        # Only allow for FastPath when supplied zipfile is read-only
        if 'r' not in source.mode:
            cls = CompleteDirs

        # Wrap the already-open ZipFile without re-reading it: create a
        # bare instance of the target class and copy the source's state.
        res = cls.__new__(cls)
        vars(res).update(vars(source))
        return res

2207 

2208 

class FastLookup(CompleteDirs):
    """
    ZipFile subclass to ensure implicit
    dirs exist and are resolved rapidly.
    """

    def namelist(self):
        # Compute the (completed) name list once, then memoize it on the
        # instance; safe because instances built by make() are read-only.
        try:
            return self.__names
        except AttributeError:
            self.__names = super().namelist()
            return self.__names

    def _name_set(self):
        # Memoized set of names for fast membership checks.
        try:
            return self.__lookup
        except AttributeError:
            self.__lookup = super()._name_set()
            return self.__lookup

2225 

2226 

class Path:
    """
    A pathlib-compatible interface for zip files.

    Consider a zip file with this structure::

        .
        ├── a.txt
        └── b
            ├── c.txt
            └── d
                └── e.txt

    >>> data = io.BytesIO()
    >>> zf = ZipFile(data, 'w')
    >>> zf.writestr('a.txt', 'content of a')
    >>> zf.writestr('b/c.txt', 'content of c')
    >>> zf.writestr('b/d/e.txt', 'content of e')
    >>> zf.filename = 'abcde.zip'

    Path accepts the zipfile object itself or a filename

    >>> root = Path(zf)

    From there, several path operations are available.

    Directory iteration (including the zip file itself):

    >>> a, b = root.iterdir()
    >>> a
    Path('abcde.zip', 'a.txt')
    >>> b
    Path('abcde.zip', 'b/')

    name property:

    >>> b.name
    'b'

    join with divide operator:

    >>> c = b / 'c.txt'
    >>> c
    Path('abcde.zip', 'b/c.txt')
    >>> c.name
    'c.txt'

    Read text:

    >>> c.read_text()
    'content of c'

    existence:

    >>> c.exists()
    True
    >>> (b / 'missing.txt').exists()
    False

    Coercion to string:

    >>> str(c)
    'abcde.zip/b/c.txt'
    """

    # repr template, formatted with self so subclasses report their own
    # class name.
    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

    def __init__(self, root, at=""):
        # root may be a ZipFile, an existing CompleteDirs, a filename, or
        # a file object; FastLookup.make normalizes all of these.
        self.root = FastLookup.make(root)
        self.at = at

    def open(self, mode='r', *args, **kwargs):
        """
        Open this entry as text or binary following the semantics
        of ``pathlib.Path.open()`` by passing arguments through
        to io.TextIOWrapper().
        """
        pwd = kwargs.pop('pwd', None)
        # ZipFile.open only understands 'r'/'w'; drop any 'b'/text suffix.
        zip_mode = mode[0]
        stream = self.root.open(self.at, zip_mode, pwd=pwd)
        if 'b' in mode:
            if args or kwargs:
                raise ValueError("encoding args invalid for binary operation")
            return stream
        return io.TextIOWrapper(stream, *args, **kwargs)

    @property
    def name(self):
        # Final path component, ignoring a trailing slash on directories.
        return posixpath.basename(self.at.rstrip("/"))

    def read_text(self, *args, **kwargs):
        """Read the entry as text; extra args go to io.TextIOWrapper."""
        with self.open('r', *args, **kwargs) as strm:
            return strm.read()

    def read_bytes(self):
        """Read the entry's raw bytes."""
        with self.open('rb') as strm:
            return strm.read()

    def _is_child(self, path):
        # True when *path* sits directly inside this directory.
        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")

    def _next(self, at):
        # Build a Path at *at* that shares this one's (wrapped) root.
        return Path(self.root, at)

    def is_dir(self):
        # The archive root is "" and directory entries end with "/".
        return not self.at or self.at.endswith("/")

    def is_file(self):
        return not self.is_dir()

    def exists(self):
        return self.at in self.root._name_set()

    def iterdir(self):
        """Iterate over the direct children of this directory."""
        if not self.is_dir():
            raise ValueError("Can't listdir a file")
        subs = map(self._next, self.root.namelist())
        return filter(self._is_child, subs)

    def __str__(self):
        return posixpath.join(self.root.filename, self.at)

    def __repr__(self):
        return self.__repr.format(self=self)

    def joinpath(self, add):
        """Return a new Path for *add* joined below this one."""
        # Renamed from ``next`` to avoid shadowing the builtin.
        # resolve_dir appends a trailing slash when the target names a
        # directory entry in the archive.
        next_at = posixpath.join(self.at, add)
        return self._next(self.root.resolve_dir(next_at))

    __truediv__ = joinpath

    @property
    def parent(self):
        """The parent directory of this path (as a directory Path)."""
        parent_at = posixpath.dirname(self.at.rstrip('/'))
        if parent_at:
            parent_at += '/'
        return self._next(parent_at)

2364 

2365 

def main(args=None):
    """Command-line interface: list, extract, create, or test a ZIP archive.

    args defaults to sys.argv[1:] (argparse's own default); exactly one
    of the four mutually exclusive actions must be selected.
    """
    import argparse

    description = 'A simple command-line interface for zipfile module.'
    parser = argparse.ArgumentParser(description=description)
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-l', '--list', metavar='<zipfile>',
                       help='Show listing of a zipfile')
    group.add_argument('-e', '--extract', nargs=2,
                       metavar=('<zipfile>', '<output_dir>'),
                       help='Extract zipfile into target dir')
    group.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create zipfile from sources')
    group.add_argument('-t', '--test', metavar='<zipfile>',
                       help='Test if a zipfile is valid')
    args = parser.parse_args(args)

    if args.test is not None:
        with ZipFile(args.test, 'r') as archive:
            first_bad = archive.testzip()
        if first_bad:
            print("The following enclosed file is corrupted: {!r}".format(first_bad))
        print("Done testing")

    elif args.list is not None:
        with ZipFile(args.list, 'r') as archive:
            archive.printdir()

    elif args.extract is not None:
        zip_src, target_dir = args.extract
        with ZipFile(zip_src, 'r') as archive:
            archive.extractall(target_dir)

    elif args.create is not None:
        zip_name, *files = args.create

        def addToZip(archive, fs_path, arc_path):
            # Files are deflated; directories recurse, writing an entry
            # for the directory itself only when arc_path is non-empty.
            if os.path.isfile(fs_path):
                archive.write(fs_path, arc_path, ZIP_DEFLATED)
            elif os.path.isdir(fs_path):
                if arc_path:
                    archive.write(fs_path, arc_path)
                for member in sorted(os.listdir(fs_path)):
                    addToZip(archive,
                             os.path.join(fs_path, member),
                             os.path.join(arc_path, member))
            # else: ignore

        with ZipFile(zip_name, 'w') as archive:
            for fs_path in files:
                arc_path = os.path.basename(fs_path)
                if not arc_path:
                    # Trailing slash: fall back to the directory's name.
                    arc_path = os.path.basename(os.path.dirname(fs_path))
                if arc_path in ('', os.curdir, os.pardir):
                    arc_path = ''
                addToZip(archive, fs_path, arc_path)

2425 

2426 

# Allow running the module directly, e.g. ``python zipfile.py -l archive.zip``.
if __name__ == "__main__":
    main()