Coverage for /pythoncovmergedfiles/medio/medio/usr/lib/python3.9/zipfile.py: 26%

1495 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-20 07:00 +0000

1""" 

2Read and write ZIP files. 

3 

4XXX references to utf-8 need further investigation. 

5""" 

6import binascii 

7import importlib.util 

8import io 

9import itertools 

10import os 

11import posixpath 

12import shutil 

13import stat 

14import struct 

15import sys 

16import threading 

17import time 

18import contextlib 

19 

20try: 

21 import zlib # We may need its compression method 

22 crc32 = zlib.crc32 

23except ImportError: 

24 zlib = None 

25 crc32 = binascii.crc32 

26 

27try: 

28 import bz2 # We may need its compression method 

29except ImportError: 

30 bz2 = None 

31 

32try: 

33 import lzma # We may need its compression method 

34except ImportError: 

35 lzma = None 

36 

# Public API exported by "from zipfile import *".
__all__ = ["BadZipFile", "BadZipfile", "error",
           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
           "Path"]

41 

class BadZipFile(Exception):
    """Raised when a file is not a valid ZIP archive or is corrupted."""
    pass

44 

45 

class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """

51 

error = BadZipfile = BadZipFile # Pre-3.2 compatibility names


# Thresholds above which the ZIP64 extensions become necessary:
# classic ZIP records store sizes/offsets in 4-byte fields and counts
# in 2-byte fields.
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

# "Version needed to extract" values written into archive headers.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63

# Below are some formats and associated data for reading/writing headers using
# the struct module. The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Indices into the unpacked end-of-central-directory tuple/list.
_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Indices into the unpacked local file header tuple.
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Indices into the unpacked ZIP64 end-of-central-directory record.
_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9

# Optional "data descriptor" record signature (written after file data
# when bit 3 of the general-purpose flags is set).
_DD_SIGNATURE = 0x08074b50

# Pre-compiled (id, length) header layout shared by all extra fields.
_EXTRA_FIELD_STRUCT = struct.Struct('<HH')

170def _strip_extra(extra, xids): 

171 # Remove Extra Fields with specified IDs. 

172 unpack = _EXTRA_FIELD_STRUCT.unpack 

173 modified = False 

174 buffer = [] 

175 start = i = 0 

176 while i + 4 <= len(extra): 

177 xid, xlen = unpack(extra[i : i + 4]) 

178 j = i + 4 + xlen 

179 if xid in xids: 

180 if i != start: 

181 buffer.append(extra[start : i]) 

182 start = j 

183 modified = True 

184 i = j 

185 if not modified: 

186 return extra 

187 return b''.join(buffer) 

188 

def _check_zipfile(fp):
    """Return True if *fp* contains a readable end-of-central-directory
    record (i.e. looks like a ZIP archive), False otherwise."""
    try:
        endrec = _EndRecData(fp)
    except OSError:
        return False
    # _EndRecData returns a populated list on success, None on failure.
    return bool(endrec)

196 

def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.
    """
    try:
        # Duck-typed: anything with a read() method is treated as an
        # already-open binary stream.
        if hasattr(filename, "read"):
            return _check_zipfile(fp=filename)
        with open(filename, "rb") as fp:
            return _check_zipfile(fp)
    except OSError:
        # Unreadable/nonexistent path is simply "not a zipfile".
        return False

212 

def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin:   seekable binary file positioned anywhere.
    offset: negative offset (from end of file) of the classic end-of-
            central-directory record already located by the caller.
    endrec: mutable list in _ECD_* layout; updated in place on success.

    Returns endrec (possibly unmodified if no valid ZIP64 record exists).
    """
    try:
        # The ZIP64 locator sits immediately before the classic EOCD.
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        # No locator signature: this is a plain (non-ZIP64) archive.
        return endrec

    if diskno != 0 or disks > 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec

254 

255 

def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        # File shorter than one EOCD record: cannot be a ZIP file.
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
        data[0:4] == stringEndArchive and
        data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = struct.unpack(structEndArchive, data)
        endrec=list(endrec)

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    # rfind: take the LAST occurrence, closest to the true end record.
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # Zip file is corrupted.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None

315 

316 

class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    # __slots__ keeps per-entry memory low; archives can hold many members.
    __slots__ = (
        'orig_filename',    # original name as stored in the archive
        'filename',         # normalized name (null-stripped, '/'-separated)
        'date_time',        # (year, month, day, hour, min, sec)
        'compress_type',    # ZIP_STORED / ZIP_DEFLATED / ...
        '_compresslevel',   # compressor level, or None for default
        'comment',          # per-member comment (bytes)
        'extra',            # raw extra-field bytes
        'create_system',    # 0 = Windows/DOS, 3 = Unix
        'create_version',   # "version made by"
        'extract_version',  # "version needed to extract"
        'reserved',         # must be zero
        'flag_bits',        # general-purpose bit flags
        'volume',           # volume number of file header
        'internal_attr',
        'external_attr',    # high 16 bits carry Unix mode on create_system 3
        'header_offset',    # byte offset of local file header (set by ZipFile)
        'CRC',              # CRC-32 of uncompressed data (set by ZipFile)
        'compress_size',
        'file_size',
        '_raw_time',        # raw DOS time field, used for decryption check
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        # DOS timestamps encode the year in 7 bits relative to 1980.
        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        self.compress_size = 0          # Size of the compressed file
        self.file_size = 0              # Size of the uncompressed file
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file

    def __repr__(self):
        """Debug representation; omits fields that hold default values."""
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            # High word is the Unix mode when create_system == 3.
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object."""
        # Pack the timestamp into the 16-bit DOS date/time fields
        # (2-second resolution for seconds).
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            # Caller left the decision to us: use ZIP64 only when needed.
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            # Append the ZIP64 extra field (header id 1) with 64-bit sizes.
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded_filename, flag_bits); sets the UTF-8 flag (0x800)
        when the name does not fit in ASCII."""
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                data = extra[4:ln+4]
                # ZIP64 extension (large files and/or large archives)
                # Each 64-bit value is present only when the corresponding
                # 32-bit central-directory field is saturated (all ones).
                try:
                    if self.file_size in (0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF):
                        field = "File size"
                        self.file_size, = unpack('<Q', data[:8])
                        data = data[8:]
                    if self.compress_size == 0xFFFF_FFFF:
                        field = "Compress size"
                        self.compress_size, = unpack('<Q', data[:8])
                        data = data[8:]
                    if self.header_offset == 0xFFFF_FFFF:
                        field = "Header offset"
                        self.header_offset, = unpack('<Q', data[:8])
                except struct.error:
                    raise BadZipFile(f"Corrupt zip64 extra field. "
                                     f"{field} not found.") from None

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # With strict_timestamps disabled, clamp out-of-range mtimes into
        # the representable DOS range instead of raising in __init__.
        if not strict_timestamps and date_time[0] < 1980:
            date_time = (1980, 1, 1, 0, 0, 0)
        elif not strict_timestamps and date_time[0] > 2107:
            date_time = (2107, 12, 31, 23, 59, 59)
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            # Directory members are marked by a trailing slash.
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16      # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        return self.filename[-1] == '/'

530 

531 

532# ZIP encryption uses the CRC32 one-byte primitive for scrambling some 

533# internal keys. We noticed that a direct implementation is faster than 

534# relying on binascii.crc32(). 

535 

536_crctable = None 

537def _gen_crc(crc): 

538 for j in range(8): 

539 if crc & 1: 

540 crc = (crc >> 1) ^ 0xEDB88320 

541 else: 

542 crc >>= 1 

543 return crc 

544 

545# ZIP supports a password-based form of encryption. Even though known 

546# plaintext attacks have been found against it, it is still useful 

547# to be able to get data out of such a file. 

548# 

549# Usage: 

550# zd = _ZipDecrypter(mypwd) 

551# plain_bytes = zd(cypher_bytes) 

552 

def _ZipDecrypter(pwd):
    """Return a decrypting closure for traditional PKWARE encryption.

    pwd is the password as a bytes object.  The returned callable maps
    cipher bytes to plain bytes, mutating three rolling key values as it
    consumes data, so it must be fed the stream strictly in order.
    """
    # Initial key values mandated by the PKWARE specification.
    key0 = 305419896
    key1 = 591751049
    key2 = 878082192

    # Build the shared CRC table on first use.
    global _crctable
    if _crctable is None:
        _crctable = list(map(_gen_crc, range(256)))
    crctable = _crctable

    def crc32(ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]

    def update_keys(c):
        # Fold one plaintext byte into the three rolling keys.
        nonlocal key0, key1, key2
        key0 = crc32(c, key0)
        key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
        key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
        key2 = crc32(key1 >> 24, key2)

    # Prime the key state with the password bytes.
    for p in pwd:
        update_keys(p)

    def decrypter(data):
        """Decrypt a bytes object."""
        result = bytearray()
        append = result.append
        for c in data:
            k = key2 | 2
            c ^= ((k * (k^1)) >> 8) & 0xFF
            # Keys are updated with the *decrypted* byte.
            update_keys(c)
            append(c)
        return bytes(result)

    return decrypter

589 

590 

class LZMACompressor:
    """Compressor producing the ZIP LZMA member format: a 4-byte version/
    property-size header, the encoded LZMA1 filter properties, then the
    raw LZMA1 stream.  The underlying compressor is created lazily on the
    first compress()/flush() call."""

    def __init__(self):
        self._comp = None

    def _init(self):
        """Create the raw LZMA1 compressor and return the ZIP LZMA header."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filters = [lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)]
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=filters)
        # Header: LZMA SDK version 9.4 followed by the property blob size.
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        if self._comp is not None:
            return self._comp.compress(data)
        # First call: emit the header before any compressed bytes.
        return self._init() + self._comp.compress(data)

    def flush(self):
        if self._comp is not None:
            return self._comp.flush()
        # Flushing an untouched compressor still emits a valid header.
        return self._init() + self._comp.flush()

612 

613 

class LZMADecompressor:
    """Decompressor for the ZIP LZMA member format.

    Buffers input until the 4-byte header plus the filter-properties blob
    have arrived, then switches to a raw LZMA1 decompressor for the rest
    of the stream.
    """

    def __init__(self):
        self._decomp = None        # created once the properties are known
        self._unconsumed = b''     # header bytes accumulated so far
        self.eof = False

    def decompress(self, data):
        if self._decomp is None:
            # Still reading the header: <BBH> version pair + property size.
            self._unconsumed += data
            if len(self._unconsumed) <= 4:
                return b''
            props_len, = struct.unpack('<H', self._unconsumed[2:4])
            if len(self._unconsumed) <= 4 + props_len:
                return b''

            props = self._unconsumed[4:4 + props_len]
            filters = [lzma._decode_filter_properties(lzma.FILTER_LZMA1,
                                                      props)]
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=filters)
            # Everything past the header is compressed payload.
            data = self._unconsumed[4 + props_len:]
            del self._unconsumed

        out = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return out

640 

641 

# Human-readable names for the compression-method ids defined by the ZIP
# specification (APPNOTE section 4.4.5).  Used for repr() output and
# NotImplementedError messages; presence here does NOT imply support.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}

661 

def _check_compression(compression):
    """Validate that *compression* is a supported method whose backing
    module was successfully imported; raise otherwise."""
    if compression == ZIP_STORED:
        return
    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
        return
    if compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
        return
    if compression == ZIP_LZMA:
        if not lzma:
            raise RuntimeError(
                "Compression requires the (missing) lzma module")
        return
    raise NotImplementedError("That compression method is not supported")

679 

680 

def _get_compressor(compress_type, compresslevel=None):
    """Return a fresh compressor object for *compress_type*, or None when
    the data is stored uncompressed (or the type is unknown)."""
    if compress_type == ZIP_DEFLATED:
        # -15: raw deflate stream without zlib header/trailer.
        if compresslevel is None:
            return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        return zlib.compressobj(compresslevel, zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        if compresslevel is None:
            return bz2.BZ2Compressor()
        return bz2.BZ2Compressor(compresslevel)
    if compress_type == ZIP_LZMA:
        # compresslevel is ignored for ZIP_LZMA
        return LZMACompressor()
    return None

695 

696 

def _get_decompressor(compress_type):
    """Return a new decompressor for *compress_type* (None for stored
    data); raise if the method is unsupported or its module is missing."""
    _check_compression(compress_type)
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        # -15: raw deflate stream without zlib header/trailer.
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()
    # Defensive tail: _check_compression normally raises before we get here.
    descr = compressor_names.get(compress_type)
    if descr:
        raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
    raise NotImplementedError("compression type %d" % (compress_type,))

713 

714 

715class _SharedFile: 

716 def __init__(self, file, pos, close, lock, writing): 

717 self._file = file 

718 self._pos = pos 

719 self._close = close 

720 self._lock = lock 

721 self._writing = writing 

722 self.seekable = file.seekable 

723 self.tell = file.tell 

724 

725 def seek(self, offset, whence=0): 

726 with self._lock: 

727 if self._writing(): 

728 raise ValueError("Can't reposition in the ZIP file while " 

729 "there is an open writing handle on it. " 

730 "Close the writing handle before trying to read.") 

731 self._file.seek(offset, whence) 

732 self._pos = self._file.tell() 

733 return self._pos 

734 

735 def read(self, n=-1): 

736 with self._lock: 

737 if self._writing(): 

738 raise ValueError("Can't read from the ZIP file while there " 

739 "is an open writing handle on it. " 

740 "Close the writing handle before trying to read.") 

741 self._file.seek(self._pos) 

742 data = self._file.read(n) 

743 self._pos = self._file.tell() 

744 return data 

745 

746 def close(self): 

747 if self._file is not None: 

748 fileobj = self._file 

749 self._file = None 

750 self._close(fileobj) 

751 

752# Provide the tell method for unseekable stream 

753class _Tellable: 

754 def __init__(self, fp): 

755 self.fp = fp 

756 self.offset = 0 

757 

758 def write(self, data): 

759 n = self.fp.write(data) 

760 self.offset += n 

761 return n 

762 

763 def tell(self): 

764 return self.offset 

765 

766 def flush(self): 

767 self.fp.flush() 

768 

769 def close(self): 

770 self.fp.close() 

771 

772 

class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    # NOTE(review): '-' binds tighter than '<<', so this evaluates to
    # 1 << 30, not (1 << 31) - 1.  It only caps a per-call chunk size,
    # so the effect is a smaller chunk, not a correctness issue — confirm
    # before "fixing".
    MAX_N = 1 << 31 - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Chunk size to read during seek
    MAX_SEEK_READ = 1 << 24

786 

    def __init__(self, fileobj, mode, zipinfo, pwd=None,
                 close_fileobj=False):
        """Wrap *fileobj* (positioned at the member's compressed data) for
        reading one archive member described by *zipinfo*.

        pwd, if given, is the password bytes for traditional PKWARE
        decryption; close_fileobj controls whether close() also closes
        the underlying file object.
        """
        self._fileobj = fileobj
        self._pwd = pwd
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size   # compressed bytes left
        self._left = zipinfo.file_size                # plain bytes left

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''    # already-decompressed, not yet consumed
        self._offset = 0          # read position within _readbuffer

        self.newlines = None

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            # No reference CRC available; integrity checking is skipped.
            self._expected_crc = None

        self._seekable = False
        try:
            if fileobj.seekable():
                # Remember the starting state so seek() can rewind and
                # re-read from the beginning of the member.
                self._orig_compress_start = fileobj.tell()
                self._orig_compress_size = zipinfo.compress_size
                self._orig_file_size = zipinfo.file_size
                # NOTE(review): if _expected_crc is None, _running_crc was
                # never set and this line raises AttributeError, which the
                # handler below swallows — leaving the stream unseekable.
                self._orig_start_crc = self._running_crc
                self._seekable = True
        except AttributeError:
            pass

        self._decrypter = None
        if pwd:
            if zipinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zipinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zipinfo.CRC >> 24) & 0xff
            h = self._init_decrypter()
            if h != check_byte:
                raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)

836 

837 

    def _init_decrypter(self):
        """Create the decrypter, consume the 12-byte encryption header and
        return its final byte for the caller's password check."""
        self._decrypter = _ZipDecrypter(self._pwd)
        # The first 12 bytes in the cypher stream is an encryption header
        #  used to strengthen the algorithm. The first 11 bytes are
        #  completely random, while the 12th contains the MSB of the CRC,
        #  or the MSB of the file time depending on the header type
        #  and is used to check the correctness of the password.
        header = self._fileobj.read(12)
        self._compress_left -= 12
        return self._decrypter(header)[11]

848 

849 def __repr__(self): 

850 result = ['<%s.%s' % (self.__class__.__module__, 

851 self.__class__.__qualname__)] 

852 if not self.closed: 

853 result.append(' name=%r mode=%r' % (self.name, self.mode)) 

854 if self._compress_type != ZIP_STORED: 

855 result.append(' compress_type=%s' % 

856 compressor_names.get(self._compress_type, 

857 self._compress_type)) 

858 else: 

859 result.append(' [closed]') 

860 result.append('>') 

861 return ''.join(result) 

862 

863 def readline(self, limit=-1): 

864 """Read and return a line from the stream. 

865 

866 If limit is specified, at most limit bytes will be read. 

867 """ 

868 

869 if limit < 0: 

870 # Shortcut common case - newline found in buffer. 

871 i = self._readbuffer.find(b'\n', self._offset) + 1 

872 if i > 0: 

873 line = self._readbuffer[self._offset: i] 

874 self._offset = i 

875 return line 

876 

877 return io.BufferedIOBase.readline(self, limit) 

878 

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            # Not enough buffered: pull more data, then push it back so the
            # logical position is unchanged.
            chunk = self.read(n)
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                # Chunk fits inside the already-consumed prefix: just move
                # the offset back over it.
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

891 

    def readable(self):
        """Always True for an open handle; raises once closed."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

896 

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        if self.closed:
            raise ValueError("read from closed file.")
        if n is None or n < 0:
            # Read-to-EOF: drain the buffer, then pull everything left.
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # Entire request is satisfied from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        # Consume the rest of the buffer, then fetch the remainder.
        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Overshoot: keep the surplus buffered for the next read.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

931 

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

941 

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        if n is None or n < 0:
            # Unbounded: return buffered data plus at most one fetched chunk.
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # Satisfied entirely from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            # Fetch at most one non-empty chunk; surplus stays buffered.
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

978 

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            # EOF when the deflate stream says so, or when all input is
            # consumed with nothing left in the unconsumed tail.
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            # bzip2 / lzma decompressors take no max-length argument.
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

1014 

1015 def _read2(self, n): 

1016 if self._compress_left <= 0: 

1017 return b'' 

1018 

1019 n = max(n, self.MIN_READ_SIZE) 

1020 n = min(n, self._compress_left) 

1021 

1022 data = self._fileobj.read(n) 

1023 self._compress_left -= len(data) 

1024 if not data: 

1025 raise EOFError 

1026 

1027 if self._decrypter is not None: 

1028 data = self._decrypter(data) 

1029 return data 

1030 

    def close(self):
        """Close this member stream; release the underlying file if owned."""
        try:
            # Only close the shared file object when this stream owns it.
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            # Always mark the BufferedIOBase itself as closed.
            super().close()

1037 

1038 def seekable(self): 

1039 if self.closed: 

1040 raise ValueError("I/O operation on closed file.") 

1041 return self._seekable 

1042 

    def seek(self, offset, whence=0):
        """Seek within the decompressed stream and return the new position.

        Backward seeks rewind to the member start and re-read forward,
        since compressed streams cannot be repositioned arbitrarily.
        """
        if self.closed:
            raise ValueError("seek on closed file.")
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0: # Seek from start of file
            new_pos = offset
        elif whence == 1: # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2: # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        # Clamp the target position to [0, uncompressed size].
        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            # to the member start: restore CRC, counters, decompressor and
            # (if encrypted) the decrypter, then fall through to read forward.
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos
            if self._decrypter is not None:
                self._init_decrypter()

        # Read (and discard) forward in bounded chunks until the target.
        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()

1092 

1093 def tell(self): 

1094 if self.closed: 

1095 raise ValueError("tell on closed file.") 

1096 if not self._seekable: 

1097 raise io.UnsupportedOperation("underlying stream is not seekable") 

1098 filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset 

1099 return filepos 

1100 

1101 

class _ZipWriteFile(io.BufferedIOBase):
    """Writable stream returned by ZipFile.open(..., mode='w').

    Compresses data as it is written and, on close(), fixes up the member's
    header (or appends a data descriptor) and registers the entry with the
    owning ZipFile.
    """

    def __init__(self, zf, zinfo, zip64):
        self._zinfo = zinfo        # ZipInfo describing the member being written
        self._zip64 = zip64        # whether the local header uses ZIP64 fields
        self._zipfile = zf         # owning ZipFile
        self._compressor = _get_compressor(zinfo.compress_type,
                                           zinfo._compresslevel)
        self._file_size = 0        # uncompressed bytes written so far
        self._compress_size = 0    # compressed bytes emitted so far
        self._crc = 0              # running CRC-32 of the uncompressed data

    @property
    def _fileobj(self):
        # Indirect through the ZipFile so the current fp is always used.
        return self._zipfile.fp

    def writable(self):
        return True

    def write(self, data):
        """Write *data*, compressing if configured; return the input length."""
        if self.closed:
            raise ValueError('I/O operation on closed file.')
        nbytes = len(data)
        self._file_size += nbytes
        self._crc = crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)
            self._compress_size += len(data)
        self._fileobj.write(data)
        # Report uncompressed bytes accepted, per the io protocol.
        return nbytes

    def close(self):
        """Flush the compressor, finalize sizes/CRC and register the entry."""
        if self.closed:
            return
        try:
            super().close()
            # Flush any data from the compressor, and update header info
            if self._compressor:
                buf = self._compressor.flush()
                self._compress_size += len(buf)
                self._fileobj.write(buf)
                self._zinfo.compress_size = self._compress_size
            else:
                self._zinfo.compress_size = self._file_size
            self._zinfo.CRC = self._crc
            self._zinfo.file_size = self._file_size

            # Write updated header info
            if self._zinfo.flag_bits & 0x08:
                # Write CRC and file sizes after the file data
                # (data-descriptor mode, used when the output is unseekable).
                fmt = '<LLQQ' if self._zip64 else '<LLLL'
                self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
                    self._zinfo.compress_size, self._zinfo.file_size))
                self._zipfile.start_dir = self._fileobj.tell()
            else:
                if not self._zip64:
                    if self._file_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'File size unexpectedly exceeded ZIP64 limit')
                    if self._compress_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'Compressed size unexpectedly exceeded ZIP64 limit')
                # Seek backwards and write file header (which will now include
                # correct CRC and file sizes)

                # Preserve current position in file
                self._zipfile.start_dir = self._fileobj.tell()
                self._fileobj.seek(self._zinfo.header_offset)
                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
                self._fileobj.seek(self._zipfile.start_dir)

            # Successfully written: Add file to our caches
            self._zipfile.filelist.append(self._zinfo)
            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
        finally:
            # Allow the ZipFile to accept reads/writes again even on error.
            self._zipfile._writing = False

1177 

1178 

1179 

1180class ZipFile: 

1181 """ Class with methods to open, read, write, close, list zip files. 

1182 

1183 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True, 

1184 compresslevel=None) 

1185 

1186 file: Either the path to the file, or a file-like object. 

1187 If it is a path, the file will be opened and closed by ZipFile. 

1188 mode: The mode can be either read 'r', write 'w', exclusive create 'x', 

1189 or append 'a'. 

1190 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib), 

1191 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma). 

1192 allowZip64: if True ZipFile will create files with ZIP64 extensions when 

1193 needed, otherwise it will raise an exception when this would 

1194 be necessary. 

1195 compresslevel: None (default for the given compression type) or an integer 

1196 specifying the level to pass to the compressor. 

1197 When using ZIP_STORED or ZIP_LZMA this keyword has no effect. 

1198 When using ZIP_DEFLATED integers 0 through 9 are accepted. 

1199 When using ZIP_BZIP2 integers 1 through 9 are accepted. 

1200 

1201 """ 

1202 

1203 fp = None # Set here since __del__ checks it 

1204 _windows_illegal_name_trans_table = None 

1205 

    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
                 compresslevel=None, *, strict_timestamps=True):
        """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
        or append 'a'."""
        if mode not in ('r', 'w', 'x', 'a'):
            raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

        _check_compression(compression)

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.compresslevel = compresslevel
        self.mode = mode
        self.pwd = None
        self._comment = b''
        self._strict_timestamps = strict_timestamps

        # Check if we were passed a file-like object
        if isinstance(file, os.PathLike):
            file = os.fspath(file)
        if isinstance(file, str):
            # No, it's a filename
            self._filePassed = 0
            self.filename = file
            # Besides mapping ZipFile modes to io.open() modes, this dict
            # chains fallbacks: 'a' tries 'r+b', then 'w+b', then 'wb'.
            modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                        'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
            filemode = modeDict[mode]
            while True:
                try:
                    self.fp = io.open(file, filemode)
                except OSError:
                    # Follow the fallback chain until no entry remains.
                    if filemode in modeDict:
                        filemode = modeDict[filemode]
                        continue
                    raise
                break
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)
        self._fileRefCnt = 1
        self._lock = threading.RLock()
        self._seekable = True
        self._writing = False

        try:
            if mode == 'r':
                self._RealGetContents()
            elif mode in ('w', 'x'):
                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                try:
                    self.start_dir = self.fp.tell()
                except (AttributeError, OSError):
                    # Wrap objects lacking tell() so offsets can be tracked.
                    self.fp = _Tellable(self.fp)
                    self.start_dir = 0
                    self._seekable = False
                else:
                    # Some file-like objects can provide tell() but not seek()
                    try:
                        self.fp.seek(self.start_dir)
                    except (AttributeError, OSError):
                        self._seekable = False
            elif mode == 'a':
                try:
                    # See if file is a zip file
                    self._RealGetContents()
                    # seek to start of directory and overwrite
                    self.fp.seek(self.start_dir)
                except BadZipFile:
                    # file is not a zip file, just append
                    self.fp.seek(0, 2)

                    # set the modified flag so central directory gets written
                    # even if no files are added to the archive
                    self._didModify = True
                    self.start_dir = self.fp.tell()
            else:
                raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
        except:
            # On any setup failure, release the file before re-raising.
            fp = self.fp
            self.fp = None
            self._fpclose(fp)
            raise

1295 

    def __enter__(self):
        """Support the context-manager protocol: return the archive itself."""
        return self

1298 

    def __exit__(self, type, value, traceback):
        """Close the archive on context exit; exceptions propagate."""
        self.close()

1301 

1302 def __repr__(self): 

1303 result = ['<%s.%s' % (self.__class__.__module__, 

1304 self.__class__.__qualname__)] 

1305 if self.fp is not None: 

1306 if self._filePassed: 

1307 result.append(' file=%r' % self.fp) 

1308 elif self.filename is not None: 

1309 result.append(' filename=%r' % self.filename) 

1310 result.append(' mode=%r' % self.mode) 

1311 else: 

1312 result.append(' [closed]') 

1313 result.append('>') 

1314 return ''.join(result) 

1315 

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file."""
        fp = self.fp
        try:
            endrec = _EndRecData(fp)
        except OSError:
            raise BadZipFile("File is not a zip file")
        if not endrec:
            raise BadZipFile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
        self._comment = endrec[_ECD_COMMENT]    # archive comment

        # "concat" is zero, unless zip was concatenated to another file
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = concat + offset_cd
            print("given, inferred, offset", offset_cd, inferred, concat)
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        # Slurp the whole central directory and parse it from memory.
        data = fp.read(size_cd)
        fp = io.BytesIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if len(centdir) != sizeCentralDir:
                raise BadZipFile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if centdir[_CD_SIGNATURE] != stringCentralDir:
                raise BadZipFile("Bad magic number for central directory")
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            flags = centdir[5]
            if flags & 0x800:
                # UTF-8 file names extension
                filename = filename.decode('utf-8')
            else:
                # Historical ZIP filename encoding
                filename = filename.decode('cp437')
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
             x.flag_bits, x.compress_type, t, d,
             x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            if x.extract_version > MAX_EXTRACT_VERSION:
                raise NotImplementedError("zip file version %.1f" %
                                          (x.extract_version / 10))
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            # May adjust sizes/offsets from a ZIP64 extra field.
            x._decodeExtra()
            x.header_offset = x.header_offset + concat
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)

1392 

1393 

1394 def namelist(self): 

1395 """Return a list of file names in the archive.""" 

1396 return [data.filename for data in self.filelist] 

1397 

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        # NOTE: this is the live internal list, not a copy.
        return self.filelist

1402 

1403 def printdir(self, file=None): 

1404 """Print a table of contents for the zip file.""" 

1405 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"), 

1406 file=file) 

1407 for zinfo in self.filelist: 

1408 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6] 

1409 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size), 

1410 file=file) 

1411 

1412 def testzip(self): 

1413 """Read all the files and check the CRC.""" 

1414 chunk_size = 2 ** 20 

1415 for zinfo in self.filelist: 

1416 try: 

1417 # Read by chunks, to avoid an OverflowError or a 

1418 # MemoryError with very large embedded files. 

1419 with self.open(zinfo.filename, "r") as f: 

1420 while f.read(chunk_size): # Check CRC-32 

1421 pass 

1422 except BadZipFile: 

1423 return zinfo.filename 

1424 

1425 def getinfo(self, name): 

1426 """Return the instance of ZipInfo given 'name'.""" 

1427 info = self.NameToInfo.get(name) 

1428 if info is None: 

1429 raise KeyError( 

1430 'There is no item named %r in the archive' % name) 

1431 

1432 return info 

1433 

1434 def setpassword(self, pwd): 

1435 """Set default password for encrypted files.""" 

1436 if pwd and not isinstance(pwd, bytes): 

1437 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__) 

1438 if pwd: 

1439 self.pwd = pwd 

1440 else: 

1441 self.pwd = None 

1442 

    @property
    def comment(self):
        """The comment text associated with the ZIP file (bytes)."""
        return self._comment

1447 

    @comment.setter
    def comment(self, comment):
        """Set the archive comment; over-long comments are truncated
        (with a warning) to the format's 16-bit length limit."""
        if not isinstance(comment, bytes):
            raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
        # check for valid comment length
        if len(comment) > ZIP_MAX_COMMENT:
            import warnings
            warnings.warn('Archive comment is too long; truncating to %d bytes'
                          % ZIP_MAX_COMMENT, stacklevel=2)
            comment = comment[:ZIP_MAX_COMMENT]
        self._comment = comment
        # Ensure close() rewrites the end record with the new comment.
        self._didModify = True

1460 

    def read(self, name, pwd=None):
        """Return file bytes for name.  'pwd' is the password for an
        encrypted member."""
        with self.open(name, "r", pwd) as fp:
            return fp.read()

1465 

    def open(self, name, mode="r", pwd=None, *, force_zip64=False):
        """Return file-like object for 'name'.

        name is a string for the file name within the ZIP file, or a ZipInfo
        object.

        mode should be 'r' to read a file already in the ZIP file, or 'w' to
        write to a file newly added to the archive.

        pwd is the password to decrypt files (only used for reading).

        When writing, if the file size is not known in advance but may exceed
        2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
        files.  If the size is known in advance, it is best to pass a ZipInfo
        instance for name, with zinfo.file_size set.
        """
        if mode not in {"r", "w"}:
            raise ValueError('open() requires mode "r" or "w"')
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if pwd and (mode == "w"):
            raise ValueError("pwd is only supported for reading files")
        if not self.fp:
            raise ValueError(
                "Attempt to use ZIP archive that was already closed")

        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        elif mode == 'w':
            zinfo = ZipInfo(name)
            zinfo.compress_type = self.compression
            zinfo._compresslevel = self.compresslevel
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        if mode == 'w':
            return self._open_to_write(zinfo, force_zip64=force_zip64)

        if self._writing:
            raise ValueError("Can't read from the ZIP file while there "
                    "is an open writing handle on it. "
                    "Close the writing handle before trying to read.")

        # Open for reading:
        # _SharedFile tracks its own offset so several members can be read
        # concurrently from one underlying file object.
        self._fileRefCnt += 1
        zef_file = _SharedFile(self.fp, zinfo.header_offset,
                               self._fpclose, self._lock, lambda: self._writing)
        try:
            # Skip the file header:
            fheader = zef_file.read(sizeFileHeader)
            if len(fheader) != sizeFileHeader:
                raise BadZipFile("Truncated file header")
            fheader = struct.unpack(structFileHeader, fheader)
            if fheader[_FH_SIGNATURE] != stringFileHeader:
                raise BadZipFile("Bad magic number for file header")

            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            if zinfo.flag_bits & 0x20:
                # Zip 2.7: compressed patched data
                raise NotImplementedError("compressed patched data (flag bit 5)")

            if zinfo.flag_bits & 0x40:
                # strong encryption
                raise NotImplementedError("strong encryption (flag bit 6)")

            if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & 0x800:
                # UTF-8 filename
                fname_str = fname.decode("utf-8")
            else:
                fname_str = fname.decode("cp437")

            # The local header must agree with the central directory entry.
            if fname_str != zinfo.orig_filename:
                raise BadZipFile(
                    'File name in directory %r and header %r differ.'
                    % (zinfo.orig_filename, fname))

            # check for encrypted flag & handle password
            is_encrypted = zinfo.flag_bits & 0x1
            if is_encrypted:
                if not pwd:
                    pwd = self.pwd
                if not pwd:
                    raise RuntimeError("File %r is encrypted, password "
                                       "required for extraction" % name)
            else:
                pwd = None

            return ZipExtFile(zef_file, mode, zinfo, pwd, True)
        except:
            # Release our reference on any failure before re-raising.
            zef_file.close()
            raise

1563 

    def _open_to_write(self, zinfo, force_zip64=False):
        """Write the local header for *zinfo* and return a _ZipWriteFile
        through which the member's data is streamed into the archive."""
        if force_zip64 and not self._allowZip64:
            raise ValueError(
                "force_zip64 is True, but allowZip64 was False when opening "
                "the ZIP file."
            )
        if self._writing:
            raise ValueError("Can't write to the ZIP file while there is "
                             "another write handle open on it. "
                             "Close the first handle before opening another.")

        # Size and CRC are overwritten with correct data after processing the file
        zinfo.compress_size = 0
        zinfo.CRC = 0

        zinfo.flag_bits = 0x00
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02
        if not self._seekable:
            # Cannot rewrite the header later: emit a data descriptor instead.
            zinfo.flag_bits |= 0x08

        if not zinfo.external_attr:
            zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------

        # Compressed size can be larger than uncompressed size
        # (hence the 1.05 safety factor when deciding on ZIP64).
        zip64 = self._allowZip64 and \
                (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)

        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()

        self._writecheck(zinfo)
        self._didModify = True

        self.fp.write(zinfo.FileHeader(zip64))

        self._writing = True
        return _ZipWriteFile(self, zinfo, zip64)

1604 

1605 def extract(self, member, path=None, pwd=None): 

1606 """Extract a member from the archive to the current working directory, 

1607 using its full name. Its file information is extracted as accurately 

1608 as possible. `member' may be a filename or a ZipInfo object. You can 

1609 specify a different directory using `path'. 

1610 """ 

1611 if path is None: 

1612 path = os.getcwd() 

1613 else: 

1614 path = os.fspath(path) 

1615 

1616 return self._extract_member(member, path, pwd) 

1617 

1618 def extractall(self, path=None, members=None, pwd=None): 

1619 """Extract all members from the archive to the current working 

1620 directory. `path' specifies a different directory to extract to. 

1621 `members' is optional and must be a subset of the list returned 

1622 by namelist(). 

1623 """ 

1624 if members is None: 

1625 members = self.namelist() 

1626 

1627 if path is None: 

1628 path = os.getcwd() 

1629 else: 

1630 path = os.fspath(path) 

1631 

1632 for zipinfo in members: 

1633 self._extract_member(zipinfo, path, pwd) 

1634 

1635 @classmethod 

1636 def _sanitize_windows_name(cls, arcname, pathsep): 

1637 """Replace bad characters and remove trailing dots from parts.""" 

1638 table = cls._windows_illegal_name_trans_table 

1639 if not table: 

1640 illegal = ':<>|"?*' 

1641 table = str.maketrans(illegal, '_' * len(illegal)) 

1642 cls._windows_illegal_name_trans_table = table 

1643 arcname = arcname.translate(table) 

1644 # remove trailing dots 

1645 arcname = (x.rstrip('.') for x in arcname.split(pathsep)) 

1646 # rejoin, removing empty parts. 

1647 arcname = pathsep.join(x for x in arcname if x) 

1648 return arcname 

1649 

    def _extract_member(self, member, targetpath, pwd):
        """Extract the ZipInfo object 'member' to a physical
           file on the path targetpath.

        The archive name is sanitized so extraction cannot escape
        targetpath (absolute paths, drive letters and '..' are stripped).
        """
        if not isinstance(member, ZipInfo):
            member = self.getinfo(member)

        # build the destination pathname, replacing
        # forward slashes to platform specific separators.
        arcname = member.filename.replace('/', os.path.sep)

        if os.path.altsep:
            arcname = arcname.replace(os.path.altsep, os.path.sep)
        # interpret absolute pathname as relative, remove drive letter or
        # UNC path, redundant separators, "." and ".." components.
        arcname = os.path.splitdrive(arcname)[1]
        invalid_path_parts = ('', os.path.curdir, os.path.pardir)
        arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                                   if x not in invalid_path_parts)
        if os.path.sep == '\\':
            # filter illegal characters on Windows
            arcname = self._sanitize_windows_name(arcname, os.path.sep)

        targetpath = os.path.join(targetpath, arcname)
        targetpath = os.path.normpath(targetpath)

        # Create all upper directories if necessary.
        upperdirs = os.path.dirname(targetpath)
        if upperdirs and not os.path.exists(upperdirs):
            os.makedirs(upperdirs)

        if member.is_dir():
            # Directory entries carry no data; just ensure the dir exists.
            if not os.path.isdir(targetpath):
                os.mkdir(targetpath)
            return targetpath

        with self.open(member, pwd=pwd) as source, \
             open(targetpath, "wb") as target:
            shutil.copyfileobj(source, target)

        return targetpath

1691 

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive."""
        if zinfo.filename in self.NameToInfo:
            # Duplicates are legal in the format, so only warn.
            import warnings
            warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
        if self.mode not in ('w', 'x', 'a'):
            raise ValueError("write() requires mode 'w', 'x', or 'a'")
        if not self.fp:
            raise ValueError(
                "Attempt to write ZIP archive that was already closed")
        _check_compression(zinfo.compress_type)
        if not self._allowZip64:
            # With ZIP64 disabled, enforce the classic format limits up front.
            requires_zip64 = None
            if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
                requires_zip64 = "Files count"
            elif zinfo.file_size > ZIP64_LIMIT:
                requires_zip64 = "Filesize"
            elif zinfo.header_offset > ZIP64_LIMIT:
                requires_zip64 = "Zipfile size"
            if requires_zip64:
                raise LargeZipFile(requires_zip64 +
                                   " would require ZIP64 extensions")

1714 

    def write(self, filename, arcname=None,
              compress_type=None, compresslevel=None):
        """Put the bytes from filename into the archive under the name
        arcname."""
        if not self.fp:
            raise ValueError(
                "Attempt to write to ZIP archive that was already closed")
        if self._writing:
            raise ValueError(
                "Can't write to ZIP archive while an open writing handle exists"
            )

        # Build the ZipInfo (name, timestamps, attributes) from the OS file.
        zinfo = ZipInfo.from_file(filename, arcname,
                                  strict_timestamps=self._strict_timestamps)

        if zinfo.is_dir():
            # Directory entries carry no data, so no compression settings.
            zinfo.compress_size = 0
            zinfo.CRC = 0
        else:
            if compress_type is not None:
                zinfo.compress_type = compress_type
            else:
                zinfo.compress_type = self.compression

            if compresslevel is not None:
                zinfo._compresslevel = compresslevel
            else:
                zinfo._compresslevel = self.compresslevel

        if zinfo.is_dir():
            # Directories are written inline (header only, no data stream).
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                zinfo.header_offset = self.fp.tell()    # Start of header bytes
                if zinfo.compress_type == ZIP_LZMA:
                    # Compressed data includes an end-of-stream (EOS) marker
                    zinfo.flag_bits |= 0x02

                self._writecheck(zinfo)
                self._didModify = True

                self.filelist.append(zinfo)
                self.NameToInfo[zinfo.filename] = zinfo
                self.fp.write(zinfo.FileHeader(False))
                self.start_dir = self.fp.tell()
        else:
            # Regular files are streamed through the write handle.
            with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
                shutil.copyfileobj(src, dest, 1024*8)

1763 

    def writestr(self, zinfo_or_arcname, data,
                 compress_type=None, compresslevel=None):
        """Write a file into the archive.  The contents is 'data', which
        may be either a 'str' or a 'bytes' instance; if it is a 'str',
        it is encoded as UTF-8 first.
        'zinfo_or_arcname' is either a ZipInfo instance or
        the name of the file in the archive."""
        if isinstance(data, str):
            data = data.encode("utf-8")
        if not isinstance(zinfo_or_arcname, ZipInfo):
            # Synthesize a ZipInfo stamped with the current local time.
            zinfo = ZipInfo(filename=zinfo_or_arcname,
                            date_time=time.localtime(time.time())[:6])
            zinfo.compress_type = self.compression
            zinfo._compresslevel = self.compresslevel
            if zinfo.filename[-1] == '/':
                zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
                zinfo.external_attr |= 0x10           # MS-DOS directory flag
            else:
                zinfo.external_attr = 0o600 << 16     # ?rw-------
        else:
            zinfo = zinfo_or_arcname

        if not self.fp:
            raise ValueError(
                "Attempt to write to ZIP archive that was already closed")
        if self._writing:
            raise ValueError(
                "Can't write to ZIP archive while an open writing handle exists."
            )

        # Explicit arguments override whatever the ZipInfo carries.
        if compress_type is not None:
            zinfo.compress_type = compress_type

        if compresslevel is not None:
            zinfo._compresslevel = compresslevel

        zinfo.file_size = len(data)            # Uncompressed size
        with self._lock:
            with self.open(zinfo, mode='w') as dest:
                dest.write(data)

1804 

    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        # close() is a no-op when self.fp is already None.
        self.close()

1808 

    def close(self):
        """Close the file, and for mode 'w', 'x' and 'a' write the ending
        records."""
        if self.fp is None:
            return

        if self._writing:
            raise ValueError("Can't close the ZIP file while there is "
                             "an open writing handle on it. "
                             "Close the writing handle before closing the zip.")

        try:
            if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
                with self._lock:
                    if self._seekable:
                        self.fp.seek(self.start_dir)
                    self._write_end_record()
        finally:
            # Release the file object even if writing the records failed.
            fp = self.fp
            self.fp = None
            self._fpclose(fp)

1830 

    def _write_end_record(self):
        """Write the central directory and the end-of-archive record(s),
        emitting ZIP64 structures when the format's limits are exceeded."""
        for zinfo in self.filelist:         # write central directory
            dt = zinfo.date_time
            # Pack the timestamp into MS-DOS date/time fields.
            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
            extra = []
            if zinfo.file_size > ZIP64_LIMIT \
               or zinfo.compress_size > ZIP64_LIMIT:
                # Sizes go into a ZIP64 extra field; headers get sentinels.
                extra.append(zinfo.file_size)
                extra.append(zinfo.compress_size)
                file_size = 0xffffffff
                compress_size = 0xffffffff
            else:
                file_size = zinfo.file_size
                compress_size = zinfo.compress_size

            if zinfo.header_offset > ZIP64_LIMIT:
                extra.append(zinfo.header_offset)
                header_offset = 0xffffffff
            else:
                header_offset = zinfo.header_offset

            extra_data = zinfo.extra
            min_version = 0
            if extra:
                # Append a ZIP64 field to the extra's
                extra_data = _strip_extra(extra_data, (1,))
                extra_data = struct.pack(
                    '<HH' + 'Q'*len(extra),
                    1, 8*len(extra), *extra) + extra_data

                min_version = ZIP64_VERSION

            if zinfo.compress_type == ZIP_BZIP2:
                min_version = max(BZIP2_VERSION, min_version)
            elif zinfo.compress_type == ZIP_LZMA:
                min_version = max(LZMA_VERSION, min_version)

            extract_version = max(min_version, zinfo.extract_version)
            create_version = max(min_version, zinfo.create_version)
            filename, flag_bits = zinfo._encodeFilenameFlags()
            centdir = struct.pack(structCentralDir,
                                  stringCentralDir, create_version,
                                  zinfo.create_system, extract_version, zinfo.reserved,
                                  flag_bits, zinfo.compress_type, dostime, dosdate,
                                  zinfo.CRC, compress_size, file_size,
                                  len(filename), len(extra_data), len(zinfo.comment),
                                  0, zinfo.internal_attr, zinfo.external_attr,
                                  header_offset)
            self.fp.write(centdir)
            self.fp.write(filename)
            self.fp.write(extra_data)
            self.fp.write(zinfo.comment)

        pos2 = self.fp.tell()
        # Write end-of-zip-archive record
        centDirCount = len(self.filelist)
        centDirSize = pos2 - self.start_dir
        centDirOffset = self.start_dir
        requires_zip64 = None
        if centDirCount > ZIP_FILECOUNT_LIMIT:
            requires_zip64 = "Files count"
        elif centDirOffset > ZIP64_LIMIT:
            requires_zip64 = "Central directory offset"
        elif centDirSize > ZIP64_LIMIT:
            requires_zip64 = "Central directory size"
        if requires_zip64:
            # Need to write the ZIP64 end-of-archive records
            if not self._allowZip64:
                raise LargeZipFile(requires_zip64 +
                                   " would require ZIP64 extensions")
            zip64endrec = struct.pack(
                structEndArchive64, stringEndArchive64,
                44, 45, 45, 0, 0, centDirCount, centDirCount,
                centDirSize, centDirOffset)
            self.fp.write(zip64endrec)

            zip64locrec = struct.pack(
                structEndArchive64Locator,
                stringEndArchive64Locator, 0, pos2, 1)
            self.fp.write(zip64locrec)
            # The classic end record then carries capped sentinel values.
            centDirCount = min(centDirCount, 0xFFFF)
            centDirSize = min(centDirSize, 0xFFFFFFFF)
            centDirOffset = min(centDirOffset, 0xFFFFFFFF)

        endrec = struct.pack(structEndArchive, stringEndArchive,
                             0, 0, centDirCount, centDirCount,
                             centDirSize, centDirOffset, len(self._comment))
        self.fp.write(endrec)
        self.fp.write(self._comment)
        if self.mode == "a":
            # Drop any stale bytes left over from the pre-append archive.
            self.fp.truncate()
        self.fp.flush()

1924 

1925 def _fpclose(self, fp): 

1926 assert self._fileRefCnt > 0 

1927 self._fileRefCnt -= 1 

1928 if not self._fileRefCnt and not self._filePassed: 

1929 fp.close() 

1930 

1931 

class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        # optimize: -1 reuses existing bytecode / the interpreter's current
        # optimization level; 0, 1 or 2 force that specific level.
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive. If pathname is a plain
        directory, listdir *.py and enter all modules. Else, pathname
        must be a Python *.py file and the module will be put into the
        archive. Added modules are always module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.
        If filterfunc(pathname) is given, it is called with every argument.
        When it is False, the file or directory is skipped.
        """
        pathname = os.fspath(pathname)
        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s %r skipped by filterfunc' % (label, pathname))
            return
        dir, name = os.path.split(pathname)
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in", pathname, "as", basename)
                # [0:-3] strips the ".py" suffix before bytecode lookup.
                fname, arcname = self._get_codename(initname[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
                dirlist = sorted(os.listdir(pathname))
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    root, ext = os.path.splitext(filename)
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename,
                                         filterfunc=filterfunc)  # Recursive call
                    elif ext == ".py":
                        if filterfunc and not filterfunc(path):
                            if self.debug:
                                print('file %r skipped by filterfunc' % path)
                            continue
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory", pathname)
                for filename in sorted(os.listdir(pathname)):
                    path = os.path.join(pathname, filename)
                    root, ext = os.path.splitext(filename)
                    if ext == ".py":
                        if filterfunc and not filterfunc(path):
                            if self.debug:
                                print('file %r skipped by filterfunc' % path)
                            continue
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
        else:
            # A single file: it must be Python source.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary. For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).
        """
        def _compile(file, optimize=-1):
            # Byte-compile *file* in place; return False (printing the
            # compiler error) so the caller can fall back to shipping
            # the .py source instead.
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        # PEP 3147 __pycache__ locations for each optimization level.
        pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
        pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
        pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
        if self._optimize == -1:
            # legacy mode: use whatever file is present
            # Each candidate is used only if it is at least as new as the
            # source (st_mtime comparison).
            if (os.path.isfile(file_pyc) and
                  os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
                # Use .pyc file.
                arcname = fname = file_pyc
            elif (os.path.isfile(pycache_opt0) and
                  os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
                # file name in the archive.
                fname = pycache_opt0
                arcname = file_pyc
            elif (os.path.isfile(pycache_opt1) and
                  os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
                # file name in the archive.
                fname = pycache_opt1
                arcname = file_pyc
            elif (os.path.isfile(pycache_opt2) and
                  os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
                # file name in the archive.
                fname = pycache_opt2
                arcname = file_pyc
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    if sys.flags.optimize == 0:
                        fname = pycache_opt0
                    elif sys.flags.optimize == 1:
                        fname = pycache_opt1
                    else:
                        fname = pycache_opt2
                    arcname = file_pyc
                else:
                    # Compilation failed: ship the source file itself.
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname = pycache_opt0
                arcname = file_pyc
            else:
                arcname = file_pyc
                if self._optimize == 1:
                    fname = pycache_opt1
                elif self._optimize == 2:
                    fname = pycache_opt2
                else:
                    msg = "invalid value for 'optimize': {!r}".format(self._optimize)
                    raise ValueError(msg)
            # Recompile when the cached bytecode is missing or stale.
            if not (os.path.isfile(fname) and
                    os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py
        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)

2104 

2105 

def _parents(path):
    """
    Given a path with elements separated by
    posixpath.sep, generate all parents of that path.

    >>> list(_parents('b/d'))
    ['b']
    >>> list(_parents('/b/d/'))
    ['/b']
    >>> list(_parents('b/d/f/'))
    ['b/d', 'b']
    >>> list(_parents('b'))
    []
    >>> list(_parents(''))
    []
    """
    ancestors = _ancestry(path)
    # The first element of the ancestry is the path itself; skip it and
    # return the remaining iterator of proper parents.
    next(ancestors, None)
    return ancestors

2123 

2124 

2125def _ancestry(path): 

2126 """ 

2127 Given a path with elements separated by 

2128 posixpath.sep, generate all elements of that path 

2129 

2130 >>> list(_ancestry('b/d')) 

2131 ['b/d', 'b'] 

2132 >>> list(_ancestry('/b/d/')) 

2133 ['/b/d', '/b'] 

2134 >>> list(_ancestry('b/d/f/')) 

2135 ['b/d/f', 'b/d', 'b'] 

2136 >>> list(_ancestry('b')) 

2137 ['b'] 

2138 >>> list(_ancestry('')) 

2139 [] 

2140 """ 

2141 path = path.rstrip(posixpath.sep) 

2142 while path and path != posixpath.sep: 

2143 yield path 

2144 path, tail = posixpath.split(path) 

2145 

2146 

# dict preserves insertion order, so dict.fromkeys collapses duplicates
# while keeping first-seen order; callers iterate over the resulting keys.
_dedupe = dict.fromkeys
"""Deduplicate an iterable in original order"""

2149 

2150 

2151def _difference(minuend, subtrahend): 

2152 """ 

2153 Return items in minuend not in subtrahend, retaining order 

2154 with O(1) lookup. 

2155 """ 

2156 return itertools.filterfalse(set(subtrahend).__contains__, minuend) 

2157 

2158 

class CompleteDirs(ZipFile):
    """
    A ZipFile subclass that ensures that implied directories
    are always included in the namelist.
    """

    @staticmethod
    def _implied_dirs(names):
        # Directories that contain entries but lack an explicit entry of
        # their own; rendered with a trailing slash, deduplicated in
        # first-seen order.
        parents = itertools.chain.from_iterable(map(_parents, names))
        as_dirs = (p + posixpath.sep for p in parents)
        return _dedupe(_difference(as_dirs, names))

    def namelist(self):
        # Real entries first, then any directories only implied by them.
        names = super(CompleteDirs, self).namelist()
        return names + list(self._implied_dirs(names))

    def _name_set(self):
        # Set form of namelist() for O(1) membership tests.
        return set(self.namelist())

    def resolve_dir(self, name):
        """
        If the name represents a directory, return that name
        as a directory (with the trailing slash).
        """
        names = self._name_set()
        dirname = name + '/'
        # Rewrite only when the bare name is absent but the slashed
        # directory form exists in the archive.
        dir_match = name not in names and dirname in names
        return dirname if dir_match else name

    @classmethod
    def make(cls, source):
        """
        Given a source (filename or zipfile), return an
        appropriate CompleteDirs subclass.
        """
        if isinstance(source, CompleteDirs):
            return source

        if not isinstance(source, ZipFile):
            return cls(source)

        # Only allow for FastPath when supplied zipfile is read-only
        if 'r' not in source.mode:
            cls = CompleteDirs

        # Wrap the already-open ZipFile without re-reading it: create a
        # bare instance of the target class and copy the source's state.
        res = cls.__new__(cls)
        vars(res).update(vars(source))
        return res

2207 

2208 

class FastLookup(CompleteDirs):
    """
    ZipFile subclass to ensure implicit
    dirs exist and are resolved rapidly.
    """

    def namelist(self):
        # Compute the (completed) name list once, then memoize it on the
        # instance; safe because instances built by make() are read-only.
        try:
            return self.__names
        except AttributeError:
            self.__names = super().namelist()
            return self.__names

    def _name_set(self):
        # Memoized set of names for fast membership checks.
        try:
            return self.__lookup
        except AttributeError:
            self.__lookup = super()._name_set()
            return self.__lookup

2225 

2226 

class Path:
    """
    A pathlib-compatible interface for zip files.

    Consider a zip file with this structure::

        .
        ├── a.txt
        └── b
            ├── c.txt
            └── d
                └── e.txt

    >>> data = io.BytesIO()
    >>> zf = ZipFile(data, 'w')
    >>> zf.writestr('a.txt', 'content of a')
    >>> zf.writestr('b/c.txt', 'content of c')
    >>> zf.writestr('b/d/e.txt', 'content of e')
    >>> zf.filename = 'abcde.zip'

    Path accepts the zipfile object itself or a filename

    >>> root = Path(zf)

    From there, several path operations are available.

    Directory iteration (including the zip file itself):

    >>> a, b = root.iterdir()
    >>> a
    Path('abcde.zip', 'a.txt')
    >>> b
    Path('abcde.zip', 'b/')

    name property:

    >>> b.name
    'b'

    join with divide operator:

    >>> c = b / 'c.txt'
    >>> c
    Path('abcde.zip', 'b/c.txt')
    >>> c.name
    'c.txt'

    Read text:

    >>> c.read_text()
    'content of c'

    existence:

    >>> c.exists()
    True
    >>> (b / 'missing.txt').exists()
    False

    Coercion to string:

    >>> str(c)
    'abcde.zip/b/c.txt'
    """

    # repr template, formatted with self so subclasses report their own
    # class name.
    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

    def __init__(self, root, at=""):
        # root may be a ZipFile, an existing CompleteDirs, a filename, or
        # a file object; FastLookup.make normalizes all of these.
        self.root = FastLookup.make(root)
        self.at = at

    def open(self, mode='r', *args, **kwargs):
        """
        Open this entry as text or binary following the semantics
        of ``pathlib.Path.open()`` by passing arguments through
        to io.TextIOWrapper().
        """
        pwd = kwargs.pop('pwd', None)
        # ZipFile.open only understands 'r'/'w'; drop any 'b'/text suffix.
        zip_mode = mode[0]
        stream = self.root.open(self.at, zip_mode, pwd=pwd)
        if 'b' in mode:
            if args or kwargs:
                raise ValueError("encoding args invalid for binary operation")
            return stream
        return io.TextIOWrapper(stream, *args, **kwargs)

    @property
    def name(self):
        # Final path component, ignoring a trailing slash on directories.
        return posixpath.basename(self.at.rstrip("/"))

    def read_text(self, *args, **kwargs):
        """Read the entry as text; extra args go to io.TextIOWrapper."""
        with self.open('r', *args, **kwargs) as strm:
            return strm.read()

    def read_bytes(self):
        """Read the entry's raw bytes."""
        with self.open('rb') as strm:
            return strm.read()

    def _is_child(self, path):
        # True when *path* sits directly inside this directory.
        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")

    def _next(self, at):
        # Build a Path at *at* that shares this one's (wrapped) root.
        return Path(self.root, at)

    def is_dir(self):
        # The archive root is "" and directory entries end with "/".
        return not self.at or self.at.endswith("/")

    def is_file(self):
        return not self.is_dir()

    def exists(self):
        return self.at in self.root._name_set()

    def iterdir(self):
        """Iterate over the direct children of this directory."""
        if not self.is_dir():
            raise ValueError("Can't listdir a file")
        subs = map(self._next, self.root.namelist())
        return filter(self._is_child, subs)

    def __str__(self):
        return posixpath.join(self.root.filename, self.at)

    def __repr__(self):
        return self.__repr.format(self=self)

    def joinpath(self, add):
        """Return a new Path for *add* joined below this one."""
        # Renamed from ``next`` to avoid shadowing the builtin.
        # resolve_dir appends a trailing slash when the target names a
        # directory entry in the archive.
        next_at = posixpath.join(self.at, add)
        return self._next(self.root.resolve_dir(next_at))

    __truediv__ = joinpath

    @property
    def parent(self):
        """The parent directory of this path (as a directory Path)."""
        parent_at = posixpath.dirname(self.at.rstrip('/'))
        if parent_at:
            parent_at += '/'
        return self._next(parent_at)

2364 

2365 

def main(args=None):
    """Command-line interface: list, extract, create, or test a ZIP archive.

    args defaults to sys.argv[1:] (argparse's own default); exactly one
    of the four mutually exclusive actions must be selected.
    """
    import argparse

    description = 'A simple command-line interface for zipfile module.'
    parser = argparse.ArgumentParser(description=description)
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-l', '--list', metavar='<zipfile>',
                       help='Show listing of a zipfile')
    group.add_argument('-e', '--extract', nargs=2,
                       metavar=('<zipfile>', '<output_dir>'),
                       help='Extract zipfile into target dir')
    group.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create zipfile from sources')
    group.add_argument('-t', '--test', metavar='<zipfile>',
                       help='Test if a zipfile is valid')
    args = parser.parse_args(args)

    if args.test is not None:
        with ZipFile(args.test, 'r') as archive:
            first_bad = archive.testzip()
        if first_bad:
            print("The following enclosed file is corrupted: {!r}".format(first_bad))
        print("Done testing")

    elif args.list is not None:
        with ZipFile(args.list, 'r') as archive:
            archive.printdir()

    elif args.extract is not None:
        zip_src, target_dir = args.extract
        with ZipFile(zip_src, 'r') as archive:
            archive.extractall(target_dir)

    elif args.create is not None:
        zip_name, *files = args.create

        def addToZip(archive, fs_path, arc_path):
            # Files are deflated; directories recurse, writing an entry
            # for the directory itself only when arc_path is non-empty.
            if os.path.isfile(fs_path):
                archive.write(fs_path, arc_path, ZIP_DEFLATED)
            elif os.path.isdir(fs_path):
                if arc_path:
                    archive.write(fs_path, arc_path)
                for member in sorted(os.listdir(fs_path)):
                    addToZip(archive,
                             os.path.join(fs_path, member),
                             os.path.join(arc_path, member))
            # else: ignore

        with ZipFile(zip_name, 'w') as archive:
            for fs_path in files:
                arc_path = os.path.basename(fs_path)
                if not arc_path:
                    # Trailing slash: fall back to the directory's name.
                    arc_path = os.path.basename(os.path.dirname(fs_path))
                if arc_path in ('', os.curdir, os.pardir):
                    arc_path = ''
                addToZip(archive, fs_path, arc_path)

2425 

2426 

# Allow running the module directly, e.g. ``python zipfile.py -l archive.zip``.
if __name__ == "__main__":
    main()