Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/zipfile.py: 8%
1493 statements
« prev ^ index » next coverage.py v7.0.1, created at 2022-12-25 06:11 +0000
« prev ^ index » next coverage.py v7.0.1, created at 2022-12-25 06:11 +0000
1"""
2Read and write ZIP files.
4XXX references to utf-8 need further investigation.
5"""
6import binascii
7import functools
8import importlib.util
9import io
10import itertools
11import os
12import posixpath
13import shutil
14import stat
15import struct
16import sys
17import threading
18import time
19import contextlib
21try:
22 import zlib # We may need its compression method
23 crc32 = zlib.crc32
24except ImportError:
25 zlib = None
26 crc32 = binascii.crc32
28try:
29 import bz2 # We may need its compression method
30except ImportError:
31 bz2 = None
33try:
34 import lzma # We may need its compression method
35except ImportError:
36 lzma = None
# Public API re-exported by ``from zipfile import *``.  "BadZipfile" and
# "error" are legacy aliases kept for pre-3.2 compatibility (see below).
__all__ = ["BadZipFile", "BadZipfile", "error",
           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
class BadZipFile(Exception):
    """Raised when a file is not a valid ZIP archive or is corrupted."""
class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """
# Legacy spellings: ``zipfile.error`` and the pre-3.2 capitalization
# ``BadZipfile`` both alias BadZipFile so old callers keep working.
error = BadZipfile = BadZipFile  # Pre-3.2 compatibility names
# Sizes beyond these limits force (or require) the ZIP64 format.
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

# "version needed to extract" values written into headers.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63

# Below are some formats and associated data for reading/writing headers using
# the struct module. The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9

# Signature of the optional "data descriptor" record that follows file data
# when bit 3 of the general-purpose flags is set.
_DD_SIGNATURE = 0x08074b50

# Pre-compiled (id, length) header of a single extra-field block.
_EXTRA_FIELD_STRUCT = struct.Struct('<HH')
170def _strip_extra(extra, xids):
171 # Remove Extra Fields with specified IDs.
172 unpack = _EXTRA_FIELD_STRUCT.unpack
173 modified = False
174 buffer = []
175 start = i = 0
176 while i + 4 <= len(extra):
177 xid, xlen = unpack(extra[i : i + 4])
178 j = i + 4 + xlen
179 if xid in xids:
180 if i != start:
181 buffer.append(extra[start : i])
182 start = j
183 modified = True
184 i = j
185 if not modified:
186 return extra
187 return b''.join(buffer)
def _check_zipfile(fp):
    """Return True if the open file object *fp* contains a valid ZIP
    "End of Central Directory" record; swallow OSError from seeks on
    short or unseekable files."""
    try:
        return bool(_EndRecData(fp))  # truthy endrec list -> correct magic
    except OSError:
        return False
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.
    """
    try:
        # Already-open file-like objects are probed in place; paths are
        # opened (and closed) here.
        if hasattr(filename, "read"):
            return _check_zipfile(fp=filename)
        with open(filename, "rb") as fp:
            return _check_zipfile(fp)
    except OSError:
        return False
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec.

    fpin   -- open binary file object positioned anywhere (it is seeked).
    offset -- negative offset (from EOF) of the classic end-of-central-dir
              record already found.
    endrec -- mutable list of ECD fields; updated in place and returned.
              Returned unchanged when no valid ZIP64 records exist.
    """
    try:
        # The ZIP64 locator sits immediately before the classic ECD record.
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks > 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
        data[0:4] == stringEndArchive and
        data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = struct.unpack(structEndArchive, data)
        endrec = list(endrec)

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # Zip file is corrupted.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    # __slots__ keeps per-member memory low; archives can contain many
    # thousands of entries.
    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Initialize a member record.

        filename  -- name the member will have inside the archive.
        date_time -- 6-tuple (year, month, day, hour, minute, second);
                     ZIP timestamps cannot represent years before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte. Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED  # Type of compression for the file
        self._compresslevel = None       # Level for the compressor
        self.comment = b""               # Comment for each file
        self.extra = b""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0       # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3       # System which created ZIP archive
        self.create_version = DEFAULT_VERSION   # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION  # Version needed to extract archive
        self.reserved = 0                # Must be zero
        self.flag_bits = 0               # ZIP flag bits
        self.volume = 0                  # Volume number of file header
        self.internal_attr = 0           # Internal attributes
        self.external_attr = 0           # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def __repr__(self):
        # Only show attributes that carry information for this member.
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        hi = self.external_attr >> 16       # upper 16 bits: Unix mode bits
        lo = self.external_attr & 0xFFFF    # lower 16 bits: MS-DOS attributes
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object."""
        # Pack the modification time into the 16-bit MS-DOS date/time fields
        # (2-second resolution for seconds).
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            # Caller left the decision to us: use ZIP64 only when needed.
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            # Append the ZIP64 extra-field block (id 1) carrying 64-bit sizes.
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        # ASCII names are stored as-is; anything else is stored UTF-8 with
        # the "language encoding" general-purpose flag (bit 11) set.
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                # ZIP64 extra field: up to three 8-byte counts whose meaning
                # depends on which 32-bit header fields are saturated.
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    if len(counts) <= idx:
                        raise BadZipFile(
                            "Corrupt zip64 extra field. File size not found."
                        )
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    if len(counts) <= idx:
                        raise BadZipFile(
                            "Corrupt zip64 extra field. Compress size not found."
                        )
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    if len(counts) <= idx:
                        raise BadZipFile(
                            "Corrupt zip64 extra field. Header offset not found."
                        )
                    old = self.header_offset  # NOTE(review): 'old' is unused
                    self.header_offset = counts[idx]
                    idx+=1

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Clamp out-of-range timestamps instead of failing when the caller
        # opted out of strict checking.
        if not strict_timestamps and date_time[0] < 1980:
            date_time = (1980, 1, 1, 0, 0, 0)
        elif not strict_timestamps and date_time[0] > 2107:
            date_time = (2107, 12, 31, 23, 59, 59)
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        return self.filename[-1] == '/'
553# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
554# internal keys. We noticed that a direct implementation is faster than
555# relying on binascii.crc32().
557_crctable = None
558def _gen_crc(crc):
559 for j in range(8):
560 if crc & 1:
561 crc = (crc >> 1) ^ 0xEDB88320
562 else:
563 crc >>= 1
564 return crc
# ZIP supports a password-based form of encryption. Even though known
# plaintext attacks have been found against it, it is still useful
# to be able to get data out of such a file.
#
# Usage:
#     zd = _ZipDecrypter(mypwd)
#     plain_bytes = zd(cypher_bytes)

def _ZipDecrypter(pwd):
    """Return a callable decrypting bytes enciphered with the traditional
    PKZIP stream cipher, keyed from the password *pwd* (bytes)."""
    # Cipher state: three 32-bit keys with fixed initial values.
    key0 = 305419896
    key1 = 591751049
    key2 = 878082192

    # Build the CRC table once and cache it at module level.
    global _crctable
    if _crctable is None:
        _crctable = list(map(_gen_crc, range(256)))
    crctable = _crctable

    def crc32(ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]

    def update_keys(c):
        nonlocal key0, key1, key2
        key0 = crc32(c, key0)
        key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
        key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
        key2 = crc32(key1 >> 24, key2)

    # Mix every password byte into the key state.
    for p in pwd:
        update_keys(p)

    def decrypter(data):
        """Decrypt a bytes object."""
        plain = bytearray()
        emit = plain.append
        for byte in data:
            keystream = key2 | 2
            byte ^= ((keystream * (keystream ^ 1)) >> 8) & 0xFF
            update_keys(byte)
            emit(byte)
        return bytes(plain)

    return decrypter
class LZMACompressor:
    """Compressor emitting the payload layout ZIP method 14 expects: a
    version/properties header followed by a raw LZMA1 stream."""

    def __init__(self):
        # The real compressor is created lazily by the first call.
        self._comp = None

    def _init(self):
        # Encode the LZMA1 filter properties and build the 4-byte
        # (major, minor, props-size) preamble that precedes them.
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[
            lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        ])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        header = b'' if self._comp is not None else self._init()
        return header + self._comp.compress(data)

    def flush(self):
        header = b'' if self._comp is not None else self._init()
        return header + self._comp.flush()
class LZMADecompressor:
    """Decompressor for ZIP method 14: buffers bytes until the properties
    header is complete, then streams through a raw LZMA1 decompressor."""

    def __init__(self):
        self._decomp = None      # real decompressor, created after the header
        self._unconsumed = b''   # header bytes accumulated so far
        self.eof = False

    def decompress(self, data):
        if self._decomp is None:
            # Still waiting for the 4-byte preamble plus the properties blob.
            self._unconsumed += data
            buffered = self._unconsumed
            if len(buffered) <= 4:
                return b''
            psize, = struct.unpack('<H', buffered[2:4])
            if len(buffered) <= 4 + psize:
                return b''

            filters = [lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, buffered[4:4 + psize])]
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=filters)
            data = buffered[4 + psize:]
            del self._unconsumed  # release the header buffer

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
# Map of compression-method number -> human-readable name, used by the
# __repr__ methods and "unsupported method" error messages below.  Only
# methods 0, 8, 12 and 14 are actually handled by this module.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
def _check_compression(compression):
    """Raise if *compression* names a method whose backing module is
    unavailable (RuntimeError) or one this module never supports
    (NotImplementedError)."""
    if compression == ZIP_STORED:
        return
    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
    elif compression == ZIP_LZMA:
        if not lzma:
            raise RuntimeError(
                "Compression requires the (missing) lzma module")
    else:
        raise NotImplementedError("That compression method is not supported")
def _get_compressor(compress_type, compresslevel=None):
    """Return a fresh compressor object for *compress_type*, or None for
    stored (and unknown) methods.  *compresslevel* is forwarded where the
    codec supports it."""
    if compress_type == ZIP_DEFLATED:
        if compresslevel is None:
            return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15)
        return zlib.compressobj(compresslevel, zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        if compresslevel is None:
            return bz2.BZ2Compressor()
        return bz2.BZ2Compressor(compresslevel)
    # compresslevel is ignored for ZIP_LZMA
    if compress_type == ZIP_LZMA:
        return LZMACompressor()
    return None
def _get_decompressor(compress_type):
    """Return a fresh decompressor for *compress_type* (None for stored).

    Raises via _check_compression when the backing module is missing, and
    NotImplementedError for methods this module does not implement.
    """
    _check_compression(compress_type)
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()
    # Unknown method: report it by name when we recognize the number.
    descr = compressor_names.get(compress_type)
    if descr:
        raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
    raise NotImplementedError("compression type %d" % (compress_type,))
736class _SharedFile:
737 def __init__(self, file, pos, close, lock, writing):
738 self._file = file
739 self._pos = pos
740 self._close = close
741 self._lock = lock
742 self._writing = writing
743 self.seekable = file.seekable
744 self.tell = file.tell
746 def seek(self, offset, whence=0):
747 with self._lock:
748 if self._writing():
749 raise ValueError("Can't reposition in the ZIP file while "
750 "there is an open writing handle on it. "
751 "Close the writing handle before trying to read.")
752 self._file.seek(offset, whence)
753 self._pos = self._file.tell()
754 return self._pos
756 def read(self, n=-1):
757 with self._lock:
758 if self._writing():
759 raise ValueError("Can't read from the ZIP file while there "
760 "is an open writing handle on it. "
761 "Close the writing handle before trying to read.")
762 self._file.seek(self._pos)
763 data = self._file.read(n)
764 self._pos = self._file.tell()
765 return data
767 def close(self):
768 if self._file is not None:
769 fileobj = self._file
770 self._file = None
771 self._close(fileobj)
773# Provide the tell method for unseekable stream
774class _Tellable:
775 def __init__(self, fp):
776 self.fp = fp
777 self.offset = 0
779 def write(self, data):
780 n = self.fp.write(data)
781 self.offset += n
782 return n
784 def tell(self):
785 return self.offset
787 def flush(self):
788 self.fp.flush()
790 def close(self):
791 self.fp.close()
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    # NOTE(review): '-' binds tighter than '<<', so this is 1 << 30, not
    # 2**31 - 1 — confirm whether that is intentional.
    MAX_N = 1 << 31 - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Chunk size to read during seek
    MAX_SEEK_READ = 1 << 24

    def __init__(self, fileobj, mode, zipinfo, pwd=None,
                 close_fileobj=False):
        self._fileobj = fileobj
        self._pwd = pwd
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size  # compressed bytes left to read
        self._left = zipinfo.file_size               # uncompressed bytes left to emit

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''   # decompressed-but-unconsumed bytes
        self._offset = 0         # read cursor inside _readbuffer

        self.newlines = None

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            self._expected_crc = None

        self._seekable = False
        try:
            if fileobj.seekable():
                self._orig_compress_start = fileobj.tell()
                self._orig_compress_size = zipinfo.compress_size
                self._orig_file_size = zipinfo.file_size
                # NOTE(review): if zipinfo lacks CRC, _running_crc is unset
                # and this raises AttributeError, silently leaving the
                # stream unseekable — confirm that is the intent.
                self._orig_start_crc = self._running_crc
                self._seekable = True
        except AttributeError:
            pass

        self._decrypter = None
        if pwd:
            if zipinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zipinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zipinfo.CRC >> 24) & 0xff
            h = self._init_decrypter()
            if h != check_byte:
                raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)

    def _init_decrypter(self):
        self._decrypter = _ZipDecrypter(self._pwd)
        # The first 12 bytes in the cypher stream is an encryption header
        # used to strengthen the algorithm. The first 11 bytes are
        # completely random, while the 12th contains the MSB of the CRC,
        # or the MSB of the file time depending on the header type
        # and is used to check the correctness of the password.
        header = self._fileobj.read(12)
        self._compress_left -= 12
        return self._decrypter(header)[11]

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        if n is None or n < 0:
            # Drain the buffer, then decompress until end of member.
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # Entire request can be served from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Keep the surplus for the next call.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never emit more than the member's declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        # Read up to n raw (still compressed/encrypted) bytes from the
        # underlying file, decrypting them if needed.
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            raise EOFError

        if self._decrypter is not None:
            data = self._decrypter(data)
        return data

    def close(self):
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()

    def seekable(self):
        return self._seekable

    def seek(self, offset, whence=0):
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0:  # Seek from start of file
            new_pos = offset
        elif whence == 1:  # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2:  # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        # Clamp the target into [0, file_size].
        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            # to the start of the member and re-read forward from there.
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos
            if self._decrypter is not None:
                self._init_decrypter()

        # Consume forward in bounded chunks until the target is reached.
        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()

    def tell(self):
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
        return filepos
class _ZipWriteFile(io.BufferedIOBase):
    """Writable stream returned by ZipFile.open(..., mode='w').

    Compresses data as it is written, maintains running size/CRC counters,
    and on close() patches the member's local header (or appends a data
    descriptor) with the final sizes and CRC.
    """

    def __init__(self, zf, zinfo, zip64):
        self._zinfo = zinfo      # ZipInfo record being written
        self._zip64 = zip64      # whether ZIP64 header fields are in use
        self._zipfile = zf       # owning ZipFile
        self._compressor = _get_compressor(zinfo.compress_type,
                                           zinfo._compresslevel)
        self._file_size = 0      # running uncompressed byte count
        self._compress_size = 0  # running compressed byte count
        self._crc = 0            # running CRC-32 of the uncompressed data

    @property
    def _fileobj(self):
        # Resolved through the owning ZipFile so it always tracks its fp.
        return self._zipfile.fp

    def writable(self):
        return True

    def write(self, data):
        """Write *data* to the archive; returns the uncompressed length."""
        if self.closed:
            raise ValueError('I/O operation on closed file.')
        nbytes = len(data)
        self._file_size += nbytes
        self._crc = crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)
            self._compress_size += len(data)
        self._fileobj.write(data)
        return nbytes

    def close(self):
        """Finish the member: flush compressor and fix up header metadata."""
        if self.closed:
            return
        try:
            super().close()
            # Flush any data from the compressor, and update header info
            if self._compressor:
                buf = self._compressor.flush()
                self._compress_size += len(buf)
                self._fileobj.write(buf)
                self._zinfo.compress_size = self._compress_size
            else:
                self._zinfo.compress_size = self._file_size
            self._zinfo.CRC = self._crc
            self._zinfo.file_size = self._file_size

            # Write updated header info
            if self._zinfo.flag_bits & 0x08:
                # Write CRC and file sizes after the file data
                fmt = '<LLQQ' if self._zip64 else '<LLLL'
                self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
                    self._zinfo.compress_size, self._zinfo.file_size))
                self._zipfile.start_dir = self._fileobj.tell()
            else:
                if not self._zip64:
                    if self._file_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'File size unexpectedly exceeded ZIP64 limit')
                    if self._compress_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'Compressed size unexpectedly exceeded ZIP64 limit')
                # Seek backwards and write file header (which will now include
                # correct CRC and file sizes)

                # Preserve current position in file
                self._zipfile.start_dir = self._fileobj.tell()
                self._fileobj.seek(self._zinfo.header_offset)
                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
                self._fileobj.seek(self._zipfile.start_dir)

            # Successfully written: Add file to our caches
            self._zipfile.filelist.append(self._zinfo)
            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
        finally:
            # Re-enable reads/writes on the owning ZipFile.
            self._zipfile._writing = False
class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
                compresslevel=None)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read 'r', write 'w', exclusive create 'x',
          or append 'a'.
    compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
                 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
    allowZip64: if True ZipFile will create files with ZIP64 extensions when
                needed, otherwise it will raise an exception when this would
                be necessary.
    compresslevel: None (default for the given compression type) or an integer
                   specifying the level to pass to the compressor.
                   When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
                   When using ZIP_DEFLATED integers 0 through 9 are accepted.
                   When using ZIP_BZIP2 integers 1 through 9 are accepted.

    """

    fp = None                   # Set here since __del__ checks it
    # Lazily-built str.maketrans table used by _sanitize_windows_name().
    _windows_illegal_name_trans_table = None
    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
                 compresslevel=None, *, strict_timestamps=True):
        """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
        or append 'a'."""
        if mode not in ('r', 'w', 'x', 'a'):
            raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

        _check_compression(compression)

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.compresslevel = compresslevel
        self.mode = mode
        self.pwd = None
        self._comment = b''
        self._strict_timestamps = strict_timestamps

        # Check if we were passed a file-like object
        if isinstance(file, os.PathLike):
            file = os.fspath(file)
        if isinstance(file, str):
            # No, it's a filename
            self._filePassed = 0
            self.filename = file
            # Each entry maps a mode to the next fallback to try on OSError,
            # e.g. 'a' on a nonexistent file: 'r+b' -> 'w+b'.
            modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                        'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
            filemode = modeDict[mode]
            while True:
                try:
                    self.fp = io.open(file, filemode)
                except OSError:
                    if filemode in modeDict:
                        filemode = modeDict[filemode]
                        continue
                    raise
                break
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)
        self._fileRefCnt = 1
        self._lock = threading.RLock()
        self._seekable = True
        self._writing = False

        try:
            if mode == 'r':
                self._RealGetContents()
            elif mode in ('w', 'x'):
                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                try:
                    self.start_dir = self.fp.tell()
                except (AttributeError, OSError):
                    # No tell(): wrap in _Tellable so positions can be tracked.
                    self.fp = _Tellable(self.fp)
                    self.start_dir = 0
                    self._seekable = False
                else:
                    # Some file-like objects can provide tell() but not seek()
                    try:
                        self.fp.seek(self.start_dir)
                    except (AttributeError, OSError):
                        self._seekable = False
            elif mode == 'a':
                try:
                    # See if file is a zip file
                    self._RealGetContents()
                    # seek to start of directory and overwrite
                    self.fp.seek(self.start_dir)
                except BadZipFile:
                    # file is not a zip file, just append
                    self.fp.seek(0, 2)

                    # set the modified flag so central directory gets written
                    # even if no files are added to the archive
                    self._didModify = True
                    self.start_dir = self.fp.tell()
            else:
                raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
        except:
            # On any setup failure, release the file before re-raising.
            fp = self.fp
            self.fp = None
            self._fpclose(fp)
            raise
    def __enter__(self):
        """Context-manager support: return the open archive itself."""
        return self
    def __exit__(self, type, value, traceback):
        """Close the archive on context-manager exit; exceptions propagate."""
        self.close()
1313 def __repr__(self):
1314 result = ['<%s.%s' % (self.__class__.__module__,
1315 self.__class__.__qualname__)]
1316 if self.fp is not None:
1317 if self._filePassed:
1318 result.append(' file=%r' % self.fp)
1319 elif self.filename is not None:
1320 result.append(' filename=%r' % self.filename)
1321 result.append(' mode=%r' % self.mode)
1322 else:
1323 result.append(' [closed]')
1324 result.append('>')
1325 return ''.join(result)
    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Populates self.filelist / self.NameToInfo from the central
        directory; raises BadZipFile if the file is not a valid archive.
        """
        fp = self.fp
        try:
            endrec = _EndRecData(fp)
        except OSError:
            raise BadZipFile("File is not a zip file")
        if not endrec:
            raise BadZipFile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
        self._comment = endrec[_ECD_COMMENT]    # archive comment

        # "concat" is zero, unless zip was concatenated to another file
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = concat + offset_cd
            print("given, inferred, offset", offset_cd, inferred, concat)
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        data = fp.read(size_cd)
        # Parse the directory from an in-memory copy of its bytes.
        fp = io.BytesIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if len(centdir) != sizeCentralDir:
                raise BadZipFile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if centdir[_CD_SIGNATURE] != stringCentralDir:
                raise BadZipFile("Bad magic number for central directory")
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            flags = centdir[5]
            if flags & 0x800:
                # UTF-8 file names extension
                filename = filename.decode('utf-8')
            else:
                # Historical ZIP filename encoding
                filename = filename.decode('cp437')
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
             x.flag_bits, x.compress_type, t, d,
             x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            if x.extract_version > MAX_EXTRACT_VERSION:
                raise NotImplementedError("zip file version %.1f" %
                                          (x.extract_version / 10))
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            x._decodeExtra()
            # Local header offsets are relative to the archive start, so
            # shift them by any prepended data.
            x.header_offset = x.header_offset + concat
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
1405 def namelist(self):
1406 """Return a list of file names in the archive."""
1407 return [data.filename for data in self.filelist]
    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        # Note: this is the live internal list, not a copy.
        return self.filelist
1414 def printdir(self, file=None):
1415 """Print a table of contents for the zip file."""
1416 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
1417 file=file)
1418 for zinfo in self.filelist:
1419 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
1420 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
1421 file=file)
1423 def testzip(self):
1424 """Read all the files and check the CRC."""
1425 chunk_size = 2 ** 20
1426 for zinfo in self.filelist:
1427 try:
1428 # Read by chunks, to avoid an OverflowError or a
1429 # MemoryError with very large embedded files.
1430 with self.open(zinfo.filename, "r") as f:
1431 while f.read(chunk_size): # Check CRC-32
1432 pass
1433 except BadZipFile:
1434 return zinfo.filename
1436 def getinfo(self, name):
1437 """Return the instance of ZipInfo given 'name'."""
1438 info = self.NameToInfo.get(name)
1439 if info is None:
1440 raise KeyError(
1441 'There is no item named %r in the archive' % name)
1443 return info
1445 def setpassword(self, pwd):
1446 """Set default password for encrypted files."""
1447 if pwd and not isinstance(pwd, bytes):
1448 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
1449 if pwd:
1450 self.pwd = pwd
1451 else:
1452 self.pwd = None
    @property
    def comment(self):
        """The comment text associated with the ZIP file."""
        return self._comment

    @comment.setter
    def comment(self, comment):
        # The comment is stored as raw bytes in the end-of-central-directory
        # record, so only bytes is accepted.
        if not isinstance(comment, bytes):
            raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
        # check for valid comment length
        if len(comment) > ZIP_MAX_COMMENT:
            import warnings
            warnings.warn('Archive comment is too long; truncating to %d bytes'
                          % ZIP_MAX_COMMENT, stacklevel=2)
            comment = comment[:ZIP_MAX_COMMENT]
        self._comment = comment
        # Force the end record (which carries the comment) to be rewritten.
        self._didModify = True
    def read(self, name, pwd=None):
        """Return file bytes for name.

        pwd is the optional password used for encrypted members.
        """
        with self.open(name, "r", pwd) as fp:
            return fp.read()
    def open(self, name, mode="r", pwd=None, *, force_zip64=False):
        """Return file-like object for 'name'.

        name is a string for the file name within the ZIP file, or a ZipInfo
        object.

        mode should be 'r' to read a file already in the ZIP file, or 'w' to
        write to a file newly added to the archive.

        pwd is the password to decrypt files (only used for reading).

        When writing, if the file size is not known in advance but may exceed
        2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
        files.  If the size is known in advance, it is best to pass a ZipInfo
        instance for name, with zinfo.file_size set.
        """
        if mode not in {"r", "w"}:
            raise ValueError('open() requires mode "r" or "w"')
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if pwd and (mode == "w"):
            raise ValueError("pwd is only supported for reading files")
        if not self.fp:
            raise ValueError(
                "Attempt to use ZIP archive that was already closed")

        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        elif mode == 'w':
            zinfo = ZipInfo(name)
            zinfo.compress_type = self.compression
            zinfo._compresslevel = self.compresslevel
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        if mode == 'w':
            return self._open_to_write(zinfo, force_zip64=force_zip64)

        if self._writing:
            raise ValueError("Can't read from the ZIP file while there "
                    "is an open writing handle on it. "
                    "Close the writing handle before trying to read.")

        # Open for reading:
        self._fileRefCnt += 1
        zef_file = _SharedFile(self.fp, zinfo.header_offset,
                               self._fpclose, self._lock, lambda: self._writing)
        try:
            # Skip the file header:
            fheader = zef_file.read(sizeFileHeader)
            if len(fheader) != sizeFileHeader:
                raise BadZipFile("Truncated file header")
            fheader = struct.unpack(structFileHeader, fheader)
            if fheader[_FH_SIGNATURE] != stringFileHeader:
                raise BadZipFile("Bad magic number for file header")

            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            if zinfo.flag_bits & 0x20:
                # Zip 2.7: compressed patched data
                raise NotImplementedError("compressed patched data (flag bit 5)")

            if zinfo.flag_bits & 0x40:
                # strong encryption
                raise NotImplementedError("strong encryption (flag bit 6)")

            if zinfo.flag_bits & 0x800:
                # UTF-8 filename
                fname_str = fname.decode("utf-8")
            else:
                fname_str = fname.decode("cp437")

            # Cross-check the local header against the central directory.
            if fname_str != zinfo.orig_filename:
                raise BadZipFile(
                    'File name in directory %r and header %r differ.'
                    % (zinfo.orig_filename, fname))

            # check for encrypted flag & handle password
            is_encrypted = zinfo.flag_bits & 0x1
            if is_encrypted:
                if not pwd:
                    pwd = self.pwd
                if not pwd:
                    raise RuntimeError("File %r is encrypted, password "
                                       "required for extraction" % name)
            else:
                # Ignore any password supplied for an unencrypted member.
                pwd = None

            return ZipExtFile(zef_file, mode, zinfo, pwd, True)
        except:
            # Undo the file refcount bump on any failure.
            zef_file.close()
            raise
    def _open_to_write(self, zinfo, force_zip64=False):
        """Return a _ZipWriteFile for writing member *zinfo*.

        force_zip64 forces ZIP64 headers when the final size is unknown.
        """
        if force_zip64 and not self._allowZip64:
            raise ValueError(
                "force_zip64 is True, but allowZip64 was False when opening "
                "the ZIP file."
            )
        if self._writing:
            raise ValueError("Can't write to the ZIP file while there is "
                             "another write handle open on it. "
                             "Close the first handle before opening another.")

        # Sizes and CRC are overwritten with correct data after processing the file
        if not hasattr(zinfo, 'file_size'):
            zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0

        zinfo.flag_bits = 0x00
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02
        if not self._seekable:
            # Can't seek back to patch the header later, so a data
            # descriptor (flag bit 3) will carry the final sizes/CRC.
            zinfo.flag_bits |= 0x08

        if not zinfo.external_attr:
            zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------

        # Compressed size can be larger than uncompressed size
        zip64 = self._allowZip64 and \
                (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)

        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()

        self._writecheck(zinfo)
        self._didModify = True

        self.fp.write(zinfo.FileHeader(zip64))

        self._writing = True
        return _ZipWriteFile(self, zinfo, zip64)
1618 def extract(self, member, path=None, pwd=None):
1619 """Extract a member from the archive to the current working directory,
1620 using its full name. Its file information is extracted as accurately
1621 as possible. `member' may be a filename or a ZipInfo object. You can
1622 specify a different directory using `path'.
1623 """
1624 if path is None:
1625 path = os.getcwd()
1626 else:
1627 path = os.fspath(path)
1629 return self._extract_member(member, path, pwd)
1631 def extractall(self, path=None, members=None, pwd=None):
1632 """Extract all members from the archive to the current working
1633 directory. `path' specifies a different directory to extract to.
1634 `members' is optional and must be a subset of the list returned
1635 by namelist().
1636 """
1637 if members is None:
1638 members = self.namelist()
1640 if path is None:
1641 path = os.getcwd()
1642 else:
1643 path = os.fspath(path)
1645 for zipinfo in members:
1646 self._extract_member(zipinfo, path, pwd)
1648 @classmethod
1649 def _sanitize_windows_name(cls, arcname, pathsep):
1650 """Replace bad characters and remove trailing dots from parts."""
1651 table = cls._windows_illegal_name_trans_table
1652 if not table:
1653 illegal = ':<>|"?*'
1654 table = str.maketrans(illegal, '_' * len(illegal))
1655 cls._windows_illegal_name_trans_table = table
1656 arcname = arcname.translate(table)
1657 # remove trailing dots
1658 arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1659 # rejoin, removing empty parts.
1660 arcname = pathsep.join(x for x in arcname if x)
1661 return arcname
    def _extract_member(self, member, targetpath, pwd):
        """Extract the ZipInfo object 'member' to a physical
           file on the path targetpath.  Returns the created path.
        """
        if not isinstance(member, ZipInfo):
            member = self.getinfo(member)

        # build the destination pathname, replacing
        # forward slashes to platform specific separators.
        arcname = member.filename.replace('/', os.path.sep)

        if os.path.altsep:
            arcname = arcname.replace(os.path.altsep, os.path.sep)
        # interpret absolute pathname as relative, remove drive letter or
        # UNC path, redundant separators, "." and ".." components.
        arcname = os.path.splitdrive(arcname)[1]
        invalid_path_parts = ('', os.path.curdir, os.path.pardir)
        arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                                   if x not in invalid_path_parts)
        if os.path.sep == '\\':
            # filter illegal characters on Windows
            arcname = self._sanitize_windows_name(arcname, os.path.sep)

        targetpath = os.path.join(targetpath, arcname)
        targetpath = os.path.normpath(targetpath)

        # Create all upper directories if necessary.
        upperdirs = os.path.dirname(targetpath)
        if upperdirs and not os.path.exists(upperdirs):
            os.makedirs(upperdirs)

        if member.is_dir():
            # Directory members only need the directory to exist.
            if not os.path.isdir(targetpath):
                os.mkdir(targetpath)
            return targetpath

        with self.open(member, pwd=pwd) as source, \
             open(targetpath, "wb") as target:
            shutil.copyfileobj(source, target)

        return targetpath
1705 def _writecheck(self, zinfo):
1706 """Check for errors before writing a file to the archive."""
1707 if zinfo.filename in self.NameToInfo:
1708 import warnings
1709 warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
1710 if self.mode not in ('w', 'x', 'a'):
1711 raise ValueError("write() requires mode 'w', 'x', or 'a'")
1712 if not self.fp:
1713 raise ValueError(
1714 "Attempt to write ZIP archive that was already closed")
1715 _check_compression(zinfo.compress_type)
1716 if not self._allowZip64:
1717 requires_zip64 = None
1718 if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1719 requires_zip64 = "Files count"
1720 elif zinfo.file_size > ZIP64_LIMIT:
1721 requires_zip64 = "Filesize"
1722 elif zinfo.header_offset > ZIP64_LIMIT:
1723 requires_zip64 = "Zipfile size"
1724 if requires_zip64:
1725 raise LargeZipFile(requires_zip64 +
1726 " would require ZIP64 extensions")
    def write(self, filename, arcname=None,
              compress_type=None, compresslevel=None):
        """Put the bytes from filename into the archive under the name
        arcname."""
        if not self.fp:
            raise ValueError(
                "Attempt to write to ZIP archive that was already closed")
        if self._writing:
            raise ValueError(
                "Can't write to ZIP archive while an open writing handle exists"
            )

        zinfo = ZipInfo.from_file(filename, arcname,
                                  strict_timestamps=self._strict_timestamps)

        if zinfo.is_dir():
            zinfo.compress_size = 0
            zinfo.CRC = 0
        else:
            # Per-call arguments override the archive-wide defaults.
            if compress_type is not None:
                zinfo.compress_type = compress_type
            else:
                zinfo.compress_type = self.compression

            if compresslevel is not None:
                zinfo._compresslevel = compresslevel
            else:
                zinfo._compresslevel = self.compresslevel

        if zinfo.is_dir():
            # Directories get only a header, no data stream.
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                zinfo.header_offset = self.fp.tell()  # Start of header bytes
                if zinfo.compress_type == ZIP_LZMA:
                    # Compressed data includes an end-of-stream (EOS) marker
                    zinfo.flag_bits |= 0x02

                self._writecheck(zinfo)
                self._didModify = True

                self.filelist.append(zinfo)
                self.NameToInfo[zinfo.filename] = zinfo
                self.fp.write(zinfo.FileHeader(False))
                self.start_dir = self.fp.tell()
        else:
            # Stream the file's bytes through a write handle in 8 KiB chunks.
            with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
                shutil.copyfileobj(src, dest, 1024*8)
    def writestr(self, zinfo_or_arcname, data,
                 compress_type=None, compresslevel=None):
        """Write a file into the archive.  The contents is 'data', which
        may be either a 'str' or a 'bytes' instance; if it is a 'str',
        it is encoded as UTF-8 first.
        'zinfo_or_arcname' is either a ZipInfo instance or
        the name of the file in the archive."""
        if isinstance(data, str):
            data = data.encode("utf-8")
        if not isinstance(zinfo_or_arcname, ZipInfo):
            # Build a ZipInfo from the name, stamped with the current time.
            zinfo = ZipInfo(filename=zinfo_or_arcname,
                            date_time=time.localtime(time.time())[:6])
            zinfo.compress_type = self.compression
            zinfo._compresslevel = self.compresslevel
            if zinfo.filename[-1] == '/':
                zinfo.external_attr = 0o40775 << 16  # drwxrwxr-x
                zinfo.external_attr |= 0x10  # MS-DOS directory flag
            else:
                zinfo.external_attr = 0o600 << 16  # ?rw-------
        else:
            zinfo = zinfo_or_arcname

        if not self.fp:
            raise ValueError(
                "Attempt to write to ZIP archive that was already closed")
        if self._writing:
            raise ValueError(
                "Can't write to ZIP archive while an open writing handle exists."
            )

        # Per-call arguments override whatever the ZipInfo carries.
        if compress_type is not None:
            zinfo.compress_type = compress_type

        if compresslevel is not None:
            zinfo._compresslevel = compresslevel

        zinfo.file_size = len(data)            # Uncompressed size
        with self._lock:
            with self.open(zinfo, mode='w') as dest:
                dest.write(data)
    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        # close() is a no-op when self.fp is already None.
        self.close()
    def close(self):
        """Close the file, and for mode 'w', 'x' and 'a' write the ending
        records."""
        if self.fp is None:
            return

        if self._writing:
            raise ValueError("Can't close the ZIP file while there is "
                             "an open writing handle on it. "
                             "Close the writing handle before closing the zip.")

        try:
            if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
                with self._lock:
                    if self._seekable:
                        self.fp.seek(self.start_dir)
                    self._write_end_record()
        finally:
            # Always release the underlying file, even if writing the
            # end record fails.
            fp = self.fp
            self.fp = None
            self._fpclose(fp)
    def _write_end_record(self):
        """Write the central directory and end-of-archive record(s).

        Called from close(); assumes self.fp is positioned at start_dir.
        Emits ZIP64 end records when counts/sizes/offsets exceed the
        classic limits.
        """
        for zinfo in self.filelist:         # write central directory
            dt = zinfo.date_time
            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
            extra = []
            if zinfo.file_size > ZIP64_LIMIT \
               or zinfo.compress_size > ZIP64_LIMIT:
                # Real sizes go in the ZIP64 extra field; the classic
                # fields hold the 0xffffffff sentinel.
                extra.append(zinfo.file_size)
                extra.append(zinfo.compress_size)
                file_size = 0xffffffff
                compress_size = 0xffffffff
            else:
                file_size = zinfo.file_size
                compress_size = zinfo.compress_size

            if zinfo.header_offset > ZIP64_LIMIT:
                extra.append(zinfo.header_offset)
                header_offset = 0xffffffff
            else:
                header_offset = zinfo.header_offset

            extra_data = zinfo.extra
            min_version = 0
            if extra:
                # Append a ZIP64 field to the extra's
                extra_data = _strip_extra(extra_data, (1,))
                extra_data = struct.pack(
                    '<HH' + 'Q'*len(extra),
                    1, 8*len(extra), *extra) + extra_data

                min_version = ZIP64_VERSION

            if zinfo.compress_type == ZIP_BZIP2:
                min_version = max(BZIP2_VERSION, min_version)
            elif zinfo.compress_type == ZIP_LZMA:
                min_version = max(LZMA_VERSION, min_version)

            extract_version = max(min_version, zinfo.extract_version)
            create_version = max(min_version, zinfo.create_version)
            try:
                filename, flag_bits = zinfo._encodeFilenameFlags()
                centdir = struct.pack(structCentralDir,
                                      stringCentralDir, create_version,
                                      zinfo.create_system, extract_version, zinfo.reserved,
                                      flag_bits, zinfo.compress_type, dostime, dosdate,
                                      zinfo.CRC, compress_size, file_size,
                                      len(filename), len(extra_data), len(zinfo.comment),
                                      0, zinfo.internal_attr, zinfo.external_attr,
                                      header_offset)
            except DeprecationWarning:
                # Dump the offending values for debugging before re-raising.
                print((structCentralDir, stringCentralDir, create_version,
                       zinfo.create_system, extract_version, zinfo.reserved,
                       zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                       zinfo.CRC, compress_size, file_size,
                       len(zinfo.filename), len(extra_data), len(zinfo.comment),
                       0, zinfo.internal_attr, zinfo.external_attr,
                       header_offset), file=sys.stderr)
                raise
            self.fp.write(centdir)
            self.fp.write(filename)
            self.fp.write(extra_data)
            self.fp.write(zinfo.comment)

        pos2 = self.fp.tell()
        # Write end-of-zip-archive record
        centDirCount = len(self.filelist)
        centDirSize = pos2 - self.start_dir
        centDirOffset = self.start_dir
        requires_zip64 = None
        if centDirCount > ZIP_FILECOUNT_LIMIT:
            requires_zip64 = "Files count"
        elif centDirOffset > ZIP64_LIMIT:
            requires_zip64 = "Central directory offset"
        elif centDirSize > ZIP64_LIMIT:
            requires_zip64 = "Central directory size"
        if requires_zip64:
            # Need to write the ZIP64 end-of-archive records
            if not self._allowZip64:
                raise LargeZipFile(requires_zip64 +
                                   " would require ZIP64 extensions")
            zip64endrec = struct.pack(
                structEndArchive64, stringEndArchive64,
                44, 45, 45, 0, 0, centDirCount, centDirCount,
                centDirSize, centDirOffset)
            self.fp.write(zip64endrec)

            zip64locrec = struct.pack(
                structEndArchive64Locator,
                stringEndArchive64Locator, 0, pos2, 1)
            self.fp.write(zip64locrec)
            # The classic end record then carries capped/sentinel values.
            centDirCount = min(centDirCount, 0xFFFF)
            centDirSize = min(centDirSize, 0xFFFFFFFF)
            centDirOffset = min(centDirOffset, 0xFFFFFFFF)

        endrec = struct.pack(structEndArchive, stringEndArchive,
                             0, 0, centDirCount, centDirCount,
                             centDirSize, centDirOffset, len(self._comment))
        self.fp.write(endrec)
        self.fp.write(self._comment)
        self.fp.flush()
1946 def _fpclose(self, fp):
1947 assert self._fileRefCnt > 0
1948 self._fileRefCnt -= 1
1949 if not self._fileRefCnt and not self._filePassed:
1950 fp.close()
1953class PyZipFile(ZipFile):
1954 """Class to create ZIP archives with Python library files and packages."""
    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        """Open a PyZipFile.

        optimize selects the bytecode optimization level used when
        compiling modules (-1 means the interpreter's current setting).
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize
    def writepy(self, pathname, basename="", filterfunc=None):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.
        If filterfunc(pathname) is given, it is called with every argument.
        When it is False, the file or directory is skipped.
        """
        pathname = os.fspath(pathname)
        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s %r skipped by filterfunc' % (label, pathname))
            return
        # 'name' is the final path component; 'dir' is not used below.
        dir, name = os.path.split(pathname)
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in", pathname, "as", basename)
                fname, arcname = self._get_codename(initname[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
                dirlist = sorted(os.listdir(pathname))
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    root, ext = os.path.splitext(filename)
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename,
                                         filterfunc=filterfunc)  # Recursive call
                    elif ext == ".py":
                        if filterfunc and not filterfunc(path):
                            if self.debug:
                                print('file %r skipped by filterfunc' % path)
                            continue
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory", pathname)
                for filename in sorted(os.listdir(pathname)):
                    path = os.path.join(pathname, filename)
                    root, ext = os.path.splitext(filename)
                    if ext == ".py":
                        if filterfunc and not filterfunc(path):
                            if self.debug:
                                print('file %r skipped by filterfunc' % path)
                            continue
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
        else:
            # A single module file: must be a .py source file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)
    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).
        """
        def _compile(file, optimize=-1):
            # Byte-compile *file*.  Failures are reported on stdout and
            # signalled with False (not an exception) so the caller can
            # fall back to archiving the .py source instead.
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        # Candidate byte-code locations: the legacy foo.pyc next to the
        # source, plus the PEP 3147 __pycache__ entries for each
        # optimization level.
        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
        pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
        pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
        if self._optimize == -1:
            # legacy mode: use whatever file is present
            # Each candidate is accepted only if it is at least as new as
            # the source (mtime comparison), checked in priority order.
            if (os.path.isfile(file_pyc) and
                os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
                # Use .pyc file.
                arcname = fname = file_pyc
            elif (os.path.isfile(pycache_opt0) and
                  os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
                # file name in the archive.
                fname = pycache_opt0
                arcname = file_pyc
            elif (os.path.isfile(pycache_opt1) and
                  os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
                # file name in the archive.
                fname = pycache_opt1
                arcname = file_pyc
            elif (os.path.isfile(pycache_opt2) and
                  os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
                # file name in the archive.
                fname = pycache_opt2
                arcname = file_pyc
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    # Pick the cache file matching this interpreter's own
                    # optimization level, since that is what was just written.
                    if sys.flags.optimize == 0:
                        fname = pycache_opt0
                    elif sys.flags.optimize == 1:
                        fname = pycache_opt1
                    else:
                        fname = pycache_opt2
                    arcname = file_pyc
                else:
                    # Compilation failed: ship the source file as-is.
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname = pycache_opt0
                arcname = file_pyc
            else:
                arcname = file_pyc
                if self._optimize == 1:
                    fname = pycache_opt1
                elif self._optimize == 2:
                    fname = pycache_opt2
                else:
                    msg = "invalid value for 'optimize': {!r}".format(self._optimize)
                    raise ValueError(msg)
            # Recompile if the chosen cache file is missing or stale.
            if not (os.path.isfile(fname) and
                    os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py
        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
2127def _parents(path):
2128 """
2129 Given a path with elements separated by
2130 posixpath.sep, generate all parents of that path.
2132 >>> list(_parents('b/d'))
2133 ['b']
2134 >>> list(_parents('/b/d/'))
2135 ['/b']
2136 >>> list(_parents('b/d/f/'))
2137 ['b/d', 'b']
2138 >>> list(_parents('b'))
2139 []
2140 >>> list(_parents(''))
2141 []
2142 """
2143 return itertools.islice(_ancestry(path), 1, None)
2146def _ancestry(path):
2147 """
2148 Given a path with elements separated by
2149 posixpath.sep, generate all elements of that path
2151 >>> list(_ancestry('b/d'))
2152 ['b/d', 'b']
2153 >>> list(_ancestry('/b/d/'))
2154 ['/b/d', '/b']
2155 >>> list(_ancestry('b/d/f/'))
2156 ['b/d/f', 'b/d', 'b']
2157 >>> list(_ancestry('b'))
2158 ['b']
2159 >>> list(_ancestry(''))
2160 []
2161 """
2162 path = path.rstrip(posixpath.sep)
2163 while path and path != posixpath.sep:
2164 yield path
2165 path, tail = posixpath.split(path)
2168_dedupe = dict.fromkeys
2169"""Deduplicate an iterable in original order"""
2172def _difference(minuend, subtrahend):
2173 """
2174 Return items in minuend not in subtrahend, retaining order
2175 with O(1) lookup.
2176 """
2177 return itertools.filterfalse(set(subtrahend).__contains__, minuend)
class CompleteDirs(ZipFile):
    """
    A ZipFile subclass that ensures that implied directories
    are always included in the namelist.
    """

    @staticmethod
    def _implied_dirs(names):
        # Directory names implied by the member paths but missing from
        # *names*, rendered with a trailing slash and deduplicated in
        # original order.
        parents = itertools.chain.from_iterable(map(_parents, names))
        as_dirs = (p + posixpath.sep for p in parents)
        return _dedupe(_difference(as_dirs, names))

    def namelist(self):
        # Real entries first, then the synthesized directory entries.
        names = super(CompleteDirs, self).namelist()
        return names + list(self._implied_dirs(names))

    def _name_set(self):
        # Set of all names (including implied dirs) for O(1) membership
        # tests; recomputed on every call (FastLookup caches it).
        return set(self.namelist())

    def resolve_dir(self, name):
        """
        If the name represents a directory, return that name
        as a directory (with the trailing slash).
        """
        names = self._name_set()
        dirname = name + '/'
        dir_match = name not in names and dirname in names
        return dirname if dir_match else name

    @classmethod
    def make(cls, source):
        """
        Given a source (filename or zipfile), return an
        appropriate CompleteDirs subclass.
        """
        # Already wrapped: reuse as-is.
        if isinstance(source, CompleteDirs):
            return source

        # A plain filename/path: open it via the normal constructor.
        if not isinstance(source, ZipFile):
            return cls(source)

        # Only allow for FastPath when supplied zipfile is read-only
        # (a writable archive can grow, so cached name lookups would
        # go stale).
        if 'r' not in source.mode:
            cls = CompleteDirs

        # Re-class the existing ZipFile without re-reading the archive:
        # make a bare instance and copy over the open file's state.
        res = cls.__new__(cls)
        vars(res).update(vars(source))
        return res
class FastLookup(CompleteDirs):
    """
    ZipFile subclass to ensure implicit
    dirs exist and are resolved rapidly.
    """

    def namelist(self):
        # Compute the full name list once and cache it on the instance;
        # safe because FastLookup is only used for read-only archives.
        try:
            return self.__names
        except AttributeError:
            self.__names = super(FastLookup, self).namelist()
            return self.__names

    def _name_set(self):
        # Cached set form of namelist() for O(1) membership tests.
        try:
            return self.__lookup
        except AttributeError:
            self.__lookup = super(FastLookup, self)._name_set()
            return self.__lookup
class Path:
    """
    A pathlib-compatible interface for zip files.

    Consider a zip file with this structure::

        .
        ├── a.txt
        └── b
            ├── c.txt
            └── d
                └── e.txt

    >>> data = io.BytesIO()
    >>> zf = ZipFile(data, 'w')
    >>> zf.writestr('a.txt', 'content of a')
    >>> zf.writestr('b/c.txt', 'content of c')
    >>> zf.writestr('b/d/e.txt', 'content of e')
    >>> zf.filename = 'abcde.zip'

    Path accepts the zipfile object itself or a filename

    >>> root = Path(zf)

    From there, several path operations are available.

    Directory iteration (including the zip file itself):

    >>> a, b = root.iterdir()
    >>> a
    Path('abcde.zip', 'a.txt')
    >>> b
    Path('abcde.zip', 'b/')

    name property:

    >>> b.name
    'b'

    join with divide operator:

    >>> c = b / 'c.txt'
    >>> c
    Path('abcde.zip', 'b/c.txt')
    >>> c.name
    'c.txt'

    Read text:

    >>> c.read_text()
    'content of c'

    existence:

    >>> c.exists()
    True
    >>> (b / 'missing.txt').exists()
    False

    Coercion to string:

    >>> str(c)
    'abcde.zip/b/c.txt'
    """

    # Format template used by __repr__ below.
    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

    def __init__(self, root, at=""):
        # *root* may be a ZipFile, a CompleteDirs, or a filename;
        # FastLookup.make normalizes it.  *at* is the posix-style
        # subpath within the archive ("" is the root; directory paths
        # carry a trailing "/").
        self.root = FastLookup.make(root)
        self.at = at

    @property
    def open(self):
        # Bound opener for this entry; call it like ZipFile.open
        # minus the name argument.
        return functools.partial(self.root.open, self.at)

    @property
    def name(self):
        # Final path component, without any trailing slash.
        return posixpath.basename(self.at.rstrip("/"))

    def read_text(self, *args, **kwargs):
        # Extra args/kwargs are forwarded to TextIOWrapper
        # (encoding, errors, newline, ...).
        with self.open() as strm:
            return io.TextIOWrapper(strm, *args, **kwargs).read()

    def read_bytes(self):
        with self.open() as strm:
            return strm.read()

    def _is_child(self, path):
        # True when *path* is an immediate child of this directory.
        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")

    def _next(self, at):
        # Sibling constructor sharing the same (already-wrapped) root.
        return Path(self.root, at)

    def is_dir(self):
        # The archive root ("") and any name ending in "/" are dirs.
        return not self.at or self.at.endswith("/")

    def is_file(self):
        return not self.is_dir()

    def exists(self):
        return self.at in self.root._name_set()

    def iterdir(self):
        if not self.is_dir():
            raise ValueError("Can't listdir a file")
        subs = map(self._next, self.root.namelist())
        return filter(self._is_child, subs)

    def __str__(self):
        return posixpath.join(self.root.filename, self.at)

    def __repr__(self):
        return self.__repr.format(self=self)

    def joinpath(self, add):
        # resolve_dir re-adds the trailing "/" when *add* names a dir.
        next = posixpath.join(self.at, add)
        return self._next(self.root.resolve_dir(next))

    __truediv__ = joinpath

    @property
    def parent(self):
        parent_at = posixpath.dirname(self.at.rstrip('/'))
        if parent_at:
            parent_at += '/'
        return self._next(parent_at)
def main(args=None):
    """Run the command-line interface for the zipfile module.

    *args* is an optional argument list for argparse (defaults to
    sys.argv[1:]).  Exactly one of --list/--extract/--create/--test
    is required.
    """
    import argparse

    description = 'A simple command-line interface for zipfile module.'
    parser = argparse.ArgumentParser(description=description)
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-l', '--list', metavar='<zipfile>',
                       help='Show listing of a zipfile')
    group.add_argument('-e', '--extract', nargs=2,
                       metavar=('<zipfile>', '<output_dir>'),
                       help='Extract zipfile into target dir')
    group.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create zipfile from sources')
    group.add_argument('-t', '--test', metavar='<zipfile>',
                       help='Test if a zipfile is valid')
    parsed = parser.parse_args(args)

    if parsed.test is not None:
        # Validate CRCs of every member; testzip() names the first bad one.
        with ZipFile(parsed.test, 'r') as archive:
            badfile = archive.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif parsed.list is not None:
        with ZipFile(parsed.list, 'r') as archive:
            archive.printdir()

    elif parsed.extract is not None:
        source, target_dir = parsed.extract
        with ZipFile(source, 'r') as archive:
            archive.extractall(target_dir)

    elif parsed.create is not None:
        zip_name, *sources = parsed.create

        def add_entry(archive, path, zippath):
            # Store files deflated; recurse into directories.
            if os.path.isfile(path):
                archive.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                if zippath:
                    archive.write(path, zippath)
                for entry in sorted(os.listdir(path)):
                    add_entry(archive,
                              os.path.join(path, entry),
                              os.path.join(zippath, entry))
            # anything else (missing paths, special files) is ignored

        with ZipFile(zip_name, 'w') as archive:
            for path in sources:
                # Archive each source under its basename; sources that
                # reduce to '', '.' or '..' are added at the top level.
                zippath = os.path.basename(path)
                if not zippath:
                    zippath = os.path.basename(os.path.dirname(path))
                if zippath in ('', os.curdir, os.pardir):
                    zippath = ''
                add_entry(archive, path, zippath)
if __name__ == "__main__":
    # Support direct execution / `python -m zipfile` as a CLI tool.
    main()