1#!/usr/bin/env python3
2#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
5# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
6# All rights reserved.
7#
8# Permission is hereby granted, free of charge, to any person
9# obtaining a copy of this software and associated documentation
10# files (the "Software"), to deal in the Software without
11# restriction, including without limitation the rights to use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the
14# Software is furnished to do so, subject to the following
15# conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
32version = "0.9.0"
33__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
34__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
35
36#---------
37# Imports
38#---------
39from builtins import open as bltn_open
40import sys
41import os
42import io
43import shutil
44import stat
45import time
46import struct
47import copy
48import re
49import warnings
50
51try:
52 import pwd
53except ImportError:
54 pwd = None
55try:
56 import grp
57except ImportError:
58 grp = None
59
60# os.symlink on Windows prior to 6.0 raises NotImplementedError
61# OSError (winerror=1314) will be raised if the caller does not hold the
62# SeCreateSymbolicLinkPrivilege privilege
63symlink_exception = (AttributeError, NotImplementedError, OSError)
64
65# from tarfile import *
66__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError",
67 "CompressionError", "StreamError", "ExtractError", "HeaderError",
68 "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT",
69 "DEFAULT_FORMAT", "open","fully_trusted_filter", "data_filter",
70 "tar_filter", "FilterError", "AbsoluteLinkError",
71 "OutsideDestinationError", "SpecialFileError", "AbsolutePathError",
72 "LinkOutsideDestinationError"]
73
74
75#---------------------------------------------------------
76# tar constants
77#---------------------------------------------------------
78NUL = b"\0" # the null character
79BLOCKSIZE = 512 # length of processing blocks
80RECORDSIZE = BLOCKSIZE * 20 # length of records
81GNU_MAGIC = b"ustar \0" # magic gnu tar string
82POSIX_MAGIC = b"ustar\x0000" # magic posix tar string
83
84LENGTH_NAME = 100 # maximum length of a filename
85LENGTH_LINK = 100 # maximum length of a linkname
86LENGTH_PREFIX = 155 # maximum length of the prefix field
87
88REGTYPE = b"0" # regular file
89AREGTYPE = b"\0" # regular file
90LNKTYPE = b"1" # link (inside tarfile)
91SYMTYPE = b"2" # symbolic link
92CHRTYPE = b"3" # character special device
93BLKTYPE = b"4" # block special device
94DIRTYPE = b"5" # directory
95FIFOTYPE = b"6" # fifo special device
96CONTTYPE = b"7" # contiguous file
97
98GNUTYPE_LONGNAME = b"L" # GNU tar longname
99GNUTYPE_LONGLINK = b"K" # GNU tar longlink
100GNUTYPE_SPARSE = b"S" # GNU tar sparse file
101
102XHDTYPE = b"x" # POSIX.1-2001 extended header
103XGLTYPE = b"g" # POSIX.1-2001 global header
104SOLARIS_XHDTYPE = b"X" # Solaris extended header
105
106USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format
107GNU_FORMAT = 1 # GNU tar format
108PAX_FORMAT = 2 # POSIX.1-2001 (pax) format
109DEFAULT_FORMAT = PAX_FORMAT
110
111#---------------------------------------------------------
112# tarfile constants
113#---------------------------------------------------------
114# File types that tarfile supports:
115SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
116 SYMTYPE, DIRTYPE, FIFOTYPE,
117 CONTTYPE, CHRTYPE, BLKTYPE,
118 GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
119 GNUTYPE_SPARSE)
120
121# File types that will be treated as a regular file.
122REGULAR_TYPES = (REGTYPE, AREGTYPE,
123 CONTTYPE, GNUTYPE_SPARSE)
124
125# File types that are part of the GNU tar format.
126GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
127 GNUTYPE_SPARSE)
128
129# Fields from a pax header that override a TarInfo attribute.
130PAX_FIELDS = ("path", "linkpath", "size", "mtime",
131 "uid", "gid", "uname", "gname")
132
133# Fields from a pax header that are affected by hdrcharset.
134PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}
135
136# Fields in a pax header that are numbers, all other fields
137# are treated as strings.
138PAX_NUMBER_FIELDS = {
139 "atime": float,
140 "ctime": float,
141 "mtime": float,
142 "uid": int,
143 "gid": int,
144 "size": int
145}
146
147#---------------------------------------------------------
148# initialization
149#---------------------------------------------------------
150if os.name == "nt":
151 ENCODING = "utf-8"
152else:
153 ENCODING = sys.getfilesystemencoding()
154
155#---------------------------------------------------------
156# Some useful functions
157#---------------------------------------------------------
158
def stn(s, length, encoding, errors):
    """Encode a string into a fixed-length, NUL-padded bytes field.

    The encoded string is truncated to *length* bytes if too long,
    otherwise padded with NUL bytes to exactly *length*.  None is
    rejected because it cannot be represented in a header field.
    """
    if s is None:
        raise ValueError("metadata cannot contain None")
    encoded = s.encode(encoding, errors)
    return encoded[:length] + (length - len(encoded)) * NUL
166
def nts(s, encoding, errors):
    """Decode a NUL-terminated bytes object to a string.

    Everything from the first NUL byte onward is discarded before
    decoding with the given encoding and error handler.
    """
    end = s.find(b"\0")
    if end == -1:
        return s.decode(encoding, errors)
    return s[:end].decode(encoding, errors)
174
def nti(s):
    """Convert a tar number field (bytes) to a Python integer.

    Handles both encodings produced by itn(): plain octal digits and
    the GNU base-256 form, which is flagged by a leading 0o200
    (positive) or 0o377 (negative) byte.
    """
    marker = s[0]
    if marker in (0o200, 0o377):
        # GNU base-256: remaining bytes are a big-endian number.
        n = 0
        for byte in s[1:]:
            n = (n << 8) | byte
        if marker == 0o377:
            # Negative values are stored in two's complement.
            n -= 256 ** (len(s) - 1)
        return n
    # Plain octal digits, possibly space/NUL padded.
    try:
        text = nts(s, "ascii", "strict")
        return int(text.strip() or "0", 8)
    except ValueError:
        raise InvalidHeaderError("invalid header")
194
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a Python number to a tar number field.

    POSIX 1003.1-1988 requires numbers to be encoded as a string of
    octal digits followed by a null-byte, which allows values up to
    (8**(digits-1))-1.  GNU tar allows storing numbers greater than
    that if necessary: a leading 0o200 (positive) or 0o377 (negative)
    byte indicates that the following digits-1 bytes are a big-endian
    base-256 representation, allowing values up to (256**(digits-1))-1.

    Raises ValueError if the value does not fit in the field for the
    requested format.
    """
    # NOTE: the previous version assigned an unused local
    # (original_n = n); it has been removed.
    n = int(n)
    if 0 <= n < 8 ** (digits - 1):
        # Plain NUL-terminated octal representation.
        s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
    elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
        if n >= 0:
            s = bytearray([0o200])
        else:
            s = bytearray([0o377])
            # Store negatives in two's complement relative to 256**digits.
            n = 256 ** digits + n

        # Emit the magnitude big-endian by inserting low bytes after
        # the marker byte.
        for i in range(digits - 1):
            s.insert(1, n & 0o377)
            n >>= 8
    else:
        raise ValueError("overflow in number field")

    return s
224
def calc_chksums(buf):
    """Return (unsigned, signed) checksums for a 512-byte header block.

    The checksum is the sum of all header bytes with the 8-byte chksum
    field (offset 148) treated as if filled with spaces; the constant
    256 accounts for those 8 spaces (8 * 32).  Some historic tars (Sun
    and NeXT, per the GNU tar sources) summed signed chars, which
    differs when bytes have the high bit set, so both interpretations
    are computed.
    """
    unsigned = 256 + sum(struct.unpack_from("148B8x356B", buf))
    signed = 256 + sum(struct.unpack_from("148b8x356b", buf))
    return unsigned, signed
237
def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
    """Copy length bytes from fileobj src to fileobj dst.

    If length is None, copy the entire content.  Raises *exception*
    if src is exhausted before length bytes could be read.
    """
    bufsize = bufsize or 16 * 1024
    if length == 0:
        return
    if length is None:
        # Unbounded copy: delegate to shutil.
        shutil.copyfileobj(src, dst, bufsize)
        return

    # Bounded copy: full buffers followed by one partial read.
    full_blocks, leftover = divmod(length, bufsize)
    chunk_sizes = [bufsize] * full_blocks
    if leftover:
        chunk_sizes.append(leftover)
    for want in chunk_sizes:
        data = src.read(want)
        if len(data) < want:
            raise exception("unexpected end of data")
        dst.write(data)
262
def _safe_print(s):
    # Print s to stdout (followed by a space, no newline) without ever
    # raising UnicodeEncodeError: characters that stdout's encoding
    # cannot represent are replaced with backslash escapes first.
    encoding = getattr(sys.stdout, 'encoding', None)
    if encoding is not None:
        s = s.encode(encoding, 'backslashreplace').decode(encoding)
    print(s, end=' ')
268
269
class TarError(Exception):
    """Base exception for all tarfile errors."""
class ExtractError(TarError):
    """General exception for extract errors."""
class ReadError(TarError):
    """Exception for unreadable tar archives."""
class CompressionError(TarError):
    """Exception for unavailable compression methods."""
class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
class HeaderError(TarError):
    """Base exception for header errors."""
class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""
class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""
class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""
class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""
class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
303
304#---------------------------
305# internal stream interface
306#---------------------------
307class _LowLevelFile:
308 """Low-level file object. Supports reading and writing.
309 It is used instead of a regular file object for streaming
310 access.
311 """
312
313 def __init__(self, name, mode):
314 mode = {
315 "r": os.O_RDONLY,
316 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
317 }[mode]
318 if hasattr(os, "O_BINARY"):
319 mode |= os.O_BINARY
320 self.fd = os.open(name, mode, 0o666)
321
322 def close(self):
323 os.close(self.fd)
324
325 def read(self, size):
326 return os.read(self.fd, size)
327
328 def write(self, s):
329 os.write(self.fd, s)
330
class _Stream:
    """Class that serves as an adapter between TarFile and
    a stream-like object. The stream-like object only
    needs to have a read() or write() method that works with bytes,
    and the method is accessed blockwise.
    Use of gzip or bzip2 compression is possible.
    A stream-like object could be for example: sys.stdin.buffer,
    sys.stdout.buffer, a socket, a tape device etc.

    _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize,
            compresslevel):
        """Construct a _Stream object.

        mode is "r" or "w"; comptype is "tar", "gz", "bz2", "xz", or
        "*" (auto-detect).  Compression modules are imported lazily so
        a missing module only fails when its comptype is requested.
        """
        self._extfileobj = True
        if fileobj is None:
            # We opened the file ourselves, so we must also close it.
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name = name or ""
        self.mode = mode
        self.comptype = comptype
        self.fileobj = fileobj
        self.bufsize = bufsize
        self.buf = b""       # buffered raw (file-side) bytes
        self.pos = 0         # position in the uncompressed stream
        self.closed = False

        try:
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available") from None
                self.zlib = zlib
                self.crc = zlib.crc32(b"")
                if mode == "r":
                    self.exception = zlib.error
                    self._init_read_gz()
                else:
                    self._init_write_gz(compresslevel)

            elif comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available") from None
                if mode == "r":
                    self.dbuf = b""    # buffered decompressed bytes
                    self.cmp = bz2.BZ2Decompressor()
                    self.exception = OSError
                else:
                    self.cmp = bz2.BZ2Compressor(compresslevel)

            elif comptype == "xz":
                try:
                    import lzma
                except ImportError:
                    raise CompressionError("lzma module is not available") from None
                if mode == "r":
                    self.dbuf = b""    # buffered decompressed bytes
                    self.cmp = lzma.LZMADecompressor()
                    self.exception = lzma.LZMAError
                else:
                    self.cmp = lzma.LZMACompressor()

            elif comptype != "tar":
                raise CompressionError("unknown compression type %r" % comptype)

        except:
            # Initialization failed: release the file object if we own
            # it, mark the stream closed, and re-raise.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        # Last-resort cleanup; the hasattr guard protects against a
        # partially constructed instance.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self, compresslevel):
        """Initialize for writing with gzip compression.
        """
        # Raw deflate stream (negative wbits): the gzip header and
        # trailer are written by hand here and in close().
        self.cmp = self.zlib.compressobj(compresslevel,
                                         self.zlib.DEFLATED,
                                         -self.zlib.MAX_WBITS,
                                         self.zlib.DEF_MEM_LEVEL,
                                         0)
        timestamp = struct.pack("<L", int(time.time()))
        # Gzip header: magic, deflate method, FNAME flag set, mtime,
        # XFL=2, OS=255 (unknown) -- see RFC 1952.
        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        # Honor "directory components removed" from RFC1952
        self.name = os.path.basename(self.name)
        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            # CRC is computed over the uncompressed data.
            self.crc = self.zlib.crc32(s, self.crc)
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
        is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
        done on it afterwards.
        """
        if self.closed:
            return

        self.closed = True
        try:
            if self.mode == "w" and self.comptype != "tar":
                # Flush any data still held by the compressor.
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = b""
                if self.comptype == "gz":
                    # Gzip trailer: CRC32 and uncompressed size modulo
                    # 2**32 (ISIZE), both little-endian -- RFC 1952.
                    self.fileobj.write(struct.pack("<L", self.crc))
                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
        finally:
            if not self._extfileobj:
                self.fileobj.close()

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = b""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != b"\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != b"\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)    # skip mtime (4 bytes), XFL and OS

        if flag & 4:
            # FEXTRA: skip the extra field (2-byte length, then data).
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.read(xlen)
        if flag & 8:
            # FNAME: skip the NUL-terminated original file name.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # FCOMMENT: skip the NUL-terminated comment.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # FHCRC: skip the 2-byte header CRC.
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
        is forbidden.
        """
        if pos - self.pos >= 0:
            # Forward seek: read and discard the skipped bytes.
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in range(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size):
        """Return the next size number of bytes from the stream."""
        assert size is not None
        buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.
        """
        if self.comptype == "tar":
            return self.__read(size)

        # Decompress into dbuf until we have at least size bytes or the
        # underlying stream is exhausted.
        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            # Skip underlying buffer to avoid unaligned double buffering.
            if self.buf:
                buf = self.buf
                self.buf = b""
            else:
                buf = self.fileobj.read(self.bufsize)
                if not buf:
                    break
            try:
                buf = self.cmp.decompress(buf)
            except self.exception as e:
                raise ReadError("invalid compressed data") from e
            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
        read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        self.buf = t[size:]
        return t[:size]
574# class _Stream
575
576class _StreamProxy(object):
577 """Small proxy class that enables transparent compression
578 detection for the Stream interface (mode 'r|*').
579 """
580
581 def __init__(self, fileobj):
582 self.fileobj = fileobj
583 self.buf = self.fileobj.read(BLOCKSIZE)
584
585 def read(self, size):
586 self.read = self.fileobj.read
587 return self.buf
588
589 def getcomptype(self):
590 if self.buf.startswith(b"\x1f\x8b\x08"):
591 return "gz"
592 elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
593 return "bz2"
594 elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
595 return "xz"
596 else:
597 return "tar"
598
599 def close(self):
600 self.fileobj.close()
601# class StreamProxy
602
603#------------------------
604# Extraction file object
605#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
    provides a part of its data as an individual file
    object.

    Supports sparse members: `blockinfo` lists the (offset, size)
    data runs; gaps between runs read back as NUL bytes.
    """

    def __init__(self, fileobj, offset, size, name, blockinfo=None):
        # fileobj: underlying (seekable) file object, shared with others
        # offset:  physical position of this member's data in fileobj
        # size:    logical size of the member
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.position = 0
        self.name = name
        self.closed = False

        if blockinfo is None:
            # Non-sparse member: one contiguous data run.
            blockinfo = [(0, size)]

        # Construct a map with data and zero blocks.
        # Entries are (is_data, start, stop, realpos): start/stop are
        # logical positions, realpos the physical position in fileobj
        # (None for holes).  Note: the loop variables shadow the
        # offset/size parameters, which are no longer needed here.
        self.map_index = 0
        self.map = []
        lastpos = 0
        realpos = self.offset
        for offset, size in blockinfo:
            if offset > lastpos:
                # Hole before this data run.
                self.map.append((False, lastpos, offset, None))
            self.map.append((True, offset, offset + size, realpos))
            realpos += size
            lastpos = offset + size
        if lastpos < self.size:
            # Trailing hole up to the logical end of the member.
            self.map.append((False, lastpos, self.size, None))

    def flush(self):
        pass

    def readable(self):
        return True

    def writable(self):
        return False

    def seekable(self):
        return self.fileobj.seekable()

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position, whence=io.SEEK_SET):
        """Seek to a position in the file.

        The resulting position is clamped to [0, self.size].
        """
        if whence == io.SEEK_SET:
            self.position = min(max(position, 0), self.size)
        elif whence == io.SEEK_CUR:
            if position < 0:
                self.position = max(self.position + position, 0)
            else:
                self.position = min(self.position + position, self.size)
        elif whence == io.SEEK_END:
            self.position = max(min(self.size + position, self.size), 0)
        else:
            raise ValueError("Invalid argument")
        return self.position

    def read(self, size=None):
        """Read data from the file.
        """
        if size is None:
            size = self.size - self.position
        else:
            size = min(size, self.size - self.position)

        buf = b""
        while size > 0:
            # Find the map entry covering the current position; the
            # index wraps around since seeks may move backwards.
            while True:
                data, start, stop, offset = self.map[self.map_index]
                if start <= self.position < stop:
                    break
                else:
                    self.map_index += 1
                    if self.map_index == len(self.map):
                        self.map_index = 0
            length = min(size, stop - self.position)
            if data:
                # Data run: read the bytes from the underlying file.
                self.fileobj.seek(offset + (self.position - start))
                b = self.fileobj.read(length)
                if len(b) != length:
                    raise ReadError("unexpected end of data")
                buf += b
            else:
                # Hole: sparse regions read back as NUL bytes.
                buf += NUL * length
            size -= length
            self.position += length
        return buf

    def readinto(self, b):
        # Fill the writable buffer b, returning the byte count read.
        buf = self.read(len(b))
        b[:len(buf)] = buf
        return len(buf)

    def close(self):
        # Only marks this wrapper closed; the shared underlying file
        # object stays open.
        self.closed = True
708#class _FileInFile
709
class ExFileObject(io.BufferedReader):
    # File-like object for an archive member: wraps a _FileInFile view
    # of the member's data region (honoring sparse info) in a buffered
    # reader so callers get the usual read()/readline() API.

    def __init__(self, tarfile, tarinfo):
        fileobj = _FileInFile(tarfile.fileobj, tarinfo.offset_data,
                tarinfo.size, tarinfo.name, tarinfo.sparse)
        super().__init__(fileobj)
716#class ExFileObject
717
718
719#-----------------------------
720# extraction filters (PEP 706)
721#-----------------------------
722
class FilterError(TarError):
    """Base class for errors raised by extraction filters (PEP 706)."""
    pass
725
class AbsolutePathError(FilterError):
    """Raised for members whose name is an absolute path."""

    def __init__(self, tarinfo):
        self.tarinfo = tarinfo
        super().__init__(f'member {tarinfo.name!r} has an absolute path')
730
class OutsideDestinationError(FilterError):
    """Raised for members that would extract outside the destination."""

    def __init__(self, tarinfo, path):
        self.tarinfo = tarinfo
        self._path = path
        super().__init__(f'{tarinfo.name!r} would be extracted to {path!r}, '
                         + 'which is outside the destination')
737
class SpecialFileError(FilterError):
    """Raised for special files (devices, FIFOs) by the 'data' filter."""

    def __init__(self, tarinfo):
        self.tarinfo = tarinfo
        super().__init__(f'{tarinfo.name!r} is a special file')
742
class AbsoluteLinkError(FilterError):
    """Raised for hard/symbolic links targeting an absolute path."""

    def __init__(self, tarinfo):
        self.tarinfo = tarinfo
        super().__init__(f'{tarinfo.name!r} is a link to an absolute path')
747
class LinkOutsideDestinationError(FilterError):
    """Raised for links whose target resolves outside the destination."""

    def __init__(self, tarinfo, path):
        self.tarinfo = tarinfo
        self._path = path
        super().__init__(f'{tarinfo.name!r} would link to {path!r}, '
                         + 'which is outside the destination')
754
def _get_filtered_attrs(member, dest_path, for_data=True):
    """Return the TarInfo attributes of *member* that must be replaced
    before it is safe to extract into *dest_path*.

    Shared implementation of the 'tar' and 'data' extraction filters
    (PEP 706); for_data=True applies the stricter 'data' rules.
    Raises a FilterError subclass for members that must be rejected
    outright rather than sanitized.
    """
    new_attrs = {}
    name = member.name
    dest_path = os.path.realpath(dest_path)
    # Strip leading / (tar's directory separator) from filenames.
    # Include os.sep (target OS directory separator) as well.
    if name.startswith(('/', os.sep)):
        name = new_attrs['name'] = member.path.lstrip('/' + os.sep)
    if os.path.isabs(name):
        # Path is absolute even after stripping.
        # For example, 'C:/foo' on Windows.
        raise AbsolutePathError(member)
    # Ensure we stay in the destination
    target_path = os.path.realpath(os.path.join(dest_path, name))
    if os.path.commonpath([target_path, dest_path]) != dest_path:
        raise OutsideDestinationError(member, target_path)
    # Limit permissions (no high bits, and go-w)
    mode = member.mode
    if mode is not None:
        # Strip high bits & group/other write bits
        mode = mode & 0o755
        if for_data:
            # For data, handle permissions & file types
            if member.isreg() or member.islnk():
                if not mode & 0o100:
                    # Clear executable bits if not executable by user
                    mode &= ~0o111
                # Ensure owner can read & write
                mode |= 0o600
            elif member.isdir() or member.issym():
                # Ignore mode for directories & symlinks
                mode = None
            else:
                # Reject special files
                raise SpecialFileError(member)
        if mode != member.mode:
            new_attrs['mode'] = mode
    if for_data:
        # Ignore ownership for 'data'
        if member.uid is not None:
            new_attrs['uid'] = None
        if member.gid is not None:
            new_attrs['gid'] = None
        if member.uname is not None:
            new_attrs['uname'] = None
        if member.gname is not None:
            new_attrs['gname'] = None
        # Check link destination for 'data'
        if member.islnk() or member.issym():
            if os.path.isabs(member.linkname):
                raise AbsoluteLinkError(member)
            if member.issym():
                # Symlink targets resolve relative to the link's own
                # directory; hard link targets relative to the root.
                target_path = os.path.join(dest_path,
                                           os.path.dirname(name),
                                           member.linkname)
            else:
                target_path = os.path.join(dest_path,
                                           member.linkname)
            target_path = os.path.realpath(target_path)
            if os.path.commonpath([target_path, dest_path]) != dest_path:
                raise LinkOutsideDestinationError(member, target_path)
    return new_attrs
817
def fully_trusted_filter(member, dest_path):
    # The 'fully_trusted' extraction filter (PEP 706): no checks, the
    # member is returned unchanged.
    return member
820
def tar_filter(member, dest_path):
    """The 'tar' extraction filter (PEP 706): sanitize names and modes
    without the stricter 'data' checks."""
    overrides = _get_filtered_attrs(member, dest_path, False)
    return member.replace(**overrides, deep=False) if overrides else member
826
def data_filter(member, dest_path):
    """The 'data' extraction filter (PEP 706): full sanitization,
    rejecting special files and unsafe links."""
    overrides = _get_filtered_attrs(member, dest_path, True)
    return member.replace(**overrides, deep=False) if overrides else member
832
# Extraction filters addressable by string name (presumably looked up
# when a filter is given by name rather than as a callable -- confirm
# against TarFile's extraction code).
_NAMED_FILTERS = {
    "fully_trusted": fully_trusted_filter,
    "tar": tar_filter,
    "data": data_filter,
}
838
839#------------------
840# Exported Classes
841#------------------
842
# Sentinel for replace() defaults, meaning "don't change the attribute".
# A sentinel is needed because None is itself a legal attribute value
# (e.g. the filters above set uid/gid/mode to None).
_KEEP = object()
845
class TarInfo(object):
    """Informational class which holds the details about an
    archive member given by a tar header block.
    TarInfo objects are returned by TarFile.getmember(),
    TarFile.getmembers() and TarFile.gettarinfo() and are
    usually created internally.

    Per-attribute documentation lives in the __slots__ dict below.
    """
853
    # Dict-valued __slots__: the keys define the slots and the string
    # values double as per-attribute docstrings (surfaced by help()).
    __slots__ = dict(
        name = 'Name of the archive member.',
        mode = 'Permission bits.',
        uid = 'User ID of the user who originally stored this member.',
        gid = 'Group ID of the user who originally stored this member.',
        size = 'Size in bytes.',
        mtime = 'Time of last modification.',
        chksum = 'Header checksum.',
        type = ('File type. type is usually one of these constants: '
                'REGTYPE, AREGTYPE, LNKTYPE, SYMTYPE, DIRTYPE, FIFOTYPE, '
                'CONTTYPE, CHRTYPE, BLKTYPE, GNUTYPE_SPARSE.'),
        linkname = ('Name of the target file name, which is only present '
                    'in TarInfo objects of type LNKTYPE and SYMTYPE.'),
        uname = 'User name.',
        gname = 'Group name.',
        devmajor = 'Device major number.',
        devminor = 'Device minor number.',
        offset = 'The tar header starts here.',
        offset_data = "The file's data starts here.",
        pax_headers = ('A dictionary containing key-value pairs of an '
                       'associated pax extended header.'),
        sparse = 'Sparse member information.',
        # Internal slots, deliberately undocumented:
        tarfile = None,
        _sparse_structs = None,
        _link_target = None,
    )
880
    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
        of the member.

        All other fields are initialized to defaults describing an
        empty regular file (type REGTYPE, size 0, mode 0o644) owned
        by uid/gid 0.
        """
        self.name = name        # member name
        self.mode = 0o644       # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = ""         # user name
        self.gname = ""         # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.sparse = None      # sparse member information
        self.pax_headers = {}   # pax header information
904
    @property
    def path(self):
        'In pax headers, "name" is called "path".'
        return self.name

    @path.setter
    def path(self, name):
        # Writing to .path simply rebinds .name.
        self.name = name
913
    @property
    def linkpath(self):
        'In pax headers, "linkname" is called "linkpath".'
        return self.linkname

    @linkpath.setter
    def linkpath(self, linkname):
        # Writing to .linkpath simply rebinds .linkname.
        self.linkname = linkname
922
923 def __repr__(self):
924 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
925
926 def replace(self, *,
927 name=_KEEP, mtime=_KEEP, mode=_KEEP, linkname=_KEEP,
928 uid=_KEEP, gid=_KEEP, uname=_KEEP, gname=_KEEP,
929 deep=True, _KEEP=_KEEP):
930 """Return a deep copy of self with the given attributes replaced.
931 """
932 if deep:
933 result = copy.deepcopy(self)
934 else:
935 result = copy.copy(self)
936 if name is not _KEEP:
937 result.name = name
938 if mtime is not _KEEP:
939 result.mtime = mtime
940 if mode is not _KEEP:
941 result.mode = mode
942 if linkname is not _KEEP:
943 result.linkname = linkname
944 if uid is not _KEEP:
945 result.uid = uid
946 if gid is not _KEEP:
947 result.gid = gid
948 if uname is not _KEEP:
949 result.uname = uname
950 if gname is not _KEEP:
951 result.gname = gname
952 return result
953
954 def get_info(self):
955 """Return the TarInfo's attributes as a dictionary.
956 """
957 if self.mode is None:
958 mode = None
959 else:
960 mode = self.mode & 0o7777
961 info = {
962 "name": self.name,
963 "mode": mode,
964 "uid": self.uid,
965 "gid": self.gid,
966 "size": self.size,
967 "mtime": self.mtime,
968 "chksum": self.chksum,
969 "type": self.type,
970 "linkname": self.linkname,
971 "uname": self.uname,
972 "gname": self.gname,
973 "devmajor": self.devmajor,
974 "devminor": self.devminor
975 }
976
977 if info["type"] == DIRTYPE and not info["name"].endswith("/"):
978 info["name"] += "/"
979
980 return info
981
    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
        """Return a tar header for this member as bytes, i.e. one or
        more 512 byte blocks.

        Raises ValueError if any required field is None or if format
        is not one of the *_FORMAT constants.
        """
        info = self.get_info()
        for name, value in info.items():
            if value is None:
                raise ValueError("%s may not be None" % name)

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info, encoding, errors)
        elif format == GNU_FORMAT:
            return self.create_gnu_header(info, encoding, errors)
        elif format == PAX_FORMAT:
            return self.create_pax_header(info, encoding)
        else:
            raise ValueError("invalid format")
998
    def create_ustar_header(self, info, encoding, errors):
        """Return the object as a ustar header block.

        Raises ValueError when the linkname does not fit, or when the
        name cannot be split into prefix/name halves that fit.
        """
        info["magic"] = POSIX_MAGIC

        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        # ustar has no long-name extension; overlong names must be
        # split across the 155-byte prefix and 100-byte name fields.
        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
            info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors)

        return self._create_header(info, USTAR_FORMAT, encoding, errors)
1011
    def create_gnu_header(self, info, encoding, errors):
        """Return the object as a GNU header block sequence.

        Overlong link names and member names are emitted as extra
        GNUTYPE_LONGLINK / GNUTYPE_LONGNAME pseudo-member blocks that
        precede the real header block.
        """
        info["magic"] = GNU_MAGIC

        buf = b""
        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)

        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)

        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)
1025
    def create_pax_header(self, info, encoding):
        """Return the object as a ustar header block. If it cannot be
        represented this way, prepend a pax extended header sequence
        with supplement information.

        Explicit entries in self.pax_headers always take priority over
        values derived from the member's attributes.
        """
        info["magic"] = POSIX_MAGIC
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            # Try to encode the string as ASCII.
            try:
                info[name].encode("ascii", "strict")
            except UnicodeEncodeError:
                pax_headers[hname] = info[name]
                continue

            if len(info[name]) > length:
                pax_headers[hname] = info[name]

        # Test number fields for values that exceed the field limit or values
        # that like to be stored as float.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            needs_pax = False

            val = info[name]
            val_is_float = isinstance(val, float)
            val_int = round(val) if val_is_float else val
            if not 0 <= val_int < 8 ** (digits - 1):
                # Avoid overflow.
                info[name] = 0
                needs_pax = True
            elif val_is_float:
                # Put rounded value in ustar header, and full
                # precision value in pax header.
                info[name] = val_int
                needs_pax = True

            # The existing pax header has priority.
            if needs_pax and name not in pax_headers:
                pax_headers[name] = str(val)

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
        else:
            buf = b""

        # The ustar block itself is ASCII-only; non-ASCII data lives in
        # the pax records created above.
        return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
1083
1084 @classmethod
1085 def create_pax_global_header(cls, pax_headers):
1086 """Return the object as a pax global header block sequence.
1087 """
1088 return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
1089
1090 def _posix_split_name(self, name, encoding, errors):
1091 """Split a name longer than 100 chars into a prefix
1092 and a name part.
1093 """
1094 components = name.split("/")
1095 for i in range(1, len(components)):
1096 prefix = "/".join(components[:i])
1097 name = "/".join(components[i:])
1098 if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \
1099 len(name.encode(encoding, errors)) <= LENGTH_NAME:
1100 break
1101 else:
1102 raise ValueError("name is too long")
1103
1104 return prefix, name
1105
1106 @staticmethod
1107 def _create_header(info, format, encoding, errors):
1108 """Return a header block. info is a dictionary with file
1109 information, format must be one of the *_FORMAT constants.
1110 """
1111 has_device_fields = info.get("type") in (CHRTYPE, BLKTYPE)
1112 if has_device_fields:
1113 devmajor = itn(info.get("devmajor", 0), 8, format)
1114 devminor = itn(info.get("devminor", 0), 8, format)
1115 else:
1116 devmajor = stn("", 8, encoding, errors)
1117 devminor = stn("", 8, encoding, errors)
1118
1119 # None values in metadata should cause ValueError.
1120 # itn()/stn() do this for all fields except type.
1121 filetype = info.get("type", REGTYPE)
1122 if filetype is None:
1123 raise ValueError("TarInfo.type must not be None")
1124
1125 parts = [
1126 stn(info.get("name", ""), 100, encoding, errors),
1127 itn(info.get("mode", 0) & 0o7777, 8, format),
1128 itn(info.get("uid", 0), 8, format),
1129 itn(info.get("gid", 0), 8, format),
1130 itn(info.get("size", 0), 12, format),
1131 itn(info.get("mtime", 0), 12, format),
1132 b" ", # checksum field
1133 filetype,
1134 stn(info.get("linkname", ""), 100, encoding, errors),
1135 info.get("magic", POSIX_MAGIC),
1136 stn(info.get("uname", ""), 32, encoding, errors),
1137 stn(info.get("gname", ""), 32, encoding, errors),
1138 devmajor,
1139 devminor,
1140 stn(info.get("prefix", ""), 155, encoding, errors)
1141 ]
1142
1143 buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
1144 chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
1145 buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
1146 return buf
1147
1148 @staticmethod
1149 def _create_payload(payload):
1150 """Return the string payload filled with zero bytes
1151 up to the next 512 byte border.
1152 """
1153 blocks, remainder = divmod(len(payload), BLOCKSIZE)
1154 if remainder > 0:
1155 payload += (BLOCKSIZE - remainder) * NUL
1156 return payload
1157
1158 @classmethod
1159 def _create_gnu_long_header(cls, name, type, encoding, errors):
1160 """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
1161 for name.
1162 """
1163 name = name.encode(encoding, errors) + NUL
1164
1165 info = {}
1166 info["name"] = "././@LongLink"
1167 info["type"] = type
1168 info["size"] = len(name)
1169 info["magic"] = GNU_MAGIC
1170
1171 # create extended header + name blocks.
1172 return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
1173 cls._create_payload(name)
1174
    @classmethod
    def _create_pax_generic_header(cls, pax_headers, type, encoding):
        """Return a POSIX.1-2008 extended or global header sequence
        that contains a list of keyword, value pairs. The values
        must be strings.
        """
        # Check if one of the fields contains surrogate characters and thereby
        # forces hdrcharset=BINARY, see _proc_pax() for more information.
        binary = False
        for keyword, value in pax_headers.items():
            try:
                value.encode("utf-8", "strict")
            except UnicodeEncodeError:
                binary = True
                break

        records = b""
        if binary:
            # Put the hdrcharset field at the beginning of the header.
            records += b"21 hdrcharset=BINARY\n"

        for keyword, value in pax_headers.items():
            keyword = keyword.encode("utf-8")
            if binary:
                # Try to restore the original byte representation of `value'.
                # Needless to say, that the encoding must match the string.
                value = value.encode(encoding, "surrogateescape")
            else:
                value = value.encode("utf-8")

            # A record is "%d %s=%s\n" % (length, keyword, value), where
            # length counts the WHOLE record including the length digits
            # themselves. Iterate until the digit count stabilizes (adding
            # the digits may itself add a digit, e.g. 99 -> 101).
            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
            n = p = 0
            while True:
                n = l + len(str(p))
                if n == p:
                    break
                p = n
            records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {}
        info["name"] = "././@PaxHeader"
        info["type"] = type
        info["size"] = len(records)
        info["magic"] = POSIX_MAGIC

        # Create pax header + record blocks.
        return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
                cls._create_payload(records)
1225
    @classmethod
    def frombuf(cls, buf, encoding, errors):
        """Construct a TarInfo object from a 512 byte bytes object.

        Raises EmptyHeaderError, TruncatedHeaderError, EOFHeaderError or
        InvalidHeaderError depending on how buf fails to be a valid
        header block.
        """
        if len(buf) == 0:
            raise EmptyHeaderError("empty header")
        if len(buf) != BLOCKSIZE:
            raise TruncatedHeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            # An all-zero block marks the end of the archive.
            raise EOFHeaderError("end of file header")

        chksum = nti(buf[148:156])
        # calc_chksums() returns both signed and unsigned sums, since some
        # old implementations wrote signed checksums.
        if chksum not in calc_chksums(buf):
            raise InvalidHeaderError("bad checksum")

        # Slice offsets below follow the fixed ustar header layout.
        obj = cls()
        obj.name = nts(buf[0:100], encoding, errors)
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257], encoding, errors)
        obj.uname = nts(buf[265:297], encoding, errors)
        obj.gname = nts(buf[297:329], encoding, errors)
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500], encoding, errors)

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # The old GNU sparse format occupies some of the unused
        # space in the buffer for up to 4 sparse structures.
        # Save them for later processing in _proc_sparse().
        if obj.type == GNUTYPE_SPARSE:
            pos = 386
            structs = []
            for i in range(4):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[482])
            origsize = nti(buf[483:495])
            obj._sparse_structs = (structs, isextended, origsize)

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj
1288
1289 @classmethod
1290 def fromtarfile(cls, tarfile):
1291 """Return the next TarInfo object from TarFile object
1292 tarfile.
1293 """
1294 buf = tarfile.fileobj.read(BLOCKSIZE)
1295 obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
1296 obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1297 return obj._proc_member(tarfile)
1298
1299 #--------------------------------------------------------------------------
1300 # The following are methods that are called depending on the type of a
1301 # member. The entry point is _proc_member() which can be overridden in a
1302 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1303 # implement the following
1304 # operations:
1305 # 1. Set self.offset_data to the position where the data blocks begin,
1306 # if there is data that follows.
1307 # 2. Set tarfile.offset to the position where the next member's header will
1308 # begin.
1309 # 3. Return self or another valid TarInfo object.
1310 def _proc_member(self, tarfile):
1311 """Choose the right processing method depending on
1312 the type and call it.
1313 """
1314 if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1315 return self._proc_gnulong(tarfile)
1316 elif self.type == GNUTYPE_SPARSE:
1317 return self._proc_sparse(tarfile)
1318 elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1319 return self._proc_pax(tarfile)
1320 else:
1321 return self._proc_builtin(tarfile)
1322
1323 def _proc_builtin(self, tarfile):
1324 """Process a builtin type or an unknown type which
1325 will be treated as a regular file.
1326 """
1327 self.offset_data = tarfile.fileobj.tell()
1328 offset = self.offset_data
1329 if self.isreg() or self.type not in SUPPORTED_TYPES:
1330 # Skip the following data blocks.
1331 offset += self._block(self.size)
1332 tarfile.offset = offset
1333
1334 # Patch the TarInfo object with saved global
1335 # header information.
1336 self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
1337
1338 # Remove redundant slashes from directories. This is to be consistent
1339 # with frombuf().
1340 if self.isdir():
1341 self.name = self.name.rstrip("/")
1342
1343 return self
1344
    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
        or longlink member.
        """
        # The member's data area contains the NUL-terminated long name.
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError as e:
            raise SubsequentHeaderError(str(e)) from None

        # Patch the TarInfo object from the next header with
        # the longname information.
        next.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            next.name = nts(buf, tarfile.encoding, tarfile.errors)
        elif self.type == GNUTYPE_LONGLINK:
            next.linkname = nts(buf, tarfile.encoding, tarfile.errors)

        # Remove redundant slashes from directories. This is to be consistent
        # with frombuf().
        # NOTE(review): removesuffix() strips at most ONE trailing slash,
        # whereas frombuf() uses rstrip("/") — confirm this asymmetry is
        # intentional for long names with multiple trailing slashes.
        if next.isdir():
            next.name = next.name.removesuffix("/")

        return next
1371
    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.
        """
        # We already collected some sparse structures in frombuf().
        structs, isextended, origsize = self._sparse_structs
        del self._sparse_structs

        # Collect sparse structures from extended header blocks.
        while isextended:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            # Each extension block holds up to 21 (offset, numbytes) pairs
            # of 12 octal digits each (21 * 24 == 504 bytes).
            for i in range(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset and numbytes:
                    structs.append((offset, numbytes))
                pos += 24
            # Byte 504 flags whether yet another extension block follows.
            isextended = bool(buf[504])
        self.sparse = structs

        self.offset_data = tarfile.fileobj.tell()
        tarfile.offset = self.offset_data + self._block(self.size)
        # size in the header is the on-archive (compacted) size; expose the
        # original file size to callers instead.
        self.size = origsize
        return self
1399
    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
        POSIX.1-2008.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global). Global headers update tarfile.pax_headers in place;
        # extended headers work on a private copy.
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Check if the pax header contains a hdrcharset field. This tells us
        # the encoding of the path, linkpath, uname and gname fields. Normally,
        # these fields are UTF-8 encoded but since POSIX.1-2008 tar
        # implementations are allowed to store them as raw binary strings if
        # the translation to UTF-8 fails.
        match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
        if match is not None:
            pax_headers["hdrcharset"] = match.group(1).decode("utf-8")

        # For the time being, we don't care about anything other than "BINARY".
        # The only other value that is currently allowed by the standard is
        # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
        hdrcharset = pax_headers.get("hdrcharset")
        if hdrcharset == "BINARY":
            encoding = tarfile.encoding
        else:
            encoding = "utf-8"

        # Parse pax header information. A record looks like that:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline. keyword and value are both UTF-8 encoded strings.
        regex = re.compile(br"(\d+) ([^=]+)=")
        pos = 0
        while match := regex.match(buf, pos):
            length, keyword = match.groups()
            length = int(length)
            if length == 0:
                # A zero length would make the loop below spin forever.
                raise InvalidHeaderError("invalid header")
            # The value runs from just past '=' up to the record's trailing
            # newline, as determined by the declared record length.
            value = buf[match.end(2) + 1:match.start(1) + length - 1]

            # Normally, we could just use "utf-8" as the encoding and "strict"
            # as the error handler, but we better not take the risk. For
            # example, GNU tar <= 1.23 is known to store filenames it cannot
            # translate to UTF-8 as raw strings (unfortunately without a
            # hdrcharset=BINARY header).
            # We first try the strict standard encoding, and if that fails we
            # fall back on the user's encoding and error handler.
            keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
                    tarfile.errors)
            if keyword in PAX_NAME_FIELDS:
                value = self._decode_pax_field(value, encoding, tarfile.encoding,
                        tarfile.errors)
            else:
                value = self._decode_pax_field(value, "utf-8", "utf-8",
                        tarfile.errors)

            pax_headers[keyword] = value
            pos += length

        # Fetch the next header.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError as e:
            raise SubsequentHeaderError(str(e)) from None

        # Process GNU sparse information.
        if "GNU.sparse.map" in pax_headers:
            # GNU extended sparse format version 0.1.
            self._proc_gnusparse_01(next, pax_headers)

        elif "GNU.sparse.size" in pax_headers:
            # GNU extended sparse format version 0.0.
            self._proc_gnusparse_00(next, pax_headers, buf)

        elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
            # GNU extended sparse format version 1.0.
            self._proc_gnusparse_10(next, pax_headers, tarfile)

        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
            # Patch the TarInfo object with the extended header info.
            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
            next.offset = self.offset

            if "size" in pax_headers:
                # If the extended header replaces the size field,
                # we need to recalculate the offset where the next
                # header starts.
                offset = next.offset_data
                if next.isreg() or next.type not in SUPPORTED_TYPES:
                    offset += next._block(next.size)
                tarfile.offset = offset

        return next
1499
1500 def _proc_gnusparse_00(self, next, pax_headers, buf):
1501 """Process a GNU tar extended sparse header, version 0.0.
1502 """
1503 offsets = []
1504 for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
1505 offsets.append(int(match.group(1)))
1506 numbytes = []
1507 for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
1508 numbytes.append(int(match.group(1)))
1509 next.sparse = list(zip(offsets, numbytes))
1510
1511 def _proc_gnusparse_01(self, next, pax_headers):
1512 """Process a GNU tar extended sparse header, version 0.1.
1513 """
1514 sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
1515 next.sparse = list(zip(sparse[::2], sparse[1::2]))
1516
1517 def _proc_gnusparse_10(self, next, pax_headers, tarfile):
1518 """Process a GNU tar extended sparse header, version 1.0.
1519 """
1520 fields = None
1521 sparse = []
1522 buf = tarfile.fileobj.read(BLOCKSIZE)
1523 fields, buf = buf.split(b"\n", 1)
1524 fields = int(fields)
1525 while len(sparse) < fields * 2:
1526 if b"\n" not in buf:
1527 buf += tarfile.fileobj.read(BLOCKSIZE)
1528 number, buf = buf.split(b"\n", 1)
1529 sparse.append(int(number))
1530 next.offset_data = tarfile.fileobj.tell()
1531 next.sparse = list(zip(sparse[::2], sparse[1::2]))
1532
1533 def _apply_pax_info(self, pax_headers, encoding, errors):
1534 """Replace fields with supplemental information from a previous
1535 pax extended or global header.
1536 """
1537 for keyword, value in pax_headers.items():
1538 if keyword == "GNU.sparse.name":
1539 setattr(self, "path", value)
1540 elif keyword == "GNU.sparse.size":
1541 setattr(self, "size", int(value))
1542 elif keyword == "GNU.sparse.realsize":
1543 setattr(self, "size", int(value))
1544 elif keyword in PAX_FIELDS:
1545 if keyword in PAX_NUMBER_FIELDS:
1546 try:
1547 value = PAX_NUMBER_FIELDS[keyword](value)
1548 except ValueError:
1549 value = 0
1550 if keyword == "path":
1551 value = value.rstrip("/")
1552 setattr(self, keyword, value)
1553
1554 self.pax_headers = pax_headers.copy()
1555
1556 def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
1557 """Decode a single field from a pax record.
1558 """
1559 try:
1560 return value.decode(encoding, "strict")
1561 except UnicodeDecodeError:
1562 return value.decode(fallback_encoding, fallback_errors)
1563
1564 def _block(self, count):
1565 """Round up a byte count by BLOCKSIZE and return it,
1566 e.g. _block(834) => 1024.
1567 """
1568 blocks, remainder = divmod(count, BLOCKSIZE)
1569 if remainder:
1570 blocks += 1
1571 return blocks * BLOCKSIZE
1572
1573 def isreg(self):
1574 'Return True if the Tarinfo object is a regular file.'
1575 return self.type in REGULAR_TYPES
1576
1577 def isfile(self):
1578 'Return True if the Tarinfo object is a regular file.'
1579 return self.isreg()
1580
1581 def isdir(self):
1582 'Return True if it is a directory.'
1583 return self.type == DIRTYPE
1584
1585 def issym(self):
1586 'Return True if it is a symbolic link.'
1587 return self.type == SYMTYPE
1588
1589 def islnk(self):
1590 'Return True if it is a hard link.'
1591 return self.type == LNKTYPE
1592
1593 def ischr(self):
1594 'Return True if it is a character device.'
1595 return self.type == CHRTYPE
1596
1597 def isblk(self):
1598 'Return True if it is a block device.'
1599 return self.type == BLKTYPE
1600
1601 def isfifo(self):
1602 'Return True if it is a FIFO.'
1603 return self.type == FIFOTYPE
1604
1605 def issparse(self):
1606 return self.sparse is not None
1607
1608 def isdev(self):
1609 'Return True if it is one of character device, block device or FIFO.'
1610 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1611# class TarInfo
1612
class TarFile(object):
    """The TarFile Class provides an interface to tar archives.
    """

    # Class-level defaults; instances may override any of these via
    # the corresponding __init__ keyword arguments.

    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

    dereference = False         # If true, add content of linked file to the
                                # tar file, else the link.

    ignore_zeros = False        # If true, skips empty or invalid blocks and
                                # continues processing.

    errorlevel = 1              # If 0, fatal errors only appear in debug
                                # messages (if debug >= 0). If > 0, errors
                                # are passed to the caller as exceptions.

    format = DEFAULT_FORMAT     # The format to use when creating an archive.

    encoding = ENCODING         # Encoding for 8-bit character strings.

    errors = None               # Error handler for unicode conversion.

    tarinfo = TarInfo           # The default TarInfo class to use.

    fileobject = ExFileObject   # The file-object for extractfile().

    extraction_filter = None    # The default filter for extraction.
1640
    def __init__(self, name=None, mode="r", fileobj=None, format=None,
            tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
            errors="surrogateescape", pax_headers=None, debug=None,
            errorlevel=None, copybufsize=None):
        """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
        read from an existing archive, 'a' to append data to an existing
        file or 'w' to create a new file overwriting an existing one. `mode'
        defaults to 'r'.
        If `fileobj' is given, it is used for reading or writing data. If it
        can be determined, `mode' is overridden by `fileobj's mode.
        `fileobj' is not closed, when TarFile is closed.
        """
        # Map the archive mode to the underlying binary file mode.
        modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
        if mode not in modes:
            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
        self.mode = mode
        self._mode = modes[mode]

        if not fileobj:
            if self.mode == "a" and not os.path.exists(name):
                # Create nonexistent files in append mode.
                self.mode = "w"
                self._mode = "wb"
            fileobj = bltn_open(name, self._mode)
            # We own the file object and must close it ourselves.
            self._extfileobj = False
        else:
            if (name is None and hasattr(fileobj, "name") and
                isinstance(fileobj.name, (str, bytes))):
                name = fileobj.name
            if hasattr(fileobj, "mode"):
                self._mode = fileobj.mode
            # Caller-supplied file object: leave it open on close().
            self._extfileobj = True
        self.name = os.path.abspath(name) if name else None
        self.fileobj = fileobj

        # Init attributes.
        if format is not None:
            self.format = format
        if tarinfo is not None:
            self.tarinfo = tarinfo
        if dereference is not None:
            self.dereference = dereference
        if ignore_zeros is not None:
            self.ignore_zeros = ignore_zeros
        if encoding is not None:
            self.encoding = encoding
        self.errors = errors

        # pax_headers are only meaningful when writing PAX_FORMAT.
        if pax_headers is not None and self.format == PAX_FORMAT:
            self.pax_headers = pax_headers
        else:
            self.pax_headers = {}

        if debug is not None:
            self.debug = debug
        if errorlevel is not None:
            self.errorlevel = errorlevel

        # Init datastructures.
        self.copybufsize = copybufsize
        self.closed = False
        self.members = []       # list of members as TarInfo objects
        self._loaded = False    # flag if all members have been read
        self.offset = self.fileobj.tell()
                                # current position in the archive file
        self.inodes = {}        # dictionary caching the inodes of
                                # archive members already added

        try:
            if self.mode == "r":
                # Read the first member eagerly so next() can hand it out.
                self.firstmember = None
                self.firstmember = self.next()

            if self.mode == "a":
                # Move to the end of the archive,
                # before the first empty block.
                while True:
                    self.fileobj.seek(self.offset)
                    try:
                        tarinfo = self.tarinfo.fromtarfile(self)
                        self.members.append(tarinfo)
                    except EOFHeaderError:
                        self.fileobj.seek(self.offset)
                        break
                    except HeaderError as e:
                        raise ReadError(str(e)) from None

            if self.mode in ("a", "w", "x"):
                self._loaded = True

                if self.pax_headers:
                    # Emit a pax global header before any member.
                    buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                    self.fileobj.write(buf)
                    self.offset += len(buf)
        except:
            # On any setup failure, close a file we opened ourselves.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise
1740
1741 #--------------------------------------------------------------------------
1742 # Below are the classmethods which act as alternate constructors to the
1743 # TarFile class. The open() method is the only one that is needed for
1744 # public use; it is the "super"-constructor and is able to select an
1745 # adequate "sub"-constructor for a particular compression using the mapping
1746 # from OPEN_METH.
1747 #
1748 # This concept allows one to subclass TarFile without losing the comfort of
1749 # the super-constructor. A sub-constructor is registered and made available
1750 # by adding it to the mapping in OPEN_METH.
1751
    @classmethod
    def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
        r"""Open a tar archive for reading, writing or appending. Return
        an appropriate TarFile class.

        mode:
        'r' or 'r:\*' open for reading with transparent compression
        'r:'          open for reading exclusively uncompressed
        'r:gz'        open for reading with gzip compression
        'r:bz2'       open for reading with bzip2 compression
        'r:xz'        open for reading with lzma compression
        'a' or 'a:'   open for appending, creating the file if necessary
        'w' or 'w:'   open for writing without compression
        'w:gz'        open for writing with gzip compression
        'w:bz2'       open for writing with bzip2 compression
        'w:xz'        open for writing with lzma compression

        'x' or 'x:'   create a tarfile exclusively without compression, raise
                      an exception if the file is already created
        'x:gz'        create a gzip compressed tarfile, raise an exception
                      if the file is already created
        'x:bz2'       create a bzip2 compressed tarfile, raise an exception
                      if the file is already created
        'x:xz'        create an lzma compressed tarfile, raise an exception
                      if the file is already created

        'r|\*'        open a stream of tar blocks with transparent compression
        'r|'          open an uncompressed stream of tar blocks for reading
        'r|gz'        open a gzip compressed stream of tar blocks
        'r|bz2'       open a bzip2 compressed stream of tar blocks
        'r|xz'        open an lzma compressed stream of tar blocks
        'w|'          open an uncompressed stream for writing
        'w|gz'        open a gzip compressed stream for writing
        'w|bz2'       open a bzip2 compressed stream for writing
        'w|xz'        open an lzma compressed stream for writing
        """

        if not name and not fileobj:
            raise ValueError("nothing to open")

        if mode in ("r", "r:*"):
            # Find out which *open() is appropriate for opening the file.
            # Compressed openers are tried first so that an uncompressed
            # fallback cannot mask a compressed archive.
            def not_compressed(comptype):
                return cls.OPEN_METH[comptype] == 'taropen'
            error_msgs = []
            for comptype in sorted(cls.OPEN_METH, key=not_compressed):
                func = getattr(cls, cls.OPEN_METH[comptype])
                if fileobj is not None:
                    # Remember the position so a failed attempt can rewind
                    # before the next opener is tried.
                    saved_pos = fileobj.tell()
                try:
                    return func(name, "r", fileobj, **kwargs)
                except (ReadError, CompressionError) as e:
                    error_msgs.append(f'- method {comptype}: {e!r}')
                    if fileobj is not None:
                        fileobj.seek(saved_pos)
                    continue
            error_msgs_summary = '\n'.join(error_msgs)
            raise ReadError(f"file could not be opened successfully:\n{error_msgs_summary}")

        elif ":" in mode:
            filemode, comptype = mode.split(":", 1)
            filemode = filemode or "r"
            comptype = comptype or "tar"

            # Select the *open() function according to
            # given compression.
            if comptype in cls.OPEN_METH:
                func = getattr(cls, cls.OPEN_METH[comptype])
            else:
                raise CompressionError("unknown compression type %r" % comptype)
            return func(name, filemode, fileobj, **kwargs)

        elif "|" in mode:
            filemode, comptype = mode.split("|", 1)
            filemode = filemode or "r"
            comptype = comptype or "tar"

            if filemode not in ("r", "w"):
                raise ValueError("mode must be 'r' or 'w'")

            compresslevel = kwargs.pop("compresslevel", 9)
            stream = _Stream(name, filemode, comptype, fileobj, bufsize,
                             compresslevel)
            try:
                t = cls(name, filemode, stream, **kwargs)
            except:
                stream.close()
                raise
            # The stream was created here, so TarFile must close it.
            t._extfileobj = False
            return t

        elif mode in ("a", "w", "x"):
            return cls.taropen(name, mode, fileobj, **kwargs)

        raise ValueError("undiscernible mode")
1847
1848 @classmethod
1849 def taropen(cls, name, mode="r", fileobj=None, **kwargs):
1850 """Open uncompressed tar archive name for reading or writing.
1851 """
1852 if mode not in ("r", "a", "w", "x"):
1853 raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
1854 return cls(name, mode, fileobj, **kwargs)
1855
    @classmethod
    def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
        """Open gzip compressed tar archive name for reading or writing.
        Appending is not allowed.
        """
        if mode not in ("r", "w", "x"):
            raise ValueError("mode must be 'r', 'w' or 'x'")

        try:
            from gzip import GzipFile
        except ImportError:
            raise CompressionError("gzip module is not available") from None

        try:
            fileobj = GzipFile(name, mode + "b", compresslevel, fileobj)
        except OSError as e:
            # GzipFile raises OSError for a bad magic number; translate it
            # to ReadError only when reading from a caller-supplied fileobj.
            if fileobj is not None and mode == 'r':
                raise ReadError("not a gzip file") from e
            raise

        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except OSError as e:
            fileobj.close()
            if mode == 'r':
                raise ReadError("not a gzip file") from e
            raise
        except:
            # Any other failure: never leak the GzipFile we created.
            fileobj.close()
            raise
        # TarFile owns the GzipFile wrapper and must close it.
        t._extfileobj = False
        return t
1888
1889 @classmethod
1890 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1891 """Open bzip2 compressed tar archive name for reading or writing.
1892 Appending is not allowed.
1893 """
1894 if mode not in ("r", "w", "x"):
1895 raise ValueError("mode must be 'r', 'w' or 'x'")
1896
1897 try:
1898 from bz2 import BZ2File
1899 except ImportError:
1900 raise CompressionError("bz2 module is not available") from None
1901
1902 fileobj = BZ2File(fileobj or name, mode, compresslevel=compresslevel)
1903
1904 try:
1905 t = cls.taropen(name, mode, fileobj, **kwargs)
1906 except (OSError, EOFError) as e:
1907 fileobj.close()
1908 if mode == 'r':
1909 raise ReadError("not a bzip2 file") from e
1910 raise
1911 except:
1912 fileobj.close()
1913 raise
1914 t._extfileobj = False
1915 return t
1916
1917 @classmethod
1918 def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
1919 """Open lzma compressed tar archive name for reading or writing.
1920 Appending is not allowed.
1921 """
1922 if mode not in ("r", "w", "x"):
1923 raise ValueError("mode must be 'r', 'w' or 'x'")
1924
1925 try:
1926 from lzma import LZMAFile, LZMAError
1927 except ImportError:
1928 raise CompressionError("lzma module is not available") from None
1929
1930 fileobj = LZMAFile(fileobj or name, mode, preset=preset)
1931
1932 try:
1933 t = cls.taropen(name, mode, fileobj, **kwargs)
1934 except (LZMAError, EOFError) as e:
1935 fileobj.close()
1936 if mode == 'r':
1937 raise ReadError("not an lzma file") from e
1938 raise
1939 except:
1940 fileobj.close()
1941 raise
1942 t._extfileobj = False
1943 return t
1944
    # All *open() methods are registered here. open() consults this
    # mapping ("mode suffix" -> method name) to dispatch; subclasses may
    # extend it to register additional compression methods.
    OPEN_METH = {
        "tar": "taropen",   # uncompressed tar
        "gz":  "gzopen",    # gzip compressed tar
        "bz2": "bz2open",   # bzip2 compressed tar
        "xz":  "xzopen"     # lzma compressed tar
    }
1952
1953 #--------------------------------------------------------------------------
1954 # The public methods which TarFile provides:
1955
1956 def close(self):
1957 """Close the TarFile. In write-mode, two finishing zero blocks are
1958 appended to the archive.
1959 """
1960 if self.closed:
1961 return
1962
1963 self.closed = True
1964 try:
1965 if self.mode in ("a", "w", "x"):
1966 self.fileobj.write(NUL * (BLOCKSIZE * 2))
1967 self.offset += (BLOCKSIZE * 2)
1968 # fill up the end with zero-blocks
1969 # (like option -b20 for tar does)
1970 blocks, remainder = divmod(self.offset, RECORDSIZE)
1971 if remainder > 0:
1972 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1973 finally:
1974 if not self._extfileobj:
1975 self.fileobj.close()
1976
1977 def getmember(self, name):
1978 """Return a TarInfo object for member ``name``. If ``name`` can not be
1979 found in the archive, KeyError is raised. If a member occurs more
1980 than once in the archive, its last occurrence is assumed to be the
1981 most up-to-date version.
1982 """
1983 tarinfo = self._getmember(name.rstrip('/'))
1984 if tarinfo is None:
1985 raise KeyError("filename %r not found" % name)
1986 return tarinfo
1987
1988 def getmembers(self):
1989 """Return the members of the archive as a list of TarInfo objects. The
1990 list has the same order as the members in the archive.
1991 """
1992 self._check()
1993 if not self._loaded: # if we want to obtain a list of
1994 self._load() # all members, we first have to
1995 # scan the whole archive.
1996 return self.members
1997
1998 def getnames(self):
1999 """Return the members of the archive as a list of their names. It has
2000 the same order as the list returned by getmembers().
2001 """
2002 return [tarinfo.name for tarinfo in self.getmembers()]
2003
    def gettarinfo(self, name=None, arcname=None, fileobj=None):
        """Create a TarInfo object from the result of os.stat or equivalent
           on an existing file. The file is either named by ``name``, or
           specified as a file object ``fileobj`` with a file descriptor. If
           given, ``arcname`` specifies an alternative name for the file in the
           archive, otherwise, the name is taken from the 'name' attribute of
           'fileobj', or the 'name' argument. The name should be a text
           string.

           Returns None if the file's type cannot be represented in a tar
           archive (anything other than a regular file, directory, fifo,
           symlink, or character/block device).
        """
        self._check("awx")

        # When fileobj is given, replace name by
        # fileobj's real name.
        if fileobj is not None:
            name = fileobj.name

        # Building the name of the member in the archive.
        # Backward slashes are converted to forward slashes,
        # Absolute paths are turned to relative paths.
        if arcname is None:
            arcname = name
        drv, arcname = os.path.splitdrive(arcname)
        arcname = arcname.replace(os.sep, "/")
        arcname = arcname.lstrip("/")

        # Now, fill the TarInfo object with
        # information specific for the file.
        tarinfo = self.tarinfo()
        tarinfo.tarfile = self  # Not needed

        # Use os.stat or os.lstat, depending on if symlinks shall be resolved.
        if fileobj is None:
            if not self.dereference:
                statres = os.lstat(name)
            else:
                statres = os.stat(name)
        else:
            statres = os.fstat(fileobj.fileno())
        linkname = ""

        stmd = statres.st_mode
        if stat.S_ISREG(stmd):
            inode = (statres.st_ino, statres.st_dev)
            if not self.dereference and statres.st_nlink > 1 and \
                    inode in self.inodes and arcname != self.inodes[inode]:
                # Is it a hardlink to an already
                # archived file?
                type = LNKTYPE
                linkname = self.inodes[inode]
            else:
                # The inode is added only if its valid.
                # For win32 it is always 0.
                type = REGTYPE
                if inode[0]:
                    self.inodes[inode] = arcname
        elif stat.S_ISDIR(stmd):
            type = DIRTYPE
        elif stat.S_ISFIFO(stmd):
            type = FIFOTYPE
        elif stat.S_ISLNK(stmd):
            type = SYMTYPE
            linkname = os.readlink(name)
        elif stat.S_ISCHR(stmd):
            type = CHRTYPE
        elif stat.S_ISBLK(stmd):
            type = BLKTYPE
        else:
            # Sockets and other exotic file types cannot be stored in a tar
            # archive.
            return None

        # Fill the TarInfo object with all
        # information we can get.
        tarinfo.name = arcname
        tarinfo.mode = stmd
        tarinfo.uid = statres.st_uid
        tarinfo.gid = statres.st_gid
        if type == REGTYPE:
            tarinfo.size = statres.st_size
        else:
            # Only regular files carry payload data in the archive.
            tarinfo.size = 0
        tarinfo.mtime = statres.st_mtime
        tarinfo.type = type
        tarinfo.linkname = linkname
        # Map numeric ids to symbolic names where the platform supports it;
        # a failed lookup simply leaves the textual fields unset.
        if pwd:
            try:
                tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
            except KeyError:
                pass
        if grp:
            try:
                tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
            except KeyError:
                pass

        if type in (CHRTYPE, BLKTYPE):
            # Record the device numbers for character/block devices.
            if hasattr(os, "major") and hasattr(os, "minor"):
                tarinfo.devmajor = os.major(statres.st_rdev)
                tarinfo.devminor = os.minor(statres.st_rdev)
        return tarinfo
2102
    def list(self, verbose=True, *, members=None):
        """Print a table of contents to sys.stdout. If ``verbose`` is False, only
           the names of the members are printed. If it is True, an `ls -l'-like
           output is produced. ``members`` is optional and must be a subset of the
           list returned by getmembers().
        """
        self._check()

        if members is None:
            members = self
        for tarinfo in members:
            if verbose:
                # mode and mtime can be None on some members (the code guards
                # for it here); print placeholders in that case.
                if tarinfo.mode is None:
                    _safe_print("??????????")
                else:
                    _safe_print(stat.filemode(tarinfo.mode))
                _safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid,
                                       tarinfo.gname or tarinfo.gid))
                if tarinfo.ischr() or tarinfo.isblk():
                    # Devices show "major,minor" instead of a size.
                    _safe_print("%10s" %
                                ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor)))
                else:
                    _safe_print("%10d" % tarinfo.size)
                if tarinfo.mtime is None:
                    _safe_print("????-??-?? ??:??:??")
                else:
                    _safe_print("%d-%02d-%02d %02d:%02d:%02d" \
                                % time.localtime(tarinfo.mtime)[:6])

            # Directories get a trailing slash, similar to ls.
            _safe_print(tarinfo.name + ("/" if tarinfo.isdir() else ""))

            if verbose:
                if tarinfo.issym():
                    _safe_print("-> " + tarinfo.linkname)
                if tarinfo.islnk():
                    _safe_print("link to " + tarinfo.linkname)
            print()
2140
    def add(self, name, arcname=None, recursive=True, *, filter=None):
        """Add the file ``name`` to the archive. ``name`` may be any type of file
           (directory, fifo, symbolic link, etc.). If given, ``arcname``
           specifies an alternative name for the file in the archive.
           Directories are added recursively by default. This can be avoided by
           setting ``recursive`` to False. ``filter`` is a function
           that expects a TarInfo object argument and returns the changed
           TarInfo object, if it returns None the TarInfo object will be
           excluded from the archive.
        """
        self._check("awx")

        if arcname is None:
            arcname = name

        # Skip if somebody tries to archive the archive...
        if self.name is not None and os.path.abspath(name) == self.name:
            self._dbg(2, "tarfile: Skipped %r" % name)
            return

        self._dbg(1, name)

        # Create a TarInfo object from the file.
        tarinfo = self.gettarinfo(name, arcname)

        if tarinfo is None:
            # gettarinfo() returns None for file types that cannot be
            # represented in a tar archive.
            self._dbg(1, "tarfile: Unsupported type %r" % name)
            return

        # Change or exclude the TarInfo object.
        if filter is not None:
            tarinfo = filter(tarinfo)
            if tarinfo is None:
                self._dbg(2, "tarfile: Excluded %r" % name)
                return

        # Append the tar header and data to the archive.
        if tarinfo.isreg():
            with bltn_open(name, "rb") as f:
                self.addfile(tarinfo, f)

        elif tarinfo.isdir():
            self.addfile(tarinfo)
            if recursive:
                # Sort for a deterministic member order across filesystems.
                for f in sorted(os.listdir(name)):
                    self.add(os.path.join(name, f), os.path.join(arcname, f),
                            recursive, filter=filter)

        else:
            # Fifos, devices and links have a header but no data.
            self.addfile(tarinfo)
2191
2192 def addfile(self, tarinfo, fileobj=None):
2193 """Add the TarInfo object ``tarinfo`` to the archive. If ``fileobj`` is
2194 given, it should be a binary file, and tarinfo.size bytes are read
2195 from it and added to the archive. You can create TarInfo objects
2196 directly, or by using gettarinfo().
2197 """
2198 self._check("awx")
2199
2200 tarinfo = copy.copy(tarinfo)
2201
2202 buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
2203 self.fileobj.write(buf)
2204 self.offset += len(buf)
2205 bufsize=self.copybufsize
2206 # If there's data to follow, append it.
2207 if fileobj is not None:
2208 copyfileobj(fileobj, self.fileobj, tarinfo.size, bufsize=bufsize)
2209 blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
2210 if remainder > 0:
2211 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
2212 blocks += 1
2213 self.offset += blocks * BLOCKSIZE
2214
2215 self.members.append(tarinfo)
2216
2217 def _get_filter_function(self, filter):
2218 if filter is None:
2219 filter = self.extraction_filter
2220 if filter is None:
2221 warnings.warn(
2222 'Python 3.14 will, by default, filter extracted tar '
2223 + 'archives and reject files or modify their metadata. '
2224 + 'Use the filter argument to control this behavior.',
2225 DeprecationWarning)
2226 return fully_trusted_filter
2227 if isinstance(filter, str):
2228 raise TypeError(
2229 'String names are not supported for '
2230 + 'TarFile.extraction_filter. Use a function such as '
2231 + 'tarfile.data_filter directly.')
2232 return filter
2233 if callable(filter):
2234 return filter
2235 try:
2236 return _NAMED_FILTERS[filter]
2237 except KeyError:
2238 raise ValueError(f"filter {filter!r} not found") from None
2239
    def extractall(self, path=".", members=None, *, numeric_owner=False,
                   filter=None):
        """Extract all members from the archive to the current working
           directory and set owner, modification time and permissions on
           directories afterwards. `path' specifies a different directory
           to extract to. `members' is optional and must be a subset of the
           list returned by getmembers(). If `numeric_owner` is True, only
           the numbers for user/group names are used and not the names.

           The `filter` function will be called on each member just
           before extraction.
           It can return a changed TarInfo or None to skip the member.
           String names of common filters are accepted.
        """
        directories = []

        filter_function = self._get_filter_function(filter)
        if members is None:
            members = self

        for member in members:
            tarinfo = self._get_extract_tarinfo(member, filter_function, path)
            if tarinfo is None:
                # Skipped by the filter.
                continue
            if tarinfo.isdir():
                # For directories, delay setting attributes until later,
                # since permissions can interfere with extraction and
                # extracting contents can reset mtime.
                directories.append(tarinfo)
            self._extract_one(tarinfo, path, set_attrs=not tarinfo.isdir(),
                              numeric_owner=numeric_owner)

        # Reverse sort directories.
        # Deepest paths come first, so attributes are applied bottom-up.
        directories.sort(key=lambda a: a.name, reverse=True)

        # Set correct owner, mtime and filemode on directories.
        for tarinfo in directories:
            dirpath = os.path.join(path, tarinfo.name)
            try:
                self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
                self.utime(tarinfo, dirpath)
                self.chmod(tarinfo, dirpath)
            except ExtractError as e:
                # Non-fatal: honored or logged depending on errorlevel.
                self._handle_nonfatal_error(e)
2284
2285 def extract(self, member, path="", set_attrs=True, *, numeric_owner=False,
2286 filter=None):
2287 """Extract a member from the archive to the current working directory,
2288 using its full name. Its file information is extracted as accurately
2289 as possible. `member' may be a filename or a TarInfo object. You can
2290 specify a different directory using `path'. File attributes (owner,
2291 mtime, mode) are set unless `set_attrs' is False. If `numeric_owner`
2292 is True, only the numbers for user/group names are used and not
2293 the names.
2294
2295 The `filter` function will be called before extraction.
2296 It can return a changed TarInfo or None to skip the member.
2297 String names of common filters are accepted.
2298 """
2299 filter_function = self._get_filter_function(filter)
2300 tarinfo = self._get_extract_tarinfo(member, filter_function, path)
2301 if tarinfo is not None:
2302 self._extract_one(tarinfo, path, set_attrs, numeric_owner)
2303
    def _get_extract_tarinfo(self, member, filter_function, path):
        """Get filtered TarInfo (or None) from member, which might be a str.

           Exceptions raised by the filter are routed through the errorlevel
           machinery: OSError/FilterError as fatal, ExtractError as
           non-fatal.
        """
        if isinstance(member, str):
            tarinfo = self.getmember(member)
        else:
            tarinfo = member

        # Keep a reference to the unmodified member for the debug message.
        unfiltered = tarinfo
        try:
            tarinfo = filter_function(tarinfo, path)
        except (OSError, FilterError) as e:
            self._handle_fatal_error(e)
        except ExtractError as e:
            self._handle_nonfatal_error(e)
        if tarinfo is None:
            # The filter chose to exclude this member.
            self._dbg(2, "tarfile: Excluded %r" % unfiltered.name)
            return None
        # Prepare the link target for makelink().
        if tarinfo.islnk():
            # Copy first: _link_target is per-extraction state and must not
            # leak onto the shared member object.
            tarinfo = copy.copy(tarinfo)
            tarinfo._link_target = os.path.join(path, tarinfo.linkname)
        return tarinfo
2326
    def _extract_one(self, tarinfo, path, set_attrs, numeric_owner):
        """Extract from filtered tarinfo to disk.

           Errors are dispatched through the errorlevel machinery: OSError
           counts as fatal, ExtractError as non-fatal.
        """
        self._check("r")

        try:
            self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
                                 set_attrs=set_attrs,
                                 numeric_owner=numeric_owner)
        except OSError as e:
            self._handle_fatal_error(e)
        except ExtractError as e:
            self._handle_nonfatal_error(e)
2339
    def _handle_nonfatal_error(self, e):
        """Handle non-fatal error (ExtractError) according to errorlevel.

           Must be called from inside an ``except`` block: the bare ``raise``
           re-raises the exception currently being handled.
        """
        if self.errorlevel > 1:
            raise
        else:
            self._dbg(1, "tarfile: %s" % e)
2346
    def _handle_fatal_error(self, e):
        """Handle "fatal" error according to self.errorlevel.

           Must be called from inside an ``except`` block: with
           errorlevel > 0 the bare ``raise`` re-raises the active exception;
           otherwise the error is only reported via _dbg().
        """
        if self.errorlevel > 0:
            raise
        elif isinstance(e, OSError):
            # Format OSErrors from strerror/filename for readability.
            if e.filename is None:
                self._dbg(1, "tarfile: %s" % e.strerror)
            else:
                self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
        else:
            self._dbg(1, "tarfile: %s %s" % (type(e).__name__, e))
2358
    def extractfile(self, member):
        """Extract a member from the archive as a file object. ``member`` may be
           a filename or a TarInfo object. If ``member`` is a regular file or
           a link, an io.BufferedReader object is returned. For all other
           existing members, None is returned. If ``member`` does not appear
           in the archive, KeyError is raised.
        """
        self._check("r")

        if isinstance(member, str):
            tarinfo = self.getmember(member)
        else:
            tarinfo = member

        if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
            # Members with unknown types are treated as regular files.
            return self.fileobject(self, tarinfo)

        elif tarinfo.islnk() or tarinfo.issym():
            if isinstance(self.fileobj, _Stream):
                # A small but ugly workaround for the case that someone tries
                # to extract a (sym)link as a file-object from a non-seekable
                # stream of tar blocks.
                raise StreamError("cannot extract (sym)link as file object")
            else:
                # A (sym)link's file object is its target's file object.
                return self.extractfile(self._find_link_target(tarinfo))
        else:
            # If there's no data associated with the member (directory, chrdev,
            # blkdev, etc.), return None instead of a file object.
            return None
2390
2391 def _extract_member(self, tarinfo, targetpath, set_attrs=True,
2392 numeric_owner=False):
2393 """Extract the TarInfo object tarinfo to a physical
2394 file called targetpath.
2395 """
2396 # Fetch the TarInfo object for the given name
2397 # and build the destination pathname, replacing
2398 # forward slashes to platform specific separators.
2399 targetpath = targetpath.rstrip("/")
2400 targetpath = targetpath.replace("/", os.sep)
2401
2402 # Create all upper directories.
2403 upperdirs = os.path.dirname(targetpath)
2404 if upperdirs and not os.path.exists(upperdirs):
2405 # Create directories that are not part of the archive with
2406 # default permissions.
2407 os.makedirs(upperdirs)
2408
2409 if tarinfo.islnk() or tarinfo.issym():
2410 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2411 else:
2412 self._dbg(1, tarinfo.name)
2413
2414 if tarinfo.isreg():
2415 self.makefile(tarinfo, targetpath)
2416 elif tarinfo.isdir():
2417 self.makedir(tarinfo, targetpath)
2418 elif tarinfo.isfifo():
2419 self.makefifo(tarinfo, targetpath)
2420 elif tarinfo.ischr() or tarinfo.isblk():
2421 self.makedev(tarinfo, targetpath)
2422 elif tarinfo.islnk() or tarinfo.issym():
2423 self.makelink(tarinfo, targetpath)
2424 elif tarinfo.type not in SUPPORTED_TYPES:
2425 self.makeunknown(tarinfo, targetpath)
2426 else:
2427 self.makefile(tarinfo, targetpath)
2428
2429 if set_attrs:
2430 self.chown(tarinfo, targetpath, numeric_owner)
2431 if not tarinfo.issym():
2432 self.chmod(tarinfo, targetpath)
2433 self.utime(tarinfo, targetpath)
2434
2435 #--------------------------------------------------------------------------
2436 # Below are the different file methods. They are called via
2437 # _extract_member() when extract() is called. They can be replaced in a
2438 # subclass to implement other functionality.
2439
2440 def makedir(self, tarinfo, targetpath):
2441 """Make a directory called targetpath.
2442 """
2443 try:
2444 if tarinfo.mode is None:
2445 # Use the system's default mode
2446 os.mkdir(targetpath)
2447 else:
2448 # Use a safe mode for the directory, the real mode is set
2449 # later in _extract_member().
2450 os.mkdir(targetpath, 0o700)
2451 except FileExistsError:
2452 if not os.path.isdir(targetpath):
2453 raise
2454
    def makefile(self, tarinfo, targetpath):
        """Make a file called targetpath.
        """
        source = self.fileobj
        # Position the archive stream at the member's data.
        source.seek(tarinfo.offset_data)
        bufsize = self.copybufsize
        with bltn_open(targetpath, "wb") as target:
            if tarinfo.sparse is not None:
                # Sparse member: write each data segment at its recorded
                # offset, then extend the file to its full size so holes
                # remain between the segments.
                for offset, size in tarinfo.sparse:
                    target.seek(offset)
                    copyfileobj(source, target, size, ReadError, bufsize)
                target.seek(tarinfo.size)
                target.truncate()
            else:
                copyfileobj(source, target, tarinfo.size, ReadError, bufsize)
2470
2471 def makeunknown(self, tarinfo, targetpath):
2472 """Make a file from a TarInfo object with an unknown type
2473 at targetpath.
2474 """
2475 self.makefile(tarinfo, targetpath)
2476 self._dbg(1, "tarfile: Unknown file type %r, " \
2477 "extracted as regular file." % tarinfo.type)
2478
2479 def makefifo(self, tarinfo, targetpath):
2480 """Make a fifo called targetpath.
2481 """
2482 if hasattr(os, "mkfifo"):
2483 os.mkfifo(targetpath)
2484 else:
2485 raise ExtractError("fifo not supported by system")
2486
    def makedev(self, tarinfo, targetpath):
        """Make a character or block device called targetpath.

           Raises ExtractError if the platform lacks os.mknod/os.makedev.
        """
        if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
            raise ExtractError("special devices not supported by system")

        mode = tarinfo.mode
        if mode is None:
            # Use mknod's default
            mode = 0o600
        # Merge the device-type bit into the permission bits.
        if tarinfo.isblk():
            mode |= stat.S_IFBLK
        else:
            mode |= stat.S_IFCHR

        os.mknod(targetpath, mode,
                 os.makedev(tarinfo.devmajor, tarinfo.devminor))
2504
    def makelink(self, tarinfo, targetpath):
        """Make a (symbolic) link called targetpath. If it cannot be created
          (platform limitation), we try to make a copy of the referenced file
          instead of a link.
        """
        try:
            # For systems that support symbolic and hard links.
            if tarinfo.issym():
                if os.path.lexists(targetpath):
                    # Avoid FileExistsError on following os.symlink.
                    os.unlink(targetpath)
                os.symlink(tarinfo.linkname, targetpath)
            else:
                if os.path.exists(tarinfo._link_target):
                    os.link(tarinfo._link_target, targetpath)
                else:
                    # Hard-link target missing on disk: extract the
                    # linked-to member's contents instead.
                    self._extract_member(self._find_link_target(tarinfo),
                                         targetpath)
        except symlink_exception:
            # The platform refused to create the link: fall back to
            # extracting a copy of the referenced member.
            try:
                self._extract_member(self._find_link_target(tarinfo),
                                     targetpath)
            except KeyError:
                raise ExtractError("unable to resolve link inside archive") from None
2529
    def chown(self, tarinfo, targetpath, numeric_owner):
        """Set owner of targetpath according to tarinfo. If numeric_owner
           is True, use .gid/.uid instead of .gname/.uname. If numeric_owner
           is False, fall back to .gid/.uid when the search based on name
           fails.
        """
        if hasattr(os, "geteuid") and os.geteuid() == 0:
            # We have to be root to do so.
            g = tarinfo.gid
            u = tarinfo.uid
            if not numeric_owner:
                # Prefer the symbolic names; silently keep the numeric ids
                # when the name lookup fails.
                try:
                    if grp and tarinfo.gname:
                        g = grp.getgrnam(tarinfo.gname)[2]
                except KeyError:
                    pass
                try:
                    if pwd and tarinfo.uname:
                        u = pwd.getpwnam(tarinfo.uname)[2]
                except KeyError:
                    pass
            # -1 tells os.chown to leave that id unchanged.
            if g is None:
                g = -1
            if u is None:
                u = -1
            try:
                if tarinfo.issym() and hasattr(os, "lchown"):
                    # Change the link itself, not its target.
                    os.lchown(targetpath, u, g)
                else:
                    os.chown(targetpath, u, g)
            except OSError as e:
                raise ExtractError("could not change owner") from e
2562
2563 def chmod(self, tarinfo, targetpath):
2564 """Set file permissions of targetpath according to tarinfo.
2565 """
2566 if tarinfo.mode is None:
2567 return
2568 try:
2569 os.chmod(targetpath, tarinfo.mode)
2570 except OSError as e:
2571 raise ExtractError("could not change mode") from e
2572
2573 def utime(self, tarinfo, targetpath):
2574 """Set modification time of targetpath according to tarinfo.
2575 """
2576 mtime = tarinfo.mtime
2577 if mtime is None:
2578 return
2579 if not hasattr(os, 'utime'):
2580 return
2581 try:
2582 os.utime(targetpath, (mtime, mtime))
2583 except OSError as e:
2584 raise ExtractError("could not change modification time") from e
2585
2586 #--------------------------------------------------------------------------
    def next(self):
        """Return the next member of the archive as a TarInfo object, when
           TarFile is opened for reading. Return None if there is no more
           available.
        """
        self._check("ra")
        # A member may already be queued in self.firstmember; hand it out
        # before reading further.
        if self.firstmember is not None:
            m = self.firstmember
            self.firstmember = None
            return m

        # Advance the file pointer.
        if self.offset != self.fileobj.tell():
            if self.offset == 0:
                return None
            # Read the byte just before the target offset so a truncated
            # archive is detected before header parsing.
            self.fileobj.seek(self.offset - 1)
            if not self.fileobj.read(1):
                raise ReadError("unexpected end of data")

        # Read the next block.
        tarinfo = None
        while True:
            try:
                tarinfo = self.tarinfo.fromtarfile(self)
            except EOFHeaderError as e:
                # With ignore_zeros, skip over zero blocks and keep looking
                # for further members.
                if self.ignore_zeros:
                    self._dbg(2, "0x%X: %s" % (self.offset, e))
                    self.offset += BLOCKSIZE
                    continue
            except InvalidHeaderError as e:
                if self.ignore_zeros:
                    self._dbg(2, "0x%X: %s" % (self.offset, e))
                    self.offset += BLOCKSIZE
                    continue
                elif self.offset == 0:
                    # An invalid header right at the start means this is not
                    # a tar archive at all.
                    raise ReadError(str(e)) from None
            except EmptyHeaderError:
                if self.offset == 0:
                    raise ReadError("empty file") from None
            except TruncatedHeaderError as e:
                if self.offset == 0:
                    raise ReadError(str(e)) from None
            except SubsequentHeaderError as e:
                raise ReadError(str(e)) from None
            except Exception as e:
                # Decompression failures from gzip streams surface as
                # zlib.error; translate them into ReadError when the zlib
                # module is importable.
                try:
                    import zlib
                    if isinstance(e, zlib.error):
                        raise ReadError(f'zlib error: {e}') from None
                    else:
                        raise e
                except ImportError:
                    raise e
            break

        if tarinfo is not None:
            self.members.append(tarinfo)
        else:
            # No further member could be read: the archive is exhausted.
            self._loaded = True

        return tarinfo
2648
2649 #--------------------------------------------------------------------------
2650 # Little helper methods:
2651
    def _getmember(self, name, tarinfo=None, normalize=False):
        """Find an archive member by name from bottom to top.
           If tarinfo is given, it is used as the starting point.

           Returns None if no matching member is found.
        """
        # Ensure that all members have been loaded.
        members = self.getmembers()

        # Limit the member search list up to tarinfo.
        skipping = False
        if tarinfo is not None:
            try:
                index = members.index(tarinfo)
            except ValueError:
                # The given starting point might be a (modified) copy.
                # We'll later skip members until we find an equivalent.
                skipping = True
            else:
                # Happy fast path
                members = members[:index]

        if normalize:
            name = os.path.normpath(name)

        # Search from the end so the last occurrence of a name wins.
        for member in reversed(members):
            if skipping:
                # Equivalence is established via the archive offset.
                if tarinfo.offset == member.offset:
                    skipping = False
                continue
            if normalize:
                member_name = os.path.normpath(member.name)
            else:
                member_name = member.name

            if name == member_name:
                return member

        if skipping:
            # Starting point was not found
            raise ValueError(tarinfo)
2691
2692 def _load(self):
2693 """Read through the entire archive file and look for readable
2694 members.
2695 """
2696 while self.next() is not None:
2697 pass
2698 self._loaded = True
2699
2700 def _check(self, mode=None):
2701 """Check if TarFile is still open, and if the operation's mode
2702 corresponds to TarFile's mode.
2703 """
2704 if self.closed:
2705 raise OSError("%s is closed" % self.__class__.__name__)
2706 if mode is not None and self.mode not in mode:
2707 raise OSError("bad operation for mode %r" % self.mode)
2708
    def _find_link_target(self, tarinfo):
        """Find the target member of a symlink or hardlink member in the
           archive.

           Raises KeyError if the target cannot be found.
        """
        if tarinfo.issym():
            # Always search the entire archive.
            # The symlink target is resolved relative to the member's own
            # directory inside the archive.
            linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)))
            limit = None
        else:
            # Search the archive before the link, because a hard link is
            # just a reference to an already archived file.
            linkname = tarinfo.linkname
            limit = tarinfo

        member = self._getmember(linkname, tarinfo=limit, normalize=True)
        if member is None:
            raise KeyError("linkname %r not found" % linkname)
        return member
2727
    def __iter__(self):
        """Provide an iterator object.
        """
        if self._loaded:
            # Everything is already in memory; just replay the member list.
            yield from self.members
            return

        # Yield items using TarFile's next() method.
        # When all members have been read, set TarFile as _loaded.
        index = 0
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will have already exhausted the next() method.
        if self.firstmember is not None:
            tarinfo = self.next()
            index += 1
            yield tarinfo

        while True:
            if index < len(self.members):
                # A getmembers() call during iteration may have read ahead;
                # serve already-parsed members from the list first.
                tarinfo = self.members[index]
            elif not self._loaded:
                tarinfo = self.next()
                if not tarinfo:
                    self._loaded = True
                    return
            else:
                return
            index += 1
            yield tarinfo
2758
2759 def _dbg(self, level, msg):
2760 """Write debugging output to sys.stderr.
2761 """
2762 if level <= self.debug:
2763 print(msg, file=sys.stderr)
2764
    def __enter__(self):
        # Context-manager entry: verify the archive is still open, then hand
        # the TarFile itself to the with-statement.
        self._check()
        return self
2768
    def __exit__(self, type, value, traceback):
        # Context-manager exit: never suppresses exceptions.
        if type is None:
            # Normal exit: close() writes the end-of-archive blocks.
            self.close()
        else:
            # An exception occurred. We must not call close() because
            # it would try to write end-of-archive blocks and padding.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
2778
2779#--------------------
2780# exported functions
2781#--------------------
2782
def is_tarfile(name):
    """Return True if *name* points to a tar archive that this module can
    read, else return False.

    *name* should be a string (path), a file, or a file-like object.
    """
    try:
        if hasattr(name, "read"):
            # Probe the file-like object, then restore its stream position.
            pos = name.tell()
            archive = open(fileobj=name)
            name.seek(pos)
        else:
            archive = open(name)
        archive.close()
        return True
    except TarError:
        # Any tarfile-specific failure means "not a tar archive".
        return False
2800
# Module-level convenience alias: tarfile.open() is TarFile.open().
open = TarFile.open
2802
2803
def main():
    """Command-line entry point: list, extract, create, or test archives."""
    import argparse

    description = 'A simple command-line interface for tarfile module.'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='Verbose output')
    parser.add_argument('--filter', metavar='<filtername>',
                        choices=_NAMED_FILTERS,
                        help='Filter for extraction')

    # Exactly one action (-l/-e/-c/-t) must be chosen.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-l', '--list', metavar='<tarfile>',
                       help='Show listing of a tarfile')
    group.add_argument('-e', '--extract', nargs='+',
                       metavar=('<tarfile>', '<output_dir>'),
                       help='Extract tarfile into target dir')
    group.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create tarfile from sources')
    group.add_argument('-t', '--test', metavar='<tarfile>',
                       help='Test if a tarfile is valid')

    args = parser.parse_args()

    if args.filter and args.extract is None:
        parser.exit(1, '--filter is only valid for extraction\n')

    if args.test is not None:
        src = args.test
        if is_tarfile(src):
            with open(src, 'r') as tar:
                # getmembers() reads through the whole archive, validating
                # it; the member list is then printed to stderr.
                tar.getmembers()
                print(tar.getmembers(), file=sys.stderr)
            if args.verbose:
                print('{!r} is a tar archive.'.format(src))
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.list is not None:
        src = args.list
        if is_tarfile(src):
            with TarFile.open(src, 'r:*') as tf:
                tf.list(verbose=args.verbose)
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.extract is not None:
        # -e takes the archive and an optional destination directory.
        if len(args.extract) == 1:
            src = args.extract[0]
            curdir = os.curdir
        elif len(args.extract) == 2:
            src, curdir = args.extract
        else:
            parser.exit(1, parser.format_help())

        if is_tarfile(src):
            with TarFile.open(src, 'r:*') as tf:
                tf.extractall(path=curdir, filter=args.filter)
            if args.verbose:
                if curdir == '.':
                    msg = '{!r} file is extracted.'.format(src)
                else:
                    msg = ('{!r} file is extracted '
                           'into {!r} directory.').format(src, curdir)
                print(msg)
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.create is not None:
        # First positional argument is the archive name, the rest are the
        # sources to add.
        tar_name = args.create.pop(0)
        _, ext = os.path.splitext(tar_name)
        # Infer the compression mode from the archive's file extension.
        compressions = {
            # gz
            '.gz': 'gz',
            '.tgz': 'gz',
            # xz
            '.xz': 'xz',
            '.txz': 'xz',
            # bz2
            '.bz2': 'bz2',
            '.tbz': 'bz2',
            '.tbz2': 'bz2',
            '.tb2': 'bz2',
        }
        tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w'
        tar_files = args.create

        with TarFile.open(tar_name, tar_mode) as tf:
            for file_name in tar_files:
                tf.add(file_name)

        if args.verbose:
            print('{!r} file created.'.format(tar_name))
2898
# Allow running this module directly as a command-line tool.
if __name__ == '__main__':
    main()