Coverage for /pythoncovmergedfiles/medio/medio/usr/lib/python3.9/tarfile.py: 16%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1475 statements  

1#!/usr/bin/env python3 

2#------------------------------------------------------------------- 

3# tarfile.py 

4#------------------------------------------------------------------- 

5# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de> 

6# All rights reserved. 

7# 

8# Permission is hereby granted, free of charge, to any person 

9# obtaining a copy of this software and associated documentation 

10# files (the "Software"), to deal in the Software without 

11# restriction, including without limitation the rights to use, 

12# copy, modify, merge, publish, distribute, sublicense, and/or sell 

13# copies of the Software, and to permit persons to whom the 

14# Software is furnished to do so, subject to the following 

15# conditions: 

16# 

17# The above copyright notice and this permission notice shall be 

18# included in all copies or substantial portions of the Software. 

19# 

20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 

21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 

22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 

23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 

24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 

25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 

26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 

27# OTHER DEALINGS IN THE SOFTWARE. 

28# 

29"""Read from and write to tar format archives. 

30""" 

31 

# Module metadata: tarfile's own version string and author credits.
version     = "0.9.0"
__author__  = "Lars Gust\u00e4bel (lars@gustaebel.de)"
__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."

35 

36#--------- 

37# Imports 

38#--------- 

39from builtins import open as bltn_open 

40import sys 

41import os 

42import io 

43import shutil 

44import stat 

45import time 

46import struct 

47import copy 

48import re 

49 

50try: 

51 import pwd 

52except ImportError: 

53 pwd = None 

54try: 

55 import grp 

56except ImportError: 

57 grp = None 

58 

59# os.symlink on Windows prior to 6.0 raises NotImplementedError 

60symlink_exception = (AttributeError, NotImplementedError) 

61try: 

62 # OSError (winerror=1314) will be raised if the caller does not hold the 

63 # SeCreateSymbolicLinkPrivilege privilege 

64 symlink_exception += (OSError,) 

65except NameError: 

66 pass 

67 

68# from tarfile import * 

69__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError", 

70 "CompressionError", "StreamError", "ExtractError", "HeaderError", 

71 "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT", 

72 "DEFAULT_FORMAT", "open"] 

73 

#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = PAX_FORMAT

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields from a pax header that are affected by hdrcharset.
PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}

# Fields in a pax header that are numbers, all other fields
# are treated as strings.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}

#---------------------------------------------------------
# initialization
#---------------------------------------------------------
# On Windows the filesystem encoding may be a legacy code page, so
# member names are handled as UTF-8 there instead.
if os.name == "nt":
    ENCODING = "utf-8"
else:
    ENCODING = sys.getfilesystemencoding()

153 

154#--------------------------------------------------------- 

155# Some useful functions 

156#--------------------------------------------------------- 

157 

def stn(s, length, encoding, errors):
    """Encode string *s* and fit it into a NUL-padded field of exactly
    *length* bytes (truncating if the encoded form is longer).
    """
    encoded = s.encode(encoding, errors)
    padding = NUL * (length - len(encoded))
    return encoded[:length] + padding

163 

def nts(s, encoding, errors):
    """Decode a possibly NUL-terminated bytes object to a string,
    ignoring everything from the first NUL byte on.
    """
    end = s.find(b"\0")
    if end == -1:
        return s.decode(encoding, errors)
    return s[:end].decode(encoding, errors)

171 

def nti(s):
    """Convert a tar number field to a Python number.

    Two encodings exist (see itn()): plain NUL-terminated octal text,
    and the GNU base-256 form marked by a leading 0o200 (positive) or
    0o377 (negative) byte.
    """
    if s[0] in (0o200, 0o377):
        # GNU base-256: big-endian value in the remaining bytes.
        n = 0
        for byte in s[1:]:
            n = (n << 8) | byte
        if s[0] == 0o377:
            # Negative numbers are stored in two's complement.
            n -= 256 ** (len(s) - 1)
        return n
    # Plain octal digits, possibly padded with spaces/NULs.
    try:
        text = nts(s, "ascii", "strict")
        return int(text.strip() or "0", 8)
    except ValueError:
        raise InvalidHeaderError("invalid header")

191 

def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0o200 or 0o377 byte indicate this
    # particular encoding, the following digits-1 bytes are a big-endian
    # base-256 representation. This allows values up to (256**(digits-1))-1.
    # A 0o200 byte indicates a positive number, a 0o377 byte a negative
    # number.
    n = int(n)
    if 0 <= n < 8 ** (digits - 1):
        # Fits into plain octal with a terminating NUL.
        s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
    elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
        if n >= 0:
            s = bytearray([0o200])
        else:
            s = bytearray([0o377])
            # Store negative values in two's complement.
            n = 256 ** digits + n

        # Emit digits-1 bytes big-endian by inserting after the marker.
        for i in range(digits - 1):
            s.insert(1, n & 0o377)
            n >>= 8
    else:
        raise ValueError("overflow in number field")

    return s

220 

def calc_chksums(buf):
    """Calculate the checksum for a member's header by summing up all
    characters except for the chksum field which is treated as if
    it was filled with spaces. According to the GNU tar sources,
    some tars (Sun and NeXT) calculate chksum with signed char,
    which will be different if there are chars in the buffer with
    the high bit set. So we calculate two checksums, unsigned and
    signed.
    """
    # 8 spaces for the skipped chksum field contribute 8 * 0x20 == 256.
    unsigned_sum = 256 + sum(struct.unpack_from("148B8x356B", buf))
    signed_sum = 256 + sum(struct.unpack_from("148b8x356b", buf))
    return unsigned_sum, signed_sum

233 

def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
    """Copy *length* bytes from fileobj *src* to fileobj *dst*.

    If *length* is None, copy until EOF. Raise *exception* when fewer
    than *length* bytes can be read from *src*.
    """
    bufsize = bufsize or 16 * 1024
    if length == 0:
        return
    if length is None:
        # Unbounded copy: delegate to the standard library.
        shutil.copyfileobj(src, dst, bufsize)
        return

    whole, partial = divmod(length, bufsize)
    for _ in range(whole):
        data = src.read(bufsize)
        if len(data) != bufsize:
            raise exception("unexpected end of data")
        dst.write(data)

    if partial:
        data = src.read(partial)
        if len(data) != partial:
            raise exception("unexpected end of data")
        dst.write(data)

258 

259def _safe_print(s): 

260 encoding = getattr(sys.stdout, 'encoding', None) 

261 if encoding is not None: 

262 s = s.encode(encoding, 'backslashreplace').decode(encoding) 

263 print(s, end=' ') 

264 

265 

# Exception hierarchy: TarError is the root; HeaderError and its
# subclasses are raised while parsing member headers.
class TarError(Exception):
    """Base exception."""
    pass
class ExtractError(TarError):
    """General exception for extract errors."""
    pass
class ReadError(TarError):
    """Exception for unreadable tar archives."""
    pass
class CompressionError(TarError):
    """Exception for unavailable compression methods."""
    pass
class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
    pass
class HeaderError(TarError):
    """Base exception for header errors."""
    pass
class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""
    pass
class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""
    pass
class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""
    pass
class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""
    pass
class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
    pass

299 

300#--------------------------- 

301# internal stream interface 

302#--------------------------- 

303class _LowLevelFile: 

304 """Low-level file object. Supports reading and writing. 

305 It is used instead of a regular file object for streaming 

306 access. 

307 """ 

308 

309 def __init__(self, name, mode): 

310 mode = { 

311 "r": os.O_RDONLY, 

312 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 

313 }[mode] 

314 if hasattr(os, "O_BINARY"): 

315 mode |= os.O_BINARY 

316 self.fd = os.open(name, mode, 0o666) 

317 

318 def close(self): 

319 os.close(self.fd) 

320 

321 def read(self, size): 

322 return os.read(self.fd, size) 

323 

324 def write(self, s): 

325 os.write(self.fd, s) 

326 

class _Stream:
    """Class that serves as an adapter between TarFile and
    a stream-like object. The stream-like object only
    needs to have a read() or write() method and is accessed
    blockwise. Use of gzip or bzip2 compression is possible.
    A stream-like object could be for example: sys.stdin,
    sys.stdout, a socket, a tape device etc.

    _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.
        """
        self._extfileobj = True
        if fileobj is None:
            # We own the file object and must close it ourselves.
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name     = name or ""
        self.mode     = mode
        self.comptype = comptype
        self.fileobj  = fileobj
        self.bufsize  = bufsize
        self.buf      = b""     # raw (compressed) buffer
        self.pos      = 0       # logical (uncompressed) position
        self.closed   = False

        try:
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available")
                self.zlib = zlib
                self.crc = zlib.crc32(b"")
                if mode == "r":
                    self._init_read_gz()
                    self.exception = zlib.error
                else:
                    self._init_write_gz()

            elif comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available")
                if mode == "r":
                    self.dbuf = b""     # decompressed buffer
                    self.cmp = bz2.BZ2Decompressor()
                    self.exception = OSError
                else:
                    self.cmp = bz2.BZ2Compressor()

            elif comptype == "xz":
                try:
                    import lzma
                except ImportError:
                    raise CompressionError("lzma module is not available")
                if mode == "r":
                    self.dbuf = b""     # decompressed buffer
                    self.cmp = lzma.LZMADecompressor()
                    self.exception = lzma.LZMAError
                else:
                    self.cmp = lzma.LZMACompressor()

            elif comptype != "tar":
                raise CompressionError("unknown compression type %r" % comptype)

        except:
            # On any setup failure, release a file object we opened.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        # Raw deflate stream; the gzip wrapper is written by hand below.
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                         -self.zlib.MAX_WBITS,
                                         self.zlib.DEF_MEM_LEVEL,
                                         0)
        timestamp = struct.pack("<L", int(time.time()))
        # gzip header: magic, deflate method, FNAME flag, mtime, XFL, OS.
        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        # Honor "directory components removed" from RFC1952
        self.name = os.path.basename(self.name)
        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
        is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
        done on it afterwards.
        """
        if self.closed:
            return

        self.closed = True
        try:
            if self.mode == "w" and self.comptype != "tar":
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = b""
                if self.comptype == "gz":
                    # gzip trailer: CRC32 and uncompressed size mod 2**32.
                    self.fileobj.write(struct.pack("<L", self.crc))
                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
        finally:
            if not self._extfileobj:
                self.fileobj.close()

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = b""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != b"\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != b"\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)      # skip mtime, XFL, OS

        if flag & 4:
            # FEXTRA: skip the extra field.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.read(xlen)
        if flag & 8:
            # FNAME: skip the NUL-terminated original file name.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # FCOMMENT: skip the NUL-terminated comment.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # FHCRC: skip the header CRC.
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
        is forbidden.
        """
        if pos - self.pos >= 0:
            # Forward seek is emulated by reading and discarding.
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in range(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size):
        """Return the next size number of bytes from the stream."""
        assert size is not None
        buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.
        """
        if self.comptype == "tar":
            return self.__read(size)

        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            # Skip underlying buffer to avoid unaligned double buffering.
            if self.buf:
                buf = self.buf
                self.buf = b""
            else:
                buf = self.fileobj.read(self.bufsize)
                if not buf:
                    break
            try:
                buf = self.cmp.decompress(buf)
            except self.exception:
                raise ReadError("invalid compressed data")
            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
        read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream

568 

569class _StreamProxy(object): 

570 """Small proxy class that enables transparent compression 

571 detection for the Stream interface (mode 'r|*'). 

572 """ 

573 

574 def __init__(self, fileobj): 

575 self.fileobj = fileobj 

576 self.buf = self.fileobj.read(BLOCKSIZE) 

577 

578 def read(self, size): 

579 self.read = self.fileobj.read 

580 return self.buf 

581 

582 def getcomptype(self): 

583 if self.buf.startswith(b"\x1f\x8b\x08"): 

584 return "gz" 

585 elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY": 

586 return "bz2" 

587 elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")): 

588 return "xz" 

589 else: 

590 return "tar" 

591 

592 def close(self): 

593 self.fileobj.close() 

594# class StreamProxy 

595 

596#------------------------ 

597# Extraction file object 

598#------------------------ 

class _FileInFile(object):
    """A thin wrapper around an existing file object that
    provides a part of its data as an individual file
    object.
    """

    def __init__(self, fileobj, offset, size, blockinfo=None):
        self.fileobj = fileobj
        self.offset = offset        # where the member's data starts in fileobj
        self.size = size            # logical size of the member
        self.position = 0           # current logical read position
        self.name = getattr(fileobj, "name", None)
        self.closed = False

        # A non-sparse member is one contiguous data block.
        if blockinfo is None:
            blockinfo = [(0, size)]

        # Construct a map with data and zero blocks.
        # Entries are (is_data, logical_start, logical_stop, real_offset);
        # holes between data blocks read back as zero bytes.
        self.map_index = 0
        self.map = []
        lastpos = 0
        realpos = self.offset
        for offset, size in blockinfo:
            if offset > lastpos:
                self.map.append((False, lastpos, offset, None))
            self.map.append((True, offset, offset + size, realpos))
            realpos += size
            lastpos = offset + size
        if lastpos < self.size:
            self.map.append((False, lastpos, self.size, None))

    def flush(self):
        pass

    def readable(self):
        return True

    def writable(self):
        return False

    def seekable(self):
        return self.fileobj.seekable()

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position, whence=io.SEEK_SET):
        """Seek to a position in the file.
        """
        if whence == io.SEEK_SET:
            self.position = min(max(position, 0), self.size)
        elif whence == io.SEEK_CUR:
            if position < 0:
                self.position = max(self.position + position, 0)
            else:
                self.position = min(self.position + position, self.size)
        elif whence == io.SEEK_END:
            self.position = max(min(self.size + position, self.size), 0)
        else:
            raise ValueError("Invalid argument")
        return self.position

    def read(self, size=None):
        """Read data from the file.
        """
        if size is None:
            size = self.size - self.position
        else:
            size = min(size, self.size - self.position)

        buf = b""
        while size > 0:
            # Find the map entry covering the current position, scanning
            # circularly from the last hit.
            while True:
                data, start, stop, offset = self.map[self.map_index]
                if start <= self.position < stop:
                    break
                else:
                    self.map_index += 1
                    if self.map_index == len(self.map):
                        self.map_index = 0
            length = min(size, stop - self.position)
            if data:
                self.fileobj.seek(offset + (self.position - start))
                b = self.fileobj.read(length)
                if len(b) != length:
                    raise ReadError("unexpected end of data")
                buf += b
            else:
                # Sparse hole: reads back as zero bytes.
                buf += NUL * length
            size -= length
            self.position += length
        return buf

    def readinto(self, b):
        buf = self.read(len(b))
        b[:len(buf)] = buf
        return len(buf)

    def close(self):
        self.closed = True
#class _FileInFile

702 

class ExFileObject(io.BufferedReader):
    """Buffered, read-only file object exposing one archive member's
    data region."""

    def __init__(self, tarfile, tarinfo):
        # Wrap the member's data area (honoring a sparse map, if any) in a
        # raw _FileInFile; BufferedReader supplies the full file API.
        fileobj = _FileInFile(tarfile.fileobj, tarinfo.offset_data,
                tarinfo.size, tarinfo.sparse)
        super().__init__(fileobj)
#class ExFileObject

710 

711#------------------ 

712# Exported Classes 

713#------------------ 

714class TarInfo(object): 

715 """Informational class which holds the details about an 

716 archive member given by a tar header block. 

717 TarInfo objects are returned by TarFile.getmember(), 

718 TarFile.getmembers() and TarFile.gettarinfo() and are 

719 usually created internally. 

720 """ 

721 

    # __slots__ given as a dict maps each attribute name to its docstring.
    __slots__ = dict(
        name = 'Name of the archive member.',
        mode = 'Permission bits.',
        uid = 'User ID of the user who originally stored this member.',
        gid = 'Group ID of the user who originally stored this member.',
        size = 'Size in bytes.',
        mtime = 'Time of last modification.',
        chksum = 'Header checksum.',
        type = ('File type. type is usually one of these constants: '
                'REGTYPE, AREGTYPE, LNKTYPE, SYMTYPE, DIRTYPE, FIFOTYPE, '
                'CONTTYPE, CHRTYPE, BLKTYPE, GNUTYPE_SPARSE.'),
        linkname = ('Name of the target file name, which is only present '
                    'in TarInfo objects of type LNKTYPE and SYMTYPE.'),
        uname = 'User name.',
        gname = 'Group name.',
        devmajor = 'Device major number.',
        devminor = 'Device minor number.',
        offset = 'The tar header starts here.',
        offset_data = "The file's data starts here.",
        pax_headers = ('A dictionary containing key-value pairs of an '
                       'associated pax extended header.'),
        sparse = 'Sparse member information.',
        tarfile = None,
        _sparse_structs = None,
        _link_target = None,
        )

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
        of the member.
        """
        self.name = name        # member name
        self.mode = 0o644       # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = ""         # user name
        self.gname = ""         # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.sparse = None      # sparse member information
        self.pax_headers = {}   # pax header information

    @property
    def path(self):
        'In pax headers, "name" is called "path".'
        return self.name

    @path.setter
    def path(self, name):
        self.name = name

    @property
    def linkpath(self):
        'In pax headers, "linkname" is called "linkpath".'
        return self.linkname

    @linkpath.setter
    def linkpath(self, linkname):
        self.linkname = linkname

    def __repr__(self):
        # Class name, member name and object id for debugging output.
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    def get_info(self):
        """Return the TarInfo's attributes as a dictionary.
        """
        info = {
            "name":     self.name,
            "mode":     self.mode & 0o7777,
            "uid":      self.uid,
            "gid":      self.gid,
            "size":     self.size,
            "mtime":    self.mtime,
            "chksum":   self.chksum,
            "type":     self.type,
            "linkname": self.linkname,
            "uname":    self.uname,
            "gname":    self.gname,
            "devmajor": self.devmajor,
            "devminor": self.devminor
        }

        # Directories are stored with a trailing slash in the archive.
        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
            info["name"] += "/"

        return info

817 

    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
        """Return a tar header as a string of 512 byte blocks.

        format selects one of the *_FORMAT constants; encoding/errors
        control how member names are encoded into the header fields.
        """
        info = self.get_info()

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info, encoding, errors)
        elif format == GNU_FORMAT:
            return self.create_gnu_header(info, encoding, errors)
        elif format == PAX_FORMAT:
            return self.create_pax_header(info, encoding)
        else:
            raise ValueError("invalid format")

831 

    def create_ustar_header(self, info, encoding, errors):
        """Return the object as a ustar header block.

        Raises ValueError if a name does not fit the fixed ustar fields.
        """
        info["magic"] = POSIX_MAGIC

        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        # An over-long name may still fit by splitting it into the
        # prefix and name fields.
        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
            info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors)

        return self._create_header(info, USTAR_FORMAT, encoding, errors)

844 

    def create_gnu_header(self, info, encoding, errors):
        """Return the object as a GNU header block sequence.

        Over-long names and linknames are emitted as extra
        GNUTYPE_LONGNAME / GNUTYPE_LONGLINK pseudo-members preceding
        the real header.
        """
        info["magic"] = GNU_MAGIC

        buf = b""
        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)

        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)

        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)

858 

    def create_pax_header(self, info, encoding):
        """Return the object as a ustar header block. If it cannot be
        represented this way, prepend a pax extended header sequence
        with supplement information.
        """
        info["magic"] = POSIX_MAGIC
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            # Try to encode the string as ASCII.
            try:
                info[name].encode("ascii", "strict")
            except UnicodeEncodeError:
                pax_headers[hname] = info[name]
                continue

            if len(info[name]) > length:
                pax_headers[hname] = info[name]

        # Test number fields for values that exceed the field limit or values
        # that like to be stored as float.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            if name in pax_headers:
                # The pax header has priority. Avoid overflow.
                info[name] = 0
                continue

            val = info[name]
            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
                pax_headers[name] = str(val)
                info[name] = 0

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
        else:
            buf = b""

        return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")

907 

    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return the object as a pax global header block sequence.
        """
        # Global headers are always encoded in UTF-8.
        return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")

913 

914 def _posix_split_name(self, name, encoding, errors): 

915 """Split a name longer than 100 chars into a prefix 

916 and a name part. 

917 """ 

918 components = name.split("/") 

919 for i in range(1, len(components)): 

920 prefix = "/".join(components[:i]) 

921 name = "/".join(components[i:]) 

922 if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \ 

923 len(name.encode(encoding, errors)) <= LENGTH_NAME: 

924 break 

925 else: 

926 raise ValueError("name is too long") 

927 

928 return prefix, name 

929 

    @staticmethod
    def _create_header(info, format, encoding, errors):
        """Return a header block. info is a dictionary with file
        information, format must be one of the *_FORMAT constants.
        """
        # Only character and block devices carry real major/minor
        # numbers; other types leave those fields empty.
        has_device_fields = info.get("type") in (CHRTYPE, BLKTYPE)
        if has_device_fields:
            devmajor = itn(info.get("devmajor", 0), 8, format)
            devminor = itn(info.get("devminor", 0), 8, format)
        else:
            devmajor = stn("", 8, encoding, errors)
            devminor = stn("", 8, encoding, errors)

        # Field layout of a 512-byte ustar header block.
        parts = [
            stn(info.get("name", ""), 100, encoding, errors),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            b"        ", # checksum field
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100, encoding, errors),
            info.get("magic", POSIX_MAGIC),
            stn(info.get("uname", ""), 32, encoding, errors),
            stn(info.get("gname", ""), 32, encoding, errors),
            devmajor,
            devminor,
            stn(info.get("prefix", ""), 155, encoding, errors)
        ]

        buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        # Patch the checksum (offset 148, 7 bytes + NUL) into the block.
        buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
        return buf

965 

966 @staticmethod 

967 def _create_payload(payload): 

968 """Return the string payload filled with zero bytes 

969 up to the next 512 byte border. 

970 """ 

971 blocks, remainder = divmod(len(payload), BLOCKSIZE) 

972 if remainder > 0: 

973 payload += (BLOCKSIZE - remainder) * NUL 

974 return payload 

975 

    @classmethod
    def _create_gnu_long_header(cls, name, type, encoding, errors):
        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
        for name.
        """
        name = name.encode(encoding, errors) + NUL

        # The long name travels as the data payload of a pseudo-member
        # with the conventional "././@LongLink" name.
        info = {}
        info["name"] = "././@LongLink"
        info["type"] = type
        info["size"] = len(name)
        info["magic"] = GNU_MAGIC

        # create extended header + name blocks.
        return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
                cls._create_payload(name)

992 

    @classmethod
    def _create_pax_generic_header(cls, pax_headers, type, encoding):
        """Return a POSIX.1-2008 extended or global header sequence
        that contains a list of keyword, value pairs. The values
        must be strings.
        """
        # Check if one of the fields contains surrogate characters and thereby
        # forces hdrcharset=BINARY, see _proc_pax() for more information.
        binary = False
        for keyword, value in pax_headers.items():
            try:
                value.encode("utf-8", "strict")
            except UnicodeEncodeError:
                binary = True
                break

        records = b""
        if binary:
            # Put the hdrcharset field at the beginning of the header.
            records += b"21 hdrcharset=BINARY\n"

        for keyword, value in pax_headers.items():
            keyword = keyword.encode("utf-8")
            if binary:
                # Try to restore the original byte representation of `value'.
                # Needless to say, that the encoding must match the string.
                value = value.encode(encoding, "surrogateescape")
            else:
                value = value.encode("utf-8")

            # Each record reads "LEN keyword=value\n" where LEN counts the
            # whole record *including its own decimal digits*.  Iterate to a
            # fixed point: adding digits can grow the record, which can in
            # turn require another digit.
            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
            n = p = 0
            while True:
                n = l + len(str(p))
                if n == p:
                    break
                p = n
            records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {}
        info["name"] = "././@PaxHeader"
        info["type"] = type
        info["size"] = len(records)
        info["magic"] = POSIX_MAGIC

        # Create pax header + record blocks.
        return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
                cls._create_payload(records)

1043 

    @classmethod
    def frombuf(cls, buf, encoding, errors):
        """Construct a TarInfo object from a 512 byte bytes object.

        Raises EmptyHeaderError, TruncatedHeaderError, EOFHeaderError or
        InvalidHeaderError depending on how the block is malformed.
        """
        if len(buf) == 0:
            raise EmptyHeaderError("empty header")
        if len(buf) != BLOCKSIZE:
            raise TruncatedHeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            # An all-NUL block marks the end of the archive.
            raise EOFHeaderError("end of file header")

        # calc_chksums() yields more than one candidate sum (hence the
        # membership test); the stored value must match one of them.
        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise InvalidHeaderError("bad checksum")

        # Slice the fixed-offset ustar fields out of the 512-byte block.
        obj = cls()
        obj.name = nts(buf[0:100], encoding, errors)
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257], encoding, errors)
        obj.uname = nts(buf[265:297], encoding, errors)
        obj.gname = nts(buf[297:329], encoding, errors)
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500], encoding, errors)

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # The old GNU sparse format occupies some of the unused
        # space in the buffer for up to 4 sparse structures.
        # Save them for later processing in _proc_sparse().
        if obj.type == GNUTYPE_SPARSE:
            pos = 386
            structs = []
            for i in range(4):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                structs.append((offset, numbytes))
                pos += 24
            # Byte 482 flags extension blocks; 483:495 holds the real size.
            isextended = bool(buf[482])
            origsize = nti(buf[483:495])
            obj._sparse_structs = (structs, isextended, origsize)

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj

1106 

1107 @classmethod 

1108 def fromtarfile(cls, tarfile): 

1109 """Return the next TarInfo object from TarFile object 

1110 tarfile. 

1111 """ 

1112 buf = tarfile.fileobj.read(BLOCKSIZE) 

1113 obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors) 

1114 obj.offset = tarfile.fileobj.tell() - BLOCKSIZE 

1115 return obj._proc_member(tarfile) 

1116 

1117 #-------------------------------------------------------------------------- 

1118 # The following are methods that are called depending on the type of a 

1119 # member. The entry point is _proc_member() which can be overridden in a 

1120 # subclass to add custom _proc_*() methods. A _proc_*() method MUST 

1121 # implement the following 

1122 # operations: 

1123 # 1. Set self.offset_data to the position where the data blocks begin, 

1124 # if there is data that follows. 

1125 # 2. Set tarfile.offset to the position where the next member's header will 

1126 # begin. 

1127 # 3. Return self or another valid TarInfo object. 

1128 def _proc_member(self, tarfile): 

1129 """Choose the right processing method depending on 

1130 the type and call it. 

1131 """ 

1132 if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK): 

1133 return self._proc_gnulong(tarfile) 

1134 elif self.type == GNUTYPE_SPARSE: 

1135 return self._proc_sparse(tarfile) 

1136 elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE): 

1137 return self._proc_pax(tarfile) 

1138 else: 

1139 return self._proc_builtin(tarfile) 

1140 

    def _proc_builtin(self, tarfile):
        """Process a builtin type or an unknown type which
        will be treated as a regular file.
        """
        self.offset_data = tarfile.fileobj.tell()
        offset = self.offset_data
        # Only regular files (and unknown types, which are treated as
        # regular files) are followed by data blocks that must be skipped.
        if self.isreg() or self.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            offset += self._block(self.size)
        tarfile.offset = offset

        # Patch the TarInfo object with saved global
        # header information.
        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

        return self

1157 

    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
        or longlink member.
        """
        # The overlong name/linkname is this member's payload.
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        # Patch the TarInfo object from the next header with
        # the longname information.  The longlink pseudo-member itself is
        # discarded; its offset is attributed to the real member.
        next.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            next.name = nts(buf, tarfile.encoding, tarfile.errors)
        elif self.type == GNUTYPE_LONGLINK:
            next.linkname = nts(buf, tarfile.encoding, tarfile.errors)

        return next

1179 

    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.
        """
        # We already collected some sparse structures in frombuf().
        structs, isextended, origsize = self._sparse_structs
        del self._sparse_structs

        # Collect sparse structures from extended header blocks.
        # Each extension block holds up to 21 (offset, numbytes) pairs of
        # 12 bytes each (21 * 24 = 504); byte 504 flags a further block.
        while isextended:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            for i in range(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset and numbytes:
                    structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[504])
        self.sparse = structs

        # self.size is the on-archive (compacted) size used to skip the data
        # blocks; afterwards restore the real file size from the header.
        self.offset_data = tarfile.fileobj.tell()
        tarfile.offset = self.offset_data + self._block(self.size)
        self.size = origsize
        return self

1207 

    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
        POSIX.1-2008.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global).  Global headers update tarfile.pax_headers in place;
        # extended headers work on a private copy.
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Check if the pax header contains a hdrcharset field. This tells us
        # the encoding of the path, linkpath, uname and gname fields. Normally,
        # these fields are UTF-8 encoded but since POSIX.1-2008 tar
        # implementations are allowed to store them as raw binary strings if
        # the translation to UTF-8 fails.
        match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
        if match is not None:
            pax_headers["hdrcharset"] = match.group(1).decode("utf-8")

        # For the time being, we don't care about anything other than "BINARY".
        # The only other value that is currently allowed by the standard is
        # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
        hdrcharset = pax_headers.get("hdrcharset")
        if hdrcharset == "BINARY":
            encoding = tarfile.encoding
        else:
            encoding = "utf-8"

        # Parse pax header information. A record looks like that:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline. keyword and value are both UTF-8 encoded strings.
        regex = re.compile(br"(\d+) ([^=]+)=")
        pos = 0
        while True:
            match = regex.match(buf, pos)
            if not match:
                break

            length, keyword = match.groups()
            length = int(length)
            # A zero length would make pos stop advancing and loop forever.
            if length == 0:
                raise InvalidHeaderError("invalid header")
            # The value spans from just after "keyword=" to just before the
            # trailing newline of the length-delimited record.
            value = buf[match.end(2) + 1:match.start(1) + length - 1]

            # Normally, we could just use "utf-8" as the encoding and "strict"
            # as the error handler, but we better not take the risk. For
            # example, GNU tar <= 1.23 is known to store filenames it cannot
            # translate to UTF-8 as raw strings (unfortunately without a
            # hdrcharset=BINARY header).
            # We first try the strict standard encoding, and if that fails we
            # fall back on the user's encoding and error handler.
            keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
                    tarfile.errors)
            if keyword in PAX_NAME_FIELDS:
                value = self._decode_pax_field(value, encoding, tarfile.encoding,
                        tarfile.errors)
            else:
                value = self._decode_pax_field(value, "utf-8", "utf-8",
                        tarfile.errors)

            pax_headers[keyword] = value
            pos += length

        # Fetch the next header.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        # Process GNU sparse information.
        if "GNU.sparse.map" in pax_headers:
            # GNU extended sparse format version 0.1.
            self._proc_gnusparse_01(next, pax_headers)

        elif "GNU.sparse.size" in pax_headers:
            # GNU extended sparse format version 0.0.
            self._proc_gnusparse_00(next, pax_headers, buf)

        elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
            # GNU extended sparse format version 1.0.
            self._proc_gnusparse_10(next, pax_headers, tarfile)

        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
            # Patch the TarInfo object with the extended header info.
            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
            next.offset = self.offset

            if "size" in pax_headers:
                # If the extended header replaces the size field,
                # we need to recalculate the offset where the next
                # header starts.
                offset = next.offset_data
                if next.isreg() or next.type not in SUPPORTED_TYPES:
                    offset += next._block(next.size)
                tarfile.offset = offset

        return next

1311 

1312 def _proc_gnusparse_00(self, next, pax_headers, buf): 

1313 """Process a GNU tar extended sparse header, version 0.0. 

1314 """ 

1315 offsets = [] 

1316 for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf): 

1317 offsets.append(int(match.group(1))) 

1318 numbytes = [] 

1319 for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf): 

1320 numbytes.append(int(match.group(1))) 

1321 next.sparse = list(zip(offsets, numbytes)) 

1322 

1323 def _proc_gnusparse_01(self, next, pax_headers): 

1324 """Process a GNU tar extended sparse header, version 0.1. 

1325 """ 

1326 sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")] 

1327 next.sparse = list(zip(sparse[::2], sparse[1::2])) 

1328 

    def _proc_gnusparse_10(self, next, pax_headers, tarfile):
        """Process a GNU tar extended sparse header, version 1.0.
        """
        # In the 1.0 format the sparse map precedes the file data: a decimal
        # record count on the first line, then alternating offset/size
        # numbers, one per line, padded up to a block boundary.
        fields = None
        sparse = []
        buf = tarfile.fileobj.read(BLOCKSIZE)
        fields, buf = buf.split(b"\n", 1)
        fields = int(fields)
        while len(sparse) < fields * 2:
            # Refill the buffer block by block until a full number line is
            # available.
            if b"\n" not in buf:
                buf += tarfile.fileobj.read(BLOCKSIZE)
            number, buf = buf.split(b"\n", 1)
            sparse.append(int(number))
        # The real data blocks start right after the map blocks.
        next.offset_data = tarfile.fileobj.tell()
        next.sparse = list(zip(sparse[::2], sparse[1::2]))

1344 

    def _apply_pax_info(self, pax_headers, encoding, errors):
        """Replace fields with supplemental information from a previous
        pax extended or global header.
        """
        # GNU.sparse.* keywords are handled before the generic PAX_FIELDS
        # check; the order of these branches matters.
        for keyword, value in pax_headers.items():
            if keyword == "GNU.sparse.name":
                setattr(self, "path", value)
            elif keyword == "GNU.sparse.size":
                setattr(self, "size", int(value))
            elif keyword == "GNU.sparse.realsize":
                setattr(self, "size", int(value))
            elif keyword in PAX_FIELDS:
                if keyword in PAX_NUMBER_FIELDS:
                    try:
                        value = PAX_NUMBER_FIELDS[keyword](value)
                    except ValueError:
                        # Unparsable numeric fields degrade to 0 instead of
                        # aborting the whole member.
                        value = 0
                if keyword == "path":
                    value = value.rstrip("/")
                setattr(self, keyword, value)

        # Keep a copy so the headers that produced this member stay
        # inspectable on the TarInfo object.
        self.pax_headers = pax_headers.copy()

1367 

1368 def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors): 

1369 """Decode a single field from a pax record. 

1370 """ 

1371 try: 

1372 return value.decode(encoding, "strict") 

1373 except UnicodeDecodeError: 

1374 return value.decode(fallback_encoding, fallback_errors) 

1375 

1376 def _block(self, count): 

1377 """Round up a byte count by BLOCKSIZE and return it, 

1378 e.g. _block(834) => 1024. 

1379 """ 

1380 blocks, remainder = divmod(count, BLOCKSIZE) 

1381 if remainder: 

1382 blocks += 1 

1383 return blocks * BLOCKSIZE 

1384 

    def isreg(self):
        'Return True if the Tarinfo object is a regular file.'
        return self.type in REGULAR_TYPES

    def isfile(self):
        'Return True if the Tarinfo object is a regular file.'
        # Alias for isreg().
        return self.isreg()

    def isdir(self):
        'Return True if it is a directory.'
        return self.type == DIRTYPE

    def issym(self):
        'Return True if it is a symbolic link.'
        return self.type == SYMTYPE

    def islnk(self):
        'Return True if it is a hard link.'
        return self.type == LNKTYPE

    def ischr(self):
        'Return True if it is a character device.'
        return self.type == CHRTYPE

    def isblk(self):
        'Return True if it is a block device.'
        return self.type == BLKTYPE

    def isfifo(self):
        'Return True if it is a FIFO.'
        return self.type == FIFOTYPE

    def issparse(self):
        'Return True if the member has a sparse map attached.'
        # self.sparse is set by the _proc_sparse()/_proc_gnusparse_*()
        # methods; for ordinary members it stays None.
        return self.sparse is not None

    def isdev(self):
        'Return True if it is one of character device, block device or FIFO.'
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)

1423# class TarInfo 

1424 

class TarFile(object):
    """The TarFile Class provides an interface to tar archives.
    """

    # The following are class-level defaults; __init__ overrides each on the
    # instance when the corresponding keyword argument is given.

    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

    dereference = False         # If true, add content of linked file to the
                                # tar file, else the link.

    ignore_zeros = False        # If true, skips empty or invalid blocks and
                                # continues processing.

    errorlevel = 1              # If 0, fatal errors only appear in debug
                                # messages (if debug >= 0). If > 0, errors
                                # are passed to the caller as exceptions.

    format = DEFAULT_FORMAT     # The format to use when creating an archive.

    encoding = ENCODING         # Encoding for 8-bit character strings.

    errors = None               # Error handler for unicode conversion.

    tarinfo = TarInfo           # The default TarInfo class to use.

    fileobject = ExFileObject   # The file-object for extractfile().

1450 

    def __init__(self, name=None, mode="r", fileobj=None, format=None,
            tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
            errors="surrogateescape", pax_headers=None, debug=None,
            errorlevel=None, copybufsize=None):
        """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
        read from an existing archive, 'a' to append data to an existing
        file or 'w' to create a new file overwriting an existing one. `mode'
        defaults to 'r'.
        If `fileobj' is given, it is used for reading or writing data. If it
        can be determined, `mode' is overridden by `fileobj's mode.
        `fileobj' is not closed, when TarFile is closed.
        """
        modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
        if mode not in modes:
            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
        self.mode = mode
        self._mode = modes[mode]

        if not fileobj:
            if self.mode == "a" and not os.path.exists(name):
                # Create nonexistent files in append mode.
                self.mode = "w"
                self._mode = "wb"
            fileobj = bltn_open(name, self._mode)
            # We opened the file ourselves, so close() must close it too.
            self._extfileobj = False
        else:
            if (name is None and hasattr(fileobj, "name") and
                isinstance(fileobj.name, (str, bytes))):
                name = fileobj.name
            if hasattr(fileobj, "mode"):
                self._mode = fileobj.mode
            # Caller-supplied file objects are left open on close().
            self._extfileobj = True
        self.name = os.path.abspath(name) if name else None
        self.fileobj = fileobj

        # Init attributes.
        if format is not None:
            self.format = format
        if tarinfo is not None:
            self.tarinfo = tarinfo
        if dereference is not None:
            self.dereference = dereference
        if ignore_zeros is not None:
            self.ignore_zeros = ignore_zeros
        if encoding is not None:
            self.encoding = encoding
        self.errors = errors

        # pax_headers only applies when actually writing PAX_FORMAT archives.
        if pax_headers is not None and self.format == PAX_FORMAT:
            self.pax_headers = pax_headers
        else:
            self.pax_headers = {}

        if debug is not None:
            self.debug = debug
        if errorlevel is not None:
            self.errorlevel = errorlevel

        # Init datastructures.
        self.copybufsize = copybufsize
        self.closed = False
        self.members = []       # list of members as TarInfo objects
        self._loaded = False    # flag if all members have been read
        self.offset = self.fileobj.tell()
                                # current position in the archive file
        self.inodes = {}        # dictionary caching the inodes of
                                # archive members already added

        try:
            if self.mode == "r":
                self.firstmember = None
                self.firstmember = self.next()

            if self.mode == "a":
                # Move to the end of the archive,
                # before the first empty block.
                while True:
                    self.fileobj.seek(self.offset)
                    try:
                        tarinfo = self.tarinfo.fromtarfile(self)
                        self.members.append(tarinfo)
                    except EOFHeaderError:
                        self.fileobj.seek(self.offset)
                        break
                    except HeaderError as e:
                        raise ReadError(str(e))

            if self.mode in ("a", "w", "x"):
                self._loaded = True

                # Emit any configured global pax header right away.
                if self.pax_headers:
                    buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                    self.fileobj.write(buf)
                    self.offset += len(buf)
        except:
            # On any setup failure, release the file we opened ourselves
            # before re-raising.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

1550 

1551 #-------------------------------------------------------------------------- 

1552 # Below are the classmethods which act as alternate constructors to the 

1553 # TarFile class. The open() method is the only one that is needed for 

1554 # public use; it is the "super"-constructor and is able to select an 

1555 # adequate "sub"-constructor for a particular compression using the mapping 

1556 # from OPEN_METH. 

1557 # 

1558 # This concept allows one to subclass TarFile without losing the comfort of 

1559 # the super-constructor. A sub-constructor is registered and made available 

1560 # by adding it to the mapping in OPEN_METH. 

1561 

    @classmethod
    def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
        """Open a tar archive for reading, writing or appending. Return
        an appropriate TarFile class.

        mode:
        'r' or 'r:*' open for reading with transparent compression
        'r:'         open for reading exclusively uncompressed
        'r:gz'       open for reading with gzip compression
        'r:bz2'      open for reading with bzip2 compression
        'r:xz'       open for reading with lzma compression
        'a' or 'a:'  open for appending, creating the file if necessary
        'w' or 'w:'  open for writing without compression
        'w:gz'       open for writing with gzip compression
        'w:bz2'      open for writing with bzip2 compression
        'w:xz'       open for writing with lzma compression

        'x' or 'x:'  create a tarfile exclusively without compression, raise
                     an exception if the file is already created
        'x:gz'       create a gzip compressed tarfile, raise an exception
                     if the file is already created
        'x:bz2'      create a bzip2 compressed tarfile, raise an exception
                     if the file is already created
        'x:xz'       create an lzma compressed tarfile, raise an exception
                     if the file is already created

        'r|*'        open a stream of tar blocks with transparent compression
        'r|'         open an uncompressed stream of tar blocks for reading
        'r|gz'       open a gzip compressed stream of tar blocks
        'r|bz2'      open a bzip2 compressed stream of tar blocks
        'r|xz'       open an lzma compressed stream of tar blocks
        'w|'         open an uncompressed stream for writing
        'w|gz'       open a gzip compressed stream for writing
        'w|bz2'      open a bzip2 compressed stream for writing
        'w|xz'       open an lzma compressed stream for writing
        """

        if not name and not fileobj:
            raise ValueError("nothing to open")

        if mode in ("r", "r:*"):
            # Find out which *open() is appropriate for opening the file.
            # Compressed openers are tried first (taropen sorts last) so an
            # uncompressed read cannot mask a compressed archive.
            def not_compressed(comptype):
                return cls.OPEN_METH[comptype] == 'taropen'
            for comptype in sorted(cls.OPEN_METH, key=not_compressed):
                func = getattr(cls, cls.OPEN_METH[comptype])
                if fileobj is not None:
                    # Remember the position so the next opener can retry
                    # from the same spot after a failed attempt.
                    saved_pos = fileobj.tell()
                try:
                    return func(name, "r", fileobj, **kwargs)
                except (ReadError, CompressionError):
                    if fileobj is not None:
                        fileobj.seek(saved_pos)
                    continue
            raise ReadError("file could not be opened successfully")

        elif ":" in mode:
            filemode, comptype = mode.split(":", 1)
            filemode = filemode or "r"
            comptype = comptype or "tar"

            # Select the *open() function according to
            # given compression.
            if comptype in cls.OPEN_METH:
                func = getattr(cls, cls.OPEN_METH[comptype])
            else:
                raise CompressionError("unknown compression type %r" % comptype)
            return func(name, filemode, fileobj, **kwargs)

        elif "|" in mode:
            filemode, comptype = mode.split("|", 1)
            filemode = filemode or "r"
            comptype = comptype or "tar"

            if filemode not in ("r", "w"):
                raise ValueError("mode must be 'r' or 'w'")

            # Streaming access goes through a _Stream wrapper; the TarFile
            # owns and closes it (_extfileobj = False).
            stream = _Stream(name, filemode, comptype, fileobj, bufsize)
            try:
                t = cls(name, filemode, stream, **kwargs)
            except:
                stream.close()
                raise
            t._extfileobj = False
            return t

        elif mode in ("a", "w", "x"):
            return cls.taropen(name, mode, fileobj, **kwargs)

        raise ValueError("undiscernible mode")

1652 

1653 @classmethod 

1654 def taropen(cls, name, mode="r", fileobj=None, **kwargs): 

1655 """Open uncompressed tar archive name for reading or writing. 

1656 """ 

1657 if mode not in ("r", "a", "w", "x"): 

1658 raise ValueError("mode must be 'r', 'a', 'w' or 'x'") 

1659 return cls(name, mode, fileobj, **kwargs) 

1660 

    @classmethod
    def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
        """Open gzip compressed tar archive name for reading or writing.
        Appending is not allowed.
        """
        if mode not in ("r", "w", "x"):
            raise ValueError("mode must be 'r', 'w' or 'x'")

        try:
            from gzip import GzipFile
        except ImportError:
            raise CompressionError("gzip module is not available")

        try:
            fileobj = GzipFile(name, mode + "b", compresslevel, fileobj)
        except OSError:
            # Re-interpret a failed open of an external fileobj in read
            # mode as "not a gzip file" so open('r:*') can try the next
            # compression method.
            if fileobj is not None and mode == 'r':
                raise ReadError("not a gzip file")
            raise

        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except OSError:
            fileobj.close()
            if mode == 'r':
                raise ReadError("not a gzip file")
            raise
        except:
            # Any other failure: release the GzipFile we created here.
            fileobj.close()
            raise
        # The GzipFile was created by us, so close() must close it as well.
        t._extfileobj = False
        return t

1693 

    @classmethod
    def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
        """Open bzip2 compressed tar archive name for reading or writing.
        Appending is not allowed.
        """
        if mode not in ("r", "w", "x"):
            raise ValueError("mode must be 'r', 'w' or 'x'")

        try:
            from bz2 import BZ2File
        except ImportError:
            raise CompressionError("bz2 module is not available")

        # BZ2File accepts either an open file object or a filename.
        fileobj = BZ2File(fileobj or name, mode, compresslevel=compresslevel)

        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except (OSError, EOFError):
            fileobj.close()
            if mode == 'r':
                raise ReadError("not a bzip2 file")
            raise
        except:
            # Any other failure: release the BZ2File we created here.
            fileobj.close()
            raise
        # The BZ2File was created by us, so close() must close it as well.
        t._extfileobj = False
        return t

1721 

    @classmethod
    def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
        """Open lzma compressed tar archive name for reading or writing.
        Appending is not allowed.
        """
        if mode not in ("r", "w", "x"):
            raise ValueError("mode must be 'r', 'w' or 'x'")

        try:
            from lzma import LZMAFile, LZMAError
        except ImportError:
            raise CompressionError("lzma module is not available")

        # LZMAFile accepts either an open file object or a filename.
        fileobj = LZMAFile(fileobj or name, mode, preset=preset)

        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except (LZMAError, EOFError):
            fileobj.close()
            if mode == 'r':
                raise ReadError("not an lzma file")
            raise
        except:
            # Any other failure: release the LZMAFile we created here.
            fileobj.close()
            raise
        # The LZMAFile was created by us, so close() must close it as well.
        t._extfileobj = False
        return t

1749 

    # All *open() methods are registered here.  open() consults this mapping
    # to dispatch "mode:comptype" strings to the matching constructor.
    OPEN_METH = {
        "tar": "taropen",   # uncompressed tar
        "gz":  "gzopen",    # gzip compressed tar
        "bz2": "bz2open",   # bzip2 compressed tar
        "xz":  "xzopen"     # lzma compressed tar
    }

1757 

1758 #-------------------------------------------------------------------------- 

1759 # The public methods which TarFile provides: 

1760 

    def close(self):
        """Close the TarFile. In write-mode, two finishing zero blocks are
        appended to the archive.  Closing twice is a no-op.
        """
        if self.closed:
            return

        self.closed = True
        try:
            if self.mode in ("a", "w", "x"):
                # Terminate the archive with two zero blocks.
                self.fileobj.write(NUL * (BLOCKSIZE * 2))
                self.offset += (BLOCKSIZE * 2)
                # fill up the end with zero-blocks
                # (like option -b20 for tar does)
                blocks, remainder = divmod(self.offset, RECORDSIZE)
                if remainder > 0:
                    self.fileobj.write(NUL * (RECORDSIZE - remainder))
        finally:
            # Only close the underlying file if we opened it ourselves.
            if not self._extfileobj:
                self.fileobj.close()

1781 

1782 def getmember(self, name): 

1783 """Return a TarInfo object for member `name'. If `name' can not be 

1784 found in the archive, KeyError is raised. If a member occurs more 

1785 than once in the archive, its last occurrence is assumed to be the 

1786 most up-to-date version. 

1787 """ 

1788 tarinfo = self._getmember(name) 

1789 if tarinfo is None: 

1790 raise KeyError("filename %r not found" % name) 

1791 return tarinfo 

1792 

1793 def getmembers(self): 

1794 """Return the members of the archive as a list of TarInfo objects. The 

1795 list has the same order as the members in the archive. 

1796 """ 

1797 self._check() 

1798 if not self._loaded: # if we want to obtain a list of 

1799 self._load() # all members, we first have to 

1800 # scan the whole archive. 

1801 return self.members 

1802 

1803 def getnames(self): 

1804 """Return the members of the archive as a list of their names. It has 

1805 the same order as the list returned by getmembers(). 

1806 """ 

1807 return [tarinfo.name for tarinfo in self.getmembers()] 

1808 

    def gettarinfo(self, name=None, arcname=None, fileobj=None):
        """Create a TarInfo object from the result of os.stat or equivalent
        on an existing file. The file is either named by `name', or
        specified as a file object `fileobj' with a file descriptor. If
        given, `arcname' specifies an alternative name for the file in the
        archive, otherwise, the name is taken from the 'name' attribute of
        'fileobj', or the 'name' argument. The name should be a text
        string.

        Returns None for file types that cannot be represented in a tar
        archive (e.g. sockets).
        """
        self._check("awx")

        # When fileobj is given, replace name by
        # fileobj's real name.
        if fileobj is not None:
            name = fileobj.name

        # Building the name of the member in the archive.
        # Backward slashes are converted to forward slashes,
        # Absolute paths are turned to relative paths.
        if arcname is None:
            arcname = name
        drv, arcname = os.path.splitdrive(arcname)
        arcname = arcname.replace(os.sep, "/")
        arcname = arcname.lstrip("/")

        # Now, fill the TarInfo object with
        # information specific for the file.
        tarinfo = self.tarinfo()
        tarinfo.tarfile = self  # Not needed

        # Use os.stat or os.lstat, depending on if symlinks shall be resolved.
        if fileobj is None:
            if not self.dereference:
                statres = os.lstat(name)
            else:
                statres = os.stat(name)
        else:
            statres = os.fstat(fileobj.fileno())
        linkname = ""

        stmd = statres.st_mode
        if stat.S_ISREG(stmd):
            inode = (statres.st_ino, statres.st_dev)
            if not self.dereference and statres.st_nlink > 1 and \
                    inode in self.inodes and arcname != self.inodes[inode]:
                # Is it a hardlink to an already
                # archived file?
                type = LNKTYPE
                linkname = self.inodes[inode]
            else:
                # The inode is added only if its valid.
                # For win32 it is always 0.
                type = REGTYPE
                if inode[0]:
                    self.inodes[inode] = arcname
        elif stat.S_ISDIR(stmd):
            type = DIRTYPE
        elif stat.S_ISFIFO(stmd):
            type = FIFOTYPE
        elif stat.S_ISLNK(stmd):
            type = SYMTYPE
            linkname = os.readlink(name)
        elif stat.S_ISCHR(stmd):
            type = CHRTYPE
        elif stat.S_ISBLK(stmd):
            type = BLKTYPE
        else:
            # Anything else (sockets etc.) cannot be archived.
            return None

        # Fill the TarInfo object with all
        # information we can get.
        tarinfo.name = arcname
        tarinfo.mode = stmd
        tarinfo.uid = statres.st_uid
        tarinfo.gid = statres.st_gid
        if type == REGTYPE:
            tarinfo.size = statres.st_size
        else:
            # Only regular files carry data in the archive.
            tarinfo.size = 0
        tarinfo.mtime = statres.st_mtime
        tarinfo.type = type
        tarinfo.linkname = linkname
        # Resolve numeric ids to names where the platform provides the
        # pwd/grp modules; unknown ids are silently left unnamed.
        if pwd:
            try:
                tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
            except KeyError:
                pass
        if grp:
            try:
                tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
            except KeyError:
                pass

        if type in (CHRTYPE, BLKTYPE):
            if hasattr(os, "major") and hasattr(os, "minor"):
                tarinfo.devmajor = os.major(statres.st_rdev)
                tarinfo.devminor = os.minor(statres.st_rdev)
        return tarinfo

1907 

    def list(self, verbose=True, *, members=None):
        """Print a table of contents to sys.stdout. If `verbose' is False, only
           the names of the members are printed. If it is True, an `ls -l'-like
           output is produced. `members' is optional and must be a subset of the
           list returned by getmembers().
        """
        self._check()

        if members is None:
            # Iterating self reads members lazily from the archive.
            members = self
        for tarinfo in members:
            if verbose:
                _safe_print(stat.filemode(tarinfo.mode))
                _safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid,
                                       tarinfo.gname or tarinfo.gid))
                if tarinfo.ischr() or tarinfo.isblk():
                    # Devices show "major,minor" in the size column.
                    _safe_print("%10s" %
                                ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor)))
                else:
                    _safe_print("%10d" % tarinfo.size)
                _safe_print("%d-%02d-%02d %02d:%02d:%02d" \
                            % time.localtime(tarinfo.mtime)[:6])

            # Directories get a trailing slash, like `ls -p'.
            _safe_print(tarinfo.name + ("/" if tarinfo.isdir() else ""))

            if verbose:
                if tarinfo.issym():
                    _safe_print("-> " + tarinfo.linkname)
                if tarinfo.islnk():
                    _safe_print("link to " + tarinfo.linkname)
            print()

1939 

    def add(self, name, arcname=None, recursive=True, *, filter=None):
        """Add the file `name' to the archive. `name' may be any type of file
           (directory, fifo, symbolic link, etc.). If given, `arcname'
           specifies an alternative name for the file in the archive.
           Directories are added recursively by default. This can be avoided by
           setting `recursive' to False. `filter' is a function
           that expects a TarInfo object argument and returns the changed
           TarInfo object, if it returns None the TarInfo object will be
           excluded from the archive.
        """
        self._check("awx")

        if arcname is None:
            arcname = name

        # Skip if somebody tries to archive the archive...
        if self.name is not None and os.path.abspath(name) == self.name:
            self._dbg(2, "tarfile: Skipped %r" % name)
            return

        self._dbg(1, name)

        # Create a TarInfo object from the file.
        tarinfo = self.gettarinfo(name, arcname)

        if tarinfo is None:
            # gettarinfo() returns None for file types tar cannot store.
            self._dbg(1, "tarfile: Unsupported type %r" % name)
            return

        # Change or exclude the TarInfo object.
        if filter is not None:
            tarinfo = filter(tarinfo)
            if tarinfo is None:
                self._dbg(2, "tarfile: Excluded %r" % name)
                return

        # Append the tar header and data to the archive.
        if tarinfo.isreg():
            with bltn_open(name, "rb") as f:
                self.addfile(tarinfo, f)

        elif tarinfo.isdir():
            self.addfile(tarinfo)
            if recursive:
                # sorted() gives a deterministic member order regardless of
                # filesystem directory-listing order.
                for f in sorted(os.listdir(name)):
                    self.add(os.path.join(name, f), os.path.join(arcname, f),
                             recursive, filter=filter)

        else:
            # Symlinks, fifos and devices have a header but no data.
            self.addfile(tarinfo)

1990 

    def addfile(self, tarinfo, fileobj=None):
        """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
           given, it should be a binary file, and tarinfo.size bytes are read
           from it and added to the archive. You can create TarInfo objects
           directly, or by using gettarinfo().
        """
        self._check("awx")

        # Copy so later caller-side mutation can't corrupt self.members.
        tarinfo = copy.copy(tarinfo)

        buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
        self.fileobj.write(buf)
        # self.offset must track the exact byte position in the archive.
        self.offset += len(buf)
        bufsize=self.copybufsize
        # If there's data to follow, append it.
        if fileobj is not None:
            copyfileobj(fileobj, self.fileobj, tarinfo.size, bufsize=bufsize)
            blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
            if remainder > 0:
                # Pad the last partial block with NULs to a full BLOCKSIZE.
                self.fileobj.write(NUL * (BLOCKSIZE - remainder))
                blocks += 1
            self.offset += blocks * BLOCKSIZE

        self.members.append(tarinfo)

2015 

    def extractall(self, path=".", members=None, *, numeric_owner=False):
        """Extract all members from the archive to the current working
           directory and set owner, modification time and permissions on
           directories afterwards. `path' specifies a different directory
           to extract to. `members' is optional and must be a subset of the
           list returned by getmembers(). If `numeric_owner` is True, only
           the numbers for user/group names are used and not the names.
        """
        directories = []

        if members is None:
            members = self

        for tarinfo in members:
            if tarinfo.isdir():
                # Extract directories with a safe mode.
                # (0o700 guarantees we can still write into them while the
                # remaining members are extracted; real modes come later.)
                directories.append(tarinfo)
                tarinfo = copy.copy(tarinfo)
                tarinfo.mode = 0o700
            # Do not set_attrs directories, as we will do that further down
            self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(),
                         numeric_owner=numeric_owner)

        # Reverse sort directories.
        # Deepest-first order so a parent's mtime isn't clobbered by writing
        # attributes into its children afterwards.
        directories.sort(key=lambda a: a.name)
        directories.reverse()

        # Set correct owner, mtime and filemode on directories.
        for tarinfo in directories:
            dirpath = os.path.join(path, tarinfo.name)
            try:
                self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
                self.utime(tarinfo, dirpath)
                self.chmod(tarinfo, dirpath)
            except ExtractError as e:
                # errorlevel <= 1: attribute failures are logged, not fatal.
                if self.errorlevel > 1:
                    raise
                else:
                    self._dbg(1, "tarfile: %s" % e)

2055 

    def extract(self, member, path="", set_attrs=True, *, numeric_owner=False):
        """Extract a member from the archive to the current working directory,
           using its full name. Its file information is extracted as accurately
           as possible. `member' may be a filename or a TarInfo object. You can
           specify a different directory using `path'. File attributes (owner,
           mtime, mode) are set unless `set_attrs' is False. If `numeric_owner`
           is True, only the numbers for user/group names are used and not
           the names.
        """
        self._check("r")

        if isinstance(member, str):
            tarinfo = self.getmember(member)
        else:
            tarinfo = member

        # Prepare the link target for makelink().
        if tarinfo.islnk():
            tarinfo._link_target = os.path.join(path, tarinfo.linkname)

        try:
            self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
                                 set_attrs=set_attrs,
                                 numeric_owner=numeric_owner)
        except OSError as e:
            # errorlevel 0: OS errors are only logged; >0: re-raised.
            if self.errorlevel > 0:
                raise
            else:
                if e.filename is None:
                    self._dbg(1, "tarfile: %s" % e.strerror)
                else:
                    self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
        except ExtractError as e:
            # ExtractError (non-fatal attribute problems) is fatal only at
            # errorlevel 2.
            if self.errorlevel > 1:
                raise
            else:
                self._dbg(1, "tarfile: %s" % e)

2093 

    def extractfile(self, member):
        """Extract a member from the archive as a file object. `member' may be
           a filename or a TarInfo object. If `member' is a regular file or
           a link, an io.BufferedReader object is returned. For all other
           existing members, None is returned. If `member' does not appear
           in the archive, KeyError is raised.
        """
        self._check("r")

        if isinstance(member, str):
            tarinfo = self.getmember(member)
        else:
            tarinfo = member

        if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
            # Members with unknown types are treated as regular files.
            return self.fileobject(self, tarinfo)

        elif tarinfo.islnk() or tarinfo.issym():
            if isinstance(self.fileobj, _Stream):
                # A small but ugly workaround for the case that someone tries
                # to extract a (sym)link as a file-object from a non-seekable
                # stream of tar blocks.
                raise StreamError("cannot extract (sym)link as file object")
            else:
                # A (sym)link's file object is its target's file object.
                return self.extractfile(self._find_link_target(tarinfo))
        else:
            # If there's no data associated with the member (directory, chrdev,
            # blkdev, etc.), return None instead of a file object.
            return None

2125 

    def _extract_member(self, tarinfo, targetpath, set_attrs=True,
                        numeric_owner=False):
        """Extract the TarInfo object tarinfo to a physical
           file called targetpath.
        """
        # Fetch the TarInfo object for the given name
        # and build the destination pathname, replacing
        # forward slashes to platform specific separators.
        targetpath = targetpath.rstrip("/")
        targetpath = targetpath.replace("/", os.sep)

        # Create all upper directories.
        upperdirs = os.path.dirname(targetpath)
        if upperdirs and not os.path.exists(upperdirs):
            # Create directories that are not part of the archive with
            # default permissions.
            os.makedirs(upperdirs)

        if tarinfo.islnk() or tarinfo.issym():
            self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
        else:
            self._dbg(1, tarinfo.name)

        # Dispatch to the per-type make*() method; subclasses may override
        # any of them to customize extraction.
        if tarinfo.isreg():
            self.makefile(tarinfo, targetpath)
        elif tarinfo.isdir():
            self.makedir(tarinfo, targetpath)
        elif tarinfo.isfifo():
            self.makefifo(tarinfo, targetpath)
        elif tarinfo.ischr() or tarinfo.isblk():
            self.makedev(tarinfo, targetpath)
        elif tarinfo.islnk() or tarinfo.issym():
            self.makelink(tarinfo, targetpath)
        elif tarinfo.type not in SUPPORTED_TYPES:
            self.makeunknown(tarinfo, targetpath)
        else:
            self.makefile(tarinfo, targetpath)

        if set_attrs:
            self.chown(tarinfo, targetpath, numeric_owner)
            if not tarinfo.issym():
                # A symlink's own mode/mtime is meaningless; skip it.
                self.chmod(tarinfo, targetpath)
                self.utime(tarinfo, targetpath)

2169 

2170 #-------------------------------------------------------------------------- 

2171 # Below are the different file methods. They are called via 

2172 # _extract_member() when extract() is called. They can be replaced in a 

2173 # subclass to implement other functionality. 

2174 

2175 def makedir(self, tarinfo, targetpath): 

2176 """Make a directory called targetpath. 

2177 """ 

2178 try: 

2179 # Use a safe mode for the directory, the real mode is set 

2180 # later in _extract_member(). 

2181 os.mkdir(targetpath, 0o700) 

2182 except FileExistsError: 

2183 pass 

2184 

    def makefile(self, tarinfo, targetpath):
        """Make a file called targetpath.
        """
        source = self.fileobj
        # Position the archive stream at the member's data area.
        source.seek(tarinfo.offset_data)
        bufsize = self.copybufsize
        with bltn_open(targetpath, "wb") as target:
            if tarinfo.sparse is not None:
                # Sparse member: write only the recorded data runs, then
                # extend the file to its full logical size via truncate().
                for offset, size in tarinfo.sparse:
                    target.seek(offset)
                    copyfileobj(source, target, size, ReadError, bufsize)
                target.seek(tarinfo.size)
                target.truncate()
            else:
                copyfileobj(source, target, tarinfo.size, ReadError, bufsize)

2200 

2201 def makeunknown(self, tarinfo, targetpath): 

2202 """Make a file from a TarInfo object with an unknown type 

2203 at targetpath. 

2204 """ 

2205 self.makefile(tarinfo, targetpath) 

2206 self._dbg(1, "tarfile: Unknown file type %r, " \ 

2207 "extracted as regular file." % tarinfo.type) 

2208 

2209 def makefifo(self, tarinfo, targetpath): 

2210 """Make a fifo called targetpath. 

2211 """ 

2212 if hasattr(os, "mkfifo"): 

2213 os.mkfifo(targetpath) 

2214 else: 

2215 raise ExtractError("fifo not supported by system") 

2216 

2217 def makedev(self, tarinfo, targetpath): 

2218 """Make a character or block device called targetpath. 

2219 """ 

2220 if not hasattr(os, "mknod") or not hasattr(os, "makedev"): 

2221 raise ExtractError("special devices not supported by system") 

2222 

2223 mode = tarinfo.mode 

2224 if tarinfo.isblk(): 

2225 mode |= stat.S_IFBLK 

2226 else: 

2227 mode |= stat.S_IFCHR 

2228 

2229 os.mknod(targetpath, mode, 

2230 os.makedev(tarinfo.devmajor, tarinfo.devminor)) 

2231 

    def makelink(self, tarinfo, targetpath):
        """Make a (symbolic) link called targetpath. If it cannot be created
           (platform limitation), we try to make a copy of the referenced file
           instead of a link.
        """
        try:
            # For systems that support symbolic and hard links.
            if tarinfo.issym():
                if os.path.lexists(targetpath):
                    # Avoid FileExistsError on following os.symlink.
                    os.unlink(targetpath)
                os.symlink(tarinfo.linkname, targetpath)
            else:
                # See extract().
                if os.path.exists(tarinfo._link_target):
                    os.link(tarinfo._link_target, targetpath)
                else:
                    # Hardlink target missing on disk: extract the target
                    # member's data instead.
                    self._extract_member(self._find_link_target(tarinfo),
                                         targetpath)
        except symlink_exception:
            # Platform refused to create the link: fall back to extracting
            # a full copy of the referenced member.
            try:
                self._extract_member(self._find_link_target(tarinfo),
                                     targetpath)
            except KeyError:
                raise ExtractError("unable to resolve link inside archive")

2257 

    def chown(self, tarinfo, targetpath, numeric_owner):
        """Set owner of targetpath according to tarinfo. If numeric_owner
           is True, use .gid/.uid instead of .gname/.uname. If numeric_owner
           is False, fall back to .gid/.uid when the search based on name
           fails.
        """
        if hasattr(os, "geteuid") and os.geteuid() == 0:
            # We have to be root to do so.
            g = tarinfo.gid
            u = tarinfo.uid
            if not numeric_owner:
                # Prefer the symbolic names from the archive; an unknown
                # name (KeyError) falls back to the numeric id.
                try:
                    if grp:
                        g = grp.getgrnam(tarinfo.gname)[2]
                except KeyError:
                    pass
                try:
                    if pwd:
                        u = pwd.getpwnam(tarinfo.uname)[2]
                except KeyError:
                    pass
            try:
                if tarinfo.issym() and hasattr(os, "lchown"):
                    # Change ownership of the link itself, not its target.
                    os.lchown(targetpath, u, g)
                else:
                    os.chown(targetpath, u, g)
            except OSError:
                raise ExtractError("could not change owner")

2286 

2287 def chmod(self, tarinfo, targetpath): 

2288 """Set file permissions of targetpath according to tarinfo. 

2289 """ 

2290 try: 

2291 os.chmod(targetpath, tarinfo.mode) 

2292 except OSError: 

2293 raise ExtractError("could not change mode") 

2294 

2295 def utime(self, tarinfo, targetpath): 

2296 """Set modification time of targetpath according to tarinfo. 

2297 """ 

2298 if not hasattr(os, 'utime'): 

2299 return 

2300 try: 

2301 os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime)) 

2302 except OSError: 

2303 raise ExtractError("could not change modification time") 

2304 

2305 #-------------------------------------------------------------------------- 

    def next(self):
        """Return the next member of the archive as a TarInfo object, when
           TarFile is opened for reading. Return None if there is no more
           available.
        """
        self._check("ra")
        if self.firstmember is not None:
            # getmember()/getmembers() may have pre-read the first member.
            m = self.firstmember
            self.firstmember = None
            return m

        # Advance the file pointer.
        if self.offset != self.fileobj.tell():
            # Seek to one byte before the expected header and read it to
            # detect a truncated archive before parsing.
            self.fileobj.seek(self.offset - 1)
            if not self.fileobj.read(1):
                raise ReadError("unexpected end of data")

        # Read the next block.
        tarinfo = None
        while True:
            try:
                tarinfo = self.tarinfo.fromtarfile(self)
            except EOFHeaderError as e:
                # End-of-archive marker; with ignore_zeros keep scanning for
                # a concatenated archive behind it.
                if self.ignore_zeros:
                    self._dbg(2, "0x%X: %s" % (self.offset, e))
                    self.offset += BLOCKSIZE
                    continue
            except InvalidHeaderError as e:
                if self.ignore_zeros:
                    self._dbg(2, "0x%X: %s" % (self.offset, e))
                    self.offset += BLOCKSIZE
                    continue
                elif self.offset == 0:
                    # A bad header at offset 0 means this is not a tar file.
                    raise ReadError(str(e))
            except EmptyHeaderError:
                if self.offset == 0:
                    raise ReadError("empty file")
            except TruncatedHeaderError as e:
                if self.offset == 0:
                    raise ReadError(str(e))
            except SubsequentHeaderError as e:
                raise ReadError(str(e))
            break

        if tarinfo is not None:
            self.members.append(tarinfo)
        else:
            # No more members: the archive is now fully loaded.
            self._loaded = True

        return tarinfo

2356 

2357 #-------------------------------------------------------------------------- 

2358 # Little helper methods: 

2359 

2360 def _getmember(self, name, tarinfo=None, normalize=False): 

2361 """Find an archive member by name from bottom to top. 

2362 If tarinfo is given, it is used as the starting point. 

2363 """ 

2364 # Ensure that all members have been loaded. 

2365 members = self.getmembers() 

2366 

2367 # Limit the member search list up to tarinfo. 

2368 if tarinfo is not None: 

2369 members = members[:members.index(tarinfo)] 

2370 

2371 if normalize: 

2372 name = os.path.normpath(name) 

2373 

2374 for member in reversed(members): 

2375 if normalize: 

2376 member_name = os.path.normpath(member.name) 

2377 else: 

2378 member_name = member.name 

2379 

2380 if name == member_name: 

2381 return member 

2382 

2383 def _load(self): 

2384 """Read through the entire archive file and look for readable 

2385 members. 

2386 """ 

2387 while True: 

2388 tarinfo = self.next() 

2389 if tarinfo is None: 

2390 break 

2391 self._loaded = True 

2392 

2393 def _check(self, mode=None): 

2394 """Check if TarFile is still open, and if the operation's mode 

2395 corresponds to TarFile's mode. 

2396 """ 

2397 if self.closed: 

2398 raise OSError("%s is closed" % self.__class__.__name__) 

2399 if mode is not None and self.mode not in mode: 

2400 raise OSError("bad operation for mode %r" % self.mode) 

2401 

2402 def _find_link_target(self, tarinfo): 

2403 """Find the target member of a symlink or hardlink member in the 

2404 archive. 

2405 """ 

2406 if tarinfo.issym(): 

2407 # Always search the entire archive. 

2408 linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname))) 

2409 limit = None 

2410 else: 

2411 # Search the archive before the link, because a hard link is 

2412 # just a reference to an already archived file. 

2413 linkname = tarinfo.linkname 

2414 limit = tarinfo 

2415 

2416 member = self._getmember(linkname, tarinfo=limit, normalize=True) 

2417 if member is None: 

2418 raise KeyError("linkname %r not found" % linkname) 

2419 return member 

2420 

    def __iter__(self):
        """Provide an iterator object.
        """
        if self._loaded:
            # Everything already read: just replay the cached members.
            yield from self.members
            return

        # Yield items using TarFile's next() method.
        # When all members have been read, set TarFile as _loaded.
        index = 0
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will have already exhausted the next() method.
        if self.firstmember is not None:
            tarinfo = self.next()
            index += 1
            yield tarinfo

        while True:
            if index < len(self.members):
                # Another consumer (e.g. getmembers()) may have read ahead;
                # serve from the cache first.
                tarinfo = self.members[index]
            elif not self._loaded:
                tarinfo = self.next()
                if not tarinfo:
                    self._loaded = True
                    return
            else:
                return
            index += 1
            yield tarinfo

2451 

2452 def _dbg(self, level, msg): 

2453 """Write debugging output to sys.stderr. 

2454 """ 

2455 if level <= self.debug: 

2456 print(msg, file=sys.stderr) 

2457 

2458 def __enter__(self): 

2459 self._check() 

2460 return self 

2461 

2462 def __exit__(self, type, value, traceback): 

2463 if type is None: 

2464 self.close() 

2465 else: 

2466 # An exception occurred. We must not call close() because 

2467 # it would try to write end-of-archive blocks and padding. 

2468 if not self._extfileobj: 

2469 self.fileobj.close() 

2470 self.closed = True 

2471 

2472#-------------------- 

2473# exported functions 

2474#-------------------- 

def is_tarfile(name):
    """Return True if name points to a tar archive that we
    are able to handle, else return False.

    'name' should be a string, file, or file-like object.
    """
    try:
        # File-like objects are opened via the fileobj parameter.
        tar = open(fileobj=name) if hasattr(name, "read") else open(name)
        tar.close()
        return True
    except TarError:
        # Any tarfile-specific failure means "not a tar archive we handle".
        return False

2490 

# Module-level convenience alias so callers can write tarfile.open(...).
# NOTE: this shadows the builtin open() within this module; the builtin is
# preserved as bltn_open at the top of the file.
open = TarFile.open

2492 

2493 

def main():
    """Command-line entry point: list, extract, create or test archives."""
    import argparse

    description = 'A simple command-line interface for tarfile module.'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='Verbose output')
    # Exactly one action must be chosen.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-l', '--list', metavar='<tarfile>',
                       help='Show listing of a tarfile')
    group.add_argument('-e', '--extract', nargs='+',
                       metavar=('<tarfile>', '<output_dir>'),
                       help='Extract tarfile into target dir')
    group.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create tarfile from sources')
    group.add_argument('-t', '--test', metavar='<tarfile>',
                       help='Test if a tarfile is valid')
    args = parser.parse_args()

    if args.test is not None:
        src = args.test
        if is_tarfile(src):
            with open(src, 'r') as tar:
                # Reading all members validates the archive; the member
                # list is echoed to stderr.
                tar.getmembers()
                print(tar.getmembers(), file=sys.stderr)
            if args.verbose:
                print('{!r} is a tar archive.'.format(src))
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.list is not None:
        src = args.list
        if is_tarfile(src):
            with TarFile.open(src, 'r:*') as tf:
                tf.list(verbose=args.verbose)
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.extract is not None:
        # One positional: archive only; two: archive plus output directory.
        if len(args.extract) == 1:
            src = args.extract[0]
            curdir = os.curdir
        elif len(args.extract) == 2:
            src, curdir = args.extract
        else:
            parser.exit(1, parser.format_help())

        if is_tarfile(src):
            with TarFile.open(src, 'r:*') as tf:
                tf.extractall(path=curdir)
            if args.verbose:
                if curdir == '.':
                    msg = '{!r} file is extracted.'.format(src)
                else:
                    msg = ('{!r} file is extracted '
                           'into {!r} directory.').format(src, curdir)
                print(msg)
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.create is not None:
        tar_name = args.create.pop(0)
        # Choose the compression from the archive's file extension.
        _, ext = os.path.splitext(tar_name)
        compressions = {
            # gz
            '.gz': 'gz',
            '.tgz': 'gz',
            # xz
            '.xz': 'xz',
            '.txz': 'xz',
            # bz2
            '.bz2': 'bz2',
            '.tbz': 'bz2',
            '.tbz2': 'bz2',
            '.tb2': 'bz2',
        }
        tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w'
        tar_files = args.create

        with TarFile.open(tar_name, tar_mode) as tf:
            for file_name in tar_files:
                tf.add(file_name)

        if args.verbose:
            print('{!r} file created.'.format(tar_name))

2580 

# Allow running the module directly: python -m tarfile / python tarfile.py
if __name__ == '__main__':
    main()