1#!/usr/bin/env python3
2#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
5# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
6# All rights reserved.
7#
8# Permission is hereby granted, free of charge, to any person
9# obtaining a copy of this software and associated documentation
10# files (the "Software"), to deal in the Software without
11# restriction, including without limitation the rights to use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the
14# Software is furnished to do so, subject to the following
15# conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
32version = "0.9.0"
33__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
34__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
35
36#---------
37# Imports
38#---------
39from builtins import open as bltn_open
40import sys
41import os
42import io
43import shutil
44import stat
45import time
46import struct
47import copy
48import re
49import warnings
50
51try:
52 import pwd
53except ImportError:
54 pwd = None
55try:
56 import grp
57except ImportError:
58 grp = None
59
60# os.symlink on Windows prior to 6.0 raises NotImplementedError
61# OSError (winerror=1314) will be raised if the caller does not hold the
62# SeCreateSymbolicLinkPrivilege privilege
63symlink_exception = (AttributeError, NotImplementedError, OSError)
64
65# from tarfile import *
66__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError",
67 "CompressionError", "StreamError", "ExtractError", "HeaderError",
68 "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT",
69 "DEFAULT_FORMAT", "open","fully_trusted_filter", "data_filter",
70 "tar_filter", "FilterError", "AbsoluteLinkError",
71 "OutsideDestinationError", "SpecialFileError", "AbsolutePathError",
72 "LinkOutsideDestinationError"]
73
74
75#---------------------------------------------------------
76# tar constants
77#---------------------------------------------------------
78NUL = b"\0" # the null character
79BLOCKSIZE = 512 # length of processing blocks
80RECORDSIZE = BLOCKSIZE * 20 # length of records
81GNU_MAGIC = b"ustar \0" # magic gnu tar string
82POSIX_MAGIC = b"ustar\x0000" # magic posix tar string
83
84LENGTH_NAME = 100 # maximum length of a filename
85LENGTH_LINK = 100 # maximum length of a linkname
86LENGTH_PREFIX = 155 # maximum length of the prefix field
87
88REGTYPE = b"0" # regular file
89AREGTYPE = b"\0" # regular file
90LNKTYPE = b"1" # link (inside tarfile)
91SYMTYPE = b"2" # symbolic link
92CHRTYPE = b"3" # character special device
93BLKTYPE = b"4" # block special device
94DIRTYPE = b"5" # directory
95FIFOTYPE = b"6" # fifo special device
96CONTTYPE = b"7" # contiguous file
97
98GNUTYPE_LONGNAME = b"L" # GNU tar longname
99GNUTYPE_LONGLINK = b"K" # GNU tar longlink
100GNUTYPE_SPARSE = b"S" # GNU tar sparse file
101
102XHDTYPE = b"x" # POSIX.1-2001 extended header
103XGLTYPE = b"g" # POSIX.1-2001 global header
104SOLARIS_XHDTYPE = b"X" # Solaris extended header
105
106USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format
107GNU_FORMAT = 1 # GNU tar format
108PAX_FORMAT = 2 # POSIX.1-2001 (pax) format
109DEFAULT_FORMAT = PAX_FORMAT
110
111#---------------------------------------------------------
112# tarfile constants
113#---------------------------------------------------------
114# File types that tarfile supports:
115SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
116 SYMTYPE, DIRTYPE, FIFOTYPE,
117 CONTTYPE, CHRTYPE, BLKTYPE,
118 GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
119 GNUTYPE_SPARSE)
120
121# File types that will be treated as a regular file.
122REGULAR_TYPES = (REGTYPE, AREGTYPE,
123 CONTTYPE, GNUTYPE_SPARSE)
124
125# File types that are part of the GNU tar format.
126GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
127 GNUTYPE_SPARSE)
128
129# Fields from a pax header that override a TarInfo attribute.
130PAX_FIELDS = ("path", "linkpath", "size", "mtime",
131 "uid", "gid", "uname", "gname")
132
133# Fields from a pax header that are affected by hdrcharset.
134PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}
135
136# Fields in a pax header that are numbers, all other fields
137# are treated as strings.
138PAX_NUMBER_FIELDS = {
139 "atime": float,
140 "ctime": float,
141 "mtime": float,
142 "uid": int,
143 "gid": int,
144 "size": int
145}
146
147#---------------------------------------------------------
148# initialization
149#---------------------------------------------------------
150if os.name == "nt":
151 ENCODING = "utf-8"
152else:
153 ENCODING = sys.getfilesystemencoding()
154
155#---------------------------------------------------------
156# Some useful functions
157#---------------------------------------------------------
158
def stn(s, length, encoding, errors):
    """Encode a string into a fixed-length, NUL-padded bytes field.

    The encoded string is truncated to *length* bytes if too long,
    otherwise padded with NUL bytes to exactly *length*.  None is
    rejected because it cannot be represented in a header field.
    """
    if s is None:
        raise ValueError("metadata cannot contain None")
    encoded = s.encode(encoding, errors)
    return encoded[:length] + (length - len(encoded)) * NUL
166
def nts(s, encoding, errors):
    """Decode a NUL-terminated bytes object to a string.

    Everything from the first NUL byte onward is discarded before
    decoding with the given encoding and error handler.
    """
    end = s.find(b"\0")
    if end == -1:
        return s.decode(encoding, errors)
    return s[:end].decode(encoding, errors)
174
def nti(s):
    """Convert a tar number field (bytes) to a Python integer.

    Handles both encodings produced by itn(): plain octal digits and
    the GNU base-256 form, which is flagged by a leading 0o200
    (positive) or 0o377 (negative) byte.
    """
    marker = s[0]
    if marker in (0o200, 0o377):
        # GNU base-256: remaining bytes are a big-endian number.
        n = 0
        for byte in s[1:]:
            n = (n << 8) | byte
        if marker == 0o377:
            # Negative values are stored in two's complement.
            n -= 256 ** (len(s) - 1)
        return n
    # Plain octal digits, possibly space/NUL padded.
    try:
        text = nts(s, "ascii", "strict")
        return int(text.strip() or "0", 8)
    except ValueError:
        raise InvalidHeaderError("invalid header")
194
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a Python number to a tar number field.

    POSIX 1003.1-1988 requires numbers to be encoded as a string of
    octal digits followed by a null-byte, which allows values up to
    (8**(digits-1))-1.  GNU tar allows storing numbers greater than
    that if necessary: a leading 0o200 (positive) or 0o377 (negative)
    byte indicates that the following digits-1 bytes are a big-endian
    base-256 representation, allowing values up to (256**(digits-1))-1.

    Raises ValueError if the value does not fit in the field for the
    requested format.
    """
    # NOTE: the previous version assigned an unused local
    # (original_n = n); it has been removed.
    n = int(n)
    if 0 <= n < 8 ** (digits - 1):
        # Plain NUL-terminated octal representation.
        s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
    elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
        if n >= 0:
            s = bytearray([0o200])
        else:
            s = bytearray([0o377])
            # Store negatives in two's complement relative to 256**digits.
            n = 256 ** digits + n

        # Emit the magnitude big-endian by inserting low bytes after
        # the marker byte.
        for i in range(digits - 1):
            s.insert(1, n & 0o377)
            n >>= 8
    else:
        raise ValueError("overflow in number field")

    return s
224
def calc_chksums(buf):
    """Return (unsigned, signed) checksums for a 512-byte header block.

    The checksum is the sum of all header bytes with the 8-byte chksum
    field (offset 148) treated as if filled with spaces; the constant
    256 accounts for those 8 spaces (8 * 32).  Some historic tars (Sun
    and NeXT, per the GNU tar sources) summed signed chars, which
    differs when bytes have the high bit set, so both interpretations
    are computed.
    """
    unsigned = 256 + sum(struct.unpack_from("148B8x356B", buf))
    signed = 256 + sum(struct.unpack_from("148b8x356b", buf))
    return unsigned, signed
237
def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
    """Copy length bytes from fileobj src to fileobj dst.

    If length is None, copy the entire content.  Raises *exception*
    if src is exhausted before length bytes could be read.
    """
    bufsize = bufsize or 16 * 1024
    if length == 0:
        return
    if length is None:
        # Unbounded copy: delegate to shutil.
        shutil.copyfileobj(src, dst, bufsize)
        return

    # Bounded copy: full buffers followed by one partial read.
    full_blocks, leftover = divmod(length, bufsize)
    chunk_sizes = [bufsize] * full_blocks
    if leftover:
        chunk_sizes.append(leftover)
    for want in chunk_sizes:
        data = src.read(want)
        if len(data) < want:
            raise exception("unexpected end of data")
        dst.write(data)
262
def _safe_print(s):
    # Print s to stdout (followed by a space, no newline) without ever
    # raising UnicodeEncodeError: characters that stdout's encoding
    # cannot represent are replaced with backslash escapes first.
    encoding = getattr(sys.stdout, 'encoding', None)
    if encoding is not None:
        s = s.encode(encoding, 'backslashreplace').decode(encoding)
    print(s, end=' ')
268
269
class TarError(Exception):
    """Base exception for all tarfile errors."""
class ExtractError(TarError):
    """General exception for extract errors."""
class ReadError(TarError):
    """Exception for unreadable tar archives."""
class CompressionError(TarError):
    """Exception for unavailable compression methods."""
class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
class HeaderError(TarError):
    """Base exception for header errors."""
class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""
class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""
class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""
class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""
class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
303
304#---------------------------
305# internal stream interface
306#---------------------------
307class _LowLevelFile:
308 """Low-level file object. Supports reading and writing.
309 It is used instead of a regular file object for streaming
310 access.
311 """
312
313 def __init__(self, name, mode):
314 mode = {
315 "r": os.O_RDONLY,
316 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
317 }[mode]
318 if hasattr(os, "O_BINARY"):
319 mode |= os.O_BINARY
320 self.fd = os.open(name, mode, 0o666)
321
322 def close(self):
323 os.close(self.fd)
324
325 def read(self, size):
326 return os.read(self.fd, size)
327
328 def write(self, s):
329 os.write(self.fd, s)
330
class _Stream:
    """Class that serves as an adapter between TarFile and
    a stream-like object. The stream-like object only
    needs to have a read() or write() method that works with bytes,
    and the method is accessed blockwise.
    Use of gzip or bzip2 compression is possible.
    A stream-like object could be for example: sys.stdin.buffer,
    sys.stdout.buffer, a socket, a tape device etc.

    _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize,
            compresslevel):
        """Construct a _Stream object.

        mode is "r" or "w"; comptype is "tar", "gz", "bz2", "xz", or
        "*" (auto-detect).  Compression modules are imported lazily so
        a missing module only fails when its comptype is requested.
        """
        self._extfileobj = True
        if fileobj is None:
            # We opened the file ourselves, so we must also close it.
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name = name or ""
        self.mode = mode
        self.comptype = comptype
        self.fileobj = fileobj
        self.bufsize = bufsize
        self.buf = b""       # buffered raw (file-side) bytes
        self.pos = 0         # position in the uncompressed stream
        self.closed = False

        try:
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available") from None
                self.zlib = zlib
                self.crc = zlib.crc32(b"")
                if mode == "r":
                    self.exception = zlib.error
                    self._init_read_gz()
                else:
                    self._init_write_gz(compresslevel)

            elif comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available") from None
                if mode == "r":
                    self.dbuf = b""    # buffered decompressed bytes
                    self.cmp = bz2.BZ2Decompressor()
                    self.exception = OSError
                else:
                    self.cmp = bz2.BZ2Compressor(compresslevel)

            elif comptype == "xz":
                try:
                    import lzma
                except ImportError:
                    raise CompressionError("lzma module is not available") from None
                if mode == "r":
                    self.dbuf = b""    # buffered decompressed bytes
                    self.cmp = lzma.LZMADecompressor()
                    self.exception = lzma.LZMAError
                else:
                    self.cmp = lzma.LZMACompressor()

            elif comptype != "tar":
                raise CompressionError("unknown compression type %r" % comptype)

        except:
            # Initialization failed: release the file object if we own
            # it, mark the stream closed, and re-raise.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        # Last-resort cleanup; the hasattr guard protects against a
        # partially constructed instance.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self, compresslevel):
        """Initialize for writing with gzip compression.
        """
        # Raw deflate stream (negative wbits): the gzip header and
        # trailer are written by hand here and in close().
        self.cmp = self.zlib.compressobj(compresslevel,
                                         self.zlib.DEFLATED,
                                         -self.zlib.MAX_WBITS,
                                         self.zlib.DEF_MEM_LEVEL,
                                         0)
        timestamp = struct.pack("<L", int(time.time()))
        # Gzip header: magic, deflate method, FNAME flag set, mtime,
        # XFL=2, OS=255 (unknown) -- see RFC 1952.
        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        # Honor "directory components removed" from RFC1952
        self.name = os.path.basename(self.name)
        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            # CRC is computed over the uncompressed data.
            self.crc = self.zlib.crc32(s, self.crc)
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
        is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
        done on it afterwards.
        """
        if self.closed:
            return

        self.closed = True
        try:
            if self.mode == "w" and self.comptype != "tar":
                # Flush any data still held by the compressor.
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = b""
                if self.comptype == "gz":
                    # Gzip trailer: CRC32 and uncompressed size modulo
                    # 2**32 (ISIZE), both little-endian -- RFC 1952.
                    self.fileobj.write(struct.pack("<L", self.crc))
                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
        finally:
            if not self._extfileobj:
                self.fileobj.close()

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = b""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != b"\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != b"\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)    # skip mtime (4 bytes), XFL and OS

        if flag & 4:
            # FEXTRA: skip the extra field (2-byte length, then data).
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.read(xlen)
        if flag & 8:
            # FNAME: skip the NUL-terminated original file name.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # FCOMMENT: skip the NUL-terminated comment.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # FHCRC: skip the 2-byte header CRC.
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
        is forbidden.
        """
        if pos - self.pos >= 0:
            # Forward seek: read and discard the skipped bytes.
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in range(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size):
        """Return the next size number of bytes from the stream."""
        assert size is not None
        buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.
        """
        if self.comptype == "tar":
            return self.__read(size)

        # Decompress into dbuf until we have at least size bytes or the
        # underlying stream is exhausted.
        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            # Skip underlying buffer to avoid unaligned double buffering.
            if self.buf:
                buf = self.buf
                self.buf = b""
            else:
                buf = self.fileobj.read(self.bufsize)
                if not buf:
                    break
            try:
                buf = self.cmp.decompress(buf)
            except self.exception as e:
                raise ReadError("invalid compressed data") from e
            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
        read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        self.buf = t[size:]
        return t[:size]
574# class _Stream
575
576class _StreamProxy(object):
577 """Small proxy class that enables transparent compression
578 detection for the Stream interface (mode 'r|*').
579 """
580
581 def __init__(self, fileobj):
582 self.fileobj = fileobj
583 self.buf = self.fileobj.read(BLOCKSIZE)
584
585 def read(self, size):
586 self.read = self.fileobj.read
587 return self.buf
588
589 def getcomptype(self):
590 if self.buf.startswith(b"\x1f\x8b\x08"):
591 return "gz"
592 elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
593 return "bz2"
594 elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
595 return "xz"
596 else:
597 return "tar"
598
599 def close(self):
600 self.fileobj.close()
601# class StreamProxy
602
603#------------------------
604# Extraction file object
605#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
    provides a part of its data as an individual file
    object.

    Supports sparse members: `blockinfo` lists the (offset, size)
    data runs; gaps between runs read back as NUL bytes.
    """

    def __init__(self, fileobj, offset, size, name, blockinfo=None):
        # fileobj: underlying (seekable) file object, shared with others
        # offset:  physical position of this member's data in fileobj
        # size:    logical size of the member
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.position = 0
        self.name = name
        self.closed = False

        if blockinfo is None:
            # Non-sparse member: one contiguous data run.
            blockinfo = [(0, size)]

        # Construct a map with data and zero blocks.
        # Entries are (is_data, start, stop, realpos): start/stop are
        # logical positions, realpos the physical position in fileobj
        # (None for holes).  Note: the loop variables shadow the
        # offset/size parameters, which are no longer needed here.
        self.map_index = 0
        self.map = []
        lastpos = 0
        realpos = self.offset
        for offset, size in blockinfo:
            if offset > lastpos:
                # Hole before this data run.
                self.map.append((False, lastpos, offset, None))
            self.map.append((True, offset, offset + size, realpos))
            realpos += size
            lastpos = offset + size
        if lastpos < self.size:
            # Trailing hole up to the logical end of the member.
            self.map.append((False, lastpos, self.size, None))

    def flush(self):
        pass

    def readable(self):
        return True

    def writable(self):
        return False

    def seekable(self):
        return self.fileobj.seekable()

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position, whence=io.SEEK_SET):
        """Seek to a position in the file.

        The resulting position is clamped to [0, self.size].
        """
        if whence == io.SEEK_SET:
            self.position = min(max(position, 0), self.size)
        elif whence == io.SEEK_CUR:
            if position < 0:
                self.position = max(self.position + position, 0)
            else:
                self.position = min(self.position + position, self.size)
        elif whence == io.SEEK_END:
            self.position = max(min(self.size + position, self.size), 0)
        else:
            raise ValueError("Invalid argument")
        return self.position

    def read(self, size=None):
        """Read data from the file.
        """
        if size is None:
            size = self.size - self.position
        else:
            size = min(size, self.size - self.position)

        buf = b""
        while size > 0:
            # Find the map entry covering the current position; the
            # index wraps around since seeks may move backwards.
            while True:
                data, start, stop, offset = self.map[self.map_index]
                if start <= self.position < stop:
                    break
                else:
                    self.map_index += 1
                    if self.map_index == len(self.map):
                        self.map_index = 0
            length = min(size, stop - self.position)
            if data:
                # Data run: read the bytes from the underlying file.
                self.fileobj.seek(offset + (self.position - start))
                b = self.fileobj.read(length)
                if len(b) != length:
                    raise ReadError("unexpected end of data")
                buf += b
            else:
                # Hole: sparse regions read back as NUL bytes.
                buf += NUL * length
            size -= length
            self.position += length
        return buf

    def readinto(self, b):
        # Fill the writable buffer b, returning the byte count read.
        buf = self.read(len(b))
        b[:len(buf)] = buf
        return len(buf)

    def close(self):
        # Only marks this wrapper closed; the shared underlying file
        # object stays open.
        self.closed = True
708#class _FileInFile
709
class ExFileObject(io.BufferedReader):
    # File-like object for an archive member: wraps a _FileInFile view
    # of the member's data region (honoring sparse info) in a buffered
    # reader so callers get the usual read()/readline() API.

    def __init__(self, tarfile, tarinfo):
        fileobj = _FileInFile(tarfile.fileobj, tarinfo.offset_data,
                tarinfo.size, tarinfo.name, tarinfo.sparse)
        super().__init__(fileobj)
716#class ExFileObject
717
718
719#-----------------------------
720# extraction filters (PEP 706)
721#-----------------------------
722
class FilterError(TarError):
    """Base class for errors raised by extraction filters (PEP 706)."""
    pass
725
class AbsolutePathError(FilterError):
    """Raised for members whose name is an absolute path."""

    def __init__(self, tarinfo):
        self.tarinfo = tarinfo
        super().__init__(f'member {tarinfo.name!r} has an absolute path')
730
class OutsideDestinationError(FilterError):
    """Raised for members that would extract outside the destination."""

    def __init__(self, tarinfo, path):
        self.tarinfo = tarinfo
        self._path = path
        super().__init__(f'{tarinfo.name!r} would be extracted to {path!r}, '
                         + 'which is outside the destination')
737
class SpecialFileError(FilterError):
    """Raised for special files (devices, FIFOs) by the 'data' filter."""

    def __init__(self, tarinfo):
        self.tarinfo = tarinfo
        super().__init__(f'{tarinfo.name!r} is a special file')
742
class AbsoluteLinkError(FilterError):
    """Raised for hard/symbolic links targeting an absolute path."""

    def __init__(self, tarinfo):
        self.tarinfo = tarinfo
        super().__init__(f'{tarinfo.name!r} is a link to an absolute path')
747
class LinkOutsideDestinationError(FilterError):
    """Raised for links whose target resolves outside the destination."""

    def __init__(self, tarinfo, path):
        self.tarinfo = tarinfo
        self._path = path
        super().__init__(f'{tarinfo.name!r} would link to {path!r}, '
                         + 'which is outside the destination')
754
def _get_filtered_attrs(member, dest_path, for_data=True):
    """Return the TarInfo attributes of *member* that must be replaced
    before it is safe to extract into *dest_path*.

    Shared implementation of the 'tar' and 'data' extraction filters
    (PEP 706); for_data=True applies the stricter 'data' rules.
    Raises a FilterError subclass for members that must be rejected
    outright rather than sanitized.
    """
    new_attrs = {}
    name = member.name
    dest_path = os.path.realpath(dest_path)
    # Strip leading / (tar's directory separator) from filenames.
    # Include os.sep (target OS directory separator) as well.
    if name.startswith(('/', os.sep)):
        name = new_attrs['name'] = member.path.lstrip('/' + os.sep)
    if os.path.isabs(name):
        # Path is absolute even after stripping.
        # For example, 'C:/foo' on Windows.
        raise AbsolutePathError(member)
    # Ensure we stay in the destination
    target_path = os.path.realpath(os.path.join(dest_path, name))
    if os.path.commonpath([target_path, dest_path]) != dest_path:
        raise OutsideDestinationError(member, target_path)
    # Limit permissions (no high bits, and go-w)
    mode = member.mode
    if mode is not None:
        # Strip high bits & group/other write bits
        mode = mode & 0o755
        if for_data:
            # For data, handle permissions & file types
            if member.isreg() or member.islnk():
                if not mode & 0o100:
                    # Clear executable bits if not executable by user
                    mode &= ~0o111
                # Ensure owner can read & write
                mode |= 0o600
            elif member.isdir() or member.issym():
                # Ignore mode for directories & symlinks
                mode = None
            else:
                # Reject special files
                raise SpecialFileError(member)
        if mode != member.mode:
            new_attrs['mode'] = mode
    if for_data:
        # Ignore ownership for 'data'
        if member.uid is not None:
            new_attrs['uid'] = None
        if member.gid is not None:
            new_attrs['gid'] = None
        if member.uname is not None:
            new_attrs['uname'] = None
        if member.gname is not None:
            new_attrs['gname'] = None
        # Check link destination for 'data'
        if member.islnk() or member.issym():
            if os.path.isabs(member.linkname):
                raise AbsoluteLinkError(member)
            if member.issym():
                # Symlink targets resolve relative to the link's own
                # directory; hard link targets relative to the root.
                target_path = os.path.join(dest_path,
                                           os.path.dirname(name),
                                           member.linkname)
            else:
                target_path = os.path.join(dest_path,
                                           member.linkname)
            target_path = os.path.realpath(target_path)
            if os.path.commonpath([target_path, dest_path]) != dest_path:
                raise LinkOutsideDestinationError(member, target_path)
    return new_attrs
817
def fully_trusted_filter(member, dest_path):
    # The 'fully_trusted' extraction filter (PEP 706): no checks, the
    # member is returned unchanged.
    return member
820
def tar_filter(member, dest_path):
    """The 'tar' extraction filter (PEP 706): sanitize names and modes
    without the stricter 'data' checks."""
    overrides = _get_filtered_attrs(member, dest_path, False)
    return member.replace(**overrides, deep=False) if overrides else member
826
def data_filter(member, dest_path):
    """The 'data' extraction filter (PEP 706): full sanitization,
    rejecting special files and unsafe links."""
    overrides = _get_filtered_attrs(member, dest_path, True)
    return member.replace(**overrides, deep=False) if overrides else member
832
# Extraction filters addressable by string name (presumably looked up
# when a filter is given by name rather than as a callable -- confirm
# against TarFile's extraction code).
_NAMED_FILTERS = {
    "fully_trusted": fully_trusted_filter,
    "tar": tar_filter,
    "data": data_filter,
}
838
839#------------------
840# Exported Classes
841#------------------
842
# Sentinel for replace() defaults, meaning "don't change the attribute".
# A sentinel is needed because None is itself a legal attribute value
# (e.g. the filters above set uid/gid/mode to None).
_KEEP = object()
845
class TarInfo(object):
    """Informational class which holds the details about an
    archive member given by a tar header block.
    TarInfo objects are returned by TarFile.getmember(),
    TarFile.getmembers() and TarFile.gettarinfo() and are
    usually created internally.

    Per-attribute documentation lives in the __slots__ dict below.
    """
853
    # Dict-valued __slots__: the keys define the slots and the string
    # values double as per-attribute docstrings (surfaced by help()).
    __slots__ = dict(
        name = 'Name of the archive member.',
        mode = 'Permission bits.',
        uid = 'User ID of the user who originally stored this member.',
        gid = 'Group ID of the user who originally stored this member.',
        size = 'Size in bytes.',
        mtime = 'Time of last modification.',
        chksum = 'Header checksum.',
        type = ('File type. type is usually one of these constants: '
                'REGTYPE, AREGTYPE, LNKTYPE, SYMTYPE, DIRTYPE, FIFOTYPE, '
                'CONTTYPE, CHRTYPE, BLKTYPE, GNUTYPE_SPARSE.'),
        linkname = ('Name of the target file name, which is only present '
                    'in TarInfo objects of type LNKTYPE and SYMTYPE.'),
        uname = 'User name.',
        gname = 'Group name.',
        devmajor = 'Device major number.',
        devminor = 'Device minor number.',
        offset = 'The tar header starts here.',
        offset_data = "The file's data starts here.",
        pax_headers = ('A dictionary containing key-value pairs of an '
                       'associated pax extended header.'),
        sparse = 'Sparse member information.',
        # Internal slots, deliberately undocumented:
        tarfile = None,
        _sparse_structs = None,
        _link_target = None,
    )
880
    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
        of the member.

        All other fields are initialized to defaults describing an
        empty regular file (type REGTYPE, size 0, mode 0o644) owned
        by uid/gid 0.
        """
        self.name = name        # member name
        self.mode = 0o644       # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = ""         # user name
        self.gname = ""         # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.sparse = None      # sparse member information
        self.pax_headers = {}   # pax header information
904
    @property
    def path(self):
        'In pax headers, "name" is called "path".'
        return self.name

    @path.setter
    def path(self, name):
        # Writing to .path simply rebinds .name.
        self.name = name
913
    @property
    def linkpath(self):
        'In pax headers, "linkname" is called "linkpath".'
        return self.linkname

    @linkpath.setter
    def linkpath(self, linkname):
        # Writing to .linkpath simply rebinds .linkname.
        self.linkname = linkname
922
923 def __repr__(self):
924 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
925
926 def replace(self, *,
927 name=_KEEP, mtime=_KEEP, mode=_KEEP, linkname=_KEEP,
928 uid=_KEEP, gid=_KEEP, uname=_KEEP, gname=_KEEP,
929 deep=True, _KEEP=_KEEP):
930 """Return a deep copy of self with the given attributes replaced.
931 """
932 if deep:
933 result = copy.deepcopy(self)
934 else:
935 result = copy.copy(self)
936 if name is not _KEEP:
937 result.name = name
938 if mtime is not _KEEP:
939 result.mtime = mtime
940 if mode is not _KEEP:
941 result.mode = mode
942 if linkname is not _KEEP:
943 result.linkname = linkname
944 if uid is not _KEEP:
945 result.uid = uid
946 if gid is not _KEEP:
947 result.gid = gid
948 if uname is not _KEEP:
949 result.uname = uname
950 if gname is not _KEEP:
951 result.gname = gname
952 return result
953
954 def get_info(self):
955 """Return the TarInfo's attributes as a dictionary.
956 """
957 if self.mode is None:
958 mode = None
959 else:
960 mode = self.mode & 0o7777
961 info = {
962 "name": self.name,
963 "mode": mode,
964 "uid": self.uid,
965 "gid": self.gid,
966 "size": self.size,
967 "mtime": self.mtime,
968 "chksum": self.chksum,
969 "type": self.type,
970 "linkname": self.linkname,
971 "uname": self.uname,
972 "gname": self.gname,
973 "devmajor": self.devmajor,
974 "devminor": self.devminor
975 }
976
977 if info["type"] == DIRTYPE and not info["name"].endswith("/"):
978 info["name"] += "/"
979
980 return info
981
    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
        """Return a tar header for this member as bytes, i.e. one or
        more 512 byte blocks.

        Raises ValueError if any required field is None or if format
        is not one of the *_FORMAT constants.
        """
        info = self.get_info()
        for name, value in info.items():
            if value is None:
                raise ValueError("%s may not be None" % name)

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info, encoding, errors)
        elif format == GNU_FORMAT:
            return self.create_gnu_header(info, encoding, errors)
        elif format == PAX_FORMAT:
            return self.create_pax_header(info, encoding)
        else:
            raise ValueError("invalid format")
998
    def create_ustar_header(self, info, encoding, errors):
        """Return the object as a ustar header block.

        Raises ValueError when the linkname does not fit, or when the
        name cannot be split into prefix/name halves that fit.
        """
        info["magic"] = POSIX_MAGIC

        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        # ustar has no long-name extension; overlong names must be
        # split across the 155-byte prefix and 100-byte name fields.
        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
            info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors)

        return self._create_header(info, USTAR_FORMAT, encoding, errors)
1011
    def create_gnu_header(self, info, encoding, errors):
        """Return the object as a GNU header block sequence.

        Overlong link names and member names are emitted as extra
        GNUTYPE_LONGLINK / GNUTYPE_LONGNAME pseudo-member blocks that
        precede the real header block.
        """
        info["magic"] = GNU_MAGIC

        buf = b""
        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)

        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)

        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)
1025
    def create_pax_header(self, info, encoding):
        """Return the object as a ustar header block. If it cannot be
        represented this way, prepend a pax extended header sequence
        with supplement information.

        Explicit entries in self.pax_headers always take priority over
        values derived from the member's attributes.
        """
        info["magic"] = POSIX_MAGIC
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            # Try to encode the string as ASCII.
            try:
                info[name].encode("ascii", "strict")
            except UnicodeEncodeError:
                pax_headers[hname] = info[name]
                continue

            if len(info[name]) > length:
                pax_headers[hname] = info[name]

        # Test number fields for values that exceed the field limit or values
        # that like to be stored as float.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            needs_pax = False

            val = info[name]
            val_is_float = isinstance(val, float)
            val_int = round(val) if val_is_float else val
            if not 0 <= val_int < 8 ** (digits - 1):
                # Avoid overflow.
                info[name] = 0
                needs_pax = True
            elif val_is_float:
                # Put rounded value in ustar header, and full
                # precision value in pax header.
                info[name] = val_int
                needs_pax = True

            # The existing pax header has priority.
            if needs_pax and name not in pax_headers:
                pax_headers[name] = str(val)

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
        else:
            buf = b""

        # The ustar block itself is ASCII-only; non-ASCII data lives in
        # the pax records created above.
        return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
1083
1084 @classmethod
1085 def create_pax_global_header(cls, pax_headers):
1086 """Return the object as a pax global header block sequence.
1087 """
1088 return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
1089
1090 def _posix_split_name(self, name, encoding, errors):
1091 """Split a name longer than 100 chars into a prefix
1092 and a name part.
1093 """
1094 components = name.split("/")
1095 for i in range(1, len(components)):
1096 prefix = "/".join(components[:i])
1097 name = "/".join(components[i:])
1098 if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \
1099 len(name.encode(encoding, errors)) <= LENGTH_NAME:
1100 break
1101 else:
1102 raise ValueError("name is too long")
1103
1104 return prefix, name
1105
1106 @staticmethod
1107 def _create_header(info, format, encoding, errors):
1108 """Return a header block. info is a dictionary with file
1109 information, format must be one of the *_FORMAT constants.
1110 """
1111 has_device_fields = info.get("type") in (CHRTYPE, BLKTYPE)
1112 if has_device_fields:
1113 devmajor = itn(info.get("devmajor", 0), 8, format)
1114 devminor = itn(info.get("devminor", 0), 8, format)
1115 else:
1116 devmajor = stn("", 8, encoding, errors)
1117 devminor = stn("", 8, encoding, errors)
1118
1119 # None values in metadata should cause ValueError.
1120 # itn()/stn() do this for all fields except type.
1121 filetype = info.get("type", REGTYPE)
1122 if filetype is None:
1123 raise ValueError("TarInfo.type must not be None")
1124
1125 parts = [
1126 stn(info.get("name", ""), 100, encoding, errors),
1127 itn(info.get("mode", 0) & 0o7777, 8, format),
1128 itn(info.get("uid", 0), 8, format),
1129 itn(info.get("gid", 0), 8, format),
1130 itn(info.get("size", 0), 12, format),
1131 itn(info.get("mtime", 0), 12, format),
1132 b" ", # checksum field
1133 filetype,
1134 stn(info.get("linkname", ""), 100, encoding, errors),
1135 info.get("magic", POSIX_MAGIC),
1136 stn(info.get("uname", ""), 32, encoding, errors),
1137 stn(info.get("gname", ""), 32, encoding, errors),
1138 devmajor,
1139 devminor,
1140 stn(info.get("prefix", ""), 155, encoding, errors)
1141 ]
1142
1143 buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
1144 chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
1145 buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
1146 return buf
1147
1148 @staticmethod
1149 def _create_payload(payload):
1150 """Return the string payload filled with zero bytes
1151 up to the next 512 byte border.
1152 """
1153 blocks, remainder = divmod(len(payload), BLOCKSIZE)
1154 if remainder > 0:
1155 payload += (BLOCKSIZE - remainder) * NUL
1156 return payload
1157
1158 @classmethod
1159 def _create_gnu_long_header(cls, name, type, encoding, errors):
1160 """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
1161 for name.
1162 """
1163 name = name.encode(encoding, errors) + NUL
1164
1165 info = {}
1166 info["name"] = "././@LongLink"
1167 info["type"] = type
1168 info["size"] = len(name)
1169 info["magic"] = GNU_MAGIC
1170
1171 # create extended header + name blocks.
1172 return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
1173 cls._create_payload(name)
1174
    @classmethod
    def _create_pax_generic_header(cls, pax_headers, type, encoding):
        """Return a POSIX.1-2008 extended or global header sequence
        that contains a list of keyword, value pairs. The values
        must be strings.
        """
        # Check if one of the fields contains surrogate characters and thereby
        # forces hdrcharset=BINARY, see _proc_pax() for more information.
        binary = False
        for keyword, value in pax_headers.items():
            try:
                value.encode("utf-8", "strict")
            except UnicodeEncodeError:
                binary = True
                break

        records = b""
        if binary:
            # Put the hdrcharset field at the beginning of the header.
            records += b"21 hdrcharset=BINARY\n"

        for keyword, value in pax_headers.items():
            keyword = keyword.encode("utf-8")
            if binary:
                # Try to restore the original byte representation of `value'.
                # Needless to say, that the encoding must match the string.
                value = value.encode(encoding, "surrogateescape")
            else:
                value = value.encode("utf-8")

            # A record is "%d %s=%s\n" % (length, keyword, value), where
            # length counts the WHOLE record including the length digits
            # themselves. Iterate until the digit count stabilizes (adding
            # the digits may itself add a digit, e.g. 99 -> 101).
            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
            n = p = 0
            while True:
                n = l + len(str(p))
                if n == p:
                    break
                p = n
            records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {}
        info["name"] = "././@PaxHeader"
        info["type"] = type
        info["size"] = len(records)
        info["magic"] = POSIX_MAGIC

        # Create pax header + record blocks.
        return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
                cls._create_payload(records)
1225
    @classmethod
    def frombuf(cls, buf, encoding, errors):
        """Construct a TarInfo object from a 512 byte bytes object.

        Raises EmptyHeaderError, TruncatedHeaderError, EOFHeaderError or
        InvalidHeaderError depending on how buf fails to be a valid
        header block.
        """
        if len(buf) == 0:
            raise EmptyHeaderError("empty header")
        if len(buf) != BLOCKSIZE:
            raise TruncatedHeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            # An all-zero block marks the end of the archive.
            raise EOFHeaderError("end of file header")

        chksum = nti(buf[148:156])
        # calc_chksums() returns both signed and unsigned sums, since some
        # old implementations wrote signed checksums.
        if chksum not in calc_chksums(buf):
            raise InvalidHeaderError("bad checksum")

        # Slice offsets below follow the fixed ustar header layout.
        obj = cls()
        obj.name = nts(buf[0:100], encoding, errors)
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257], encoding, errors)
        obj.uname = nts(buf[265:297], encoding, errors)
        obj.gname = nts(buf[297:329], encoding, errors)
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500], encoding, errors)

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # The old GNU sparse format occupies some of the unused
        # space in the buffer for up to 4 sparse structures.
        # Save them for later processing in _proc_sparse().
        if obj.type == GNUTYPE_SPARSE:
            pos = 386
            structs = []
            for i in range(4):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[482])
            origsize = nti(buf[483:495])
            obj._sparse_structs = (structs, isextended, origsize)

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj
1288
1289 @classmethod
1290 def fromtarfile(cls, tarfile):
1291 """Return the next TarInfo object from TarFile object
1292 tarfile.
1293 """
1294 buf = tarfile.fileobj.read(BLOCKSIZE)
1295 obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
1296 obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1297 return obj._proc_member(tarfile)
1298
1299 #--------------------------------------------------------------------------
1300 # The following are methods that are called depending on the type of a
1301 # member. The entry point is _proc_member() which can be overridden in a
1302 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1303 # implement the following
1304 # operations:
1305 # 1. Set self.offset_data to the position where the data blocks begin,
1306 # if there is data that follows.
1307 # 2. Set tarfile.offset to the position where the next member's header will
1308 # begin.
1309 # 3. Return self or another valid TarInfo object.
1310 def _proc_member(self, tarfile):
1311 """Choose the right processing method depending on
1312 the type and call it.
1313 """
1314 if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1315 return self._proc_gnulong(tarfile)
1316 elif self.type == GNUTYPE_SPARSE:
1317 return self._proc_sparse(tarfile)
1318 elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1319 return self._proc_pax(tarfile)
1320 else:
1321 return self._proc_builtin(tarfile)
1322
1323 def _proc_builtin(self, tarfile):
1324 """Process a builtin type or an unknown type which
1325 will be treated as a regular file.
1326 """
1327 self.offset_data = tarfile.fileobj.tell()
1328 offset = self.offset_data
1329 if self.isreg() or self.type not in SUPPORTED_TYPES:
1330 # Skip the following data blocks.
1331 offset += self._block(self.size)
1332 tarfile.offset = offset
1333
1334 # Patch the TarInfo object with saved global
1335 # header information.
1336 self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
1337
1338 # Remove redundant slashes from directories. This is to be consistent
1339 # with frombuf().
1340 if self.isdir():
1341 self.name = self.name.rstrip("/")
1342
1343 return self
1344
    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
        or longlink member.
        """
        # The member's data area contains the NUL-terminated long name.
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError as e:
            raise SubsequentHeaderError(str(e)) from None

        # Patch the TarInfo object from the next header with
        # the longname information.
        next.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            next.name = nts(buf, tarfile.encoding, tarfile.errors)
        elif self.type == GNUTYPE_LONGLINK:
            next.linkname = nts(buf, tarfile.encoding, tarfile.errors)

        # Remove redundant slashes from directories. This is to be consistent
        # with frombuf().
        # NOTE(review): removesuffix() strips at most ONE trailing slash,
        # whereas frombuf() uses rstrip("/") — confirm this asymmetry is
        # intentional for long names with multiple trailing slashes.
        if next.isdir():
            next.name = next.name.removesuffix("/")

        return next
1371
    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.
        """
        # We already collected some sparse structures in frombuf().
        structs, isextended, origsize = self._sparse_structs
        del self._sparse_structs

        # Collect sparse structures from extended header blocks.
        while isextended:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            # Each extension block holds up to 21 (offset, numbytes) pairs
            # of 12 octal digits each (21 * 24 == 504 bytes).
            for i in range(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset and numbytes:
                    structs.append((offset, numbytes))
                pos += 24
            # Byte 504 flags whether yet another extension block follows.
            isextended = bool(buf[504])
        self.sparse = structs

        self.offset_data = tarfile.fileobj.tell()
        tarfile.offset = self.offset_data + self._block(self.size)
        # size in the header is the on-archive (compacted) size; expose the
        # original file size to callers instead.
        self.size = origsize
        return self
1399
    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
        POSIX.1-2008.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global). Global headers update tarfile.pax_headers in place;
        # extended headers work on a private copy.
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Check if the pax header contains a hdrcharset field. This tells us
        # the encoding of the path, linkpath, uname and gname fields. Normally,
        # these fields are UTF-8 encoded but since POSIX.1-2008 tar
        # implementations are allowed to store them as raw binary strings if
        # the translation to UTF-8 fails.
        match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
        if match is not None:
            pax_headers["hdrcharset"] = match.group(1).decode("utf-8")

        # For the time being, we don't care about anything other than "BINARY".
        # The only other value that is currently allowed by the standard is
        # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
        hdrcharset = pax_headers.get("hdrcharset")
        if hdrcharset == "BINARY":
            encoding = tarfile.encoding
        else:
            encoding = "utf-8"

        # Parse pax header information. A record looks like that:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline. keyword and value are both UTF-8 encoded strings.
        regex = re.compile(br"(\d+) ([^=]+)=")
        pos = 0
        while match := regex.match(buf, pos):
            length, keyword = match.groups()
            length = int(length)
            if length == 0:
                # A zero length would make the loop below spin forever.
                raise InvalidHeaderError("invalid header")
            # The value runs from just past '=' up to the record's trailing
            # newline, as determined by the declared record length.
            value = buf[match.end(2) + 1:match.start(1) + length - 1]

            # Normally, we could just use "utf-8" as the encoding and "strict"
            # as the error handler, but we better not take the risk. For
            # example, GNU tar <= 1.23 is known to store filenames it cannot
            # translate to UTF-8 as raw strings (unfortunately without a
            # hdrcharset=BINARY header).
            # We first try the strict standard encoding, and if that fails we
            # fall back on the user's encoding and error handler.
            keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
                    tarfile.errors)
            if keyword in PAX_NAME_FIELDS:
                value = self._decode_pax_field(value, encoding, tarfile.encoding,
                        tarfile.errors)
            else:
                value = self._decode_pax_field(value, "utf-8", "utf-8",
                        tarfile.errors)

            pax_headers[keyword] = value
            pos += length

        # Fetch the next header.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError as e:
            raise SubsequentHeaderError(str(e)) from None

        # Process GNU sparse information.
        if "GNU.sparse.map" in pax_headers:
            # GNU extended sparse format version 0.1.
            self._proc_gnusparse_01(next, pax_headers)

        elif "GNU.sparse.size" in pax_headers:
            # GNU extended sparse format version 0.0.
            self._proc_gnusparse_00(next, pax_headers, buf)

        elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
            # GNU extended sparse format version 1.0.
            self._proc_gnusparse_10(next, pax_headers, tarfile)

        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
            # Patch the TarInfo object with the extended header info.
            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
            next.offset = self.offset

            if "size" in pax_headers:
                # If the extended header replaces the size field,
                # we need to recalculate the offset where the next
                # header starts.
                offset = next.offset_data
                if next.isreg() or next.type not in SUPPORTED_TYPES:
                    offset += next._block(next.size)
                tarfile.offset = offset

        return next
1499
1500 def _proc_gnusparse_00(self, next, pax_headers, buf):
1501 """Process a GNU tar extended sparse header, version 0.0.
1502 """
1503 offsets = []
1504 for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
1505 offsets.append(int(match.group(1)))
1506 numbytes = []
1507 for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
1508 numbytes.append(int(match.group(1)))
1509 next.sparse = list(zip(offsets, numbytes))
1510
1511 def _proc_gnusparse_01(self, next, pax_headers):
1512 """Process a GNU tar extended sparse header, version 0.1.
1513 """
1514 sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
1515 next.sparse = list(zip(sparse[::2], sparse[1::2]))
1516
1517 def _proc_gnusparse_10(self, next, pax_headers, tarfile):
1518 """Process a GNU tar extended sparse header, version 1.0.
1519 """
1520 fields = None
1521 sparse = []
1522 buf = tarfile.fileobj.read(BLOCKSIZE)
1523 fields, buf = buf.split(b"\n", 1)
1524 fields = int(fields)
1525 while len(sparse) < fields * 2:
1526 if b"\n" not in buf:
1527 buf += tarfile.fileobj.read(BLOCKSIZE)
1528 number, buf = buf.split(b"\n", 1)
1529 sparse.append(int(number))
1530 next.offset_data = tarfile.fileobj.tell()
1531 next.sparse = list(zip(sparse[::2], sparse[1::2]))
1532
1533 def _apply_pax_info(self, pax_headers, encoding, errors):
1534 """Replace fields with supplemental information from a previous
1535 pax extended or global header.
1536 """
1537 for keyword, value in pax_headers.items():
1538 if keyword == "GNU.sparse.name":
1539 setattr(self, "path", value)
1540 elif keyword == "GNU.sparse.size":
1541 setattr(self, "size", int(value))
1542 elif keyword == "GNU.sparse.realsize":
1543 setattr(self, "size", int(value))
1544 elif keyword in PAX_FIELDS:
1545 if keyword in PAX_NUMBER_FIELDS:
1546 try:
1547 value = PAX_NUMBER_FIELDS[keyword](value)
1548 except ValueError:
1549 value = 0
1550 if keyword == "path":
1551 value = value.rstrip("/")
1552 setattr(self, keyword, value)
1553
1554 self.pax_headers = pax_headers.copy()
1555
1556 def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
1557 """Decode a single field from a pax record.
1558 """
1559 try:
1560 return value.decode(encoding, "strict")
1561 except UnicodeDecodeError:
1562 return value.decode(fallback_encoding, fallback_errors)
1563
1564 def _block(self, count):
1565 """Round up a byte count by BLOCKSIZE and return it,
1566 e.g. _block(834) => 1024.
1567 """
1568 blocks, remainder = divmod(count, BLOCKSIZE)
1569 if remainder:
1570 blocks += 1
1571 return blocks * BLOCKSIZE
1572
1573 def isreg(self):
1574 'Return True if the Tarinfo object is a regular file.'
1575 return self.type in REGULAR_TYPES
1576
1577 def isfile(self):
1578 'Return True if the Tarinfo object is a regular file.'
1579 return self.isreg()
1580
1581 def isdir(self):
1582 'Return True if it is a directory.'
1583 return self.type == DIRTYPE
1584
1585 def issym(self):
1586 'Return True if it is a symbolic link.'
1587 return self.type == SYMTYPE
1588
1589 def islnk(self):
1590 'Return True if it is a hard link.'
1591 return self.type == LNKTYPE
1592
1593 def ischr(self):
1594 'Return True if it is a character device.'
1595 return self.type == CHRTYPE
1596
1597 def isblk(self):
1598 'Return True if it is a block device.'
1599 return self.type == BLKTYPE
1600
1601 def isfifo(self):
1602 'Return True if it is a FIFO.'
1603 return self.type == FIFOTYPE
1604
1605 def issparse(self):
1606 return self.sparse is not None
1607
1608 def isdev(self):
1609 'Return True if it is one of character device, block device or FIFO.'
1610 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1611# class TarInfo
1612
class TarFile(object):
    """The TarFile Class provides an interface to tar archives.
    """

    # Class-level defaults; instances may override any of these via
    # the corresponding __init__ keyword arguments.

    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

    dereference = False         # If true, add content of linked file to the
                                # tar file, else the link.

    ignore_zeros = False        # If true, skips empty or invalid blocks and
                                # continues processing.

    errorlevel = 1              # If 0, fatal errors only appear in debug
                                # messages (if debug >= 0). If > 0, errors
                                # are passed to the caller as exceptions.

    format = DEFAULT_FORMAT     # The format to use when creating an archive.

    encoding = ENCODING         # Encoding for 8-bit character strings.

    errors = None               # Error handler for unicode conversion.

    tarinfo = TarInfo           # The default TarInfo class to use.

    fileobject = ExFileObject   # The file-object for extractfile().

    extraction_filter = None    # The default filter for extraction.
1640
    def __init__(self, name=None, mode="r", fileobj=None, format=None,
            tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
            errors="surrogateescape", pax_headers=None, debug=None,
            errorlevel=None, copybufsize=None):
        """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
        read from an existing archive, 'a' to append data to an existing
        file or 'w' to create a new file overwriting an existing one. `mode'
        defaults to 'r'.
        If `fileobj' is given, it is used for reading or writing data. If it
        can be determined, `mode' is overridden by `fileobj's mode.
        `fileobj' is not closed, when TarFile is closed.
        """
        # Map the archive mode to the underlying binary file mode.
        modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
        if mode not in modes:
            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
        self.mode = mode
        self._mode = modes[mode]

        if not fileobj:
            if self.mode == "a" and not os.path.exists(name):
                # Create nonexistent files in append mode.
                self.mode = "w"
                self._mode = "wb"
            fileobj = bltn_open(name, self._mode)
            # We own the file object and must close it ourselves.
            self._extfileobj = False
        else:
            if (name is None and hasattr(fileobj, "name") and
                isinstance(fileobj.name, (str, bytes))):
                name = fileobj.name
            if hasattr(fileobj, "mode"):
                self._mode = fileobj.mode
            # Caller-supplied file object: leave it open on close().
            self._extfileobj = True
        self.name = os.path.abspath(name) if name else None
        self.fileobj = fileobj

        # Init attributes.
        if format is not None:
            self.format = format
        if tarinfo is not None:
            self.tarinfo = tarinfo
        if dereference is not None:
            self.dereference = dereference
        if ignore_zeros is not None:
            self.ignore_zeros = ignore_zeros
        if encoding is not None:
            self.encoding = encoding
        self.errors = errors

        # pax_headers are only meaningful when writing PAX_FORMAT.
        if pax_headers is not None and self.format == PAX_FORMAT:
            self.pax_headers = pax_headers
        else:
            self.pax_headers = {}

        if debug is not None:
            self.debug = debug
        if errorlevel is not None:
            self.errorlevel = errorlevel

        # Init datastructures.
        self.copybufsize = copybufsize
        self.closed = False
        self.members = []       # list of members as TarInfo objects
        self._loaded = False    # flag if all members have been read
        self.offset = self.fileobj.tell()
                                # current position in the archive file
        self.inodes = {}        # dictionary caching the inodes of
                                # archive members already added

        try:
            if self.mode == "r":
                # Read the first member eagerly so next() can hand it out.
                self.firstmember = None
                self.firstmember = self.next()

            if self.mode == "a":
                # Move to the end of the archive,
                # before the first empty block.
                while True:
                    self.fileobj.seek(self.offset)
                    try:
                        tarinfo = self.tarinfo.fromtarfile(self)
                        self.members.append(tarinfo)
                    except EOFHeaderError:
                        self.fileobj.seek(self.offset)
                        break
                    except HeaderError as e:
                        raise ReadError(str(e)) from None

            if self.mode in ("a", "w", "x"):
                self._loaded = True

                if self.pax_headers:
                    # Emit a pax global header before any member.
                    buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                    self.fileobj.write(buf)
                    self.offset += len(buf)
        except:
            # On any setup failure, close a file we opened ourselves.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise
1740
1741 #--------------------------------------------------------------------------
1742 # Below are the classmethods which act as alternate constructors to the
1743 # TarFile class. The open() method is the only one that is needed for
1744 # public use; it is the "super"-constructor and is able to select an
1745 # adequate "sub"-constructor for a particular compression using the mapping
1746 # from OPEN_METH.
1747 #
1748 # This concept allows one to subclass TarFile without losing the comfort of
1749 # the super-constructor. A sub-constructor is registered and made available
1750 # by adding it to the mapping in OPEN_METH.
1751
    @classmethod
    def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
        r"""Open a tar archive for reading, writing or appending. Return
        an appropriate TarFile class.

        mode:
        'r' or 'r:\*' open for reading with transparent compression
        'r:'          open for reading exclusively uncompressed
        'r:gz'        open for reading with gzip compression
        'r:bz2'       open for reading with bzip2 compression
        'r:xz'        open for reading with lzma compression
        'a' or 'a:'   open for appending, creating the file if necessary
        'w' or 'w:'   open for writing without compression
        'w:gz'        open for writing with gzip compression
        'w:bz2'       open for writing with bzip2 compression
        'w:xz'        open for writing with lzma compression

        'x' or 'x:'   create a tarfile exclusively without compression, raise
                      an exception if the file is already created
        'x:gz'        create a gzip compressed tarfile, raise an exception
                      if the file is already created
        'x:bz2'       create a bzip2 compressed tarfile, raise an exception
                      if the file is already created
        'x:xz'        create an lzma compressed tarfile, raise an exception
                      if the file is already created

        'r|\*'        open a stream of tar blocks with transparent compression
        'r|'          open an uncompressed stream of tar blocks for reading
        'r|gz'        open a gzip compressed stream of tar blocks
        'r|bz2'       open a bzip2 compressed stream of tar blocks
        'r|xz'        open an lzma compressed stream of tar blocks
        'w|'          open an uncompressed stream for writing
        'w|gz'        open a gzip compressed stream for writing
        'w|bz2'       open a bzip2 compressed stream for writing
        'w|xz'        open an lzma compressed stream for writing
        """

        if not name and not fileobj:
            raise ValueError("nothing to open")

        if mode in ("r", "r:*"):
            # Find out which *open() is appropriate for opening the file.
            # Compressed openers are tried first so that an uncompressed
            # fallback cannot mask a compressed archive.
            def not_compressed(comptype):
                return cls.OPEN_METH[comptype] == 'taropen'
            error_msgs = []
            for comptype in sorted(cls.OPEN_METH, key=not_compressed):
                func = getattr(cls, cls.OPEN_METH[comptype])
                if fileobj is not None:
                    # Remember the position so a failed attempt can rewind
                    # before the next opener is tried.
                    saved_pos = fileobj.tell()
                try:
                    return func(name, "r", fileobj, **kwargs)
                except (ReadError, CompressionError) as e:
                    error_msgs.append(f'- method {comptype}: {e!r}')
                    if fileobj is not None:
                        fileobj.seek(saved_pos)
                    continue
            error_msgs_summary = '\n'.join(error_msgs)
            raise ReadError(f"file could not be opened successfully:\n{error_msgs_summary}")

        elif ":" in mode:
            filemode, comptype = mode.split(":", 1)
            filemode = filemode or "r"
            comptype = comptype or "tar"

            # Select the *open() function according to
            # given compression.
            if comptype in cls.OPEN_METH:
                func = getattr(cls, cls.OPEN_METH[comptype])
            else:
                raise CompressionError("unknown compression type %r" % comptype)
            return func(name, filemode, fileobj, **kwargs)

        elif "|" in mode:
            filemode, comptype = mode.split("|", 1)
            filemode = filemode or "r"
            comptype = comptype or "tar"

            if filemode not in ("r", "w"):
                raise ValueError("mode must be 'r' or 'w'")

            compresslevel = kwargs.pop("compresslevel", 9)
            stream = _Stream(name, filemode, comptype, fileobj, bufsize,
                             compresslevel)
            try:
                t = cls(name, filemode, stream, **kwargs)
            except:
                stream.close()
                raise
            # The stream was created here, so TarFile must close it.
            t._extfileobj = False
            return t

        elif mode in ("a", "w", "x"):
            return cls.taropen(name, mode, fileobj, **kwargs)

        raise ValueError("undiscernible mode")
1847
1848 @classmethod
1849 def taropen(cls, name, mode="r", fileobj=None, **kwargs):
1850 """Open uncompressed tar archive name for reading or writing.
1851 """
1852 if mode not in ("r", "a", "w", "x"):
1853 raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
1854 return cls(name, mode, fileobj, **kwargs)
1855
    @classmethod
    def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
        """Open gzip compressed tar archive name for reading or writing.
        Appending is not allowed.
        """
        if mode not in ("r", "w", "x"):
            raise ValueError("mode must be 'r', 'w' or 'x'")

        try:
            from gzip import GzipFile
        except ImportError:
            raise CompressionError("gzip module is not available") from None

        try:
            fileobj = GzipFile(name, mode + "b", compresslevel, fileobj)
        except OSError as e:
            # GzipFile raises OSError for a bad magic number; translate it
            # to ReadError only when reading from a caller-supplied fileobj.
            if fileobj is not None and mode == 'r':
                raise ReadError("not a gzip file") from e
            raise

        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except OSError as e:
            fileobj.close()
            if mode == 'r':
                raise ReadError("not a gzip file") from e
            raise
        except:
            # Any other failure: never leak the GzipFile we created.
            fileobj.close()
            raise
        # TarFile owns the GzipFile wrapper and must close it.
        t._extfileobj = False
        return t
1888
1889 @classmethod
1890 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1891 """Open bzip2 compressed tar archive name for reading or writing.
1892 Appending is not allowed.
1893 """
1894 if mode not in ("r", "w", "x"):
1895 raise ValueError("mode must be 'r', 'w' or 'x'")
1896
1897 try:
1898 from bz2 import BZ2File
1899 except ImportError:
1900 raise CompressionError("bz2 module is not available") from None
1901
1902 fileobj = BZ2File(fileobj or name, mode, compresslevel=compresslevel)
1903
1904 try:
1905 t = cls.taropen(name, mode, fileobj, **kwargs)
1906 except (OSError, EOFError) as e:
1907 fileobj.close()
1908 if mode == 'r':
1909 raise ReadError("not a bzip2 file") from e
1910 raise
1911 except:
1912 fileobj.close()
1913 raise
1914 t._extfileobj = False
1915 return t
1916
1917 @classmethod
1918 def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
1919 """Open lzma compressed tar archive name for reading or writing.
1920 Appending is not allowed.
1921 """
1922 if mode not in ("r", "w", "x"):
1923 raise ValueError("mode must be 'r', 'w' or 'x'")
1924
1925 try:
1926 from lzma import LZMAFile, LZMAError
1927 except ImportError:
1928 raise CompressionError("lzma module is not available") from None
1929
1930 fileobj = LZMAFile(fileobj or name, mode, preset=preset)
1931
1932 try:
1933 t = cls.taropen(name, mode, fileobj, **kwargs)
1934 except (LZMAError, EOFError) as e:
1935 fileobj.close()
1936 if mode == 'r':
1937 raise ReadError("not an lzma file") from e
1938 raise
1939 except:
1940 fileobj.close()
1941 raise
1942 t._extfileobj = False
1943 return t
1944
    # All *open() methods are registered here. open() consults this
    # mapping ("mode suffix" -> method name) to dispatch; subclasses may
    # extend it to register additional compression methods.
    OPEN_METH = {
        "tar": "taropen",   # uncompressed tar
        "gz":  "gzopen",    # gzip compressed tar
        "bz2": "bz2open",   # bzip2 compressed tar
        "xz":  "xzopen"     # lzma compressed tar
    }
1952
1953 #--------------------------------------------------------------------------
1954 # The public methods which TarFile provides:
1955
1956 def close(self):
1957 """Close the TarFile. In write-mode, two finishing zero blocks are
1958 appended to the archive.
1959 """
1960 if self.closed:
1961 return
1962
1963 self.closed = True
1964 try:
1965 if self.mode in ("a", "w", "x"):
1966 self.fileobj.write(NUL * (BLOCKSIZE * 2))
1967 self.offset += (BLOCKSIZE * 2)
1968 # fill up the end with zero-blocks
1969 # (like option -b20 for tar does)
1970 blocks, remainder = divmod(self.offset, RECORDSIZE)
1971 if remainder > 0:
1972 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1973 finally:
1974 if not self._extfileobj:
1975 self.fileobj.close()
1976
1977 def getmember(self, name):
1978 """Return a TarInfo object for member ``name``. If ``name`` can not be
1979 found in the archive, KeyError is raised. If a member occurs more
1980 than once in the archive, its last occurrence is assumed to be the
1981 most up-to-date version.
1982 """
1983 tarinfo = self._getmember(name.rstrip('/'))
1984 if tarinfo is None:
1985 raise KeyError("filename %r not found" % name)
1986 return tarinfo
1987
1988 def getmembers(self):
1989 """Return the members of the archive as a list of TarInfo objects. The
1990 list has the same order as the members in the archive.
1991 """
1992 self._check()
1993 if not self._loaded: # if we want to obtain a list of
1994 self._load() # all members, we first have to
1995 # scan the whole archive.
1996 return self.members
1997
1998 def getnames(self):
1999 """Return the members of the archive as a list of their names. It has
2000 the same order as the list returned by getmembers().
2001 """
2002 return [tarinfo.name for tarinfo in self.getmembers()]
2003
    def gettarinfo(self, name=None, arcname=None, fileobj=None):
        """Create a TarInfo object from the result of os.stat or equivalent
           on an existing file. The file is either named by ``name``, or
           specified as a file object ``fileobj`` with a file descriptor. If
           given, ``arcname`` specifies an alternative name for the file in the
           archive, otherwise, the name is taken from the 'name' attribute of
           'fileobj', or the 'name' argument. The name should be a text
           string.

           Returns None if the file's type cannot be represented in a tar
           archive (anything other than a regular file, directory, fifo,
           symlink, or character/block device).
        """
        self._check("awx")

        # When fileobj is given, replace name by
        # fileobj's real name.
        if fileobj is not None:
            name = fileobj.name

        # Building the name of the member in the archive.
        # Backward slashes are converted to forward slashes,
        # Absolute paths are turned to relative paths.
        if arcname is None:
            arcname = name
        drv, arcname = os.path.splitdrive(arcname)
        arcname = arcname.replace(os.sep, "/")
        arcname = arcname.lstrip("/")

        # Now, fill the TarInfo object with
        # information specific for the file.
        tarinfo = self.tarinfo()
        tarinfo.tarfile = self  # Not needed

        # Use os.stat or os.lstat, depending on if symlinks shall be resolved.
        if fileobj is None:
            if not self.dereference:
                statres = os.lstat(name)
            else:
                statres = os.stat(name)
        else:
            statres = os.fstat(fileobj.fileno())
        linkname = ""

        stmd = statres.st_mode
        if stat.S_ISREG(stmd):
            inode = (statres.st_ino, statres.st_dev)
            if not self.dereference and statres.st_nlink > 1 and \
                    inode in self.inodes and arcname != self.inodes[inode]:
                # Is it a hardlink to an already
                # archived file?
                type = LNKTYPE
                linkname = self.inodes[inode]
            else:
                # The inode is added only if its valid.
                # For win32 it is always 0.
                type = REGTYPE
                if inode[0]:
                    self.inodes[inode] = arcname
        elif stat.S_ISDIR(stmd):
            type = DIRTYPE
        elif stat.S_ISFIFO(stmd):
            type = FIFOTYPE
        elif stat.S_ISLNK(stmd):
            type = SYMTYPE
            linkname = os.readlink(name)
        elif stat.S_ISCHR(stmd):
            type = CHRTYPE
        elif stat.S_ISBLK(stmd):
            type = BLKTYPE
        else:
            # Sockets and other exotic file types cannot be stored in a tar
            # archive.
            return None

        # Fill the TarInfo object with all
        # information we can get.
        tarinfo.name = arcname
        tarinfo.mode = stmd
        tarinfo.uid = statres.st_uid
        tarinfo.gid = statres.st_gid
        if type == REGTYPE:
            tarinfo.size = statres.st_size
        else:
            # Only regular files carry payload data in the archive.
            tarinfo.size = 0
        tarinfo.mtime = statres.st_mtime
        tarinfo.type = type
        tarinfo.linkname = linkname
        # Map numeric ids to symbolic names where the platform supports it;
        # a failed lookup simply leaves the textual fields unset.
        if pwd:
            try:
                tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
            except KeyError:
                pass
        if grp:
            try:
                tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
            except KeyError:
                pass

        if type in (CHRTYPE, BLKTYPE):
            # Record the device numbers for character/block devices.
            if hasattr(os, "major") and hasattr(os, "minor"):
                tarinfo.devmajor = os.major(statres.st_rdev)
                tarinfo.devminor = os.minor(statres.st_rdev)
        return tarinfo
2102
    def list(self, verbose=True, *, members=None):
        """Print a table of contents to sys.stdout. If ``verbose`` is False, only
           the names of the members are printed. If it is True, an `ls -l'-like
           output is produced. ``members`` is optional and must be a subset of the
           list returned by getmembers().
        """
        self._check()

        if members is None:
            members = self
        for tarinfo in members:
            if verbose:
                # mode and mtime can be None on some members (the code guards
                # for it here); print placeholders in that case.
                if tarinfo.mode is None:
                    _safe_print("??????????")
                else:
                    _safe_print(stat.filemode(tarinfo.mode))
                _safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid,
                                       tarinfo.gname or tarinfo.gid))
                if tarinfo.ischr() or tarinfo.isblk():
                    # Devices show "major,minor" instead of a size.
                    _safe_print("%10s" %
                                ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor)))
                else:
                    _safe_print("%10d" % tarinfo.size)
                if tarinfo.mtime is None:
                    _safe_print("????-??-?? ??:??:??")
                else:
                    _safe_print("%d-%02d-%02d %02d:%02d:%02d" \
                                % time.localtime(tarinfo.mtime)[:6])

            # Directories get a trailing slash, similar to ls.
            _safe_print(tarinfo.name + ("/" if tarinfo.isdir() else ""))

            if verbose:
                if tarinfo.issym():
                    _safe_print("-> " + tarinfo.linkname)
                if tarinfo.islnk():
                    _safe_print("link to " + tarinfo.linkname)
            print()
2140
    def add(self, name, arcname=None, recursive=True, *, filter=None):
        """Add the file ``name`` to the archive. ``name`` may be any type of file
           (directory, fifo, symbolic link, etc.). If given, ``arcname``
           specifies an alternative name for the file in the archive.
           Directories are added recursively by default. This can be avoided by
           setting ``recursive`` to False. ``filter`` is a function
           that expects a TarInfo object argument and returns the changed
           TarInfo object, if it returns None the TarInfo object will be
           excluded from the archive.
        """
        self._check("awx")

        if arcname is None:
            arcname = name

        # Skip if somebody tries to archive the archive...
        if self.name is not None and os.path.abspath(name) == self.name:
            self._dbg(2, "tarfile: Skipped %r" % name)
            return

        self._dbg(1, name)

        # Create a TarInfo object from the file.
        tarinfo = self.gettarinfo(name, arcname)

        if tarinfo is None:
            # gettarinfo() returns None for file types that cannot be
            # represented in a tar archive.
            self._dbg(1, "tarfile: Unsupported type %r" % name)
            return

        # Change or exclude the TarInfo object.
        if filter is not None:
            tarinfo = filter(tarinfo)
            if tarinfo is None:
                self._dbg(2, "tarfile: Excluded %r" % name)
                return

        # Append the tar header and data to the archive.
        if tarinfo.isreg():
            with bltn_open(name, "rb") as f:
                self.addfile(tarinfo, f)

        elif tarinfo.isdir():
            self.addfile(tarinfo)
            if recursive:
                # Sort for a deterministic member order across filesystems.
                for f in sorted(os.listdir(name)):
                    self.add(os.path.join(name, f), os.path.join(arcname, f),
                            recursive, filter=filter)

        else:
            # Fifos, devices and links have a header but no data.
            self.addfile(tarinfo)
2191
2192 def addfile(self, tarinfo, fileobj=None):
2193 """Add the TarInfo object ``tarinfo`` to the archive. If ``fileobj`` is
2194 given, it should be a binary file, and tarinfo.size bytes are read
2195 from it and added to the archive. You can create TarInfo objects
2196 directly, or by using gettarinfo().
2197 """
2198 self._check("awx")
2199
2200 tarinfo = copy.copy(tarinfo)
2201
2202 buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
2203 self.fileobj.write(buf)
2204 self.offset += len(buf)
2205 bufsize=self.copybufsize
2206 # If there's data to follow, append it.
2207 if fileobj is not None:
2208 copyfileobj(fileobj, self.fileobj, tarinfo.size, bufsize=bufsize)
2209 blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
2210 if remainder > 0:
2211 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
2212 blocks += 1
2213 self.offset += blocks * BLOCKSIZE
2214
2215 self.members.append(tarinfo)
2216
2217 def _get_filter_function(self, filter):
2218 if filter is None:
2219 filter = self.extraction_filter
2220 if filter is None:
2221 warnings.warn(
2222 'Python 3.14 will, by default, filter extracted tar '
2223 + 'archives and reject files or modify their metadata. '
2224 + 'Use the filter argument to control this behavior.',
2225 DeprecationWarning)
2226 return fully_trusted_filter
2227 if isinstance(filter, str):
2228 raise TypeError(
2229 'String names are not supported for '
2230 + 'TarFile.extraction_filter. Use a function such as '
2231 + 'tarfile.data_filter directly.')
2232 return filter
2233 if callable(filter):
2234 return filter
2235 try:
2236 return _NAMED_FILTERS[filter]
2237 except KeyError:
2238 raise ValueError(f"filter {filter!r} not found") from None
2239
    def extractall(self, path=".", members=None, *, numeric_owner=False,
                   filter=None):
        """Extract all members from the archive to the current working
           directory and set owner, modification time and permissions on
           directories afterwards. `path' specifies a different directory
           to extract to. `members' is optional and must be a subset of the
           list returned by getmembers(). If `numeric_owner` is True, only
           the numbers for user/group names are used and not the names.

           The `filter` function will be called on each member just
           before extraction.
           It can return a changed TarInfo or None to skip the member.
           String names of common filters are accepted.
        """
        directories = []

        filter_function = self._get_filter_function(filter)
        if members is None:
            members = self

        for member in members:
            tarinfo = self._get_extract_tarinfo(member, filter_function, path)
            if tarinfo is None:
                # Skipped by the filter.
                continue
            if tarinfo.isdir():
                # For directories, delay setting attributes until later,
                # since permissions can interfere with extraction and
                # extracting contents can reset mtime.
                directories.append(tarinfo)
            self._extract_one(tarinfo, path, set_attrs=not tarinfo.isdir(),
                              numeric_owner=numeric_owner)

        # Reverse sort directories.
        # Deepest paths come first, so attributes are applied bottom-up.
        directories.sort(key=lambda a: a.name, reverse=True)

        # Set correct owner, mtime and filemode on directories.
        for tarinfo in directories:
            dirpath = os.path.join(path, tarinfo.name)
            try:
                self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
                self.utime(tarinfo, dirpath)
                self.chmod(tarinfo, dirpath)
            except ExtractError as e:
                # Non-fatal: honored or logged depending on errorlevel.
                self._handle_nonfatal_error(e)
2284
2285 def extract(self, member, path="", set_attrs=True, *, numeric_owner=False,
2286 filter=None):
2287 """Extract a member from the archive to the current working directory,
2288 using its full name. Its file information is extracted as accurately
2289 as possible. `member' may be a filename or a TarInfo object. You can
2290 specify a different directory using `path'. File attributes (owner,
2291 mtime, mode) are set unless `set_attrs' is False. If `numeric_owner`
2292 is True, only the numbers for user/group names are used and not
2293 the names.
2294
2295 The `filter` function will be called before extraction.
2296 It can return a changed TarInfo or None to skip the member.
2297 String names of common filters are accepted.
2298 """
2299 filter_function = self._get_filter_function(filter)
2300 tarinfo = self._get_extract_tarinfo(member, filter_function, path)
2301 if tarinfo is not None:
2302 self._extract_one(tarinfo, path, set_attrs, numeric_owner)
2303
    def _get_extract_tarinfo(self, member, filter_function, path):
        """Get filtered TarInfo (or None) from member, which might be a str.

           Exceptions raised by the filter are routed through the errorlevel
           machinery: OSError/FilterError as fatal, ExtractError as
           non-fatal.
        """
        if isinstance(member, str):
            tarinfo = self.getmember(member)
        else:
            tarinfo = member

        # Keep a reference to the unmodified member for the debug message.
        unfiltered = tarinfo
        try:
            tarinfo = filter_function(tarinfo, path)
        except (OSError, FilterError) as e:
            self._handle_fatal_error(e)
        except ExtractError as e:
            self._handle_nonfatal_error(e)
        if tarinfo is None:
            # The filter chose to exclude this member.
            self._dbg(2, "tarfile: Excluded %r" % unfiltered.name)
            return None
        # Prepare the link target for makelink().
        if tarinfo.islnk():
            # Copy first: _link_target is per-extraction state and must not
            # leak onto the shared member object.
            tarinfo = copy.copy(tarinfo)
            tarinfo._link_target = os.path.join(path, tarinfo.linkname)
        return tarinfo
2326
    def _extract_one(self, tarinfo, path, set_attrs, numeric_owner):
        """Extract from filtered tarinfo to disk.

           Errors are dispatched through the errorlevel machinery: OSError
           counts as fatal, ExtractError as non-fatal.
        """
        self._check("r")

        try:
            self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
                                 set_attrs=set_attrs,
                                 numeric_owner=numeric_owner)
        except OSError as e:
            self._handle_fatal_error(e)
        except ExtractError as e:
            self._handle_nonfatal_error(e)
2339
    def _handle_nonfatal_error(self, e):
        """Handle non-fatal error (ExtractError) according to errorlevel.

           Must be called from inside an ``except`` block: the bare ``raise``
           re-raises the exception currently being handled.
        """
        if self.errorlevel > 1:
            raise
        else:
            self._dbg(1, "tarfile: %s" % e)
2346
    def _handle_fatal_error(self, e):
        """Handle "fatal" error according to self.errorlevel.

           Must be called from inside an ``except`` block: with
           errorlevel > 0 the bare ``raise`` re-raises the active exception;
           otherwise the error is only reported via _dbg().
        """
        if self.errorlevel > 0:
            raise
        elif isinstance(e, OSError):
            # Format OSErrors from strerror/filename for readability.
            if e.filename is None:
                self._dbg(1, "tarfile: %s" % e.strerror)
            else:
                self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
        else:
            self._dbg(1, "tarfile: %s %s" % (type(e).__name__, e))
2358
    def extractfile(self, member):
        """Extract a member from the archive as a file object. ``member`` may be
           a filename or a TarInfo object. If ``member`` is a regular file or
           a link, an io.BufferedReader object is returned. For all other
           existing members, None is returned. If ``member`` does not appear
           in the archive, KeyError is raised.
        """
        self._check("r")

        if isinstance(member, str):
            tarinfo = self.getmember(member)
        else:
            tarinfo = member

        if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
            # Members with unknown types are treated as regular files.
            return self.fileobject(self, tarinfo)

        elif tarinfo.islnk() or tarinfo.issym():
            if isinstance(self.fileobj, _Stream):
                # A small but ugly workaround for the case that someone tries
                # to extract a (sym)link as a file-object from a non-seekable
                # stream of tar blocks.
                raise StreamError("cannot extract (sym)link as file object")
            else:
                # A (sym)link's file object is its target's file object.
                return self.extractfile(self._find_link_target(tarinfo))
        else:
            # If there's no data associated with the member (directory, chrdev,
            # blkdev, etc.), return None instead of a file object.
            return None
2390
2391 def _extract_member(self, tarinfo, targetpath, set_attrs=True,
2392 numeric_owner=False):
2393 """Extract the TarInfo object tarinfo to a physical
2394 file called targetpath.
2395 """
2396 # Fetch the TarInfo object for the given name
2397 # and build the destination pathname, replacing
2398 # forward slashes to platform specific separators.
2399 targetpath = targetpath.rstrip("/")
2400 targetpath = targetpath.replace("/", os.sep)
2401
2402 # Create all upper directories.
2403 upperdirs = os.path.dirname(targetpath)
2404 if upperdirs and not os.path.exists(upperdirs):
2405 # Create directories that are not part of the archive with
2406 # default permissions.
2407 os.makedirs(upperdirs)
2408
2409 if tarinfo.islnk() or tarinfo.issym():
2410 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2411 else:
2412 self._dbg(1, tarinfo.name)
2413
2414 if tarinfo.isreg():
2415 self.makefile(tarinfo, targetpath)
2416 elif tarinfo.isdir():
2417 self.makedir(tarinfo, targetpath)
2418 elif tarinfo.isfifo():
2419 self.makefifo(tarinfo, targetpath)
2420 elif tarinfo.ischr() or tarinfo.isblk():
2421 self.makedev(tarinfo, targetpath)
2422 elif tarinfo.islnk() or tarinfo.issym():
2423 self.makelink(tarinfo, targetpath)
2424 elif tarinfo.type not in SUPPORTED_TYPES:
2425 self.makeunknown(tarinfo, targetpath)
2426 else:
2427 self.makefile(tarinfo, targetpath)
2428
2429 if set_attrs:
2430 self.chown(tarinfo, targetpath, numeric_owner)
2431 if not tarinfo.issym():
2432 self.chmod(tarinfo, targetpath)
2433 self.utime(tarinfo, targetpath)
2434
2435 #--------------------------------------------------------------------------
2436 # Below are the different file methods. They are called via
2437 # _extract_member() when extract() is called. They can be replaced in a
2438 # subclass to implement other functionality.
2439
2440 def makedir(self, tarinfo, targetpath):
2441 """Make a directory called targetpath.
2442 """
2443 try:
2444 if tarinfo.mode is None:
2445 # Use the system's default mode
2446 os.mkdir(targetpath)
2447 else:
2448 # Use a safe mode for the directory, the real mode is set
2449 # later in _extract_member().
2450 os.mkdir(targetpath, 0o700)
2451 except FileExistsError:
2452 if not os.path.isdir(targetpath):
2453 raise
2454
    def makefile(self, tarinfo, targetpath):
        """Make a file called targetpath.
        """
        source = self.fileobj
        # Position the archive stream at the member's data.
        source.seek(tarinfo.offset_data)
        bufsize = self.copybufsize
        with bltn_open(targetpath, "wb") as target:
            if tarinfo.sparse is not None:
                # Sparse member: write each data segment at its recorded
                # offset, then extend the file to its full size so holes
                # remain between the segments.
                for offset, size in tarinfo.sparse:
                    target.seek(offset)
                    copyfileobj(source, target, size, ReadError, bufsize)
                target.seek(tarinfo.size)
                target.truncate()
            else:
                copyfileobj(source, target, tarinfo.size, ReadError, bufsize)
2470
2471 def makeunknown(self, tarinfo, targetpath):
2472 """Make a file from a TarInfo object with an unknown type
2473 at targetpath.
2474 """
2475 self.makefile(tarinfo, targetpath)
2476 self._dbg(1, "tarfile: Unknown file type %r, " \
2477 "extracted as regular file." % tarinfo.type)
2478
2479 def makefifo(self, tarinfo, targetpath):
2480 """Make a fifo called targetpath.
2481 """
2482 if hasattr(os, "mkfifo"):
2483 os.mkfifo(targetpath)
2484 else:
2485 raise ExtractError("fifo not supported by system")
2486
    def makedev(self, tarinfo, targetpath):
        """Make a character or block device called targetpath.

           Raises ExtractError if the platform lacks os.mknod/os.makedev.
        """
        if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
            raise ExtractError("special devices not supported by system")

        mode = tarinfo.mode
        if mode is None:
            # Use mknod's default
            mode = 0o600
        # Merge the device-type bit into the permission bits.
        if tarinfo.isblk():
            mode |= stat.S_IFBLK
        else:
            mode |= stat.S_IFCHR

        os.mknod(targetpath, mode,
                 os.makedev(tarinfo.devmajor, tarinfo.devminor))
2504
    def makelink(self, tarinfo, targetpath):
        """Make a (symbolic) link called targetpath. If it cannot be created
          (platform limitation), we try to make a copy of the referenced file
          instead of a link.
        """
        try:
            # For systems that support symbolic and hard links.
            if tarinfo.issym():
                if os.path.lexists(targetpath):
                    # Avoid FileExistsError on following os.symlink.
                    os.unlink(targetpath)
                os.symlink(tarinfo.linkname, targetpath)
            else:
                if os.path.exists(tarinfo._link_target):
                    os.link(tarinfo._link_target, targetpath)
                else:
                    # Hard-link target missing on disk: extract the
                    # linked-to member's contents instead.
                    self._extract_member(self._find_link_target(tarinfo),
                                         targetpath)
        except symlink_exception:
            # The platform refused to create the link: fall back to
            # extracting a copy of the referenced member.
            try:
                self._extract_member(self._find_link_target(tarinfo),
                                     targetpath)
            except KeyError:
                raise ExtractError("unable to resolve link inside archive") from None
2529
    def chown(self, tarinfo, targetpath, numeric_owner):
        """Set owner of targetpath according to tarinfo. If numeric_owner
           is True, use .gid/.uid instead of .gname/.uname. If numeric_owner
           is False, fall back to .gid/.uid when the search based on name
           fails.
        """
        if hasattr(os, "geteuid") and os.geteuid() == 0:
            # We have to be root to do so.
            g = tarinfo.gid
            u = tarinfo.uid
            if not numeric_owner:
                # Prefer the symbolic names; silently keep the numeric ids
                # when the name lookup fails.
                try:
                    if grp and tarinfo.gname:
                        g = grp.getgrnam(tarinfo.gname)[2]
                except KeyError:
                    pass
                try:
                    if pwd and tarinfo.uname:
                        u = pwd.getpwnam(tarinfo.uname)[2]
                except KeyError:
                    pass
            # -1 tells os.chown to leave that id unchanged.
            if g is None:
                g = -1
            if u is None:
                u = -1
            try:
                if tarinfo.issym() and hasattr(os, "lchown"):
                    # Change the link itself, not its target.
                    os.lchown(targetpath, u, g)
                else:
                    os.chown(targetpath, u, g)
            except OSError as e:
                raise ExtractError("could not change owner") from e
2562
2563 def chmod(self, tarinfo, targetpath):
2564 """Set file permissions of targetpath according to tarinfo.
2565 """
2566 if tarinfo.mode is None:
2567 return
2568 try:
2569 os.chmod(targetpath, tarinfo.mode)
2570 except OSError as e:
2571 raise ExtractError("could not change mode") from e
2572
2573 def utime(self, tarinfo, targetpath):
2574 """Set modification time of targetpath according to tarinfo.
2575 """
2576 mtime = tarinfo.mtime
2577 if mtime is None:
2578 return
2579 if not hasattr(os, 'utime'):
2580 return
2581 try:
2582 os.utime(targetpath, (mtime, mtime))
2583 except OSError as e:
2584 raise ExtractError("could not change modification time") from e
2585
2586 #--------------------------------------------------------------------------
    def next(self):
        """Return the next member of the archive as a TarInfo object, when
           TarFile is opened for reading. Return None if there is no more
           available.
        """
        self._check("ra")
        # A member may already be queued in self.firstmember; hand it out
        # before reading further.
        if self.firstmember is not None:
            m = self.firstmember
            self.firstmember = None
            return m

        # Advance the file pointer.
        if self.offset != self.fileobj.tell():
            if self.offset == 0:
                return None
            # Read the byte just before the target offset so a truncated
            # archive is detected before header parsing.
            self.fileobj.seek(self.offset - 1)
            if not self.fileobj.read(1):
                raise ReadError("unexpected end of data")

        # Read the next block.
        tarinfo = None
        while True:
            try:
                tarinfo = self.tarinfo.fromtarfile(self)
            except EOFHeaderError as e:
                # With ignore_zeros, skip over zero blocks and keep looking
                # for further members.
                if self.ignore_zeros:
                    self._dbg(2, "0x%X: %s" % (self.offset, e))
                    self.offset += BLOCKSIZE
                    continue
            except InvalidHeaderError as e:
                if self.ignore_zeros:
                    self._dbg(2, "0x%X: %s" % (self.offset, e))
                    self.offset += BLOCKSIZE
                    continue
                elif self.offset == 0:
                    # An invalid header right at the start means this is not
                    # a tar archive at all.
                    raise ReadError(str(e)) from None
            except EmptyHeaderError:
                if self.offset == 0:
                    raise ReadError("empty file") from None
            except TruncatedHeaderError as e:
                if self.offset == 0:
                    raise ReadError(str(e)) from None
            except SubsequentHeaderError as e:
                raise ReadError(str(e)) from None
            except Exception as e:
                # Decompression failures from gzip streams surface as
                # zlib.error; translate them into ReadError when the zlib
                # module is importable.
                try:
                    import zlib
                    if isinstance(e, zlib.error):
                        raise ReadError(f'zlib error: {e}') from None
                    else:
                        raise e
                except ImportError:
                    raise e
            break

        if tarinfo is not None:
            self.members.append(tarinfo)
        else:
            # No further member could be read: the archive is exhausted.
            self._loaded = True

        return tarinfo
2648
2649 #--------------------------------------------------------------------------
2650 # Little helper methods:
2651
    def _getmember(self, name, tarinfo=None, normalize=False):
        """Find an archive member by name from bottom to top.
           If tarinfo is given, it is used as the starting point.

           Returns None if no matching member is found.
        """
        # Ensure that all members have been loaded.
        members = self.getmembers()

        # Limit the member search list up to tarinfo.
        skipping = False
        if tarinfo is not None:
            try:
                index = members.index(tarinfo)
            except ValueError:
                # The given starting point might be a (modified) copy.
                # We'll later skip members until we find an equivalent.
                skipping = True
            else:
                # Happy fast path
                members = members[:index]

        if normalize:
            name = os.path.normpath(name)

        # Search from the end so the last occurrence of a name wins.
        for member in reversed(members):
            if skipping:
                # Equivalence is established via the archive offset.
                if tarinfo.offset == member.offset:
                    skipping = False
                continue
            if normalize:
                member_name = os.path.normpath(member.name)
            else:
                member_name = member.name

            if name == member_name:
                return member

        if skipping:
            # Starting point was not found
            raise ValueError(tarinfo)
2691
2692 def _load(self):
2693 """Read through the entire archive file and look for readable
2694 members.
2695 """
2696 while self.next() is not None:
2697 pass
2698 self._loaded = True
2699
2700 def _check(self, mode=None):
2701 """Check if TarFile is still open, and if the operation's mode
2702 corresponds to TarFile's mode.
2703 """
2704 if self.closed:
2705 raise OSError("%s is closed" % self.__class__.__name__)
2706 if mode is not None and self.mode not in mode:
2707 raise OSError("bad operation for mode %r" % self.mode)
2708
    def _find_link_target(self, tarinfo):
        """Find the target member of a symlink or hardlink member in the
           archive.

           Raises KeyError if the target cannot be found.
        """
        if tarinfo.issym():
            # Always search the entire archive.
            # The symlink target is resolved relative to the member's own
            # directory inside the archive.
            linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)))
            limit = None
        else:
            # Search the archive before the link, because a hard link is
            # just a reference to an already archived file.
            linkname = tarinfo.linkname
            limit = tarinfo

        member = self._getmember(linkname, tarinfo=limit, normalize=True)
        if member is None:
            raise KeyError("linkname %r not found" % linkname)
        return member
2727
    def __iter__(self):
        """Provide an iterator object.
        """
        if self._loaded:
            # Everything is already in memory; just replay the member list.
            yield from self.members
            return

        # Yield items using TarFile's next() method.
        # When all members have been read, set TarFile as _loaded.
        index = 0
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will have already exhausted the next() method.
        if self.firstmember is not None:
            tarinfo = self.next()
            index += 1
            yield tarinfo

        while True:
            if index < len(self.members):
                # A getmembers() call during iteration may have read ahead;
                # serve already-parsed members from the list first.
                tarinfo = self.members[index]
            elif not self._loaded:
                tarinfo = self.next()
                if not tarinfo:
                    self._loaded = True
                    return
            else:
                return
            index += 1
            yield tarinfo
2758
2759 def _dbg(self, level, msg):
2760 """Write debugging output to sys.stderr.
2761 """
2762 if level <= self.debug:
2763 print(msg, file=sys.stderr)
2764
    def __enter__(self):
        # Context-manager entry: verify the archive is still open, then hand
        # the TarFile itself to the with-statement.
        self._check()
        return self
2768
    def __exit__(self, type, value, traceback):
        # Context-manager exit: never suppresses exceptions.
        if type is None:
            # Normal exit: close() writes the end-of-archive blocks.
            self.close()
        else:
            # An exception occurred. We must not call close() because
            # it would try to write end-of-archive blocks and padding.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
2778
2779#--------------------
2780# exported functions
2781#--------------------
2782
def is_tarfile(name):
    """Return True if *name* points to a tar archive that this module can
    read, else return False.

    *name* should be a string (path), a file, or a file-like object.
    """
    try:
        if hasattr(name, "read"):
            # Probe the file-like object, then restore its stream position.
            pos = name.tell()
            archive = open(fileobj=name)
            name.seek(pos)
        else:
            archive = open(name)
        archive.close()
        return True
    except TarError:
        # Any tarfile-specific failure means "not a tar archive".
        return False
2800
# Module-level convenience alias: tarfile.open() is TarFile.open().
open = TarFile.open
2802
2803
def main():
    """Command-line entry point: list, extract, create, or test archives."""
    import argparse

    description = 'A simple command-line interface for tarfile module.'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='Verbose output')
    parser.add_argument('--filter', metavar='<filtername>',
                        choices=_NAMED_FILTERS,
                        help='Filter for extraction')

    # Exactly one action (-l/-e/-c/-t) must be chosen.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-l', '--list', metavar='<tarfile>',
                       help='Show listing of a tarfile')
    group.add_argument('-e', '--extract', nargs='+',
                       metavar=('<tarfile>', '<output_dir>'),
                       help='Extract tarfile into target dir')
    group.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create tarfile from sources')
    group.add_argument('-t', '--test', metavar='<tarfile>',
                       help='Test if a tarfile is valid')

    args = parser.parse_args()

    if args.filter and args.extract is None:
        parser.exit(1, '--filter is only valid for extraction\n')

    if args.test is not None:
        src = args.test
        if is_tarfile(src):
            with open(src, 'r') as tar:
                # getmembers() reads through the whole archive, validating
                # it; the member list is then printed to stderr.
                tar.getmembers()
                print(tar.getmembers(), file=sys.stderr)
            if args.verbose:
                print('{!r} is a tar archive.'.format(src))
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.list is not None:
        src = args.list
        if is_tarfile(src):
            with TarFile.open(src, 'r:*') as tf:
                tf.list(verbose=args.verbose)
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.extract is not None:
        # -e takes the archive and an optional destination directory.
        if len(args.extract) == 1:
            src = args.extract[0]
            curdir = os.curdir
        elif len(args.extract) == 2:
            src, curdir = args.extract
        else:
            parser.exit(1, parser.format_help())

        if is_tarfile(src):
            with TarFile.open(src, 'r:*') as tf:
                tf.extractall(path=curdir, filter=args.filter)
            if args.verbose:
                if curdir == '.':
                    msg = '{!r} file is extracted.'.format(src)
                else:
                    msg = ('{!r} file is extracted '
                           'into {!r} directory.').format(src, curdir)
                print(msg)
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.create is not None:
        # First positional argument is the archive name, the rest are the
        # sources to add.
        tar_name = args.create.pop(0)
        _, ext = os.path.splitext(tar_name)
        # Infer the compression mode from the archive's file extension.
        compressions = {
            # gz
            '.gz': 'gz',
            '.tgz': 'gz',
            # xz
            '.xz': 'xz',
            '.txz': 'xz',
            # bz2
            '.bz2': 'bz2',
            '.tbz': 'bz2',
            '.tbz2': 'bz2',
            '.tb2': 'bz2',
        }
        tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w'
        tar_files = args.create

        with TarFile.open(tar_name, tar_mode) as tf:
            for file_name in tar_files:
                tf.add(file_name)

        if args.verbose:
            print('{!r} file created.'.format(tar_name))
2898
# Allow running this module directly as a command-line tool.
if __name__ == '__main__':
    main()