Coverage Report

Created: 2024-07-23 06:29

/src/libarchive/libarchive/archive_read_support_format_tar.c
Line
Count
Source (jump to first uncovered line)
1
/*-
2
 * Copyright (c) 2003-2023 Tim Kientzle
3
 * Copyright (c) 2011-2012 Michihiro NAKAJIMA
4
 * Copyright (c) 2016 Martin Matuska
5
 * All rights reserved.
6
 *
7
 * Redistribution and use in source and binary forms, with or without
8
 * modification, are permitted provided that the following conditions
9
 * are met:
10
 * 1. Redistributions of source code must retain the above copyright
11
 *    notice, this list of conditions and the following disclaimer.
12
 * 2. Redistributions in binary form must reproduce the above copyright
13
 *    notice, this list of conditions and the following disclaimer in the
14
 *    documentation and/or other materials provided with the distribution.
15
 *
16
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
 */
27
28
#include "archive_platform.h"
29
30
#ifdef HAVE_ERRNO_H
31
#include <errno.h>
32
#endif
33
#include <stddef.h>
34
#ifdef HAVE_STDLIB_H
35
#include <stdlib.h>
36
#endif
37
#ifdef HAVE_STRING_H
38
#include <string.h>
39
#endif
40
41
#include "archive.h"
42
#include "archive_acl_private.h" /* For ACL parsing routines. */
43
#include "archive_entry.h"
44
#include "archive_entry_locale.h"
45
#include "archive_private.h"
46
#include "archive_read_private.h"
47
48
0
/*
 * Smaller of two values.  Function-like macro: each argument may be
 * evaluated more than once, so do not pass expressions with side effects.
 */
#define tar_min(a, b) ((a) < (b) ? (a) : (b))
49
50
/*
 * On-disk layout of a POSIX 'ustar' tar header.
 *
 * Every member is a plain char array, so the struct has no padding and
 * the byte offsets noted below follow directly from the array sizes
 * (500 bytes in total).
 */
struct archive_entry_header_ustar {
  char  name[100];      /* offset   0 */
  char  mode[8];        /* offset 100; numeric field */
  char  uid[8];         /* offset 108; numeric field */
  char  gid[8];         /* offset 116; numeric field */
  char  size[12];       /* offset 124; numeric field */
  char  mtime[12];      /* offset 136; numeric field */
  char  checksum[8];    /* offset 148 */
  char  typeflag[1];    /* offset 156; NUL, digit or letter */
  char  linkname[100];  /* offset 157; "old format" header ends here */
  char  magic[6];       /* offset 257; for POSIX: "ustar\0" */
  char  version[2];     /* offset 263; for POSIX: "00" */
  char  uname[32];      /* offset 265 */
  char  gname[32];      /* offset 297 */
  char  rdevmajor[8];   /* offset 329; numeric field */
  char  rdevminor[8];   /* offset 337; numeric field */
  char  prefix[155];    /* offset 345 */
};
71
72
/*
 * On-disk layout of a GNU tar header.
 *
 * As with the ustar layout, all members are char arrays, so the byte
 * offsets noted below follow directly from the array sizes.
 */

/* One entry of the sparse map embedded in a GNU header (24 bytes). */
struct gnu_sparse {
  char  offset[12];
  char  numbytes[12];
};

struct archive_entry_header_gnutar {
  char  name[100];      /* offset   0 */
  char  mode[8];        /* offset 100 */
  char  uid[8];         /* offset 108 */
  char  gid[8];         /* offset 116 */
  char  size[12];       /* offset 124 */
  char  mtime[12];      /* offset 136 */
  char  checksum[8];    /* offset 148 */
  char  typeflag[1];    /* offset 156 */
  char  linkname[100];  /* offset 157 */
  char  magic[8];       /* offset 257; "ustar  \0" (blank/blank/null at end) */
  char  uname[32];      /* offset 265 */
  char  gname[32];      /* offset 297 */
  char  rdevmajor[8];   /* offset 329 */
  char  rdevminor[8];   /* offset 337 */
  char  atime[12];      /* offset 345 */
  char  ctime[12];      /* offset 357 */
  char  offset[12];     /* offset 369 */
  char  longnames[4];   /* offset 381 */
  char  unused[1];      /* offset 385 */
  struct gnu_sparse sparse[4];  /* offset 386; four in-header map entries */
  char  isextended[1];  /* offset 482 */
  char  realsize[12];   /* offset 483 */
  /*
   * Note: the old GNU format has no POSIX 'prefix' field; long names
   * are carried by a separate 'L' (longname) entry instead.
   */
};
108
109
/*
110
 * Data specific to this format.
111
 */
112
/*
 * One node of the singly linked list describing an entry's body.
 * The data reader walks this list from the head, discarding nodes
 * whose 'remaining' count has dropped to zero.
 */
struct sparse_block {
  struct sparse_block *next;  /* Following node, or NULL at the end. */
  int64_t offset;             /* Offset reported to the caller for the next
                               * chunk; advanced as bytes are handed out. */
  int64_t remaining;          /* Bytes of this block not yet handed out. */
  int hole;                   /* Non-zero: block is a hole and its bytes are
                               * not returned to the caller. */
};
118
119
struct tar {
120
  struct archive_string  entry_pathname;
121
  /* For "GNU.sparse.name" and other similar path extensions. */
122
  struct archive_string  entry_pathname_override;
123
  struct archive_string  entry_uname;
124
  struct archive_string  entry_gname;
125
  struct archive_string  entry_linkpath;
126
  struct archive_string  longname;
127
  struct archive_string  pax_global;
128
  struct archive_string  line;
129
  int      pax_hdrcharset_utf8;
130
  int64_t      entry_bytes_remaining;
131
  int64_t      entry_offset;
132
  int64_t      entry_padding;
133
  int64_t      entry_bytes_unconsumed;
134
  int64_t      realsize;
135
  struct sparse_block *sparse_list;
136
  struct sparse_block *sparse_last;
137
  int64_t      sparse_offset;
138
  int64_t      sparse_numbytes;
139
  int      sparse_gnu_major;
140
  int      sparse_gnu_minor;
141
  char       sparse_gnu_attributes_seen;
142
  char       filetype;
143
144
  struct archive_string  localname;
145
  struct archive_string_conv *opt_sconv;
146
  struct archive_string_conv *sconv;
147
  struct archive_string_conv *sconv_acl;
148
  struct archive_string_conv *sconv_default;
149
  int      init_default_conversion;
150
  int      compat_2x;
151
  int      process_mac_extensions;
152
  int      read_concatenated_archives;
153
  int      realsize_override;
154
};
155
156
/*
 * Forward declarations for the file-local functions, grouped by role.
 */

/* Callbacks handed to the format registry. */
static int  archive_read_format_tar_bid(struct archive_read *, int);
static int  archive_read_format_tar_options(struct archive_read *,
        const char *, const char *);
static int  archive_read_format_tar_read_header(struct archive_read *,
        struct archive_entry *);
static int  archive_read_format_tar_read_data(struct archive_read *a,
        const void **buff, size_t *size, int64_t *offset);
static int  archive_read_format_tar_skip(struct archive_read *a);
static int  archive_read_format_tar_cleanup(struct archive_read *);

/* Header readers, one per header flavour. */
static int  tar_read_header(struct archive_read *, struct tar *,
        struct archive_entry *, size_t *);
static int  header_common(struct archive_read *,  struct tar *,
        struct archive_entry *, const void *);
static int  header_old_tar(struct archive_read *, struct tar *,
        struct archive_entry *, const void *);
static int  header_ustar(struct archive_read *, struct tar *,
        struct archive_entry *, const void *h);
static int  header_gnutar(struct archive_read *, struct tar *,
        struct archive_entry *, const void *h, size_t *);
static int  header_gnu_longlink(struct archive_read *, struct tar *,
        struct archive_entry *, const void *h, size_t *);
static int  header_gnu_longname(struct archive_read *, struct tar *,
        struct archive_entry *, const void *h, size_t *);
static int  header_volume(struct archive_read *, struct tar *,
        struct archive_entry *, const void *h, size_t *);
static int  header_pax_extension(struct archive_read *, struct tar *,
        struct archive_entry *, const void *, size_t *);
static int  header_pax_global(struct archive_read *, struct tar *,
        struct archive_entry *, const void *h, size_t *);
static int  header_Solaris_ACL(struct archive_read *,  struct tar *,
        struct archive_entry *, const void *, size_t *);

/* GNU sparse-file support. */
static int  gnu_add_sparse_entry(struct archive_read *, struct tar *,
        int64_t offset, int64_t remaining);
static void gnu_clear_sparse_list(struct tar *);
static int  gnu_sparse_old_read(struct archive_read *, struct tar *,
        const struct archive_entry_header_gnutar *header, size_t *);
static int  gnu_sparse_old_parse(struct archive_read *, struct tar *,
        const struct gnu_sparse *sparse, int length);
static int  gnu_sparse_01_parse(struct archive_read *, struct tar *,
        const char *, size_t);
static ssize_t  gnu_sparse_10_read(struct archive_read *, struct tar *,
        size_t *);

/* pax attribute handling. */
static int  pax_attribute(struct archive_read *, struct tar *,
        struct archive_entry *, const char *key, size_t key_length,
        size_t value_length, size_t *unconsumed);
static int  pax_attribute_LIBARCHIVE_xattr(struct archive_entry *,
        const char *, size_t, const char *, size_t);
static int  pax_attribute_SCHILY_acl(struct archive_read *, struct tar *,
        struct archive_entry *, size_t, int);
static int  pax_attribute_SUN_holesdata(struct archive_read *, struct tar *,
        struct archive_entry *, const char *, size_t);
static void pax_time(const char *, size_t, int64_t *sec, long *nanos);

/* Mac metadata entries. */
static int  is_mac_metadata_entry(struct archive_entry *entry);
static int  read_mac_metadata_blob(struct archive_read *,
        struct archive_entry *, size_t *);

/* Input helpers. */
static void tar_flush_unconsumed(struct archive_read *, size_t *);
static ssize_t  readline(struct archive_read *, struct tar *, const char **,
        ssize_t limit, size_t *);
static int  read_body_to_string(struct archive_read *, struct tar *,
        struct archive_string *, const void *h, size_t *);
static int  read_bytes_to_string(struct archive_read *,
        struct archive_string *, size_t, size_t *);

/* Checksums, number parsing and decoding. */
static int  archive_block_is_null(const char *p);
static int  checksum(struct archive_read *, const void *);
static int64_t  tar_atol(const char *, size_t);
static int64_t  tar_atol10(const char *, size_t);
static int64_t  tar_atol256(const char *, size_t);
static int64_t  tar_atol8(const char *, size_t);
static int  tohex(int c);
static char *base64_decode(const char *, size_t, size_t *);
static char *url_decode(const char *, size_t);
228
229
/*
 * Sanity limits.
 *
 * Each value must be small enough that a maliciously crafted archive
 * cannot make us allocate an extreme amount of memory, yet large
 * enough to accept every legitimate value.  Expect these to be tuned
 * as more experience is gained.
 */
/* Longest uname or gname: 64 KiB */
static const size_t guname_limit = 65536;
/* Longest path name: 1 MiB */
static const size_t pathname_limit = 1048576;
/* Longest sparse map: 8 MiB */
static const size_t sparse_map_limit = 8 * 1048576;
/* Longest xattr: 16 MiB */
static const size_t xattr_limit = 16 * 1048576;
/* Longest fflags string */
static const size_t fflags_limit = 512;
/* Longest textual ACL: 128 KiB */
static const size_t acl_limit = 131072;
/* Largest entry: 2^60 - 1 bytes, i.e. just under 1 EiB */
static const int64_t entry_limit = 0xfffffffffffffffLL;
241
242
int
243
archive_read_support_format_gnutar(struct archive *a)
244
238
{
245
238
  archive_check_magic(a, ARCHIVE_READ_MAGIC,
246
238
      ARCHIVE_STATE_NEW, "archive_read_support_format_gnutar");
247
238
  return (archive_read_support_format_tar(a));
248
238
}
249
250
251
int
252
archive_read_support_format_tar(struct archive *_a)
253
476
{
254
476
  struct archive_read *a = (struct archive_read *)_a;
255
476
  struct tar *tar;
256
476
  int r;
257
258
476
  archive_check_magic(_a, ARCHIVE_READ_MAGIC,
259
476
      ARCHIVE_STATE_NEW, "archive_read_support_format_tar");
260
261
476
  tar = (struct tar *)calloc(1, sizeof(*tar));
262
476
  if (tar == NULL) {
263
0
    archive_set_error(&a->archive, ENOMEM,
264
0
        "Can't allocate tar data");
265
0
    return (ARCHIVE_FATAL);
266
0
  }
267
#ifdef HAVE_COPYFILE_H
268
  /* Set this by default on Mac OS. */
269
  tar->process_mac_extensions = 1;
270
#endif
271
272
476
  r = __archive_read_register_format(a, tar, "tar",
273
476
      archive_read_format_tar_bid,
274
476
      archive_read_format_tar_options,
275
476
      archive_read_format_tar_read_header,
276
476
      archive_read_format_tar_read_data,
277
476
      archive_read_format_tar_skip,
278
476
      NULL,
279
476
      archive_read_format_tar_cleanup,
280
476
      NULL,
281
476
      NULL);
282
283
476
  if (r != ARCHIVE_OK)
284
238
    free(tar);
285
476
  return (ARCHIVE_OK);
286
476
}
287
288
static int
289
archive_read_format_tar_cleanup(struct archive_read *a)
290
238
{
291
238
  struct tar *tar;
292
293
238
  tar = (struct tar *)(a->format->data);
294
238
  gnu_clear_sparse_list(tar);
295
238
  archive_string_free(&tar->entry_pathname);
296
238
  archive_string_free(&tar->entry_pathname_override);
297
238
  archive_string_free(&tar->entry_uname);
298
238
  archive_string_free(&tar->entry_gname);
299
238
  archive_string_free(&tar->line);
300
238
  archive_string_free(&tar->pax_global);
301
238
  archive_string_free(&tar->longname);
302
238
  archive_string_free(&tar->localname);
303
238
  free(tar);
304
238
  (a->format->data) = NULL;
305
238
  return (ARCHIVE_OK);
306
238
}
307
308
/*
 * Check that a header field looks like a number.
 *
 * The check is deliberately lenient because tar writers vary widely:
 *  = POSIX (IEEE Std 1003.1-1988) ustar asks for octal digits with
 *    leading zeros, terminated by space or NUL
 *  = many writers terminate differently (libarchive itself drops the
 *    terminator to gain a digit or two)
 *  = many writers pad with spaces and omit leading zeros
 *  = GNU tar and star switch to base-256 when a value does not fit
 *    in octal
 *
 * Specific headers that have to be accepted:
 *  = Perl Archive::Tar ends uid, gid, devminor and devmajor with two
 *    NUL bytes, pads size with spaces and other fields with zeroes
 *  = plexus-archiver before 2.6.3 may write uid and gid fields that
 *    contain only spaces, and pads all numeric fields with spaces
 *
 * Accepted shape for an octal field: optional leading spaces, then
 * optional octal digits, then only spaces or NULs up to the end.
 * A field whose first byte is 0, 128 or 255 is accepted unchecked.
 *
 * Returns 1 if the field is acceptable, 0 otherwise.
 */
static int
validate_number_field(const char* p_field, size_t i_size)
{
  const unsigned char first = (unsigned char)p_field[0];
  size_t pos = 0;

  /* Base-256 marker (or leading NUL): there is nothing to verify. */
  if (first == 0 || first == 128 || first == 255)
    return 1;

  /* Leading blanks. */
  for (; pos < i_size && p_field[pos] == ' '; ++pos)
    ;

  /* The octal digits themselves. */
  for (; pos < i_size && p_field[pos] >= '0' && p_field[pos] <= '7'; ++pos)
    ;

  /* Whatever is left may only be space or NUL padding. */
  for (; pos < i_size; ++pos) {
    if (p_field[pos] != ' ' && p_field[pos] != 0)
      return 0;
  }
  return 1;
}
359
360
static int
361
archive_read_format_tar_bid(struct archive_read *a, int best_bid)
362
236
{
363
236
  int bid;
364
236
  const char *h;
365
236
  const struct archive_entry_header_ustar *header;
366
367
236
  (void)best_bid; /* UNUSED */
368
369
236
  bid = 0;
370
371
  /* Now let's look at the actual header and see if it matches. */
372
236
  h = __archive_read_ahead(a, 512, NULL);
373
236
  if (h == NULL)
374
5
    return (-1);
375
376
  /* If it's an end-of-archive mark, we can handle it. */
377
231
  if (h[0] == 0 && archive_block_is_null(h)) {
378
    /*
379
     * Usually, I bid the number of bits verified, but
380
     * in this case, 4096 seems excessive so I picked 10 as
381
     * an arbitrary but reasonable-seeming value.
382
     */
383
20
    return (10);
384
20
  }
385
386
  /* If it's not an end-of-archive mark, it must have a valid checksum.*/
387
211
  if (!checksum(a, h))
388
206
    return (0);
389
5
  bid += 48;  /* Checksum is usually 6 octal digits. */
390
391
5
  header = (const struct archive_entry_header_ustar *)h;
392
393
  /* Recognize POSIX formats. */
394
5
  if ((memcmp(header->magic, "ustar\0", 6) == 0)
395
5
      && (memcmp(header->version, "00", 2) == 0))
396
0
    bid += 56;
397
398
  /* Recognize GNU tar format. */
399
5
  if ((memcmp(header->magic, "ustar ", 6) == 0)
400
5
      && (memcmp(header->version, " \0", 2) == 0))
401
1
    bid += 56;
402
403
  /* Type flag must be null, digit or A-Z, a-z. */
404
5
  if (header->typeflag[0] != 0 &&
405
5
      !( header->typeflag[0] >= '0' && header->typeflag[0] <= '9') &&
406
5
      !( header->typeflag[0] >= 'A' && header->typeflag[0] <= 'Z') &&
407
5
      !( header->typeflag[0] >= 'a' && header->typeflag[0] <= 'z') )
408
0
    return (0);
409
5
  bid += 2;  /* 6 bits of variation in an 8-bit field leaves 2 bits. */
410
411
  /*
412
   * Check format of mode/uid/gid/mtime/size/rdevmajor/rdevminor fields.
413
   */
414
5
  if (validate_number_field(header->mode, sizeof(header->mode)) == 0
415
5
      || validate_number_field(header->uid, sizeof(header->uid)) == 0
416
5
      || validate_number_field(header->gid, sizeof(header->gid)) == 0
417
5
      || validate_number_field(header->mtime, sizeof(header->mtime)) == 0
418
5
      || validate_number_field(header->size, sizeof(header->size)) == 0
419
5
      || validate_number_field(header->rdevmajor, sizeof(header->rdevmajor)) == 0
420
5
      || validate_number_field(header->rdevminor, sizeof(header->rdevminor)) == 0) {
421
0
    bid = 0;
422
0
  }
423
424
5
  return (bid);
425
5
}
426
427
static int
428
archive_read_format_tar_options(struct archive_read *a,
429
    const char *key, const char *val)
430
476
{
431
476
  struct tar *tar;
432
476
  int ret = ARCHIVE_FAILED;
433
434
476
  tar = (struct tar *)(a->format->data);
435
476
  if (strcmp(key, "compat-2x")  == 0) {
436
    /* Handle UTF-8 filenames as libarchive 2.x */
437
0
    tar->compat_2x = (val != NULL && val[0] != 0);
438
0
    tar->init_default_conversion = tar->compat_2x;
439
0
    return (ARCHIVE_OK);
440
476
  } else if (strcmp(key, "hdrcharset")  == 0) {
441
0
    if (val == NULL || val[0] == 0)
442
0
      archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
443
0
          "tar: hdrcharset option needs a character-set name");
444
0
    else {
445
0
      tar->opt_sconv =
446
0
          archive_string_conversion_from_charset(
447
0
        &a->archive, val, 0);
448
0
      if (tar->opt_sconv != NULL)
449
0
        ret = ARCHIVE_OK;
450
0
      else
451
0
        ret = ARCHIVE_FATAL;
452
0
    }
453
0
    return (ret);
454
476
  } else if (strcmp(key, "mac-ext") == 0) {
455
238
    tar->process_mac_extensions = (val != NULL && val[0] != 0);
456
238
    return (ARCHIVE_OK);
457
238
  } else if (strcmp(key, "read_concatenated_archives") == 0) {
458
238
    tar->read_concatenated_archives = (val != NULL && val[0] != 0);
459
238
    return (ARCHIVE_OK);
460
238
  }
461
462
  /* Note: The "warn" return is just to inform the options
463
   * supervisor that we didn't handle it.  It will generate
464
   * a suitable error if no one used this option. */
465
0
  return (ARCHIVE_WARN);
466
476
}
467
468
/*
 * Utility: single place that settles any outstanding "unconsumed"
 * byte count before the caller goes on to do further read-aheads.
 * If *unconsumed is non-zero, that many bytes are consumed from the
 * input and the counter is reset to zero; otherwise nothing happens.
 *
 * Debugging aid (kept disabled): fetching the pending bytes with
 * __archive_read_ahead() and filling them with 0xff just before the
 * consume call poisons the claimed space, so any code still using it
 * breaks visibly.  It currently WILL break things, so enable it only
 * while debugging that issue.
 */
static void
tar_flush_unconsumed(struct archive_read *a, size_t *unconsumed)
{
  if (*unconsumed == 0)
    return;

  __archive_read_consume(a, *unconsumed);
  *unconsumed = 0;
}
489
490
/*
491
 * The function invoked by archive_read_next_header().  This
492
 * just sets up a few things and then calls the internal
493
 * tar_read_header() function below.
494
 */
495
static int
496
archive_read_format_tar_read_header(struct archive_read *a,
497
    struct archive_entry *entry)
498
290k
{
499
  /*
500
   * When converting tar archives to cpio archives, it is
501
   * essential that each distinct file have a distinct inode
502
   * number.  To simplify this, we keep a static count here to
503
   * assign fake dev/inode numbers to each tar entry.  Note that
504
   * pax format archives may overwrite this with something more
505
   * useful.
506
   *
507
   * Ideally, we would track every file read from the archive so
508
   * that we could assign the same dev/ino pair to hardlinks,
509
   * but the memory required to store a complete lookup table is
510
   * probably not worthwhile just to support the relatively
511
   * obscure tar->cpio conversion case.
512
   */
513
  /* TODO: Move this into `struct tar` to avoid conflicts
514
   * when reading multiple archives */
515
290k
  static int default_inode;
516
290k
  static int default_dev;
517
290k
  struct tar *tar;
518
290k
  const char *p;
519
290k
  const wchar_t *wp;
520
290k
  int r;
521
290k
  size_t l, unconsumed = 0;
522
523
  /* Assign default device/inode values. */
524
290k
  archive_entry_set_dev(entry, 1 + default_dev); /* Don't use zero. */
525
290k
  archive_entry_set_ino(entry, ++default_inode); /* Don't use zero. */
526
  /* Limit generated st_ino number to 16 bits. */
527
290k
  if (default_inode >= 0xffff) {
528
4
    ++default_dev;
529
4
    default_inode = 0;
530
4
  }
531
532
290k
  tar = (struct tar *)(a->format->data);
533
290k
  tar->entry_offset = 0;
534
290k
  gnu_clear_sparse_list(tar);
535
290k
  tar->realsize = -1; /* Mark this as "unset" */
536
290k
  tar->realsize_override = 0;
537
538
  /* Setup default string conversion. */
539
290k
  tar->sconv = tar->opt_sconv;
540
290k
  if (tar->sconv == NULL) {
541
290k
    if (!tar->init_default_conversion) {
542
10
      tar->sconv_default =
543
10
          archive_string_default_conversion_for_read(&(a->archive));
544
10
      tar->init_default_conversion = 1;
545
10
    }
546
290k
    tar->sconv = tar->sconv_default;
547
290k
  }
548
549
290k
  r = tar_read_header(a, tar, entry, &unconsumed);
550
551
290k
  tar_flush_unconsumed(a, &unconsumed);
552
553
  /*
554
   * "non-sparse" files are really just sparse files with
555
   * a single block.
556
   */
557
290k
  if (tar->sparse_list == NULL) {
558
290k
    if (gnu_add_sparse_entry(a, tar, 0, tar->entry_bytes_remaining)
559
290k
        != ARCHIVE_OK)
560
0
      return (ARCHIVE_FATAL);
561
290k
  } else {
562
0
    struct sparse_block *sb;
563
564
0
    for (sb = tar->sparse_list; sb != NULL; sb = sb->next) {
565
0
      if (!sb->hole)
566
0
        archive_entry_sparse_add_entry(entry,
567
0
            sb->offset, sb->remaining);
568
0
    }
569
0
  }
570
571
290k
  if (r == ARCHIVE_OK && archive_entry_filetype(entry) == AE_IFREG) {
572
    /*
573
     * "Regular" entry with trailing '/' is really
574
     * directory: This is needed for certain old tar
575
     * variants and even for some broken newer ones.
576
     */
577
5
    if ((wp = archive_entry_pathname_w(entry)) != NULL) {
578
5
      l = wcslen(wp);
579
5
      if (l > 0 && wp[l - 1] == L'/') {
580
0
        archive_entry_set_filetype(entry, AE_IFDIR);
581
0
        tar->entry_bytes_remaining = 0;
582
0
        tar->entry_padding = 0;
583
0
      }
584
5
    } else if ((p = archive_entry_pathname(entry)) != NULL) {
585
0
      l = strlen(p);
586
0
      if (l > 0 && p[l - 1] == '/') {
587
0
        archive_entry_set_filetype(entry, AE_IFDIR);
588
0
        tar->entry_bytes_remaining = 0;
589
0
        tar->entry_padding = 0;
590
0
      }
591
0
    }
592
5
  }
593
290k
  return (r);
594
290k
}
595
596
static int
597
archive_read_format_tar_read_data(struct archive_read *a,
598
    const void **buff, size_t *size, int64_t *offset)
599
7
{
600
7
  ssize_t bytes_read;
601
7
  struct tar *tar;
602
7
  struct sparse_block *p;
603
604
7
  tar = (struct tar *)(a->format->data);
605
606
7
  for (;;) {
607
    /* Remove exhausted entries from sparse list. */
608
12
    while (tar->sparse_list != NULL &&
609
12
        tar->sparse_list->remaining == 0) {
610
5
      p = tar->sparse_list;
611
5
      tar->sparse_list = p->next;
612
5
      free(p);
613
5
    }
614
615
7
    if (tar->entry_bytes_unconsumed) {
616
1
      __archive_read_consume(a, tar->entry_bytes_unconsumed);
617
1
      tar->entry_bytes_unconsumed = 0;
618
1
    }
619
620
    /* If we're at end of file, return EOF. */
621
7
    if (tar->sparse_list == NULL ||
622
7
        tar->entry_bytes_remaining == 0) {
623
6
      if (__archive_read_consume(a, tar->entry_padding) < 0)
624
0
        return (ARCHIVE_FATAL);
625
6
      tar->entry_padding = 0;
626
6
      *buff = NULL;
627
6
      *size = 0;
628
6
      *offset = tar->realsize;
629
6
      return (ARCHIVE_EOF);
630
6
    }
631
632
1
    *buff = __archive_read_ahead(a, 1, &bytes_read);
633
1
    if (bytes_read < 0)
634
0
      return (ARCHIVE_FATAL);
635
1
    if (*buff == NULL) {
636
0
      archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
637
0
          "Truncated tar archive"
638
0
          " detected while reading data");
639
0
      return (ARCHIVE_FATAL);
640
0
    }
641
1
    if (bytes_read > tar->entry_bytes_remaining)
642
1
      bytes_read = (ssize_t)tar->entry_bytes_remaining;
643
    /* Don't read more than is available in the
644
     * current sparse block. */
645
1
    if (tar->sparse_list->remaining < bytes_read)
646
0
      bytes_read = (ssize_t)tar->sparse_list->remaining;
647
1
    *size = bytes_read;
648
1
    *offset = tar->sparse_list->offset;
649
1
    tar->sparse_list->remaining -= bytes_read;
650
1
    tar->sparse_list->offset += bytes_read;
651
1
    tar->entry_bytes_remaining -= bytes_read;
652
1
    tar->entry_bytes_unconsumed = bytes_read;
653
654
1
    if (!tar->sparse_list->hole)
655
1
      return (ARCHIVE_OK);
656
    /* Current is hole data and skip this. */
657
1
  }
658
7
}
659
660
static int
661
archive_read_format_tar_skip(struct archive_read *a)
662
5
{
663
5
  int64_t bytes_skipped;
664
5
  int64_t request;
665
5
  struct sparse_block *p;
666
5
  struct tar* tar;
667
668
5
  tar = (struct tar *)(a->format->data);
669
670
  /* Do not consume the hole of a sparse file. */
671
5
  request = 0;
672
5
  for (p = tar->sparse_list; p != NULL; p = p->next) {
673
0
    if (!p->hole) {
674
0
      if (p->remaining >= INT64_MAX - request) {
675
0
        return ARCHIVE_FATAL;
676
0
      }
677
0
      request += p->remaining;
678
0
    }
679
0
  }
680
5
  if (request > tar->entry_bytes_remaining)
681
0
    request = tar->entry_bytes_remaining;
682
5
  request += tar->entry_padding + tar->entry_bytes_unconsumed;
683
684
5
  bytes_skipped = __archive_read_consume(a, request);
685
5
  if (bytes_skipped < 0)
686
0
    return (ARCHIVE_FATAL);
687
688
5
  tar->entry_bytes_remaining = 0;
689
5
  tar->entry_bytes_unconsumed = 0;
690
5
  tar->entry_padding = 0;
691
692
  /* Free the sparse list. */
693
5
  gnu_clear_sparse_list(tar);
694
695
5
  return (ARCHIVE_OK);
696
5
}
697
698
/*
699
 * This function reads and interprets all of the headers associated
700
 * with a single entry.
701
 */
702
static int
703
tar_read_header(struct archive_read *a, struct tar *tar,
704
    struct archive_entry *entry, size_t *unconsumed)
705
290k
{
706
290k
  ssize_t bytes;
707
290k
  int err = ARCHIVE_OK, err2;
708
290k
  int eof_fatal = 0; /* EOF is okay at some points... */
709
290k
  const char *h;
710
290k
  const struct archive_entry_header_ustar *header;
711
290k
  const struct archive_entry_header_gnutar *gnuheader;
712
713
  /* Bitmask of what header types we've seen. */
714
290k
  int32_t seen_headers = 0;
715
290k
  static const int32_t seen_A_header = 1;
716
290k
  static const int32_t seen_g_header = 2;
717
290k
  static const int32_t seen_K_header = 4;
718
290k
  static const int32_t seen_L_header = 8;
719
290k
  static const int32_t seen_V_header = 16;
720
290k
  static const int32_t seen_x_header = 32; /* Also X */
721
290k
  static const int32_t seen_mac_metadata = 512;
722
723
290k
  tar->pax_hdrcharset_utf8 = 1;
724
290k
  tar->sparse_gnu_attributes_seen = 0;
725
290k
  archive_string_empty(&(tar->entry_gname));
726
290k
  archive_string_empty(&(tar->entry_pathname));
727
290k
  archive_string_empty(&(tar->entry_pathname_override));
728
290k
  archive_string_empty(&(tar->entry_uname));
729
730
  /* Ensure format is set. */
731
290k
  if (a->archive.archive_format_name == NULL) {
732
10
    a->archive.archive_format = ARCHIVE_FORMAT_TAR;
733
10
    a->archive.archive_format_name = "tar";
734
10
  }
735
736
  /*
737
   * TODO: Write global/default pax options into
738
   * 'entry' struct here before overwriting with
739
   * file-specific options.
740
   */
741
742
  /* Loop over all the headers needed for the next entry */
743
290k
  for (;;) {
744
745
    /* Find the next valid header record. */
746
307k
    while (1) {
747
307k
      tar_flush_unconsumed(a, unconsumed);
748
749
      /* Read 512-byte header record */
750
307k
      h = __archive_read_ahead(a, 512, &bytes);
751
307k
      if (bytes < 0)
752
1
        return ((int)bytes);
753
307k
      if (bytes == 0) { /* EOF at a block boundary. */
754
4
        if (eof_fatal) {
755
          /* We've read a special header already;
756
           * if there's no regular header, then this is
757
           * a premature EOF. */
758
0
          archive_set_error(&a->archive, EINVAL,
759
0
                "Damaged tar archive");
760
0
          return (ARCHIVE_FATAL);
761
4
        } else {
762
4
          return (ARCHIVE_EOF);
763
4
        }
764
4
      }
765
307k
      if (bytes < 512) {  /* Short block at EOF; this is bad. */
766
5
        archive_set_error(&a->archive,
767
5
            ARCHIVE_ERRNO_FILE_FORMAT,
768
5
            "Truncated tar archive"
769
5
            " detected while reading next heaader");
770
5
        return (ARCHIVE_FATAL);
771
5
      }
772
307k
      *unconsumed += 512;
773
774
307k
      if (h[0] == 0 && archive_block_is_null(h)) {
775
        /* We found a NULL block which indicates end-of-archive */
776
777
16.1k
        if (tar->read_concatenated_archives) {
778
          /* We're ignoring NULL blocks, so keep going. */
779
16.1k
          continue;
780
16.1k
        }
781
782
        /* Try to consume a second all-null record, as well. */
783
        /* If we can't, that's okay. */
784
0
        tar_flush_unconsumed(a, unconsumed);
785
0
        h = __archive_read_ahead(a, 512, NULL);
786
0
        if (h != NULL && h[0] == 0 && archive_block_is_null(h))
787
0
            __archive_read_consume(a, 512);
788
789
0
        archive_clear_error(&a->archive);
790
0
        return (ARCHIVE_EOF);
791
16.1k
      }
792
793
      /* This is NOT a null block, so it must be a valid header. */
794
290k
      if (!checksum(a, h)) {
795
290k
        tar_flush_unconsumed(a, unconsumed);
796
290k
        archive_set_error(&a->archive, EINVAL, "Damaged tar archive");
797
        /* If we've read some critical information (pax headers, etc)
798
         * and _then_ see a bad header, we can't really recover. */
799
290k
        if (eof_fatal) {
800
0
          return (ARCHIVE_FATAL);
801
290k
        } else {
802
290k
          return (ARCHIVE_RETRY);
803
290k
        }
804
290k
      }
805
5
      break;
806
290k
    }
807
808
    /* Determine the format variant. */
809
5
    header = (const struct archive_entry_header_ustar *)h;
810
5
    switch(header->typeflag[0]) {
811
0
    case 'A': /* Solaris tar ACL */
812
0
      if (seen_headers & seen_A_header) {
813
0
        return (ARCHIVE_FATAL);
814
0
      }
815
0
      seen_headers |= seen_A_header;
816
0
      a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
817
0
      a->archive.archive_format_name = "Solaris tar";
818
0
      err2 = header_Solaris_ACL(a, tar, entry, h, unconsumed);
819
0
      break;
820
0
    case 'g': /* POSIX-standard 'g' header. */
821
0
      if (seen_headers & seen_g_header) {
822
0
        return (ARCHIVE_FATAL);
823
0
      }
824
0
      seen_headers |= seen_g_header;
825
0
      a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
826
0
      a->archive.archive_format_name = "POSIX pax interchange format";
827
0
      err2 = header_pax_global(a, tar, entry, h, unconsumed);
828
0
      break;
829
0
    case 'K': /* Long link name (GNU tar, others) */
830
0
      if (seen_headers & seen_K_header) {
831
0
        return (ARCHIVE_FATAL);
832
0
      }
833
0
      seen_headers |= seen_K_header;
834
0
      err2 = header_gnu_longlink(a, tar, entry, h, unconsumed);
835
0
      break;
836
0
    case 'L': /* Long filename (GNU tar, others) */
837
0
      if (seen_headers & seen_L_header) {
838
0
        return (ARCHIVE_FATAL);
839
0
      }
840
0
      seen_headers |= seen_L_header;
841
0
      err2 = header_gnu_longname(a, tar, entry, h, unconsumed);
842
0
      break;
843
0
    case 'V': /* GNU volume header */
844
0
      if (seen_headers & seen_V_header) {
845
0
        return (ARCHIVE_FATAL);
846
0
      }
847
0
      seen_headers |= seen_V_header;
848
0
      err2 = header_volume(a, tar, entry, h, unconsumed);
849
0
      break;
850
0
    case 'X': /* Used by SUN tar; same as 'x'. */
851
0
      if (seen_headers & seen_x_header) {
852
0
        return (ARCHIVE_FATAL);
853
0
      }
854
0
      seen_headers |= seen_x_header;
855
0
      a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
856
0
      a->archive.archive_format_name =
857
0
        "POSIX pax interchange format (Sun variant)";
858
0
      err2 = header_pax_extension(a, tar, entry, h, unconsumed);
859
0
      break;
860
0
    case 'x': /* POSIX-standard 'x' header. */
861
0
      if (seen_headers & seen_x_header) {
862
0
        return (ARCHIVE_FATAL);
863
0
      }
864
0
      seen_headers |= seen_x_header;
865
0
      a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
866
0
      a->archive.archive_format_name = "POSIX pax interchange format";
867
0
      err2 = header_pax_extension(a, tar, entry, h, unconsumed);
868
0
      break;
869
5
    default: /* Regular header: Legacy tar, GNU tar, or ustar */
870
5
      gnuheader = (const struct archive_entry_header_gnutar *)h;
871
5
      if (memcmp(gnuheader->magic, "ustar  \0", 8) == 0) {
872
1
        a->archive.archive_format = ARCHIVE_FORMAT_TAR_GNUTAR;
873
1
        a->archive.archive_format_name = "GNU tar format";
874
1
        err2 = header_gnutar(a, tar, entry, h, unconsumed);
875
4
      } else if (memcmp(header->magic, "ustar", 5) == 0) {
876
0
        if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) {
877
0
          a->archive.archive_format = ARCHIVE_FORMAT_TAR_USTAR;
878
0
          a->archive.archive_format_name = "POSIX ustar format";
879
0
        }
880
0
        err2 = header_ustar(a, tar, entry, h);
881
4
      } else {
882
4
        a->archive.archive_format = ARCHIVE_FORMAT_TAR;
883
4
        a->archive.archive_format_name = "tar (non-POSIX)";
884
4
        err2 = header_old_tar(a, tar, entry, h);
885
4
      }
886
5
      err = err_combine(err, err2);
887
      /* We return warnings or success as-is.  Anything else is fatal. */
888
5
      if (err < ARCHIVE_WARN) {
889
0
        return (ARCHIVE_FATAL);
890
0
      }
891
      /* Filename of the form `._filename` is an AppleDouble
892
       * extension entry.  The body is the macOS metadata blob;
893
       * this is followed by another entry with the actual
894
       * regular file data.
895
       * This design has two drawbacks:
896
       * = it's brittle; you might just have a file with such a name
897
       * = it duplicates any long pathname extensions
898
       *
899
       * TODO: This probably shouldn't be here at all.  Consider
900
       * just returning the contents as a regular entry here and
901
       * then dealing with it when we write data to disk.
902
       */
903
5
      if (tar->process_mac_extensions
904
5
          && ((seen_headers & seen_mac_metadata) == 0)
905
5
          && is_mac_metadata_entry(entry)) {
906
0
        err2 = read_mac_metadata_blob(a, entry, unconsumed);
907
0
        if (err2 < ARCHIVE_WARN) {
908
0
          return (ARCHIVE_FATAL);
909
0
        }
910
0
        err = err_combine(err, err2);
911
        /* Note: Other headers can appear again. */
912
0
        seen_headers = seen_mac_metadata;
913
0
        break;
914
0
      }
915
916
      /* Reconcile GNU sparse attributes */
917
5
      if (tar->sparse_gnu_attributes_seen) {
918
        /* Only 'S' (GNU sparse) and ustar '0' regular files can be sparse */
919
0
        if (tar->filetype != 'S' && tar->filetype != '0') {
920
0
          archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
921
0
                "Non-regular file cannot be sparse");
922
0
          return (ARCHIVE_WARN);
923
0
        } else if (tar->sparse_gnu_major == 0 &&
924
0
            tar->sparse_gnu_minor == 0) {
925
          /* Sparse map already parsed from 'x' header */
926
0
        } else if (tar->sparse_gnu_major == 0 &&
927
0
            tar->sparse_gnu_minor == 1) {
928
          /* Sparse map already parsed from 'x' header */
929
0
        } else if (tar->sparse_gnu_major == 1 &&
930
0
            tar->sparse_gnu_minor == 0) {
931
          /* Sparse map is prepended to file contents */
932
0
          ssize_t bytes_read;
933
0
          bytes_read = gnu_sparse_10_read(a, tar, unconsumed);
934
0
          if (bytes_read < 0)
935
0
            return ((int)bytes_read);
936
0
          tar->entry_bytes_remaining -= bytes_read;
937
0
        } else {
938
0
          archive_set_error(&a->archive,
939
0
                ARCHIVE_ERRNO_MISC,
940
0
                "Unrecognized GNU sparse file format");
941
0
          return (ARCHIVE_WARN);
942
0
        }
943
0
      }
944
5
      return (err);
945
5
    }
946
947
    /* We're between headers ... */
948
0
    err = err_combine(err, err2);
949
0
    if (err == ARCHIVE_FATAL)
950
0
      return (err);
951
952
    /* The GNU volume header and the pax `g` global header
953
     * are both allowed to be the only header in an
954
     * archive.  If we've seen any other header, a
955
     * following EOF is fatal. */
956
0
    if ((seen_headers & ~seen_V_header & ~seen_g_header) != 0) {
957
0
      eof_fatal = 1;
958
0
    }
959
0
  }
960
290k
}
961
962
/*
963
 * Return true if block checksum is correct.
964
 */
965
static int
966
checksum(struct archive_read *a, const void *h)
967
291k
{
968
291k
  const unsigned char *bytes;
969
291k
  const struct archive_entry_header_ustar *header;
970
291k
  int check, sum;
971
291k
  size_t i;
972
973
291k
  (void)a; /* UNUSED */
974
291k
  bytes = (const unsigned char *)h;
975
291k
  header = (const struct archive_entry_header_ustar *)h;
976
977
  /* Checksum field must hold an octal number */
978
295k
  for (i = 0; i < sizeof(header->checksum); ++i) {
979
295k
    char c = header->checksum[i];
980
295k
    if (c != ' ' && c != '\0' && (c < '0' || c > '7'))
981
290k
      return 0;
982
295k
  }
983
984
  /*
985
   * Test the checksum.  Note that POSIX specifies _unsigned_
986
   * bytes for this calculation.
987
   */
988
544
  sum = (int)tar_atol(header->checksum, sizeof(header->checksum));
989
544
  check = 0;
990
81.0k
  for (i = 0; i < 148; i++)
991
80.5k
    check += (unsigned char)bytes[i];
992
4.89k
  for (; i < 156; i++)
993
4.35k
    check += 32;
994
194k
  for (; i < 512; i++)
995
193k
    check += (unsigned char)bytes[i];
996
544
  if (sum == check)
997
2
    return (1);
998
999
  /*
1000
   * Repeat test with _signed_ bytes, just in case this archive
1001
   * was created by an old BSD, Solaris, or HP-UX tar with a
1002
   * broken checksum calculation.
1003
   */
1004
542
  check = 0;
1005
80.7k
  for (i = 0; i < 148; i++)
1006
80.2k
    check += (signed char)bytes[i];
1007
4.87k
  for (; i < 156; i++)
1008
4.33k
    check += 32;
1009
193k
  for (; i < 512; i++)
1010
192k
    check += (signed char)bytes[i];
1011
542
  if (sum == check)
1012
8
    return (1);
1013
1014
534
  return (0);
1015
542
}
1016
1017
/*
 * Report whether a 512-byte block holds nothing but NUL bytes.
 * Returns 1 when every byte is zero, 0 at the first nonzero byte.
 */
static int
archive_block_is_null(const char *p)
{
  const char *const end = p + 512;

  while (p != end) {
    if (*p != '\0')
      return (0);
    ++p;
  }
  return (1);
}
1030
1031
/*
1032
 * Interpret 'A' Solaris ACL header
1033
 */
1034
static int
1035
header_Solaris_ACL(struct archive_read *a, struct tar *tar,
1036
    struct archive_entry *entry, const void *h, size_t *unconsumed)
1037
0
{
1038
0
  const struct archive_entry_header_ustar *header;
1039
0
  struct archive_string  acl_text;
1040
0
  size_t size;
1041
0
  int err, acl_type;
1042
0
  int64_t type;
1043
0
  char *acl, *p;
1044
1045
  /*
1046
   * read_body_to_string adds a NUL terminator, but we need a little
1047
   * more to make sure that we don't overrun acl_text later.
1048
   */
1049
0
  header = (const struct archive_entry_header_ustar *)h;
1050
0
  size = (size_t)tar_atol(header->size, sizeof(header->size));
1051
0
  archive_string_init(&acl_text);
1052
0
  err = read_body_to_string(a, tar, &acl_text, h, unconsumed);
1053
0
  if (err != ARCHIVE_OK)
1054
0
    return (err);
1055
1056
  /* TODO: Examine the first characters to see if this
1057
   * is an AIX ACL descriptor.  We'll likely never support
1058
   * them, but it would be polite to recognize and warn when
1059
   * we do see them. */
1060
1061
  /* Leading octal number indicates ACL type and number of entries. */
1062
0
  p = acl = acl_text.s;
1063
0
  type = 0;
1064
0
  while (*p != '\0' && p < acl + size) {
1065
0
    if (*p < '0' || *p > '7') {
1066
0
      archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1067
0
          "Malformed Solaris ACL attribute (invalid digit)");
1068
0
      archive_string_free(&acl_text);
1069
0
      return(ARCHIVE_WARN);
1070
0
    }
1071
0
    type <<= 3;
1072
0
    type += *p - '0';
1073
0
    if (type > 077777777) {
1074
0
      archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1075
0
          "Malformed Solaris ACL attribute (count too large)");
1076
0
      archive_string_free(&acl_text);
1077
0
      return (ARCHIVE_WARN);
1078
0
    }
1079
0
    p++;
1080
0
  }
1081
0
  switch ((int)type & ~0777777) {
1082
0
  case 01000000:
1083
    /* POSIX.1e ACL */
1084
0
    acl_type = ARCHIVE_ENTRY_ACL_TYPE_ACCESS;
1085
0
    break;
1086
0
  case 03000000:
1087
    /* NFSv4 ACL */
1088
0
    acl_type = ARCHIVE_ENTRY_ACL_TYPE_NFS4;
1089
0
    break;
1090
0
  default:
1091
0
    archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1092
0
        "Malformed Solaris ACL attribute (unsupported type %o)",
1093
0
        (int)type);
1094
0
    archive_string_free(&acl_text);
1095
0
    return (ARCHIVE_WARN);
1096
0
  }
1097
0
  p++;
1098
1099
0
  if (p >= acl + size) {
1100
0
    archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1101
0
        "Malformed Solaris ACL attribute (body overflow)");
1102
0
    archive_string_free(&acl_text);
1103
0
    return(ARCHIVE_WARN);
1104
0
  }
1105
1106
  /* ACL text is null-terminated; find the end. */
1107
0
  size -= (p - acl);
1108
0
  acl = p;
1109
1110
0
  while (*p != '\0' && p < acl + size)
1111
0
    p++;
1112
1113
0
  if (tar->sconv_acl == NULL) {
1114
0
    tar->sconv_acl = archive_string_conversion_from_charset(
1115
0
        &(a->archive), "UTF-8", 1);
1116
0
    if (tar->sconv_acl == NULL) {
1117
0
      archive_string_free(&acl_text);
1118
0
      return (ARCHIVE_FATAL);
1119
0
    }
1120
0
  }
1121
0
  archive_strncpy(&(tar->localname), acl, p - acl);
1122
0
  err = archive_acl_from_text_l(archive_entry_acl(entry),
1123
0
      tar->localname.s, acl_type, tar->sconv_acl);
1124
  /* Workaround: Force perm_is_set() to be correct */
1125
  /* If this bit were stored in the ACL, this wouldn't be needed */
1126
0
  archive_entry_set_perm(entry, archive_entry_perm(entry));
1127
0
  if (err != ARCHIVE_OK) {
1128
0
    if (errno == ENOMEM) {
1129
0
      archive_set_error(&a->archive, ENOMEM,
1130
0
          "Can't allocate memory for ACL");
1131
0
    } else
1132
0
      archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1133
0
          "Malformed Solaris ACL attribute (unparsable)");
1134
0
  }
1135
0
  archive_string_free(&acl_text);
1136
0
  return (err);
1137
0
}
1138
1139
/*
1140
 * Interpret 'K' long linkname header.
1141
 */
1142
static int
1143
header_gnu_longlink(struct archive_read *a, struct tar *tar,
1144
    struct archive_entry *entry, const void *h, size_t *unconsumed)
1145
0
{
1146
0
  int err;
1147
1148
0
  struct archive_string linkpath;
1149
0
  archive_string_init(&linkpath);
1150
0
  err = read_body_to_string(a, tar, &linkpath, h, unconsumed);
1151
0
  archive_entry_set_link(entry, linkpath.s);
1152
0
  archive_string_free(&linkpath);
1153
0
  return (err);
1154
0
}
1155
1156
static int
1157
set_conversion_failed_error(struct archive_read *a,
1158
    struct archive_string_conv *sconv, const char *name)
1159
0
{
1160
0
  if (errno == ENOMEM) {
1161
0
    archive_set_error(&a->archive, ENOMEM,
1162
0
        "Can't allocate memory for %s", name);
1163
0
    return (ARCHIVE_FATAL);
1164
0
  }
1165
0
  archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1166
0
      "%s can't be converted from %s to current locale.",
1167
0
      name, archive_string_conversion_charset_name(sconv));
1168
0
  return (ARCHIVE_WARN);
1169
0
}
1170
1171
/*
1172
 * Interpret 'L' long filename header.
1173
 */
1174
static int
1175
header_gnu_longname(struct archive_read *a, struct tar *tar,
1176
    struct archive_entry *entry, const void *h, size_t *unconsumed)
1177
0
{
1178
0
  int err;
1179
1180
0
  err = read_body_to_string(a, tar, &(tar->longname), h, unconsumed);
1181
0
  if (err != ARCHIVE_OK)
1182
0
    return (err);
1183
0
  if (archive_entry_copy_pathname_l(entry, tar->longname.s,
1184
0
      archive_strlen(&(tar->longname)), tar->sconv) != 0)
1185
0
    err = set_conversion_failed_error(a, tar->sconv, "Pathname");
1186
0
  return (err);
1187
0
}
1188
1189
/*
1190
 * Interpret 'V' GNU tar volume header.
1191
 */
1192
static int
1193
header_volume(struct archive_read *a, struct tar *tar,
1194
    struct archive_entry *entry, const void *h, size_t *unconsumed)
1195
0
{
1196
0
  const struct archive_entry_header_ustar *header;
1197
0
  int64_t size, to_consume;
1198
1199
0
  (void)a; /* UNUSED */
1200
0
  (void)tar; /* UNUSED */
1201
0
  (void)entry; /* UNUSED */
1202
1203
0
  header = (const struct archive_entry_header_ustar *)h;
1204
0
  size = tar_atol(header->size, sizeof(header->size));
1205
0
  if (size > (int64_t)pathname_limit) {
1206
0
    return (ARCHIVE_FATAL);
1207
0
  }
1208
0
  to_consume = ((size + 511) & ~511);
1209
0
  *unconsumed += to_consume;
1210
0
  return (ARCHIVE_OK);
1211
0
}
1212
1213
/*
1214
 * Read the next `size` bytes into the provided string.
1215
 * Null-terminate the string.
1216
 */
1217
static int
1218
read_bytes_to_string(struct archive_read *a,
1219
         struct archive_string *as, size_t size,
1220
0
         size_t *unconsumed) {
1221
0
  const void *src;
1222
1223
  /* Fail if we can't make our buffer big enough. */
1224
0
  if (archive_string_ensure(as, (size_t)size+1) == NULL) {
1225
0
    archive_set_error(&a->archive, ENOMEM,
1226
0
        "No memory");
1227
0
    return (ARCHIVE_FATAL);
1228
0
  }
1229
1230
0
  tar_flush_unconsumed(a, unconsumed);
1231
1232
  /* Read the body into the string. */
1233
0
  src = __archive_read_ahead(a, size, NULL);
1234
0
  if (src == NULL) {
1235
0
    archive_set_error(&a->archive, EINVAL,
1236
0
        "Truncated archive"
1237
0
        " detected while reading metadata");
1238
0
    *unconsumed = 0;
1239
0
    return (ARCHIVE_FATAL);
1240
0
  }
1241
0
  memcpy(as->s, src, (size_t)size);
1242
0
  as->s[size] = '\0';
1243
0
  as->length = (size_t)size;
1244
0
  *unconsumed += size;
1245
0
  return (ARCHIVE_OK);
1246
0
}
1247
1248
/*
1249
 * Read body of an archive entry into an archive_string object.
1250
 */
1251
static int
1252
read_body_to_string(struct archive_read *a, struct tar *tar,
1253
    struct archive_string *as, const void *h, size_t *unconsumed)
1254
0
{
1255
0
  int64_t size;
1256
0
  const struct archive_entry_header_ustar *header;
1257
0
  int r;
1258
1259
0
  (void)tar; /* UNUSED */
1260
0
  header = (const struct archive_entry_header_ustar *)h;
1261
0
  size  = tar_atol(header->size, sizeof(header->size));
1262
0
  if (size > entry_limit) {
1263
0
    return (ARCHIVE_FATAL);
1264
0
  }
1265
0
  if ((size > (int64_t)pathname_limit) || (size < 0)) {
1266
0
    archive_string_empty(as);
1267
0
    int64_t to_consume = ((size + 511) & ~511);
1268
0
    if (to_consume != __archive_read_consume(a, to_consume)) {
1269
0
      return (ARCHIVE_FATAL);
1270
0
    }
1271
0
    archive_set_error(&a->archive, EINVAL,
1272
0
        "Special header too large: %d > 1MiB",
1273
0
        (int)size);
1274
0
    return (ARCHIVE_WARN);
1275
0
  }
1276
0
  r = read_bytes_to_string(a, as, size, unconsumed);
1277
0
  *unconsumed += 0x1ff & (-size);
1278
0
  return(r);
1279
0
}
1280
1281
/*
1282
 * Parse out common header elements.
1283
 *
1284
 * This would be the same as header_old_tar, except that the
1285
 * filename is handled slightly differently for old and POSIX
1286
 * entries  (POSIX entries support a 'prefix').  This factoring
1287
 * allows header_old_tar and header_ustar
1288
 * to handle filenames differently, while still putting most of the
1289
 * common parsing into one place.
1290
 */
1291
static int
1292
header_common(struct archive_read *a, struct tar *tar,
1293
    struct archive_entry *entry, const void *h)
1294
5
{
1295
5
  const struct archive_entry_header_ustar *header;
1296
5
  const char *existing_linkpath;
1297
5
  const wchar_t *existing_wcs_linkpath;
1298
5
  int     err = ARCHIVE_OK;
1299
1300
5
  header = (const struct archive_entry_header_ustar *)h;
1301
1302
  /* Parse out the numeric fields (all are octal) */
1303
1304
  /* Split mode handling: Set filetype always, perm only if not already set */
1305
5
  archive_entry_set_filetype(entry,
1306
5
      (mode_t)tar_atol(header->mode, sizeof(header->mode)));
1307
5
  if (!archive_entry_perm_is_set(entry)) {
1308
5
    archive_entry_set_perm(entry,
1309
5
      (mode_t)tar_atol(header->mode, sizeof(header->mode)));
1310
5
  }
1311
5
  if (!archive_entry_uid_is_set(entry)) {
1312
5
    archive_entry_set_uid(entry, tar_atol(header->uid, sizeof(header->uid)));
1313
5
  }
1314
5
  if (!archive_entry_gid_is_set(entry)) {
1315
5
    archive_entry_set_gid(entry, tar_atol(header->gid, sizeof(header->gid)));
1316
5
  }
1317
1318
5
  tar->entry_bytes_remaining = tar_atol(header->size, sizeof(header->size));
1319
5
  if (tar->entry_bytes_remaining < 0) {
1320
0
    tar->entry_bytes_remaining = 0;
1321
0
    archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1322
0
        "Tar entry has negative size");
1323
0
    return (ARCHIVE_FATAL);
1324
0
  }
1325
5
  if (tar->entry_bytes_remaining > entry_limit) {
1326
0
    tar->entry_bytes_remaining = 0;
1327
0
    archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1328
0
        "Tar entry size overflow");
1329
0
    return (ARCHIVE_FATAL);
1330
0
  }
1331
5
  if (!tar->realsize_override) {
1332
5
    tar->realsize = tar->entry_bytes_remaining;
1333
5
  }
1334
5
  archive_entry_set_size(entry, tar->realsize);
1335
1336
5
  if (!archive_entry_mtime_is_set(entry)) {
1337
5
    archive_entry_set_mtime(entry, tar_atol(header->mtime, sizeof(header->mtime)), 0);
1338
5
  }
1339
1340
  /* Handle the tar type flag appropriately. */
1341
5
  tar->filetype = header->typeflag[0];
1342
1343
  /*
1344
   * TODO: If the linkpath came from Pax extension header, then
1345
   * we should obey the hdrcharset_utf8 flag when converting these.
1346
   */
1347
5
  switch (tar->filetype) {
1348
0
  case '1': /* Hard link */
1349
0
    archive_entry_set_link_to_hardlink(entry);
1350
0
    existing_wcs_linkpath = archive_entry_hardlink_w(entry);
1351
0
    existing_linkpath = archive_entry_hardlink(entry);
1352
0
    if ((existing_linkpath == NULL || existing_linkpath[0] == '\0')
1353
0
        && (existing_wcs_linkpath == NULL || existing_wcs_linkpath[0] == '\0')) {
1354
0
      struct archive_string linkpath;
1355
0
      archive_string_init(&linkpath);
1356
0
      archive_strncpy(&linkpath,
1357
0
          header->linkname, sizeof(header->linkname));
1358
0
      if (archive_entry_copy_hardlink_l(entry, linkpath.s,
1359
0
                archive_strlen(&linkpath), tar->sconv) != 0) {
1360
0
        err = set_conversion_failed_error(a, tar->sconv,
1361
0
                  "Linkname");
1362
0
        if (err == ARCHIVE_FATAL) {
1363
0
          archive_string_free(&linkpath);
1364
0
          return (err);
1365
0
        }
1366
0
      }
1367
0
      archive_string_free(&linkpath);
1368
0
    }
1369
    /*
1370
     * The following may seem odd, but: Technically, tar
1371
     * does not store the file type for a "hard link"
1372
     * entry, only the fact that it is a hard link.  So, I
1373
     * leave the type zero normally.  But, pax interchange
1374
     * format allows hard links to have data, which
1375
     * implies that the underlying entry is a regular
1376
     * file.
1377
     */
1378
0
    if (archive_entry_size(entry) > 0)
1379
0
      archive_entry_set_filetype(entry, AE_IFREG);
1380
1381
    /*
1382
     * A tricky point: Traditionally, tar readers have
1383
     * ignored the size field when reading hardlink
1384
     * entries, and some writers put non-zero sizes even
1385
     * though the body is empty.  POSIX blessed this
1386
     * convention in the 1988 standard, but broke with
1387
     * this tradition in 2001 by permitting hardlink
1388
     * entries to store valid bodies in pax interchange
1389
     * format, but not in ustar format.  Since there is no
1390
     * hard and fast way to distinguish pax interchange
1391
     * from earlier archives (the 'x' and 'g' entries are
1392
     * optional, after all), we need a heuristic.
1393
     */
1394
0
    if (archive_entry_size(entry) == 0) {
1395
      /* If the size is already zero, we're done. */
1396
0
    }  else if (a->archive.archive_format
1397
0
        == ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) {
1398
      /* Definitely pax extended; must obey hardlink size. */
1399
0
    } else if (a->archive.archive_format == ARCHIVE_FORMAT_TAR
1400
0
        || a->archive.archive_format == ARCHIVE_FORMAT_TAR_GNUTAR)
1401
0
    {
1402
      /* Old-style or GNU tar: we must ignore the size. */
1403
0
      archive_entry_set_size(entry, 0);
1404
0
      tar->entry_bytes_remaining = 0;
1405
0
    } else if (archive_read_format_tar_bid(a, 50) > 50) {
1406
      /*
1407
       * We don't know if it's pax: If the bid
1408
       * function sees a valid ustar header
1409
       * immediately following, then let's ignore
1410
       * the hardlink size.
1411
       */
1412
0
      archive_entry_set_size(entry, 0);
1413
0
      tar->entry_bytes_remaining = 0;
1414
0
    }
1415
    /*
1416
     * TODO: There are still two cases I'd like to handle:
1417
     *   = a ustar non-pax archive with a hardlink entry at
1418
     *     end-of-archive.  (Look for block of nulls following?)
1419
     *   = a pax archive that has not seen any pax headers
1420
     *     and has an entry which is a hardlink entry storing
1421
     *     a body containing an uncompressed tar archive.
1422
     * The first is worth addressing; I don't see any reliable
1423
     * way to deal with the second possibility.
1424
     */
1425
0
    break;
1426
0
  case '2': /* Symlink */
1427
0
    archive_entry_set_link_to_symlink(entry);
1428
0
    existing_wcs_linkpath = archive_entry_symlink_w(entry);
1429
0
    existing_linkpath = archive_entry_symlink(entry);
1430
0
    if ((existing_linkpath == NULL || existing_linkpath[0] == '\0')
1431
0
        && (existing_wcs_linkpath == NULL || existing_wcs_linkpath[0] == '\0')) {
1432
0
      struct archive_string linkpath;
1433
0
      archive_string_init(&linkpath);
1434
0
      archive_strncpy(&linkpath,
1435
0
          header->linkname, sizeof(header->linkname));
1436
0
      if (archive_entry_copy_symlink_l(entry, linkpath.s,
1437
0
          archive_strlen(&linkpath), tar->sconv) != 0) {
1438
0
        err = set_conversion_failed_error(a, tar->sconv,
1439
0
            "Linkname");
1440
0
        if (err == ARCHIVE_FATAL) {
1441
0
          archive_string_free(&linkpath);
1442
0
          return (err);
1443
0
        }
1444
0
      }
1445
0
      archive_string_free(&linkpath);
1446
0
    }
1447
0
    archive_entry_set_filetype(entry, AE_IFLNK);
1448
0
    archive_entry_set_size(entry, 0);
1449
0
    tar->entry_bytes_remaining = 0;
1450
0
    break;
1451
0
  case '3': /* Character device */
1452
0
    archive_entry_set_filetype(entry, AE_IFCHR);
1453
0
    archive_entry_set_size(entry, 0);
1454
0
    tar->entry_bytes_remaining = 0;
1455
0
    break;
1456
0
  case '4': /* Block device */
1457
0
    archive_entry_set_filetype(entry, AE_IFBLK);
1458
0
    archive_entry_set_size(entry, 0);
1459
0
    tar->entry_bytes_remaining = 0;
1460
0
    break;
1461
0
  case '5': /* Dir */
1462
0
    archive_entry_set_filetype(entry, AE_IFDIR);
1463
0
    archive_entry_set_size(entry, 0);
1464
0
    tar->entry_bytes_remaining = 0;
1465
0
    break;
1466
0
  case '6': /* FIFO device */
1467
0
    archive_entry_set_filetype(entry, AE_IFIFO);
1468
0
    archive_entry_set_size(entry, 0);
1469
0
    tar->entry_bytes_remaining = 0;
1470
0
    break;
1471
0
  case 'D': /* GNU incremental directory type */
1472
    /*
1473
     * No special handling is actually required here.
1474
     * It might be nice someday to preprocess the file list and
1475
     * provide it to the client, though.
1476
     */
1477
0
    archive_entry_set_filetype(entry, AE_IFDIR);
1478
0
    break;
1479
0
  case 'M': /* GNU "Multi-volume" (remainder of file from last archive)*/
1480
    /*
1481
     * As far as I can tell, this is just like a regular file
1482
     * entry, except that the contents should be _appended_ to
1483
     * the indicated file at the indicated offset.  This may
1484
     * require some API work to fully support.
1485
     */
1486
0
    break;
1487
0
  case 'N': /* Old GNU "long filename" entry. */
1488
    /* The body of this entry is a script for renaming
1489
     * previously-extracted entries.  Ugh.  It will never
1490
     * be supported by libarchive. */
1491
0
    archive_entry_set_filetype(entry, AE_IFREG);
1492
0
    break;
1493
0
  case 'S': /* GNU sparse files */
1494
    /*
1495
     * Sparse files are really just regular files with
1496
     * sparse information in the extended area.
1497
     */
1498
    /* FALLTHROUGH */
1499
1
  case '0': /* ustar "regular" file */
1500
    /* FALLTHROUGH */
1501
5
  default: /* Non-standard file types */
1502
    /*
1503
     * Per POSIX: non-recognized types should always be
1504
     * treated as regular files.
1505
     */
1506
5
    archive_entry_set_filetype(entry, AE_IFREG);
1507
5
    break;
1508
5
  }
1509
5
  return (err);
1510
5
}
1511
1512
/*
1513
 * Parse out header elements for "old-style" tar archives.
1514
 */
1515
static int
1516
header_old_tar(struct archive_read *a, struct tar *tar,
1517
    struct archive_entry *entry, const void *h)
1518
4
{
1519
4
  const struct archive_entry_header_ustar *header;
1520
4
  int err = ARCHIVE_OK, err2;
1521
1522
  /* Copy filename over (to ensure null termination). */
1523
4
  header = (const struct archive_entry_header_ustar *)h;
1524
4
  if (archive_entry_copy_pathname_l(entry,
1525
4
      header->name, sizeof(header->name), tar->sconv) != 0) {
1526
0
    err = set_conversion_failed_error(a, tar->sconv, "Pathname");
1527
0
    if (err == ARCHIVE_FATAL)
1528
0
      return (err);
1529
0
  }
1530
1531
  /* Grab rest of common fields */
1532
4
  err2 = header_common(a, tar, entry, h);
1533
4
  if (err > err2)
1534
0
    err = err2;
1535
1536
4
  tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
1537
4
  return (err);
1538
4
}
1539
1540
/*
 * Is this likely an AppleDouble extension?
 *
 * Looks at the last element of the entry's pathname (a trailing '/'
 * does not start a new element) and reports whether it begins with
 * "._" followed by at least one more character.  The wide-character
 * pathname is preferred; the narrow pathname is used only when no
 * wide-character form is available.
 *
 * Returns 1 for a match and 0 otherwise.  An entry with no pathname
 * at all is reported as "not a Mac extension".
 */
static int
is_mac_metadata_entry(struct archive_entry *entry) {
  const char *p, *name;
  const wchar_t *wp, *wname;

  wname = wp = archive_entry_pathname_w(entry);
  if (wp != NULL) {
    /* Find the last path element. */
    for (; *wp != L'\0'; ++wp) {
      if (wp[0] == L'/' && wp[1] != L'\0')
        wname = wp + 1;
    }
    /*
     * If last path element starts with "._", then
     * this is a Mac extension.
     */
    if (wname[0] == L'.' && wname[1] == L'_' && wname[2] != L'\0')
      return 1;
  } else {
    /* Find the last path element. */
    name = p = archive_entry_pathname(entry);
    /*
     * This function is a yes/no predicate, so a missing name
     * must answer "no".  Returning an ARCHIVE_* error code here
     * would be non-zero and therefore read as "yes" by any
     * caller that tests the result for truth.
     */
    if (p == NULL)
      return 0;
    for (; *p != '\0'; ++p) {
      if (p[0] == '/' && p[1] != '\0')
        name = p + 1;
    }
    /*
     * If last path element starts with "._", then
     * this is a Mac extension.
     */
    if (name[0] == '.' && name[1] == '_' && name[2] != '\0')
      return 1;
  }
  /* Not a mac extension */
  return 0;
}
1580
1581
/*
1582
 * Read a Mac AppleDouble-encoded blob of file metadata,
1583
 * if there is one.
1584
 *
1585
 * TODO: In Libarchive 4, we should consider ripping this
1586
 * out -- instead, return a file starting with `._` as
1587
 * a regular file and let the client (or archive_write logic)
1588
 * handle it.
1589
 */
1590
static int
1591
read_mac_metadata_blob(struct archive_read *a,
1592
    struct archive_entry *entry, size_t *unconsumed)
1593
0
{
1594
0
  int64_t size;
1595
0
  size_t msize;
1596
0
  const void *data;
1597
1598
  /* Read the body as a Mac OS metadata blob. */
1599
0
  size = archive_entry_size(entry);
1600
0
  msize = (size_t)size;
1601
0
  if (size < 0 || (uintmax_t)msize != (uintmax_t)size) {
1602
0
    *unconsumed = 0;
1603
0
    return (ARCHIVE_FATAL);
1604
0
  }
1605
1606
  /* TODO: Should this merely skip the overlarge entry and
1607
   * WARN?  Or is xattr_limit sufficiently large that we can
1608
   * safely assume anything larger is malicious? */
1609
0
  if (size > (int64_t)xattr_limit) {
1610
0
    archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1611
0
        "Oversized AppleDouble extension has size %llu > %llu",
1612
0
        (unsigned long long)size,
1613
0
        (unsigned long long)xattr_limit);
1614
0
    return (ARCHIVE_FATAL);
1615
0
  }
1616
1617
  /*
1618
   * TODO: Look beyond the body here to peek at the next header.
1619
   * If it's a regular header (not an extension header)
1620
   * that has the wrong name, just return the current
1621
   * entry as-is, without consuming the body here.
1622
   * That would reduce the risk of us mis-identifying
1623
   * an ordinary file that just happened to have
1624
   * a name starting with "._".
1625
   *
1626
   * Q: Is the above idea really possible?  Even
1627
   * when there are GNU or pax extension entries?
1628
   */
1629
0
  tar_flush_unconsumed(a, unconsumed);
1630
0
  data = __archive_read_ahead(a, msize, NULL);
1631
0
  if (data == NULL) {
1632
0
    *unconsumed = 0;
1633
0
    return (ARCHIVE_FATAL);
1634
0
  }
1635
0
  archive_entry_clear(entry);
1636
0
  archive_entry_copy_mac_metadata(entry, data, msize);
1637
0
  *unconsumed = (msize + 511) & ~ 511;
1638
0
  return (ARCHIVE_OK);
1639
0
}
1640
1641
/*
1642
 * Parse a file header for a pax extended archive entry.
1643
 */
1644
static int
1645
header_pax_global(struct archive_read *a, struct tar *tar,
1646
    struct archive_entry *entry, const void *h, size_t *unconsumed)
1647
0
{
1648
0
  const struct archive_entry_header_ustar *header;
1649
0
  int64_t size, to_consume;
1650
1651
0
  (void)a; /* UNUSED */
1652
0
  (void)tar; /* UNUSED */
1653
0
  (void)entry; /* UNUSED */
1654
1655
0
  header = (const struct archive_entry_header_ustar *)h;
1656
0
  size = tar_atol(header->size, sizeof(header->size));
1657
0
  if (size > entry_limit) {
1658
0
    return (ARCHIVE_FATAL);
1659
0
  }
1660
0
  to_consume = ((size + 511) & ~511);
1661
0
  *unconsumed += to_consume;
1662
0
  return (ARCHIVE_OK);
1663
0
}
1664
1665
/*
1666
 * Parse a file header for a Posix "ustar" archive entry.  This also
1667
 * handles "pax" or "extended ustar" entries.
1668
 *
1669
 * In order to correctly handle pax attributes (which precede this),
1670
 * we have to skip parsing any field for which the entry already has
1671
 * contents.
1672
 */
1673
static int
1674
header_ustar(struct archive_read *a, struct tar *tar,
1675
    struct archive_entry *entry, const void *h)
1676
0
{
1677
0
  const struct archive_entry_header_ustar *header;
1678
0
  struct archive_string as;
1679
0
  int err = ARCHIVE_OK, r;
1680
1681
0
  header = (const struct archive_entry_header_ustar *)h;
1682
1683
  /* Copy name into an internal buffer to ensure null-termination. */
1684
0
  const char *existing_pathname = archive_entry_pathname(entry);
1685
0
  const wchar_t *existing_wcs_pathname = archive_entry_pathname_w(entry);
1686
0
  if ((existing_pathname == NULL || existing_pathname[0] == '\0')
1687
0
      && (existing_wcs_pathname == NULL || existing_wcs_pathname[0] == '\0')) {
1688
0
    archive_string_init(&as);
1689
0
    if (header->prefix[0]) {
1690
0
      archive_strncpy(&as, header->prefix, sizeof(header->prefix));
1691
0
      if (as.s[archive_strlen(&as) - 1] != '/')
1692
0
        archive_strappend_char(&as, '/');
1693
0
      archive_strncat(&as, header->name, sizeof(header->name));
1694
0
    } else {
1695
0
      archive_strncpy(&as, header->name, sizeof(header->name));
1696
0
    }
1697
0
    if (archive_entry_copy_pathname_l(entry, as.s, archive_strlen(&as),
1698
0
        tar->sconv) != 0) {
1699
0
      err = set_conversion_failed_error(a, tar->sconv, "Pathname");
1700
0
      if (err == ARCHIVE_FATAL)
1701
0
        return (err);
1702
0
    }
1703
0
    archive_string_free(&as);
1704
0
  }
1705
1706
  /* Handle rest of common fields. */
1707
0
  r = header_common(a, tar, entry, h);
1708
0
  if (r == ARCHIVE_FATAL)
1709
0
    return (r);
1710
0
  if (r < err)
1711
0
    err = r;
1712
1713
  /* Handle POSIX ustar fields. */
1714
0
  const char *existing_uname = archive_entry_uname(entry);
1715
0
  if (existing_uname == NULL || existing_uname[0] == '\0') {
1716
0
    if (archive_entry_copy_uname_l(entry,
1717
0
        header->uname, sizeof(header->uname), tar->sconv) != 0) {
1718
0
      err = set_conversion_failed_error(a, tar->sconv, "Uname");
1719
0
      if (err == ARCHIVE_FATAL)
1720
0
        return (err);
1721
0
    }
1722
0
  }
1723
1724
0
  const char *existing_gname = archive_entry_gname(entry);
1725
0
  if (existing_gname == NULL || existing_gname[0] == '\0') {
1726
0
    if (archive_entry_copy_gname_l(entry,
1727
0
        header->gname, sizeof(header->gname), tar->sconv) != 0) {
1728
0
      err = set_conversion_failed_error(a, tar->sconv, "Gname");
1729
0
      if (err == ARCHIVE_FATAL)
1730
0
        return (err);
1731
0
    }
1732
0
  }
1733
1734
  /* Parse out device numbers only for char and block specials. */
1735
0
  if (header->typeflag[0] == '3' || header->typeflag[0] == '4') {
1736
0
    if (!archive_entry_rdev_is_set(entry)) {
1737
0
      archive_entry_set_rdevmajor(entry, (dev_t)
1738
0
          tar_atol(header->rdevmajor, sizeof(header->rdevmajor)));
1739
0
      archive_entry_set_rdevminor(entry, (dev_t)
1740
0
          tar_atol(header->rdevminor, sizeof(header->rdevminor)));
1741
0
    }
1742
0
  } else {
1743
0
    archive_entry_set_rdev(entry, 0);
1744
0
  }
1745
1746
0
  tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
1747
1748
0
  return (err);
1749
0
}
1750
1751
static int
1752
header_pax_extension(struct archive_read *a, struct tar *tar,
1753
    struct archive_entry *entry, const void *h, size_t *unconsumed)
1754
0
{
1755
  /* Sanity checks: The largest `x` body I've ever heard of was
1756
   * a little over 4MB.  So I doubt there has ever been a
1757
   * well-formed archive with an `x` body over 1GiB.  Similarly,
1758
   * it seems plausible that no single attribute has ever been
1759
   * larger than 100MB.  So if we see a larger value here, it's
1760
   * almost certainly a sign of a corrupted/malicious archive. */
1761
1762
  /* Maximum sane size for extension body: 1 GiB */
1763
  /* This cannot be raised to larger than 8GiB without
1764
   * exceeding the maximum size for a standard ustar
1765
   * entry. */
1766
0
  const int64_t ext_size_limit = 1024 * 1024 * (int64_t)1024;
1767
  /* Maximum size for a single line/attr: 100 million characters */
1768
  /* This cannot be raised to more than 2GiB without exceeding
1769
   * a `size_t` on 32-bit platforms. */
1770
0
  const size_t max_parsed_line_length = 99999999ULL;
1771
  /* Largest attribute prolog:  size + name. */
1772
0
  const size_t max_size_name = 512;
1773
1774
  /* Size and padding of the full extension body */
1775
0
  int64_t ext_size, ext_padding;
1776
0
  size_t line_length, value_length, name_length;
1777
0
  ssize_t to_read, did_read;
1778
0
  const struct archive_entry_header_ustar *header;
1779
0
  const char *p, *attr_start, *name_start;
1780
0
  struct archive_string_conv *sconv;
1781
0
  struct archive_string *pas = NULL;
1782
0
  struct archive_string attr_name;
1783
0
  int err = ARCHIVE_OK, r;
1784
1785
0
  header = (const struct archive_entry_header_ustar *)h;
1786
0
  ext_size  = tar_atol(header->size, sizeof(header->size));
1787
0
  if (ext_size > entry_limit) {
1788
0
    return (ARCHIVE_FATAL);
1789
0
  }
1790
0
  if (ext_size < 0) {
1791
0
    archive_set_error(&a->archive, EINVAL,
1792
0
          "pax extension header has invalid size: %lld",
1793
0
          (long long)ext_size);
1794
0
    return (ARCHIVE_FATAL);
1795
0
  }
1796
1797
0
  ext_padding = 0x1ff & (-ext_size);
1798
0
  if (ext_size > ext_size_limit) {
1799
    /* Consume the pax extension body and return an error */
1800
0
    if (ext_size + ext_padding != __archive_read_consume(a, ext_size + ext_padding)) {
1801
0
      return (ARCHIVE_FATAL);
1802
0
    }
1803
0
    archive_set_error(&a->archive, EINVAL,
1804
0
        "Ignoring oversized pax extensions: %d > %d",
1805
0
        (int)ext_size, (int)ext_size_limit);
1806
0
    return (ARCHIVE_WARN);
1807
0
  }
1808
0
  tar_flush_unconsumed(a, unconsumed);
1809
1810
  /* Parse the size/name of each pax attribute in the body */
1811
0
  archive_string_init(&attr_name);
1812
0
  while (ext_size > 0) {
1813
    /* Read enough bytes to parse the size/name of the next attribute */
1814
0
    to_read = max_size_name;
1815
0
    if (to_read > ext_size) {
1816
0
      to_read = ext_size;
1817
0
    }
1818
0
    p = __archive_read_ahead(a, to_read, &did_read);
1819
0
    if (did_read < 0) {
1820
0
      return ((int)did_read);
1821
0
    }
1822
0
    if (did_read == 0) { /* EOF */
1823
0
      archive_set_error(&a->archive, EINVAL,
1824
0
            "Truncated tar archive"
1825
0
            " detected while reading pax attribute name");
1826
0
      return (ARCHIVE_FATAL);
1827
0
    }
1828
0
    if (did_read > ext_size) {
1829
0
      did_read = ext_size;
1830
0
    }
1831
1832
    /* Parse size of attribute */
1833
0
    line_length = 0;
1834
0
    attr_start = p;
1835
0
    while (1) {
1836
0
      if (p >= attr_start + did_read) {
1837
0
        archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1838
0
              "Ignoring malformed pax attributes: overlarge attribute size field");
1839
0
        *unconsumed += ext_size + ext_padding;
1840
0
        return (ARCHIVE_WARN);
1841
0
      }
1842
0
      if (*p == ' ') {
1843
0
        p++;
1844
0
        break;
1845
0
      }
1846
0
      if (*p < '0' || *p > '9') {
1847
0
        archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1848
0
              "Ignoring malformed pax attributes: malformed attribute size field");
1849
0
        *unconsumed += ext_size + ext_padding;
1850
0
        return (ARCHIVE_WARN);
1851
0
      }
1852
0
      line_length *= 10;
1853
0
      line_length += *p - '0';
1854
0
      if (line_length > max_parsed_line_length) {
1855
0
        archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1856
0
              "Ignoring malformed pax attribute: size > %lld",
1857
0
              (long long)max_parsed_line_length);
1858
0
        *unconsumed += ext_size + ext_padding;
1859
0
        return (ARCHIVE_WARN);
1860
0
      }
1861
0
      p++;
1862
0
    }
1863
1864
0
    if ((int64_t)line_length > ext_size) {
1865
0
        archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1866
0
              "Ignoring malformed pax attribute:  %lld > %lld",
1867
0
              (long long)line_length, (long long)ext_size);
1868
0
        *unconsumed += ext_size + ext_padding;
1869
0
        return (ARCHIVE_WARN);
1870
0
    }
1871
1872
    /* Parse name of attribute */
1873
0
    if (p >= attr_start + did_read
1874
0
        || p >= attr_start + line_length
1875
0
        || *p == '=') {
1876
0
      archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1877
0
            "Ignoring malformed pax attributes: empty name found");
1878
0
      *unconsumed += ext_size + ext_padding;
1879
0
      return (ARCHIVE_WARN);
1880
0
    }
1881
0
    name_start = p;
1882
0
    while (1) {
1883
0
      if (p >= attr_start + did_read || p >= attr_start + line_length) {
1884
0
        archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1885
0
              "Ignoring malformed pax attributes: overlarge attribute name");
1886
0
        *unconsumed += ext_size + ext_padding;
1887
0
        return (ARCHIVE_WARN);
1888
0
      }
1889
0
      if (*p == '=') {
1890
0
        break;
1891
0
      }
1892
0
      p++;
1893
0
    }
1894
0
    name_length = p - name_start;
1895
0
    p++; // Skip '='
1896
1897
0
    archive_strncpy(&attr_name, name_start, name_length);
1898
1899
0
    ext_size -= p - attr_start;
1900
0
    value_length = line_length - (p - attr_start);
1901
1902
    /* Consume size, name, and `=` */
1903
0
    *unconsumed += p - attr_start;
1904
0
    tar_flush_unconsumed(a, unconsumed);
1905
1906
    /* pax_attribute will consume value_length - 1 */
1907
0
    r = pax_attribute(a, tar, entry, attr_name.s, archive_strlen(&attr_name), value_length - 1, unconsumed);
1908
0
    ext_size -= value_length - 1;
1909
1910
0
    if (r < ARCHIVE_WARN) {
1911
0
      *unconsumed += ext_size + ext_padding;
1912
0
      return (r);
1913
0
    }
1914
0
    err = err_combine(err, r);
1915
1916
    /* Consume the `\n` that follows the pax attribute value. */
1917
0
    tar_flush_unconsumed(a, unconsumed);
1918
0
    p = __archive_read_ahead(a, 1, &did_read);
1919
0
    if (did_read < 0) {
1920
0
      return ((int)did_read);
1921
0
    }
1922
0
    if (did_read == 0) {
1923
0
      archive_set_error(&a->archive, EINVAL,
1924
0
            "Truncated tar archive"
1925
0
            " detected while completing pax attribute");
1926
0
      return (ARCHIVE_FATAL);
1927
0
    }
1928
0
    if (p[0] != '\n') {
1929
0
      archive_set_error(&a->archive, EINVAL,
1930
0
            "Malformed pax attributes");
1931
0
      *unconsumed += ext_size + ext_padding;
1932
0
      return (ARCHIVE_WARN);
1933
0
    }
1934
0
    ext_size -= 1;
1935
0
    *unconsumed += 1;
1936
0
    tar_flush_unconsumed(a, unconsumed);
1937
0
  }
1938
0
  *unconsumed += ext_size + ext_padding;
1939
1940
  /*
1941
   * Some PAX values -- pathname, linkpath, uname, gname --
1942
   * can't be copied into the entry until we know the character
1943
   * set to use:
1944
   */
1945
0
  if (!tar->pax_hdrcharset_utf8)
1946
    /* PAX specified "BINARY", so use the default charset */
1947
0
    sconv = tar->opt_sconv;
1948
0
  else {
1949
    /* PAX default UTF-8 */
1950
0
    sconv = archive_string_conversion_from_charset(
1951
0
        &(a->archive), "UTF-8", 1);
1952
0
    if (sconv == NULL)
1953
0
      return (ARCHIVE_FATAL);
1954
0
    if (tar->compat_2x)
1955
0
      archive_string_conversion_set_opt(sconv,
1956
0
          SCONV_SET_OPT_UTF8_LIBARCHIVE2X);
1957
0
  }
1958
1959
  /* Pathname */
1960
0
  pas = NULL;
1961
0
  if (archive_strlen(&(tar->entry_pathname_override)) > 0) {
1962
    /* Prefer GNU.sparse.name attribute if present */
1963
    /* GNU sparse files store a fake name under the standard
1964
     * "pathname" key. */
1965
0
    pas = &(tar->entry_pathname_override);
1966
0
  } else if (archive_strlen(&(tar->entry_pathname)) > 0) {
1967
    /* Use standard "pathname" PAX extension */
1968
0
    pas = &(tar->entry_pathname);
1969
0
  }
1970
0
  if (pas != NULL) {
1971
0
    if (archive_entry_copy_pathname_l(entry, pas->s,
1972
0
        archive_strlen(pas), sconv) != 0) {
1973
0
      err = set_conversion_failed_error(a, sconv, "Pathname");
1974
0
      if (err == ARCHIVE_FATAL)
1975
0
        return (err);
1976
      /* Use raw name without conversion */
1977
0
      archive_entry_copy_pathname(entry, pas->s);
1978
0
    }
1979
0
  }
1980
  /* Uname */
1981
0
  if (archive_strlen(&(tar->entry_uname)) > 0) {
1982
0
    if (archive_entry_copy_uname_l(entry, tar->entry_uname.s,
1983
0
        archive_strlen(&(tar->entry_uname)), sconv) != 0) {
1984
0
      err = set_conversion_failed_error(a, sconv, "Uname");
1985
0
      if (err == ARCHIVE_FATAL)
1986
0
        return (err);
1987
      /* Use raw name without conversion */
1988
0
      archive_entry_copy_uname(entry, tar->entry_uname.s);
1989
0
    }
1990
0
  }
1991
  /* Gname */
1992
0
  if (archive_strlen(&(tar->entry_gname)) > 0) {
1993
0
    if (archive_entry_copy_gname_l(entry, tar->entry_gname.s,
1994
0
        archive_strlen(&(tar->entry_gname)), sconv) != 0) {
1995
0
      err = set_conversion_failed_error(a, sconv, "Gname");
1996
0
      if (err == ARCHIVE_FATAL)
1997
0
        return (err);
1998
      /* Use raw name without conversion */
1999
0
      archive_entry_copy_gname(entry, tar->entry_gname.s);
2000
0
    }
2001
0
  }
2002
  /* Linkpath */
2003
0
  if (archive_strlen(&(tar->entry_linkpath)) > 0) {
2004
0
    if (archive_entry_copy_link_l(entry, tar->entry_linkpath.s,
2005
0
        archive_strlen(&(tar->entry_linkpath)), sconv) != 0) {
2006
0
      err = set_conversion_failed_error(a, sconv, "Linkpath");
2007
0
      if (err == ARCHIVE_FATAL)
2008
0
        return (err);
2009
      /* Use raw name without conversion */
2010
0
      archive_entry_copy_link(entry, tar->entry_linkpath.s);
2011
0
    }
2012
0
  }
2013
2014
  /* Extension may have given us a corrected `entry_bytes_remaining` for
2015
   * the main entry; update the padding appropriately. */
2016
0
  tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
2017
0
  return (err);
2018
0
}
2019
2020
/*
 * Add an extended attribute to `entry`, taking the attribute name
 * through url_decode() and the attribute value through
 * base64_decode() first.
 *
 * Returns 0 on success, 3 if the name is empty, 2 if the name could
 * not be decoded, and 1 if the value could not be decoded.
 */
static int
pax_attribute_LIBARCHIVE_xattr(struct archive_entry *entry,
  const char *name, size_t name_length, const char *value, size_t value_length)
{
  size_t decoded_size;
  void *decoded_value;
  char *decoded_name;
  int status;

  if (name_length == 0)
    return 3;

  decoded_name = url_decode(name, name_length);
  if (decoded_name == NULL)
    return 2;

  decoded_value = base64_decode(value, value_length, &decoded_size);
  if (decoded_value != NULL) {
    archive_entry_xattr_add_entry(entry, decoded_name,
      decoded_value, decoded_size);
    free(decoded_value);
    status = 0;
  } else {
    status = 1;
  }

  /* The decoded name is a temporary on every path. */
  free(decoded_name);
  return status;
}
2050
2051
/*
 * Add an extended attribute to `entry` using the raw name and value.
 *
 * `name` is given with an explicit length, so a NUL-terminated copy
 * is made before it is handed to archive_entry_xattr_add_entry().
 *
 * Returns 1 if the name is empty or longer than 128 bytes, and 0
 * otherwise.  Note that 0 is also returned when the temporary copy
 * of the name cannot be allocated; in that case nothing is added.
 */
static int
pax_attribute_SCHILY_xattr(struct archive_entry *entry,
  const char *name, size_t name_length, const char *value, size_t value_length)
{
  char *name_copy;

  if (name_length == 0 || name_length > 128)
    return 1;

  name_copy = malloc(name_length + 1);
  if (name_copy == NULL)
    return 0;

  memcpy(name_copy, name, name_length);
  name_copy[name_length] = '\0';
  archive_entry_xattr_add_entry(entry, name_copy, value, value_length);
  free(name_copy);

  return 0;
}
2069
2070
/*
 * Store `value` on `entry` as the "security.selinux" extended
 * attribute.  Always returns 0.
 */
static int
pax_attribute_RHT_security_selinux(struct archive_entry *entry,
  const char *value, size_t value_length)
{
  static const char selinux_xattr[] = "security.selinux";

  archive_entry_xattr_add_entry(entry, selinux_xattr, value, value_length);
  return 0;
}
2079
2080
static int
2081
pax_attribute_SCHILY_acl(struct archive_read *a, struct tar *tar,
2082
  struct archive_entry *entry, size_t value_length, int type)
2083
0
{
2084
0
  int r;
2085
0
  const char *p;
2086
0
  const char* errstr;
2087
2088
0
  switch (type) {
2089
0
  case ARCHIVE_ENTRY_ACL_TYPE_ACCESS:
2090
0
    errstr = "SCHILY.acl.access";
2091
0
    break;
2092
0
  case ARCHIVE_ENTRY_ACL_TYPE_DEFAULT:
2093
0
    errstr = "SCHILY.acl.default";
2094
0
    break;
2095
0
  case ARCHIVE_ENTRY_ACL_TYPE_NFS4:
2096
0
    errstr = "SCHILY.acl.ace";
2097
0
    break;
2098
0
  default:
2099
0
    archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
2100
0
        "Unknown ACL type: %d", type);
2101
0
    return(ARCHIVE_FATAL);
2102
0
  }
2103
2104
0
  if (tar->sconv_acl == NULL) {
2105
0
    tar->sconv_acl =
2106
0
        archive_string_conversion_from_charset(
2107
0
      &(a->archive), "UTF-8", 1);
2108
0
    if (tar->sconv_acl == NULL)
2109
0
      return (ARCHIVE_FATAL);
2110
0
  }
2111
2112
0
  if (value_length > acl_limit) {
2113
0
    __archive_read_consume(a, value_length);
2114
0
    archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
2115
0
          "Unreasonably large ACL: %d > %d",
2116
0
          (int)value_length, (int)acl_limit);
2117
0
    return (ARCHIVE_WARN);
2118
0
  }
2119
2120
0
  p = __archive_read_ahead(a, value_length, NULL);
2121
0
  if (p == NULL) {
2122
0
    archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2123
0
          "Truncated tar archive "
2124
0
          "detected while reading ACL data");
2125
0
    return (ARCHIVE_FATAL);
2126
0
  }
2127
2128
0
  r = archive_acl_from_text_nl(archive_entry_acl(entry), p, value_length,
2129
0
      type, tar->sconv_acl);
2130
0
  __archive_read_consume(a, value_length);
2131
  /* Workaround: Force perm_is_set() to be correct */
2132
  /* If this bit were stored in the ACL, this wouldn't be needed */
2133
0
  archive_entry_set_perm(entry, archive_entry_perm(entry));
2134
0
  if (r != ARCHIVE_OK) {
2135
0
    if (r == ARCHIVE_FATAL) {
2136
0
      archive_set_error(&a->archive, ENOMEM,
2137
0
          "%s %s", "Can't allocate memory for ",
2138
0
          errstr);
2139
0
      return (r);
2140
0
    }
2141
0
    archive_set_error(&a->archive,
2142
0
        ARCHIVE_ERRNO_MISC, "%s %s", "Parse error: ", errstr);
2143
0
  }
2144
0
  return (r);
2145
0
}
2146
2147
static int
2148
0
pax_attribute_read_time(struct archive_read *a, size_t value_length, int64_t *ps, long *pn, size_t *unconsumed) {
2149
0
  struct archive_string as;
2150
0
  int r;
2151
2152
0
  if (value_length > 128) {
2153
0
    __archive_read_consume(a, value_length);
2154
0
    *ps = 0;
2155
0
    *pn = 0;
2156
0
    return (ARCHIVE_FATAL);
2157
0
  }
2158
2159
0
  archive_string_init(&as);
2160
0
  r = read_bytes_to_string(a, &as, value_length, unconsumed);
2161
0
  if (r < ARCHIVE_OK) {
2162
0
    archive_string_free(&as);
2163
0
    return (r);
2164
0
  }
2165
2166
0
  pax_time(as.s, archive_strlen(&as), ps, pn);
2167
0
  archive_string_free(&as);
2168
0
  if (*ps < 0 || *ps == INT64_MAX) {
2169
0
    return (ARCHIVE_WARN);
2170
0
  }
2171
0
  return (ARCHIVE_OK);
2172
0
}
2173
2174
static int
2175
0
pax_attribute_read_number(struct archive_read *a, size_t value_length, int64_t *result) {
2176
0
  struct archive_string as;
2177
0
  size_t unconsumed = 0;
2178
0
  int r;
2179
2180
0
  if (value_length > 64) {
2181
0
    __archive_read_consume(a, value_length);
2182
0
    *result = 0;
2183
0
    return (ARCHIVE_FATAL);
2184
0
  }
2185
2186
0
  archive_string_init(&as);
2187
0
  r = read_bytes_to_string(a, &as, value_length, &unconsumed);
2188
0
  tar_flush_unconsumed(a, &unconsumed);
2189
0
  if (r < ARCHIVE_OK) {
2190
0
    archive_string_free(&as);
2191
0
    return (r);
2192
0
  }
2193
2194
0
  *result = tar_atol10(as.s, archive_strlen(&as));
2195
0
  archive_string_free(&as);
2196
0
  if (*result < 0 || *result == INT64_MAX) {
2197
0
    *result = INT64_MAX;
2198
0
    return (ARCHIVE_WARN);
2199
0
  }
2200
0
  return (ARCHIVE_OK);
2201
0
}
2202
2203
/*
2204
 * Parse a single key=value attribute.
2205
 *
2206
 * POSIX reserves all-lowercase keywords.  Vendor-specific extensions
2207
 * should always have keywords of the form "VENDOR.attribute".  In
2208
 * particular, it's quite feasible to support many different vendor
2209
 * extensions here.  I'm using "LIBARCHIVE" for extensions unique to
2210
 * this library.
2211
 *
2212
 * TODO: Investigate other vendor-specific extensions and see if
2213
 * any of them look useful.
2214
 */
2215
static int
2216
pax_attribute(struct archive_read *a, struct tar *tar, struct archive_entry *entry,
2217
        const char *key, size_t key_length, size_t value_length, size_t *unconsumed)
2218
0
{
2219
0
  int64_t t;
2220
0
  long n;
2221
0
  const char *p;
2222
0
  ssize_t bytes_read;
2223
0
  int err = ARCHIVE_OK;
2224
2225
0
  switch (key[0]) {
2226
0
  case 'G':
2227
    /* GNU.* extensions */
2228
0
    if (key_length > 4 && memcmp(key, "GNU.", 4) == 0) {
2229
0
      key += 4;
2230
0
      key_length -= 4;
2231
2232
      /* GNU.sparse marks the existence of GNU sparse information */
2233
0
      if (key_length == 6 && memcmp(key, "sparse", 6) == 0) {
2234
0
        tar->sparse_gnu_attributes_seen = 1;
2235
0
      }
2236
2237
      /* GNU.sparse.* extensions */
2238
0
      else if (key_length > 7 && memcmp(key, "sparse.", 7) == 0) {
2239
0
        tar->sparse_gnu_attributes_seen = 1;
2240
0
        key += 7;
2241
0
        key_length -= 7;
2242
2243
        /* GNU "0.0" sparse pax format. */
2244
0
        if (key_length == 9 && memcmp(key, "numblocks", 9) == 0) {
2245
          /* GNU.sparse.numblocks */
2246
0
          tar->sparse_offset = -1;
2247
0
          tar->sparse_numbytes = -1;
2248
0
          tar->sparse_gnu_major = 0;
2249
0
          tar->sparse_gnu_minor = 0;
2250
0
        }
2251
0
        else if (key_length == 6 && memcmp(key, "offset", 6) == 0) {
2252
          /* GNU.sparse.offset */
2253
0
          if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2254
0
            tar->sparse_offset = t;
2255
0
            if (tar->sparse_numbytes != -1) {
2256
0
              if (gnu_add_sparse_entry(a, tar,
2257
0
                   tar->sparse_offset, tar->sparse_numbytes)
2258
0
                  != ARCHIVE_OK)
2259
0
                return (ARCHIVE_FATAL);
2260
0
              tar->sparse_offset = -1;
2261
0
              tar->sparse_numbytes = -1;
2262
0
            }
2263
0
          }
2264
0
          return (err);
2265
0
        }
2266
0
        else if (key_length == 8 && memcmp(key, "numbytes", 8) == 0) {
2267
          /* GNU.sparse.numbytes */
2268
0
          if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2269
0
            tar->sparse_numbytes = t;
2270
0
            if (tar->sparse_offset != -1) {
2271
0
              if (gnu_add_sparse_entry(a, tar,
2272
0
                   tar->sparse_offset, tar->sparse_numbytes)
2273
0
                  != ARCHIVE_OK)
2274
0
                return (ARCHIVE_FATAL);
2275
0
              tar->sparse_offset = -1;
2276
0
              tar->sparse_numbytes = -1;
2277
0
            }
2278
0
          }
2279
0
          return (err);
2280
0
        }
2281
0
        else if (key_length == 4 && memcmp(key, "size", 4) == 0) {
2282
          /* GNU.sparse.size */
2283
0
          if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2284
0
            tar->realsize = t;
2285
0
            archive_entry_set_size(entry, tar->realsize);
2286
0
            tar->realsize_override = 1;
2287
0
          }
2288
0
          return (err);
2289
0
        }
2290
2291
        /* GNU "0.1" sparse pax format. */
2292
0
        else if (key_length == 3 && memcmp(key, "map", 3) == 0) {
2293
          /* GNU.sparse.map */
2294
0
          tar->sparse_gnu_major = 0;
2295
0
          tar->sparse_gnu_minor = 1;
2296
0
          if (value_length > sparse_map_limit) {
2297
0
            archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
2298
0
                  "Unreasonably large sparse map: %d > %d",
2299
0
                  (int)value_length, (int)sparse_map_limit);
2300
0
            err = ARCHIVE_FAILED;
2301
0
          } else {
2302
0
            p = __archive_read_ahead(a, value_length, &bytes_read);
2303
0
            if (p != NULL) {
2304
0
              if (gnu_sparse_01_parse(a, tar, p, value_length) != ARCHIVE_OK) {
2305
0
                err = ARCHIVE_WARN;
2306
0
              }
2307
0
            } else {
2308
0
              return (ARCHIVE_FATAL);
2309
0
            }
2310
0
          }
2311
0
          __archive_read_consume(a, value_length);
2312
0
          return (err);
2313
0
        }
2314
2315
        /* GNU "1.0" sparse pax format */
2316
0
        else if (key_length == 5 && memcmp(key, "major", 5) == 0) {
2317
          /* GNU.sparse.major */
2318
0
          if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK
2319
0
              && t >= 0
2320
0
              && t <= 10) {
2321
0
            tar->sparse_gnu_major = (int)t;
2322
0
          }
2323
0
          return (err);
2324
0
        }
2325
0
        else if (key_length == 5 && memcmp(key, "minor", 5) == 0) {
2326
          /* GNU.sparse.minor */
2327
0
          if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK
2328
0
              && t >= 0
2329
0
              && t <= 10) {
2330
0
            tar->sparse_gnu_minor = (int)t;
2331
0
          }
2332
0
          return (err);
2333
0
        }
2334
0
        else if (key_length == 4 && memcmp(key, "name", 4) == 0) {
2335
          /* GNU.sparse.name */
2336
          /*
2337
           * The real filename; when storing sparse
2338
           * files, GNU tar puts a synthesized name into
2339
           * the regular 'path' attribute in an attempt
2340
           * to limit confusion. ;-)
2341
           */
2342
0
          if (value_length > pathname_limit) {
2343
0
            *unconsumed += value_length;
2344
0
            err = ARCHIVE_WARN;
2345
0
          } else {
2346
0
            err = read_bytes_to_string(a, &(tar->entry_pathname_override),
2347
0
                     value_length, unconsumed);
2348
0
          }
2349
0
          return (err);
2350
0
        }
2351
0
        else if (key_length == 8 && memcmp(key, "realsize", 8) == 0) {
2352
          /* GNU.sparse.realsize */
2353
0
          if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2354
0
            tar->realsize = t;
2355
0
            archive_entry_set_size(entry, tar->realsize);
2356
0
            tar->realsize_override = 1;
2357
0
          }
2358
0
          return (err);
2359
0
        }
2360
0
      }
2361
0
    }
2362
0
    break;
2363
0
  case 'L':
2364
    /* LIBARCHIVE extensions */
2365
0
    if (key_length > 11 && memcmp(key, "LIBARCHIVE.", 11) == 0) {
2366
0
      key_length -= 11;
2367
0
      key += 11;
2368
2369
      /* TODO: Handle arbitrary extended attributes... */
2370
      /*
2371
        if (strcmp(key, "LIBARCHIVE.xxxxxxx") == 0)
2372
          archive_entry_set_xxxxxx(entry, value);
2373
      */
2374
0
      if (key_length == 12 && memcmp(key, "creationtime", 12) == 0) {
2375
        /* LIBARCHIVE.creationtime */
2376
0
        if ((err = pax_attribute_read_time(a, value_length, &t, &n, unconsumed)) == ARCHIVE_OK) {
2377
0
          archive_entry_set_birthtime(entry, t, n);
2378
0
        }
2379
0
        return (err);
2380
0
      }
2381
0
      else if (key_length == 11 && memcmp(key, "symlinktype", 11) == 0) {
2382
        /* LIBARCHIVE.symlinktype */
2383
0
        if (value_length < 16) {
2384
0
          p = __archive_read_ahead(a, value_length, &bytes_read);
2385
0
          if (p != NULL) {
2386
0
            if (value_length == 4 && memcmp(p, "file", 4) == 0) {
2387
0
              archive_entry_set_symlink_type(entry,
2388
0
                       AE_SYMLINK_TYPE_FILE);
2389
0
            } else if (value_length == 3 && memcmp(p, "dir", 3) == 0) {
2390
0
              archive_entry_set_symlink_type(entry,
2391
0
                       AE_SYMLINK_TYPE_DIRECTORY);
2392
0
            } else {
2393
0
              archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
2394
0
                    "Unrecognized symlink type");
2395
0
              err = ARCHIVE_WARN;
2396
0
            }
2397
0
          } else {
2398
0
            archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2399
0
                  "Truncated tar archive "
2400
0
                  "detected while reading `symlinktype` attribute");
2401
0
            return (ARCHIVE_FATAL);
2402
0
          }
2403
0
        } else {
2404
0
          archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
2405
0
                "symlink type is very long"
2406
0
                "(longest recognized value is 4 bytes, this is %d)",
2407
0
                (int)value_length);
2408
0
          err = ARCHIVE_WARN;
2409
0
        }
2410
0
        __archive_read_consume(a, value_length);
2411
0
        return (err);
2412
0
      }
2413
0
      else if (key_length > 6 && memcmp(key, "xattr.", 6) == 0) {
2414
0
        key_length -= 6;
2415
0
        key += 6;
2416
0
        if (value_length > xattr_limit) {
2417
0
          err = ARCHIVE_WARN;
2418
0
        } else {
2419
0
          p = __archive_read_ahead(a, value_length, &bytes_read);
2420
0
          if (p == NULL
2421
0
              || pax_attribute_LIBARCHIVE_xattr(entry, key, key_length, p, value_length)) {
2422
            /* TODO: Unable to parse xattr */
2423
0
            err = ARCHIVE_WARN;
2424
0
          }
2425
0
        }
2426
0
        __archive_read_consume(a, value_length);
2427
0
        return (err);
2428
0
      }
2429
0
    }
2430
0
    break;
2431
0
  case 'R':
2432
    /* GNU tar uses RHT.security header to store SELinux xattrs
2433
     * SCHILY.xattr.security.selinux == RHT.security.selinux */
2434
0
    if (key_length == 20 && memcmp(key, "RHT.security.selinux", 20) == 0) {
2435
0
      if (value_length > xattr_limit) {
2436
0
        archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
2437
0
              "Ignoring unreasonably large security.selinux attribute:"
2438
0
              " %d > %d",
2439
0
              (int)value_length, (int)xattr_limit);
2440
        /* TODO: Should this be FAILED instead? */
2441
0
        err = ARCHIVE_WARN;
2442
0
      } else {
2443
0
        p = __archive_read_ahead(a, value_length, &bytes_read);
2444
0
        if (p == NULL
2445
0
            || pax_attribute_RHT_security_selinux(entry, p, value_length)) {
2446
          /* TODO: Unable to parse xattr */
2447
0
          err = ARCHIVE_WARN;
2448
0
        }
2449
0
      }
2450
0
      __archive_read_consume(a, value_length);
2451
0
      return (err);
2452
0
    }
2453
0
    break;
2454
0
  case 'S':
2455
    /* SCHILY.* extensions used by "star" archiver */
2456
0
    if (key_length > 7 && memcmp(key, "SCHILY.", 7) == 0) {
2457
0
      key_length -= 7;
2458
0
      key += 7;
2459
2460
0
      if (key_length == 10 && memcmp(key, "acl.access", 10) == 0) {
2461
0
        err = pax_attribute_SCHILY_acl(a, tar, entry, value_length,
2462
0
                  ARCHIVE_ENTRY_ACL_TYPE_ACCESS);
2463
        // TODO: Mark mode as set
2464
0
        return (err);
2465
0
      }
2466
0
      else if (key_length == 11 && memcmp(key, "acl.default", 11) == 0) {
2467
0
        err = pax_attribute_SCHILY_acl(a, tar, entry, value_length,
2468
0
                  ARCHIVE_ENTRY_ACL_TYPE_DEFAULT);
2469
0
        return (err);
2470
0
      }
2471
0
      else if (key_length == 7 && memcmp(key, "acl.ace", 7) == 0) {
2472
0
        err = pax_attribute_SCHILY_acl(a, tar, entry, value_length,
2473
0
                  ARCHIVE_ENTRY_ACL_TYPE_NFS4);
2474
        // TODO: Mark mode as set
2475
0
        return (err);
2476
0
      }
2477
0
      else if (key_length == 8 && memcmp(key, "devmajor", 8) == 0) {
2478
0
        if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2479
0
          archive_entry_set_rdevmajor(entry, t);
2480
0
        }
2481
0
        return (err);
2482
0
      }
2483
0
      else if (key_length == 8 && memcmp(key, "devminor", 8) == 0) {
2484
0
        if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2485
0
          archive_entry_set_rdevminor(entry, t);
2486
0
        }
2487
0
        return (err);
2488
0
      }
2489
0
      else if (key_length == 6 && memcmp(key, "fflags", 6) == 0) {
2490
0
        if (value_length < fflags_limit) {
2491
0
          p = __archive_read_ahead(a, value_length, &bytes_read);
2492
0
          if (p != NULL) {
2493
0
            archive_entry_copy_fflags_text_len(entry, p, value_length);
2494
0
            err = ARCHIVE_OK;
2495
0
          } else {
2496
            /* Truncated archive */
2497
0
            err = ARCHIVE_FATAL;
2498
0
          }
2499
0
        } else {
2500
          /* Overlong fflags field */
2501
0
          err = ARCHIVE_WARN;
2502
0
        }
2503
0
        __archive_read_consume(a, value_length);
2504
0
        return (err);
2505
0
      }
2506
0
      else if (key_length == 3 && memcmp(key, "dev", 3) == 0) {
2507
0
        if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2508
0
          archive_entry_set_dev(entry, t);
2509
0
        }
2510
0
        return (err);
2511
0
      }
2512
0
      else if (key_length == 3 && memcmp(key, "ino", 3) == 0) {
2513
0
        if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2514
0
          archive_entry_set_ino(entry, t);
2515
0
        }
2516
0
        return (err);
2517
0
      }
2518
0
      else if (key_length == 5 && memcmp(key, "nlink", 5) == 0) {
2519
0
        if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2520
0
          archive_entry_set_nlink(entry, t);
2521
0
        }
2522
0
        return (err);
2523
0
      }
2524
0
      else if (key_length == 8 && memcmp(key, "realsize", 8) == 0) {
2525
0
        if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2526
0
          tar->realsize = t;
2527
0
          tar->realsize_override = 1;
2528
0
          archive_entry_set_size(entry, tar->realsize);
2529
0
        }
2530
0
        return (err);
2531
0
      }
2532
0
      else if (key_length > 6 && memcmp(key, "xattr.", 6) == 0) {
2533
0
        key_length -= 6;
2534
0
        key += 6;
2535
0
        if (value_length < xattr_limit) {
2536
0
          p = __archive_read_ahead(a, value_length, &bytes_read);
2537
0
          if (p == NULL
2538
0
              || pax_attribute_SCHILY_xattr(entry, key, key_length, p, value_length)) {
2539
            /* TODO: Unable to parse xattr */
2540
0
            err = ARCHIVE_WARN;
2541
0
          }
2542
0
        } else {
2543
0
          archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
2544
0
                "Unreasonably large xattr: %d > %d",
2545
0
                (int)value_length, (int)xattr_limit);
2546
0
          err = ARCHIVE_WARN;
2547
0
        }
2548
0
        __archive_read_consume(a, value_length);
2549
0
        return (err);
2550
0
      }
2551
0
    }
2552
    /* SUN.* extensions from Solaris tar */
2553
0
    if (key_length > 4 && memcmp(key, "SUN.", 4) == 0) {
2554
0
      key_length -= 4;
2555
0
      key += 4;
2556
2557
0
      if (key_length == 9 && memcmp(key, "holesdata", 9) == 0) {
2558
        /* SUN.holesdata */
2559
0
        if (value_length < sparse_map_limit) {
2560
0
          p = __archive_read_ahead(a, value_length, &bytes_read);
2561
0
          if (p != NULL) {
2562
0
            err = pax_attribute_SUN_holesdata(a, tar, entry, p, value_length);
2563
0
            if (err < ARCHIVE_OK) {
2564
0
              archive_set_error(&a->archive,
2565
0
                    ARCHIVE_ERRNO_MISC,
2566
0
                    "Parse error: SUN.holesdata");
2567
0
            }
2568
0
          } else {
2569
0
            return (ARCHIVE_FATAL);
2570
0
          }
2571
0
        } else {
2572
0
          archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
2573
0
                "Unreasonably large sparse map: %d > %d",
2574
0
                (int)value_length, (int)sparse_map_limit);
2575
0
          err = ARCHIVE_FAILED;
2576
0
        }
2577
0
        __archive_read_consume(a, value_length);
2578
0
        return (err);
2579
0
      }
2580
0
    }
2581
0
    break;
2582
0
  case 'a':
2583
0
    if (key_length == 5 && memcmp(key, "atime", 5) == 0) {
2584
0
      if ((err = pax_attribute_read_time(a, value_length, &t, &n, unconsumed)) == ARCHIVE_OK) {
2585
0
        archive_entry_set_atime(entry, t, n);
2586
0
      }
2587
0
      return (err);
2588
0
    }
2589
0
    break;
2590
0
  case 'c':
2591
0
    if (key_length == 5 && memcmp(key, "ctime", 5) == 0) {
2592
0
      if ((err = pax_attribute_read_time(a, value_length, &t, &n, unconsumed)) == ARCHIVE_OK) {
2593
0
        archive_entry_set_ctime(entry, t, n);
2594
0
      }
2595
0
      return (err);
2596
0
    } else if (key_length == 7 && memcmp(key, "charset", 7) == 0) {
2597
      /* TODO: Publish charset information in entry. */
2598
0
    } else if (key_length == 7 && memcmp(key, "comment", 7) == 0) {
2599
      /* TODO: Publish comment in entry. */
2600
0
    }
2601
0
    break;
2602
0
  case 'g':
2603
0
    if (key_length == 3 && memcmp(key, "gid", 3) == 0) {
2604
0
      if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2605
0
        archive_entry_set_gid(entry, t);
2606
0
      }
2607
0
      return (err);
2608
0
    } else if (key_length == 5 && memcmp(key, "gname", 5) == 0) {
2609
0
      if (value_length > guname_limit) {
2610
0
        *unconsumed += value_length;
2611
0
        err = ARCHIVE_WARN;
2612
0
      } else {
2613
0
        err = read_bytes_to_string(a, &(tar->entry_gname), value_length, unconsumed);
2614
0
      }
2615
0
      return (err);
2616
0
    }
2617
0
    break;
2618
0
  case 'h':
2619
0
    if (key_length == 10 && memcmp(key, "hdrcharset", 10) == 0) {
2620
0
      if (value_length < 64) {
2621
0
        p = __archive_read_ahead(a, value_length, &bytes_read);
2622
0
        if (p != NULL) {
2623
0
          if (value_length == 6
2624
0
              && memcmp(p, "BINARY", 6) == 0) {
2625
            /* Binary  mode. */
2626
0
            tar->pax_hdrcharset_utf8 = 0;
2627
0
            err = ARCHIVE_OK;
2628
0
          } else if (value_length == 23
2629
0
               && memcmp(p, "ISO-IR 10646 2000 UTF-8", 23) == 0) {
2630
0
            tar->pax_hdrcharset_utf8 = 1;
2631
0
            err = ARCHIVE_OK;
2632
0
          } else {
2633
            /* TODO: Unrecognized character set */
2634
0
            err  = ARCHIVE_WARN;
2635
0
          }
2636
0
        } else {
2637
0
          archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2638
0
                "Truncated tar archive "
2639
0
                "detected while reading hdrcharset attribute");
2640
0
          return (ARCHIVE_FATAL);
2641
0
        }
2642
0
      } else {
2643
0
        archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2644
0
              "hdrcharset attribute is unreasonably large (%d bytes)",
2645
0
              (int)value_length);
2646
0
        err = ARCHIVE_WARN;
2647
0
      }
2648
0
      __archive_read_consume(a, value_length);
2649
0
      return (err);
2650
0
    }
2651
0
    break;
2652
0
  case 'l':
2653
    /* pax interchange doesn't distinguish hardlink vs. symlink. */
2654
0
    if (key_length == 8 && memcmp(key, "linkpath", 8) == 0) {
2655
0
      if (value_length > pathname_limit) {
2656
0
        *unconsumed += value_length;
2657
0
        err = ARCHIVE_WARN;
2658
0
      } else {
2659
0
        err = read_bytes_to_string(a, &tar->entry_linkpath, value_length, unconsumed);
2660
0
      }
2661
0
      return (err);
2662
0
    }
2663
0
    break;
2664
0
  case 'm':
2665
0
    if (key_length == 5 && memcmp(key, "mtime", 5) == 0) {
2666
0
      if ((err = pax_attribute_read_time(a, value_length, &t, &n, unconsumed)) == ARCHIVE_OK) {
2667
0
        archive_entry_set_mtime(entry, t, n);
2668
0
      }
2669
0
      return (err);
2670
0
    }
2671
0
    break;
2672
0
  case 'p':
2673
0
    if (key_length == 4 && memcmp(key, "path", 4) == 0) {
2674
0
      if (value_length > pathname_limit) {
2675
0
        *unconsumed += value_length;
2676
0
        err = ARCHIVE_WARN;
2677
0
      } else {
2678
0
        err = read_bytes_to_string(a, &(tar->entry_pathname), value_length, unconsumed);
2679
0
      }
2680
0
      return (err);
2681
0
    }
2682
0
    break;
2683
0
  case 'r':
2684
    /* POSIX has reserved 'realtime.*' */
2685
0
    break;
2686
0
  case 's':
2687
    /* POSIX has reserved 'security.*' */
2688
    /* Someday: if (strcmp(key, "security.acl") == 0) { ... } */
2689
0
    if (key_length == 4 && memcmp(key, "size", 4) == 0) {
2690
      /* "size" is the size of the data in the entry. */
2691
0
      if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2692
0
        tar->entry_bytes_remaining = t;
2693
        /*
2694
         * The "size" pax header keyword always overrides the
2695
         * "size" field in the tar header.
2696
         * GNU.sparse.realsize, GNU.sparse.size and
2697
         * SCHILY.realsize override this value.
2698
         */
2699
0
        if (!tar->realsize_override) {
2700
0
          archive_entry_set_size(entry,
2701
0
                     tar->entry_bytes_remaining);
2702
0
          tar->realsize
2703
0
            = tar->entry_bytes_remaining;
2704
0
        }
2705
0
      }
2706
0
      else if (t == INT64_MAX) {
2707
        /* Note: pax_attr_read_number returns INT64_MAX on overflow or < 0 */
2708
0
        tar->entry_bytes_remaining = 0;
2709
0
        archive_set_error(&a->archive,
2710
0
            ARCHIVE_ERRNO_MISC,
2711
0
            "Tar size attribute overflow");
2712
0
        return (ARCHIVE_FATAL);
2713
0
      }
2714
0
      return (err);
2715
0
    }
2716
0
    break;
2717
0
  case 'u':
2718
0
    if (key_length == 3 && memcmp(key, "uid", 3) == 0) {
2719
0
      if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2720
0
        archive_entry_set_uid(entry, t);
2721
0
      }
2722
0
      return (err);
2723
0
    } else if (key_length == 5 && memcmp(key, "uname", 5) == 0) {
2724
0
      if (value_length > guname_limit) {
2725
0
        *unconsumed += value_length;
2726
0
        err = ARCHIVE_WARN;
2727
0
      } else {
2728
0
        err = read_bytes_to_string(a, &(tar->entry_uname), value_length, unconsumed);
2729
0
      }
2730
0
      return (err);
2731
0
    }
2732
0
    break;
2733
0
  }
2734
2735
  /* Unrecognized key, just skip the entire value. */
2736
0
  __archive_read_consume(a, value_length);
2737
0
  return (err);
2738
0
}
2739
2740
2741
2742
/*
 * Parse a decimal time value, which may include a fractional portion.
 *
 * p, length: the text to parse; only the first `length` bytes are
 *            examined, so the buffer need not be NUL-terminated.
 * ps:        receives the whole seconds.  An optional leading '-' is
 *            honoured; a magnitude that would exceed INT64_MAX is
 *            clamped to INT64_MAX before the sign is applied.
 * pn:        receives the nanoseconds taken from up to nine digits
 *            following a '.'; further digits are ignored.
 *
 * Both outputs are always written, including for an empty value
 * (0 seconds, 0 nanoseconds).
 */
static void
pax_time(const char *p, size_t length, int64_t *ps, long *pn)
{
	const int64_t limit = INT64_MAX / 10;
	const int64_t last_digit_limit = INT64_MAX % 10;
	int64_t s = 0;
	unsigned long l;
	int sign = 1;
	char digit;

	/* Define both results up front so no path leaves *pn unset. */
	*ps = 0;
	*pn = 0;

	if (length == 0)
		return;

	if (*p == '-') {
		sign = -1;
		p++;
		length--;
	}
	while (length > 0 && *p >= '0' && *p <= '9') {
		digit = *p - '0';
		/* Stop before s * 10 + digit could overflow int64_t. */
		if (s > limit ||
		    (s == limit && digit > last_digit_limit)) {
			s = INT64_MAX;
			break;
		}
		s = (s * 10) + digit;
		++p;
		--length;
	}

	*ps = s * sign;

	/* Calculate nanoseconds. */
	if (length == 0 || *p != '.')
		return;

	/* l is the weight of the next fractional digit, in nanoseconds. */
	l = 100000000UL;
	do {
		++p;
		--length;
		if (length > 0 && *p >= '0' && *p <= '9')
			*pn += (long)(*p - '0') * (long)l;
		else
			break;
	} while (l /= 10);
}
2798
2799
/*
2800
 * Parse GNU tar header
2801
 */
2802
static int
2803
header_gnutar(struct archive_read *a, struct tar *tar,
2804
    struct archive_entry *entry, const void *h, size_t *unconsumed)
2805
1
{
2806
1
  const struct archive_entry_header_gnutar *header;
2807
1
  int64_t t;
2808
1
  int err = ARCHIVE_OK;
2809
2810
  /*
2811
   * GNU header is like POSIX ustar, except 'prefix' is
2812
   * replaced with some other fields. This also means the
2813
   * filename is stored as in old-style archives.
2814
   */
2815
2816
  /* Grab fields common to all tar variants. */
2817
1
  err = header_common(a, tar, entry, h);
2818
1
  if (err == ARCHIVE_FATAL)
2819
0
    return (err);
2820
2821
  /* Copy filename over (to ensure null termination). */
2822
1
  header = (const struct archive_entry_header_gnutar *)h;
2823
1
  const char *existing_pathname = archive_entry_pathname(entry);
2824
1
  if (existing_pathname == NULL || existing_pathname[0] == '\0') {
2825
1
    if (archive_entry_copy_pathname_l(entry,
2826
1
        header->name, sizeof(header->name), tar->sconv) != 0) {
2827
0
      err = set_conversion_failed_error(a, tar->sconv, "Pathname");
2828
0
      if (err == ARCHIVE_FATAL)
2829
0
        return (err);
2830
0
    }
2831
1
  }
2832
2833
  /* Fields common to ustar and GNU */
2834
  /* XXX Can the following be factored out since it's common
2835
   * to ustar and gnu tar?  Is it okay to move it down into
2836
   * header_common, perhaps?  */
2837
1
  const char *existing_uname = archive_entry_uname(entry);
2838
1
  if (existing_uname == NULL || existing_uname[0] == '\0') {
2839
1
    if (archive_entry_copy_uname_l(entry,
2840
1
        header->uname, sizeof(header->uname), tar->sconv) != 0) {
2841
0
      err = set_conversion_failed_error(a, tar->sconv, "Uname");
2842
0
      if (err == ARCHIVE_FATAL)
2843
0
        return (err);
2844
0
    }
2845
1
  }
2846
2847
1
  const char *existing_gname = archive_entry_gname(entry);
2848
1
  if (existing_gname == NULL || existing_gname[0] == '\0') {
2849
1
    if (archive_entry_copy_gname_l(entry,
2850
1
        header->gname, sizeof(header->gname), tar->sconv) != 0) {
2851
0
      err = set_conversion_failed_error(a, tar->sconv, "Gname");
2852
0
      if (err == ARCHIVE_FATAL)
2853
0
        return (err);
2854
0
    }
2855
1
  }
2856
2857
  /* Parse out device numbers only for char and block specials */
2858
1
  if (header->typeflag[0] == '3' || header->typeflag[0] == '4') {
2859
0
    if (!archive_entry_rdev_is_set(entry)) {
2860
0
      archive_entry_set_rdevmajor(entry, (dev_t)
2861
0
          tar_atol(header->rdevmajor, sizeof(header->rdevmajor)));
2862
0
      archive_entry_set_rdevminor(entry, (dev_t)
2863
0
          tar_atol(header->rdevminor, sizeof(header->rdevminor)));
2864
0
    }
2865
1
  } else {
2866
1
    archive_entry_set_rdev(entry, 0);
2867
1
  }
2868
2869
1
  tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
2870
2871
  /* Grab GNU-specific fields. */
2872
1
  if (!archive_entry_atime_is_set(entry)) {
2873
1
    t = tar_atol(header->atime, sizeof(header->atime));
2874
1
    if (t > 0)
2875
0
      archive_entry_set_atime(entry, t, 0);
2876
1
  }
2877
1
  if (!archive_entry_ctime_is_set(entry)) {
2878
1
    t = tar_atol(header->ctime, sizeof(header->ctime));
2879
1
    if (t > 0)
2880
0
      archive_entry_set_ctime(entry, t, 0);
2881
1
  }
2882
2883
1
  if (header->realsize[0] != 0) {
2884
0
    tar->realsize
2885
0
        = tar_atol(header->realsize, sizeof(header->realsize));
2886
0
    archive_entry_set_size(entry, tar->realsize);
2887
0
    tar->realsize_override = 1;
2888
0
  }
2889
2890
1
  if (header->sparse[0].offset[0] != 0) {
2891
0
    if (gnu_sparse_old_read(a, tar, header, unconsumed)
2892
0
        != ARCHIVE_OK)
2893
0
      return (ARCHIVE_FATAL);
2894
1
  } else {
2895
1
    if (header->isextended[0] != 0) {
2896
      /* XXX WTF? XXX */
2897
0
    }
2898
1
  }
2899
2900
1
  return (err);
2901
1
}
2902
2903
static int
2904
gnu_add_sparse_entry(struct archive_read *a, struct tar *tar,
2905
    int64_t offset, int64_t remaining)
2906
290k
{
2907
290k
  struct sparse_block *p;
2908
2909
290k
  p = (struct sparse_block *)calloc(1, sizeof(*p));
2910
290k
  if (p == NULL) {
2911
0
    archive_set_error(&a->archive, ENOMEM, "Out of memory");
2912
0
    return (ARCHIVE_FATAL);
2913
0
  }
2914
290k
  if (tar->sparse_last != NULL)
2915
0
    tar->sparse_last->next = p;
2916
290k
  else
2917
290k
    tar->sparse_list = p;
2918
290k
  tar->sparse_last = p;
2919
290k
  if (remaining < 0 || offset < 0 || offset > INT64_MAX - remaining) {
2920
0
    archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed sparse map data");
2921
0
    return (ARCHIVE_FATAL);
2922
0
  }
2923
290k
  p->offset = offset;
2924
290k
  p->remaining = remaining;
2925
290k
  return (ARCHIVE_OK);
2926
290k
}
2927
2928
static void
2929
gnu_clear_sparse_list(struct tar *tar)
2930
291k
{
2931
291k
  struct sparse_block *p;
2932
2933
582k
  while (tar->sparse_list != NULL) {
2934
290k
    p = tar->sparse_list;
2935
290k
    tar->sparse_list = p->next;
2936
290k
    free(p);
2937
290k
  }
2938
291k
  tar->sparse_last = NULL;
2939
291k
}
2940
2941
/*
2942
 * GNU tar old-format sparse data.
2943
 *
2944
 * GNU old-format sparse data is stored in a fixed-field
2945
 * format.  Offset/size values are 11-byte octal fields (same
2946
 * format as 'size' field in ustart header).  These are
2947
 * stored in the header, allocating subsequent header blocks
2948
 * as needed.  Extending the header in this way is a pretty
2949
 * severe POSIX violation; this design has earned GNU tar a
2950
 * lot of criticism.
2951
 */
2952
2953
static int
2954
gnu_sparse_old_read(struct archive_read *a, struct tar *tar,
2955
    const struct archive_entry_header_gnutar *header, size_t *unconsumed)
2956
0
{
2957
0
  ssize_t bytes_read;
2958
0
  const void *data;
2959
0
  struct extended {
2960
0
    struct gnu_sparse sparse[21];
2961
0
    char  isextended[1];
2962
0
    char  padding[7];
2963
0
  };
2964
0
  const struct extended *ext;
2965
2966
0
  if (gnu_sparse_old_parse(a, tar, header->sparse, 4) != ARCHIVE_OK)
2967
0
    return (ARCHIVE_FATAL);
2968
0
  if (header->isextended[0] == 0)
2969
0
    return (ARCHIVE_OK);
2970
2971
0
  do {
2972
0
    tar_flush_unconsumed(a, unconsumed);
2973
0
    data = __archive_read_ahead(a, 512, &bytes_read);
2974
0
    if (bytes_read < 0)
2975
0
      return (ARCHIVE_FATAL);
2976
0
    if (bytes_read < 512) {
2977
0
      archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2978
0
          "Truncated tar archive "
2979
0
          "detected while reading sparse file data");
2980
0
      return (ARCHIVE_FATAL);
2981
0
    }
2982
0
    *unconsumed = 512;
2983
0
    ext = (const struct extended *)data;
2984
0
    if (gnu_sparse_old_parse(a, tar, ext->sparse, 21) != ARCHIVE_OK)
2985
0
      return (ARCHIVE_FATAL);
2986
0
  } while (ext->isextended[0] != 0);
2987
0
  if (tar->sparse_list != NULL)
2988
0
    tar->entry_offset = tar->sparse_list->offset;
2989
0
  return (ARCHIVE_OK);
2990
0
}
2991
2992
static int
2993
gnu_sparse_old_parse(struct archive_read *a, struct tar *tar,
2994
    const struct gnu_sparse *sparse, int length)
2995
0
{
2996
0
  while (length > 0 && sparse->offset[0] != 0) {
2997
0
    if (gnu_add_sparse_entry(a, tar,
2998
0
        tar_atol(sparse->offset, sizeof(sparse->offset)),
2999
0
        tar_atol(sparse->numbytes, sizeof(sparse->numbytes)))
3000
0
        != ARCHIVE_OK)
3001
0
      return (ARCHIVE_FATAL);
3002
0
    sparse++;
3003
0
    length--;
3004
0
  }
3005
0
  return (ARCHIVE_OK);
3006
0
}
3007
3008
/*
3009
 * GNU tar sparse format 0.0
3010
 *
3011
 * Beginning with GNU tar 1.15, sparse files are stored using
3012
 * information in the pax extended header.  The GNU tar maintainers
3013
 * have gone through a number of variations in the process of working
3014
 * out this scheme; fortunately, they're all numbered.
3015
 *
3016
 * Sparse format 0.0 uses attribute GNU.sparse.numblocks to store the
3017
 * number of blocks, and GNU.sparse.offset/GNU.sparse.numbytes to
3018
 * store offset/size for each block.  The repeated instances of these
3019
 * latter fields violate the pax specification (which frowns on
3020
 * duplicate keys), so this format was quickly replaced.
3021
 */
3022
3023
/*
3024
 * GNU tar sparse format 0.1
3025
 *
3026
 * This version replaced the offset/numbytes attributes with
3027
 * a single "map" attribute that stored a list of integers.  This
3028
 * format had two problems: First, the "map" attribute could be very
3029
 * long, which caused problems for some implementations.  More
3030
 * importantly, the sparse data was lost when extracted by archivers
3031
 * that didn't recognize this extension.
3032
 */
3033
static int
3034
gnu_sparse_01_parse(struct archive_read *a, struct tar *tar, const char *p, size_t length)
3035
0
{
3036
0
  const char *e;
3037
0
  int64_t offset = -1, size = -1;
3038
3039
0
  for (;;) {
3040
0
    e = p;
3041
0
    while (length > 0 && *e != ',') {
3042
0
      if (*e < '0' || *e > '9')
3043
0
        return (ARCHIVE_WARN);
3044
0
      e++;
3045
0
      length--;
3046
0
    }
3047
0
    if (offset < 0) {
3048
0
      offset = tar_atol10(p, e - p);
3049
0
      if (offset < 0)
3050
0
        return (ARCHIVE_WARN);
3051
0
    } else {
3052
0
      size = tar_atol10(p, e - p);
3053
0
      if (size < 0)
3054
0
        return (ARCHIVE_WARN);
3055
0
      if (gnu_add_sparse_entry(a, tar, offset, size)
3056
0
          != ARCHIVE_OK)
3057
0
        return (ARCHIVE_FATAL);
3058
0
      offset = -1;
3059
0
    }
3060
0
    if (length == 0)
3061
0
      return (ARCHIVE_OK);
3062
0
    p = e + 1;
3063
0
    length--;
3064
0
  }
3065
0
}
3066
3067
/*
3068
 * GNU tar sparse format 1.0
3069
 *
3070
 * The idea: The offset/size data is stored as a series of base-10
3071
 * ASCII numbers prepended to the file data, so that dearchivers that
3072
 * don't support this format will extract the block map along with the
3073
 * data and a separate post-process can restore the sparseness.
3074
 *
3075
 * Unfortunately, GNU tar 1.16 had a bug that added unnecessary
3076
 * padding to the body of the file when using this format.  GNU tar
3077
 * 1.17 corrected this bug without bumping the version number, so
3078
 * it's not possible to support both variants.  This code supports
3079
 * the later variant at the expense of not supporting the former.
3080
 *
3081
 * This variant also replaced GNU.sparse.size with GNU.sparse.realsize
3082
 * and introduced the GNU.sparse.major/GNU.sparse.minor attributes.
3083
 */
3084
3085
/*
3086
 * Read the next line from the input, and parse it as a decimal
3087
 * integer followed by '\n'.  Returns positive integer value or
3088
 * negative on error.
3089
 */
3090
static int64_t
3091
gnu_sparse_10_atol(struct archive_read *a, struct tar *tar,
3092
    int64_t *remaining, size_t *unconsumed)
3093
0
{
3094
0
  int64_t l, limit, last_digit_limit;
3095
0
  const char *p;
3096
0
  ssize_t bytes_read;
3097
0
  int base, digit;
3098
3099
0
  base = 10;
3100
0
  limit = INT64_MAX / base;
3101
0
  last_digit_limit = INT64_MAX % base;
3102
3103
  /*
3104
   * Skip any lines starting with '#'; GNU tar specs
3105
   * don't require this, but they should.
3106
   */
3107
0
  do {
3108
0
    bytes_read = readline(a, tar, &p,
3109
0
      (ssize_t)tar_min(*remaining, 100), unconsumed);
3110
0
    if (bytes_read <= 0)
3111
0
      return (ARCHIVE_FATAL);
3112
0
    *remaining -= bytes_read;
3113
0
  } while (p[0] == '#');
3114
3115
0
  l = 0;
3116
0
  while (bytes_read > 0) {
3117
0
    if (*p == '\n')
3118
0
      return (l);
3119
0
    if (*p < '0' || *p >= '0' + base)
3120
0
      return (ARCHIVE_WARN);
3121
0
    digit = *p - '0';
3122
0
    if (l > limit || (l == limit && digit > last_digit_limit))
3123
0
      l = INT64_MAX; /* Truncate on overflow. */
3124
0
    else
3125
0
      l = (l * base) + digit;
3126
0
    p++;
3127
0
    bytes_read--;
3128
0
  }
3129
  /* TODO: Error message. */
3130
0
  return (ARCHIVE_WARN);
3131
0
}
3132
3133
/*
3134
 * Returns length (in bytes) of the sparse data description
3135
 * that was read.
3136
 */
3137
static ssize_t
3138
gnu_sparse_10_read(struct archive_read *a, struct tar *tar, size_t *unconsumed)
3139
0
{
3140
0
  ssize_t bytes_read;
3141
0
  int entries;
3142
0
  int64_t offset, size, to_skip, remaining;
3143
3144
  /* Clear out the existing sparse list. */
3145
0
  gnu_clear_sparse_list(tar);
3146
3147
0
  remaining = tar->entry_bytes_remaining;
3148
3149
  /* Parse entries. */
3150
0
  entries = (int)gnu_sparse_10_atol(a, tar, &remaining, unconsumed);
3151
0
  if (entries < 0)
3152
0
    return (ARCHIVE_FATAL);
3153
  /* Parse the individual entries. */
3154
0
  while (entries-- > 0) {
3155
    /* Parse offset/size */
3156
0
    offset = gnu_sparse_10_atol(a, tar, &remaining, unconsumed);
3157
0
    if (offset < 0)
3158
0
      return (ARCHIVE_FATAL);
3159
0
    size = gnu_sparse_10_atol(a, tar, &remaining, unconsumed);
3160
0
    if (size < 0)
3161
0
      return (ARCHIVE_FATAL);
3162
    /* Add a new sparse entry. */
3163
0
    if (gnu_add_sparse_entry(a, tar, offset, size) != ARCHIVE_OK)
3164
0
      return (ARCHIVE_FATAL);
3165
0
  }
3166
  /* Skip rest of block... */
3167
0
  tar_flush_unconsumed(a, unconsumed);
3168
0
  bytes_read = (ssize_t)(tar->entry_bytes_remaining - remaining);
3169
0
  to_skip = 0x1ff & -bytes_read;
3170
  /* Fail if tar->entry_bytes_remaing would get negative */
3171
0
  if (to_skip > remaining)
3172
0
    return (ARCHIVE_FATAL);
3173
0
  if (to_skip != __archive_read_consume(a, to_skip))
3174
0
    return (ARCHIVE_FATAL);
3175
0
  return ((ssize_t)(bytes_read + to_skip));
3176
0
}
3177
3178
/*
3179
 * Solaris pax extension for a sparse file. This is recorded with the
3180
 * data and hole pairs. The way recording sparse information by Solaris'
3181
 * pax simply indicates where data and sparse are, so the stored contents
3182
 * consist of both data and hole.
3183
 */
3184
static int
3185
pax_attribute_SUN_holesdata(struct archive_read *a, struct tar *tar,
3186
  struct archive_entry *entry, const char *p, size_t length)
3187
0
{
3188
0
  const char *e;
3189
0
  int64_t start, end;
3190
0
  int hole = 1;
3191
3192
0
  (void)entry; /* UNUSED */
3193
3194
0
  end = 0;
3195
0
  if (length <= 0)
3196
0
    return (ARCHIVE_WARN);
3197
0
  if (*p == ' ') {
3198
0
    p++;
3199
0
    length--;
3200
0
  } else {
3201
0
    return (ARCHIVE_WARN);
3202
0
  }
3203
0
  for (;;) {
3204
0
    e = p;
3205
0
    while (length > 0 && *e != ' ') {
3206
0
      if (*e < '0' || *e > '9')
3207
0
        return (ARCHIVE_WARN);
3208
0
      e++;
3209
0
      length--;
3210
0
    }
3211
0
    start = end;
3212
0
    end = tar_atol10(p, e - p);
3213
0
    if (end < 0)
3214
0
      return (ARCHIVE_WARN);
3215
0
    if (start < end) {
3216
0
      if (gnu_add_sparse_entry(a, tar, start,
3217
0
          end - start) != ARCHIVE_OK)
3218
0
        return (ARCHIVE_FATAL);
3219
0
      tar->sparse_last->hole = hole;
3220
0
    }
3221
0
    if (length == 0 || *e == '\n') {
3222
0
      if (length == 0 && *e == '\n') {
3223
0
        return (ARCHIVE_OK);
3224
0
      } else {
3225
0
        return (ARCHIVE_WARN);
3226
0
      }
3227
0
    }
3228
0
    p = e + 1;
3229
0
    length--;
3230
0
    hole = hole == 0;
3231
0
  }
3232
0
}
3233
3234
/*-
3235
 * Convert text->integer.
3236
 *
3237
 * Traditional tar formats (including POSIX) specify base-8 for
3238
 * all of the standard numeric fields.  This is a significant limitation
3239
 * in practice:
3240
 *   = file size is limited to 8GB
3241
 *   = rdevmajor and rdevminor are limited to 21 bits
3242
 *   = uid/gid are limited to 21 bits
3243
 *
3244
 * There are two workarounds for this:
3245
 *   = pax extended headers, which use variable-length string fields
3246
 *   = GNU tar and STAR both allow either base-8 or base-256 in
3247
 *      most fields.  The high bit is set to indicate base-256.
3248
 *
3249
 * On read, this implementation supports both extensions.
3250
 */
3251
static int64_t
tar_atol(const char *p, size_t char_cnt)
{
  /*
   * A set high bit in the first byte selects the base-256 parser;
   * everything else is treated as base-8.  (Technically, GNU tar
   * considers a field to be in base-256 only if the first byte is
   * 0xff or 0x80.)
   */
  return ((*p & 0x80)
      ? tar_atol256(p, char_cnt)
      : tar_atol8(p, char_cnt));
}
3262
3263
/*
 * Convert up to 'char_cnt' characters at 'p' to an integer in the
 * given base.
 *
 * Leading spaces and tabs are skipped, an optional '-' selects a
 * negative result, and conversion stops at the first character that is
 * not a digit of 'base' or when 'char_cnt' characters have been used.
 * A field with no digits yields 0.  On overflow the result saturates
 * to INT64_MAX (or INT64_MIN for negative input).
 *
 * No byte at or beyond p[char_cnt] is ever read, so the field does not
 * need to be NUL-terminated.
 *
 * Note that this implementation does not (and should not!) obey
 * locale settings; you cannot simply substitute strtol here, since
 * it does obey locale.
 */
static int64_t
tar_atol_base_n(const char *p, size_t char_cnt, int base)
{
  int64_t l, maxval, limit, last_digit_limit;
  int digit, sign;

  maxval = INT64_MAX;
  limit = INT64_MAX / base;
  last_digit_limit = INT64_MAX % base;

  /* The count is tested first, so *p is never read when it is zero. */
  while (char_cnt != 0 && (*p == ' ' || *p == '\t')) {
    p++;
    char_cnt--;
  }

  sign = 1;
  if (char_cnt != 0 && *p == '-') {
    sign = -1;
    p++;
    char_cnt--;

    /* Negative values saturate at INT64_MIN instead. */
    maxval = INT64_MIN;
    limit = -(INT64_MIN / base);
    last_digit_limit = -(INT64_MIN % base);
  }

  l = 0;
  /*
   * Check the remaining count before every dereference; the next
   * character is only fetched once we know it lies inside the field.
   */
  for (; char_cnt != 0; p++, char_cnt--) {
    digit = *p - '0';
    if (digit < 0 || digit >= base)
      break;
    if (l > limit || (l == limit && digit >= last_digit_limit))
      return maxval; /* Truncate on overflow. */
    l = (l * base) + digit;
  }
  return (sign < 0) ? -l : l;
}
3311
3312
/* Octal convenience wrapper around tar_atol_base_n(). */
static int64_t
tar_atol8(const char *p, size_t char_cnt)
{
  const int octal = 8;

  return (tar_atol_base_n(p, char_cnt, octal));
}
3317
3318
/* Decimal convenience wrapper around tar_atol_base_n(). */
static int64_t
tar_atol10(const char *p, size_t char_cnt)
{
  const int decimal = 10;

  return (tar_atol_base_n(p, char_cnt, decimal));
}
3323
3324
/*
 * Parse a base-256 integer: a variable-length, big-endian,
 * twos-complement signed binary value whose top bit (the base-256
 * marker) is ignored.  Bit 0x40 of the first byte acts as the sign.
 * Fields can be up to 12 bytes, so anything that does not fit in a
 * 64-bit (8-byte) integer saturates to INT64_MAX or INT64_MIN.
 *
 * This code unashamedly assumes that the local machine uses 8-bit
 * bytes and twos-complement arithmetic.
 */
static int64_t
tar_atol256(const char *_p, size_t char_cnt)
{
  const unsigned char *bytes = (const unsigned char *)_p;
  const int negative = (bytes[0] & 0x40) != 0;
  /* Value every discarded high-order byte must have. */
  const unsigned char fill = negative ? 0xff : 0x00;
  /* Result reported when the value does not fit. */
  const int64_t saturated = negative ? INT64_MIN : INT64_MAX;
  uint64_t acc = negative ? ~ARCHIVE_LITERAL_ULL(0) : 0;
  unsigned char cur;

  /* Extend 7-bit 2s-comp to 8-bit 2s-comp in the first byte. */
  if (negative)
    cur = (unsigned char)(bytes[0] | 0x80);
  else
    cur = (unsigned char)(bytes[0] & 0x7f);

  /*
   * Bytes beyond the low-order eight may be dropped only if they
   * are pure sign extension.
   */
  for (; char_cnt > sizeof(int64_t); char_cnt--) {
    if (cur != fill)
      return (saturated);
    cur = *++bytes;
  }

  /* First byte that is kept: its top bit must agree with the sign. */
  if ((cur ^ fill) & 0x80)
    return (saturated);

  /* Fold the remaining bytes into the accumulator, high byte first. */
  for (;;) {
    acc = (acc << 8) | cur;
    if (--char_cnt == 0)
      break;
    cur = *++bytes;
  }

  /* Return signed twos-complement value. */
  return ((int64_t)acc);
}
3376
3377
/*
3378
 * Returns length of line (including trailing newline)
3379
 * or negative on error.  'start' argument is updated to
3380
 * point to first character of line.  This avoids copying
3381
 * when possible.
3382
 */
3383
static ssize_t
3384
readline(struct archive_read *a, struct tar *tar, const char **start,
3385
    ssize_t limit, size_t *unconsumed)
3386
0
{
3387
0
  ssize_t bytes_read;
3388
0
  ssize_t total_size = 0;
3389
0
  const void *t;
3390
0
  const char *s;
3391
0
  void *p;
3392
3393
0
  tar_flush_unconsumed(a, unconsumed);
3394
3395
0
  t = __archive_read_ahead(a, 1, &bytes_read);
3396
0
  if (bytes_read <= 0)
3397
0
    return (ARCHIVE_FATAL);
3398
0
  s = t;  /* Start of line? */
3399
0
  p = memchr(t, '\n', bytes_read);
3400
  /* If we found '\n' in the read buffer, return pointer to that. */
3401
0
  if (p != NULL) {
3402
0
    bytes_read = 1 + ((const char *)p) - s;
3403
0
    if (bytes_read > limit) {
3404
0
      archive_set_error(&a->archive,
3405
0
          ARCHIVE_ERRNO_FILE_FORMAT,
3406
0
          "Line too long");
3407
0
      return (ARCHIVE_FATAL);
3408
0
    }
3409
0
    *unconsumed = bytes_read;
3410
0
    *start = s;
3411
0
    return (bytes_read);
3412
0
  }
3413
0
  *unconsumed = bytes_read;
3414
  /* Otherwise, we need to accumulate in a line buffer. */
3415
0
  for (;;) {
3416
0
    if (total_size + bytes_read > limit) {
3417
0
      archive_set_error(&a->archive,
3418
0
          ARCHIVE_ERRNO_FILE_FORMAT,
3419
0
          "Line too long");
3420
0
      return (ARCHIVE_FATAL);
3421
0
    }
3422
0
    if (archive_string_ensure(&tar->line, total_size + bytes_read) == NULL) {
3423
0
      archive_set_error(&a->archive, ENOMEM,
3424
0
          "Can't allocate working buffer");
3425
0
      return (ARCHIVE_FATAL);
3426
0
    }
3427
0
    memcpy(tar->line.s + total_size, t, bytes_read);
3428
0
    tar_flush_unconsumed(a, unconsumed);
3429
0
    total_size += bytes_read;
3430
    /* If we found '\n', clean up and return. */
3431
0
    if (p != NULL) {
3432
0
      *start = tar->line.s;
3433
0
      return (total_size);
3434
0
    }
3435
    /* Read some more. */
3436
0
    t = __archive_read_ahead(a, 1, &bytes_read);
3437
0
    if (bytes_read <= 0)
3438
0
      return (ARCHIVE_FATAL);
3439
0
    s = t;  /* Start of line? */
3440
0
    p = memchr(t, '\n', bytes_read);
3441
    /* If we found '\n', trim the read. */
3442
0
    if (p != NULL) {
3443
0
      bytes_read = 1 + ((const char *)p) - s;
3444
0
    }
3445
0
    *unconsumed = bytes_read;
3446
0
  }
3447
0
}
3448
3449
/*
 * base64_decode - Base64 decode
 *
 * This accepts most variations of base-64 encoding, including:
 *    * with or without line breaks
 *    * with or without the final group padded with '=' or '_' characters
 * (The most economical Base-64 variant does not pad the last group and
 * omits line breaks; RFC1341 used for MIME requires both.)
 *
 * 's'/'len' describe the encoded input.  On success a malloc()ed
 * buffer is returned (the caller frees it) and '*out_len' receives the
 * number of decoded bytes; the buffer is not NUL-terminated.  On
 * allocation failure NULL is returned and '*out_len' is set to 0.
 * Characters outside the base-64 alphabet are skipped, and a trailing
 * group of a single character contributes no output.
 *
 * The reverse lookup table is rebuilt on the stack for every call, so
 * the function keeps no mutable static state and may be used from
 * several threads at once.
 */
static char *
base64_decode(const char *s, size_t len, size_t *out_len)
{
  static const unsigned char digits[64] = {
    'A','B','C','D','E','F','G','H','I','J','K','L','M','N',
    'O','P','Q','R','S','T','U','V','W','X','Y','Z','a','b',
    'c','d','e','f','g','h','i','j','k','l','m','n','o','p',
    'q','r','s','t','u','v','w','x','y','z','0','1','2','3',
    '4','5','6','7','8','9','+','/' };
  unsigned char decode_table[128];
  char *out, *d;
  const unsigned char *src = (const unsigned char *)s;
  unsigned i;

  /* Map each alphabet character to its 6-bit value; 0xff = invalid. */
  memset(decode_table, 0xff, sizeof(decode_table));
  for (i = 0; i < sizeof(digits); i++)
    decode_table[digits[i]] = (unsigned char)i;

  /* Allocate enough space to hold the entire output. */
  /* Note that we may not use all of this... */
  out = (char *)malloc(len - len / 4 + 1);
  if (out == NULL) {
    *out_len = 0;
    return (NULL);
  }
  d = out;

  while (len > 0) {
    /* Collect the next group of (up to) four characters. */
    int v = 0;
    int group_size = 0;
    while (group_size < 4 && len > 0) {
      /* '=' or '_' padding indicates final group. */
      if (*src == '=' || *src == '_') {
        len = 0;
        break;
      }
      /* Skip illegal characters (including line breaks) */
      if (*src > 127 || *src < 32
          || decode_table[*src] == 0xff) {
        len--;
        src++;
        continue;
      }
      v <<= 6;
      v |= decode_table[*src++];
      len --;
      group_size++;
    }
    /* Align a short group properly. */
    v <<= 6 * (4 - group_size);
    /* Unpack the group we just collected. */
    switch (group_size) {
    case 4: d[2] = v & 0xff;
      /* FALLTHROUGH */
    case 3: d[1] = (v >> 8) & 0xff;
      /* FALLTHROUGH */
    case 2: d[0] = (v >> 16) & 0xff;
      break;
    case 1: /* this is invalid! */
      break;
    default: /* empty group: nothing to unpack */
      break;
    }
    /* 4 chars -> 3 bytes, 3 -> 2, 2 -> 1, 1 or 0 -> 0. */
    d += group_size * 3 / 4;
  }

  *out_len = d - out;
  return (out);
}
3529
3530
/*
 * Decode %XX escapes in the first 'length' bytes of 'in' (stopping
 * early at a NUL byte).  A '%' that is not followed by two hex digits
 * within the input is copied through unchanged.  Returns a malloc()ed,
 * NUL-terminated string that the caller must free, or NULL if the
 * allocation fails.
 */
static char *
url_decode(const char *in, size_t length)
{
  const char *src = in;
  char *result, *dst;
  int hi, lo;

  /* Decoding never grows the text, so length + 1 always suffices. */
  result = (char *)malloc(length + 1);
  if (result == NULL)
    return (NULL);
  dst = result;

  while (length > 0 && *src != '\0') {
    if (src[0] == '%' && length > 2) {
      /* Try to convert % escape */
      hi = tohex(src[1]);
      lo = tohex(src[2]);
      if (hi >= 0 && lo >= 0) {
        /* Valid escape: emit one byte, consume three. */
        *dst++ = ((hi << 4) | lo);
        src += 3;
        length -= 3;
        continue;
      }
      /* Otherwise the '%' is copied like any other character. */
    }
    *dst++ = *src++;
    --length;
  }
  *dst = '\0';
  return (result);
}
3560
3561
/*
 * Value (0..15) of the hexadecimal digit 'c', accepting both upper-
 * and lower-case letters, or -1 if 'c' is not a hex digit.
 */
static int
tohex(int c)
{
  if ('0' <= c && c <= '9')
    return (c - '0');
  if ('A' <= c && c <= 'F')
    return (10 + (c - 'A'));
  if ('a' <= c && c <= 'f')
    return (10 + (c - 'a'));
  return (-1);
}