/src/libarchive/libarchive/archive_read_support_format_tar.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*- |
2 | | * Copyright (c) 2003-2023 Tim Kientzle |
3 | | * Copyright (c) 2011-2012 Michihiro NAKAJIMA |
4 | | * Copyright (c) 2016 Martin Matuska |
5 | | * All rights reserved. |
6 | | * |
7 | | * Redistribution and use in source and binary forms, with or without |
8 | | * modification, are permitted provided that the following conditions |
9 | | * are met: |
10 | | * 1. Redistributions of source code must retain the above copyright |
11 | | * notice, this list of conditions and the following disclaimer. |
12 | | * 2. Redistributions in binary form must reproduce the above copyright |
13 | | * notice, this list of conditions and the following disclaimer in the |
14 | | * documentation and/or other materials provided with the distribution. |
15 | | * |
16 | | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR |
17 | | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
18 | | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
19 | | * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, |
20 | | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
21 | | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
22 | | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
23 | | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
24 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
25 | | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
26 | | */ |
27 | | |
28 | | #include "archive_platform.h" |
29 | | |
30 | | #ifdef HAVE_ERRNO_H |
31 | | #include <errno.h> |
32 | | #endif |
33 | | #include <stddef.h> |
34 | | #ifdef HAVE_STDLIB_H |
35 | | #include <stdlib.h> |
36 | | #endif |
37 | | #ifdef HAVE_STRING_H |
38 | | #include <string.h> |
39 | | #endif |
40 | | |
41 | | #include "archive.h" |
42 | | #include "archive_acl_private.h" /* For ACL parsing routines. */ |
43 | | #include "archive_entry.h" |
44 | | #include "archive_entry_locale.h" |
45 | | #include "archive_private.h" |
46 | | #include "archive_read_private.h" |
47 | | |
/*
 * Smaller of two values.  This is a plain macro, so each argument may
 * be evaluated twice; do not pass expressions with side effects.
 */
#define	tar_min(a, b)	(((a) < (b)) ? (a) : (b))
49 | | |
/*
 * On-disk layout of a POSIX 'ustar' header record.
 *
 * Every member is a fixed-width character array, so the structure
 * can be overlaid directly on the raw header bytes.
 */
struct archive_entry_header_ustar {
	char	name[100];
	char	mode[8];
	char	uid[8];
	char	gid[8];
	char	size[12];
	char	mtime[12];
	char	checksum[8];
	char	typeflag[1];
	char	linkname[100];	/* Last field of the "old format" header. */
	char	magic[6];	/* POSIX writers store "ustar\0" here. */
	char	version[2];	/* POSIX writers store "00" here. */
	char	uname[32];
	char	gname[32];
	char	rdevmajor[8];
	char	rdevminor[8];
	char	prefix[155];
};
71 | | |
/*
 * On-disk layout of the GNU tar header and of the sparse-map slots
 * embedded in it.
 */

/* One sparse-map slot: two 12-byte numeric fields. */
struct gnu_sparse {
	char	offset[12];
	char	numbytes[12];
};

struct archive_entry_header_gnutar {
	char	name[100];
	char	mode[8];
	char	uid[8];
	char	gid[8];
	char	size[12];
	char	mtime[12];
	char	checksum[8];
	char	typeflag[1];
	char	linkname[100];
	/* "ustar" followed by two blanks and a terminating NUL. */
	char	magic[8];
	char	uname[32];
	char	gname[32];
	char	rdevmajor[8];
	char	rdevminor[8];
	char	atime[12];
	char	ctime[12];
	char	offset[12];
	char	longnames[4];
	char	unused[1];
	struct gnu_sparse sparse[4];
	char	isextended[1];
	char	realsize[12];
	/*
	 * There is no POSIX 'prefix' field in the old GNU format;
	 * long names are carried by a separate 'L' (longname) entry
	 * instead.
	 */
};
108 | | |
/*
 * Data specific to this format.
 *
 * One node of the singly linked list describing the current entry's
 * data layout.  The read-data routine advances 'offset' and decrements
 * 'remaining' as bytes are handed out, and treats a node with a
 * non-zero 'hole' as a region that is not returned to the caller.
 */
struct sparse_block {
	struct sparse_block	*next;
	int64_t			 offset;
	int64_t			 remaining;
	int			 hole;
};
118 | | |
119 | | struct tar { |
120 | | struct archive_string entry_pathname; |
121 | | /* For "GNU.sparse.name" and other similar path extensions. */ |
122 | | struct archive_string entry_pathname_override; |
123 | | struct archive_string entry_uname; |
124 | | struct archive_string entry_gname; |
125 | | struct archive_string entry_linkpath; |
126 | | struct archive_string longname; |
127 | | struct archive_string pax_global; |
128 | | struct archive_string line; |
129 | | int pax_hdrcharset_utf8; |
130 | | int64_t entry_bytes_remaining; |
131 | | int64_t entry_offset; |
132 | | int64_t entry_padding; |
133 | | int64_t entry_bytes_unconsumed; |
134 | | int64_t realsize; |
135 | | struct sparse_block *sparse_list; |
136 | | struct sparse_block *sparse_last; |
137 | | int64_t sparse_offset; |
138 | | int64_t sparse_numbytes; |
139 | | int sparse_gnu_major; |
140 | | int sparse_gnu_minor; |
141 | | char sparse_gnu_attributes_seen; |
142 | | char filetype; |
143 | | |
144 | | struct archive_string localname; |
145 | | struct archive_string_conv *opt_sconv; |
146 | | struct archive_string_conv *sconv; |
147 | | struct archive_string_conv *sconv_acl; |
148 | | struct archive_string_conv *sconv_default; |
149 | | int init_default_conversion; |
150 | | int compat_2x; |
151 | | int process_mac_extensions; |
152 | | int read_concatenated_archives; |
153 | | int realsize_override; |
154 | | }; |
155 | | |
/*
 * Forward declarations of the file-local functions.
 */
static int	archive_block_is_null(const char *p);
static char	*base64_decode(const char *, size_t, size_t *);
static int	gnu_add_sparse_entry(struct archive_read *, struct tar *,
		    int64_t offset, int64_t remaining);

static void	gnu_clear_sparse_list(struct tar *);
static int	gnu_sparse_old_read(struct archive_read *, struct tar *,
		    const struct archive_entry_header_gnutar *header,
		    size_t *);
static int	gnu_sparse_old_parse(struct archive_read *, struct tar *,
		    const struct gnu_sparse *sparse, int length);
static int	gnu_sparse_01_parse(struct archive_read *, struct tar *,
		    const char *, size_t);
static ssize_t	gnu_sparse_10_read(struct archive_read *, struct tar *,
		    size_t *);
static int	header_Solaris_ACL(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *, size_t *);
static int	header_common(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *);
static int	header_old_tar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *);
static int	header_pax_extension(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *, size_t *);
static int	header_pax_global(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	header_gnu_longlink(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	header_gnu_longname(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	is_mac_metadata_entry(struct archive_entry *entry);
static int	read_mac_metadata_blob(struct archive_read *,
		    struct archive_entry *, size_t *);
static int	header_volume(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	header_ustar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);
static int	header_gnutar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	archive_read_format_tar_bid(struct archive_read *, int);
static int	archive_read_format_tar_options(struct archive_read *,
		    const char *, const char *);
static int	archive_read_format_tar_cleanup(struct archive_read *);
static int	archive_read_format_tar_read_data(struct archive_read *a,
		    const void **buff, size_t *size, int64_t *offset);
static int	archive_read_format_tar_skip(struct archive_read *a);
static int	archive_read_format_tar_read_header(struct archive_read *,
		    struct archive_entry *);
static int	checksum(struct archive_read *, const void *);
static int	pax_attribute(struct archive_read *, struct tar *,
		    struct archive_entry *, const char *key,
		    size_t key_length, size_t value_length,
		    size_t *unconsumed);
static int	pax_attribute_LIBARCHIVE_xattr(struct archive_entry *,
		    const char *, size_t, const char *, size_t);
static int	pax_attribute_SCHILY_acl(struct archive_read *,
		    struct tar *, struct archive_entry *, size_t, int);
static int	pax_attribute_SUN_holesdata(struct archive_read *,
		    struct tar *, struct archive_entry *, const char *,
		    size_t);
static void	pax_time(const char *, size_t, int64_t *sec, long *nanos);
static ssize_t	readline(struct archive_read *, struct tar *,
		    const char **, ssize_t limit, size_t *);
static int	read_body_to_string(struct archive_read *, struct tar *,
		    struct archive_string *, const void *h, size_t *);
static int	read_bytes_to_string(struct archive_read *,
		    struct archive_string *, size_t, size_t *);
static int64_t	tar_atol(const char *, size_t);
static int64_t	tar_atol10(const char *, size_t);
static int64_t	tar_atol256(const char *, size_t);
static int64_t	tar_atol8(const char *, size_t);
static int	tar_read_header(struct archive_read *, struct tar *,
		    struct archive_entry *, size_t *);
static int	tohex(int c);
static char	*url_decode(const char *, size_t);
static void	tar_flush_unconsumed(struct archive_read *, size_t *);
228 | | |
/*
 * Sanity limits.
 *
 * Each value must be small enough that a maliciously crafted archive
 * cannot force an extreme memory allocation, yet large enough for
 * any legitimate input.  Expect these to be tuned with experience.
 */
/* Longest uname or gname: 64kiB */
static const size_t guname_limit = 64 * 1024;
/* Longest path name: 1MiB */
static const size_t pathname_limit = 1024 * 1024;
/* Longest sparse map: 8MiB */
static const size_t sparse_map_limit = 8 * 1024 * 1024;
/* Longest xattr: 16MiB */
static const size_t xattr_limit = 16 * 1024 * 1024;
/* Longest fflags */
static const size_t fflags_limit = 512;
/* Longest textual ACL: 128kiB */
static const size_t acl_limit = 128 * 1024;
/* 2^60 bytes = 1 ExbiByte */
static const int64_t entry_limit = 0xfffffffffffffffLL;
241 | | |
242 | | int |
243 | | archive_read_support_format_gnutar(struct archive *a) |
244 | 238 | { |
245 | 238 | archive_check_magic(a, ARCHIVE_READ_MAGIC, |
246 | 238 | ARCHIVE_STATE_NEW, "archive_read_support_format_gnutar"); |
247 | 238 | return (archive_read_support_format_tar(a)); |
248 | 238 | } |
249 | | |
250 | | |
251 | | int |
252 | | archive_read_support_format_tar(struct archive *_a) |
253 | 476 | { |
254 | 476 | struct archive_read *a = (struct archive_read *)_a; |
255 | 476 | struct tar *tar; |
256 | 476 | int r; |
257 | | |
258 | 476 | archive_check_magic(_a, ARCHIVE_READ_MAGIC, |
259 | 476 | ARCHIVE_STATE_NEW, "archive_read_support_format_tar"); |
260 | | |
261 | 476 | tar = (struct tar *)calloc(1, sizeof(*tar)); |
262 | 476 | if (tar == NULL) { |
263 | 0 | archive_set_error(&a->archive, ENOMEM, |
264 | 0 | "Can't allocate tar data"); |
265 | 0 | return (ARCHIVE_FATAL); |
266 | 0 | } |
267 | | #ifdef HAVE_COPYFILE_H |
268 | | /* Set this by default on Mac OS. */ |
269 | | tar->process_mac_extensions = 1; |
270 | | #endif |
271 | | |
272 | 476 | r = __archive_read_register_format(a, tar, "tar", |
273 | 476 | archive_read_format_tar_bid, |
274 | 476 | archive_read_format_tar_options, |
275 | 476 | archive_read_format_tar_read_header, |
276 | 476 | archive_read_format_tar_read_data, |
277 | 476 | archive_read_format_tar_skip, |
278 | 476 | NULL, |
279 | 476 | archive_read_format_tar_cleanup, |
280 | 476 | NULL, |
281 | 476 | NULL); |
282 | | |
283 | 476 | if (r != ARCHIVE_OK) |
284 | 238 | free(tar); |
285 | 476 | return (ARCHIVE_OK); |
286 | 476 | } |
287 | | |
288 | | static int |
289 | | archive_read_format_tar_cleanup(struct archive_read *a) |
290 | 238 | { |
291 | 238 | struct tar *tar; |
292 | | |
293 | 238 | tar = (struct tar *)(a->format->data); |
294 | 238 | gnu_clear_sparse_list(tar); |
295 | 238 | archive_string_free(&tar->entry_pathname); |
296 | 238 | archive_string_free(&tar->entry_pathname_override); |
297 | 238 | archive_string_free(&tar->entry_uname); |
298 | 238 | archive_string_free(&tar->entry_gname); |
299 | 238 | archive_string_free(&tar->line); |
300 | 238 | archive_string_free(&tar->pax_global); |
301 | 238 | archive_string_free(&tar->longname); |
302 | 238 | archive_string_free(&tar->localname); |
303 | 238 | free(tar); |
304 | 238 | (a->format->data) = NULL; |
305 | 238 | return (ARCHIVE_OK); |
306 | 238 | } |
307 | | |
/*
 * Check that a numeric header field looks plausible.
 *
 * The check is deliberately lenient because tar writers differ a lot:
 *  = POSIX (IEEE Std 1003.1-1988) ustar asks for octal digits with
 *    leading zeros, terminated by a space or a NUL.
 *  = Many writers terminate differently (libarchive itself drops the
 *    terminator to gain one or two digits).
 *  = Many writers pad with spaces and leave out leading zeros.
 *  = GNU tar and star switch to base-256 when a number does not fit
 *    in octal.
 *
 * Headers seen in the wild that must be accepted:
 *  = Perl Archive::Tar ends uid, gid, devminor and devmajor with two
 *    NUL bytes, pads size with spaces and other numeric fields with
 *    zeroes.
 *  = plexus-archiver before 2.6.3 (before it moved to
 *    commons-compress) may fill uid and gid entirely with spaces and
 *    pads every numeric field with spaces.
 *
 * Accepted shape: optional leading spaces, then octal digits, then
 * only spaces and NULs up to the end of the field.  A field whose
 * first byte is 128, 255 or 0 is accepted without further checks.
 *
 * Returns 1 if the field is acceptable, 0 otherwise.
 */
static int
validate_number_field(const char* p_field, size_t i_size)
{
	const unsigned char lead = (unsigned char)p_field[0];
	size_t pos;

	/* Base-256 marker (or NUL): there is nothing we can check. */
	if (lead == 128 || lead == 255 || lead == 0)
		return 1;

	/* Otherwise the field must be octal.  Step over leading blanks. */
	for (pos = 0; pos < i_size && p_field[pos] == ' '; ++pos)
		;

	/* Step over the run of octal digits. */
	for (; pos < i_size &&
	    p_field[pos] >= '0' && p_field[pos] <= '7'; ++pos)
		;

	/* Whatever is left may only be space or NUL padding. */
	for (; pos < i_size; ++pos) {
		if (p_field[pos] != ' ' && p_field[pos] != 0)
			return 0;
	}
	return 1;
}
359 | | |
360 | | static int |
361 | | archive_read_format_tar_bid(struct archive_read *a, int best_bid) |
362 | 236 | { |
363 | 236 | int bid; |
364 | 236 | const char *h; |
365 | 236 | const struct archive_entry_header_ustar *header; |
366 | | |
367 | 236 | (void)best_bid; /* UNUSED */ |
368 | | |
369 | 236 | bid = 0; |
370 | | |
371 | | /* Now let's look at the actual header and see if it matches. */ |
372 | 236 | h = __archive_read_ahead(a, 512, NULL); |
373 | 236 | if (h == NULL) |
374 | 5 | return (-1); |
375 | | |
376 | | /* If it's an end-of-archive mark, we can handle it. */ |
377 | 231 | if (h[0] == 0 && archive_block_is_null(h)) { |
378 | | /* |
379 | | * Usually, I bid the number of bits verified, but |
380 | | * in this case, 4096 seems excessive so I picked 10 as |
381 | | * an arbitrary but reasonable-seeming value. |
382 | | */ |
383 | 20 | return (10); |
384 | 20 | } |
385 | | |
386 | | /* If it's not an end-of-archive mark, it must have a valid checksum.*/ |
387 | 211 | if (!checksum(a, h)) |
388 | 206 | return (0); |
389 | 5 | bid += 48; /* Checksum is usually 6 octal digits. */ |
390 | | |
391 | 5 | header = (const struct archive_entry_header_ustar *)h; |
392 | | |
393 | | /* Recognize POSIX formats. */ |
394 | 5 | if ((memcmp(header->magic, "ustar\0", 6) == 0) |
395 | 5 | && (memcmp(header->version, "00", 2) == 0)) |
396 | 0 | bid += 56; |
397 | | |
398 | | /* Recognize GNU tar format. */ |
399 | 5 | if ((memcmp(header->magic, "ustar ", 6) == 0) |
400 | 5 | && (memcmp(header->version, " \0", 2) == 0)) |
401 | 1 | bid += 56; |
402 | | |
403 | | /* Type flag must be null, digit or A-Z, a-z. */ |
404 | 5 | if (header->typeflag[0] != 0 && |
405 | 5 | !( header->typeflag[0] >= '0' && header->typeflag[0] <= '9') && |
406 | 5 | !( header->typeflag[0] >= 'A' && header->typeflag[0] <= 'Z') && |
407 | 5 | !( header->typeflag[0] >= 'a' && header->typeflag[0] <= 'z') ) |
408 | 0 | return (0); |
409 | 5 | bid += 2; /* 6 bits of variation in an 8-bit field leaves 2 bits. */ |
410 | | |
411 | | /* |
412 | | * Check format of mode/uid/gid/mtime/size/rdevmajor/rdevminor fields. |
413 | | */ |
414 | 5 | if (validate_number_field(header->mode, sizeof(header->mode)) == 0 |
415 | 5 | || validate_number_field(header->uid, sizeof(header->uid)) == 0 |
416 | 5 | || validate_number_field(header->gid, sizeof(header->gid)) == 0 |
417 | 5 | || validate_number_field(header->mtime, sizeof(header->mtime)) == 0 |
418 | 5 | || validate_number_field(header->size, sizeof(header->size)) == 0 |
419 | 5 | || validate_number_field(header->rdevmajor, sizeof(header->rdevmajor)) == 0 |
420 | 5 | || validate_number_field(header->rdevminor, sizeof(header->rdevminor)) == 0) { |
421 | 0 | bid = 0; |
422 | 0 | } |
423 | | |
424 | 5 | return (bid); |
425 | 5 | } |
426 | | |
427 | | static int |
428 | | archive_read_format_tar_options(struct archive_read *a, |
429 | | const char *key, const char *val) |
430 | 476 | { |
431 | 476 | struct tar *tar; |
432 | 476 | int ret = ARCHIVE_FAILED; |
433 | | |
434 | 476 | tar = (struct tar *)(a->format->data); |
435 | 476 | if (strcmp(key, "compat-2x") == 0) { |
436 | | /* Handle UTF-8 filenames as libarchive 2.x */ |
437 | 0 | tar->compat_2x = (val != NULL && val[0] != 0); |
438 | 0 | tar->init_default_conversion = tar->compat_2x; |
439 | 0 | return (ARCHIVE_OK); |
440 | 476 | } else if (strcmp(key, "hdrcharset") == 0) { |
441 | 0 | if (val == NULL || val[0] == 0) |
442 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
443 | 0 | "tar: hdrcharset option needs a character-set name"); |
444 | 0 | else { |
445 | 0 | tar->opt_sconv = |
446 | 0 | archive_string_conversion_from_charset( |
447 | 0 | &a->archive, val, 0); |
448 | 0 | if (tar->opt_sconv != NULL) |
449 | 0 | ret = ARCHIVE_OK; |
450 | 0 | else |
451 | 0 | ret = ARCHIVE_FATAL; |
452 | 0 | } |
453 | 0 | return (ret); |
454 | 476 | } else if (strcmp(key, "mac-ext") == 0) { |
455 | 238 | tar->process_mac_extensions = (val != NULL && val[0] != 0); |
456 | 238 | return (ARCHIVE_OK); |
457 | 238 | } else if (strcmp(key, "read_concatenated_archives") == 0) { |
458 | 238 | tar->read_concatenated_archives = (val != NULL && val[0] != 0); |
459 | 238 | return (ARCHIVE_OK); |
460 | 238 | } |
461 | | |
462 | | /* Note: The "warn" return is just to inform the options |
463 | | * supervisor that we didn't handle it. It will generate |
464 | | * a suitable error if no one used this option. */ |
465 | 0 | return (ARCHIVE_WARN); |
466 | 476 | } |
467 | | |
/*
 * Utility function: the single place that tracks how much read-ahead
 * data is still outstanding.  Anything pending is consumed here and
 * the counter is reset, because the caller is about to issue further
 * read-aheads.
 *
 * Debugging note: a disabled experiment that used to sit here fetched
 * the pending bytes with __archive_read_ahead() and overwrote them
 * with 0xff before consuming them, so that any code still using that
 * memory would break visibly.  It currently WILL break things, so it
 * should only be revived while debugging that issue.
 */
static void
tar_flush_unconsumed(struct archive_read *a, size_t *unconsumed)
{
	if (*unconsumed == 0)
		return;

	__archive_read_consume(a, *unconsumed);
	*unconsumed = 0;
}
489 | | |
490 | | /* |
491 | | * The function invoked by archive_read_next_header(). This |
492 | | * just sets up a few things and then calls the internal |
493 | | * tar_read_header() function below. |
494 | | */ |
495 | | static int |
496 | | archive_read_format_tar_read_header(struct archive_read *a, |
497 | | struct archive_entry *entry) |
498 | 290k | { |
499 | | /* |
500 | | * When converting tar archives to cpio archives, it is |
501 | | * essential that each distinct file have a distinct inode |
502 | | * number. To simplify this, we keep a static count here to |
503 | | * assign fake dev/inode numbers to each tar entry. Note that |
504 | | * pax format archives may overwrite this with something more |
505 | | * useful. |
506 | | * |
507 | | * Ideally, we would track every file read from the archive so |
508 | | * that we could assign the same dev/ino pair to hardlinks, |
509 | | * but the memory required to store a complete lookup table is |
510 | | * probably not worthwhile just to support the relatively |
511 | | * obscure tar->cpio conversion case. |
512 | | */ |
513 | | /* TODO: Move this into `struct tar` to avoid conflicts |
514 | | * when reading multiple archives */ |
515 | 290k | static int default_inode; |
516 | 290k | static int default_dev; |
517 | 290k | struct tar *tar; |
518 | 290k | const char *p; |
519 | 290k | const wchar_t *wp; |
520 | 290k | int r; |
521 | 290k | size_t l, unconsumed = 0; |
522 | | |
523 | | /* Assign default device/inode values. */ |
524 | 290k | archive_entry_set_dev(entry, 1 + default_dev); /* Don't use zero. */ |
525 | 290k | archive_entry_set_ino(entry, ++default_inode); /* Don't use zero. */ |
526 | | /* Limit generated st_ino number to 16 bits. */ |
527 | 290k | if (default_inode >= 0xffff) { |
528 | 4 | ++default_dev; |
529 | 4 | default_inode = 0; |
530 | 4 | } |
531 | | |
532 | 290k | tar = (struct tar *)(a->format->data); |
533 | 290k | tar->entry_offset = 0; |
534 | 290k | gnu_clear_sparse_list(tar); |
535 | 290k | tar->realsize = -1; /* Mark this as "unset" */ |
536 | 290k | tar->realsize_override = 0; |
537 | | |
538 | | /* Setup default string conversion. */ |
539 | 290k | tar->sconv = tar->opt_sconv; |
540 | 290k | if (tar->sconv == NULL) { |
541 | 290k | if (!tar->init_default_conversion) { |
542 | 10 | tar->sconv_default = |
543 | 10 | archive_string_default_conversion_for_read(&(a->archive)); |
544 | 10 | tar->init_default_conversion = 1; |
545 | 10 | } |
546 | 290k | tar->sconv = tar->sconv_default; |
547 | 290k | } |
548 | | |
549 | 290k | r = tar_read_header(a, tar, entry, &unconsumed); |
550 | | |
551 | 290k | tar_flush_unconsumed(a, &unconsumed); |
552 | | |
553 | | /* |
554 | | * "non-sparse" files are really just sparse files with |
555 | | * a single block. |
556 | | */ |
557 | 290k | if (tar->sparse_list == NULL) { |
558 | 290k | if (gnu_add_sparse_entry(a, tar, 0, tar->entry_bytes_remaining) |
559 | 290k | != ARCHIVE_OK) |
560 | 0 | return (ARCHIVE_FATAL); |
561 | 290k | } else { |
562 | 0 | struct sparse_block *sb; |
563 | |
|
564 | 0 | for (sb = tar->sparse_list; sb != NULL; sb = sb->next) { |
565 | 0 | if (!sb->hole) |
566 | 0 | archive_entry_sparse_add_entry(entry, |
567 | 0 | sb->offset, sb->remaining); |
568 | 0 | } |
569 | 0 | } |
570 | | |
571 | 290k | if (r == ARCHIVE_OK && archive_entry_filetype(entry) == AE_IFREG) { |
572 | | /* |
573 | | * "Regular" entry with trailing '/' is really |
574 | | * directory: This is needed for certain old tar |
575 | | * variants and even for some broken newer ones. |
576 | | */ |
577 | 5 | if ((wp = archive_entry_pathname_w(entry)) != NULL) { |
578 | 5 | l = wcslen(wp); |
579 | 5 | if (l > 0 && wp[l - 1] == L'/') { |
580 | 0 | archive_entry_set_filetype(entry, AE_IFDIR); |
581 | 0 | tar->entry_bytes_remaining = 0; |
582 | 0 | tar->entry_padding = 0; |
583 | 0 | } |
584 | 5 | } else if ((p = archive_entry_pathname(entry)) != NULL) { |
585 | 0 | l = strlen(p); |
586 | 0 | if (l > 0 && p[l - 1] == '/') { |
587 | 0 | archive_entry_set_filetype(entry, AE_IFDIR); |
588 | 0 | tar->entry_bytes_remaining = 0; |
589 | 0 | tar->entry_padding = 0; |
590 | 0 | } |
591 | 0 | } |
592 | 5 | } |
593 | 290k | return (r); |
594 | 290k | } |
595 | | |
596 | | static int |
597 | | archive_read_format_tar_read_data(struct archive_read *a, |
598 | | const void **buff, size_t *size, int64_t *offset) |
599 | 7 | { |
600 | 7 | ssize_t bytes_read; |
601 | 7 | struct tar *tar; |
602 | 7 | struct sparse_block *p; |
603 | | |
604 | 7 | tar = (struct tar *)(a->format->data); |
605 | | |
606 | 7 | for (;;) { |
607 | | /* Remove exhausted entries from sparse list. */ |
608 | 12 | while (tar->sparse_list != NULL && |
609 | 12 | tar->sparse_list->remaining == 0) { |
610 | 5 | p = tar->sparse_list; |
611 | 5 | tar->sparse_list = p->next; |
612 | 5 | free(p); |
613 | 5 | } |
614 | | |
615 | 7 | if (tar->entry_bytes_unconsumed) { |
616 | 1 | __archive_read_consume(a, tar->entry_bytes_unconsumed); |
617 | 1 | tar->entry_bytes_unconsumed = 0; |
618 | 1 | } |
619 | | |
620 | | /* If we're at end of file, return EOF. */ |
621 | 7 | if (tar->sparse_list == NULL || |
622 | 7 | tar->entry_bytes_remaining == 0) { |
623 | 6 | if (__archive_read_consume(a, tar->entry_padding) < 0) |
624 | 0 | return (ARCHIVE_FATAL); |
625 | 6 | tar->entry_padding = 0; |
626 | 6 | *buff = NULL; |
627 | 6 | *size = 0; |
628 | 6 | *offset = tar->realsize; |
629 | 6 | return (ARCHIVE_EOF); |
630 | 6 | } |
631 | | |
632 | 1 | *buff = __archive_read_ahead(a, 1, &bytes_read); |
633 | 1 | if (bytes_read < 0) |
634 | 0 | return (ARCHIVE_FATAL); |
635 | 1 | if (*buff == NULL) { |
636 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
637 | 0 | "Truncated tar archive" |
638 | 0 | " detected while reading data"); |
639 | 0 | return (ARCHIVE_FATAL); |
640 | 0 | } |
641 | 1 | if (bytes_read > tar->entry_bytes_remaining) |
642 | 1 | bytes_read = (ssize_t)tar->entry_bytes_remaining; |
643 | | /* Don't read more than is available in the |
644 | | * current sparse block. */ |
645 | 1 | if (tar->sparse_list->remaining < bytes_read) |
646 | 0 | bytes_read = (ssize_t)tar->sparse_list->remaining; |
647 | 1 | *size = bytes_read; |
648 | 1 | *offset = tar->sparse_list->offset; |
649 | 1 | tar->sparse_list->remaining -= bytes_read; |
650 | 1 | tar->sparse_list->offset += bytes_read; |
651 | 1 | tar->entry_bytes_remaining -= bytes_read; |
652 | 1 | tar->entry_bytes_unconsumed = bytes_read; |
653 | | |
654 | 1 | if (!tar->sparse_list->hole) |
655 | 1 | return (ARCHIVE_OK); |
656 | | /* Current is hole data and skip this. */ |
657 | 1 | } |
658 | 7 | } |
659 | | |
660 | | static int |
661 | | archive_read_format_tar_skip(struct archive_read *a) |
662 | 5 | { |
663 | 5 | int64_t bytes_skipped; |
664 | 5 | int64_t request; |
665 | 5 | struct sparse_block *p; |
666 | 5 | struct tar* tar; |
667 | | |
668 | 5 | tar = (struct tar *)(a->format->data); |
669 | | |
670 | | /* Do not consume the hole of a sparse file. */ |
671 | 5 | request = 0; |
672 | 5 | for (p = tar->sparse_list; p != NULL; p = p->next) { |
673 | 0 | if (!p->hole) { |
674 | 0 | if (p->remaining >= INT64_MAX - request) { |
675 | 0 | return ARCHIVE_FATAL; |
676 | 0 | } |
677 | 0 | request += p->remaining; |
678 | 0 | } |
679 | 0 | } |
680 | 5 | if (request > tar->entry_bytes_remaining) |
681 | 0 | request = tar->entry_bytes_remaining; |
682 | 5 | request += tar->entry_padding + tar->entry_bytes_unconsumed; |
683 | | |
684 | 5 | bytes_skipped = __archive_read_consume(a, request); |
685 | 5 | if (bytes_skipped < 0) |
686 | 0 | return (ARCHIVE_FATAL); |
687 | | |
688 | 5 | tar->entry_bytes_remaining = 0; |
689 | 5 | tar->entry_bytes_unconsumed = 0; |
690 | 5 | tar->entry_padding = 0; |
691 | | |
692 | | /* Free the sparse list. */ |
693 | 5 | gnu_clear_sparse_list(tar); |
694 | | |
695 | 5 | return (ARCHIVE_OK); |
696 | 5 | } |
697 | | |
698 | | /* |
699 | | * This function reads and interprets all of the headers associated |
700 | | * with a single entry. |
701 | | */ |
702 | | static int |
703 | | tar_read_header(struct archive_read *a, struct tar *tar, |
704 | | struct archive_entry *entry, size_t *unconsumed) |
705 | 290k | { |
706 | 290k | ssize_t bytes; |
707 | 290k | int err = ARCHIVE_OK, err2; |
708 | 290k | int eof_fatal = 0; /* EOF is okay at some points... */ |
709 | 290k | const char *h; |
710 | 290k | const struct archive_entry_header_ustar *header; |
711 | 290k | const struct archive_entry_header_gnutar *gnuheader; |
712 | | |
713 | | /* Bitmask of what header types we've seen. */ |
714 | 290k | int32_t seen_headers = 0; |
715 | 290k | static const int32_t seen_A_header = 1; |
716 | 290k | static const int32_t seen_g_header = 2; |
717 | 290k | static const int32_t seen_K_header = 4; |
718 | 290k | static const int32_t seen_L_header = 8; |
719 | 290k | static const int32_t seen_V_header = 16; |
720 | 290k | static const int32_t seen_x_header = 32; /* Also X */ |
721 | 290k | static const int32_t seen_mac_metadata = 512; |
722 | | |
723 | 290k | tar->pax_hdrcharset_utf8 = 1; |
724 | 290k | tar->sparse_gnu_attributes_seen = 0; |
725 | 290k | archive_string_empty(&(tar->entry_gname)); |
726 | 290k | archive_string_empty(&(tar->entry_pathname)); |
727 | 290k | archive_string_empty(&(tar->entry_pathname_override)); |
728 | 290k | archive_string_empty(&(tar->entry_uname)); |
729 | | |
730 | | /* Ensure format is set. */ |
731 | 290k | if (a->archive.archive_format_name == NULL) { |
732 | 10 | a->archive.archive_format = ARCHIVE_FORMAT_TAR; |
733 | 10 | a->archive.archive_format_name = "tar"; |
734 | 10 | } |
735 | | |
736 | | /* |
737 | | * TODO: Write global/default pax options into |
738 | | * 'entry' struct here before overwriting with |
739 | | * file-specific options. |
740 | | */ |
741 | | |
742 | | /* Loop over all the headers needed for the next entry */ |
743 | 290k | for (;;) { |
744 | | |
745 | | /* Find the next valid header record. */ |
746 | 307k | while (1) { |
747 | 307k | tar_flush_unconsumed(a, unconsumed); |
748 | | |
749 | | /* Read 512-byte header record */ |
750 | 307k | h = __archive_read_ahead(a, 512, &bytes); |
751 | 307k | if (bytes < 0) |
752 | 1 | return ((int)bytes); |
753 | 307k | if (bytes == 0) { /* EOF at a block boundary. */ |
754 | 4 | if (eof_fatal) { |
755 | | /* We've read a special header already; |
756 | | * if there's no regular header, then this is |
757 | | * a premature EOF. */ |
758 | 0 | archive_set_error(&a->archive, EINVAL, |
759 | 0 | "Damaged tar archive"); |
760 | 0 | return (ARCHIVE_FATAL); |
761 | 4 | } else { |
762 | 4 | return (ARCHIVE_EOF); |
763 | 4 | } |
764 | 4 | } |
765 | 307k | if (bytes < 512) { /* Short block at EOF; this is bad. */ |
766 | 5 | archive_set_error(&a->archive, |
767 | 5 | ARCHIVE_ERRNO_FILE_FORMAT, |
768 | 5 | "Truncated tar archive" |
769 | 5 | " detected while reading next heaader"); |
770 | 5 | return (ARCHIVE_FATAL); |
771 | 5 | } |
772 | 307k | *unconsumed += 512; |
773 | | |
774 | 307k | if (h[0] == 0 && archive_block_is_null(h)) { |
775 | | /* We found a NULL block which indicates end-of-archive */ |
776 | | |
777 | 16.1k | if (tar->read_concatenated_archives) { |
778 | | /* We're ignoring NULL blocks, so keep going. */ |
779 | 16.1k | continue; |
780 | 16.1k | } |
781 | | |
782 | | /* Try to consume a second all-null record, as well. */ |
783 | | /* If we can't, that's okay. */ |
784 | 0 | tar_flush_unconsumed(a, unconsumed); |
785 | 0 | h = __archive_read_ahead(a, 512, NULL); |
786 | 0 | if (h != NULL && h[0] == 0 && archive_block_is_null(h)) |
787 | 0 | __archive_read_consume(a, 512); |
788 | |
|
789 | 0 | archive_clear_error(&a->archive); |
790 | 0 | return (ARCHIVE_EOF); |
791 | 16.1k | } |
792 | | |
793 | | /* This is NOT a null block, so it must be a valid header. */ |
794 | 290k | if (!checksum(a, h)) { |
795 | 290k | tar_flush_unconsumed(a, unconsumed); |
796 | 290k | archive_set_error(&a->archive, EINVAL, "Damaged tar archive"); |
797 | | /* If we've read some critical information (pax headers, etc) |
798 | | * and _then_ see a bad header, we can't really recover. */ |
799 | 290k | if (eof_fatal) { |
800 | 0 | return (ARCHIVE_FATAL); |
801 | 290k | } else { |
802 | 290k | return (ARCHIVE_RETRY); |
803 | 290k | } |
804 | 290k | } |
805 | 5 | break; |
806 | 290k | } |
807 | | |
808 | | /* Determine the format variant. */ |
809 | 5 | header = (const struct archive_entry_header_ustar *)h; |
810 | 5 | switch(header->typeflag[0]) { |
811 | 0 | case 'A': /* Solaris tar ACL */ |
812 | 0 | if (seen_headers & seen_A_header) { |
813 | 0 | return (ARCHIVE_FATAL); |
814 | 0 | } |
815 | 0 | seen_headers |= seen_A_header; |
816 | 0 | a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; |
817 | 0 | a->archive.archive_format_name = "Solaris tar"; |
818 | 0 | err2 = header_Solaris_ACL(a, tar, entry, h, unconsumed); |
819 | 0 | break; |
820 | 0 | case 'g': /* POSIX-standard 'g' header. */ |
821 | 0 | if (seen_headers & seen_g_header) { |
822 | 0 | return (ARCHIVE_FATAL); |
823 | 0 | } |
824 | 0 | seen_headers |= seen_g_header; |
825 | 0 | a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; |
826 | 0 | a->archive.archive_format_name = "POSIX pax interchange format"; |
827 | 0 | err2 = header_pax_global(a, tar, entry, h, unconsumed); |
828 | 0 | break; |
829 | 0 | case 'K': /* Long link name (GNU tar, others) */ |
830 | 0 | if (seen_headers & seen_K_header) { |
831 | 0 | return (ARCHIVE_FATAL); |
832 | 0 | } |
833 | 0 | seen_headers |= seen_K_header; |
834 | 0 | err2 = header_gnu_longlink(a, tar, entry, h, unconsumed); |
835 | 0 | break; |
836 | 0 | case 'L': /* Long filename (GNU tar, others) */ |
837 | 0 | if (seen_headers & seen_L_header) { |
838 | 0 | return (ARCHIVE_FATAL); |
839 | 0 | } |
840 | 0 | seen_headers |= seen_L_header; |
841 | 0 | err2 = header_gnu_longname(a, tar, entry, h, unconsumed); |
842 | 0 | break; |
843 | 0 | case 'V': /* GNU volume header */ |
844 | 0 | if (seen_headers & seen_V_header) { |
845 | 0 | return (ARCHIVE_FATAL); |
846 | 0 | } |
847 | 0 | seen_headers |= seen_V_header; |
848 | 0 | err2 = header_volume(a, tar, entry, h, unconsumed); |
849 | 0 | break; |
850 | 0 | case 'X': /* Used by SUN tar; same as 'x'. */ |
851 | 0 | if (seen_headers & seen_x_header) { |
852 | 0 | return (ARCHIVE_FATAL); |
853 | 0 | } |
854 | 0 | seen_headers |= seen_x_header; |
855 | 0 | a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; |
856 | 0 | a->archive.archive_format_name = |
857 | 0 | "POSIX pax interchange format (Sun variant)"; |
858 | 0 | err2 = header_pax_extension(a, tar, entry, h, unconsumed); |
859 | 0 | break; |
860 | 0 | case 'x': /* POSIX-standard 'x' header. */ |
861 | 0 | if (seen_headers & seen_x_header) { |
862 | 0 | return (ARCHIVE_FATAL); |
863 | 0 | } |
864 | 0 | seen_headers |= seen_x_header; |
865 | 0 | a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; |
866 | 0 | a->archive.archive_format_name = "POSIX pax interchange format"; |
867 | 0 | err2 = header_pax_extension(a, tar, entry, h, unconsumed); |
868 | 0 | break; |
869 | 5 | default: /* Regular header: Legacy tar, GNU tar, or ustar */ |
870 | 5 | gnuheader = (const struct archive_entry_header_gnutar *)h; |
871 | 5 | if (memcmp(gnuheader->magic, "ustar \0", 8) == 0) { |
872 | 1 | a->archive.archive_format = ARCHIVE_FORMAT_TAR_GNUTAR; |
873 | 1 | a->archive.archive_format_name = "GNU tar format"; |
874 | 1 | err2 = header_gnutar(a, tar, entry, h, unconsumed); |
875 | 4 | } else if (memcmp(header->magic, "ustar", 5) == 0) { |
876 | 0 | if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) { |
877 | 0 | a->archive.archive_format = ARCHIVE_FORMAT_TAR_USTAR; |
878 | 0 | a->archive.archive_format_name = "POSIX ustar format"; |
879 | 0 | } |
880 | 0 | err2 = header_ustar(a, tar, entry, h); |
881 | 4 | } else { |
882 | 4 | a->archive.archive_format = ARCHIVE_FORMAT_TAR; |
883 | 4 | a->archive.archive_format_name = "tar (non-POSIX)"; |
884 | 4 | err2 = header_old_tar(a, tar, entry, h); |
885 | 4 | } |
886 | 5 | err = err_combine(err, err2); |
887 | | /* We return warnings or success as-is. Anything else is fatal. */ |
888 | 5 | if (err < ARCHIVE_WARN) { |
889 | 0 | return (ARCHIVE_FATAL); |
890 | 0 | } |
891 | | /* Filename of the form `._filename` is an AppleDouble |
892 | | * extension entry. The body is the macOS metadata blob; |
893 | | * this is followed by another entry with the actual |
894 | | * regular file data. |
895 | | * This design has two drawbacks: |
896 | | * = it's brittle; you might just have a file with such a name |
897 | | * = it duplicates any long pathname extensions |
898 | | * |
899 | | * TODO: This probably shouldn't be here at all. Consider |
900 | | * just returning the contents as a regular entry here and |
901 | | * then dealing with it when we write data to disk. |
902 | | */ |
903 | 5 | if (tar->process_mac_extensions |
904 | 5 | && ((seen_headers & seen_mac_metadata) == 0) |
905 | 5 | && is_mac_metadata_entry(entry)) { |
906 | 0 | err2 = read_mac_metadata_blob(a, entry, unconsumed); |
907 | 0 | if (err2 < ARCHIVE_WARN) { |
908 | 0 | return (ARCHIVE_FATAL); |
909 | 0 | } |
910 | 0 | err = err_combine(err, err2); |
911 | | /* Note: Other headers can appear again. */ |
912 | 0 | seen_headers = seen_mac_metadata; |
913 | 0 | break; |
914 | 0 | } |
915 | | |
916 | | /* Reconcile GNU sparse attributes */ |
917 | 5 | if (tar->sparse_gnu_attributes_seen) { |
918 | | /* Only 'S' (GNU sparse) and ustar '0' regular files can be sparse */ |
919 | 0 | if (tar->filetype != 'S' && tar->filetype != '0') { |
920 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
921 | 0 | "Non-regular file cannot be sparse"); |
922 | 0 | return (ARCHIVE_WARN); |
923 | 0 | } else if (tar->sparse_gnu_major == 0 && |
924 | 0 | tar->sparse_gnu_minor == 0) { |
925 | | /* Sparse map already parsed from 'x' header */ |
926 | 0 | } else if (tar->sparse_gnu_major == 0 && |
927 | 0 | tar->sparse_gnu_minor == 1) { |
928 | | /* Sparse map already parsed from 'x' header */ |
929 | 0 | } else if (tar->sparse_gnu_major == 1 && |
930 | 0 | tar->sparse_gnu_minor == 0) { |
931 | | /* Sparse map is prepended to file contents */ |
932 | 0 | ssize_t bytes_read; |
933 | 0 | bytes_read = gnu_sparse_10_read(a, tar, unconsumed); |
934 | 0 | if (bytes_read < 0) |
935 | 0 | return ((int)bytes_read); |
936 | 0 | tar->entry_bytes_remaining -= bytes_read; |
937 | 0 | } else { |
938 | 0 | archive_set_error(&a->archive, |
939 | 0 | ARCHIVE_ERRNO_MISC, |
940 | 0 | "Unrecognized GNU sparse file format"); |
941 | 0 | return (ARCHIVE_WARN); |
942 | 0 | } |
943 | 0 | } |
944 | 5 | return (err); |
945 | 5 | } |
946 | | |
947 | | /* We're between headers ... */ |
948 | 0 | err = err_combine(err, err2); |
949 | 0 | if (err == ARCHIVE_FATAL) |
950 | 0 | return (err); |
951 | | |
952 | | /* The GNU volume header and the pax `g` global header |
953 | | * are both allowed to be the only header in an |
954 | | * archive. If we've seen any other header, a |
955 | | * following EOF is fatal. */ |
956 | 0 | if ((seen_headers & ~seen_V_header & ~seen_g_header) != 0) { |
957 | 0 | eof_fatal = 1; |
958 | 0 | } |
959 | 0 | } |
960 | 290k | } |
961 | | |
962 | | /* |
963 | | * Return true if block checksum is correct. |
964 | | */ |
965 | | static int |
966 | | checksum(struct archive_read *a, const void *h) |
967 | 291k | { |
968 | 291k | const unsigned char *bytes; |
969 | 291k | const struct archive_entry_header_ustar *header; |
970 | 291k | int check, sum; |
971 | 291k | size_t i; |
972 | | |
973 | 291k | (void)a; /* UNUSED */ |
974 | 291k | bytes = (const unsigned char *)h; |
975 | 291k | header = (const struct archive_entry_header_ustar *)h; |
976 | | |
977 | | /* Checksum field must hold an octal number */ |
978 | 295k | for (i = 0; i < sizeof(header->checksum); ++i) { |
979 | 295k | char c = header->checksum[i]; |
980 | 295k | if (c != ' ' && c != '\0' && (c < '0' || c > '7')) |
981 | 290k | return 0; |
982 | 295k | } |
983 | | |
984 | | /* |
985 | | * Test the checksum. Note that POSIX specifies _unsigned_ |
986 | | * bytes for this calculation. |
987 | | */ |
988 | 544 | sum = (int)tar_atol(header->checksum, sizeof(header->checksum)); |
989 | 544 | check = 0; |
990 | 81.0k | for (i = 0; i < 148; i++) |
991 | 80.5k | check += (unsigned char)bytes[i]; |
992 | 4.89k | for (; i < 156; i++) |
993 | 4.35k | check += 32; |
994 | 194k | for (; i < 512; i++) |
995 | 193k | check += (unsigned char)bytes[i]; |
996 | 544 | if (sum == check) |
997 | 2 | return (1); |
998 | | |
999 | | /* |
1000 | | * Repeat test with _signed_ bytes, just in case this archive |
1001 | | * was created by an old BSD, Solaris, or HP-UX tar with a |
1002 | | * broken checksum calculation. |
1003 | | */ |
1004 | 542 | check = 0; |
1005 | 80.7k | for (i = 0; i < 148; i++) |
1006 | 80.2k | check += (signed char)bytes[i]; |
1007 | 4.87k | for (; i < 156; i++) |
1008 | 4.33k | check += 32; |
1009 | 193k | for (; i < 512; i++) |
1010 | 192k | check += (signed char)bytes[i]; |
1011 | 542 | if (sum == check) |
1012 | 8 | return (1); |
1013 | | |
1014 | 534 | return (0); |
1015 | 542 | } |
1016 | | |
/*
 * Report whether the 512 bytes starting at `p` are all zero.
 * Returns 1 if every byte is NUL, 0 as soon as a non-NUL byte is found.
 */
static int
archive_block_is_null(const char *p)
{
	const char *const end = p + 512;

	while (p < end) {
		if (*p != '\0')
			return (0);
		++p;
	}
	return (1);
}
1030 | | |
1031 | | /* |
1032 | | * Interpret 'A' Solaris ACL header |
1033 | | */ |
1034 | | static int |
1035 | | header_Solaris_ACL(struct archive_read *a, struct tar *tar, |
1036 | | struct archive_entry *entry, const void *h, size_t *unconsumed) |
1037 | 0 | { |
1038 | 0 | const struct archive_entry_header_ustar *header; |
1039 | 0 | struct archive_string acl_text; |
1040 | 0 | size_t size; |
1041 | 0 | int err, acl_type; |
1042 | 0 | int64_t type; |
1043 | 0 | char *acl, *p; |
1044 | | |
1045 | | /* |
1046 | | * read_body_to_string adds a NUL terminator, but we need a little |
1047 | | * more to make sure that we don't overrun acl_text later. |
1048 | | */ |
1049 | 0 | header = (const struct archive_entry_header_ustar *)h; |
1050 | 0 | size = (size_t)tar_atol(header->size, sizeof(header->size)); |
1051 | 0 | archive_string_init(&acl_text); |
1052 | 0 | err = read_body_to_string(a, tar, &acl_text, h, unconsumed); |
1053 | 0 | if (err != ARCHIVE_OK) |
1054 | 0 | return (err); |
1055 | | |
1056 | | /* TODO: Examine the first characters to see if this |
1057 | | * is an AIX ACL descriptor. We'll likely never support |
1058 | | * them, but it would be polite to recognize and warn when |
1059 | | * we do see them. */ |
1060 | | |
1061 | | /* Leading octal number indicates ACL type and number of entries. */ |
1062 | 0 | p = acl = acl_text.s; |
1063 | 0 | type = 0; |
1064 | 0 | while (*p != '\0' && p < acl + size) { |
1065 | 0 | if (*p < '0' || *p > '7') { |
1066 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
1067 | 0 | "Malformed Solaris ACL attribute (invalid digit)"); |
1068 | 0 | archive_string_free(&acl_text); |
1069 | 0 | return(ARCHIVE_WARN); |
1070 | 0 | } |
1071 | 0 | type <<= 3; |
1072 | 0 | type += *p - '0'; |
1073 | 0 | if (type > 077777777) { |
1074 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
1075 | 0 | "Malformed Solaris ACL attribute (count too large)"); |
1076 | 0 | archive_string_free(&acl_text); |
1077 | 0 | return (ARCHIVE_WARN); |
1078 | 0 | } |
1079 | 0 | p++; |
1080 | 0 | } |
1081 | 0 | switch ((int)type & ~0777777) { |
1082 | 0 | case 01000000: |
1083 | | /* POSIX.1e ACL */ |
1084 | 0 | acl_type = ARCHIVE_ENTRY_ACL_TYPE_ACCESS; |
1085 | 0 | break; |
1086 | 0 | case 03000000: |
1087 | | /* NFSv4 ACL */ |
1088 | 0 | acl_type = ARCHIVE_ENTRY_ACL_TYPE_NFS4; |
1089 | 0 | break; |
1090 | 0 | default: |
1091 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
1092 | 0 | "Malformed Solaris ACL attribute (unsupported type %o)", |
1093 | 0 | (int)type); |
1094 | 0 | archive_string_free(&acl_text); |
1095 | 0 | return (ARCHIVE_WARN); |
1096 | 0 | } |
1097 | 0 | p++; |
1098 | |
|
1099 | 0 | if (p >= acl + size) { |
1100 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
1101 | 0 | "Malformed Solaris ACL attribute (body overflow)"); |
1102 | 0 | archive_string_free(&acl_text); |
1103 | 0 | return(ARCHIVE_WARN); |
1104 | 0 | } |
1105 | | |
1106 | | /* ACL text is null-terminated; find the end. */ |
1107 | 0 | size -= (p - acl); |
1108 | 0 | acl = p; |
1109 | |
|
1110 | 0 | while (*p != '\0' && p < acl + size) |
1111 | 0 | p++; |
1112 | |
|
1113 | 0 | if (tar->sconv_acl == NULL) { |
1114 | 0 | tar->sconv_acl = archive_string_conversion_from_charset( |
1115 | 0 | &(a->archive), "UTF-8", 1); |
1116 | 0 | if (tar->sconv_acl == NULL) { |
1117 | 0 | archive_string_free(&acl_text); |
1118 | 0 | return (ARCHIVE_FATAL); |
1119 | 0 | } |
1120 | 0 | } |
1121 | 0 | archive_strncpy(&(tar->localname), acl, p - acl); |
1122 | 0 | err = archive_acl_from_text_l(archive_entry_acl(entry), |
1123 | 0 | tar->localname.s, acl_type, tar->sconv_acl); |
1124 | | /* Workaround: Force perm_is_set() to be correct */ |
1125 | | /* If this bit were stored in the ACL, this wouldn't be needed */ |
1126 | 0 | archive_entry_set_perm(entry, archive_entry_perm(entry)); |
1127 | 0 | if (err != ARCHIVE_OK) { |
1128 | 0 | if (errno == ENOMEM) { |
1129 | 0 | archive_set_error(&a->archive, ENOMEM, |
1130 | 0 | "Can't allocate memory for ACL"); |
1131 | 0 | } else |
1132 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
1133 | 0 | "Malformed Solaris ACL attribute (unparsable)"); |
1134 | 0 | } |
1135 | 0 | archive_string_free(&acl_text); |
1136 | 0 | return (err); |
1137 | 0 | } |
1138 | | |
1139 | | /* |
1140 | | * Interpret 'K' long linkname header. |
1141 | | */ |
1142 | | static int |
1143 | | header_gnu_longlink(struct archive_read *a, struct tar *tar, |
1144 | | struct archive_entry *entry, const void *h, size_t *unconsumed) |
1145 | 0 | { |
1146 | 0 | int err; |
1147 | |
|
1148 | 0 | struct archive_string linkpath; |
1149 | 0 | archive_string_init(&linkpath); |
1150 | 0 | err = read_body_to_string(a, tar, &linkpath, h, unconsumed); |
1151 | 0 | archive_entry_set_link(entry, linkpath.s); |
1152 | 0 | archive_string_free(&linkpath); |
1153 | 0 | return (err); |
1154 | 0 | } |
1155 | | |
1156 | | static int |
1157 | | set_conversion_failed_error(struct archive_read *a, |
1158 | | struct archive_string_conv *sconv, const char *name) |
1159 | 0 | { |
1160 | 0 | if (errno == ENOMEM) { |
1161 | 0 | archive_set_error(&a->archive, ENOMEM, |
1162 | 0 | "Can't allocate memory for %s", name); |
1163 | 0 | return (ARCHIVE_FATAL); |
1164 | 0 | } |
1165 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, |
1166 | 0 | "%s can't be converted from %s to current locale.", |
1167 | 0 | name, archive_string_conversion_charset_name(sconv)); |
1168 | 0 | return (ARCHIVE_WARN); |
1169 | 0 | } |
1170 | | |
1171 | | /* |
1172 | | * Interpret 'L' long filename header. |
1173 | | */ |
1174 | | static int |
1175 | | header_gnu_longname(struct archive_read *a, struct tar *tar, |
1176 | | struct archive_entry *entry, const void *h, size_t *unconsumed) |
1177 | 0 | { |
1178 | 0 | int err; |
1179 | |
|
1180 | 0 | err = read_body_to_string(a, tar, &(tar->longname), h, unconsumed); |
1181 | 0 | if (err != ARCHIVE_OK) |
1182 | 0 | return (err); |
1183 | 0 | if (archive_entry_copy_pathname_l(entry, tar->longname.s, |
1184 | 0 | archive_strlen(&(tar->longname)), tar->sconv) != 0) |
1185 | 0 | err = set_conversion_failed_error(a, tar->sconv, "Pathname"); |
1186 | 0 | return (err); |
1187 | 0 | } |
1188 | | |
1189 | | /* |
1190 | | * Interpret 'V' GNU tar volume header. |
1191 | | */ |
1192 | | static int |
1193 | | header_volume(struct archive_read *a, struct tar *tar, |
1194 | | struct archive_entry *entry, const void *h, size_t *unconsumed) |
1195 | 0 | { |
1196 | 0 | const struct archive_entry_header_ustar *header; |
1197 | 0 | int64_t size, to_consume; |
1198 | |
|
1199 | 0 | (void)a; /* UNUSED */ |
1200 | 0 | (void)tar; /* UNUSED */ |
1201 | 0 | (void)entry; /* UNUSED */ |
1202 | |
|
1203 | 0 | header = (const struct archive_entry_header_ustar *)h; |
1204 | 0 | size = tar_atol(header->size, sizeof(header->size)); |
1205 | 0 | if (size > (int64_t)pathname_limit) { |
1206 | 0 | return (ARCHIVE_FATAL); |
1207 | 0 | } |
1208 | 0 | to_consume = ((size + 511) & ~511); |
1209 | 0 | *unconsumed += to_consume; |
1210 | 0 | return (ARCHIVE_OK); |
1211 | 0 | } |
1212 | | |
1213 | | /* |
1214 | | * Read the next `size` bytes into the provided string. |
1215 | | * Null-terminate the string. |
1216 | | */ |
1217 | | static int |
1218 | | read_bytes_to_string(struct archive_read *a, |
1219 | | struct archive_string *as, size_t size, |
1220 | 0 | size_t *unconsumed) { |
1221 | 0 | const void *src; |
1222 | | |
1223 | | /* Fail if we can't make our buffer big enough. */ |
1224 | 0 | if (archive_string_ensure(as, (size_t)size+1) == NULL) { |
1225 | 0 | archive_set_error(&a->archive, ENOMEM, |
1226 | 0 | "No memory"); |
1227 | 0 | return (ARCHIVE_FATAL); |
1228 | 0 | } |
1229 | | |
1230 | 0 | tar_flush_unconsumed(a, unconsumed); |
1231 | | |
1232 | | /* Read the body into the string. */ |
1233 | 0 | src = __archive_read_ahead(a, size, NULL); |
1234 | 0 | if (src == NULL) { |
1235 | 0 | archive_set_error(&a->archive, EINVAL, |
1236 | 0 | "Truncated archive" |
1237 | 0 | " detected while reading metadata"); |
1238 | 0 | *unconsumed = 0; |
1239 | 0 | return (ARCHIVE_FATAL); |
1240 | 0 | } |
1241 | 0 | memcpy(as->s, src, (size_t)size); |
1242 | 0 | as->s[size] = '\0'; |
1243 | 0 | as->length = (size_t)size; |
1244 | 0 | *unconsumed += size; |
1245 | 0 | return (ARCHIVE_OK); |
1246 | 0 | } |
1247 | | |
1248 | | /* |
1249 | | * Read body of an archive entry into an archive_string object. |
1250 | | */ |
1251 | | static int |
1252 | | read_body_to_string(struct archive_read *a, struct tar *tar, |
1253 | | struct archive_string *as, const void *h, size_t *unconsumed) |
1254 | 0 | { |
1255 | 0 | int64_t size; |
1256 | 0 | const struct archive_entry_header_ustar *header; |
1257 | 0 | int r; |
1258 | |
|
1259 | 0 | (void)tar; /* UNUSED */ |
1260 | 0 | header = (const struct archive_entry_header_ustar *)h; |
1261 | 0 | size = tar_atol(header->size, sizeof(header->size)); |
1262 | 0 | if (size > entry_limit) { |
1263 | 0 | return (ARCHIVE_FATAL); |
1264 | 0 | } |
1265 | 0 | if ((size > (int64_t)pathname_limit) || (size < 0)) { |
1266 | 0 | archive_string_empty(as); |
1267 | 0 | int64_t to_consume = ((size + 511) & ~511); |
1268 | 0 | if (to_consume != __archive_read_consume(a, to_consume)) { |
1269 | 0 | return (ARCHIVE_FATAL); |
1270 | 0 | } |
1271 | 0 | archive_set_error(&a->archive, EINVAL, |
1272 | 0 | "Special header too large: %d > 1MiB", |
1273 | 0 | (int)size); |
1274 | 0 | return (ARCHIVE_WARN); |
1275 | 0 | } |
1276 | 0 | r = read_bytes_to_string(a, as, size, unconsumed); |
1277 | 0 | *unconsumed += 0x1ff & (-size); |
1278 | 0 | return(r); |
1279 | 0 | } |
1280 | | |
1281 | | /* |
1282 | | * Parse out common header elements. |
1283 | | * |
1284 | | * This would be the same as header_old_tar, except that the |
1285 | | * filename is handled slightly differently for old and POSIX |
1286 | | * entries (POSIX entries support a 'prefix'). This factoring |
1287 | | * allows header_old_tar and header_ustar |
1288 | | * to handle filenames differently, while still putting most of the |
1289 | | * common parsing into one place. |
1290 | | */ |
1291 | | static int |
1292 | | header_common(struct archive_read *a, struct tar *tar, |
1293 | | struct archive_entry *entry, const void *h) |
1294 | 5 | { |
1295 | 5 | const struct archive_entry_header_ustar *header; |
1296 | 5 | const char *existing_linkpath; |
1297 | 5 | const wchar_t *existing_wcs_linkpath; |
1298 | 5 | int err = ARCHIVE_OK; |
1299 | | |
1300 | 5 | header = (const struct archive_entry_header_ustar *)h; |
1301 | | |
1302 | | /* Parse out the numeric fields (all are octal) */ |
1303 | | |
1304 | | /* Split mode handling: Set filetype always, perm only if not already set */ |
1305 | 5 | archive_entry_set_filetype(entry, |
1306 | 5 | (mode_t)tar_atol(header->mode, sizeof(header->mode))); |
1307 | 5 | if (!archive_entry_perm_is_set(entry)) { |
1308 | 5 | archive_entry_set_perm(entry, |
1309 | 5 | (mode_t)tar_atol(header->mode, sizeof(header->mode))); |
1310 | 5 | } |
1311 | 5 | if (!archive_entry_uid_is_set(entry)) { |
1312 | 5 | archive_entry_set_uid(entry, tar_atol(header->uid, sizeof(header->uid))); |
1313 | 5 | } |
1314 | 5 | if (!archive_entry_gid_is_set(entry)) { |
1315 | 5 | archive_entry_set_gid(entry, tar_atol(header->gid, sizeof(header->gid))); |
1316 | 5 | } |
1317 | | |
1318 | 5 | tar->entry_bytes_remaining = tar_atol(header->size, sizeof(header->size)); |
1319 | 5 | if (tar->entry_bytes_remaining < 0) { |
1320 | 0 | tar->entry_bytes_remaining = 0; |
1321 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
1322 | 0 | "Tar entry has negative size"); |
1323 | 0 | return (ARCHIVE_FATAL); |
1324 | 0 | } |
1325 | 5 | if (tar->entry_bytes_remaining > entry_limit) { |
1326 | 0 | tar->entry_bytes_remaining = 0; |
1327 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
1328 | 0 | "Tar entry size overflow"); |
1329 | 0 | return (ARCHIVE_FATAL); |
1330 | 0 | } |
1331 | 5 | if (!tar->realsize_override) { |
1332 | 5 | tar->realsize = tar->entry_bytes_remaining; |
1333 | 5 | } |
1334 | 5 | archive_entry_set_size(entry, tar->realsize); |
1335 | | |
1336 | 5 | if (!archive_entry_mtime_is_set(entry)) { |
1337 | 5 | archive_entry_set_mtime(entry, tar_atol(header->mtime, sizeof(header->mtime)), 0); |
1338 | 5 | } |
1339 | | |
1340 | | /* Handle the tar type flag appropriately. */ |
1341 | 5 | tar->filetype = header->typeflag[0]; |
1342 | | |
1343 | | /* |
1344 | | * TODO: If the linkpath came from Pax extension header, then |
1345 | | * we should obey the hdrcharset_utf8 flag when converting these. |
1346 | | */ |
1347 | 5 | switch (tar->filetype) { |
1348 | 0 | case '1': /* Hard link */ |
1349 | 0 | archive_entry_set_link_to_hardlink(entry); |
1350 | 0 | existing_wcs_linkpath = archive_entry_hardlink_w(entry); |
1351 | 0 | existing_linkpath = archive_entry_hardlink(entry); |
1352 | 0 | if ((existing_linkpath == NULL || existing_linkpath[0] == '\0') |
1353 | 0 | && (existing_wcs_linkpath == NULL || existing_wcs_linkpath[0] == '\0')) { |
1354 | 0 | struct archive_string linkpath; |
1355 | 0 | archive_string_init(&linkpath); |
1356 | 0 | archive_strncpy(&linkpath, |
1357 | 0 | header->linkname, sizeof(header->linkname)); |
1358 | 0 | if (archive_entry_copy_hardlink_l(entry, linkpath.s, |
1359 | 0 | archive_strlen(&linkpath), tar->sconv) != 0) { |
1360 | 0 | err = set_conversion_failed_error(a, tar->sconv, |
1361 | 0 | "Linkname"); |
1362 | 0 | if (err == ARCHIVE_FATAL) { |
1363 | 0 | archive_string_free(&linkpath); |
1364 | 0 | return (err); |
1365 | 0 | } |
1366 | 0 | } |
1367 | 0 | archive_string_free(&linkpath); |
1368 | 0 | } |
1369 | | /* |
1370 | | * The following may seem odd, but: Technically, tar |
1371 | | * does not store the file type for a "hard link" |
1372 | | * entry, only the fact that it is a hard link. So, I |
1373 | | * leave the type zero normally. But, pax interchange |
1374 | | * format allows hard links to have data, which |
1375 | | * implies that the underlying entry is a regular |
1376 | | * file. |
1377 | | */ |
1378 | 0 | if (archive_entry_size(entry) > 0) |
1379 | 0 | archive_entry_set_filetype(entry, AE_IFREG); |
1380 | | |
1381 | | /* |
1382 | | * A tricky point: Traditionally, tar readers have |
1383 | | * ignored the size field when reading hardlink |
1384 | | * entries, and some writers put non-zero sizes even |
1385 | | * though the body is empty. POSIX blessed this |
1386 | | * convention in the 1988 standard, but broke with |
1387 | | * this tradition in 2001 by permitting hardlink |
1388 | | * entries to store valid bodies in pax interchange |
1389 | | * format, but not in ustar format. Since there is no |
1390 | | * hard and fast way to distinguish pax interchange |
1391 | | * from earlier archives (the 'x' and 'g' entries are |
1392 | | * optional, after all), we need a heuristic. |
1393 | | */ |
1394 | 0 | if (archive_entry_size(entry) == 0) { |
1395 | | /* If the size is already zero, we're done. */ |
1396 | 0 | } else if (a->archive.archive_format |
1397 | 0 | == ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) { |
1398 | | /* Definitely pax extended; must obey hardlink size. */ |
1399 | 0 | } else if (a->archive.archive_format == ARCHIVE_FORMAT_TAR |
1400 | 0 | || a->archive.archive_format == ARCHIVE_FORMAT_TAR_GNUTAR) |
1401 | 0 | { |
1402 | | /* Old-style or GNU tar: we must ignore the size. */ |
1403 | 0 | archive_entry_set_size(entry, 0); |
1404 | 0 | tar->entry_bytes_remaining = 0; |
1405 | 0 | } else if (archive_read_format_tar_bid(a, 50) > 50) { |
1406 | | /* |
1407 | | * We don't know if it's pax: If the bid |
1408 | | * function sees a valid ustar header |
1409 | | * immediately following, then let's ignore |
1410 | | * the hardlink size. |
1411 | | */ |
1412 | 0 | archive_entry_set_size(entry, 0); |
1413 | 0 | tar->entry_bytes_remaining = 0; |
1414 | 0 | } |
1415 | | /* |
1416 | | * TODO: There are still two cases I'd like to handle: |
1417 | | * = a ustar non-pax archive with a hardlink entry at |
1418 | | * end-of-archive. (Look for block of nulls following?) |
1419 | | * = a pax archive that has not seen any pax headers |
1420 | | * and has an entry which is a hardlink entry storing |
1421 | | * a body containing an uncompressed tar archive. |
1422 | | * The first is worth addressing; I don't see any reliable |
1423 | | * way to deal with the second possibility. |
1424 | | */ |
1425 | 0 | break; |
1426 | 0 | case '2': /* Symlink */ |
1427 | 0 | archive_entry_set_link_to_symlink(entry); |
1428 | 0 | existing_wcs_linkpath = archive_entry_symlink_w(entry); |
1429 | 0 | existing_linkpath = archive_entry_symlink(entry); |
1430 | 0 | if ((existing_linkpath == NULL || existing_linkpath[0] == '\0') |
1431 | 0 | && (existing_wcs_linkpath == NULL || existing_wcs_linkpath[0] == '\0')) { |
1432 | 0 | struct archive_string linkpath; |
1433 | 0 | archive_string_init(&linkpath); |
1434 | 0 | archive_strncpy(&linkpath, |
1435 | 0 | header->linkname, sizeof(header->linkname)); |
1436 | 0 | if (archive_entry_copy_symlink_l(entry, linkpath.s, |
1437 | 0 | archive_strlen(&linkpath), tar->sconv) != 0) { |
1438 | 0 | err = set_conversion_failed_error(a, tar->sconv, |
1439 | 0 | "Linkname"); |
1440 | 0 | if (err == ARCHIVE_FATAL) { |
1441 | 0 | archive_string_free(&linkpath); |
1442 | 0 | return (err); |
1443 | 0 | } |
1444 | 0 | } |
1445 | 0 | archive_string_free(&linkpath); |
1446 | 0 | } |
1447 | 0 | archive_entry_set_filetype(entry, AE_IFLNK); |
1448 | 0 | archive_entry_set_size(entry, 0); |
1449 | 0 | tar->entry_bytes_remaining = 0; |
1450 | 0 | break; |
1451 | 0 | case '3': /* Character device */ |
1452 | 0 | archive_entry_set_filetype(entry, AE_IFCHR); |
1453 | 0 | archive_entry_set_size(entry, 0); |
1454 | 0 | tar->entry_bytes_remaining = 0; |
1455 | 0 | break; |
1456 | 0 | case '4': /* Block device */ |
1457 | 0 | archive_entry_set_filetype(entry, AE_IFBLK); |
1458 | 0 | archive_entry_set_size(entry, 0); |
1459 | 0 | tar->entry_bytes_remaining = 0; |
1460 | 0 | break; |
1461 | 0 | case '5': /* Dir */ |
1462 | 0 | archive_entry_set_filetype(entry, AE_IFDIR); |
1463 | 0 | archive_entry_set_size(entry, 0); |
1464 | 0 | tar->entry_bytes_remaining = 0; |
1465 | 0 | break; |
1466 | 0 | case '6': /* FIFO device */ |
1467 | 0 | archive_entry_set_filetype(entry, AE_IFIFO); |
1468 | 0 | archive_entry_set_size(entry, 0); |
1469 | 0 | tar->entry_bytes_remaining = 0; |
1470 | 0 | break; |
1471 | 0 | case 'D': /* GNU incremental directory type */ |
1472 | | /* |
1473 | | * No special handling is actually required here. |
1474 | | * It might be nice someday to preprocess the file list and |
1475 | | * provide it to the client, though. |
1476 | | */ |
1477 | 0 | archive_entry_set_filetype(entry, AE_IFDIR); |
1478 | 0 | break; |
1479 | 0 | case 'M': /* GNU "Multi-volume" (remainder of file from last archive)*/ |
1480 | | /* |
1481 | | * As far as I can tell, this is just like a regular file |
1482 | | * entry, except that the contents should be _appended_ to |
1483 | | * the indicated file at the indicated offset. This may |
1484 | | * require some API work to fully support. |
1485 | | */ |
1486 | 0 | break; |
1487 | 0 | case 'N': /* Old GNU "long filename" entry. */ |
1488 | | /* The body of this entry is a script for renaming |
1489 | | * previously-extracted entries. Ugh. It will never |
1490 | | * be supported by libarchive. */ |
1491 | 0 | archive_entry_set_filetype(entry, AE_IFREG); |
1492 | 0 | break; |
1493 | 0 | case 'S': /* GNU sparse files */ |
1494 | | /* |
1495 | | * Sparse files are really just regular files with |
1496 | | * sparse information in the extended area. |
1497 | | */ |
1498 | | /* FALLTHROUGH */ |
1499 | 1 | case '0': /* ustar "regular" file */ |
1500 | | /* FALLTHROUGH */ |
1501 | 5 | default: /* Non-standard file types */ |
1502 | | /* |
1503 | | * Per POSIX: non-recognized types should always be |
1504 | | * treated as regular files. |
1505 | | */ |
1506 | 5 | archive_entry_set_filetype(entry, AE_IFREG); |
1507 | 5 | break; |
1508 | 5 | } |
1509 | 5 | return (err); |
1510 | 5 | } |
1511 | | |
1512 | | /* |
1513 | | * Parse out header elements for "old-style" tar archives. |
1514 | | */ |
1515 | | static int |
1516 | | header_old_tar(struct archive_read *a, struct tar *tar, |
1517 | | struct archive_entry *entry, const void *h) |
1518 | 4 | { |
1519 | 4 | const struct archive_entry_header_ustar *header; |
1520 | 4 | int err = ARCHIVE_OK, err2; |
1521 | | |
1522 | | /* Copy filename over (to ensure null termination). */ |
1523 | 4 | header = (const struct archive_entry_header_ustar *)h; |
1524 | 4 | if (archive_entry_copy_pathname_l(entry, |
1525 | 4 | header->name, sizeof(header->name), tar->sconv) != 0) { |
1526 | 0 | err = set_conversion_failed_error(a, tar->sconv, "Pathname"); |
1527 | 0 | if (err == ARCHIVE_FATAL) |
1528 | 0 | return (err); |
1529 | 0 | } |
1530 | | |
1531 | | /* Grab rest of common fields */ |
1532 | 4 | err2 = header_common(a, tar, entry, h); |
1533 | 4 | if (err > err2) |
1534 | 0 | err = err2; |
1535 | | |
1536 | 4 | tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); |
1537 | 4 | return (err); |
1538 | 4 | } |
1539 | | |
/*
 * Is this likely an AppleDouble extension?
 *
 * Returns 1 if the last element of the entry's pathname starts with
 * "._" followed by at least one more character, and 0 otherwise.  The
 * wide-character pathname is preferred; the multibyte pathname is used
 * only when no wide-character form is available.
 *
 * This is a pure predicate: an entry with no pathname at all yields 0.
 * (It used to return ARCHIVE_FAILED, which is non-zero, so the caller
 * treated such entries as Mac metadata.)
 */
static int
is_mac_metadata_entry(struct archive_entry *entry) {
	const char *p, *name;
	const wchar_t *wp, *wname;

	wname = wp = archive_entry_pathname_w(entry);
	if (wp != NULL) {
		/* Find the last path element. */
		for (; *wp != L'\0'; ++wp) {
			if (wp[0] == L'/' && wp[1] != L'\0')
				wname = wp + 1;
		}
		/*
		 * If last path element starts with "._", then
		 * this is a Mac extension.
		 */
		if (wname[0] == L'.' && wname[1] == L'_' && wname[2] != L'\0')
			return 1;
	} else {
		/* Find the last path element. */
		name = p = archive_entry_pathname(entry);
		if (p == NULL)
			return 0;
		for (; *p != '\0'; ++p) {
			if (p[0] == '/' && p[1] != '\0')
				name = p + 1;
		}
		/*
		 * If last path element starts with "._", then
		 * this is a Mac extension.
		 */
		if (name[0] == '.' && name[1] == '_' && name[2] != '\0')
			return 1;
	}
	/* Not a mac extension */
	return 0;
}
1580 | | |
1581 | | /* |
1582 | | * Read a Mac AppleDouble-encoded blob of file metadata, |
1583 | | * if there is one. |
1584 | | * |
1585 | | * TODO: In Libarchive 4, we should consider ripping this |
1586 | | * out -- instead, return a file starting with `._` as |
1587 | | * a regular file and let the client (or archive_write logic) |
1588 | | * handle it. |
1589 | | */ |
1590 | | static int |
1591 | | read_mac_metadata_blob(struct archive_read *a, |
1592 | | struct archive_entry *entry, size_t *unconsumed) |
1593 | 0 | { |
1594 | 0 | int64_t size; |
1595 | 0 | size_t msize; |
1596 | 0 | const void *data; |
1597 | | |
1598 | | /* Read the body as a Mac OS metadata blob. */ |
1599 | 0 | size = archive_entry_size(entry); |
1600 | 0 | msize = (size_t)size; |
1601 | 0 | if (size < 0 || (uintmax_t)msize != (uintmax_t)size) { |
1602 | 0 | *unconsumed = 0; |
1603 | 0 | return (ARCHIVE_FATAL); |
1604 | 0 | } |
1605 | | |
1606 | | /* TODO: Should this merely skip the overlarge entry and |
1607 | | * WARN? Or is xattr_limit sufficiently large that we can |
1608 | | * safely assume anything larger is malicious? */ |
1609 | 0 | if (size > (int64_t)xattr_limit) { |
1610 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
1611 | 0 | "Oversized AppleDouble extension has size %llu > %llu", |
1612 | 0 | (unsigned long long)size, |
1613 | 0 | (unsigned long long)xattr_limit); |
1614 | 0 | return (ARCHIVE_FATAL); |
1615 | 0 | } |
1616 | | |
1617 | | /* |
1618 | | * TODO: Look beyond the body here to peek at the next header. |
1619 | | * If it's a regular header (not an extension header) |
1620 | | * that has the wrong name, just return the current |
1621 | | * entry as-is, without consuming the body here. |
1622 | | * That would reduce the risk of us mis-identifying |
1623 | | * an ordinary file that just happened to have |
1624 | | * a name starting with "._". |
1625 | | * |
1626 | | * Q: Is the above idea really possible? Even |
1627 | | * when there are GNU or pax extension entries? |
1628 | | */ |
1629 | 0 | tar_flush_unconsumed(a, unconsumed); |
1630 | 0 | data = __archive_read_ahead(a, msize, NULL); |
1631 | 0 | if (data == NULL) { |
1632 | 0 | *unconsumed = 0; |
1633 | 0 | return (ARCHIVE_FATAL); |
1634 | 0 | } |
1635 | 0 | archive_entry_clear(entry); |
1636 | 0 | archive_entry_copy_mac_metadata(entry, data, msize); |
1637 | 0 | *unconsumed = (msize + 511) & ~ 511; |
1638 | 0 | return (ARCHIVE_OK); |
1639 | 0 | } |
1640 | | |
1641 | | /* |
1642 | | * Parse a file header for a pax extended archive entry. |
1643 | | */ |
1644 | | static int |
1645 | | header_pax_global(struct archive_read *a, struct tar *tar, |
1646 | | struct archive_entry *entry, const void *h, size_t *unconsumed) |
1647 | 0 | { |
1648 | 0 | const struct archive_entry_header_ustar *header; |
1649 | 0 | int64_t size, to_consume; |
1650 | |
|
1651 | 0 | (void)a; /* UNUSED */ |
1652 | 0 | (void)tar; /* UNUSED */ |
1653 | 0 | (void)entry; /* UNUSED */ |
1654 | |
|
1655 | 0 | header = (const struct archive_entry_header_ustar *)h; |
1656 | 0 | size = tar_atol(header->size, sizeof(header->size)); |
1657 | 0 | if (size > entry_limit) { |
1658 | 0 | return (ARCHIVE_FATAL); |
1659 | 0 | } |
1660 | 0 | to_consume = ((size + 511) & ~511); |
1661 | 0 | *unconsumed += to_consume; |
1662 | 0 | return (ARCHIVE_OK); |
1663 | 0 | } |
1664 | | |
1665 | | /* |
1666 | | * Parse a file header for a Posix "ustar" archive entry. This also |
1667 | | * handles "pax" or "extended ustar" entries. |
1668 | | * |
1669 | | * In order to correctly handle pax attributes (which precede this), |
1670 | | * we have to skip parsing any field for which the entry already has |
1671 | | * contents. |
1672 | | */ |
1673 | | static int |
1674 | | header_ustar(struct archive_read *a, struct tar *tar, |
1675 | | struct archive_entry *entry, const void *h) |
1676 | 0 | { |
1677 | 0 | const struct archive_entry_header_ustar *header; |
1678 | 0 | struct archive_string as; |
1679 | 0 | int err = ARCHIVE_OK, r; |
1680 | |
|
1681 | 0 | header = (const struct archive_entry_header_ustar *)h; |
1682 | | |
1683 | | /* Copy name into an internal buffer to ensure null-termination. */ |
1684 | 0 | const char *existing_pathname = archive_entry_pathname(entry); |
1685 | 0 | const wchar_t *existing_wcs_pathname = archive_entry_pathname_w(entry); |
1686 | 0 | if ((existing_pathname == NULL || existing_pathname[0] == '\0') |
1687 | 0 | && (existing_wcs_pathname == NULL || existing_wcs_pathname[0] == '\0')) { |
1688 | 0 | archive_string_init(&as); |
1689 | 0 | if (header->prefix[0]) { |
1690 | 0 | archive_strncpy(&as, header->prefix, sizeof(header->prefix)); |
1691 | 0 | if (as.s[archive_strlen(&as) - 1] != '/') |
1692 | 0 | archive_strappend_char(&as, '/'); |
1693 | 0 | archive_strncat(&as, header->name, sizeof(header->name)); |
1694 | 0 | } else { |
1695 | 0 | archive_strncpy(&as, header->name, sizeof(header->name)); |
1696 | 0 | } |
1697 | 0 | if (archive_entry_copy_pathname_l(entry, as.s, archive_strlen(&as), |
1698 | 0 | tar->sconv) != 0) { |
1699 | 0 | err = set_conversion_failed_error(a, tar->sconv, "Pathname"); |
1700 | 0 | if (err == ARCHIVE_FATAL) |
1701 | 0 | return (err); |
1702 | 0 | } |
1703 | 0 | archive_string_free(&as); |
1704 | 0 | } |
1705 | | |
1706 | | /* Handle rest of common fields. */ |
1707 | 0 | r = header_common(a, tar, entry, h); |
1708 | 0 | if (r == ARCHIVE_FATAL) |
1709 | 0 | return (r); |
1710 | 0 | if (r < err) |
1711 | 0 | err = r; |
1712 | | |
1713 | | /* Handle POSIX ustar fields. */ |
1714 | 0 | const char *existing_uname = archive_entry_uname(entry); |
1715 | 0 | if (existing_uname == NULL || existing_uname[0] == '\0') { |
1716 | 0 | if (archive_entry_copy_uname_l(entry, |
1717 | 0 | header->uname, sizeof(header->uname), tar->sconv) != 0) { |
1718 | 0 | err = set_conversion_failed_error(a, tar->sconv, "Uname"); |
1719 | 0 | if (err == ARCHIVE_FATAL) |
1720 | 0 | return (err); |
1721 | 0 | } |
1722 | 0 | } |
1723 | | |
1724 | 0 | const char *existing_gname = archive_entry_gname(entry); |
1725 | 0 | if (existing_gname == NULL || existing_gname[0] == '\0') { |
1726 | 0 | if (archive_entry_copy_gname_l(entry, |
1727 | 0 | header->gname, sizeof(header->gname), tar->sconv) != 0) { |
1728 | 0 | err = set_conversion_failed_error(a, tar->sconv, "Gname"); |
1729 | 0 | if (err == ARCHIVE_FATAL) |
1730 | 0 | return (err); |
1731 | 0 | } |
1732 | 0 | } |
1733 | | |
1734 | | /* Parse out device numbers only for char and block specials. */ |
1735 | 0 | if (header->typeflag[0] == '3' || header->typeflag[0] == '4') { |
1736 | 0 | if (!archive_entry_rdev_is_set(entry)) { |
1737 | 0 | archive_entry_set_rdevmajor(entry, (dev_t) |
1738 | 0 | tar_atol(header->rdevmajor, sizeof(header->rdevmajor))); |
1739 | 0 | archive_entry_set_rdevminor(entry, (dev_t) |
1740 | 0 | tar_atol(header->rdevminor, sizeof(header->rdevminor))); |
1741 | 0 | } |
1742 | 0 | } else { |
1743 | 0 | archive_entry_set_rdev(entry, 0); |
1744 | 0 | } |
1745 | |
|
1746 | 0 | tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); |
1747 | |
|
1748 | 0 | return (err); |
1749 | 0 | } |
1750 | | |
1751 | | static int |
1752 | | header_pax_extension(struct archive_read *a, struct tar *tar, |
1753 | | struct archive_entry *entry, const void *h, size_t *unconsumed) |
1754 | 0 | { |
1755 | | /* Sanity checks: The largest `x` body I've ever heard of was |
1756 | | * a little over 4MB. So I doubt there has ever been a |
1757 | | * well-formed archive with an `x` body over 1GiB. Similarly, |
1758 | | * it seems plausible that no single attribute has ever been |
1759 | | * larger than 100MB. So if we see a larger value here, it's |
1760 | | * almost certainly a sign of a corrupted/malicious archive. */ |
1761 | | |
1762 | | /* Maximum sane size for extension body: 1 GiB */ |
1763 | | /* This cannot be raised to larger than 8GiB without |
1764 | | * exceeding the maximum size for a standard ustar |
1765 | | * entry. */ |
1766 | 0 | const int64_t ext_size_limit = 1024 * 1024 * (int64_t)1024; |
1767 | | /* Maximum size for a single line/attr: 100 million characters */ |
1768 | | /* This cannot be raised to more than 2GiB without exceeding |
1769 | | * a `size_t` on 32-bit platforms. */ |
1770 | 0 | const size_t max_parsed_line_length = 99999999ULL; |
1771 | | /* Largest attribute prolog: size + name. */ |
1772 | 0 | const size_t max_size_name = 512; |
1773 | | |
1774 | | /* Size and padding of the full extension body */ |
1775 | 0 | int64_t ext_size, ext_padding; |
1776 | 0 | size_t line_length, value_length, name_length; |
1777 | 0 | ssize_t to_read, did_read; |
1778 | 0 | const struct archive_entry_header_ustar *header; |
1779 | 0 | const char *p, *attr_start, *name_start; |
1780 | 0 | struct archive_string_conv *sconv; |
1781 | 0 | struct archive_string *pas = NULL; |
1782 | 0 | struct archive_string attr_name; |
1783 | 0 | int err = ARCHIVE_OK, r; |
1784 | |
|
1785 | 0 | header = (const struct archive_entry_header_ustar *)h; |
1786 | 0 | ext_size = tar_atol(header->size, sizeof(header->size)); |
1787 | 0 | if (ext_size > entry_limit) { |
1788 | 0 | return (ARCHIVE_FATAL); |
1789 | 0 | } |
1790 | 0 | if (ext_size < 0) { |
1791 | 0 | archive_set_error(&a->archive, EINVAL, |
1792 | 0 | "pax extension header has invalid size: %lld", |
1793 | 0 | (long long)ext_size); |
1794 | 0 | return (ARCHIVE_FATAL); |
1795 | 0 | } |
1796 | | |
1797 | 0 | ext_padding = 0x1ff & (-ext_size); |
1798 | 0 | if (ext_size > ext_size_limit) { |
1799 | | /* Consume the pax extension body and return an error */ |
1800 | 0 | if (ext_size + ext_padding != __archive_read_consume(a, ext_size + ext_padding)) { |
1801 | 0 | return (ARCHIVE_FATAL); |
1802 | 0 | } |
1803 | 0 | archive_set_error(&a->archive, EINVAL, |
1804 | 0 | "Ignoring oversized pax extensions: %d > %d", |
1805 | 0 | (int)ext_size, (int)ext_size_limit); |
1806 | 0 | return (ARCHIVE_WARN); |
1807 | 0 | } |
1808 | 0 | tar_flush_unconsumed(a, unconsumed); |
1809 | | |
1810 | | /* Parse the size/name of each pax attribute in the body */ |
1811 | 0 | archive_string_init(&attr_name); |
1812 | 0 | while (ext_size > 0) { |
1813 | | /* Read enough bytes to parse the size/name of the next attribute */ |
1814 | 0 | to_read = max_size_name; |
1815 | 0 | if (to_read > ext_size) { |
1816 | 0 | to_read = ext_size; |
1817 | 0 | } |
1818 | 0 | p = __archive_read_ahead(a, to_read, &did_read); |
1819 | 0 | if (did_read < 0) { |
1820 | 0 | return ((int)did_read); |
1821 | 0 | } |
1822 | 0 | if (did_read == 0) { /* EOF */ |
1823 | 0 | archive_set_error(&a->archive, EINVAL, |
1824 | 0 | "Truncated tar archive" |
1825 | 0 | " detected while reading pax attribute name"); |
1826 | 0 | return (ARCHIVE_FATAL); |
1827 | 0 | } |
1828 | 0 | if (did_read > ext_size) { |
1829 | 0 | did_read = ext_size; |
1830 | 0 | } |
1831 | | |
1832 | | /* Parse size of attribute */ |
1833 | 0 | line_length = 0; |
1834 | 0 | attr_start = p; |
1835 | 0 | while (1) { |
1836 | 0 | if (p >= attr_start + did_read) { |
1837 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
1838 | 0 | "Ignoring malformed pax attributes: overlarge attribute size field"); |
1839 | 0 | *unconsumed += ext_size + ext_padding; |
1840 | 0 | return (ARCHIVE_WARN); |
1841 | 0 | } |
1842 | 0 | if (*p == ' ') { |
1843 | 0 | p++; |
1844 | 0 | break; |
1845 | 0 | } |
1846 | 0 | if (*p < '0' || *p > '9') { |
1847 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
1848 | 0 | "Ignoring malformed pax attributes: malformed attribute size field"); |
1849 | 0 | *unconsumed += ext_size + ext_padding; |
1850 | 0 | return (ARCHIVE_WARN); |
1851 | 0 | } |
1852 | 0 | line_length *= 10; |
1853 | 0 | line_length += *p - '0'; |
1854 | 0 | if (line_length > max_parsed_line_length) { |
1855 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
1856 | 0 | "Ignoring malformed pax attribute: size > %lld", |
1857 | 0 | (long long)max_parsed_line_length); |
1858 | 0 | *unconsumed += ext_size + ext_padding; |
1859 | 0 | return (ARCHIVE_WARN); |
1860 | 0 | } |
1861 | 0 | p++; |
1862 | 0 | } |
1863 | | |
1864 | 0 | if ((int64_t)line_length > ext_size) { |
1865 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
1866 | 0 | "Ignoring malformed pax attribute: %lld > %lld", |
1867 | 0 | (long long)line_length, (long long)ext_size); |
1868 | 0 | *unconsumed += ext_size + ext_padding; |
1869 | 0 | return (ARCHIVE_WARN); |
1870 | 0 | } |
1871 | | |
1872 | | /* Parse name of attribute */ |
1873 | 0 | if (p >= attr_start + did_read |
1874 | 0 | || p >= attr_start + line_length |
1875 | 0 | || *p == '=') { |
1876 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
1877 | 0 | "Ignoring malformed pax attributes: empty name found"); |
1878 | 0 | *unconsumed += ext_size + ext_padding; |
1879 | 0 | return (ARCHIVE_WARN); |
1880 | 0 | } |
1881 | 0 | name_start = p; |
1882 | 0 | while (1) { |
1883 | 0 | if (p >= attr_start + did_read || p >= attr_start + line_length) { |
1884 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
1885 | 0 | "Ignoring malformed pax attributes: overlarge attribute name"); |
1886 | 0 | *unconsumed += ext_size + ext_padding; |
1887 | 0 | return (ARCHIVE_WARN); |
1888 | 0 | } |
1889 | 0 | if (*p == '=') { |
1890 | 0 | break; |
1891 | 0 | } |
1892 | 0 | p++; |
1893 | 0 | } |
1894 | 0 | name_length = p - name_start; |
1895 | 0 | p++; // Skip '=' |
1896 | |
|
1897 | 0 | archive_strncpy(&attr_name, name_start, name_length); |
1898 | |
|
1899 | 0 | ext_size -= p - attr_start; |
1900 | 0 | value_length = line_length - (p - attr_start); |
1901 | | |
1902 | | /* Consume size, name, and `=` */ |
1903 | 0 | *unconsumed += p - attr_start; |
1904 | 0 | tar_flush_unconsumed(a, unconsumed); |
1905 | | |
1906 | | /* pax_attribute will consume value_length - 1 */ |
1907 | 0 | r = pax_attribute(a, tar, entry, attr_name.s, archive_strlen(&attr_name), value_length - 1, unconsumed); |
1908 | 0 | ext_size -= value_length - 1; |
1909 | |
|
1910 | 0 | if (r < ARCHIVE_WARN) { |
1911 | 0 | *unconsumed += ext_size + ext_padding; |
1912 | 0 | return (r); |
1913 | 0 | } |
1914 | 0 | err = err_combine(err, r); |
1915 | | |
1916 | | /* Consume the `\n` that follows the pax attribute value. */ |
1917 | 0 | tar_flush_unconsumed(a, unconsumed); |
1918 | 0 | p = __archive_read_ahead(a, 1, &did_read); |
1919 | 0 | if (did_read < 0) { |
1920 | 0 | return ((int)did_read); |
1921 | 0 | } |
1922 | 0 | if (did_read == 0) { |
1923 | 0 | archive_set_error(&a->archive, EINVAL, |
1924 | 0 | "Truncated tar archive" |
1925 | 0 | " detected while completing pax attribute"); |
1926 | 0 | return (ARCHIVE_FATAL); |
1927 | 0 | } |
1928 | 0 | if (p[0] != '\n') { |
1929 | 0 | archive_set_error(&a->archive, EINVAL, |
1930 | 0 | "Malformed pax attributes"); |
1931 | 0 | *unconsumed += ext_size + ext_padding; |
1932 | 0 | return (ARCHIVE_WARN); |
1933 | 0 | } |
1934 | 0 | ext_size -= 1; |
1935 | 0 | *unconsumed += 1; |
1936 | 0 | tar_flush_unconsumed(a, unconsumed); |
1937 | 0 | } |
1938 | 0 | *unconsumed += ext_size + ext_padding; |
1939 | | |
1940 | | /* |
1941 | | * Some PAX values -- pathname, linkpath, uname, gname -- |
1942 | | * can't be copied into the entry until we know the character |
1943 | | * set to use: |
1944 | | */ |
1945 | 0 | if (!tar->pax_hdrcharset_utf8) |
1946 | | /* PAX specified "BINARY", so use the default charset */ |
1947 | 0 | sconv = tar->opt_sconv; |
1948 | 0 | else { |
1949 | | /* PAX default UTF-8 */ |
1950 | 0 | sconv = archive_string_conversion_from_charset( |
1951 | 0 | &(a->archive), "UTF-8", 1); |
1952 | 0 | if (sconv == NULL) |
1953 | 0 | return (ARCHIVE_FATAL); |
1954 | 0 | if (tar->compat_2x) |
1955 | 0 | archive_string_conversion_set_opt(sconv, |
1956 | 0 | SCONV_SET_OPT_UTF8_LIBARCHIVE2X); |
1957 | 0 | } |
1958 | | |
1959 | | /* Pathname */ |
1960 | 0 | pas = NULL; |
1961 | 0 | if (archive_strlen(&(tar->entry_pathname_override)) > 0) { |
1962 | | /* Prefer GNU.sparse.name attribute if present */ |
1963 | | /* GNU sparse files store a fake name under the standard |
1964 | | * "pathname" key. */ |
1965 | 0 | pas = &(tar->entry_pathname_override); |
1966 | 0 | } else if (archive_strlen(&(tar->entry_pathname)) > 0) { |
1967 | | /* Use standard "pathname" PAX extension */ |
1968 | 0 | pas = &(tar->entry_pathname); |
1969 | 0 | } |
1970 | 0 | if (pas != NULL) { |
1971 | 0 | if (archive_entry_copy_pathname_l(entry, pas->s, |
1972 | 0 | archive_strlen(pas), sconv) != 0) { |
1973 | 0 | err = set_conversion_failed_error(a, sconv, "Pathname"); |
1974 | 0 | if (err == ARCHIVE_FATAL) |
1975 | 0 | return (err); |
1976 | | /* Use raw name without conversion */ |
1977 | 0 | archive_entry_copy_pathname(entry, pas->s); |
1978 | 0 | } |
1979 | 0 | } |
1980 | | /* Uname */ |
1981 | 0 | if (archive_strlen(&(tar->entry_uname)) > 0) { |
1982 | 0 | if (archive_entry_copy_uname_l(entry, tar->entry_uname.s, |
1983 | 0 | archive_strlen(&(tar->entry_uname)), sconv) != 0) { |
1984 | 0 | err = set_conversion_failed_error(a, sconv, "Uname"); |
1985 | 0 | if (err == ARCHIVE_FATAL) |
1986 | 0 | return (err); |
1987 | | /* Use raw name without conversion */ |
1988 | 0 | archive_entry_copy_uname(entry, tar->entry_uname.s); |
1989 | 0 | } |
1990 | 0 | } |
1991 | | /* Gname */ |
1992 | 0 | if (archive_strlen(&(tar->entry_gname)) > 0) { |
1993 | 0 | if (archive_entry_copy_gname_l(entry, tar->entry_gname.s, |
1994 | 0 | archive_strlen(&(tar->entry_gname)), sconv) != 0) { |
1995 | 0 | err = set_conversion_failed_error(a, sconv, "Gname"); |
1996 | 0 | if (err == ARCHIVE_FATAL) |
1997 | 0 | return (err); |
1998 | | /* Use raw name without conversion */ |
1999 | 0 | archive_entry_copy_gname(entry, tar->entry_gname.s); |
2000 | 0 | } |
2001 | 0 | } |
2002 | | /* Linkpath */ |
2003 | 0 | if (archive_strlen(&(tar->entry_linkpath)) > 0) { |
2004 | 0 | if (archive_entry_copy_link_l(entry, tar->entry_linkpath.s, |
2005 | 0 | archive_strlen(&(tar->entry_linkpath)), sconv) != 0) { |
2006 | 0 | err = set_conversion_failed_error(a, sconv, "Linkpath"); |
2007 | 0 | if (err == ARCHIVE_FATAL) |
2008 | 0 | return (err); |
2009 | | /* Use raw name without conversion */ |
2010 | 0 | archive_entry_copy_link(entry, tar->entry_linkpath.s); |
2011 | 0 | } |
2012 | 0 | } |
2013 | | |
2014 | | /* Extension may have given us a corrected `entry_bytes_remaining` for |
2015 | | * the main entry; update the padding appropriately. */ |
2016 | 0 | tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); |
2017 | 0 | return (err); |
2018 | 0 | } |
2019 | | |
/*
 * Store one LIBARCHIVE.xattr.* attribute on the entry.
 *
 * `name` is passed through url_decode() and `value` through
 * base64_decode(); the decoded pair is added to the entry's extended
 * attributes and both temporary buffers are released.
 *
 * Returns 0 on success, 3 if the name is empty, 2 if the name could
 * not be decoded, and 1 if the value could not be decoded.
 */
static int
pax_attribute_LIBARCHIVE_xattr(struct archive_entry *entry,
	const char *name, size_t name_length, const char *value, size_t value_length)
{
	char *xattr_name;
	void *xattr_value;
	size_t xattr_value_size;
	int status = 0;

	if (name_length < 1)
		return 3;

	xattr_name = url_decode(name, name_length);
	if (xattr_name == NULL)
		return 2;

	xattr_value = base64_decode(value, value_length, &xattr_value_size);
	if (xattr_value != NULL) {
		archive_entry_xattr_add_entry(entry, xattr_name,
		    xattr_value, xattr_value_size);
		free(xattr_value);
	} else {
		status = 1;
	}

	free(xattr_name);
	return status;
}
2050 | | |
/*
 * Store one SCHILY.xattr.* attribute on the entry.
 *
 * The name is used verbatim; it is only copied into a temporary
 * NUL-terminated buffer.  The value is passed through unchanged.
 *
 * Returns 1 if the name is empty or longer than 128 bytes, 0 otherwise.
 * NOTE(review): when the temporary buffer cannot be allocated the
 * attribute is dropped and 0 is still returned -- confirm that this
 * best-effort behavior is what callers expect.
 */
static int
pax_attribute_SCHILY_xattr(struct archive_entry *entry,
	const char *name, size_t name_length, const char *value, size_t value_length)
{
	char *name_copy;

	if (name_length == 0 || name_length > 128)
		return 1;

	name_copy = malloc(name_length + 1);
	if (name_copy == NULL)
		return 0;

	memcpy(name_copy, name, name_length);
	name_copy[name_length] = '\0';
	archive_entry_xattr_add_entry(entry, name_copy, value, value_length);
	free(name_copy);
	return 0;
}
2069 | | |
/*
 * Store the value of an RHT.security.selinux attribute on the entry as
 * the extended attribute "security.selinux".  Always returns 0.
 */
static int
pax_attribute_RHT_security_selinux(struct archive_entry *entry,
	const char *value, size_t value_length)
{
	static const char xattr_name[] = "security.selinux";

	archive_entry_xattr_add_entry(entry, xattr_name, value, value_length);
	return 0;
}
2079 | | |
2080 | | static int |
2081 | | pax_attribute_SCHILY_acl(struct archive_read *a, struct tar *tar, |
2082 | | struct archive_entry *entry, size_t value_length, int type) |
2083 | 0 | { |
2084 | 0 | int r; |
2085 | 0 | const char *p; |
2086 | 0 | const char* errstr; |
2087 | |
|
2088 | 0 | switch (type) { |
2089 | 0 | case ARCHIVE_ENTRY_ACL_TYPE_ACCESS: |
2090 | 0 | errstr = "SCHILY.acl.access"; |
2091 | 0 | break; |
2092 | 0 | case ARCHIVE_ENTRY_ACL_TYPE_DEFAULT: |
2093 | 0 | errstr = "SCHILY.acl.default"; |
2094 | 0 | break; |
2095 | 0 | case ARCHIVE_ENTRY_ACL_TYPE_NFS4: |
2096 | 0 | errstr = "SCHILY.acl.ace"; |
2097 | 0 | break; |
2098 | 0 | default: |
2099 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
2100 | 0 | "Unknown ACL type: %d", type); |
2101 | 0 | return(ARCHIVE_FATAL); |
2102 | 0 | } |
2103 | | |
2104 | 0 | if (tar->sconv_acl == NULL) { |
2105 | 0 | tar->sconv_acl = |
2106 | 0 | archive_string_conversion_from_charset( |
2107 | 0 | &(a->archive), "UTF-8", 1); |
2108 | 0 | if (tar->sconv_acl == NULL) |
2109 | 0 | return (ARCHIVE_FATAL); |
2110 | 0 | } |
2111 | | |
2112 | 0 | if (value_length > acl_limit) { |
2113 | 0 | __archive_read_consume(a, value_length); |
2114 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
2115 | 0 | "Unreasonably large ACL: %d > %d", |
2116 | 0 | (int)value_length, (int)acl_limit); |
2117 | 0 | return (ARCHIVE_WARN); |
2118 | 0 | } |
2119 | | |
2120 | 0 | p = __archive_read_ahead(a, value_length, NULL); |
2121 | 0 | if (p == NULL) { |
2122 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, |
2123 | 0 | "Truncated tar archive " |
2124 | 0 | "detected while reading ACL data"); |
2125 | 0 | return (ARCHIVE_FATAL); |
2126 | 0 | } |
2127 | | |
2128 | 0 | r = archive_acl_from_text_nl(archive_entry_acl(entry), p, value_length, |
2129 | 0 | type, tar->sconv_acl); |
2130 | 0 | __archive_read_consume(a, value_length); |
2131 | | /* Workaround: Force perm_is_set() to be correct */ |
2132 | | /* If this bit were stored in the ACL, this wouldn't be needed */ |
2133 | 0 | archive_entry_set_perm(entry, archive_entry_perm(entry)); |
2134 | 0 | if (r != ARCHIVE_OK) { |
2135 | 0 | if (r == ARCHIVE_FATAL) { |
2136 | 0 | archive_set_error(&a->archive, ENOMEM, |
2137 | 0 | "%s %s", "Can't allocate memory for ", |
2138 | 0 | errstr); |
2139 | 0 | return (r); |
2140 | 0 | } |
2141 | 0 | archive_set_error(&a->archive, |
2142 | 0 | ARCHIVE_ERRNO_MISC, "%s %s", "Parse error: ", errstr); |
2143 | 0 | } |
2144 | 0 | return (r); |
2145 | 0 | } |
2146 | | |
2147 | | static int |
2148 | 0 | pax_attribute_read_time(struct archive_read *a, size_t value_length, int64_t *ps, long *pn, size_t *unconsumed) { |
2149 | 0 | struct archive_string as; |
2150 | 0 | int r; |
2151 | |
|
2152 | 0 | if (value_length > 128) { |
2153 | 0 | __archive_read_consume(a, value_length); |
2154 | 0 | *ps = 0; |
2155 | 0 | *pn = 0; |
2156 | 0 | return (ARCHIVE_FATAL); |
2157 | 0 | } |
2158 | | |
2159 | 0 | archive_string_init(&as); |
2160 | 0 | r = read_bytes_to_string(a, &as, value_length, unconsumed); |
2161 | 0 | if (r < ARCHIVE_OK) { |
2162 | 0 | archive_string_free(&as); |
2163 | 0 | return (r); |
2164 | 0 | } |
2165 | | |
2166 | 0 | pax_time(as.s, archive_strlen(&as), ps, pn); |
2167 | 0 | archive_string_free(&as); |
2168 | 0 | if (*ps < 0 || *ps == INT64_MAX) { |
2169 | 0 | return (ARCHIVE_WARN); |
2170 | 0 | } |
2171 | 0 | return (ARCHIVE_OK); |
2172 | 0 | } |
2173 | | |
2174 | | static int |
2175 | 0 | pax_attribute_read_number(struct archive_read *a, size_t value_length, int64_t *result) { |
2176 | 0 | struct archive_string as; |
2177 | 0 | size_t unconsumed = 0; |
2178 | 0 | int r; |
2179 | |
|
2180 | 0 | if (value_length > 64) { |
2181 | 0 | __archive_read_consume(a, value_length); |
2182 | 0 | *result = 0; |
2183 | 0 | return (ARCHIVE_FATAL); |
2184 | 0 | } |
2185 | | |
2186 | 0 | archive_string_init(&as); |
2187 | 0 | r = read_bytes_to_string(a, &as, value_length, &unconsumed); |
2188 | 0 | tar_flush_unconsumed(a, &unconsumed); |
2189 | 0 | if (r < ARCHIVE_OK) { |
2190 | 0 | archive_string_free(&as); |
2191 | 0 | return (r); |
2192 | 0 | } |
2193 | | |
2194 | 0 | *result = tar_atol10(as.s, archive_strlen(&as)); |
2195 | 0 | archive_string_free(&as); |
2196 | 0 | if (*result < 0 || *result == INT64_MAX) { |
2197 | 0 | *result = INT64_MAX; |
2198 | 0 | return (ARCHIVE_WARN); |
2199 | 0 | } |
2200 | 0 | return (ARCHIVE_OK); |
2201 | 0 | } |
2202 | | |
2203 | | /* |
2204 | | * Parse a single key=value attribute. |
2205 | | * |
2206 | | * POSIX reserves all-lowercase keywords. Vendor-specific extensions |
2207 | | * should always have keywords of the form "VENDOR.attribute" In |
2208 | | * particular, it's quite feasible to support many different vendor |
2209 | | * extensions here. I'm using "LIBARCHIVE" for extensions unique to |
2210 | | * this library. |
2211 | | * |
2212 | | * TODO: Investigate other vendor-specific extensions and see if |
2213 | | * any of them look useful. |
2214 | | */ |
2215 | | static int |
2216 | | pax_attribute(struct archive_read *a, struct tar *tar, struct archive_entry *entry, |
2217 | | const char *key, size_t key_length, size_t value_length, size_t *unconsumed) |
2218 | 0 | { |
2219 | 0 | int64_t t; |
2220 | 0 | long n; |
2221 | 0 | const char *p; |
2222 | 0 | ssize_t bytes_read; |
2223 | 0 | int err = ARCHIVE_OK; |
2224 | |
|
2225 | 0 | switch (key[0]) { |
2226 | 0 | case 'G': |
2227 | | /* GNU.* extensions */ |
2228 | 0 | if (key_length > 4 && memcmp(key, "GNU.", 4) == 0) { |
2229 | 0 | key += 4; |
2230 | 0 | key_length -= 4; |
2231 | | |
2232 | | /* GNU.sparse marks the existence of GNU sparse information */ |
2233 | 0 | if (key_length == 6 && memcmp(key, "sparse", 6) == 0) { |
2234 | 0 | tar->sparse_gnu_attributes_seen = 1; |
2235 | 0 | } |
2236 | | |
2237 | | /* GNU.sparse.* extensions */ |
2238 | 0 | else if (key_length > 7 && memcmp(key, "sparse.", 7) == 0) { |
2239 | 0 | tar->sparse_gnu_attributes_seen = 1; |
2240 | 0 | key += 7; |
2241 | 0 | key_length -= 7; |
2242 | | |
2243 | | /* GNU "0.0" sparse pax format. */ |
2244 | 0 | if (key_length == 9 && memcmp(key, "numblocks", 9) == 0) { |
2245 | | /* GNU.sparse.numblocks */ |
2246 | 0 | tar->sparse_offset = -1; |
2247 | 0 | tar->sparse_numbytes = -1; |
2248 | 0 | tar->sparse_gnu_major = 0; |
2249 | 0 | tar->sparse_gnu_minor = 0; |
2250 | 0 | } |
2251 | 0 | else if (key_length == 6 && memcmp(key, "offset", 6) == 0) { |
2252 | | /* GNU.sparse.offset */ |
2253 | 0 | if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { |
2254 | 0 | tar->sparse_offset = t; |
2255 | 0 | if (tar->sparse_numbytes != -1) { |
2256 | 0 | if (gnu_add_sparse_entry(a, tar, |
2257 | 0 | tar->sparse_offset, tar->sparse_numbytes) |
2258 | 0 | != ARCHIVE_OK) |
2259 | 0 | return (ARCHIVE_FATAL); |
2260 | 0 | tar->sparse_offset = -1; |
2261 | 0 | tar->sparse_numbytes = -1; |
2262 | 0 | } |
2263 | 0 | } |
2264 | 0 | return (err); |
2265 | 0 | } |
2266 | 0 | else if (key_length == 8 && memcmp(key, "numbytes", 8) == 0) { |
2267 | | /* GNU.sparse.numbytes */ |
2268 | 0 | if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { |
2269 | 0 | tar->sparse_numbytes = t; |
2270 | 0 | if (tar->sparse_offset != -1) { |
2271 | 0 | if (gnu_add_sparse_entry(a, tar, |
2272 | 0 | tar->sparse_offset, tar->sparse_numbytes) |
2273 | 0 | != ARCHIVE_OK) |
2274 | 0 | return (ARCHIVE_FATAL); |
2275 | 0 | tar->sparse_offset = -1; |
2276 | 0 | tar->sparse_numbytes = -1; |
2277 | 0 | } |
2278 | 0 | } |
2279 | 0 | return (err); |
2280 | 0 | } |
2281 | 0 | else if (key_length == 4 && memcmp(key, "size", 4) == 0) { |
2282 | | /* GNU.sparse.size */ |
2283 | 0 | if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { |
2284 | 0 | tar->realsize = t; |
2285 | 0 | archive_entry_set_size(entry, tar->realsize); |
2286 | 0 | tar->realsize_override = 1; |
2287 | 0 | } |
2288 | 0 | return (err); |
2289 | 0 | } |
2290 | | |
2291 | | /* GNU "0.1" sparse pax format. */ |
2292 | 0 | else if (key_length == 3 && memcmp(key, "map", 3) == 0) { |
2293 | | /* GNU.sparse.map */ |
2294 | 0 | tar->sparse_gnu_major = 0; |
2295 | 0 | tar->sparse_gnu_minor = 1; |
2296 | 0 | if (value_length > sparse_map_limit) { |
2297 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
2298 | 0 | "Unreasonably large sparse map: %d > %d", |
2299 | 0 | (int)value_length, (int)sparse_map_limit); |
2300 | 0 | err = ARCHIVE_FAILED; |
2301 | 0 | } else { |
2302 | 0 | p = __archive_read_ahead(a, value_length, &bytes_read); |
2303 | 0 | if (p != NULL) { |
2304 | 0 | if (gnu_sparse_01_parse(a, tar, p, value_length) != ARCHIVE_OK) { |
2305 | 0 | err = ARCHIVE_WARN; |
2306 | 0 | } |
2307 | 0 | } else { |
2308 | 0 | return (ARCHIVE_FATAL); |
2309 | 0 | } |
2310 | 0 | } |
2311 | 0 | __archive_read_consume(a, value_length); |
2312 | 0 | return (err); |
2313 | 0 | } |
2314 | | |
2315 | | /* GNU "1.0" sparse pax format */ |
2316 | 0 | else if (key_length == 5 && memcmp(key, "major", 5) == 0) { |
2317 | | /* GNU.sparse.major */ |
2318 | 0 | if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK |
2319 | 0 | && t >= 0 |
2320 | 0 | && t <= 10) { |
2321 | 0 | tar->sparse_gnu_major = (int)t; |
2322 | 0 | } |
2323 | 0 | return (err); |
2324 | 0 | } |
2325 | 0 | else if (key_length == 5 && memcmp(key, "minor", 5) == 0) { |
2326 | | /* GNU.sparse.minor */ |
2327 | 0 | if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK |
2328 | 0 | && t >= 0 |
2329 | 0 | && t <= 10) { |
2330 | 0 | tar->sparse_gnu_minor = (int)t; |
2331 | 0 | } |
2332 | 0 | return (err); |
2333 | 0 | } |
2334 | 0 | else if (key_length == 4 && memcmp(key, "name", 4) == 0) { |
2335 | | /* GNU.sparse.name */ |
2336 | | /* |
2337 | | * The real filename; when storing sparse |
2338 | | * files, GNU tar puts a synthesized name into |
2339 | | * the regular 'path' attribute in an attempt |
2340 | | * to limit confusion. ;-) |
2341 | | */ |
2342 | 0 | if (value_length > pathname_limit) { |
2343 | 0 | *unconsumed += value_length; |
2344 | 0 | err = ARCHIVE_WARN; |
2345 | 0 | } else { |
2346 | 0 | err = read_bytes_to_string(a, &(tar->entry_pathname_override), |
2347 | 0 | value_length, unconsumed); |
2348 | 0 | } |
2349 | 0 | return (err); |
2350 | 0 | } |
2351 | 0 | else if (key_length == 8 && memcmp(key, "realsize", 8) == 0) { |
2352 | | /* GNU.sparse.realsize */ |
2353 | 0 | if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { |
2354 | 0 | tar->realsize = t; |
2355 | 0 | archive_entry_set_size(entry, tar->realsize); |
2356 | 0 | tar->realsize_override = 1; |
2357 | 0 | } |
2358 | 0 | return (err); |
2359 | 0 | } |
2360 | 0 | } |
2361 | 0 | } |
2362 | 0 | break; |
2363 | 0 | case 'L': |
2364 | | /* LIBARCHIVE extensions */ |
2365 | 0 | if (key_length > 11 && memcmp(key, "LIBARCHIVE.", 11) == 0) { |
2366 | 0 | key_length -= 11; |
2367 | 0 | key += 11; |
2368 | | |
2369 | | /* TODO: Handle arbitrary extended attributes... */ |
2370 | | /* |
2371 | | if (strcmp(key, "LIBARCHIVE.xxxxxxx") == 0) |
2372 | | archive_entry_set_xxxxxx(entry, value); |
2373 | | */ |
2374 | 0 | if (key_length == 12 && memcmp(key, "creationtime", 12) == 0) { |
2375 | | /* LIBARCHIVE.creationtime */ |
2376 | 0 | if ((err = pax_attribute_read_time(a, value_length, &t, &n, unconsumed)) == ARCHIVE_OK) { |
2377 | 0 | archive_entry_set_birthtime(entry, t, n); |
2378 | 0 | } |
2379 | 0 | return (err); |
2380 | 0 | } |
2381 | 0 | else if (key_length == 11 && memcmp(key, "symlinktype", 11) == 0) { |
2382 | | /* LIBARCHIVE.symlinktype */ |
2383 | 0 | if (value_length < 16) { |
2384 | 0 | p = __archive_read_ahead(a, value_length, &bytes_read); |
2385 | 0 | if (p != NULL) { |
2386 | 0 | if (value_length == 4 && memcmp(p, "file", 4) == 0) { |
2387 | 0 | archive_entry_set_symlink_type(entry, |
2388 | 0 | AE_SYMLINK_TYPE_FILE); |
2389 | 0 | } else if (value_length == 3 && memcmp(p, "dir", 3) == 0) { |
2390 | 0 | archive_entry_set_symlink_type(entry, |
2391 | 0 | AE_SYMLINK_TYPE_DIRECTORY); |
2392 | 0 | } else { |
2393 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
2394 | 0 | "Unrecognized symlink type"); |
2395 | 0 | err = ARCHIVE_WARN; |
2396 | 0 | } |
2397 | 0 | } else { |
2398 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, |
2399 | 0 | "Truncated tar archive " |
2400 | 0 | "detected while reading `symlinktype` attribute"); |
2401 | 0 | return (ARCHIVE_FATAL); |
2402 | 0 | } |
2403 | 0 | } else { |
2404 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
2405 | 0 | "symlink type is very long" |
2406 | 0 | "(longest recognized value is 4 bytes, this is %d)", |
2407 | 0 | (int)value_length); |
2408 | 0 | err = ARCHIVE_WARN; |
2409 | 0 | } |
2410 | 0 | __archive_read_consume(a, value_length); |
2411 | 0 | return (err); |
2412 | 0 | } |
2413 | 0 | else if (key_length > 6 && memcmp(key, "xattr.", 6) == 0) { |
2414 | 0 | key_length -= 6; |
2415 | 0 | key += 6; |
2416 | 0 | if (value_length > xattr_limit) { |
2417 | 0 | err = ARCHIVE_WARN; |
2418 | 0 | } else { |
2419 | 0 | p = __archive_read_ahead(a, value_length, &bytes_read); |
2420 | 0 | if (p == NULL |
2421 | 0 | || pax_attribute_LIBARCHIVE_xattr(entry, key, key_length, p, value_length)) { |
2422 | | /* TODO: Unable to parse xattr */ |
2423 | 0 | err = ARCHIVE_WARN; |
2424 | 0 | } |
2425 | 0 | } |
2426 | 0 | __archive_read_consume(a, value_length); |
2427 | 0 | return (err); |
2428 | 0 | } |
2429 | 0 | } |
2430 | 0 | break; |
2431 | 0 | case 'R': |
2432 | | /* GNU tar uses RHT.security header to store SELinux xattrs |
2433 | | * SCHILY.xattr.security.selinux == RHT.security.selinux */ |
2434 | 0 | if (key_length == 20 && memcmp(key, "RHT.security.selinux", 20) == 0) { |
2435 | 0 | if (value_length > xattr_limit) { |
2436 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
2437 | 0 | "Ignoring unreasonably large security.selinux attribute:" |
2438 | 0 | " %d > %d", |
2439 | 0 | (int)value_length, (int)xattr_limit); |
2440 | | /* TODO: Should this be FAILED instead? */ |
2441 | 0 | err = ARCHIVE_WARN; |
2442 | 0 | } else { |
2443 | 0 | p = __archive_read_ahead(a, value_length, &bytes_read); |
2444 | 0 | if (p == NULL |
2445 | 0 | || pax_attribute_RHT_security_selinux(entry, p, value_length)) { |
2446 | | /* TODO: Unable to parse xattr */ |
2447 | 0 | err = ARCHIVE_WARN; |
2448 | 0 | } |
2449 | 0 | } |
2450 | 0 | __archive_read_consume(a, value_length); |
2451 | 0 | return (err); |
2452 | 0 | } |
2453 | 0 | break; |
2454 | 0 | case 'S': |
2455 | | /* SCHILY.* extensions used by "star" archiver */ |
2456 | 0 | if (key_length > 7 && memcmp(key, "SCHILY.", 7) == 0) { |
2457 | 0 | key_length -= 7; |
2458 | 0 | key += 7; |
2459 | |
|
2460 | 0 | if (key_length == 10 && memcmp(key, "acl.access", 10) == 0) { |
2461 | 0 | err = pax_attribute_SCHILY_acl(a, tar, entry, value_length, |
2462 | 0 | ARCHIVE_ENTRY_ACL_TYPE_ACCESS); |
2463 | | // TODO: Mark mode as set |
2464 | 0 | return (err); |
2465 | 0 | } |
2466 | 0 | else if (key_length == 11 && memcmp(key, "acl.default", 11) == 0) { |
2467 | 0 | err = pax_attribute_SCHILY_acl(a, tar, entry, value_length, |
2468 | 0 | ARCHIVE_ENTRY_ACL_TYPE_DEFAULT); |
2469 | 0 | return (err); |
2470 | 0 | } |
2471 | 0 | else if (key_length == 7 && memcmp(key, "acl.ace", 7) == 0) { |
2472 | 0 | err = pax_attribute_SCHILY_acl(a, tar, entry, value_length, |
2473 | 0 | ARCHIVE_ENTRY_ACL_TYPE_NFS4); |
2474 | | // TODO: Mark mode as set |
2475 | 0 | return (err); |
2476 | 0 | } |
2477 | 0 | else if (key_length == 8 && memcmp(key, "devmajor", 8) == 0) { |
2478 | 0 | if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { |
2479 | 0 | archive_entry_set_rdevmajor(entry, t); |
2480 | 0 | } |
2481 | 0 | return (err); |
2482 | 0 | } |
2483 | 0 | else if (key_length == 8 && memcmp(key, "devminor", 8) == 0) { |
2484 | 0 | if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { |
2485 | 0 | archive_entry_set_rdevminor(entry, t); |
2486 | 0 | } |
2487 | 0 | return (err); |
2488 | 0 | } |
2489 | 0 | else if (key_length == 6 && memcmp(key, "fflags", 6) == 0) { |
2490 | 0 | if (value_length < fflags_limit) { |
2491 | 0 | p = __archive_read_ahead(a, value_length, &bytes_read); |
2492 | 0 | if (p != NULL) { |
2493 | 0 | archive_entry_copy_fflags_text_len(entry, p, value_length); |
2494 | 0 | err = ARCHIVE_OK; |
2495 | 0 | } else { |
2496 | | /* Truncated archive */ |
2497 | 0 | err = ARCHIVE_FATAL; |
2498 | 0 | } |
2499 | 0 | } else { |
2500 | | /* Overlong fflags field */ |
2501 | 0 | err = ARCHIVE_WARN; |
2502 | 0 | } |
2503 | 0 | __archive_read_consume(a, value_length); |
2504 | 0 | return (err); |
2505 | 0 | } |
2506 | 0 | else if (key_length == 3 && memcmp(key, "dev", 3) == 0) { |
2507 | 0 | if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { |
2508 | 0 | archive_entry_set_dev(entry, t); |
2509 | 0 | } |
2510 | 0 | return (err); |
2511 | 0 | } |
2512 | 0 | else if (key_length == 3 && memcmp(key, "ino", 3) == 0) { |
2513 | 0 | if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { |
2514 | 0 | archive_entry_set_ino(entry, t); |
2515 | 0 | } |
2516 | 0 | return (err); |
2517 | 0 | } |
2518 | 0 | else if (key_length == 5 && memcmp(key, "nlink", 5) == 0) { |
2519 | 0 | if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { |
2520 | 0 | archive_entry_set_nlink(entry, t); |
2521 | 0 | } |
2522 | 0 | return (err); |
2523 | 0 | } |
2524 | 0 | else if (key_length == 8 && memcmp(key, "realsize", 8) == 0) { |
2525 | 0 | if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { |
2526 | 0 | tar->realsize = t; |
2527 | 0 | tar->realsize_override = 1; |
2528 | 0 | archive_entry_set_size(entry, tar->realsize); |
2529 | 0 | } |
2530 | 0 | return (err); |
2531 | 0 | } |
2532 | 0 | else if (key_length > 6 && memcmp(key, "xattr.", 6) == 0) { |
2533 | 0 | key_length -= 6; |
2534 | 0 | key += 6; |
2535 | 0 | if (value_length < xattr_limit) { |
2536 | 0 | p = __archive_read_ahead(a, value_length, &bytes_read); |
2537 | 0 | if (p == NULL |
2538 | 0 | || pax_attribute_SCHILY_xattr(entry, key, key_length, p, value_length)) { |
2539 | | /* TODO: Unable to parse xattr */ |
2540 | 0 | err = ARCHIVE_WARN; |
2541 | 0 | } |
2542 | 0 | } else { |
2543 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
2544 | 0 | "Unreasonably large xattr: %d > %d", |
2545 | 0 | (int)value_length, (int)xattr_limit); |
2546 | 0 | err = ARCHIVE_WARN; |
2547 | 0 | } |
2548 | 0 | __archive_read_consume(a, value_length); |
2549 | 0 | return (err); |
2550 | 0 | } |
2551 | 0 | } |
2552 | | /* SUN.* extensions from Solaris tar */ |
2553 | 0 | if (key_length > 4 && memcmp(key, "SUN.", 4) == 0) { |
2554 | 0 | key_length -= 4; |
2555 | 0 | key += 4; |
2556 | |
|
2557 | 0 | if (key_length == 9 && memcmp(key, "holesdata", 9) == 0) { |
2558 | | /* SUN.holesdata */ |
2559 | 0 | if (value_length < sparse_map_limit) { |
2560 | 0 | p = __archive_read_ahead(a, value_length, &bytes_read); |
2561 | 0 | if (p != NULL) { |
2562 | 0 | err = pax_attribute_SUN_holesdata(a, tar, entry, p, value_length); |
2563 | 0 | if (err < ARCHIVE_OK) { |
2564 | 0 | archive_set_error(&a->archive, |
2565 | 0 | ARCHIVE_ERRNO_MISC, |
2566 | 0 | "Parse error: SUN.holesdata"); |
2567 | 0 | } |
2568 | 0 | } else { |
2569 | 0 | return (ARCHIVE_FATAL); |
2570 | 0 | } |
2571 | 0 | } else { |
2572 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
2573 | 0 | "Unreasonably large sparse map: %d > %d", |
2574 | 0 | (int)value_length, (int)sparse_map_limit); |
2575 | 0 | err = ARCHIVE_FAILED; |
2576 | 0 | } |
2577 | 0 | __archive_read_consume(a, value_length); |
2578 | 0 | return (err); |
2579 | 0 | } |
2580 | 0 | } |
2581 | 0 | break; |
2582 | 0 | case 'a': |
2583 | 0 | if (key_length == 5 && memcmp(key, "atime", 5) == 0) { |
2584 | 0 | if ((err = pax_attribute_read_time(a, value_length, &t, &n, unconsumed)) == ARCHIVE_OK) { |
2585 | 0 | archive_entry_set_atime(entry, t, n); |
2586 | 0 | } |
2587 | 0 | return (err); |
2588 | 0 | } |
2589 | 0 | break; |
2590 | 0 | case 'c': |
2591 | 0 | if (key_length == 5 && memcmp(key, "ctime", 5) == 0) { |
2592 | 0 | if ((err = pax_attribute_read_time(a, value_length, &t, &n, unconsumed)) == ARCHIVE_OK) { |
2593 | 0 | archive_entry_set_ctime(entry, t, n); |
2594 | 0 | } |
2595 | 0 | return (err); |
2596 | 0 | } else if (key_length == 7 && memcmp(key, "charset", 7) == 0) { |
2597 | | /* TODO: Publish charset information in entry. */ |
2598 | 0 | } else if (key_length == 7 && memcmp(key, "comment", 7) == 0) { |
2599 | | /* TODO: Publish comment in entry. */ |
2600 | 0 | } |
2601 | 0 | break; |
2602 | 0 | case 'g': |
2603 | 0 | if (key_length == 3 && memcmp(key, "gid", 3) == 0) { |
2604 | 0 | if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { |
2605 | 0 | archive_entry_set_gid(entry, t); |
2606 | 0 | } |
2607 | 0 | return (err); |
2608 | 0 | } else if (key_length == 5 && memcmp(key, "gname", 5) == 0) { |
2609 | 0 | if (value_length > guname_limit) { |
2610 | 0 | *unconsumed += value_length; |
2611 | 0 | err = ARCHIVE_WARN; |
2612 | 0 | } else { |
2613 | 0 | err = read_bytes_to_string(a, &(tar->entry_gname), value_length, unconsumed); |
2614 | 0 | } |
2615 | 0 | return (err); |
2616 | 0 | } |
2617 | 0 | break; |
2618 | 0 | case 'h': |
2619 | 0 | if (key_length == 10 && memcmp(key, "hdrcharset", 10) == 0) { |
2620 | 0 | if (value_length < 64) { |
2621 | 0 | p = __archive_read_ahead(a, value_length, &bytes_read); |
2622 | 0 | if (p != NULL) { |
2623 | 0 | if (value_length == 6 |
2624 | 0 | && memcmp(p, "BINARY", 6) == 0) { |
2625 | | /* Binary mode. */ |
2626 | 0 | tar->pax_hdrcharset_utf8 = 0; |
2627 | 0 | err = ARCHIVE_OK; |
2628 | 0 | } else if (value_length == 23 |
2629 | 0 | && memcmp(p, "ISO-IR 10646 2000 UTF-8", 23) == 0) { |
2630 | 0 | tar->pax_hdrcharset_utf8 = 1; |
2631 | 0 | err = ARCHIVE_OK; |
2632 | 0 | } else { |
2633 | | /* TODO: Unrecognized character set */ |
2634 | 0 | err = ARCHIVE_WARN; |
2635 | 0 | } |
2636 | 0 | } else { |
2637 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, |
2638 | 0 | "Truncated tar archive " |
2639 | 0 | "detected while reading hdrcharset attribute"); |
2640 | 0 | return (ARCHIVE_FATAL); |
2641 | 0 | } |
2642 | 0 | } else { |
2643 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, |
2644 | 0 | "hdrcharset attribute is unreasonably large (%d bytes)", |
2645 | 0 | (int)value_length); |
2646 | 0 | err = ARCHIVE_WARN; |
2647 | 0 | } |
2648 | 0 | __archive_read_consume(a, value_length); |
2649 | 0 | return (err); |
2650 | 0 | } |
2651 | 0 | break; |
2652 | 0 | case 'l': |
2653 | | /* pax interchange doesn't distinguish hardlink vs. symlink. */ |
2654 | 0 | if (key_length == 8 && memcmp(key, "linkpath", 8) == 0) { |
2655 | 0 | if (value_length > pathname_limit) { |
2656 | 0 | *unconsumed += value_length; |
2657 | 0 | err = ARCHIVE_WARN; |
2658 | 0 | } else { |
2659 | 0 | err = read_bytes_to_string(a, &tar->entry_linkpath, value_length, unconsumed); |
2660 | 0 | } |
2661 | 0 | return (err); |
2662 | 0 | } |
2663 | 0 | break; |
2664 | 0 | case 'm': |
2665 | 0 | if (key_length == 5 && memcmp(key, "mtime", 5) == 0) { |
2666 | 0 | if ((err = pax_attribute_read_time(a, value_length, &t, &n, unconsumed)) == ARCHIVE_OK) { |
2667 | 0 | archive_entry_set_mtime(entry, t, n); |
2668 | 0 | } |
2669 | 0 | return (err); |
2670 | 0 | } |
2671 | 0 | break; |
2672 | 0 | case 'p': |
2673 | 0 | if (key_length == 4 && memcmp(key, "path", 4) == 0) { |
2674 | 0 | if (value_length > pathname_limit) { |
2675 | 0 | *unconsumed += value_length; |
2676 | 0 | err = ARCHIVE_WARN; |
2677 | 0 | } else { |
2678 | 0 | err = read_bytes_to_string(a, &(tar->entry_pathname), value_length, unconsumed); |
2679 | 0 | } |
2680 | 0 | return (err); |
2681 | 0 | } |
2682 | 0 | break; |
2683 | 0 | case 'r': |
2684 | | /* POSIX has reserved 'realtime.*' */ |
2685 | 0 | break; |
2686 | 0 | case 's': |
2687 | | /* POSIX has reserved 'security.*' */ |
2688 | | /* Someday: if (strcmp(key, "security.acl") == 0) { ... } */ |
2689 | 0 | if (key_length == 4 && memcmp(key, "size", 4) == 0) { |
2690 | | /* "size" is the size of the data in the entry. */ |
2691 | 0 | if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { |
2692 | 0 | tar->entry_bytes_remaining = t; |
2693 | | /* |
2694 | | * The "size" pax header keyword always overrides the |
2695 | | * "size" field in the tar header. |
2696 | | * GNU.sparse.realsize, GNU.sparse.size and |
2697 | | * SCHILY.realsize override this value. |
2698 | | */ |
2699 | 0 | if (!tar->realsize_override) { |
2700 | 0 | archive_entry_set_size(entry, |
2701 | 0 | tar->entry_bytes_remaining); |
2702 | 0 | tar->realsize |
2703 | 0 | = tar->entry_bytes_remaining; |
2704 | 0 | } |
2705 | 0 | } |
2706 | 0 | else if (t == INT64_MAX) { |
2707 | | /* Note: pax_attr_read_number returns INT64_MAX on overflow or < 0 */ |
2708 | 0 | tar->entry_bytes_remaining = 0; |
2709 | 0 | archive_set_error(&a->archive, |
2710 | 0 | ARCHIVE_ERRNO_MISC, |
2711 | 0 | "Tar size attribute overflow"); |
2712 | 0 | return (ARCHIVE_FATAL); |
2713 | 0 | } |
2714 | 0 | return (err); |
2715 | 0 | } |
2716 | 0 | break; |
2717 | 0 | case 'u': |
2718 | 0 | if (key_length == 3 && memcmp(key, "uid", 3) == 0) { |
2719 | 0 | if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) { |
2720 | 0 | archive_entry_set_uid(entry, t); |
2721 | 0 | } |
2722 | 0 | return (err); |
2723 | 0 | } else if (key_length == 5 && memcmp(key, "uname", 5) == 0) { |
2724 | 0 | if (value_length > guname_limit) { |
2725 | 0 | *unconsumed += value_length; |
2726 | 0 | err = ARCHIVE_WARN; |
2727 | 0 | } else { |
2728 | 0 | err = read_bytes_to_string(a, &(tar->entry_uname), value_length, unconsumed); |
2729 | 0 | } |
2730 | 0 | return (err); |
2731 | 0 | } |
2732 | 0 | break; |
2733 | 0 | } |
2734 | | |
2735 | | /* Unrecognized key, just skip the entire value. */ |
2736 | 0 | __archive_read_consume(a, value_length); |
2737 | 0 | return (err); |
2738 | 0 | } |
2739 | | |
2740 | | |
2741 | | |
/*
 * Parse a decimal time value, which may include a fractional portion.
 *
 *   p, length  Text to parse; it need not be NUL-terminated and at most
 *              'length' bytes are examined.
 *   ps         Receives the whole-seconds part.  A leading '-' negates
 *              it.  On overflow the magnitude saturates at INT64_MAX and
 *              the rest of the text is ignored.
 *   pn         Receives the fractional part scaled to nanoseconds.  Only
 *              the first nine digits after the '.' contribute.  This
 *              value is never negated, so "-1.5" yields *ps == -1 and
 *              *pn == 500000000.
 *
 * Both outputs are written on every path, including empty input, so the
 * caller never reads an indeterminate nanosecond value.
 */
static void
pax_time(const char *p, size_t length, int64_t *ps, long *pn)
{
	char digit;
	int64_t s;
	unsigned long l;
	int sign;
	int64_t limit, last_digit_limit;

	limit = INT64_MAX / 10;
	last_digit_limit = INT64_MAX % 10;

	/* Define both results up front; every early return below is safe. */
	*ps = 0;
	*pn = 0;

	if (length == 0)
		return;

	s = 0;
	sign = 1;
	if (*p == '-') {
		sign = -1;
		p++;
		length--;
	}
	while (length > 0 && *p >= '0' && *p <= '9') {
		digit = *p - '0';
		/* Would s * 10 + digit exceed INT64_MAX?  Saturate instead. */
		if (s > limit ||
		    (s == limit && digit > last_digit_limit)) {
			s = INT64_MAX;
			break;
		}
		s = (s * 10) + digit;
		++p;
		--length;
	}

	/* s is in [0, INT64_MAX], so negating it cannot overflow. */
	*ps = s * sign;

	/* No fraction present (or parsing stopped on overflow/garbage). */
	if (length == 0 || *p != '.')
		return;

	/*
	 * Calculate nanoseconds.  'l' is the weight of the next fractional
	 * digit; the loop ends after nine digits, when l reaches zero.
	 */
	l = 100000000UL;
	do {
		++p;
		--length;
		if (length > 0 && *p >= '0' && *p <= '9')
			*pn += (*p - '0') * l;
		else
			break;
	} while (l /= 10);
}
2798 | | |
2799 | | /* |
2800 | | * Parse GNU tar header |
2801 | | */ |
2802 | | static int |
2803 | | header_gnutar(struct archive_read *a, struct tar *tar, |
2804 | | struct archive_entry *entry, const void *h, size_t *unconsumed) |
2805 | 1 | { |
2806 | 1 | const struct archive_entry_header_gnutar *header; |
2807 | 1 | int64_t t; |
2808 | 1 | int err = ARCHIVE_OK; |
2809 | | |
2810 | | /* |
2811 | | * GNU header is like POSIX ustar, except 'prefix' is |
2812 | | * replaced with some other fields. This also means the |
2813 | | * filename is stored as in old-style archives. |
2814 | | */ |
2815 | | |
2816 | | /* Grab fields common to all tar variants. */ |
2817 | 1 | err = header_common(a, tar, entry, h); |
2818 | 1 | if (err == ARCHIVE_FATAL) |
2819 | 0 | return (err); |
2820 | | |
2821 | | /* Copy filename over (to ensure null termination). */ |
2822 | 1 | header = (const struct archive_entry_header_gnutar *)h; |
2823 | 1 | const char *existing_pathname = archive_entry_pathname(entry); |
2824 | 1 | if (existing_pathname == NULL || existing_pathname[0] == '\0') { |
2825 | 1 | if (archive_entry_copy_pathname_l(entry, |
2826 | 1 | header->name, sizeof(header->name), tar->sconv) != 0) { |
2827 | 0 | err = set_conversion_failed_error(a, tar->sconv, "Pathname"); |
2828 | 0 | if (err == ARCHIVE_FATAL) |
2829 | 0 | return (err); |
2830 | 0 | } |
2831 | 1 | } |
2832 | | |
2833 | | /* Fields common to ustar and GNU */ |
2834 | | /* XXX Can the following be factored out since it's common |
2835 | | * to ustar and gnu tar? Is it okay to move it down into |
2836 | | * header_common, perhaps? */ |
2837 | 1 | const char *existing_uname = archive_entry_uname(entry); |
2838 | 1 | if (existing_uname == NULL || existing_uname[0] == '\0') { |
2839 | 1 | if (archive_entry_copy_uname_l(entry, |
2840 | 1 | header->uname, sizeof(header->uname), tar->sconv) != 0) { |
2841 | 0 | err = set_conversion_failed_error(a, tar->sconv, "Uname"); |
2842 | 0 | if (err == ARCHIVE_FATAL) |
2843 | 0 | return (err); |
2844 | 0 | } |
2845 | 1 | } |
2846 | | |
2847 | 1 | const char *existing_gname = archive_entry_gname(entry); |
2848 | 1 | if (existing_gname == NULL || existing_gname[0] == '\0') { |
2849 | 1 | if (archive_entry_copy_gname_l(entry, |
2850 | 1 | header->gname, sizeof(header->gname), tar->sconv) != 0) { |
2851 | 0 | err = set_conversion_failed_error(a, tar->sconv, "Gname"); |
2852 | 0 | if (err == ARCHIVE_FATAL) |
2853 | 0 | return (err); |
2854 | 0 | } |
2855 | 1 | } |
2856 | | |
2857 | | /* Parse out device numbers only for char and block specials */ |
2858 | 1 | if (header->typeflag[0] == '3' || header->typeflag[0] == '4') { |
2859 | 0 | if (!archive_entry_rdev_is_set(entry)) { |
2860 | 0 | archive_entry_set_rdevmajor(entry, (dev_t) |
2861 | 0 | tar_atol(header->rdevmajor, sizeof(header->rdevmajor))); |
2862 | 0 | archive_entry_set_rdevminor(entry, (dev_t) |
2863 | 0 | tar_atol(header->rdevminor, sizeof(header->rdevminor))); |
2864 | 0 | } |
2865 | 1 | } else { |
2866 | 1 | archive_entry_set_rdev(entry, 0); |
2867 | 1 | } |
2868 | | |
2869 | 1 | tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); |
2870 | | |
2871 | | /* Grab GNU-specific fields. */ |
2872 | 1 | if (!archive_entry_atime_is_set(entry)) { |
2873 | 1 | t = tar_atol(header->atime, sizeof(header->atime)); |
2874 | 1 | if (t > 0) |
2875 | 0 | archive_entry_set_atime(entry, t, 0); |
2876 | 1 | } |
2877 | 1 | if (!archive_entry_ctime_is_set(entry)) { |
2878 | 1 | t = tar_atol(header->ctime, sizeof(header->ctime)); |
2879 | 1 | if (t > 0) |
2880 | 0 | archive_entry_set_ctime(entry, t, 0); |
2881 | 1 | } |
2882 | | |
2883 | 1 | if (header->realsize[0] != 0) { |
2884 | 0 | tar->realsize |
2885 | 0 | = tar_atol(header->realsize, sizeof(header->realsize)); |
2886 | 0 | archive_entry_set_size(entry, tar->realsize); |
2887 | 0 | tar->realsize_override = 1; |
2888 | 0 | } |
2889 | | |
2890 | 1 | if (header->sparse[0].offset[0] != 0) { |
2891 | 0 | if (gnu_sparse_old_read(a, tar, header, unconsumed) |
2892 | 0 | != ARCHIVE_OK) |
2893 | 0 | return (ARCHIVE_FATAL); |
2894 | 1 | } else { |
2895 | 1 | if (header->isextended[0] != 0) { |
2896 | | /* XXX WTF? XXX */ |
2897 | 0 | } |
2898 | 1 | } |
2899 | | |
2900 | 1 | return (err); |
2901 | 1 | } |
2902 | | |
2903 | | static int |
2904 | | gnu_add_sparse_entry(struct archive_read *a, struct tar *tar, |
2905 | | int64_t offset, int64_t remaining) |
2906 | 290k | { |
2907 | 290k | struct sparse_block *p; |
2908 | | |
2909 | 290k | p = (struct sparse_block *)calloc(1, sizeof(*p)); |
2910 | 290k | if (p == NULL) { |
2911 | 0 | archive_set_error(&a->archive, ENOMEM, "Out of memory"); |
2912 | 0 | return (ARCHIVE_FATAL); |
2913 | 0 | } |
2914 | 290k | if (tar->sparse_last != NULL) |
2915 | 0 | tar->sparse_last->next = p; |
2916 | 290k | else |
2917 | 290k | tar->sparse_list = p; |
2918 | 290k | tar->sparse_last = p; |
2919 | 290k | if (remaining < 0 || offset < 0 || offset > INT64_MAX - remaining) { |
2920 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed sparse map data"); |
2921 | 0 | return (ARCHIVE_FATAL); |
2922 | 0 | } |
2923 | 290k | p->offset = offset; |
2924 | 290k | p->remaining = remaining; |
2925 | 290k | return (ARCHIVE_OK); |
2926 | 290k | } |
2927 | | |
2928 | | static void |
2929 | | gnu_clear_sparse_list(struct tar *tar) |
2930 | 291k | { |
2931 | 291k | struct sparse_block *p; |
2932 | | |
2933 | 582k | while (tar->sparse_list != NULL) { |
2934 | 290k | p = tar->sparse_list; |
2935 | 290k | tar->sparse_list = p->next; |
2936 | 290k | free(p); |
2937 | 290k | } |
2938 | 291k | tar->sparse_last = NULL; |
2939 | 291k | } |
2940 | | |
2941 | | /* |
2942 | | * GNU tar old-format sparse data. |
2943 | | * |
2944 | | * GNU old-format sparse data is stored in a fixed-field |
2945 | | * format. Offset/size values are 11-byte octal fields (same |
2946 | | * format as 'size' field in ustart header). These are |
2947 | | * stored in the header, allocating subsequent header blocks |
2948 | | * as needed. Extending the header in this way is a pretty |
2949 | | * severe POSIX violation; this design has earned GNU tar a |
2950 | | * lot of criticism. |
2951 | | */ |
2952 | | |
2953 | | static int |
2954 | | gnu_sparse_old_read(struct archive_read *a, struct tar *tar, |
2955 | | const struct archive_entry_header_gnutar *header, size_t *unconsumed) |
2956 | 0 | { |
2957 | 0 | ssize_t bytes_read; |
2958 | 0 | const void *data; |
2959 | 0 | struct extended { |
2960 | 0 | struct gnu_sparse sparse[21]; |
2961 | 0 | char isextended[1]; |
2962 | 0 | char padding[7]; |
2963 | 0 | }; |
2964 | 0 | const struct extended *ext; |
2965 | |
|
2966 | 0 | if (gnu_sparse_old_parse(a, tar, header->sparse, 4) != ARCHIVE_OK) |
2967 | 0 | return (ARCHIVE_FATAL); |
2968 | 0 | if (header->isextended[0] == 0) |
2969 | 0 | return (ARCHIVE_OK); |
2970 | | |
2971 | 0 | do { |
2972 | 0 | tar_flush_unconsumed(a, unconsumed); |
2973 | 0 | data = __archive_read_ahead(a, 512, &bytes_read); |
2974 | 0 | if (bytes_read < 0) |
2975 | 0 | return (ARCHIVE_FATAL); |
2976 | 0 | if (bytes_read < 512) { |
2977 | 0 | archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, |
2978 | 0 | "Truncated tar archive " |
2979 | 0 | "detected while reading sparse file data"); |
2980 | 0 | return (ARCHIVE_FATAL); |
2981 | 0 | } |
2982 | 0 | *unconsumed = 512; |
2983 | 0 | ext = (const struct extended *)data; |
2984 | 0 | if (gnu_sparse_old_parse(a, tar, ext->sparse, 21) != ARCHIVE_OK) |
2985 | 0 | return (ARCHIVE_FATAL); |
2986 | 0 | } while (ext->isextended[0] != 0); |
2987 | 0 | if (tar->sparse_list != NULL) |
2988 | 0 | tar->entry_offset = tar->sparse_list->offset; |
2989 | 0 | return (ARCHIVE_OK); |
2990 | 0 | } |
2991 | | |
2992 | | static int |
2993 | | gnu_sparse_old_parse(struct archive_read *a, struct tar *tar, |
2994 | | const struct gnu_sparse *sparse, int length) |
2995 | 0 | { |
2996 | 0 | while (length > 0 && sparse->offset[0] != 0) { |
2997 | 0 | if (gnu_add_sparse_entry(a, tar, |
2998 | 0 | tar_atol(sparse->offset, sizeof(sparse->offset)), |
2999 | 0 | tar_atol(sparse->numbytes, sizeof(sparse->numbytes))) |
3000 | 0 | != ARCHIVE_OK) |
3001 | 0 | return (ARCHIVE_FATAL); |
3002 | 0 | sparse++; |
3003 | 0 | length--; |
3004 | 0 | } |
3005 | 0 | return (ARCHIVE_OK); |
3006 | 0 | } |
3007 | | |
3008 | | /* |
3009 | | * GNU tar sparse format 0.0 |
3010 | | * |
3011 | | * Beginning with GNU tar 1.15, sparse files are stored using |
3012 | | * information in the pax extended header. The GNU tar maintainers |
3013 | | * have gone through a number of variations in the process of working |
3014 | | * out this scheme; fortunately, they're all numbered. |
3015 | | * |
3016 | | * Sparse format 0.0 uses attribute GNU.sparse.numblocks to store the |
3017 | | * number of blocks, and GNU.sparse.offset/GNU.sparse.numbytes to |
3018 | | * store offset/size for each block. The repeated instances of these |
3019 | | * latter fields violate the pax specification (which frowns on |
3020 | | * duplicate keys), so this format was quickly replaced. |
3021 | | */ |
3022 | | |
3023 | | /* |
3024 | | * GNU tar sparse format 0.1 |
3025 | | * |
3026 | | * This version replaced the offset/numbytes attributes with |
3027 | | * a single "map" attribute that stored a list of integers. This |
3028 | | * format had two problems: First, the "map" attribute could be very |
3029 | | * long, which caused problems for some implementations. More |
3030 | | * importantly, the sparse data was lost when extracted by archivers |
3031 | | * that didn't recognize this extension. |
3032 | | */ |
3033 | | static int |
3034 | | gnu_sparse_01_parse(struct archive_read *a, struct tar *tar, const char *p, size_t length) |
3035 | 0 | { |
3036 | 0 | const char *e; |
3037 | 0 | int64_t offset = -1, size = -1; |
3038 | |
|
3039 | 0 | for (;;) { |
3040 | 0 | e = p; |
3041 | 0 | while (length > 0 && *e != ',') { |
3042 | 0 | if (*e < '0' || *e > '9') |
3043 | 0 | return (ARCHIVE_WARN); |
3044 | 0 | e++; |
3045 | 0 | length--; |
3046 | 0 | } |
3047 | 0 | if (offset < 0) { |
3048 | 0 | offset = tar_atol10(p, e - p); |
3049 | 0 | if (offset < 0) |
3050 | 0 | return (ARCHIVE_WARN); |
3051 | 0 | } else { |
3052 | 0 | size = tar_atol10(p, e - p); |
3053 | 0 | if (size < 0) |
3054 | 0 | return (ARCHIVE_WARN); |
3055 | 0 | if (gnu_add_sparse_entry(a, tar, offset, size) |
3056 | 0 | != ARCHIVE_OK) |
3057 | 0 | return (ARCHIVE_FATAL); |
3058 | 0 | offset = -1; |
3059 | 0 | } |
3060 | 0 | if (length == 0) |
3061 | 0 | return (ARCHIVE_OK); |
3062 | 0 | p = e + 1; |
3063 | 0 | length--; |
3064 | 0 | } |
3065 | 0 | } |
3066 | | |
3067 | | /* |
3068 | | * GNU tar sparse format 1.0 |
3069 | | * |
3070 | | * The idea: The offset/size data is stored as a series of base-10 |
3071 | | * ASCII numbers prepended to the file data, so that dearchivers that |
3072 | | * don't support this format will extract the block map along with the |
3073 | | * data and a separate post-process can restore the sparseness. |
3074 | | * |
3075 | | * Unfortunately, GNU tar 1.16 had a bug that added unnecessary |
3076 | | * padding to the body of the file when using this format. GNU tar |
3077 | | * 1.17 corrected this bug without bumping the version number, so |
3078 | | * it's not possible to support both variants. This code supports |
3079 | | * the later variant at the expense of not supporting the former. |
3080 | | * |
3081 | | * This variant also replaced GNU.sparse.size with GNU.sparse.realsize |
3082 | | * and introduced the GNU.sparse.major/GNU.sparse.minor attributes. |
3083 | | */ |
3084 | | |
3085 | | /* |
3086 | | * Read the next line from the input, and parse it as a decimal |
3087 | | * integer followed by '\n'. Returns positive integer value or |
3088 | | * negative on error. |
3089 | | */ |
3090 | | static int64_t |
3091 | | gnu_sparse_10_atol(struct archive_read *a, struct tar *tar, |
3092 | | int64_t *remaining, size_t *unconsumed) |
3093 | 0 | { |
3094 | 0 | int64_t l, limit, last_digit_limit; |
3095 | 0 | const char *p; |
3096 | 0 | ssize_t bytes_read; |
3097 | 0 | int base, digit; |
3098 | |
|
3099 | 0 | base = 10; |
3100 | 0 | limit = INT64_MAX / base; |
3101 | 0 | last_digit_limit = INT64_MAX % base; |
3102 | | |
3103 | | /* |
3104 | | * Skip any lines starting with '#'; GNU tar specs |
3105 | | * don't require this, but they should. |
3106 | | */ |
3107 | 0 | do { |
3108 | 0 | bytes_read = readline(a, tar, &p, |
3109 | 0 | (ssize_t)tar_min(*remaining, 100), unconsumed); |
3110 | 0 | if (bytes_read <= 0) |
3111 | 0 | return (ARCHIVE_FATAL); |
3112 | 0 | *remaining -= bytes_read; |
3113 | 0 | } while (p[0] == '#'); |
3114 | | |
3115 | 0 | l = 0; |
3116 | 0 | while (bytes_read > 0) { |
3117 | 0 | if (*p == '\n') |
3118 | 0 | return (l); |
3119 | 0 | if (*p < '0' || *p >= '0' + base) |
3120 | 0 | return (ARCHIVE_WARN); |
3121 | 0 | digit = *p - '0'; |
3122 | 0 | if (l > limit || (l == limit && digit > last_digit_limit)) |
3123 | 0 | l = INT64_MAX; /* Truncate on overflow. */ |
3124 | 0 | else |
3125 | 0 | l = (l * base) + digit; |
3126 | 0 | p++; |
3127 | 0 | bytes_read--; |
3128 | 0 | } |
3129 | | /* TODO: Error message. */ |
3130 | 0 | return (ARCHIVE_WARN); |
3131 | 0 | } |
3132 | | |
3133 | | /* |
3134 | | * Returns length (in bytes) of the sparse data description |
3135 | | * that was read. |
3136 | | */ |
3137 | | static ssize_t |
3138 | | gnu_sparse_10_read(struct archive_read *a, struct tar *tar, size_t *unconsumed) |
3139 | 0 | { |
3140 | 0 | ssize_t bytes_read; |
3141 | 0 | int entries; |
3142 | 0 | int64_t offset, size, to_skip, remaining; |
3143 | | |
3144 | | /* Clear out the existing sparse list. */ |
3145 | 0 | gnu_clear_sparse_list(tar); |
3146 | |
|
3147 | 0 | remaining = tar->entry_bytes_remaining; |
3148 | | |
3149 | | /* Parse entries. */ |
3150 | 0 | entries = (int)gnu_sparse_10_atol(a, tar, &remaining, unconsumed); |
3151 | 0 | if (entries < 0) |
3152 | 0 | return (ARCHIVE_FATAL); |
3153 | | /* Parse the individual entries. */ |
3154 | 0 | while (entries-- > 0) { |
3155 | | /* Parse offset/size */ |
3156 | 0 | offset = gnu_sparse_10_atol(a, tar, &remaining, unconsumed); |
3157 | 0 | if (offset < 0) |
3158 | 0 | return (ARCHIVE_FATAL); |
3159 | 0 | size = gnu_sparse_10_atol(a, tar, &remaining, unconsumed); |
3160 | 0 | if (size < 0) |
3161 | 0 | return (ARCHIVE_FATAL); |
3162 | | /* Add a new sparse entry. */ |
3163 | 0 | if (gnu_add_sparse_entry(a, tar, offset, size) != ARCHIVE_OK) |
3164 | 0 | return (ARCHIVE_FATAL); |
3165 | 0 | } |
3166 | | /* Skip rest of block... */ |
3167 | 0 | tar_flush_unconsumed(a, unconsumed); |
3168 | 0 | bytes_read = (ssize_t)(tar->entry_bytes_remaining - remaining); |
3169 | 0 | to_skip = 0x1ff & -bytes_read; |
3170 | | /* Fail if tar->entry_bytes_remaing would get negative */ |
3171 | 0 | if (to_skip > remaining) |
3172 | 0 | return (ARCHIVE_FATAL); |
3173 | 0 | if (to_skip != __archive_read_consume(a, to_skip)) |
3174 | 0 | return (ARCHIVE_FATAL); |
3175 | 0 | return ((ssize_t)(bytes_read + to_skip)); |
3176 | 0 | } |
3177 | | |
3178 | | /* |
3179 | | * Solaris pax extension for a sparse file. This is recorded with the |
3180 | | * data and hole pairs. The way recording sparse information by Solaris' |
3181 | | * pax simply indicates where data and sparse are, so the stored contents |
3182 | | * consist of both data and hole. |
3183 | | */ |
3184 | | static int |
3185 | | pax_attribute_SUN_holesdata(struct archive_read *a, struct tar *tar, |
3186 | | struct archive_entry *entry, const char *p, size_t length) |
3187 | 0 | { |
3188 | 0 | const char *e; |
3189 | 0 | int64_t start, end; |
3190 | 0 | int hole = 1; |
3191 | |
|
3192 | 0 | (void)entry; /* UNUSED */ |
3193 | |
|
3194 | 0 | end = 0; |
3195 | 0 | if (length <= 0) |
3196 | 0 | return (ARCHIVE_WARN); |
3197 | 0 | if (*p == ' ') { |
3198 | 0 | p++; |
3199 | 0 | length--; |
3200 | 0 | } else { |
3201 | 0 | return (ARCHIVE_WARN); |
3202 | 0 | } |
3203 | 0 | for (;;) { |
3204 | 0 | e = p; |
3205 | 0 | while (length > 0 && *e != ' ') { |
3206 | 0 | if (*e < '0' || *e > '9') |
3207 | 0 | return (ARCHIVE_WARN); |
3208 | 0 | e++; |
3209 | 0 | length--; |
3210 | 0 | } |
3211 | 0 | start = end; |
3212 | 0 | end = tar_atol10(p, e - p); |
3213 | 0 | if (end < 0) |
3214 | 0 | return (ARCHIVE_WARN); |
3215 | 0 | if (start < end) { |
3216 | 0 | if (gnu_add_sparse_entry(a, tar, start, |
3217 | 0 | end - start) != ARCHIVE_OK) |
3218 | 0 | return (ARCHIVE_FATAL); |
3219 | 0 | tar->sparse_last->hole = hole; |
3220 | 0 | } |
3221 | 0 | if (length == 0 || *e == '\n') { |
3222 | 0 | if (length == 0 && *e == '\n') { |
3223 | 0 | return (ARCHIVE_OK); |
3224 | 0 | } else { |
3225 | 0 | return (ARCHIVE_WARN); |
3226 | 0 | } |
3227 | 0 | } |
3228 | 0 | p = e + 1; |
3229 | 0 | length--; |
3230 | 0 | hole = hole == 0; |
3231 | 0 | } |
3232 | 0 | } |
3233 | | |
/*-
 * Convert text->integer.
 *
 * Traditional tar formats (including POSIX) specify base-8 for
 * all of the standard numeric fields.  This is a significant limitation
 * in practice:
 *   = file size is limited to 8GB
 *   = rdevmajor and rdevminor are limited to 21 bits
 *   = uid/gid are limited to 21 bits
 *
 * There are two workarounds for this:
 *   = pax extended headers, which use variable-length string fields
 *   = GNU tar and STAR both allow either base-8 or base-256 in
 *      most fields.  The high bit is set to indicate base-256.
 *
 * On read, this implementation supports both extensions: the high bit
 * of the first byte selects the base-256 parser, anything else goes
 * to the base-8 parser.
 */
static int64_t
tar_atol(const char *p, size_t char_cnt)
{
	/*
	 * Technically, GNU tar considers a field to be in base-256
	 * only if the first byte is 0xff or 0x80; any byte with the
	 * high bit set is accepted here.
	 */
	const int is_base256 = (*p & 0x80) != 0;

	if (is_base256)
		return (tar_atol256(p, char_cnt));
	return (tar_atol8(p, char_cnt));
}
3262 | | |
/*
 * Parse a signed integer written in the given base (digits '0' up to
 * '0' + base - 1) from a field of at most char_cnt bytes.
 *
 * Leading spaces and tabs are skipped, one optional '-' is accepted,
 * and parsing stops at the first byte that is not a digit of the base
 * or when the field is exhausted.  A field with no digits yields 0.
 * Values that do not fit are truncated to INT64_MAX (or INT64_MIN for
 * negative input).  No byte at or beyond p + char_cnt is ever read.
 *
 * Note that this implementation does not (and should not!) obey
 * locale settings; you cannot simply substitute strtol here, since
 * it does obey locale.
 */
static int64_t
tar_atol_base_n(const char *p, size_t char_cnt, int base)
{
	int64_t l, maxval, limit, last_digit_limit;
	int digit, sign;

	maxval = INT64_MAX;
	limit = INT64_MAX / base;
	last_digit_limit = INT64_MAX % base;

	/* The pointer is never dereferenced once char_cnt hits zero. */
	while (char_cnt != 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}

	sign = 1;
	if (char_cnt != 0 && *p == '-') {
		sign = -1;
		p++;
		char_cnt--;

		maxval = INT64_MIN;
		limit = -(INT64_MIN / base);
		last_digit_limit = -(INT64_MIN % base);
	}

	l = 0;
	/*
	 * Check the remaining count before fetching each byte, so the
	 * byte just past the end of the field is never touched.
	 */
	for (; char_cnt != 0; p++, char_cnt--) {
		digit = *p - '0';
		if (digit < 0 || digit >= base)
			break;
		/*
		 * '>=' keeps the magnitude accumulated in l strictly
		 * below that of maxval, so the negation below cannot
		 * overflow; the extreme value itself is returned here.
		 */
		if (l > limit || (l == limit && digit >= last_digit_limit))
			return maxval; /* Truncate on overflow. */
		l = (l * base) + digit;
	}
	return (sign < 0) ? -l : l;
}
3311 | | |
/* Parse a base-8 (octal) text field; see tar_atol_base_n(). */
static int64_t
tar_atol8(const char *p, size_t char_cnt)
{
	const int octal_base = 8;

	return (tar_atol_base_n(p, char_cnt, octal_base));
}
3317 | | |
/* Parse a base-10 (decimal) text field; see tar_atol_base_n(). */
static int64_t
tar_atol10(const char *p, size_t char_cnt)
{
	const int decimal_base = 10;

	return (tar_atol_base_n(p, char_cnt, decimal_base));
}
3323 | | |
/*
 * Parse a base-256 integer.  This is just a variable-length
 * twos-complement signed binary value in big-endian order, except
 * that the high-order bit is ignored.  The values here can be up to
 * 12 bytes, so we need to be careful about overflowing 64-bit
 * (8-byte) integers.
 *
 * Returns the decoded value, INT64_MAX / INT64_MIN when the value
 * does not fit in 64 bits, and 0 for an empty (char_cnt == 0) field.
 *
 * This code unashamedly assumes that the local machine uses 8-bit
 * bytes and twos-complement arithmetic.
 */
static int64_t
tar_atol256(const char *_p, size_t char_cnt)
{
	uint64_t l;
	const unsigned char *p = (const unsigned char *)_p;
	unsigned char c, neg;

	/*
	 * An empty field has no bytes to read; without this guard the
	 * "--char_cnt" below would wrap around and run off the buffer.
	 */
	if (char_cnt == 0)
		return (0);

	/* Extend 7-bit 2s-comp to 8-bit 2s-comp, decide sign. */
	c = *p;
	if (c & 0x40) {
		neg = 0xff;
		c |= 0x80;
		l = ~(uint64_t)0;
	} else {
		neg = 0;
		c &= 0x7f;
		l = 0;
	}

	/* If more than 8 bytes, check that we can ignore
	 * high-order bits without overflow. */
	while (char_cnt > sizeof(int64_t)) {
		--char_cnt;
		if (c != neg)
			return neg ? INT64_MIN : INT64_MAX;
		c = *++p;
	}

	/* c is first byte that fits; if sign mismatch, return overflow */
	if ((c ^ neg) & 0x80) {
		return neg ? INT64_MIN : INT64_MAX;
	}

	/* Accumulate remaining bytes. */
	while (--char_cnt > 0) {
		l = (l << 8) | c;
		c = *++p;
	}
	l = (l << 8) | c;
	/* Return signed twos-complement value. */
	return (int64_t)(l);
}
3376 | | |
3377 | | /* |
3378 | | * Returns length of line (including trailing newline) |
3379 | | * or negative on error. 'start' argument is updated to |
3380 | | * point to first character of line. This avoids copying |
3381 | | * when possible. |
3382 | | */ |
3383 | | static ssize_t |
3384 | | readline(struct archive_read *a, struct tar *tar, const char **start, |
3385 | | ssize_t limit, size_t *unconsumed) |
3386 | 0 | { |
3387 | 0 | ssize_t bytes_read; |
3388 | 0 | ssize_t total_size = 0; |
3389 | 0 | const void *t; |
3390 | 0 | const char *s; |
3391 | 0 | void *p; |
3392 | |
|
3393 | 0 | tar_flush_unconsumed(a, unconsumed); |
3394 | |
|
3395 | 0 | t = __archive_read_ahead(a, 1, &bytes_read); |
3396 | 0 | if (bytes_read <= 0) |
3397 | 0 | return (ARCHIVE_FATAL); |
3398 | 0 | s = t; /* Start of line? */ |
3399 | 0 | p = memchr(t, '\n', bytes_read); |
3400 | | /* If we found '\n' in the read buffer, return pointer to that. */ |
3401 | 0 | if (p != NULL) { |
3402 | 0 | bytes_read = 1 + ((const char *)p) - s; |
3403 | 0 | if (bytes_read > limit) { |
3404 | 0 | archive_set_error(&a->archive, |
3405 | 0 | ARCHIVE_ERRNO_FILE_FORMAT, |
3406 | 0 | "Line too long"); |
3407 | 0 | return (ARCHIVE_FATAL); |
3408 | 0 | } |
3409 | 0 | *unconsumed = bytes_read; |
3410 | 0 | *start = s; |
3411 | 0 | return (bytes_read); |
3412 | 0 | } |
3413 | 0 | *unconsumed = bytes_read; |
3414 | | /* Otherwise, we need to accumulate in a line buffer. */ |
3415 | 0 | for (;;) { |
3416 | 0 | if (total_size + bytes_read > limit) { |
3417 | 0 | archive_set_error(&a->archive, |
3418 | 0 | ARCHIVE_ERRNO_FILE_FORMAT, |
3419 | 0 | "Line too long"); |
3420 | 0 | return (ARCHIVE_FATAL); |
3421 | 0 | } |
3422 | 0 | if (archive_string_ensure(&tar->line, total_size + bytes_read) == NULL) { |
3423 | 0 | archive_set_error(&a->archive, ENOMEM, |
3424 | 0 | "Can't allocate working buffer"); |
3425 | 0 | return (ARCHIVE_FATAL); |
3426 | 0 | } |
3427 | 0 | memcpy(tar->line.s + total_size, t, bytes_read); |
3428 | 0 | tar_flush_unconsumed(a, unconsumed); |
3429 | 0 | total_size += bytes_read; |
3430 | | /* If we found '\n', clean up and return. */ |
3431 | 0 | if (p != NULL) { |
3432 | 0 | *start = tar->line.s; |
3433 | 0 | return (total_size); |
3434 | 0 | } |
3435 | | /* Read some more. */ |
3436 | 0 | t = __archive_read_ahead(a, 1, &bytes_read); |
3437 | 0 | if (bytes_read <= 0) |
3438 | 0 | return (ARCHIVE_FATAL); |
3439 | 0 | s = t; /* Start of line? */ |
3440 | 0 | p = memchr(t, '\n', bytes_read); |
3441 | | /* If we found '\n', trim the read. */ |
3442 | 0 | if (p != NULL) { |
3443 | 0 | bytes_read = 1 + ((const char *)p) - s; |
3444 | 0 | } |
3445 | 0 | *unconsumed = bytes_read; |
3446 | 0 | } |
3447 | 0 | } |
3448 | | |
/*
 * base64_decode - Base64 decode
 *
 * Decodes 'len' bytes at 's' into a freshly malloc()ed buffer and
 * stores the number of decoded bytes in '*out_len'.  The caller owns
 * the returned buffer.  Returns NULL (with *out_len set to 0) if the
 * allocation fails.
 *
 * This accepts most variations of base-64 encoding, including:
 *    * with or without line breaks
 *    * with or without the final group padded with '=' or '_' characters
 * (The most economical Base-64 variant does not pad the last group and
 * omits line breaks; RFC1341 used for MIME requires both.)
 */
static char *
base64_decode(const char *s, size_t len, size_t *out_len)
{
	static const unsigned char digits[64] = {
		'A','B','C','D','E','F','G','H','I','J','K','L','M','N',
		'O','P','Q','R','S','T','U','V','W','X','Y','Z','a','b',
		'c','d','e','f','g','h','i','j','k','l','m','n','o','p',
		'q','r','s','t','u','v','w','x','y','z','0','1','2','3',
		'4','5','6','7','8','9','+','/' };
	static unsigned char decode_table[128];
	const unsigned char *in = (const unsigned char *)s;
	char *result, *wp;

	/* Build the reverse lookup table on first use. */
	if (decode_table[digits[1]] != 1) {
		unsigned idx;

		memset(decode_table, 0xff, sizeof(decode_table));
		for (idx = 0; idx < sizeof(digits); idx++)
			decode_table[digits[idx]] = idx;
	}

	/* Room for the largest possible output; part may go unused. */
	result = (char *)malloc(len - len / 4 + 1);
	if (result == NULL) {
		*out_len = 0;
		return (NULL);
	}
	wp = result;

	while (len > 0) {
		int bits = 0;	/* Six bits per accepted character. */
		int count = 0;	/* Characters accepted into this group. */

		/* Gather the next group of (up to) four characters. */
		for (; count < 4 && len > 0; in++, len--) {
			/* '=' or '_' padding indicates final group. */
			if (*in == '=' || *in == '_') {
				len = 0;
				break;
			}
			/* Skip illegal characters (including line breaks) */
			if (*in > 127 || *in < 32
			    || decode_table[*in] == 0xff)
				continue;
			bits = (bits << 6) | decode_table[*in];
			count++;
		}

		/* Left-align a short group within the 24-bit value. */
		bits <<= 6 * (4 - count);

		/* A lone character carries no full byte and is dropped. */
		if (count >= 2)
			wp[0] = (bits >> 16) & 0xff;
		if (count >= 3)
			wp[1] = (bits >> 8) & 0xff;
		if (count == 4)
			wp[2] = bits & 0xff;
		wp += count * 3 / 4;
	}

	*out_len = wp - result;
	return (result);
}
3529 | | |
/*
 * Decode %XX escapes in the first 'length' bytes of 'in', stopping
 * early at a NUL byte.  A '%' that is not followed by two hex digits
 * within the input is copied through unchanged.
 *
 * Returns a malloc()ed, NUL-terminated string owned by the caller,
 * or NULL if the allocation fails.
 */
static char *
url_decode(const char *in, size_t length)
{
	const char *rp = in;
	char *out, *wp;

	/* The output is never longer than the input. */
	out = (char *)malloc(length + 1);
	if (out == NULL)
		return (NULL);
	wp = out;

	while (length > 0 && *rp != '\0') {
		if (rp[0] == '%' && length > 2) {
			/* Try to convert % escape */
			int hi = tohex(rp[1]);
			int lo = tohex(rp[2]);

			if (hi >= 0 && lo >= 0) {
				/* Valid escape: three chars in, one out. */
				*wp++ = ((hi << 4) | lo);
				rp += 3;
				length -= 3;
				continue;
			}
			/* Else fall through and treat '%' as normal char */
		}
		*wp++ = *rp++;
		--length;
	}
	*wp = '\0';
	return (out);
}
3560 | | |
/*
 * Value (0..15) of the hexadecimal digit 'c', upper or lower case,
 * or -1 if 'c' is not a hexadecimal digit.
 */
static int
tohex(int c)
{
	if (c >= '0' && c <= '9')
		return (c - '0');
	if (c >= 'A' && c <= 'F')
		return (10 + (c - 'A'));
	if (c >= 'a' && c <= 'f')
		return (10 + (c - 'a'));
	return (-1);
}