/src/libarchive/libarchive/archive_entry_link_resolver.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*- |
2 | | * Copyright (c) 2003-2007 Tim Kientzle |
3 | | * All rights reserved. |
4 | | * |
5 | | * Redistribution and use in source and binary forms, with or without |
6 | | * modification, are permitted provided that the following conditions |
7 | | * are met: |
8 | | * 1. Redistributions of source code must retain the above copyright |
9 | | * notice, this list of conditions and the following disclaimer. |
10 | | * 2. Redistributions in binary form must reproduce the above copyright |
11 | | * notice, this list of conditions and the following disclaimer in the |
12 | | * documentation and/or other materials provided with the distribution. |
13 | | * |
14 | | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR |
15 | | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
16 | | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
17 | | * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, |
18 | | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
19 | | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
20 | | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
21 | | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
22 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
23 | | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
24 | | */ |
25 | | |
26 | | #include "archive_platform.h" |
27 | | |
28 | | #ifdef HAVE_SYS_STAT_H |
29 | | #include <sys/stat.h> |
30 | | #endif |
31 | | #ifdef HAVE_ERRNO_H |
32 | | #include <errno.h> |
33 | | #endif |
34 | | #include <stdio.h> |
35 | | #ifdef HAVE_STDLIB_H |
36 | | #include <stdlib.h> |
37 | | #endif |
38 | | #ifdef HAVE_STRING_H |
39 | | #include <string.h> |
40 | | #endif |
41 | | |
42 | | #include "archive.h" |
43 | | #include "archive_entry.h" |
44 | | |
45 | | /* |
46 | | * This is mostly a pretty straightforward hash table implementation. |
47 | | * The only interesting bit is the different strategies used to |
48 | | * match up links. These strategies match those used by various |
49 | | * archiving formats: |
50 | | * tar - content stored with first link, remainder refer back to it. |
51 | | * This requires us to match each subsequent link up with the |
52 | | * first appearance. |
53 | | * cpio - Old cpio just stored body with each link, match-ups were |
54 | | * implicit. This is trivial. |
55 | | * new cpio - New cpio only stores body with last link, match-ups |
56 | | * are implicit. This is actually quite tricky; see the notes |
57 | | * below. |
58 | | */ |
59 | | |
60 | | /* Users pass us a format code, we translate that into a strategy here. */ |
61 | 0 | #define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR 0 |
62 | 1 | #define ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE 1 |
63 | 0 | #define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 2 |
64 | 0 | #define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 3 |
65 | | |
66 | | /* Initial size of link cache. */ |
67 | 2 | #define links_cache_initial_size 1024 |
68 | | |
69 | | struct links_entry { |
70 | | struct links_entry *next; |
71 | | struct links_entry *previous; |
72 | | struct archive_entry *canonical; |
73 | | struct archive_entry *entry; |
74 | | size_t hash; |
75 | | unsigned int links; /* # links not yet seen */ |
76 | | }; |
77 | | |
78 | | struct archive_entry_linkresolver { |
79 | | struct links_entry **buckets; |
80 | | struct links_entry *spare; |
81 | | unsigned long number_entries; |
82 | | size_t number_buckets; |
83 | | int strategy; |
84 | | }; |
85 | | |
86 | 1 | #define NEXT_ENTRY_DEFERRED 1 |
87 | 1 | #define NEXT_ENTRY_PARTIAL 2 |
88 | 1 | #define NEXT_ENTRY_ALL (NEXT_ENTRY_DEFERRED | NEXT_ENTRY_PARTIAL) |
89 | | |
90 | | static struct links_entry *find_entry(struct archive_entry_linkresolver *, |
91 | | struct archive_entry *); |
92 | | static void grow_hash(struct archive_entry_linkresolver *); |
93 | | static struct links_entry *insert_entry(struct archive_entry_linkresolver *, |
94 | | struct archive_entry *); |
95 | | static struct links_entry *next_entry(struct archive_entry_linkresolver *, |
96 | | int); |
97 | | |
98 | | struct archive_entry_linkresolver * |
99 | | archive_entry_linkresolver_new(void) |
100 | 1 | { |
101 | 1 | struct archive_entry_linkresolver *res; |
102 | | |
103 | | /* Check for positive power-of-two */ |
104 | 1 | if (links_cache_initial_size == 0 || |
105 | 1 | (links_cache_initial_size & (links_cache_initial_size - 1)) != 0) |
106 | 0 | return (NULL); |
107 | | |
108 | 1 | res = calloc(1, sizeof(struct archive_entry_linkresolver)); |
109 | 1 | if (res == NULL) |
110 | 0 | return (NULL); |
111 | 1 | res->number_buckets = links_cache_initial_size; |
112 | 1 | res->buckets = calloc(res->number_buckets, sizeof(res->buckets[0])); |
113 | 1 | if (res->buckets == NULL) { |
114 | 0 | free(res); |
115 | 0 | return (NULL); |
116 | 0 | } |
117 | 1 | return (res); |
118 | 1 | } |
119 | | |
120 | | void |
121 | | archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res, |
122 | | int fmt) |
123 | 1 | { |
124 | 1 | int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK; |
125 | | |
126 | 1 | switch (fmtbase) { |
127 | 0 | case ARCHIVE_FORMAT_7ZIP: |
128 | 0 | case ARCHIVE_FORMAT_AR: |
129 | 0 | case ARCHIVE_FORMAT_ZIP: |
130 | 0 | res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO; |
131 | 0 | break; |
132 | 0 | case ARCHIVE_FORMAT_CPIO: |
133 | 0 | switch (fmt) { |
134 | 0 | case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC: |
135 | 0 | case ARCHIVE_FORMAT_CPIO_SVR4_CRC: |
136 | 0 | res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO; |
137 | 0 | break; |
138 | 0 | default: |
139 | 0 | res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO; |
140 | 0 | break; |
141 | 0 | } |
142 | 0 | break; |
143 | 1 | case ARCHIVE_FORMAT_MTREE: |
144 | 1 | res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE; |
145 | 1 | break; |
146 | 0 | case ARCHIVE_FORMAT_ISO9660: |
147 | 0 | case ARCHIVE_FORMAT_SHAR: |
148 | 0 | case ARCHIVE_FORMAT_TAR: |
149 | 0 | case ARCHIVE_FORMAT_XAR: |
150 | 0 | res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR; |
151 | 0 | break; |
152 | 0 | default: |
153 | 0 | res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO; |
154 | 0 | break; |
155 | 1 | } |
156 | 1 | } |
157 | | |
158 | | void |
159 | | archive_entry_linkresolver_free(struct archive_entry_linkresolver *res) |
160 | 238 | { |
161 | 238 | struct links_entry *le; |
162 | | |
163 | 238 | if (res == NULL) |
164 | 237 | return; |
165 | | |
166 | 1 | while ((le = next_entry(res, NEXT_ENTRY_ALL)) != NULL) |
167 | 0 | archive_entry_free(le->entry); |
168 | 1 | free(res->buckets); |
169 | 1 | free(res); |
170 | 1 | } |
171 | | |
172 | | void |
173 | | archive_entry_linkify(struct archive_entry_linkresolver *res, |
174 | | struct archive_entry **e, struct archive_entry **f) |
175 | 0 | { |
176 | 0 | struct links_entry *le; |
177 | 0 | struct archive_entry *t; |
178 | |
|
179 | 0 | *f = NULL; /* Default: Don't return a second entry. */ |
180 | |
|
181 | 0 | if (*e == NULL) { |
182 | 0 | le = next_entry(res, NEXT_ENTRY_DEFERRED); |
183 | 0 | if (le != NULL) { |
184 | 0 | *e = le->entry; |
185 | 0 | le->entry = NULL; |
186 | 0 | } |
187 | 0 | return; |
188 | 0 | } |
189 | | |
190 | | /* If it has only one link, then we're done. */ |
191 | 0 | if (archive_entry_nlink(*e) == 1) |
192 | 0 | return; |
193 | | /* Directories, devices never have hardlinks. */ |
194 | 0 | if (archive_entry_filetype(*e) == AE_IFDIR |
195 | 0 | || archive_entry_filetype(*e) == AE_IFBLK |
196 | 0 | || archive_entry_filetype(*e) == AE_IFCHR) |
197 | 0 | return; |
198 | | |
199 | 0 | switch (res->strategy) { |
200 | 0 | case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR: |
201 | 0 | le = find_entry(res, *e); |
202 | 0 | if (le != NULL) { |
203 | 0 | archive_entry_unset_size(*e); |
204 | | #if defined(_WIN32) && !defined(__CYGWIN__) |
205 | | archive_entry_copy_hardlink_w(*e, |
206 | | archive_entry_pathname_w(le->canonical)); |
207 | | #else |
208 | 0 | archive_entry_copy_hardlink(*e, |
209 | 0 | archive_entry_pathname(le->canonical)); |
210 | 0 | #endif |
211 | 0 | } else |
212 | 0 | insert_entry(res, *e); |
213 | 0 | return; |
214 | 0 | case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE: |
215 | 0 | le = find_entry(res, *e); |
216 | 0 | if (le != NULL) { |
217 | | #if defined(_WIN32) && !defined(__CYGWIN__) |
218 | | archive_entry_copy_hardlink_w(*e, |
219 | | archive_entry_pathname_w(le->canonical)); |
220 | | #else |
221 | 0 | archive_entry_copy_hardlink(*e, |
222 | 0 | archive_entry_pathname(le->canonical)); |
223 | 0 | #endif |
224 | 0 | } else |
225 | 0 | insert_entry(res, *e); |
226 | 0 | return; |
227 | 0 | case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO: |
228 | | /* This one is trivial. */ |
229 | 0 | return; |
230 | 0 | case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO: |
231 | 0 | le = find_entry(res, *e); |
232 | 0 | if (le != NULL) { |
233 | | /* |
234 | | * Put the new entry in le, return the |
235 | | * old entry from le. |
236 | | */ |
237 | 0 | t = *e; |
238 | 0 | *e = le->entry; |
239 | 0 | le->entry = t; |
240 | | /* Make the old entry into a hardlink. */ |
241 | 0 | archive_entry_unset_size(*e); |
242 | | #if defined(_WIN32) && !defined(__CYGWIN__) |
243 | | archive_entry_copy_hardlink_w(*e, |
244 | | archive_entry_pathname_w(le->canonical)); |
245 | | #else |
246 | 0 | archive_entry_copy_hardlink(*e, |
247 | 0 | archive_entry_pathname(le->canonical)); |
248 | 0 | #endif |
249 | | /* If we ran out of links, return the |
250 | | * final entry as well. */ |
251 | 0 | if (le->links == 0) { |
252 | 0 | *f = le->entry; |
253 | 0 | le->entry = NULL; |
254 | 0 | } |
255 | 0 | } else { |
256 | | /* |
257 | | * If we haven't seen it, tuck it away |
258 | | * for future use. |
259 | | */ |
260 | 0 | le = insert_entry(res, *e); |
261 | 0 | if (le == NULL) |
262 | | /* XXX We should return an error code XXX */ |
263 | 0 | return; |
264 | 0 | le->entry = *e; |
265 | 0 | *e = NULL; |
266 | 0 | } |
267 | 0 | return; |
268 | 0 | default: |
269 | 0 | break; |
270 | 0 | } |
271 | 0 | return; |
272 | 0 | } |
273 | | |
274 | | static struct links_entry * |
275 | | find_entry(struct archive_entry_linkresolver *res, |
276 | | struct archive_entry *entry) |
277 | 0 | { |
278 | 0 | struct links_entry *le; |
279 | 0 | size_t hash, bucket; |
280 | 0 | dev_t dev; |
281 | 0 | int64_t ino; |
282 | | |
283 | | /* Free a held entry. */ |
284 | 0 | if (res->spare != NULL) { |
285 | 0 | archive_entry_free(res->spare->canonical); |
286 | 0 | archive_entry_free(res->spare->entry); |
287 | 0 | free(res->spare); |
288 | 0 | res->spare = NULL; |
289 | 0 | } |
290 | |
|
291 | 0 | dev = archive_entry_dev(entry); |
292 | 0 | ino = archive_entry_ino64(entry); |
293 | 0 | hash = (size_t)(dev ^ ino); |
294 | | |
295 | | /* Try to locate this entry in the links cache. */ |
296 | 0 | bucket = hash & (res->number_buckets - 1); |
297 | 0 | for (le = res->buckets[bucket]; le != NULL; le = le->next) { |
298 | 0 | if (le->hash == hash |
299 | 0 | && dev == archive_entry_dev(le->canonical) |
300 | 0 | && ino == archive_entry_ino64(le->canonical)) { |
301 | | /* |
302 | | * Decrement link count each time and release |
303 | | * the entry if it hits zero. This saves |
304 | | * memory and is necessary for detecting |
305 | | * missed links. |
306 | | */ |
307 | 0 | --le->links; |
308 | 0 | if (le->links > 0) |
309 | 0 | return (le); |
310 | | /* Remove it from this hash bucket. */ |
311 | 0 | if (le->previous != NULL) |
312 | 0 | le->previous->next = le->next; |
313 | 0 | if (le->next != NULL) |
314 | 0 | le->next->previous = le->previous; |
315 | 0 | if (res->buckets[bucket] == le) |
316 | 0 | res->buckets[bucket] = le->next; |
317 | 0 | res->number_entries--; |
318 | | /* Defer freeing this entry. */ |
319 | 0 | res->spare = le; |
320 | 0 | return (le); |
321 | 0 | } |
322 | 0 | } |
323 | 0 | return (NULL); |
324 | 0 | } |
325 | | |
326 | | static struct links_entry * |
327 | | next_entry(struct archive_entry_linkresolver *res, int mode) |
328 | 1 | { |
329 | 1 | struct links_entry *le; |
330 | 1 | size_t bucket; |
331 | | |
332 | | /* Free a held entry. */ |
333 | 1 | if (res->spare != NULL) { |
334 | 0 | archive_entry_free(res->spare->canonical); |
335 | 0 | archive_entry_free(res->spare->entry); |
336 | 0 | free(res->spare); |
337 | 0 | res->spare = NULL; |
338 | 0 | } |
339 | | |
340 | | /* Look for next non-empty bucket in the links cache. */ |
341 | 1.02k | for (bucket = 0; bucket < res->number_buckets; bucket++) { |
342 | 1.02k | for (le = res->buckets[bucket]; le != NULL; le = le->next) { |
343 | 0 | if (le->entry != NULL && |
344 | 0 | (mode & NEXT_ENTRY_DEFERRED) == 0) |
345 | 0 | continue; |
346 | 0 | if (le->entry == NULL && |
347 | 0 | (mode & NEXT_ENTRY_PARTIAL) == 0) |
348 | 0 | continue; |
349 | | /* Remove it from this hash bucket. */ |
350 | 0 | if (le->next != NULL) |
351 | 0 | le->next->previous = le->previous; |
352 | 0 | if (le->previous != NULL) |
353 | 0 | le->previous->next = le->next; |
354 | 0 | else |
355 | 0 | res->buckets[bucket] = le->next; |
356 | 0 | res->number_entries--; |
357 | | /* Defer freeing this entry. */ |
358 | 0 | res->spare = le; |
359 | 0 | return (le); |
360 | 0 | } |
361 | 1.02k | } |
362 | 1 | return (NULL); |
363 | 1 | } |
364 | | |
365 | | static struct links_entry * |
366 | | insert_entry(struct archive_entry_linkresolver *res, |
367 | | struct archive_entry *entry) |
368 | 0 | { |
369 | 0 | struct links_entry *le; |
370 | 0 | size_t hash, bucket; |
371 | | |
372 | | /* Add this entry to the links cache. */ |
373 | 0 | le = calloc(1, sizeof(struct links_entry)); |
374 | 0 | if (le == NULL) |
375 | 0 | return (NULL); |
376 | 0 | le->canonical = archive_entry_clone(entry); |
377 | | |
378 | | /* If the links cache is getting too full, enlarge the hash table. */ |
379 | 0 | if (res->number_entries > res->number_buckets * 2) |
380 | 0 | grow_hash(res); |
381 | |
|
382 | 0 | hash = (size_t)(archive_entry_dev(entry) ^ archive_entry_ino64(entry)); |
383 | 0 | bucket = hash & (res->number_buckets - 1); |
384 | | |
385 | | /* If we could allocate the entry, record it. */ |
386 | 0 | if (res->buckets[bucket] != NULL) |
387 | 0 | res->buckets[bucket]->previous = le; |
388 | 0 | res->number_entries++; |
389 | 0 | le->next = res->buckets[bucket]; |
390 | 0 | le->previous = NULL; |
391 | 0 | res->buckets[bucket] = le; |
392 | 0 | le->hash = hash; |
393 | 0 | le->links = archive_entry_nlink(entry) - 1; |
394 | 0 | return (le); |
395 | 0 | } |
396 | | |
397 | | static void |
398 | | grow_hash(struct archive_entry_linkresolver *res) |
399 | 0 | { |
400 | 0 | struct links_entry *le, **new_buckets; |
401 | 0 | size_t new_size; |
402 | 0 | size_t i, bucket; |
403 | | |
404 | | /* Try to enlarge the bucket list. */ |
405 | 0 | new_size = res->number_buckets * 2; |
406 | 0 | if (new_size < res->number_buckets) |
407 | 0 | return; |
408 | 0 | new_buckets = calloc(new_size, sizeof(struct links_entry *)); |
409 | |
|
410 | 0 | if (new_buckets == NULL) |
411 | 0 | return; |
412 | | |
413 | 0 | for (i = 0; i < res->number_buckets; i++) { |
414 | 0 | while (res->buckets[i] != NULL) { |
415 | | /* Remove entry from old bucket. */ |
416 | 0 | le = res->buckets[i]; |
417 | 0 | res->buckets[i] = le->next; |
418 | | |
419 | | /* Add entry to new bucket. */ |
420 | 0 | bucket = le->hash & (new_size - 1); |
421 | |
|
422 | 0 | if (new_buckets[bucket] != NULL) |
423 | 0 | new_buckets[bucket]->previous = le; |
424 | 0 | le->next = new_buckets[bucket]; |
425 | 0 | le->previous = NULL; |
426 | 0 | new_buckets[bucket] = le; |
427 | 0 | } |
428 | 0 | } |
429 | 0 | free(res->buckets); |
430 | 0 | res->buckets = new_buckets; |
431 | 0 | res->number_buckets = new_size; |
432 | 0 | } |
433 | | |
434 | | struct archive_entry * |
435 | | archive_entry_partial_links(struct archive_entry_linkresolver *res, |
436 | | unsigned int *links) |
437 | 0 | { |
438 | 0 | struct archive_entry *e; |
439 | 0 | struct links_entry *le; |
440 | | |
441 | | /* Free a held entry. */ |
442 | 0 | if (res->spare != NULL) { |
443 | 0 | archive_entry_free(res->spare->canonical); |
444 | 0 | archive_entry_free(res->spare->entry); |
445 | 0 | free(res->spare); |
446 | 0 | res->spare = NULL; |
447 | 0 | } |
448 | |
|
449 | 0 | le = next_entry(res, NEXT_ENTRY_PARTIAL); |
450 | 0 | if (le != NULL) { |
451 | 0 | e = le->canonical; |
452 | 0 | if (links != NULL) |
453 | 0 | *links = le->links; |
454 | 0 | le->canonical = NULL; |
455 | 0 | } else { |
456 | 0 | e = NULL; |
457 | 0 | if (links != NULL) |
458 | 0 | *links = 0; |
459 | 0 | } |
460 | 0 | return (e); |
461 | 0 | } |