Coverage Report

Created: 2025-08-26 06:37

/src/libarchive/libarchive/archive_entry_link_resolver.c
Line
Count
Source (jump to first uncovered line)
1
/*-
2
 * Copyright (c) 2003-2007 Tim Kientzle
3
 * All rights reserved.
4
 *
5
 * Redistribution and use in source and binary forms, with or without
6
 * modification, are permitted provided that the following conditions
7
 * are met:
8
 * 1. Redistributions of source code must retain the above copyright
9
 *    notice, this list of conditions and the following disclaimer.
10
 * 2. Redistributions in binary form must reproduce the above copyright
11
 *    notice, this list of conditions and the following disclaimer in the
12
 *    documentation and/or other materials provided with the distribution.
13
 *
14
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24
 */
25
26
#include "archive_platform.h"
27
28
#ifdef HAVE_SYS_STAT_H
29
#include <sys/stat.h>
30
#endif
31
#ifdef HAVE_ERRNO_H
32
#include <errno.h>
33
#endif
34
#include <stdio.h>
35
#ifdef HAVE_STDLIB_H
36
#include <stdlib.h>
37
#endif
38
#ifdef HAVE_STRING_H
39
#include <string.h>
40
#endif
41
42
#include "archive.h"
43
#include "archive_entry.h"
44
45
/*
46
 * This is mostly a pretty straightforward hash table implementation.
47
 * The only interesting bit is the different strategies used to
48
 * match up links.  These strategies match those used by various
49
 * archiving formats:
50
 *   tar - content stored with first link, remainder refer back to it.
51
 *       This requires us to match each subsequent link up with the
52
 *       first appearance.
53
 *   cpio - Old cpio just stored body with each link, match-ups were
54
 *       implicit.  This is trivial.
55
 *   new cpio - New cpio only stores body with last link, match-ups
56
 *       are implicit.  This is actually quite tricky; see the notes
57
 *       below.
58
 */
59
60
/*
 * Internal hardlink-matching strategies.  Callers hand us an archive
 * format code; archive_entry_linkresolver_set_strategy() maps it onto
 * one of these values.
 */
#define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR      0 /* later links refer back to the first */
#define ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE    1 /* like tar, but the size is left set */
#define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 2 /* entries pass through untouched */
#define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 3 /* body travels with the last link */
65
66
/*
 * Number of hash buckets a new resolver starts with.  Must be a
 * nonzero power of two: bucket indices are computed by masking the
 * hash with (number_buckets - 1).
 */
#define links_cache_initial_size 1024
68
69
/* One node in a hash bucket's doubly-linked chain. */
struct links_entry {
  struct links_entry  *next;      /* following node in the same bucket */
  struct links_entry  *previous;  /* preceding node; NULL at the chain head */
  struct archive_entry  *canonical; /* clone of the entry taken at insertion */
  struct archive_entry  *entry;   /* entry held back for later, or NULL */
  size_t       hash;      /* dev ^ ino, cached for lookup and rehash */
  unsigned int     links; /* # links not yet seen */
};
77
78
/* Hash table of partially-seen hardlink sets plus the active strategy. */
struct archive_entry_linkresolver {
  struct links_entry  **buckets;  /* array of chain heads */
  struct links_entry   *spare;    /* unlinked node whose release is deferred
                                   * until the next lookup/iteration call */
  unsigned long     number_entries; /* nodes currently in the table */
  size_t        number_buckets;   /* length of buckets[]; a power of two */
  int       strategy;             /* one of ARCHIVE_ENTRY_LINKIFY_LIKE_* */
};
85
86
1.12k
/*
 * Selection mask for next_entry():
 *   DEFERRED - nodes that are holding an entry (le->entry != NULL)
 *   PARTIAL  - nodes that are not holding one (le->entry == NULL)
 */
#define NEXT_ENTRY_DEFERRED 1
#define NEXT_ENTRY_PARTIAL  2
#define NEXT_ENTRY_ALL    (NEXT_ENTRY_DEFERRED | NEXT_ENTRY_PARTIAL)
89
90
/* File-local helpers; definitions follow the public entry points. */
static struct links_entry *find_entry(struct archive_entry_linkresolver *res,
        struct archive_entry *entry);
static void grow_hash(struct archive_entry_linkresolver *res);
static struct links_entry *insert_entry(struct archive_entry_linkresolver *res,
        struct archive_entry *entry);
static struct links_entry *next_entry(struct archive_entry_linkresolver *res,
    int mode);
97
98
struct archive_entry_linkresolver *
99
archive_entry_linkresolver_new(void)
100
1.12k
{
101
1.12k
  struct archive_entry_linkresolver *res;
102
103
  /* Check for positive power-of-two */
104
1.12k
  if (links_cache_initial_size == 0 ||
105
1.12k
      (links_cache_initial_size & (links_cache_initial_size - 1)) != 0)
106
0
    return (NULL);
107
108
1.12k
  res = calloc(1, sizeof(struct archive_entry_linkresolver));
109
1.12k
  if (res == NULL)
110
0
    return (NULL);
111
1.12k
  res->number_buckets = links_cache_initial_size;
112
1.12k
  res->buckets = calloc(res->number_buckets, sizeof(res->buckets[0]));
113
1.12k
  if (res->buckets == NULL) {
114
0
    free(res);
115
0
    return (NULL);
116
0
  }
117
1.12k
  return (res);
118
1.12k
}
119
120
void
121
archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res,
122
    int fmt)
123
1.12k
{
124
1.12k
  int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK;
125
126
1.12k
  switch (fmtbase) {
127
0
  case ARCHIVE_FORMAT_7ZIP:
128
0
  case ARCHIVE_FORMAT_AR:
129
0
  case ARCHIVE_FORMAT_ZIP:
130
0
    res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
131
0
    break;
132
0
  case ARCHIVE_FORMAT_CPIO:
133
0
    switch (fmt) {
134
0
    case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC:
135
0
    case ARCHIVE_FORMAT_CPIO_SVR4_CRC:
136
0
      res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO;
137
0
      break;
138
0
    default:
139
0
      res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
140
0
      break;
141
0
    }
142
0
    break;
143
1.12k
  case ARCHIVE_FORMAT_MTREE:
144
1.12k
    res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE;
145
1.12k
    break;
146
0
  case ARCHIVE_FORMAT_ISO9660:
147
0
  case ARCHIVE_FORMAT_SHAR:
148
0
  case ARCHIVE_FORMAT_TAR:
149
0
  case ARCHIVE_FORMAT_XAR:
150
0
    res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR;
151
0
    break;
152
0
  default:
153
0
    res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
154
0
    break;
155
1.12k
  }
156
1.12k
}
157
158
void
159
archive_entry_linkresolver_free(struct archive_entry_linkresolver *res)
160
15.1k
{
161
15.1k
  struct links_entry *le;
162
163
15.1k
  if (res == NULL)
164
13.9k
    return;
165
166
1.12k
  while ((le = next_entry(res, NEXT_ENTRY_ALL)) != NULL)
167
0
    archive_entry_free(le->entry);
168
1.12k
  free(res->buckets);
169
1.12k
  free(res);
170
1.12k
}
171
172
void
173
archive_entry_linkify(struct archive_entry_linkresolver *res,
174
    struct archive_entry **e, struct archive_entry **f)
175
0
{
176
0
  struct links_entry *le;
177
0
  struct archive_entry *t;
178
179
0
  *f = NULL; /* Default: Don't return a second entry. */
180
181
0
  if (*e == NULL) {
182
0
    le = next_entry(res, NEXT_ENTRY_DEFERRED);
183
0
    if (le != NULL) {
184
0
      *e = le->entry;
185
0
      le->entry = NULL;
186
0
    }
187
0
    return;
188
0
  }
189
190
  /* If it has only one link, then we're done. */
191
0
  if (archive_entry_nlink(*e) == 1)
192
0
    return;
193
  /* Directories, devices never have hardlinks. */
194
0
  if (archive_entry_filetype(*e) == AE_IFDIR
195
0
      || archive_entry_filetype(*e) == AE_IFBLK
196
0
      || archive_entry_filetype(*e) == AE_IFCHR)
197
0
    return;
198
199
0
  switch (res->strategy) {
200
0
  case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR:
201
0
    le = find_entry(res, *e);
202
0
    if (le != NULL) {
203
0
      archive_entry_unset_size(*e);
204
#if defined(_WIN32) && !defined(__CYGWIN__)
205
      archive_entry_copy_hardlink_w(*e,
206
          archive_entry_pathname_w(le->canonical));
207
#else
208
0
      archive_entry_copy_hardlink(*e,
209
0
          archive_entry_pathname(le->canonical));
210
0
#endif
211
0
    } else
212
0
      insert_entry(res, *e);
213
0
    return;
214
0
  case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE:
215
0
    le = find_entry(res, *e);
216
0
    if (le != NULL) {
217
#if defined(_WIN32) && !defined(__CYGWIN__)
218
      archive_entry_copy_hardlink_w(*e,
219
          archive_entry_pathname_w(le->canonical));
220
#else
221
0
      archive_entry_copy_hardlink(*e,
222
0
          archive_entry_pathname(le->canonical));
223
0
#endif
224
0
    } else
225
0
      insert_entry(res, *e);
226
0
    return;
227
0
  case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO:
228
    /* This one is trivial. */
229
0
    return;
230
0
  case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO:
231
0
    le = find_entry(res, *e);
232
0
    if (le != NULL) {
233
      /*
234
       * Put the new entry in le, return the
235
       * old entry from le.
236
       */
237
0
      t = *e;
238
0
      *e = le->entry;
239
0
      le->entry = t;
240
      /* Make the old entry into a hardlink. */
241
0
      archive_entry_unset_size(*e);
242
#if defined(_WIN32) && !defined(__CYGWIN__)
243
      archive_entry_copy_hardlink_w(*e,
244
          archive_entry_pathname_w(le->canonical));
245
#else
246
0
      archive_entry_copy_hardlink(*e,
247
0
          archive_entry_pathname(le->canonical));
248
0
#endif
249
      /* If we ran out of links, return the
250
       * final entry as well. */
251
0
      if (le->links == 0) {
252
0
        *f = le->entry;
253
0
        le->entry = NULL;
254
0
      }
255
0
    } else {
256
      /*
257
       * If we haven't seen it, tuck it away
258
       * for future use.
259
       */
260
0
      le = insert_entry(res, *e);
261
0
      if (le == NULL)
262
        /* XXX We should return an error code XXX */
263
0
        return;
264
0
      le->entry = *e;
265
0
      *e = NULL;
266
0
    }
267
0
    return;
268
0
  default:
269
0
    break;
270
0
  }
271
0
  return;
272
0
}
273
274
static struct links_entry *
275
find_entry(struct archive_entry_linkresolver *res,
276
    struct archive_entry *entry)
277
0
{
278
0
  struct links_entry  *le;
279
0
  size_t       hash, bucket;
280
0
  dev_t      dev;
281
0
  int64_t      ino;
282
283
0
  if (!archive_entry_ino_is_set(entry) || !archive_entry_dev_is_set(entry)) {
284
0
    return (NULL);
285
0
  }
286
287
  /* Free a held entry. */
288
0
  if (res->spare != NULL) {
289
0
    archive_entry_free(res->spare->canonical);
290
0
    archive_entry_free(res->spare->entry);
291
0
    free(res->spare);
292
0
    res->spare = NULL;
293
0
  }
294
295
0
  dev = archive_entry_dev(entry);
296
0
  ino = archive_entry_ino64(entry);
297
0
  hash = (size_t)(dev ^ ino);
298
299
  /* Try to locate this entry in the links cache. */
300
0
  bucket = hash & (res->number_buckets - 1);
301
0
  for (le = res->buckets[bucket]; le != NULL; le = le->next) {
302
0
    if (le->hash == hash
303
0
        && dev == archive_entry_dev(le->canonical)
304
0
        && ino == archive_entry_ino64(le->canonical)) {
305
      /*
306
       * Decrement link count each time and release
307
       * the entry if it hits zero.  This saves
308
       * memory and is necessary for detecting
309
       * missed links.
310
       */
311
0
      --le->links;
312
0
      if (le->links > 0)
313
0
        return (le);
314
      /* Remove it from this hash bucket. */
315
0
      if (le->previous != NULL)
316
0
        le->previous->next = le->next;
317
0
      if (le->next != NULL)
318
0
        le->next->previous = le->previous;
319
0
      if (res->buckets[bucket] == le)
320
0
        res->buckets[bucket] = le->next;
321
0
      res->number_entries--;
322
      /* Defer freeing this entry. */
323
0
      res->spare = le;
324
0
      return (le);
325
0
    }
326
0
  }
327
0
  return (NULL);
328
0
}
329
330
static struct links_entry *
331
next_entry(struct archive_entry_linkresolver *res, int mode)
332
1.12k
{
333
1.12k
  struct links_entry  *le;
334
1.12k
  size_t       bucket;
335
336
  /* Free a held entry. */
337
1.12k
  if (res->spare != NULL) {
338
0
    archive_entry_free(res->spare->canonical);
339
0
    archive_entry_free(res->spare->entry);
340
0
    free(res->spare);
341
0
    res->spare = NULL;
342
0
  }
343
344
  /* Look for next non-empty bucket in the links cache. */
345
1.15M
  for (bucket = 0; bucket < res->number_buckets; bucket++) {
346
1.15M
    for (le = res->buckets[bucket]; le != NULL; le = le->next) {
347
0
      if (le->entry != NULL &&
348
0
          (mode & NEXT_ENTRY_DEFERRED) == 0)
349
0
        continue;
350
0
      if (le->entry == NULL &&
351
0
          (mode & NEXT_ENTRY_PARTIAL) == 0)
352
0
        continue;
353
      /* Remove it from this hash bucket. */
354
0
      if (le->next != NULL)
355
0
        le->next->previous = le->previous;
356
0
      if (le->previous != NULL)
357
0
        le->previous->next = le->next;
358
0
      else
359
0
        res->buckets[bucket] = le->next;
360
0
      res->number_entries--;
361
      /* Defer freeing this entry. */
362
0
      res->spare = le;
363
0
      return (le);
364
0
    }
365
1.15M
  }
366
1.12k
  return (NULL);
367
1.12k
}
368
369
static struct links_entry *
370
insert_entry(struct archive_entry_linkresolver *res,
371
    struct archive_entry *entry)
372
0
{
373
0
  struct links_entry *le;
374
0
  size_t hash, bucket;
375
376
0
  if (!archive_entry_ino_is_set(entry) || !archive_entry_dev_is_set(entry)) {
377
0
    return (NULL);
378
0
  }
379
380
  /* Add this entry to the links cache. */
381
0
  le = calloc(1, sizeof(struct links_entry));
382
0
  if (le == NULL)
383
0
    return (NULL);
384
0
  le->canonical = archive_entry_clone(entry);
385
386
  /* If the links cache is getting too full, enlarge the hash table. */
387
0
  if (res->number_entries > res->number_buckets * 2)
388
0
    grow_hash(res);
389
390
0
  hash = (size_t)(archive_entry_dev(entry) ^ archive_entry_ino64(entry));
391
0
  bucket = hash & (res->number_buckets - 1);
392
393
  /* If we could allocate the entry, record it. */
394
0
  if (res->buckets[bucket] != NULL)
395
0
    res->buckets[bucket]->previous = le;
396
0
  res->number_entries++;
397
0
  le->next = res->buckets[bucket];
398
0
  le->previous = NULL;
399
0
  res->buckets[bucket] = le;
400
0
  le->hash = hash;
401
0
  le->links = archive_entry_nlink(entry) - 1;
402
0
  return (le);
403
0
}
404
405
static void
406
grow_hash(struct archive_entry_linkresolver *res)
407
0
{
408
0
  struct links_entry *le, **new_buckets;
409
0
  size_t new_size;
410
0
  size_t i, bucket;
411
412
  /* Try to enlarge the bucket list. */
413
0
  new_size = res->number_buckets * 2;
414
0
  if (new_size < res->number_buckets)
415
0
    return;
416
0
  new_buckets = calloc(new_size, sizeof(struct links_entry *));
417
418
0
  if (new_buckets == NULL)
419
0
    return;
420
421
0
  for (i = 0; i < res->number_buckets; i++) {
422
0
    while (res->buckets[i] != NULL) {
423
      /* Remove entry from old bucket. */
424
0
      le = res->buckets[i];
425
0
      res->buckets[i] = le->next;
426
427
      /* Add entry to new bucket. */
428
0
      bucket = le->hash & (new_size - 1);
429
430
0
      if (new_buckets[bucket] != NULL)
431
0
        new_buckets[bucket]->previous = le;
432
0
      le->next = new_buckets[bucket];
433
0
      le->previous = NULL;
434
0
      new_buckets[bucket] = le;
435
0
    }
436
0
  }
437
0
  free(res->buckets);
438
0
  res->buckets = new_buckets;
439
0
  res->number_buckets = new_size;
440
0
}
441
442
struct archive_entry *
443
archive_entry_partial_links(struct archive_entry_linkresolver *res,
444
    unsigned int *links)
445
0
{
446
0
  struct archive_entry  *e;
447
0
  struct links_entry  *le;
448
449
  /* Free a held entry. */
450
0
  if (res->spare != NULL) {
451
0
    archive_entry_free(res->spare->canonical);
452
0
    archive_entry_free(res->spare->entry);
453
0
    free(res->spare);
454
0
    res->spare = NULL;
455
0
  }
456
457
0
  le = next_entry(res, NEXT_ENTRY_PARTIAL);
458
0
  if (le != NULL) {
459
0
    e = le->canonical;
460
0
    if (links != NULL)
461
0
      *links = le->links;
462
0
    le->canonical = NULL;
463
0
  } else {
464
0
    e = NULL;
465
0
    if (links != NULL)
466
0
      *links = 0;
467
0
  }
468
0
  return (e);
469
0
}