Coverage Report

Created: 2024-09-16 06:10

/src/git/builtin/pack-redundant.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
*
3
* Copyright 2005, Lukas Sandstrom <lukass@etek.chalmers.se>
4
*
5
* This file is licensed under the GPL v2.
6
*
7
*/
8
9
#include "builtin.h"
10
#include "gettext.h"
11
#include "hex.h"
12
#include "repository.h"
13
#include "packfile.h"
14
#include "object-store-ll.h"
15
16
0
#define BLKSIZE 512
17
18
static const char pack_redundant_usage[] =
19
"git pack-redundant [--verbose] [--alt-odb] (--all | <pack-filename>...)";
20
21
static int load_all_packs, verbose, alt_odb;
22
23
struct llist_item {
24
  struct llist_item *next;
25
  struct object_id oid;
26
};
27
static struct llist {
28
  struct llist_item *front;
29
  struct llist_item *back;
30
  size_t size;
31
} *all_objects; /* all objects which must be present in local packfiles */
32
33
static struct pack_list {
34
  struct pack_list *next;
35
  struct packed_git *pack;
36
  struct llist *unique_objects;
37
  struct llist *remaining_objects;
38
  size_t all_objects_size;
39
} *local_packs = NULL, *altodb_packs = NULL;
40
41
static struct llist_item *free_nodes;
42
43
static inline void llist_item_put(struct llist_item *item)
44
0
{
45
0
  item->next = free_nodes;
46
0
  free_nodes = item;
47
0
}
48
49
static inline struct llist_item *llist_item_get(void)
50
0
{
51
0
  struct llist_item *new_item;
52
0
  if ( free_nodes ) {
53
0
    new_item = free_nodes;
54
0
    free_nodes = free_nodes->next;
55
0
  } else {
56
0
    int i = 1;
57
0
    ALLOC_ARRAY(new_item, BLKSIZE);
58
0
    for (; i < BLKSIZE; i++)
59
0
      llist_item_put(&new_item[i]);
60
0
  }
61
0
  return new_item;
62
0
}
63
64
static inline void llist_init(struct llist **list)
65
0
{
66
0
  *list = xmalloc(sizeof(struct llist));
67
0
  (*list)->front = (*list)->back = NULL;
68
0
  (*list)->size = 0;
69
0
}
70
71
static struct llist * llist_copy(struct llist *list)
72
0
{
73
0
  struct llist *ret;
74
0
  struct llist_item *new_item, *old_item, *prev;
75
76
0
  llist_init(&ret);
77
78
0
  if ((ret->size = list->size) == 0)
79
0
    return ret;
80
81
0
  new_item = ret->front = llist_item_get();
82
0
  new_item->oid = list->front->oid;
83
84
0
  old_item = list->front->next;
85
0
  while (old_item) {
86
0
    prev = new_item;
87
0
    new_item = llist_item_get();
88
0
    prev->next = new_item;
89
0
    new_item->oid = old_item->oid;
90
0
    old_item = old_item->next;
91
0
  }
92
0
  new_item->next = NULL;
93
0
  ret->back = new_item;
94
95
0
  return ret;
96
0
}
97
98
static inline struct llist_item *llist_insert(struct llist *list,
99
                struct llist_item *after,
100
                const unsigned char *oid)
101
0
{
102
0
  struct llist_item *new_item = llist_item_get();
103
0
  oidread(&new_item->oid, oid, the_repository->hash_algo);
104
0
  new_item->next = NULL;
105
106
0
  if (after) {
107
0
    new_item->next = after->next;
108
0
    after->next = new_item;
109
0
    if (after == list->back)
110
0
      list->back = new_item;
111
0
  } else {/* insert in front */
112
0
    if (list->size == 0)
113
0
      list->back = new_item;
114
0
    else
115
0
      new_item->next = list->front;
116
0
    list->front = new_item;
117
0
  }
118
0
  list->size++;
119
0
  return new_item;
120
0
}
121
122
static inline struct llist_item *llist_insert_back(struct llist *list,
123
               const unsigned char *oid)
124
0
{
125
0
  return llist_insert(list, list->back, oid);
126
0
}
127
128
static inline struct llist_item *llist_insert_sorted_unique(struct llist *list,
129
      const struct object_id *oid, struct llist_item *hint)
130
0
{
131
0
  struct llist_item *prev = NULL, *l;
132
133
0
  l = (hint == NULL) ? list->front : hint;
134
0
  while (l) {
135
0
    int cmp = oidcmp(&l->oid, oid);
136
0
    if (cmp > 0) { /* we insert before this entry */
137
0
      return llist_insert(list, prev, oid->hash);
138
0
    }
139
0
    if (!cmp) { /* already exists */
140
0
      return l;
141
0
    }
142
0
    prev = l;
143
0
    l = l->next;
144
0
  }
145
  /* insert at the end */
146
0
  return llist_insert_back(list, oid->hash);
147
0
}
148
149
/* returns a pointer to an item in front of sha1 */
150
static inline struct llist_item * llist_sorted_remove(struct llist *list, const unsigned char *oid, struct llist_item *hint)
151
0
{
152
0
  struct llist_item *prev, *l;
153
154
0
redo_from_start:
155
0
  l = (hint == NULL) ? list->front : hint;
156
0
  prev = NULL;
157
0
  while (l) {
158
0
    const int cmp = hashcmp(l->oid.hash, oid, the_repository->hash_algo);
159
0
    if (cmp > 0) /* not in list, since sorted */
160
0
      return prev;
161
0
    if (!cmp) { /* found */
162
0
      if (!prev) {
163
0
        if (hint != NULL && hint != list->front) {
164
          /* we don't know the previous element */
165
0
          hint = NULL;
166
0
          goto redo_from_start;
167
0
        }
168
0
        list->front = l->next;
169
0
      } else
170
0
        prev->next = l->next;
171
0
      if (l == list->back)
172
0
        list->back = prev;
173
0
      llist_item_put(l);
174
0
      list->size--;
175
0
      return prev;
176
0
    }
177
0
    prev = l;
178
0
    l = l->next;
179
0
  }
180
0
  return prev;
181
0
}
182
183
/* computes A\B */
184
static void llist_sorted_difference_inplace(struct llist *A,
185
             struct llist *B)
186
0
{
187
0
  struct llist_item *hint, *b;
188
189
0
  hint = NULL;
190
0
  b = B->front;
191
192
0
  while (b) {
193
0
    hint = llist_sorted_remove(A, b->oid.hash, hint);
194
0
    b = b->next;
195
0
  }
196
0
}
197
198
static inline struct pack_list * pack_list_insert(struct pack_list **pl,
199
             struct pack_list *entry)
200
0
{
201
0
  struct pack_list *p = xmalloc(sizeof(struct pack_list));
202
0
  memcpy(p, entry, sizeof(struct pack_list));
203
0
  p->next = *pl;
204
0
  *pl = p;
205
0
  return p;
206
0
}
207
208
static inline size_t pack_list_size(struct pack_list *pl)
209
0
{
210
0
  size_t ret = 0;
211
0
  while (pl) {
212
0
    ret++;
213
0
    pl = pl->next;
214
0
  }
215
0
  return ret;
216
0
}
217
218
static struct pack_list * pack_list_difference(const struct pack_list *A,
219
                 const struct pack_list *B)
220
0
{
221
0
  struct pack_list *ret;
222
0
  const struct pack_list *pl;
223
224
0
  if (!A)
225
0
    return NULL;
226
227
0
  pl = B;
228
0
  while (pl != NULL) {
229
0
    if (A->pack == pl->pack)
230
0
      return pack_list_difference(A->next, B);
231
0
    pl = pl->next;
232
0
  }
233
0
  ret = xmalloc(sizeof(struct pack_list));
234
0
  memcpy(ret, A, sizeof(struct pack_list));
235
0
  ret->next = pack_list_difference(A->next, B);
236
0
  return ret;
237
0
}
238
239
static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
240
0
{
241
0
  size_t p1_off = 0, p2_off = 0, p1_step, p2_step;
242
0
  const unsigned char *p1_base, *p2_base;
243
0
  struct llist_item *p1_hint = NULL, *p2_hint = NULL;
244
0
  const unsigned int hashsz = the_hash_algo->rawsz;
245
246
0
  if (!p1->unique_objects)
247
0
    p1->unique_objects = llist_copy(p1->remaining_objects);
248
0
  if (!p2->unique_objects)
249
0
    p2->unique_objects = llist_copy(p2->remaining_objects);
250
251
0
  p1_base = p1->pack->index_data;
252
0
  p2_base = p2->pack->index_data;
253
0
  p1_base += 256 * 4 + ((p1->pack->index_version < 2) ? 4 : 8);
254
0
  p2_base += 256 * 4 + ((p2->pack->index_version < 2) ? 4 : 8);
255
0
  p1_step = hashsz + ((p1->pack->index_version < 2) ? 4 : 0);
256
0
  p2_step = hashsz + ((p2->pack->index_version < 2) ? 4 : 0);
257
258
0
  while (p1_off < p1->pack->num_objects * p1_step &&
259
0
         p2_off < p2->pack->num_objects * p2_step)
260
0
  {
261
0
    const int cmp = hashcmp(p1_base + p1_off, p2_base + p2_off,
262
0
          the_repository->hash_algo);
263
    /* cmp ~ p1 - p2 */
264
0
    if (cmp == 0) {
265
0
      p1_hint = llist_sorted_remove(p1->unique_objects,
266
0
          p1_base + p1_off,
267
0
          p1_hint);
268
0
      p2_hint = llist_sorted_remove(p2->unique_objects,
269
0
          p1_base + p1_off,
270
0
          p2_hint);
271
0
      p1_off += p1_step;
272
0
      p2_off += p2_step;
273
0
      continue;
274
0
    }
275
0
    if (cmp < 0) { /* p1 has the object, p2 doesn't */
276
0
      p1_off += p1_step;
277
0
    } else { /* p2 has the object, p1 doesn't */
278
0
      p2_off += p2_step;
279
0
    }
280
0
  }
281
0
}
282
283
static size_t sizeof_union(struct packed_git *p1, struct packed_git *p2)
284
0
{
285
0
  size_t ret = 0;
286
0
  size_t p1_off = 0, p2_off = 0, p1_step, p2_step;
287
0
  const unsigned char *p1_base, *p2_base;
288
0
  const unsigned int hashsz = the_hash_algo->rawsz;
289
290
0
  p1_base = p1->index_data;
291
0
  p2_base = p2->index_data;
292
0
  p1_base += 256 * 4 + ((p1->index_version < 2) ? 4 : 8);
293
0
  p2_base += 256 * 4 + ((p2->index_version < 2) ? 4 : 8);
294
0
  p1_step = hashsz + ((p1->index_version < 2) ? 4 : 0);
295
0
  p2_step = hashsz + ((p2->index_version < 2) ? 4 : 0);
296
297
0
  while (p1_off < p1->num_objects * p1_step &&
298
0
         p2_off < p2->num_objects * p2_step)
299
0
  {
300
0
    int cmp = hashcmp(p1_base + p1_off, p2_base + p2_off,
301
0
          the_repository->hash_algo);
302
    /* cmp ~ p1 - p2 */
303
0
    if (cmp == 0) {
304
0
      ret++;
305
0
      p1_off += p1_step;
306
0
      p2_off += p2_step;
307
0
      continue;
308
0
    }
309
0
    if (cmp < 0) { /* p1 has the object, p2 doesn't */
310
0
      p1_off += p1_step;
311
0
    } else { /* p2 has the object, p1 doesn't */
312
0
      p2_off += p2_step;
313
0
    }
314
0
  }
315
0
  return ret;
316
0
}
317
318
/* another O(n^2) function ... */
319
static size_t get_pack_redundancy(struct pack_list *pl)
320
0
{
321
0
  struct pack_list *subset;
322
0
  size_t ret = 0;
323
324
0
  if (!pl)
325
0
    return 0;
326
327
0
  while ((subset = pl->next)) {
328
0
    while (subset) {
329
0
      ret += sizeof_union(pl->pack, subset->pack);
330
0
      subset = subset->next;
331
0
    }
332
0
    pl = pl->next;
333
0
  }
334
0
  return ret;
335
0
}
336
337
static inline off_t pack_set_bytecount(struct pack_list *pl)
338
0
{
339
0
  off_t ret = 0;
340
0
  while (pl) {
341
0
    ret += pl->pack->pack_size;
342
0
    ret += pl->pack->index_size;
343
0
    pl = pl->next;
344
0
  }
345
0
  return ret;
346
0
}
347
348
static int cmp_remaining_objects(const void *a, const void *b)
349
0
{
350
0
  struct pack_list *pl_a = *((struct pack_list **)a);
351
0
  struct pack_list *pl_b = *((struct pack_list **)b);
352
353
0
  if (pl_a->remaining_objects->size == pl_b->remaining_objects->size) {
354
    /* have the same remaining_objects, big pack first */
355
0
    if (pl_a->all_objects_size == pl_b->all_objects_size)
356
0
      return 0;
357
0
    else if (pl_a->all_objects_size < pl_b->all_objects_size)
358
0
      return 1;
359
0
    else
360
0
      return -1;
361
0
  } else if (pl_a->remaining_objects->size < pl_b->remaining_objects->size) {
362
    /* sort by remaining objects, more objects first */
363
0
    return 1;
364
0
  } else {
365
0
    return -1;
366
0
  }
367
0
}
368
369
/* Sort pack_list, greater size of remaining_objects first */
370
static void sort_pack_list(struct pack_list **pl)
371
0
{
372
0
  struct pack_list **ary, *p;
373
0
  int i;
374
0
  size_t n = pack_list_size(*pl);
375
376
0
  if (n < 2)
377
0
    return;
378
379
  /* prepare an array of packed_list for easier sorting */
380
0
  CALLOC_ARRAY(ary, n);
381
0
  for (n = 0, p = *pl; p; p = p->next)
382
0
    ary[n++] = p;
383
384
0
  QSORT(ary, n, cmp_remaining_objects);
385
386
  /* link them back again */
387
0
  for (i = 0; i < n - 1; i++)
388
0
    ary[i]->next = ary[i + 1];
389
0
  ary[n - 1]->next = NULL;
390
0
  *pl = ary[0];
391
392
0
  free(ary);
393
0
}
394
395
396
static void minimize(struct pack_list **min)
397
0
{
398
0
  struct pack_list *pl, *unique = NULL, *non_unique = NULL;
399
0
  struct llist *missing, *unique_pack_objects;
400
401
0
  pl = local_packs;
402
0
  while (pl) {
403
0
    if (pl->unique_objects->size)
404
0
      pack_list_insert(&unique, pl);
405
0
    else
406
0
      pack_list_insert(&non_unique, pl);
407
0
    pl = pl->next;
408
0
  }
409
  /* find out which objects are missing from the set of unique packs */
410
0
  missing = llist_copy(all_objects);
411
0
  pl = unique;
412
0
  while (pl) {
413
0
    llist_sorted_difference_inplace(missing, pl->remaining_objects);
414
0
    pl = pl->next;
415
0
  }
416
417
0
  *min = unique;
418
419
  /* return if there are no objects missing from the unique set */
420
0
  if (missing->size == 0) {
421
0
    free(missing);
422
0
    return;
423
0
  }
424
425
0
  unique_pack_objects = llist_copy(all_objects);
426
0
  llist_sorted_difference_inplace(unique_pack_objects, missing);
427
428
  /* remove unique pack objects from the non_unique packs */
429
0
  pl = non_unique;
430
0
  while (pl) {
431
0
    llist_sorted_difference_inplace(pl->remaining_objects, unique_pack_objects);
432
0
    pl = pl->next;
433
0
  }
434
435
0
  while (non_unique) {
436
    /* sort the non_unique packs, greater size of remaining_objects first */
437
0
    sort_pack_list(&non_unique);
438
0
    if (non_unique->remaining_objects->size == 0)
439
0
      break;
440
441
0
    pack_list_insert(min, non_unique);
442
443
0
    for (pl = non_unique->next; pl && pl->remaining_objects->size > 0;  pl = pl->next)
444
0
      llist_sorted_difference_inplace(pl->remaining_objects, non_unique->remaining_objects);
445
446
0
    non_unique = non_unique->next;
447
0
  }
448
0
}
449
450
static void load_all_objects(void)
451
0
{
452
0
  struct pack_list *pl = local_packs;
453
0
  struct llist_item *hint, *l;
454
455
0
  llist_init(&all_objects);
456
457
0
  while (pl) {
458
0
    hint = NULL;
459
0
    l = pl->remaining_objects->front;
460
0
    while (l) {
461
0
      hint = llist_insert_sorted_unique(all_objects,
462
0
                &l->oid, hint);
463
0
      l = l->next;
464
0
    }
465
0
    pl = pl->next;
466
0
  }
467
  /* remove objects present in remote packs */
468
0
  pl = altodb_packs;
469
0
  while (pl) {
470
0
    llist_sorted_difference_inplace(all_objects, pl->remaining_objects);
471
0
    pl = pl->next;
472
0
  }
473
0
}
474
475
/* this scales like O(n^2) */
476
static void cmp_local_packs(void)
477
0
{
478
0
  struct pack_list *subset, *pl = local_packs;
479
480
  /* only one packfile */
481
0
  if (!pl->next) {
482
0
    llist_init(&pl->unique_objects);
483
0
    return;
484
0
  }
485
486
0
  while ((subset = pl)) {
487
0
    while ((subset = subset->next))
488
0
      cmp_two_packs(pl, subset);
489
0
    pl = pl->next;
490
0
  }
491
0
}
492
493
static void scan_alt_odb_packs(void)
494
0
{
495
0
  struct pack_list *local, *alt;
496
497
0
  alt = altodb_packs;
498
0
  while (alt) {
499
0
    local = local_packs;
500
0
    while (local) {
501
0
      llist_sorted_difference_inplace(local->remaining_objects,
502
0
              alt->remaining_objects);
503
0
      local = local->next;
504
0
    }
505
0
    alt = alt->next;
506
0
  }
507
0
}
508
509
static struct pack_list * add_pack(struct packed_git *p)
510
0
{
511
0
  struct pack_list l;
512
0
  size_t off = 0, step;
513
0
  const unsigned char *base;
514
515
0
  if (!p->pack_local && !(alt_odb || verbose))
516
0
    return NULL;
517
518
0
  l.pack = p;
519
0
  llist_init(&l.remaining_objects);
520
521
0
  if (open_pack_index(p))
522
0
    return NULL;
523
524
0
  base = p->index_data;
525
0
  base += 256 * 4 + ((p->index_version < 2) ? 4 : 8);
526
0
  step = the_hash_algo->rawsz + ((p->index_version < 2) ? 4 : 0);
527
0
  while (off < p->num_objects * step) {
528
0
    llist_insert_back(l.remaining_objects, base + off);
529
0
    off += step;
530
0
  }
531
0
  l.all_objects_size = l.remaining_objects->size;
532
0
  l.unique_objects = NULL;
533
0
  if (p->pack_local)
534
0
    return pack_list_insert(&local_packs, &l);
535
0
  else
536
0
    return pack_list_insert(&altodb_packs, &l);
537
0
}
538
539
static struct pack_list * add_pack_file(const char *filename)
540
0
{
541
0
  struct packed_git *p = get_all_packs(the_repository);
542
543
0
  if (strlen(filename) < 40)
544
0
    die("Bad pack filename: %s", filename);
545
546
0
  while (p) {
547
0
    if (strstr(p->pack_name, filename))
548
0
      return add_pack(p);
549
0
    p = p->next;
550
0
  }
551
0
  die("Filename %s not found in packed_git", filename);
552
0
}
553
554
static void load_all(void)
555
0
{
556
0
  struct packed_git *p = get_all_packs(the_repository);
557
558
0
  while (p) {
559
0
    add_pack(p);
560
0
    p = p->next;
561
0
  }
562
0
}
563
564
int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED)
565
0
{
566
0
  int i;
567
0
  int i_still_use_this = 0;
568
0
  struct pack_list *min = NULL, *red, *pl;
569
0
  struct llist *ignore;
570
0
  struct object_id *oid;
571
0
  char buf[GIT_MAX_HEXSZ + 2]; /* hex hash + \n + \0 */
572
573
0
  if (argc == 2 && !strcmp(argv[1], "-h"))
574
0
    usage(pack_redundant_usage);
575
576
0
  for (i = 1; i < argc; i++) {
577
0
    const char *arg = argv[i];
578
0
    if (!strcmp(arg, "--")) {
579
0
      i++;
580
0
      break;
581
0
    }
582
0
    if (!strcmp(arg, "--all")) {
583
0
      load_all_packs = 1;
584
0
      continue;
585
0
    }
586
0
    if (!strcmp(arg, "--verbose")) {
587
0
      verbose = 1;
588
0
      continue;
589
0
    }
590
0
    if (!strcmp(arg, "--alt-odb")) {
591
0
      alt_odb = 1;
592
0
      continue;
593
0
    }
594
0
    if (!strcmp(arg, "--i-still-use-this")) {
595
0
      i_still_use_this = 1;
596
0
      continue;
597
0
    }
598
0
    if (*arg == '-')
599
0
      usage(pack_redundant_usage);
600
0
    else
601
0
      break;
602
0
  }
603
604
0
  if (!i_still_use_this) {
605
0
    fputs(_("'git pack-redundant' is nominated for removal.\n"
606
0
      "If you still use this command, please add an extra\n"
607
0
      "option, '--i-still-use-this', on the command line\n"
608
0
      "and let us know you still use it by sending an e-mail\n"
609
0
      "to <git@vger.kernel.org>.  Thanks.\n"), stderr);
610
0
    die(_("refusing to run without --i-still-use-this"));
611
0
  }
612
613
0
  if (load_all_packs)
614
0
    load_all();
615
0
  else
616
0
    while (*(argv + i) != NULL)
617
0
      add_pack_file(*(argv + i++));
618
619
0
  if (!local_packs)
620
0
    die("Zero packs found!");
621
622
0
  load_all_objects();
623
624
0
  if (alt_odb)
625
0
    scan_alt_odb_packs();
626
627
  /* ignore objects given on stdin */
628
0
  llist_init(&ignore);
629
0
  if (!isatty(0)) {
630
0
    while (fgets(buf, sizeof(buf), stdin)) {
631
0
      oid = xmalloc(sizeof(*oid));
632
0
      if (get_oid_hex(buf, oid))
633
0
        die("Bad object ID on stdin: %s", buf);
634
0
      llist_insert_sorted_unique(ignore, oid, NULL);
635
0
    }
636
0
  }
637
0
  llist_sorted_difference_inplace(all_objects, ignore);
638
0
  pl = local_packs;
639
0
  while (pl) {
640
0
    llist_sorted_difference_inplace(pl->remaining_objects, ignore);
641
0
    pl = pl->next;
642
0
  }
643
644
0
  cmp_local_packs();
645
646
0
  minimize(&min);
647
648
0
  if (verbose) {
649
0
    fprintf(stderr, "There are %lu packs available in alt-odbs.\n",
650
0
      (unsigned long)pack_list_size(altodb_packs));
651
0
    fprintf(stderr, "The smallest (bytewise) set of packs is:\n");
652
0
    pl = min;
653
0
    while (pl) {
654
0
      fprintf(stderr, "\t%s\n", pl->pack->pack_name);
655
0
      pl = pl->next;
656
0
    }
657
0
    fprintf(stderr, "containing %lu duplicate objects "
658
0
        "with a total size of %lukb.\n",
659
0
      (unsigned long)get_pack_redundancy(min),
660
0
      (unsigned long)pack_set_bytecount(min)/1024);
661
0
    fprintf(stderr, "A total of %lu unique objects were considered.\n",
662
0
      (unsigned long)all_objects->size);
663
0
    fprintf(stderr, "Redundant packs (with indexes):\n");
664
0
  }
665
0
  pl = red = pack_list_difference(local_packs, min);
666
0
  while (pl) {
667
0
    printf("%s\n%s\n",
668
0
           sha1_pack_index_name(pl->pack->hash),
669
0
           pl->pack->pack_name);
670
0
    pl = pl->next;
671
0
  }
672
0
  if (verbose)
673
0
    fprintf(stderr, "%luMB of redundant packs in total.\n",
674
0
      (unsigned long)pack_set_bytecount(red)/(1024*1024));
675
676
0
  return 0;
677
0
}