Coverage Report

Created: 2024-09-08 06:23

/src/git/builtin/pack-objects.c

Every instrumented line in this section has an execution count of 0 (nothing here was covered), so the per-line Line/Count columns are collapsed into this note and the source is shown as a plain listing below.
#include "builtin.h"
#include "environment.h"
#include "gettext.h"
#include "hex.h"
#include "repository.h"
#include "config.h"
#include "attr.h"
#include "object.h"
#include "commit.h"
#include "tag.h"
#include "delta.h"
#include "pack.h"
#include "pack-revindex.h"
#include "csum-file.h"
#include "tree-walk.h"
#include "diff.h"
#include "revision.h"
#include "list-objects.h"
#include "list-objects-filter-options.h"
#include "pack-objects.h"
#include "progress.h"
#include "refs.h"
#include "streaming.h"
#include "thread-utils.h"
#include "pack-bitmap.h"
#include "delta-islands.h"
#include "reachable.h"
#include "oid-array.h"
#include "strvec.h"
#include "list.h"
#include "packfile.h"
#include "object-file.h"
#include "object-store-ll.h"
#include "replace-object.h"
#include "dir.h"
#include "midx.h"
#include "trace2.h"
#include "shallow.h"
#include "promisor-remote.h"
#include "pack-mtimes.h"
#include "parse-options.h"

/*
 * Objects we are going to pack are collected in the `to_pack` structure.
 * It contains an array (dynamically expanded) of the object data, and a map
 * that can resolve SHA1s to their position in the array.
 */
static struct packing_data to_pack;

static inline struct object_entry *oe_delta(
    const struct packing_data *pack,
    const struct object_entry *e)
{
  if (!e->delta_idx)
    return NULL;
  if (e->ext_base)
    return &pack->ext_bases[e->delta_idx - 1];
  else
    return &pack->objects[e->delta_idx - 1];
}

static inline unsigned long oe_delta_size(struct packing_data *pack,
            const struct object_entry *e)
{
  if (e->delta_size_valid)
    return e->delta_size_;

  /*
   * pack->delta_size[] can't be NULL because oe_set_delta_size()
   * must have been called when a new delta is saved with
   * oe_set_delta().
   * If oe_delta() returns NULL (i.e. default state, which means
   * delta_size_valid is also false), then the caller must never
   * call oe_delta_size().
   */
  return pack->delta_size[e - pack->objects];
}

unsigned long oe_get_size_slow(struct packing_data *pack,
             const struct object_entry *e);

static inline unsigned long oe_size(struct packing_data *pack,
            const struct object_entry *e)
{
  if (e->size_valid)
    return e->size_;

  return oe_get_size_slow(pack, e);
}

static inline void oe_set_delta(struct packing_data *pack,
        struct object_entry *e,
        struct object_entry *delta)
{
  if (delta)
    e->delta_idx = (delta - pack->objects) + 1;
  else
    e->delta_idx = 0;
}
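
/*
 * Illustration (not part of pack-objects.c): delta_idx is a 1-based
 * array index so that 0 can mean "no delta".  Given
 * pack->objects == {A, B, C}, oe_set_delta(pack, &C, &B) stores
 * C.delta_idx = (&B - pack->objects) + 1 == 2, and oe_delta(pack, &C)
 * returns &pack->objects[2 - 1] == &B.  Storing an index instead of a
 * pointer keeps object_entry small and stays valid when the objects
 * array is moved by a realloc.
 */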

static inline struct object_entry *oe_delta_sibling(
    const struct packing_data *pack,
    const struct object_entry *e)
{
  if (e->delta_sibling_idx)
    return &pack->objects[e->delta_sibling_idx - 1];
  return NULL;
}

static inline struct object_entry *oe_delta_child(
    const struct packing_data *pack,
    const struct object_entry *e)
{
  if (e->delta_child_idx)
    return &pack->objects[e->delta_child_idx - 1];
  return NULL;
}

static inline void oe_set_delta_child(struct packing_data *pack,
              struct object_entry *e,
              struct object_entry *delta)
{
  if (delta)
    e->delta_child_idx = (delta - pack->objects) + 1;
  else
    e->delta_child_idx = 0;
}

static inline void oe_set_delta_sibling(struct packing_data *pack,
          struct object_entry *e,
          struct object_entry *delta)
{
  if (delta)
    e->delta_sibling_idx = (delta - pack->objects) + 1;
  else
    e->delta_sibling_idx = 0;
}

static inline void oe_set_size(struct packing_data *pack,
             struct object_entry *e,
             unsigned long size)
{
  if (size < pack->oe_size_limit) {
    e->size_ = size;
    e->size_valid = 1;
  } else {
    e->size_valid = 0;
    if (oe_get_size_slow(pack, e) != size)
      BUG("'size' is supposed to be the object size!");
  }
}

static inline void oe_set_delta_size(struct packing_data *pack,
             struct object_entry *e,
             unsigned long size)
{
  if (size < pack->oe_delta_size_limit) {
    e->delta_size_ = size;
    e->delta_size_valid = 1;
  } else {
    packing_data_lock(pack);
    if (!pack->delta_size)
      ALLOC_ARRAY(pack->delta_size, pack->nr_alloc);
    packing_data_unlock(pack);

    pack->delta_size[e - pack->objects] = size;
    e->delta_size_valid = 0;
  }
}

#define IN_PACK(obj) oe_in_pack(&to_pack, obj)
#define SIZE(obj) oe_size(&to_pack, obj)
#define SET_SIZE(obj,size) oe_set_size(&to_pack, obj, size)
#define DELTA_SIZE(obj) oe_delta_size(&to_pack, obj)
#define DELTA(obj) oe_delta(&to_pack, obj)
#define DELTA_CHILD(obj) oe_delta_child(&to_pack, obj)
#define DELTA_SIBLING(obj) oe_delta_sibling(&to_pack, obj)
#define SET_DELTA(obj, val) oe_set_delta(&to_pack, obj, val)
#define SET_DELTA_EXT(obj, oid) oe_set_delta_ext(&to_pack, obj, oid)
#define SET_DELTA_SIZE(obj, val) oe_set_delta_size(&to_pack, obj, val)
#define SET_DELTA_CHILD(obj, val) oe_set_delta_child(&to_pack, obj, val)
#define SET_DELTA_SIBLING(obj, val) oe_set_delta_sibling(&to_pack, obj, val)

static const char *pack_usage[] = {
  N_("git pack-objects --stdout [<options>] [< <ref-list> | < <object-list>]"),
  N_("git pack-objects [<options>] <base-name> [< <ref-list> | < <object-list>]"),
  NULL
};

static struct pack_idx_entry **written_list;
static uint32_t nr_result, nr_written, nr_seen;
static struct bitmap_index *bitmap_git;
static uint32_t write_layer;

static int non_empty;
static int reuse_delta = 1, reuse_object = 1;
static int keep_unreachable, unpack_unreachable, include_tag;
static timestamp_t unpack_unreachable_expiration;
static int pack_loose_unreachable;
static int cruft;
static timestamp_t cruft_expiration;
static int local;
static int have_non_local_packs;
static int incremental;
static int ignore_packed_keep_on_disk;
static int ignore_packed_keep_in_core;
static int allow_ofs_delta;
static struct pack_idx_option pack_idx_opts;
static const char *base_name;
static int progress = 1;
static int window = 10;
static unsigned long pack_size_limit;
static int depth = 50;
static int delta_search_threads;
static int pack_to_stdout;
static int sparse;
static int thin;
static int num_preferred_base;
static struct progress *progress_state;

static struct bitmapped_pack *reuse_packfiles;
static size_t reuse_packfiles_nr;
static size_t reuse_packfiles_used_nr;
static uint32_t reuse_packfile_objects;
static struct bitmap *reuse_packfile_bitmap;

static int use_bitmap_index_default = 1;
static int use_bitmap_index = -1;
static enum {
  NO_PACK_REUSE = 0,
  SINGLE_PACK_REUSE,
  MULTI_PACK_REUSE,
} allow_pack_reuse = SINGLE_PACK_REUSE;
static enum {
  WRITE_BITMAP_FALSE = 0,
  WRITE_BITMAP_QUIET,
  WRITE_BITMAP_TRUE,
} write_bitmap_index;
static uint16_t write_bitmap_options = BITMAP_OPT_HASH_CACHE;

static int exclude_promisor_objects;

static int use_delta_islands;

static unsigned long delta_cache_size = 0;
static unsigned long max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE;
static unsigned long cache_max_small_delta_size = 1000;

static unsigned long window_memory_limit = 0;

static struct string_list uri_protocols = STRING_LIST_INIT_NODUP;

enum missing_action {
  MA_ERROR = 0,      /* fail if any missing objects are encountered */
  MA_ALLOW_ANY,      /* silently allow ALL missing objects */
  MA_ALLOW_PROMISOR, /* silently allow all missing PROMISOR objects */
};
static enum missing_action arg_missing_action;
static show_object_fn fn_show_object;

struct configured_exclusion {
  struct oidmap_entry e;
  char *pack_hash_hex;
  char *uri;
};
static struct oidmap configured_exclusions;

static struct oidset excluded_by_config;

/*
 * stats
 */
static uint32_t written, written_delta;
static uint32_t reused, reused_delta;

/*
 * Indexed commits
 */
static struct commit **indexed_commits;
static unsigned int indexed_commits_nr;
static unsigned int indexed_commits_alloc;

static void index_commit_for_bitmap(struct commit *commit)
{
  if (indexed_commits_nr >= indexed_commits_alloc) {
    indexed_commits_alloc = (indexed_commits_alloc + 32) * 2;
    REALLOC_ARRAY(indexed_commits, indexed_commits_alloc);
  }

  indexed_commits[indexed_commits_nr++] = commit;
}

static void *get_delta(struct object_entry *entry)
{
  unsigned long size, base_size, delta_size;
  void *buf, *base_buf, *delta_buf;
  enum object_type type;

  buf = repo_read_object_file(the_repository, &entry->idx.oid, &type,
            &size);
  if (!buf)
    die(_("unable to read %s"), oid_to_hex(&entry->idx.oid));
  base_buf = repo_read_object_file(the_repository,
           &DELTA(entry)->idx.oid, &type,
           &base_size);
  if (!base_buf)
    die("unable to read %s",
        oid_to_hex(&DELTA(entry)->idx.oid));
  delta_buf = diff_delta(base_buf, base_size,
             buf, size, &delta_size, 0);
  /*
   * We successfully computed this delta once but dropped it for
   * memory reasons. Something is very wrong if this time we
   * recompute and create a different delta.
   */
  if (!delta_buf || delta_size != DELTA_SIZE(entry))
    BUG("delta size changed");
  free(buf);
  free(base_buf);
  return delta_buf;
}

static unsigned long do_compress(void **pptr, unsigned long size)
{
  git_zstream stream;
  void *in, *out;
  unsigned long maxsize;

  git_deflate_init(&stream, pack_compression_level);
  maxsize = git_deflate_bound(&stream, size);

  in = *pptr;
  out = xmalloc(maxsize);
  *pptr = out;

  stream.next_in = in;
  stream.avail_in = size;
  stream.next_out = out;
  stream.avail_out = maxsize;
  while (git_deflate(&stream, Z_FINISH) == Z_OK)
    ; /* nothing */
  git_deflate_end(&stream);

  free(in);
  return stream.total_out;
}
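
/*
 * Illustration (not part of pack-objects.c): the same
 * allocate-the-worst-case-then-deflate pattern as do_compress() above,
 * but using plain zlib rather than git's git_deflate_* wrappers.
 * compressBound() plays the role of git_deflate_bound(), and the input
 * buffer is freed and replaced by the compressed copy.
 */
#include <stdlib.h>
#include <zlib.h>

static unsigned long compress_in_place(void **pptr, unsigned long size)
{
  uLongf outlen = compressBound(size); /* worst case for "size" input bytes */
  Bytef *out = malloc(outlen);

  if (!out)
    return 0; /* sketch: caller handles allocation failure */
  if (compress2(out, &outlen, *pptr, size, Z_DEFAULT_COMPRESSION) != Z_OK) {
    free(out);
    return 0;
  }
  free(*pptr); /* the uncompressed input is no longer needed */
  *pptr = out;
  return outlen;
}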

static unsigned long write_large_blob_data(struct git_istream *st, struct hashfile *f,
             const struct object_id *oid)
{
  git_zstream stream;
  unsigned char ibuf[1024 * 16];
  unsigned char obuf[1024 * 16];
  unsigned long olen = 0;

  git_deflate_init(&stream, pack_compression_level);

  for (;;) {
    ssize_t readlen;
    int zret = Z_OK;
    readlen = read_istream(st, ibuf, sizeof(ibuf));
    if (readlen == -1)
      die(_("unable to read %s"), oid_to_hex(oid));

    stream.next_in = ibuf;
    stream.avail_in = readlen;
    while ((stream.avail_in || readlen == 0) &&
           (zret == Z_OK || zret == Z_BUF_ERROR)) {
      stream.next_out = obuf;
      stream.avail_out = sizeof(obuf);
      zret = git_deflate(&stream, readlen ? 0 : Z_FINISH);
      hashwrite(f, obuf, stream.next_out - obuf);
      olen += stream.next_out - obuf;
    }
    if (stream.avail_in)
      die(_("deflate error (%d)"), zret);
    if (readlen == 0) {
      if (zret != Z_STREAM_END)
        die(_("deflate error (%d)"), zret);
      break;
    }
  }
  git_deflate_end(&stream);
  return olen;
}

/*
 * we are going to reuse the existing object data as is.  make
 * sure it is not corrupt.
 */
static int check_pack_inflate(struct packed_git *p,
    struct pack_window **w_curs,
    off_t offset,
    off_t len,
    unsigned long expect)
{
  git_zstream stream;
  unsigned char fakebuf[4096], *in;
  int st;

  memset(&stream, 0, sizeof(stream));
  git_inflate_init(&stream);
  do {
    in = use_pack(p, w_curs, offset, &stream.avail_in);
    stream.next_in = in;
    stream.next_out = fakebuf;
    stream.avail_out = sizeof(fakebuf);
    st = git_inflate(&stream, Z_FINISH);
    offset += stream.next_in - in;
  } while (st == Z_OK || st == Z_BUF_ERROR);
  git_inflate_end(&stream);
  return (st == Z_STREAM_END &&
    stream.total_out == expect &&
    stream.total_in == len) ? 0 : -1;
}

static void copy_pack_data(struct hashfile *f,
    struct packed_git *p,
    struct pack_window **w_curs,
    off_t offset,
    off_t len)
{
  unsigned char *in;
  unsigned long avail;

  while (len) {
    in = use_pack(p, w_curs, offset, &avail);
    if (avail > len)
      avail = (unsigned long)len;
    hashwrite(f, in, avail);
    offset += avail;
    len -= avail;
  }
}

static inline int oe_size_greater_than(struct packing_data *pack,
               const struct object_entry *lhs,
               unsigned long rhs)
{
  if (lhs->size_valid)
    return lhs->size_ > rhs;
  if (rhs < pack->oe_size_limit) /* rhs < 2^x <= lhs ? */
    return 1;
  return oe_get_size_slow(pack, lhs) > rhs;
}

/* Return 0 if we will bust the pack-size limit */
static unsigned long write_no_reuse_object(struct hashfile *f, struct object_entry *entry,
             unsigned long limit, int usable_delta)
{
  unsigned long size, datalen;
  unsigned char header[MAX_PACK_OBJECT_HEADER],
          dheader[MAX_PACK_OBJECT_HEADER];
  unsigned hdrlen;
  enum object_type type;
  void *buf;
  struct git_istream *st = NULL;
  const unsigned hashsz = the_hash_algo->rawsz;

  if (!usable_delta) {
    if (oe_type(entry) == OBJ_BLOB &&
        oe_size_greater_than(&to_pack, entry, big_file_threshold) &&
        (st = open_istream(the_repository, &entry->idx.oid, &type,
               &size, NULL)) != NULL)
      buf = NULL;
    else {
      buf = repo_read_object_file(the_repository,
                &entry->idx.oid, &type,
                &size);
      if (!buf)
        die(_("unable to read %s"),
            oid_to_hex(&entry->idx.oid));
    }
    /*
     * make sure no cached delta data remains from a
     * previous attempt before a pack split occurred.
     */
    FREE_AND_NULL(entry->delta_data);
    entry->z_delta_size = 0;
  } else if (entry->delta_data) {
    size = DELTA_SIZE(entry);
    buf = entry->delta_data;
    entry->delta_data = NULL;
    type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
      OBJ_OFS_DELTA : OBJ_REF_DELTA;
  } else {
    buf = get_delta(entry);
    size = DELTA_SIZE(entry);
    type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
      OBJ_OFS_DELTA : OBJ_REF_DELTA;
  }

  if (st) /* large blob case, just assume we don't compress well */
    datalen = size;
  else if (entry->z_delta_size)
    datalen = entry->z_delta_size;
  else
    datalen = do_compress(&buf, size);

  /*
   * The object header is a byte of 'type' followed by zero or
   * more bytes of length.
   */
  hdrlen = encode_in_pack_object_header(header, sizeof(header),
                type, size);

  if (type == OBJ_OFS_DELTA) {
    /*
     * Deltas with relative base contain an additional
     * encoding of the relative offset for the delta
     * base from this object's position in the pack.
     */
    off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
    unsigned pos = sizeof(dheader) - 1;
    dheader[pos] = ofs & 127;
    while (ofs >>= 7)
      dheader[--pos] = 128 | (--ofs & 127);
    if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) {
      if (st)
        close_istream(st);
      free(buf);
      return 0;
    }
    hashwrite(f, header, hdrlen);
    hashwrite(f, dheader + pos, sizeof(dheader) - pos);
    hdrlen += sizeof(dheader) - pos;
  } else if (type == OBJ_REF_DELTA) {
    /*
     * Deltas with a base reference contain
     * additional bytes for the base object ID.
     */
    if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
      if (st)
        close_istream(st);
      free(buf);
      return 0;
    }
    hashwrite(f, header, hdrlen);
    hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz);
    hdrlen += hashsz;
  } else {
    if (limit && hdrlen + datalen + hashsz >= limit) {
      if (st)
        close_istream(st);
      free(buf);
      return 0;
    }
    hashwrite(f, header, hdrlen);
  }
  if (st) {
    datalen = write_large_blob_data(st, f, &entry->idx.oid);
    close_istream(st);
  } else {
    hashwrite(f, buf, datalen);
    free(buf);
  }

  return hdrlen + datalen;
}
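
/*
 * Illustration (not part of pack-objects.c): a decoder matching the
 * "dheader" loop above.  The encoder's "--ofs" biases each additional
 * byte so that, e.g., two-byte encodings start where one-byte values
 * end instead of overlapping them; the decoder mirrors that bias with
 * a "+ 1" before each 7-bit shift.
 */
static off_t decode_ofs_delta_offset(const unsigned char *buf, unsigned *pos)
{
  unsigned char c = buf[(*pos)++];
  off_t ofs = c & 127;

  while (c & 128) {
    ofs += 1; /* undo the encoder's "--ofs" bias */
    c = buf[(*pos)++];
    ofs = (ofs << 7) + (c & 127);
  }
  return ofs;
}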

/* Return 0 if we will bust the pack-size limit */
static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
        unsigned long limit, int usable_delta)
{
  struct packed_git *p = IN_PACK(entry);
  struct pack_window *w_curs = NULL;
  uint32_t pos;
  off_t offset;
  enum object_type type = oe_type(entry);
  off_t datalen;
  unsigned char header[MAX_PACK_OBJECT_HEADER],
          dheader[MAX_PACK_OBJECT_HEADER];
  unsigned hdrlen;
  const unsigned hashsz = the_hash_algo->rawsz;
  unsigned long entry_size = SIZE(entry);

  if (DELTA(entry))
    type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
      OBJ_OFS_DELTA : OBJ_REF_DELTA;
  hdrlen = encode_in_pack_object_header(header, sizeof(header),
                type, entry_size);

  offset = entry->in_pack_offset;
  if (offset_to_pack_pos(p, offset, &pos) < 0)
    die(_("write_reuse_object: could not locate %s, expected at "
          "offset %"PRIuMAX" in pack %s"),
        oid_to_hex(&entry->idx.oid), (uintmax_t)offset,
        p->pack_name);
  datalen = pack_pos_to_offset(p, pos + 1) - offset;
  if (!pack_to_stdout && p->index_version > 1 &&
      check_pack_crc(p, &w_curs, offset, datalen,
         pack_pos_to_index(p, pos))) {
    error(_("bad packed object CRC for %s"),
          oid_to_hex(&entry->idx.oid));
    unuse_pack(&w_curs);
    return write_no_reuse_object(f, entry, limit, usable_delta);
  }

  offset += entry->in_pack_header_size;
  datalen -= entry->in_pack_header_size;

  if (!pack_to_stdout && p->index_version == 1 &&
      check_pack_inflate(p, &w_curs, offset, datalen, entry_size)) {
    error(_("corrupt packed object for %s"),
          oid_to_hex(&entry->idx.oid));
    unuse_pack(&w_curs);
    return write_no_reuse_object(f, entry, limit, usable_delta);
  }

  if (type == OBJ_OFS_DELTA) {
    off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
    unsigned pos = sizeof(dheader) - 1;
    dheader[pos] = ofs & 127;
    while (ofs >>= 7)
      dheader[--pos] = 128 | (--ofs & 127);
    if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) {
      unuse_pack(&w_curs);
      return 0;
    }
    hashwrite(f, header, hdrlen);
    hashwrite(f, dheader + pos, sizeof(dheader) - pos);
    hdrlen += sizeof(dheader) - pos;
    reused_delta++;
  } else if (type == OBJ_REF_DELTA) {
    if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
      unuse_pack(&w_curs);
      return 0;
    }
    hashwrite(f, header, hdrlen);
    hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz);
    hdrlen += hashsz;
    reused_delta++;
  } else {
    if (limit && hdrlen + datalen + hashsz >= limit) {
      unuse_pack(&w_curs);
      return 0;
    }
    hashwrite(f, header, hdrlen);
  }
  copy_pack_data(f, p, &w_curs, offset, datalen);
  unuse_pack(&w_curs);
  reused++;
  return hdrlen + datalen;
}

/* Return 0 if we will bust the pack-size limit */
static off_t write_object(struct hashfile *f,
        struct object_entry *entry,
        off_t write_offset)
{
  unsigned long limit;
  off_t len;
  int usable_delta, to_reuse;

  if (!pack_to_stdout)
    crc32_begin(f);

  /* apply size limit if limited packsize and not first object */
  if (!pack_size_limit || !nr_written)
    limit = 0;
  else if (pack_size_limit <= write_offset)
    /*
     * the earlier object did not fit the limit; avoid
     * mistaking this with unlimited (i.e. limit = 0).
     */
    limit = 1;
  else
    limit = pack_size_limit - write_offset;

  if (!DELTA(entry))
    usable_delta = 0; /* no delta */
  else if (!pack_size_limit)
    usable_delta = 1; /* unlimited packfile */
  else if (DELTA(entry)->idx.offset == (off_t)-1)
    usable_delta = 0; /* base was written to another pack */
  else if (DELTA(entry)->idx.offset)
    usable_delta = 1; /* base already exists in this pack */
  else
    usable_delta = 0; /* base could end up in another pack */

  if (!reuse_object)
    to_reuse = 0; /* explicit */
  else if (!IN_PACK(entry))
    to_reuse = 0; /* can't reuse what we don't have */
  else if (oe_type(entry) == OBJ_REF_DELTA ||
     oe_type(entry) == OBJ_OFS_DELTA)
        /* check_object() decided it for us ... */
    to_reuse = usable_delta;
        /* ... but pack split may override that */
  else if (oe_type(entry) != entry->in_pack_type)
    to_reuse = 0; /* pack has delta which is unusable */
  else if (DELTA(entry))
    to_reuse = 0; /* we want to pack afresh */
  else
    to_reuse = 1; /* we have it in-pack undeltified,
         * and we do not need to deltify it.
         */

  if (!to_reuse)
    len = write_no_reuse_object(f, entry, limit, usable_delta);
  else
    len = write_reuse_object(f, entry, limit, usable_delta);
  if (!len)
    return 0;

  if (usable_delta)
    written_delta++;
  written++;
  if (!pack_to_stdout)
    entry->idx.crc32 = crc32_end(f);
  return len;
}

enum write_one_status {
  WRITE_ONE_SKIP = -1, /* already written */
  WRITE_ONE_BREAK = 0, /* writing this will bust the limit; not written */
  WRITE_ONE_WRITTEN = 1, /* normal */
  WRITE_ONE_RECURSIVE = 2 /* already scheduled to be written */
};

static enum write_one_status write_one(struct hashfile *f,
               struct object_entry *e,
               off_t *offset)
{
  off_t size;
  int recursing;

  /*
   * we set offset to 1 (which is an impossible value) to mark
   * the fact that this object is involved in "write its base
   * first before writing a deltified object" recursion.
   */
  recursing = (e->idx.offset == 1);
  if (recursing) {
    warning(_("recursive delta detected for object %s"),
      oid_to_hex(&e->idx.oid));
    return WRITE_ONE_RECURSIVE;
  } else if (e->idx.offset || e->preferred_base) {
    /* offset is non zero if object is written already. */
    return WRITE_ONE_SKIP;
  }

  /* if we are deltified, write out base object first. */
  if (DELTA(e)) {
    e->idx.offset = 1; /* now recurse */
    switch (write_one(f, DELTA(e), offset)) {
    case WRITE_ONE_RECURSIVE:
      /* we cannot depend on this one */
      SET_DELTA(e, NULL);
      break;
    default:
      break;
    case WRITE_ONE_BREAK:
      e->idx.offset = recursing;
      return WRITE_ONE_BREAK;
    }
  }

  e->idx.offset = *offset;
  size = write_object(f, e, *offset);
  if (!size) {
    e->idx.offset = recursing;
    return WRITE_ONE_BREAK;
  }
  written_list[nr_written++] = &e->idx;

  /* make sure off_t is sufficiently large not to wrap */
  if (signed_add_overflows(*offset, size))
    die(_("pack too large for current definition of off_t"));
  *offset += size;
  return WRITE_ONE_WRITTEN;
}
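
/*
 * Illustration (not part of pack-objects.c): for a delta chain
 * C -> B -> A (C is deltified against B, and B against A),
 * write_one(C) first recurses into B, which recurses into A, so the
 * objects land in the pack as A, B, C and every delta base precedes
 * the entries that depend on it.  The temporary idx.offset = 1
 * sentinel is what turns an accidental cycle into a
 * WRITE_ONE_RECURSIVE return (and a dropped delta) instead of
 * unbounded recursion.
 */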

static int mark_tagged(const char *path UNUSED, const char *referent UNUSED, const struct object_id *oid,
           int flag UNUSED, void *cb_data UNUSED)
{
  struct object_id peeled;
  struct object_entry *entry = packlist_find(&to_pack, oid);

  if (entry)
    entry->tagged = 1;
  if (!peel_iterated_oid(the_repository, oid, &peeled)) {
    entry = packlist_find(&to_pack, &peeled);
    if (entry)
      entry->tagged = 1;
  }
  return 0;
}

static inline unsigned char oe_layer(struct packing_data *pack,
             struct object_entry *e)
{
  if (!pack->layer)
    return 0;
  return pack->layer[e - pack->objects];
}

static inline void add_to_write_order(struct object_entry **wo,
             unsigned int *endp,
             struct object_entry *e)
{
  if (e->filled || oe_layer(&to_pack, e) != write_layer)
    return;
  wo[(*endp)++] = e;
  e->filled = 1;
}

static void add_descendants_to_write_order(struct object_entry **wo,
             unsigned int *endp,
             struct object_entry *e)
{
  int add_to_order = 1;
  while (e) {
    if (add_to_order) {
      struct object_entry *s;
      /* add this node... */
      add_to_write_order(wo, endp, e);
      /* all its siblings... */
      for (s = DELTA_SIBLING(e); s; s = DELTA_SIBLING(s)) {
        add_to_write_order(wo, endp, s);
      }
    }
    /* drop down a level to add left subtree nodes if possible */
    if (DELTA_CHILD(e)) {
      add_to_order = 1;
      e = DELTA_CHILD(e);
    } else {
      add_to_order = 0;
      /* our sibling might have some children, it is next */
      if (DELTA_SIBLING(e)) {
        e = DELTA_SIBLING(e);
        continue;
      }
      /* go back to our parent node */
      e = DELTA(e);
      while (e && !DELTA_SIBLING(e)) {
        /* we're on the right side of a subtree, keep
         * going up until we can go right again */
        e = DELTA(e);
      }
      if (!e) {
        /* done- we hit our original root node */
        return;
      }
      /* pass it off to sibling at this level */
      e = DELTA_SIBLING(e);
    }
  };
}

static void add_family_to_write_order(struct object_entry **wo,
              unsigned int *endp,
              struct object_entry *e)
{
  struct object_entry *root;

  for (root = e; DELTA(root); root = DELTA(root))
    ; /* nothing */
  add_descendants_to_write_order(wo, endp, root);
}

static void compute_layer_order(struct object_entry **wo, unsigned int *wo_end)
{
  unsigned int i, last_untagged;
  struct object_entry *objects = to_pack.objects;

  for (i = 0; i < to_pack.nr_objects; i++) {
    if (objects[i].tagged)
      break;
    add_to_write_order(wo, wo_end, &objects[i]);
  }
  last_untagged = i;

  /*
   * Then fill all the tagged tips.
   */
  for (; i < to_pack.nr_objects; i++) {
    if (objects[i].tagged)
      add_to_write_order(wo, wo_end, &objects[i]);
  }

  /*
   * And then all remaining commits and tags.
   */
  for (i = last_untagged; i < to_pack.nr_objects; i++) {
    if (oe_type(&objects[i]) != OBJ_COMMIT &&
        oe_type(&objects[i]) != OBJ_TAG)
      continue;
    add_to_write_order(wo, wo_end, &objects[i]);
  }

  /*
   * And then all the trees.
   */
  for (i = last_untagged; i < to_pack.nr_objects; i++) {
    if (oe_type(&objects[i]) != OBJ_TREE)
      continue;
    add_to_write_order(wo, wo_end, &objects[i]);
  }

  /*
   * Finally all the rest in really tight order
   */
  for (i = last_untagged; i < to_pack.nr_objects; i++) {
    if (!objects[i].filled && oe_layer(&to_pack, &objects[i]) == write_layer)
      add_family_to_write_order(wo, wo_end, &objects[i]);
  }
}

static struct object_entry **compute_write_order(void)
{
  uint32_t max_layers = 1;
  unsigned int i, wo_end;

  struct object_entry **wo;
  struct object_entry *objects = to_pack.objects;

  for (i = 0; i < to_pack.nr_objects; i++) {
    objects[i].tagged = 0;
    objects[i].filled = 0;
    SET_DELTA_CHILD(&objects[i], NULL);
    SET_DELTA_SIBLING(&objects[i], NULL);
  }

  /*
   * Fully connect delta_child/delta_sibling network.
   * Make sure delta_sibling is sorted in the original
   * recency order.
   */
  for (i = to_pack.nr_objects; i > 0;) {
    struct object_entry *e = &objects[--i];
    if (!DELTA(e))
      continue;
    /* Mark me as the first child */
    e->delta_sibling_idx = DELTA(e)->delta_child_idx;
    SET_DELTA_CHILD(DELTA(e), e);
  }

  /*
   * Mark objects that are at the tip of tags.
   */
  refs_for_each_tag_ref(get_main_ref_store(the_repository), mark_tagged,
            NULL);

  if (use_delta_islands) {
    max_layers = compute_pack_layers(&to_pack);
    free_island_marks();
  }

  ALLOC_ARRAY(wo, to_pack.nr_objects);
  wo_end = 0;

  for (; write_layer < max_layers; ++write_layer)
    compute_layer_order(wo, &wo_end);

  if (wo_end != to_pack.nr_objects)
    die(_("ordered %u objects, expected %"PRIu32),
        wo_end, to_pack.nr_objects);

  return wo;
}

/*
 * A reused set of objects. All objects in a chunk have the same
 * relative position in the original packfile and the generated
 * packfile.
 */

static struct reused_chunk {
  /* The offset of the first object of this chunk in the original
   * packfile. */
  off_t original;
  /* The difference for "original" minus the offset of the first object of
   * this chunk in the generated packfile. */
  off_t difference;
} *reused_chunks;
static int reused_chunks_nr;
static int reused_chunks_alloc;

static void record_reused_object(off_t where, off_t offset)
{
  if (reused_chunks_nr && reused_chunks[reused_chunks_nr-1].difference == offset)
    return;

  ALLOC_GROW(reused_chunks, reused_chunks_nr + 1,
       reused_chunks_alloc);
  reused_chunks[reused_chunks_nr].original = where;
  reused_chunks[reused_chunks_nr].difference = offset;
  reused_chunks_nr++;
}

/*
 * Binary search to find the chunk that "where" is in. Note
 * that we're not looking for an exact match, just the first
 * chunk that contains it (which implicitly ends at the start
 * of the next chunk).
 */
static off_t find_reused_offset(off_t where)
{
  int lo = 0, hi = reused_chunks_nr;
  while (lo < hi) {
    int mi = lo + ((hi - lo) / 2);
    if (where == reused_chunks[mi].original)
      return reused_chunks[mi].difference;
    if (where < reused_chunks[mi].original)
      hi = mi;
    else
      lo = mi + 1;
  }

  /*
   * The first chunk starts at zero, so we can't have gone below
   * there.
   */
  assert(lo);
  return reused_chunks[lo-1].difference;
}
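
/*
 * Illustration (not part of pack-objects.c): with two recorded chunks
 *   { .original = 0,    .difference = 0   }
 *   { .original = 5000, .difference = 120 }
 * an object copied from original offset 6200 falls in the second
 * chunk, so find_reused_offset(6200) returns 120 and the object now
 * lives at offset 6200 - 120 == 6080 in the generated pack.
 */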

static void write_reused_pack_one(struct packed_git *reuse_packfile,
          size_t pos, struct hashfile *out,
          off_t pack_start,
          struct pack_window **w_curs)
{
  off_t offset, next, cur;
  enum object_type type;
  unsigned long size;

  offset = pack_pos_to_offset(reuse_packfile, pos);
  next = pack_pos_to_offset(reuse_packfile, pos + 1);

  record_reused_object(offset,
           offset - (hashfile_total(out) - pack_start));

  cur = offset;
  type = unpack_object_header(reuse_packfile, w_curs, &cur, &size);
  assert(type >= 0);

  if (type == OBJ_OFS_DELTA) {
    off_t base_offset;
    off_t fixup;

    unsigned char header[MAX_PACK_OBJECT_HEADER];
    unsigned len;

    base_offset = get_delta_base(reuse_packfile, w_curs, &cur, type, offset);
    assert(base_offset != 0);

    /* Convert to REF_DELTA if we must... */
    if (!allow_ofs_delta) {
      uint32_t base_pos;
      struct object_id base_oid;

      if (offset_to_pack_pos(reuse_packfile, base_offset, &base_pos) < 0)
        die(_("expected object at offset %"PRIuMAX" "
              "in pack %s"),
            (uintmax_t)base_offset,
            reuse_packfile->pack_name);

      nth_packed_object_id(&base_oid, reuse_packfile,
               pack_pos_to_index(reuse_packfile, base_pos));

      len = encode_in_pack_object_header(header, sizeof(header),
                 OBJ_REF_DELTA, size);
      hashwrite(out, header, len);
      hashwrite(out, base_oid.hash, the_hash_algo->rawsz);
      copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur);
      return;
    }

    /* Otherwise see if we need to rewrite the offset... */
    fixup = find_reused_offset(offset) -
      find_reused_offset(base_offset);
    if (fixup) {
      unsigned char ofs_header[MAX_PACK_OBJECT_HEADER];
      unsigned i, ofs_len;
      off_t ofs = offset - base_offset - fixup;

      len = encode_in_pack_object_header(header, sizeof(header),
                 OBJ_OFS_DELTA, size);

      i = sizeof(ofs_header) - 1;
      ofs_header[i] = ofs & 127;
      while (ofs >>= 7)
        ofs_header[--i] = 128 | (--ofs & 127);

      ofs_len = sizeof(ofs_header) - i;

      hashwrite(out, header, len);
      hashwrite(out, ofs_header + sizeof(ofs_header) - ofs_len, ofs_len);
      copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur);
      return;
    }

    /* ...otherwise we have no fixup, and can write it verbatim */
  }

  copy_pack_data(out, reuse_packfile, w_curs, offset, next - offset);
}

static size_t write_reused_pack_verbatim(struct bitmapped_pack *reuse_packfile,
           struct hashfile *out,
           off_t pack_start,
           struct pack_window **w_curs)
{
  size_t pos = reuse_packfile->bitmap_pos;
  size_t end;

  if (pos % BITS_IN_EWORD) {
    size_t word_pos = (pos / BITS_IN_EWORD);
    size_t offset = pos % BITS_IN_EWORD;
    size_t last;
    eword_t word = reuse_packfile_bitmap->words[word_pos];

    if (offset + reuse_packfile->bitmap_nr < BITS_IN_EWORD)
      last = offset + reuse_packfile->bitmap_nr;
    else
      last = BITS_IN_EWORD;

    for (; offset < last; offset++) {
      if (word >> offset == 0)
        return word_pos;
      if (!bitmap_get(reuse_packfile_bitmap,
          word_pos * BITS_IN_EWORD + offset))
        return word_pos;
    }

    pos += BITS_IN_EWORD - (pos % BITS_IN_EWORD);
  }

  /*
   * Now we're going to copy as many whole eword_t's as possible.
   * "end" is the index of the last whole eword_t we copy, but
   * there may be additional bits to process. Those are handled
   * individually by write_reused_pack().
   *
   * Begin by advancing to the first word boundary in range of the
   * bit positions occupied by objects in "reuse_packfile". Then
   * pick the last word boundary in the same range. If we have at
   * least one word's worth of bits to process, continue on.
   */
  end = reuse_packfile->bitmap_pos + reuse_packfile->bitmap_nr;
  if (end % BITS_IN_EWORD)
    end -= end % BITS_IN_EWORD;
  if (pos >= end)
    return reuse_packfile->bitmap_pos / BITS_IN_EWORD;

  while (pos < end &&
         reuse_packfile_bitmap->words[pos / BITS_IN_EWORD] == (eword_t)~0)
    pos += BITS_IN_EWORD;

  if (pos > end)
    pos = end;

  if (reuse_packfile->bitmap_pos < pos) {
    off_t pack_start_off = pack_pos_to_offset(reuse_packfile->p, 0);
    off_t pack_end_off = pack_pos_to_offset(reuse_packfile->p,
              pos - reuse_packfile->bitmap_pos);

    written += pos - reuse_packfile->bitmap_pos;

    /* We're recording one chunk, not one object. */
    record_reused_object(pack_start_off,
             pack_start_off - (hashfile_total(out) - pack_start));
    hashflush(out);
    copy_pack_data(out, reuse_packfile->p, w_curs,
      pack_start_off, pack_end_off - pack_start_off);

    display_progress(progress_state, written);
  }
  if (pos % BITS_IN_EWORD)
    BUG("attempted to jump past a word boundary to %"PRIuMAX,
        (uintmax_t)pos);
  return pos / BITS_IN_EWORD;
}

static void write_reused_pack(struct bitmapped_pack *reuse_packfile,
            struct hashfile *f)
{
  size_t i = reuse_packfile->bitmap_pos / BITS_IN_EWORD;
  uint32_t offset;
  off_t pack_start = hashfile_total(f) - sizeof(struct pack_header);
  struct pack_window *w_curs = NULL;

  if (allow_ofs_delta)
    i = write_reused_pack_verbatim(reuse_packfile, f, pack_start,
                 &w_curs);

  for (; i < reuse_packfile_bitmap->word_alloc; ++i) {
    eword_t word = reuse_packfile_bitmap->words[i];
    size_t pos = (i * BITS_IN_EWORD);

    for (offset = 0; offset < BITS_IN_EWORD; ++offset) {
      uint32_t pack_pos;
      if ((word >> offset) == 0)
        break;

      offset += ewah_bit_ctz64(word >> offset);
      if (pos + offset < reuse_packfile->bitmap_pos)
        continue;
      if (pos + offset >= reuse_packfile->bitmap_pos + reuse_packfile->bitmap_nr)
        goto done;

      if (reuse_packfile->bitmap_pos) {
        /*
         * When doing multi-pack reuse on a
         * non-preferred pack, translate bit positions
         * from the MIDX pseudo-pack order back to their
         * pack-relative positions before attempting
         * reuse.
         */
        struct multi_pack_index *m = reuse_packfile->from_midx;
        uint32_t midx_pos;
        off_t pack_ofs;

        if (!m)
          BUG("non-zero bitmap position without MIDX");

        midx_pos = pack_pos_to_midx(m, pos + offset);
        pack_ofs = nth_midxed_offset(m, midx_pos);

        if (offset_to_pack_pos(reuse_packfile->p,
                   pack_ofs, &pack_pos) < 0)
          BUG("could not find expected object at offset %"PRIuMAX" in pack %s",
              (uintmax_t)pack_ofs,
              pack_basename(reuse_packfile->p));
      } else {
        /*
         * Can use bit positions directly, even for MIDX
         * bitmaps. See comment in try_partial_reuse()
         * for why.
         */
        pack_pos = pos + offset;
      }

      write_reused_pack_one(reuse_packfile->p, pack_pos, f,
                pack_start, &w_curs);
      display_progress(progress_state, ++written);
    }
  }

done:
  unuse_pack(&w_curs);
}

static void write_excluded_by_configs(void)
{
  struct oidset_iter iter;
  const struct object_id *oid;

  oidset_iter_init(&excluded_by_config, &iter);
  while ((oid = oidset_iter_next(&iter))) {
    struct configured_exclusion *ex =
      oidmap_get(&configured_exclusions, oid);

    if (!ex)
      BUG("configured exclusion wasn't configured");
    write_in_full(1, ex->pack_hash_hex, strlen(ex->pack_hash_hex));
    write_in_full(1, " ", 1);
    write_in_full(1, ex->uri, strlen(ex->uri));
    write_in_full(1, "\n", 1);
  }
}

static const char no_split_warning[] = N_(
"disabling bitmap writing, packs are split due to pack.packSizeLimit"
);

static void write_pack_file(void)
{
  uint32_t i = 0, j;
  struct hashfile *f;
  off_t offset;
  uint32_t nr_remaining = nr_result;
  time_t last_mtime = 0;
  struct object_entry **write_order;

  if (progress > pack_to_stdout)
    progress_state = start_progress(_("Writing objects"), nr_result);
  ALLOC_ARRAY(written_list, to_pack.nr_objects);
  write_order = compute_write_order();

  do {
    unsigned char hash[GIT_MAX_RAWSZ];
    char *pack_tmp_name = NULL;

    if (pack_to_stdout)
      f = hashfd_throughput(1, "<stdout>", progress_state);
    else
      f = create_tmp_packfile(&pack_tmp_name);

    offset = write_pack_header(f, nr_remaining);

    if (reuse_packfiles_nr) {
      assert(pack_to_stdout);
      for (j = 0; j < reuse_packfiles_nr; j++) {
        reused_chunks_nr = 0;
        write_reused_pack(&reuse_packfiles[j], f);
        if (reused_chunks_nr)
          reuse_packfiles_used_nr++;
      }
      offset = hashfile_total(f);
    }

    nr_written = 0;
    for (; i < to_pack.nr_objects; i++) {
      struct object_entry *e = write_order[i];
      if (write_one(f, e, &offset) == WRITE_ONE_BREAK)
        break;
      display_progress(progress_state, written);
    }

    if (pack_to_stdout) {
      /*
       * We never fsync when writing to stdout since we may
       * not be writing to an actual pack file. For instance,
       * the upload-pack code passes a pipe here. Calling
       * fsync on a pipe results in unnecessary
       * synchronization with the reader on some platforms.
       */
      finalize_hashfile(f, hash, FSYNC_COMPONENT_NONE,
            CSUM_HASH_IN_STREAM | CSUM_CLOSE);
    } else if (nr_written == nr_remaining) {
      finalize_hashfile(f, hash, FSYNC_COMPONENT_PACK,
            CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
    } else {
      /*
       * If we wrote the wrong number of entries in the
       * header, rewrite it like in fast-import.
       */

      int fd = finalize_hashfile(f, hash, FSYNC_COMPONENT_PACK, 0);
      fixup_pack_header_footer(fd, hash, pack_tmp_name,
             nr_written, hash, offset);
      close(fd);
      if (write_bitmap_index) {
        if (write_bitmap_index != WRITE_BITMAP_QUIET)
          warning(_(no_split_warning));
        write_bitmap_index = 0;
      }
    }

    if (!pack_to_stdout) {
      struct stat st;
      struct strbuf tmpname = STRBUF_INIT;
      struct bitmap_writer bitmap_writer;
      char *idx_tmp_name = NULL;

      /*
       * Packs are runtime accessed in their mtime
       * order since newer packs are more likely to contain
       * younger objects.  So if we are creating multiple
       * packs then we should modify the mtime of later ones
       * to preserve this property.
       */
      if (stat(pack_tmp_name, &st) < 0) {
        warning_errno(_("failed to stat %s"), pack_tmp_name);
      } else if (!last_mtime) {
        last_mtime = st.st_mtime;
      } else {
        struct utimbuf utb;
        utb.actime = st.st_atime;
        utb.modtime = --last_mtime;
        if (utime(pack_tmp_name, &utb) < 0)
          warning_errno(_("failed utime() on %s"), pack_tmp_name);
      }

      strbuf_addf(&tmpname, "%s-%s.", base_name,
            hash_to_hex(hash));

      if (write_bitmap_index) {
        bitmap_writer_init(&bitmap_writer,
               the_repository, &to_pack);
        bitmap_writer_set_checksum(&bitmap_writer, hash);
        bitmap_writer_build_type_index(&bitmap_writer,
                     written_list);
      }

      if (cruft)
        pack_idx_opts.flags |= WRITE_MTIMES;

      stage_tmp_packfiles(&tmpname, pack_tmp_name,
              written_list, nr_written,
              &to_pack, &pack_idx_opts, hash,
              &idx_tmp_name);

      if (write_bitmap_index) {
        size_t tmpname_len = tmpname.len;

        strbuf_addstr(&tmpname, "bitmap");
        stop_progress(&progress_state);

        bitmap_writer_show_progress(&bitmap_writer,
                  progress);
        bitmap_writer_select_commits(&bitmap_writer,
                   indexed_commits,
                   indexed_commits_nr);
        if (bitmap_writer_build(&bitmap_writer) < 0)
          die(_("failed to write bitmap index"));
        bitmap_writer_finish(&bitmap_writer,
                 written_list,
                 tmpname.buf, write_bitmap_options);
        bitmap_writer_free(&bitmap_writer);
        write_bitmap_index = 0;
        strbuf_setlen(&tmpname, tmpname_len);
      }

      rename_tmp_packfile_idx(&tmpname, &idx_tmp_name);

      free(idx_tmp_name);
      strbuf_release(&tmpname);
      free(pack_tmp_name);
      puts(hash_to_hex(hash));
    }

    /* mark written objects as written to previous pack */
    for (j = 0; j < nr_written; j++) {
      written_list[j]->offset = (off_t)-1;
    }
    nr_remaining -= nr_written;
  } while (nr_remaining && i < to_pack.nr_objects);

  free(written_list);
  free(write_order);
  stop_progress(&progress_state);
  if (written != nr_result)
    die(_("wrote %"PRIu32" objects while expecting %"PRIu32),
        written, nr_result);
  trace2_data_intmax("pack-objects", the_repository,
         "write_pack_file/wrote", nr_result);
}

static int no_try_delta(const char *path)
{
  static struct attr_check *check;

  if (!check)
    check = attr_check_initl("delta", NULL);
  git_check_attr(the_repository->index, path, check);
  if (ATTR_FALSE(check->items[0].value))
    return 1;
  return 0;
}

/*
 * When adding an object, check whether we have already added it
 * to our packing list. If so, we can skip. However, if we are
 * being asked to exclude it, but the previous mention was to include
 * it, make sure to adjust its flags and tweak our numbers accordingly.
 *
 * As an optimization, we pass out the index position where we would have
 * found the item, since that saves us from having to look it up again a
 * few lines later when we want to add the new entry.
 */
static int have_duplicate_entry(const struct object_id *oid,
        int exclude)
{
  struct object_entry *entry;

  if (reuse_packfile_bitmap &&
      bitmap_walk_contains(bitmap_git, reuse_packfile_bitmap, oid))
    return 1;

  entry = packlist_find(&to_pack, oid);
  if (!entry)
    return 0;

  if (exclude) {
    if (!entry->preferred_base)
      nr_result--;
    entry->preferred_base = 1;
  }

  return 1;
}

static int want_found_object(const struct object_id *oid, int exclude,
           struct packed_git *p)
{
  if (exclude)
    return 1;
  if (incremental)
    return 0;

  if (!is_pack_valid(p))
    return -1;

  /*
   * When asked to do --local (do not include an object that appears in a
   * pack we borrow from elsewhere) or --honor-pack-keep (do not include
   * an object that appears in a pack marked with .keep), finding a pack
   * that matches the criteria is sufficient for us to decide to omit it.
   * However, even if this pack does not satisfy the criteria, we need to
   * make sure no copy of this object appears in _any_ pack that makes us
   * to omit the object, so we need to check all the packs.
   *
   * We can however first check whether these options can possibly matter;
   * if they do not matter we know we want the object in generated pack.
   * Otherwise, we signal "-1" at the end to tell the caller that we do
   * not know either way, and it needs to check more packs.
   */

  /*
   * Objects in packs borrowed from elsewhere are discarded regardless of
   * if they appear in other packs that weren't borrowed.
   */
  if (local && !p->pack_local)
    return 0;

  /*
   * Then handle .keep first, as we have a fast(er) path there.
   */
  if (ignore_packed_keep_on_disk || ignore_packed_keep_in_core) {
    /*
     * Set the flags for the kept-pack cache to be the ones we want
     * to ignore.
     *
     * That is, if we are ignoring objects in on-disk keep packs,
     * then we want to search through the on-disk keep and ignore
     * the in-core ones.
     */
    unsigned flags = 0;
    if (ignore_packed_keep_on_disk)
      flags |= ON_DISK_KEEP_PACKS;
    if (ignore_packed_keep_in_core)
      flags |= IN_CORE_KEEP_PACKS;

    if (ignore_packed_keep_on_disk && p->pack_keep)
      return 0;
    if (ignore_packed_keep_in_core && p->pack_keep_in_core)
      return 0;
    if (has_object_kept_pack(oid, flags))
      return 0;
  }

  /*
   * At this point we know definitively that either we don't care about
   * keep-packs, or the object is not in one. Keep checking other
   * conditions...
   */
  if (!local || !have_non_local_packs)
    return 1;

  /* we don't know yet; keep looking for more packs */
  return -1;
}

static int want_object_in_pack_one(struct packed_git *p,
           const struct object_id *oid,
           int exclude,
           struct packed_git **found_pack,
           off_t *found_offset)
{
  off_t offset;

  if (p == *found_pack)
    offset = *found_offset;
  else
    offset = find_pack_entry_one(oid->hash, p);

  if (offset) {
    if (!*found_pack) {
      if (!is_pack_valid(p))
        return -1;
      *found_offset = offset;
      *found_pack = p;
    }
    return want_found_object(oid, exclude, p);
  }
  return -1;
}

/*
 * Check whether we want the object in the pack (e.g., we do not want
 * objects found in non-local stores if the "--local" option was used).
 *
 * If the caller already knows an existing pack it wants to take the object
 * from, that is passed in *found_pack and *found_offset; otherwise this
 * function finds if there is any pack that has the object and returns the pack
 * and its offset in these variables.
 */
static int want_object_in_pack(const struct object_id *oid,
             int exclude,
             struct packed_git **found_pack,
             off_t *found_offset)
{
  int want;
  struct list_head *pos;
  struct multi_pack_index *m;

  if (!exclude && local && has_loose_object_nonlocal(oid))
    return 0;

  /*
   * If we already know the pack object lives in, start checks from that
   * pack - in the usual case when neither --local was given nor .keep files
   * are present we will determine the answer right now.
   */
  if (*found_pack) {
    want = want_found_object(oid, exclude, *found_pack);
    if (want != -1)
      return want;

    *found_pack = NULL;
    *found_offset = 0;
  }

  for (m = get_multi_pack_index(the_repository); m; m = m->next) {
    struct pack_entry e;
    if (fill_midx_entry(the_repository, oid, &e, m)) {
      want = want_object_in_pack_one(e.p, oid, exclude, found_pack, found_offset);
      if (want != -1)
        return want;
    }
  }

  list_for_each(pos, get_packed_git_mru(the_repository)) {
    struct packed_git *p = list_entry(pos, struct packed_git, mru);
    want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset);
    if (!exclude && want > 0)
      list_move(&p->mru,
          get_packed_git_mru(the_repository));
    if (want != -1)
      return want;
  }

  if (uri_protocols.nr) {
    struct configured_exclusion *ex =
      oidmap_get(&configured_exclusions, oid);
    int i;
    const char *p;

    if (ex) {
      for (i = 0; i < uri_protocols.nr; i++) {
        if (skip_prefix(ex->uri,
            uri_protocols.items[i].string,
            &p) &&
            *p == ':') {
          oidset_insert(&excluded_by_config, oid);
          return 0;
        }
      }
    }
  }

  return 1;
}

static struct object_entry *create_object_entry(const struct object_id *oid,
            enum object_type type,
            uint32_t hash,
            int exclude,
            int no_try_delta,
            struct packed_git *found_pack,
            off_t found_offset)
{
  struct object_entry *entry;

  entry = packlist_alloc(&to_pack, oid);
  entry->hash = hash;
  oe_set_type(entry, type);
  if (exclude)
    entry->preferred_base = 1;
  else
    nr_result++;
  if (found_pack) {
    oe_set_in_pack(&to_pack, entry, found_pack);
    entry->in_pack_offset = found_offset;
  }

  entry->no_try_delta = no_try_delta;

  return entry;
}

static const char no_closure_warning[] = N_(
"disabling bitmap writing, as some objects are not being packed"
);

static int add_object_entry(const struct object_id *oid, enum object_type type,
          const char *name, int exclude)
{
  struct packed_git *found_pack = NULL;
  off_t found_offset = 0;

  display_progress(progress_state, ++nr_seen);

  if (have_duplicate_entry(oid, exclude))
    return 0;

  if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset)) {
    /* The pack is missing an object, so it will not have closure */
    if (write_bitmap_index) {
      if (write_bitmap_index != WRITE_BITMAP_QUIET)
        warning(_(no_closure_warning));
      write_bitmap_index = 0;
    }
    return 0;
  }

  create_object_entry(oid, type, pack_name_hash(name),
          exclude, name && no_try_delta(name),
          found_pack, found_offset);
  return 1;
}

static int add_object_entry_from_bitmap(const struct object_id *oid,
          enum object_type type,
          int flags UNUSED, uint32_t name_hash,
          struct packed_git *pack, off_t offset)
{
  display_progress(progress_state, ++nr_seen);

  if (have_duplicate_entry(oid, 0))
    return 0;

  if (!want_object_in_pack(oid, 0, &pack, &offset))
    return 0;

  create_object_entry(oid, type, name_hash, 0, 0, pack, offset);
  return 1;
}

struct pbase_tree_cache {
  struct object_id oid;
  int ref;
  int temporary;
  void *tree_data;
  unsigned long tree_size;
};

static struct pbase_tree_cache *(pbase_tree_cache[256]);
static int pbase_tree_cache_ix(const struct object_id *oid)
{
  return oid->hash[0] % ARRAY_SIZE(pbase_tree_cache);
}
static int pbase_tree_cache_ix_incr(int ix)
{
  return (ix+1) % ARRAY_SIZE(pbase_tree_cache);
}
1741
1742
static struct pbase_tree {
1743
  struct pbase_tree *next;
1744
  /* This is a phony "cache" entry; we are not
1745
   * going to evict it or find it through the _get()
1746
   * mechanism -- this is for the toplevel node that
1747
   * would almost always change with any commit.
1748
   */
1749
  struct pbase_tree_cache pcache;
1750
} *pbase_tree;
1751
1752
static struct pbase_tree_cache *pbase_tree_get(const struct object_id *oid)
1753
0
{
1754
0
  struct pbase_tree_cache *ent, *nent;
1755
0
  void *data;
1756
0
  unsigned long size;
1757
0
  enum object_type type;
1758
0
  int neigh;
1759
0
  int my_ix = pbase_tree_cache_ix(oid);
1760
0
  int available_ix = -1;
1761
1762
  /* pbase-tree-cache acts as a limited hashtable.
1763
   * A cached object will be found at its index or within a few
1764
   * slots after it.
1765
   */
1766
0
  for (neigh = 0; neigh < 8; neigh++) {
1767
0
    ent = pbase_tree_cache[my_ix];
1768
0
    if (ent && oideq(&ent->oid, oid)) {
1769
0
      ent->ref++;
1770
0
      return ent;
1771
0
    }
1772
0
    else if (((available_ix < 0) && (!ent || !ent->ref)) ||
1773
0
       ((0 <= available_ix) &&
1774
0
        (!ent && pbase_tree_cache[available_ix])))
1775
0
      available_ix = my_ix;
1776
0
    if (!ent)
1777
0
      break;
1778
0
    my_ix = pbase_tree_cache_ix_incr(my_ix);
1779
0
  }
1780
1781
  /* Did not find one.  Either we got a bogus request or
1782
   * we need to read and perhaps cache.
1783
   */
1784
0
  data = repo_read_object_file(the_repository, oid, &type, &size);
1785
0
  if (!data)
1786
0
    return NULL;
1787
0
  if (type != OBJ_TREE) {
1788
0
    free(data);
1789
0
    return NULL;
1790
0
  }
1791
1792
  /* We need to either cache or return a throwaway copy */
1793
1794
0
  if (available_ix < 0)
1795
0
    ent = NULL;
1796
0
  else {
1797
0
    ent = pbase_tree_cache[available_ix];
1798
0
    my_ix = available_ix;
1799
0
  }
1800
1801
0
  if (!ent) {
1802
0
    nent = xmalloc(sizeof(*nent));
1803
0
    nent->temporary = (available_ix < 0);
1804
0
  }
1805
0
  else {
1806
    /* evict and reuse */
1807
0
    free(ent->tree_data);
1808
0
    nent = ent;
1809
0
  }
1810
0
  oidcpy(&nent->oid, oid);
1811
0
  nent->tree_data = data;
1812
0
  nent->tree_size = size;
1813
0
  nent->ref = 1;
1814
0
  if (!nent->temporary)
1815
0
    pbase_tree_cache[my_ix] = nent;
1816
0
  return nent;
1817
0
}
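
[Editor's note] The lookup above is plain open addressing with linear probing, capped at eight probes (the "neigh < 8" bound). A self-contained sketch of the same pattern, using a hypothetical fixed-size table keyed by raw hashes rather than the actual pbase_tree_cache entries:

#include <string.h>

#define TABLE_SIZE 256  /* matches the modulus in the index function */
#define MAX_PROBES 8    /* give up after a short, fixed probe sequence */

struct slot { unsigned char key[20]; void *value; int used; };
static struct slot table[TABLE_SIZE];

static int slot_ix(const unsigned char *key)
{
	return key[0] % TABLE_SIZE;        /* cf. pbase_tree_cache_ix() */
}

static void *probe_lookup(const unsigned char *key)
{
	int ix = slot_ix(key);
	int n;

	for (n = 0; n < MAX_PROBES; n++) {
		struct slot *s = &table[ix];
		if (!s->used)
			return NULL;               /* empty slot ends the probe chain */
		if (!memcmp(s->key, key, sizeof(s->key)))
			return s->value;           /* hit */
		ix = (ix + 1) % TABLE_SIZE;        /* linear probe: next slot */
	}
	return NULL;                               /* not cached within the window */
}
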
1818
1819
static void pbase_tree_put(struct pbase_tree_cache *cache)
1820
0
{
1821
0
  if (!cache->temporary) {
1822
0
    cache->ref--;
1823
0
    return;
1824
0
  }
1825
0
  free(cache->tree_data);
1826
0
  free(cache);
1827
0
}
1828
1829
static size_t name_cmp_len(const char *name)
1830
0
{
1831
0
  return strcspn(name, "\n/");
1832
0
}
1833
1834
static void add_pbase_object(struct tree_desc *tree,
1835
           const char *name,
1836
           size_t cmplen,
1837
           const char *fullname)
1838
0
{
1839
0
  struct name_entry entry;
1840
0
  int cmp;
1841
1842
0
  while (tree_entry(tree,&entry)) {
1843
0
    if (S_ISGITLINK(entry.mode))
1844
0
      continue;
1845
0
    cmp = tree_entry_len(&entry) != cmplen ? 1 :
1846
0
          memcmp(name, entry.path, cmplen);
1847
0
    if (cmp > 0)
1848
0
      continue;
1849
0
    if (cmp < 0)
1850
0
      return;
1851
0
    if (name[cmplen] != '/') {
1852
0
      add_object_entry(&entry.oid,
1853
0
           object_type(entry.mode),
1854
0
           fullname, 1);
1855
0
      return;
1856
0
    }
1857
0
    if (S_ISDIR(entry.mode)) {
1858
0
      struct tree_desc sub;
1859
0
      struct pbase_tree_cache *tree;
1860
0
      const char *down = name+cmplen+1;
1861
0
      size_t downlen = name_cmp_len(down);
1862
1863
0
      tree = pbase_tree_get(&entry.oid);
1864
0
      if (!tree)
1865
0
        return;
1866
0
      init_tree_desc(&sub, &tree->oid,
1867
0
               tree->tree_data, tree->tree_size);
1868
1869
0
      add_pbase_object(&sub, down, downlen, fullname);
1870
0
      pbase_tree_put(tree);
1871
0
    }
1872
0
  }
1873
0
}
1874
1875
static unsigned *done_pbase_paths;
1876
static int done_pbase_paths_num;
1877
static int done_pbase_paths_alloc;
1878
static int done_pbase_path_pos(unsigned hash)
1879
0
{
1880
0
  int lo = 0;
1881
0
  int hi = done_pbase_paths_num;
1882
0
  while (lo < hi) {
1883
0
    int mi = lo + (hi - lo) / 2;
1884
0
    if (done_pbase_paths[mi] == hash)
1885
0
      return mi;
1886
0
    if (done_pbase_paths[mi] < hash)
1887
0
      hi = mi;
1888
0
    else
1889
0
      lo = mi + 1;
1890
0
  }
1891
0
  return -lo-1;
1892
0
}
1893
1894
static int check_pbase_path(unsigned hash)
1895
0
{
1896
0
  int pos = done_pbase_path_pos(hash);
1897
0
  if (0 <= pos)
1898
0
    return 1;
1899
0
  pos = -pos - 1;
1900
0
  ALLOC_GROW(done_pbase_paths,
1901
0
       done_pbase_paths_num + 1,
1902
0
       done_pbase_paths_alloc);
1903
0
  done_pbase_paths_num++;
1904
0
  if (pos < done_pbase_paths_num)
1905
0
    MOVE_ARRAY(done_pbase_paths + pos + 1, done_pbase_paths + pos,
1906
0
         done_pbase_paths_num - pos - 1);
1907
0
  done_pbase_paths[pos] = hash;
1908
0
  return 0;
1909
0
}
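
[Editor's note] The "-lo-1" convention above packs two answers into one int: a non-negative result is the index of a hit, while a negative result encodes where the missing hash must be inserted to keep the array sorted. A short illustration (hypothetical helper names):

/* Encode "not found, belongs at index ins" as a strictly negative int. */
static int encode_insertion_point(int ins)
{
	return -ins - 1;   /* 0 -> -1, 1 -> -2, ...; never collides with a hit */
}

/* Recover the insertion point; the transform is its own inverse. */
static int decode_insertion_point(int pos)
{
	return -pos - 1;
}

/*
 * Caller pattern, exactly as in check_pbase_path():
 *
 *   int pos = done_pbase_path_pos(hash);
 *   if (0 <= pos)
 *           return 1;        // hash already recorded
 *   pos = -pos - 1;          // slot where the new hash goes
 */
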
1910
1911
static void add_preferred_base_object(const char *name)
1912
0
{
1913
0
  struct pbase_tree *it;
1914
0
  size_t cmplen;
1915
0
  unsigned hash = pack_name_hash(name);
1916
1917
0
  if (!num_preferred_base || check_pbase_path(hash))
1918
0
    return;
1919
1920
0
  cmplen = name_cmp_len(name);
1921
0
  for (it = pbase_tree; it; it = it->next) {
1922
0
    if (cmplen == 0) {
1923
0
      add_object_entry(&it->pcache.oid, OBJ_TREE, NULL, 1);
1924
0
    }
1925
0
    else {
1926
0
      struct tree_desc tree;
1927
0
      init_tree_desc(&tree, &it->pcache.oid,
1928
0
               it->pcache.tree_data, it->pcache.tree_size);
1929
0
      add_pbase_object(&tree, name, cmplen, name);
1930
0
    }
1931
0
  }
1932
0
}
1933
1934
static void add_preferred_base(struct object_id *oid)
1935
0
{
1936
0
  struct pbase_tree *it;
1937
0
  void *data;
1938
0
  unsigned long size;
1939
0
  struct object_id tree_oid;
1940
1941
0
  if (window <= num_preferred_base++)
1942
0
    return;
1943
1944
0
  data = read_object_with_reference(the_repository, oid,
1945
0
            OBJ_TREE, &size, &tree_oid);
1946
0
  if (!data)
1947
0
    return;
1948
1949
0
  for (it = pbase_tree; it; it = it->next) {
1950
0
    if (oideq(&it->pcache.oid, &tree_oid)) {
1951
0
      free(data);
1952
0
      return;
1953
0
    }
1954
0
  }
1955
1956
0
  CALLOC_ARRAY(it, 1);
1957
0
  it->next = pbase_tree;
1958
0
  pbase_tree = it;
1959
1960
0
  oidcpy(&it->pcache.oid, &tree_oid);
1961
0
  it->pcache.tree_data = data;
1962
0
  it->pcache.tree_size = size;
1963
0
}
1964
1965
static void cleanup_preferred_base(void)
1966
0
{
1967
0
  struct pbase_tree *it;
1968
0
  unsigned i;
1969
1970
0
  it = pbase_tree;
1971
0
  pbase_tree = NULL;
1972
0
  while (it) {
1973
0
    struct pbase_tree *tmp = it;
1974
0
    it = tmp->next;
1975
0
    free(tmp->pcache.tree_data);
1976
0
    free(tmp);
1977
0
  }
1978
1979
0
  for (i = 0; i < ARRAY_SIZE(pbase_tree_cache); i++) {
1980
0
    if (!pbase_tree_cache[i])
1981
0
      continue;
1982
0
    free(pbase_tree_cache[i]->tree_data);
1983
0
    FREE_AND_NULL(pbase_tree_cache[i]);
1984
0
  }
1985
1986
0
  FREE_AND_NULL(done_pbase_paths);
1987
0
  done_pbase_paths_num = done_pbase_paths_alloc = 0;
1988
0
}
1989
1990
/*
1991
 * Return 1 iff the object specified by "delta" can be sent
1992
 * literally as a delta against the base in "base_oid". If
1993
 * so, then *base_out will point to the entry in our packing
1994
 * list, or NULL if we must use the external-base list.
1995
 *
1996
 * Depth value does not matter - find_deltas() will
1997
 * never consider a reused delta as the base object to
1998
 * deltify other objects against, in order to avoid
1999
 * circular deltas.
2000
 */
2001
static int can_reuse_delta(const struct object_id *base_oid,
2002
         struct object_entry *delta,
2003
         struct object_entry **base_out)
2004
0
{
2005
0
  struct object_entry *base;
2006
2007
  /*
2008
   * First see if we're already sending the base (or it's explicitly in
2009
   * our "excluded" list).
2010
   */
2011
0
  base = packlist_find(&to_pack, base_oid);
2012
0
  if (base) {
2013
0
    if (!in_same_island(&delta->idx.oid, &base->idx.oid))
2014
0
      return 0;
2015
0
    *base_out = base;
2016
0
    return 1;
2017
0
  }
2018
2019
  /*
2020
   * Otherwise, reachability bitmaps may tell us if the receiver has it,
2021
   * even if it was buried too deep in history to make it into the
2022
   * packing list.
2023
   */
2024
0
  if (thin && bitmap_has_oid_in_uninteresting(bitmap_git, base_oid)) {
2025
0
    if (use_delta_islands) {
2026
0
      if (!in_same_island(&delta->idx.oid, base_oid))
2027
0
        return 0;
2028
0
    }
2029
0
    *base_out = NULL;
2030
0
    return 1;
2031
0
  }
2032
2033
0
  return 0;
2034
0
}
2035
2036
0
static void prefetch_to_pack(uint32_t object_index_start) {
2037
0
  struct oid_array to_fetch = OID_ARRAY_INIT;
2038
0
  uint32_t i;
2039
2040
0
  for (i = object_index_start; i < to_pack.nr_objects; i++) {
2041
0
    struct object_entry *entry = to_pack.objects + i;
2042
2043
0
    if (!oid_object_info_extended(the_repository,
2044
0
                &entry->idx.oid,
2045
0
                NULL,
2046
0
                OBJECT_INFO_FOR_PREFETCH))
2047
0
      continue;
2048
0
    oid_array_append(&to_fetch, &entry->idx.oid);
2049
0
  }
2050
0
  promisor_remote_get_direct(the_repository,
2051
0
           to_fetch.oid, to_fetch.nr);
2052
0
  oid_array_clear(&to_fetch);
2053
0
}
2054
2055
static void check_object(struct object_entry *entry, uint32_t object_index)
2056
0
{
2057
0
  unsigned long canonical_size;
2058
0
  enum object_type type;
2059
0
  struct object_info oi = {.typep = &type, .sizep = &canonical_size};
2060
2061
0
  if (IN_PACK(entry)) {
2062
0
    struct packed_git *p = IN_PACK(entry);
2063
0
    struct pack_window *w_curs = NULL;
2064
0
    int have_base = 0;
2065
0
    struct object_id base_ref;
2066
0
    struct object_entry *base_entry;
2067
0
    unsigned long used, used_0;
2068
0
    unsigned long avail;
2069
0
    off_t ofs;
2070
0
    unsigned char *buf, c;
2071
0
    enum object_type type;
2072
0
    unsigned long in_pack_size;
2073
2074
0
    buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail);
2075
2076
    /*
2077
     * We want in_pack_type even if we do not reuse delta
2078
     * since non-delta representations could still be reused.
2079
     */
2080
0
    used = unpack_object_header_buffer(buf, avail,
2081
0
               &type,
2082
0
               &in_pack_size);
2083
0
    if (used == 0)
2084
0
      goto give_up;
2085
2086
0
    if (type < 0)
2087
0
      BUG("invalid type %d", type);
2088
0
    entry->in_pack_type = type;
2089
2090
    /*
2091
     * Determine if this is a delta and if so whether we can
2092
     * reuse it or not.  Otherwise let's find out as cheaply as
2093
     * possible what the actual type and size for this object is.
2094
     */
2095
0
    switch (entry->in_pack_type) {
2096
0
    default:
2097
      /* Not a delta hence we've already got all we need. */
2098
0
      oe_set_type(entry, entry->in_pack_type);
2099
0
      SET_SIZE(entry, in_pack_size);
2100
0
      entry->in_pack_header_size = used;
2101
0
      if (oe_type(entry) < OBJ_COMMIT || oe_type(entry) > OBJ_BLOB)
2102
0
        goto give_up;
2103
0
      unuse_pack(&w_curs);
2104
0
      return;
2105
0
    case OBJ_REF_DELTA:
2106
0
      if (reuse_delta && !entry->preferred_base) {
2107
0
        oidread(&base_ref,
2108
0
          use_pack(p, &w_curs,
2109
0
             entry->in_pack_offset + used,
2110
0
             NULL),
2111
0
          the_repository->hash_algo);
2112
0
        have_base = 1;
2113
0
      }
2114
0
      entry->in_pack_header_size = used + the_hash_algo->rawsz;
2115
0
      break;
2116
0
    case OBJ_OFS_DELTA:
2117
0
      buf = use_pack(p, &w_curs,
2118
0
               entry->in_pack_offset + used, NULL);
2119
0
      used_0 = 0;
2120
0
      c = buf[used_0++];
2121
0
      ofs = c & 127;
2122
0
      while (c & 128) {
2123
0
        ofs += 1;
2124
0
        if (!ofs || MSB(ofs, 7)) {
2125
0
          error(_("delta base offset overflow in pack for %s"),
2126
0
                oid_to_hex(&entry->idx.oid));
2127
0
          goto give_up;
2128
0
        }
2129
0
        c = buf[used_0++];
2130
0
        ofs = (ofs << 7) + (c & 127);
2131
0
      }
2132
0
      ofs = entry->in_pack_offset - ofs;
2133
0
      if (ofs <= 0 || ofs >= entry->in_pack_offset) {
2134
0
        error(_("delta base offset out of bound for %s"),
2135
0
              oid_to_hex(&entry->idx.oid));
2136
0
        goto give_up;
2137
0
      }
2138
0
      if (reuse_delta && !entry->preferred_base) {
2139
0
        uint32_t pos;
2140
0
        if (offset_to_pack_pos(p, ofs, &pos) < 0)
2141
0
          goto give_up;
2142
0
        if (!nth_packed_object_id(&base_ref, p,
2143
0
                pack_pos_to_index(p, pos)))
2144
0
          have_base = 1;
2145
0
      }
2146
0
      entry->in_pack_header_size = used + used_0;
2147
0
      break;
2148
0
    }
2149
2150
0
    if (have_base &&
2151
0
        can_reuse_delta(&base_ref, entry, &base_entry)) {
2152
0
      oe_set_type(entry, entry->in_pack_type);
2153
0
      SET_SIZE(entry, in_pack_size); /* delta size */
2154
0
      SET_DELTA_SIZE(entry, in_pack_size);
2155
2156
0
      if (base_entry) {
2157
0
        SET_DELTA(entry, base_entry);
2158
0
        entry->delta_sibling_idx = base_entry->delta_child_idx;
2159
0
        SET_DELTA_CHILD(base_entry, entry);
2160
0
      } else {
2161
0
        SET_DELTA_EXT(entry, &base_ref);
2162
0
      }
2163
2164
0
      unuse_pack(&w_curs);
2165
0
      return;
2166
0
    }
2167
2168
0
    if (oe_type(entry)) {
2169
0
      off_t delta_pos;
2170
2171
      /*
2172
       * This must be a delta and we already know what the
2173
       * final object type is.  Let's extract the actual
2174
       * object size from the delta header.
2175
       */
2176
0
      delta_pos = entry->in_pack_offset + entry->in_pack_header_size;
2177
0
      canonical_size = get_size_from_delta(p, &w_curs, delta_pos);
2178
0
      if (canonical_size == 0)
2179
0
        goto give_up;
2180
0
      SET_SIZE(entry, canonical_size);
2181
0
      unuse_pack(&w_curs);
2182
0
      return;
2183
0
    }
2184
2185
    /*
2186
     * No choice but to fall back to the recursive delta walk
2187
     * with oid_object_info() to find about the object type
2188
     * at this point...
2189
     */
2190
0
    give_up:
2191
0
    unuse_pack(&w_curs);
2192
0
  }
2193
2194
0
  if (oid_object_info_extended(the_repository, &entry->idx.oid, &oi,
2195
0
             OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0) {
2196
0
    if (repo_has_promisor_remote(the_repository)) {
2197
0
      prefetch_to_pack(object_index);
2198
0
      if (oid_object_info_extended(the_repository, &entry->idx.oid, &oi,
2199
0
                 OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0)
2200
0
        type = -1;
2201
0
    } else {
2202
0
      type = -1;
2203
0
    }
2204
0
  }
2205
0
  oe_set_type(entry, type);
2206
0
  if (entry->type_valid) {
2207
0
    SET_SIZE(entry, canonical_size);
2208
0
  } else {
2209
    /*
2210
     * Bad object type is checked in prepare_pack().  This is
2211
     * to permit a missing preferred base object to be ignored
2212
     * as a preferred base.  Doing so can result in a larger
2213
     * pack file, but the transfer will still take place.
2214
     */
2215
0
  }
2216
0
}
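
[Editor's note] The OBJ_OFS_DELTA branch above decodes the delta base offset from a variable-length encoding: 7 payload bits per byte, the high bit marking continuation, and an implicit +1 added per continuation byte so that no offset has two encodings. A standalone sketch of that decoder on a 64-bit value (a simplification; the real code reads through the pack window and uses off_t):

#include <stdint.h>
#include <stddef.h>

/*
 * Decode the offset varint used by OBJ_OFS_DELTA. Returns the number
 * of bytes consumed, or 0 if the value would overflow. The caller must
 * supply a buffer containing the complete encoded value.
 */
static size_t decode_base_offset(const unsigned char *buf, uint64_t *out)
{
	size_t used = 0;
	unsigned char c = buf[used++];
	uint64_t ofs = c & 127;

	while (c & 128) {
		ofs += 1;                /* the implicit +1 per continuation */
		if (!ofs || (ofs >> 57)) /* "ofs << 7" below would overflow */
			return 0;
		c = buf[used++];
		ofs = (ofs << 7) + (c & 127);
	}
	*out = ofs;
	return used;
}

/* Example: bytes { 0x81, 0x00 } decode to ((1 + 1) << 7) + 0 = 256. */
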
2217
2218
static int pack_offset_sort(const void *_a, const void *_b)
2219
0
{
2220
0
  const struct object_entry *a = *(struct object_entry **)_a;
2221
0
  const struct object_entry *b = *(struct object_entry **)_b;
2222
0
  const struct packed_git *a_in_pack = IN_PACK(a);
2223
0
  const struct packed_git *b_in_pack = IN_PACK(b);
2224
2225
  /* avoid filesystem thrashing with loose objects */
2226
0
  if (!a_in_pack && !b_in_pack)
2227
0
    return oidcmp(&a->idx.oid, &b->idx.oid);
2228
2229
0
  if (a_in_pack < b_in_pack)
2230
0
    return -1;
2231
0
  if (a_in_pack > b_in_pack)
2232
0
    return 1;
2233
0
  return a->in_pack_offset < b->in_pack_offset ? -1 :
2234
0
      (a->in_pack_offset > b->in_pack_offset);
2235
0
}
2236
2237
/*
2238
 * Drop an on-disk delta we were planning to reuse. Naively, this would
2239
 * just involve blanking out the "delta" field, but we have to deal
2240
 * with some extra book-keeping:
2241
 *
2242
 *   1. Removing ourselves from the delta_sibling linked list.
2243
 *
2244
 *   2. Updating our size/type to the non-delta representation. These were
2245
 *      either not recorded initially (size) or overwritten with the delta type
2246
 *      (type) when check_object() decided to reuse the delta.
2247
 *
2248
 *   3. Resetting our delta depth, as we are now a base object.
2249
 */
2250
static void drop_reused_delta(struct object_entry *entry)
2251
0
{
2252
0
  unsigned *idx = &to_pack.objects[entry->delta_idx - 1].delta_child_idx;
2253
0
  struct object_info oi = OBJECT_INFO_INIT;
2254
0
  enum object_type type;
2255
0
  unsigned long size;
2256
2257
0
  while (*idx) {
2258
0
    struct object_entry *oe = &to_pack.objects[*idx - 1];
2259
2260
0
    if (oe == entry)
2261
0
      *idx = oe->delta_sibling_idx;
2262
0
    else
2263
0
      idx = &oe->delta_sibling_idx;
2264
0
  }
2265
0
  SET_DELTA(entry, NULL);
2266
0
  entry->depth = 0;
2267
2268
0
  oi.sizep = &size;
2269
0
  oi.typep = &type;
2270
0
  if (packed_object_info(the_repository, IN_PACK(entry), entry->in_pack_offset, &oi) < 0) {
2271
    /*
2272
     * We failed to get the info from this pack for some reason;
2273
     * fall back to oid_object_info, which may find another copy.
2274
     * And if that fails, the error will be recorded in oe_type(entry)
2275
     * and dealt with in prepare_pack().
2276
     */
2277
0
    oe_set_type(entry,
2278
0
          oid_object_info(the_repository, &entry->idx.oid, &size));
2279
0
  } else {
2280
0
    oe_set_type(entry, type);
2281
0
  }
2282
0
  SET_SIZE(entry, size);
2283
0
}
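
[Editor's note] The while loop above is the pointer-to-predecessor unlink idiom, applied to a sibling list stored as 1-based indices into to_pack.objects (0 meaning end of list). Distilled to its core with a hypothetical node array:

struct node {
	unsigned next_idx;   /* 1-based index into nodes[]; 0 terminates */
};

/* Splice "victim" out of the list headed by *head_idx. */
static void unlink_node(struct node *nodes, unsigned *head_idx,
			struct node *victim)
{
	unsigned *idx = head_idx;   /* always points at the link to patch */

	while (*idx) {
		struct node *n = &nodes[*idx - 1];

		if (n == victim)
			*idx = n->next_idx;   /* bypass the victim */
		else
			idx = &n->next_idx;   /* advance to the next link */
	}
}

Because idx always addresses the link field itself rather than a node, removing the head of the list needs no special case.
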
2284
2285
/*
2286
 * Follow the chain of deltas from this entry onward, throwing away any links
2287
 * that cause us to hit a cycle (as determined by the DFS state flags in
2288
 * the entries).
2289
 *
2290
 * We also detect too-long reused chains that would violate our --depth
2291
 * limit.
2292
 */
2293
static void break_delta_chains(struct object_entry *entry)
2294
0
{
2295
  /*
2296
   * The actual depth of each object we will write is stored as an int,
2297
   * as it cannot exceed our int "depth" limit. But before we break
2298
   * chains based on that limit, we may potentially go as deep as the
2299
   * number of objects, which is elsewhere bounded to a uint32_t.
2300
   */
2301
0
  uint32_t total_depth;
2302
0
  struct object_entry *cur, *next;
2303
2304
0
  for (cur = entry, total_depth = 0;
2305
0
       cur;
2306
0
       cur = DELTA(cur), total_depth++) {
2307
0
    if (cur->dfs_state == DFS_DONE) {
2308
      /*
2309
       * We've already seen this object and know it isn't
2310
       * part of a cycle. We do need to append its depth
2311
       * to our count.
2312
       */
2313
0
      total_depth += cur->depth;
2314
0
      break;
2315
0
    }
2316
2317
    /*
2318
     * We break cycles before looping, so an ACTIVE state (or any
2319
     * other cruft which made its way into the state variable)
2320
     * is a bug.
2321
     */
2322
0
    if (cur->dfs_state != DFS_NONE)
2323
0
      BUG("confusing delta dfs state in first pass: %d",
2324
0
          cur->dfs_state);
2325
2326
    /*
2327
     * Now we know this is the first time we've seen the object. If
2328
     * it's not a delta, we're done traversing, but we'll mark it
2329
     * done to save time on future traversals.
2330
     */
2331
0
    if (!DELTA(cur)) {
2332
0
      cur->dfs_state = DFS_DONE;
2333
0
      break;
2334
0
    }
2335
2336
    /*
2337
     * Mark ourselves as active and see if the next step causes
2338
     * us to cycle to another active object. It's important to do
2339
     * this _before_ we loop, because it impacts where we make the
2340
     * cut, and thus how our total_depth counter works.
2341
     * E.g., we may see a partial loop like:
2342
     *
2343
     *   A -> B -> C -> D -> B
2344
     *
2345
     * Cutting B->C breaks the cycle. But now the depth of A is
2346
     * only 1, and our total_depth counter is at 3. The size of the
2347
     * error is always one less than the size of the cycle we
2348
     * broke. Commits C and D were "lost" from A's chain.
2349
     *
2350
     * If we instead cut D->B, then the depth of A is correct at 3.
2351
     * We keep all commits in the chain that we examined.
2352
     */
2353
0
    cur->dfs_state = DFS_ACTIVE;
2354
0
    if (DELTA(cur)->dfs_state == DFS_ACTIVE) {
2355
0
      drop_reused_delta(cur);
2356
0
      cur->dfs_state = DFS_DONE;
2357
0
      break;
2358
0
    }
2359
0
  }
2360
2361
  /*
2362
   * And now that we've gone all the way to the bottom of the chain, we
2363
   * need to clear the active flags and set the depth fields as
2364
   * appropriate. Unlike the loop above, which can quit when it drops a
2365
   * delta, we need to keep going to look for more depth cuts. So we need
2366
   * an extra "next" pointer to keep going after we reset cur->delta.
2367
   */
2368
0
  for (cur = entry; cur; cur = next) {
2369
0
    next = DELTA(cur);
2370
2371
    /*
2372
     * We should have a chain of zero or more ACTIVE states down to
2373
     * a final DONE. We can quit after the DONE, because either it
2374
     * has no bases, or we've already handled them in a previous
2375
     * call.
2376
     */
2377
0
    if (cur->dfs_state == DFS_DONE)
2378
0
      break;
2379
0
    else if (cur->dfs_state != DFS_ACTIVE)
2380
0
      BUG("confusing delta dfs state in second pass: %d",
2381
0
          cur->dfs_state);
2382
2383
    /*
2384
     * If the total_depth is more than depth, then we need to snip
2385
     * the chain into two or more smaller chains that don't exceed
2386
     * the maximum depth. Most of the resulting chains will contain
2387
     * (depth + 1) entries (i.e., depth deltas plus one base), and
2388
     * the last chain (i.e., the one containing entry) will contain
2389
     * whatever entries are left over, namely
2390
     * (total_depth % (depth + 1)) of them.
2391
     *
2392
     * Since we are iterating towards decreasing depth, we need to
2393
     * decrement total_depth as we go, and we need to write to the
2394
     * entry what its final depth will be after all of the
2395
     * snipping. Since we're snipping into chains of length (depth
2396
     * + 1) entries, the final depth of an entry will be its
2397
     * original depth modulo (depth + 1). Any time we encounter an
2398
     * entry whose final depth is supposed to be zero, we snip it
2399
     * from its delta base, thereby making it so.
2400
     */
2401
0
    cur->depth = (total_depth--) % (depth + 1);
2402
0
    if (!cur->depth)
2403
0
      drop_reused_delta(cur);
2404
2405
0
    cur->dfs_state = DFS_DONE;
2406
0
  }
2407
0
}
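
[Editor's note] To make the snipping arithmetic concrete: an entry's final depth is its distance from the end of the chain modulo (depth + 1), and every entry whose result is zero gets its delta dropped, becoming a new base. A toy run of just that arithmetic (the real second pass stops at the DFS_DONE base instead of computing its depth):

#include <stdio.h>

int main(void)
{
	int depth = 2;         /* the --depth limit */
	int total_depth = 7;   /* a reused chain of 8 entries = 7 delta links */
	int i;

	for (i = 0; i <= 7; i++) {
		int d = (total_depth--) % (depth + 1);
		printf("entry %d: depth %d%s\n", i, d,
		       d ? "" : " (delta dropped; becomes a base)");
	}
	/*
	 * Prints depths 1 0 2 1 0 2 1 0: the chain is snipped into
	 * pieces of at most depth + 1 = 3 entries each.
	 */
	return 0;
}
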
2408
2409
static void get_object_details(void)
2410
0
{
2411
0
  uint32_t i;
2412
0
  struct object_entry **sorted_by_offset;
2413
2414
0
  if (progress)
2415
0
    progress_state = start_progress(_("Counting objects"),
2416
0
            to_pack.nr_objects);
2417
2418
0
  CALLOC_ARRAY(sorted_by_offset, to_pack.nr_objects);
2419
0
  for (i = 0; i < to_pack.nr_objects; i++)
2420
0
    sorted_by_offset[i] = to_pack.objects + i;
2421
0
  QSORT(sorted_by_offset, to_pack.nr_objects, pack_offset_sort);
2422
2423
0
  for (i = 0; i < to_pack.nr_objects; i++) {
2424
0
    struct object_entry *entry = sorted_by_offset[i];
2425
0
    check_object(entry, i);
2426
0
    if (entry->type_valid &&
2427
0
        oe_size_greater_than(&to_pack, entry, big_file_threshold))
2428
0
      entry->no_try_delta = 1;
2429
0
    display_progress(progress_state, i + 1);
2430
0
  }
2431
0
  stop_progress(&progress_state);
2432
2433
  /*
2434
   * This must happen in a second pass, since we rely on the delta
2435
   * information for the whole list being completed.
2436
   */
2437
0
  for (i = 0; i < to_pack.nr_objects; i++)
2438
0
    break_delta_chains(&to_pack.objects[i]);
2439
2440
0
  free(sorted_by_offset);
2441
0
}
2442
2443
/*
2444
 * We search for deltas in a list sorted by type, by filename hash, and then
2445
 * by size, so that we see progressively smaller and smaller files.
2446
 * That's because we prefer deltas to be from the bigger file
2447
 * to the smaller -- deletes are potentially cheaper, but perhaps
2448
 * more importantly, the bigger file is likely the more recent
2449
 * one.  The deepest deltas are therefore the oldest objects which are
2450
 * less susceptible to be accessed often.
2451
 */
2452
static int type_size_sort(const void *_a, const void *_b)
2453
0
{
2454
0
  const struct object_entry *a = *(struct object_entry **)_a;
2455
0
  const struct object_entry *b = *(struct object_entry **)_b;
2456
0
  const enum object_type a_type = oe_type(a);
2457
0
  const enum object_type b_type = oe_type(b);
2458
0
  const unsigned long a_size = SIZE(a);
2459
0
  const unsigned long b_size = SIZE(b);
2460
2461
0
  if (a_type > b_type)
2462
0
    return -1;
2463
0
  if (a_type < b_type)
2464
0
    return 1;
2465
0
  if (a->hash > b->hash)
2466
0
    return -1;
2467
0
  if (a->hash < b->hash)
2468
0
    return 1;
2469
0
  if (a->preferred_base > b->preferred_base)
2470
0
    return -1;
2471
0
  if (a->preferred_base < b->preferred_base)
2472
0
    return 1;
2473
0
  if (use_delta_islands) {
2474
0
    const int island_cmp = island_delta_cmp(&a->idx.oid, &b->idx.oid);
2475
0
    if (island_cmp)
2476
0
      return island_cmp;
2477
0
  }
2478
0
  if (a_size > b_size)
2479
0
    return -1;
2480
0
  if (a_size < b_size)
2481
0
    return 1;
2482
0
  return a < b ? -1 : (a > b);  /* newest first */
2483
0
}
2484
2485
struct unpacked {
2486
  struct object_entry *entry;
2487
  void *data;
2488
  struct delta_index *index;
2489
  unsigned depth;
2490
};
2491
2492
static int delta_cacheable(unsigned long src_size, unsigned long trg_size,
2493
         unsigned long delta_size)
2494
0
{
2495
0
  if (max_delta_cache_size && delta_cache_size + delta_size > max_delta_cache_size)
2496
0
    return 0;
2497
2498
0
  if (delta_size < cache_max_small_delta_size)
2499
0
    return 1;
2500
2501
  /* cache delta, if objects are large enough compared to delta size */
2502
0
  if ((src_size >> 20) + (trg_size >> 21) > (delta_size >> 10))
2503
0
    return 1;
2504
2505
0
  return 0;
2506
0
}
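
[Editor's note] The shift expression above is a cheap fixed-point test: it caches the delta when delta_size is (roughly) below src_size/1024 + trg_size/2048, i.e. when the delta is tiny relative to the objects it connects. A worked instance with hypothetical sizes:

#include <stdio.h>

/* Same ratio test as the last check in delta_cacheable(); sizes in bytes. */
static int ratio_says_cache(unsigned long src, unsigned long trg,
			    unsigned long delta)
{
	/* roughly: delta < src/1024 + trg/2048 */
	return (src >> 20) + (trg >> 21) > (delta >> 10);
}

int main(void)
{
	unsigned long src = 8UL << 20;   /* 8 MiB source */
	unsigned long trg = 4UL << 20;   /* 4 MiB target */

	/* (8 + 2) > 9  -> cache a ~9 KiB delta */
	printf("%d\n", ratio_says_cache(src, trg, 9UL << 10));
	/* (8 + 2) > 64 is false -> do not cache a 64 KiB delta */
	printf("%d\n", ratio_says_cache(src, trg, 64UL << 10));
	return 0;
}
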
2507
2508
/* Protect delta_cache_size */
2509
static pthread_mutex_t cache_mutex;
2510
0
#define cache_lock()    pthread_mutex_lock(&cache_mutex)
2511
0
#define cache_unlock()    pthread_mutex_unlock(&cache_mutex)
2512
2513
/*
2514
 * Protect object list partitioning (e.g. struct thread_param) and
2515
 * progress_state
2516
 */
2517
static pthread_mutex_t progress_mutex;
2518
0
#define progress_lock()   pthread_mutex_lock(&progress_mutex)
2519
0
#define progress_unlock() pthread_mutex_unlock(&progress_mutex)
2520
2521
/*
2522
 * Access to struct object_entry is unprotected since each thread owns
2523
 * a portion of the main object list. Just don't access object entries
2524
 * ahead in the list because they can be stolen and would need
2525
 * progress_mutex for protection.
2526
 */
2527
2528
static inline int oe_size_less_than(struct packing_data *pack,
2529
            const struct object_entry *lhs,
2530
            unsigned long rhs)
2531
0
{
2532
0
  if (lhs->size_valid)
2533
0
    return lhs->size_ < rhs;
2534
0
  if (rhs < pack->oe_size_limit) /* rhs < 2^x <= lhs ? */
2535
0
    return 0;
2536
0
  return oe_get_size_slow(pack, lhs) < rhs;
2537
0
}
2538
2539
static inline void oe_set_tree_depth(struct packing_data *pack,
2540
             struct object_entry *e,
2541
             unsigned int tree_depth)
2542
0
{
2543
0
  if (!pack->tree_depth)
2544
0
    CALLOC_ARRAY(pack->tree_depth, pack->nr_alloc);
2545
0
  pack->tree_depth[e - pack->objects] = tree_depth;
2546
0
}
2547
2548
/*
2549
 * Return the size of the object without doing any delta
2550
 * reconstruction (so non-deltas are true object sizes, but deltas
2551
 * return the size of the delta data).
2552
 */
2553
unsigned long oe_get_size_slow(struct packing_data *pack,
2554
             const struct object_entry *e)
2555
0
{
2556
0
  struct packed_git *p;
2557
0
  struct pack_window *w_curs;
2558
0
  unsigned char *buf;
2559
0
  enum object_type type;
2560
0
  unsigned long used, avail, size;
2561
2562
0
  if (e->type_ != OBJ_OFS_DELTA && e->type_ != OBJ_REF_DELTA) {
2563
0
    packing_data_lock(&to_pack);
2564
0
    if (oid_object_info(the_repository, &e->idx.oid, &size) < 0)
2565
0
      die(_("unable to get size of %s"),
2566
0
          oid_to_hex(&e->idx.oid));
2567
0
    packing_data_unlock(&to_pack);
2568
0
    return size;
2569
0
  }
2570
2571
0
  p = oe_in_pack(pack, e);
2572
0
  if (!p)
2573
0
    BUG("when e->type is a delta, it must belong to a pack");
2574
2575
0
  packing_data_lock(&to_pack);
2576
0
  w_curs = NULL;
2577
0
  buf = use_pack(p, &w_curs, e->in_pack_offset, &avail);
2578
0
  used = unpack_object_header_buffer(buf, avail, &type, &size);
2579
0
  if (used == 0)
2580
0
    die(_("unable to parse object header of %s"),
2581
0
        oid_to_hex(&e->idx.oid));
2582
2583
0
  unuse_pack(&w_curs);
2584
0
  packing_data_unlock(&to_pack);
2585
0
  return size;
2586
0
}
2587
2588
static int try_delta(struct unpacked *trg, struct unpacked *src,
2589
         unsigned max_depth, unsigned long *mem_usage)
2590
0
{
2591
0
  struct object_entry *trg_entry = trg->entry;
2592
0
  struct object_entry *src_entry = src->entry;
2593
0
  unsigned long trg_size, src_size, delta_size, sizediff, max_size, sz;
2594
0
  unsigned ref_depth;
2595
0
  enum object_type type;
2596
0
  void *delta_buf;
2597
2598
  /* Don't bother doing diffs between different types */
2599
0
  if (oe_type(trg_entry) != oe_type(src_entry))
2600
0
    return -1;
2601
2602
  /*
2603
   * We do not bother to try a delta that we discarded on an
2604
   * earlier try, but only when reusing delta data.  Note that
2605
   * src_entry that is marked as the preferred_base should always
2606
   * be considered, as even if we produce a suboptimal delta against
2607
   * it, we will still save the transfer cost, as we already know
2608
   * the other side has it and we won't send src_entry at all.
2609
   */
2610
0
  if (reuse_delta && IN_PACK(trg_entry) &&
2611
0
      IN_PACK(trg_entry) == IN_PACK(src_entry) &&
2612
0
      !src_entry->preferred_base &&
2613
0
      trg_entry->in_pack_type != OBJ_REF_DELTA &&
2614
0
      trg_entry->in_pack_type != OBJ_OFS_DELTA)
2615
0
    return 0;
2616
2617
  /* Let's not bust the allowed depth. */
2618
0
  if (src->depth >= max_depth)
2619
0
    return 0;
2620
2621
  /* Now some size filtering heuristics. */
2622
0
  trg_size = SIZE(trg_entry);
2623
0
  if (!DELTA(trg_entry)) {
2624
0
    max_size = trg_size/2 - the_hash_algo->rawsz;
2625
0
    ref_depth = 1;
2626
0
  } else {
2627
0
    max_size = DELTA_SIZE(trg_entry);
2628
0
    ref_depth = trg->depth;
2629
0
  }
2630
0
  max_size = (uint64_t)max_size * (max_depth - src->depth) /
2631
0
            (max_depth - ref_depth + 1);
2632
0
  if (max_size == 0)
2633
0
    return 0;
2634
0
  src_size = SIZE(src_entry);
2635
0
  sizediff = src_size < trg_size ? trg_size - src_size : 0;
2636
0
  if (sizediff >= max_size)
2637
0
    return 0;
2638
0
  if (trg_size < src_size / 32)
2639
0
    return 0;
2640
2641
0
  if (!in_same_island(&trg->entry->idx.oid, &src->entry->idx.oid))
2642
0
    return 0;
2643
2644
  /* Load data if not already done */
2645
0
  if (!trg->data) {
2646
0
    packing_data_lock(&to_pack);
2647
0
    trg->data = repo_read_object_file(the_repository,
2648
0
              &trg_entry->idx.oid, &type,
2649
0
              &sz);
2650
0
    packing_data_unlock(&to_pack);
2651
0
    if (!trg->data)
2652
0
      die(_("object %s cannot be read"),
2653
0
          oid_to_hex(&trg_entry->idx.oid));
2654
0
    if (sz != trg_size)
2655
0
      die(_("object %s inconsistent object length (%"PRIuMAX" vs %"PRIuMAX")"),
2656
0
          oid_to_hex(&trg_entry->idx.oid), (uintmax_t)sz,
2657
0
          (uintmax_t)trg_size);
2658
0
    *mem_usage += sz;
2659
0
  }
2660
0
  if (!src->data) {
2661
0
    packing_data_lock(&to_pack);
2662
0
    src->data = repo_read_object_file(the_repository,
2663
0
              &src_entry->idx.oid, &type,
2664
0
              &sz);
2665
0
    packing_data_unlock(&to_pack);
2666
0
    if (!src->data) {
2667
0
      if (src_entry->preferred_base) {
2668
0
        static int warned = 0;
2669
0
        if (!warned++)
2670
0
          warning(_("object %s cannot be read"),
2671
0
            oid_to_hex(&src_entry->idx.oid));
2672
        /*
2673
         * Those objects are not included in the
2674
         * resulting pack.  Be resilient and ignore
2675
         * them if they can't be read, in case the
2676
         * pack could be created nevertheless.
2677
         */
2678
0
        return 0;
2679
0
      }
2680
0
      die(_("object %s cannot be read"),
2681
0
          oid_to_hex(&src_entry->idx.oid));
2682
0
    }
2683
0
    if (sz != src_size)
2684
0
      die(_("object %s inconsistent object length (%"PRIuMAX" vs %"PRIuMAX")"),
2685
0
          oid_to_hex(&src_entry->idx.oid), (uintmax_t)sz,
2686
0
          (uintmax_t)src_size);
2687
0
    *mem_usage += sz;
2688
0
  }
2689
0
  if (!src->index) {
2690
0
    src->index = create_delta_index(src->data, src_size);
2691
0
    if (!src->index) {
2692
0
      static int warned = 0;
2693
0
      if (!warned++)
2694
0
        warning(_("suboptimal pack - out of memory"));
2695
0
      return 0;
2696
0
    }
2697
0
    *mem_usage += sizeof_delta_index(src->index);
2698
0
  }
2699
2700
0
  delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size);
2701
0
  if (!delta_buf)
2702
0
    return 0;
2703
2704
0
  if (DELTA(trg_entry)) {
2705
    /* Prefer only shallower same-sized deltas. */
2706
0
    if (delta_size == DELTA_SIZE(trg_entry) &&
2707
0
        src->depth + 1 >= trg->depth) {
2708
0
      free(delta_buf);
2709
0
      return 0;
2710
0
    }
2711
0
  }
2712
2713
  /*
2714
   * Handle memory allocation outside of the cache
2715
   * accounting lock.  The compiler will optimize the strangeness
2716
   * away when NO_PTHREADS is defined.
2717
   */
2718
0
  free(trg_entry->delta_data);
2719
0
  cache_lock();
2720
0
  if (trg_entry->delta_data) {
2721
0
    delta_cache_size -= DELTA_SIZE(trg_entry);
2722
0
    trg_entry->delta_data = NULL;
2723
0
  }
2724
0
  if (delta_cacheable(src_size, trg_size, delta_size)) {
2725
0
    delta_cache_size += delta_size;
2726
0
    cache_unlock();
2727
0
    trg_entry->delta_data = xrealloc(delta_buf, delta_size);
2728
0
  } else {
2729
0
    cache_unlock();
2730
0
    free(delta_buf);
2731
0
  }
2732
2733
0
  SET_DELTA(trg_entry, src_entry);
2734
0
  SET_DELTA_SIZE(trg_entry, delta_size);
2735
0
  trg->depth = src->depth + 1;
2736
2737
0
  return 1;
2738
0
}
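
[Editor's note] The max_size computation above is the size/depth trade-off in one line: the deeper the candidate base already sits, the smaller a new delta must be to be accepted, scaled linearly by the remaining depth budget. A worked run with hypothetical numbers (SHA-1's 20-byte rawsz, no pre-existing delta, so ref_depth = 1):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned long trg_size = 100000;
	unsigned long base_max = trg_size / 2 - 20;  /* 49980, as for a fresh target */
	unsigned ref_depth = 1, max_depth = 10;
	unsigned src_depth;

	for (src_depth = 0; src_depth < max_depth; src_depth += 3) {
		unsigned long max_size = (uint64_t)base_max *
			(max_depth - src_depth) / (max_depth - ref_depth + 1);
		printf("base at depth %u -> accept deltas up to %lu bytes\n",
		       src_depth, max_size);
	}
	/* depth 0 -> 49980, 3 -> 34986, 6 -> 19992, 9 -> 4998 */
	return 0;
}
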
2739
2740
static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
2741
0
{
2742
0
  struct object_entry *child = DELTA_CHILD(me);
2743
0
  unsigned int m = n;
2744
0
  while (child) {
2745
0
    const unsigned int c = check_delta_limit(child, n + 1);
2746
0
    if (m < c)
2747
0
      m = c;
2748
0
    child = DELTA_SIBLING(child);
2749
0
  }
2750
0
  return m;
2751
0
}
2752
2753
static unsigned long free_unpacked(struct unpacked *n)
2754
0
{
2755
0
  unsigned long freed_mem = sizeof_delta_index(n->index);
2756
0
  free_delta_index(n->index);
2757
0
  n->index = NULL;
2758
0
  if (n->data) {
2759
0
    freed_mem += SIZE(n->entry);
2760
0
    FREE_AND_NULL(n->data);
2761
0
  }
2762
0
  n->entry = NULL;
2763
0
  n->depth = 0;
2764
0
  return freed_mem;
2765
0
}
2766
2767
static void find_deltas(struct object_entry **list, unsigned *list_size,
2768
      int window, int depth, unsigned *processed)
2769
0
{
2770
0
  uint32_t i, idx = 0, count = 0;
2771
0
  struct unpacked *array;
2772
0
  unsigned long mem_usage = 0;
2773
2774
0
  CALLOC_ARRAY(array, window);
2775
2776
0
  for (;;) {
2777
0
    struct object_entry *entry;
2778
0
    struct unpacked *n = array + idx;
2779
0
    int j, max_depth, best_base = -1;
2780
2781
0
    progress_lock();
2782
0
    if (!*list_size) {
2783
0
      progress_unlock();
2784
0
      break;
2785
0
    }
2786
0
    entry = *list++;
2787
0
    (*list_size)--;
2788
0
    if (!entry->preferred_base) {
2789
0
      (*processed)++;
2790
0
      display_progress(progress_state, *processed);
2791
0
    }
2792
0
    progress_unlock();
2793
2794
0
    mem_usage -= free_unpacked(n);
2795
0
    n->entry = entry;
2796
2797
0
    while (window_memory_limit &&
2798
0
           mem_usage > window_memory_limit &&
2799
0
           count > 1) {
2800
0
      const uint32_t tail = (idx + window - count) % window;
2801
0
      mem_usage -= free_unpacked(array + tail);
2802
0
      count--;
2803
0
    }
2804
2805
    /* We do not compute delta to *create* objects we are not
2806
     * going to pack.
2807
     */
2808
0
    if (entry->preferred_base)
2809
0
      goto next;
2810
2811
    /*
2812
     * If the current object is at the pack edge, take the depth of the
2813
     * objects that depend on the current object into account;
2814
     * otherwise they would become too deep.
2815
     */
2816
0
    max_depth = depth;
2817
0
    if (DELTA_CHILD(entry)) {
2818
0
      max_depth -= check_delta_limit(entry, 0);
2819
0
      if (max_depth <= 0)
2820
0
        goto next;
2821
0
    }
2822
2823
0
    j = window;
2824
0
    while (--j > 0) {
2825
0
      int ret;
2826
0
      uint32_t other_idx = idx + j;
2827
0
      struct unpacked *m;
2828
0
      if (other_idx >= window)
2829
0
        other_idx -= window;
2830
0
      m = array + other_idx;
2831
0
      if (!m->entry)
2832
0
        break;
2833
0
      ret = try_delta(n, m, max_depth, &mem_usage);
2834
0
      if (ret < 0)
2835
0
        break;
2836
0
      else if (ret > 0)
2837
0
        best_base = other_idx;
2838
0
    }
2839
2840
    /*
2841
     * If we decided to cache the delta data, then it is best
2842
     * to compress it right away.  First because we have to do
2843
     * it anyway, and doing it here while we're threaded will
2844
     * save a lot of time in the non threaded write phase,
2845
     * as well as allow for caching more deltas within
2846
     * the same cache size limit.
2847
     * ...
2848
     * But only if not writing to stdout, since in that case
2849
     * the network is most likely throttling writes anyway,
2850
     * and therefore it is best to go to the write phase ASAP
2851
     * instead, as we can afford spending more time compressing
2852
     * between writes at that moment.
2853
     */
2854
0
    if (entry->delta_data && !pack_to_stdout) {
2855
0
      unsigned long size;
2856
2857
0
      size = do_compress(&entry->delta_data, DELTA_SIZE(entry));
2858
0
      if (size < (1U << OE_Z_DELTA_BITS)) {
2859
0
        entry->z_delta_size = size;
2860
0
        cache_lock();
2861
0
        delta_cache_size -= DELTA_SIZE(entry);
2862
0
        delta_cache_size += entry->z_delta_size;
2863
0
        cache_unlock();
2864
0
      } else {
2865
0
        FREE_AND_NULL(entry->delta_data);
2866
0
        entry->z_delta_size = 0;
2867
0
      }
2868
0
    }
2869
2870
    /* if we made n a delta, and if n is already at max
2871
     * depth, leaving it in the window is pointless.  we
2872
     * should evict it first.
2873
     */
2874
0
    if (DELTA(entry) && max_depth <= n->depth)
2875
0
      continue;
2876
2877
    /*
2878
     * Move the best delta base up in the window, after the
2879
     * currently deltified object, to keep it longer.  It will
2880
     * be the first base object to be attempted next.
2881
     */
2882
0
    if (DELTA(entry)) {
2883
0
      struct unpacked swap = array[best_base];
2884
0
      int dist = (window + idx - best_base) % window;
2885
0
      int dst = best_base;
2886
0
      while (dist--) {
2887
0
        int src = (dst + 1) % window;
2888
0
        array[dst] = array[src];
2889
0
        dst = src;
2890
0
      }
2891
0
      array[dst] = swap;
2892
0
    }
2893
2894
0
    next:
2895
0
    idx++;
2896
0
    if (count + 1 < window)
2897
0
      count++;
2898
0
    if (idx >= window)
2899
0
      idx = 0;
2900
0
  }
2901
2902
0
  for (i = 0; i < window; ++i) {
2903
0
    free_delta_index(array[i].index);
2904
0
    free(array[i].data);
2905
0
  }
2906
0
  free(array);
2907
0
}
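
[Editor's note] The promotion shuffle near the end of the loop above is a circular-buffer rotation: the best base is pulled out, everything between it and the current slot slides back one place, and the base is reinserted right after the current object so it is tried first next round. The same rotation, factored out as a commented sketch (reusing struct unpacked from above):

/* Rotate so that array[best] ends up adjacent to array[idx]. */
static void promote_base(struct unpacked *array, int window,
			 int idx, int best)
{
	struct unpacked swap = array[best];
	int dist = (window + idx - best) % window;  /* circular distance */
	int dst = best;

	while (dist--) {
		int src = (dst + 1) % window;
		array[dst] = array[src];   /* slide one slot back */
		dst = src;
	}
	array[dst] = swap;                 /* the base lands next to idx */
}
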
2908
2909
/*
2910
 * The main object list is split into smaller lists, each is handed to
2911
 * one worker.
2912
 *
2913
 * The main thread waits on the condition that (at least) one of the workers
2914
 * has stopped working (which is indicated in the .working member of
2915
 * struct thread_params).
2916
 *
2917
 * When a work thread has completed its work, it sets .working to 0 and
2918
 * signals the main thread and waits on the condition that .data_ready
2919
 * becomes 1.
2920
 *
2921
 * The main thread steals half of the work from the worker that has
2922
 * most work left to hand it to the idle worker.
2923
 */
2924
2925
struct thread_params {
2926
  pthread_t thread;
2927
  struct object_entry **list;
2928
  unsigned list_size;
2929
  unsigned remaining;
2930
  int window;
2931
  int depth;
2932
  int working;
2933
  int data_ready;
2934
  pthread_mutex_t mutex;
2935
  pthread_cond_t cond;
2936
  unsigned *processed;
2937
};
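
[Editor's note] The comment above describes a two-lock handshake: the per-thread mutex/cond pair guards the data_ready token, while progress_mutex/progress_cond lets workers announce idleness. Distilled to its two halves (a sketch only, reusing struct thread_params and the progress_lock()/progress_unlock() macros from above; assumes <pthread.h> and strips the work-list bookkeeping):

/* Worker side: announce idleness, then block until new work arrives. */
static void worker_announce_and_wait(struct thread_params *me)
{
	progress_lock();
	me->working = 0;
	pthread_cond_signal(&progress_cond);   /* wake the main thread */
	progress_unlock();

	pthread_mutex_lock(&me->mutex);
	while (!me->data_ready)                /* tolerate spurious wakeups */
		pthread_cond_wait(&me->cond, &me->mutex);
	me->data_ready = 0;                    /* consume the token */
	pthread_mutex_unlock(&me->mutex);
}

/* Main-thread side: after refilling target->list, raise data_ready. */
static void hand_over_work(struct thread_params *target)
{
	pthread_mutex_lock(&target->mutex);
	target->data_ready = 1;
	pthread_cond_signal(&target->cond);
	pthread_mutex_unlock(&target->mutex);
}
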
2938
2939
static pthread_cond_t progress_cond;
2940
2941
/*
2942
 * Mutex and conditional variable can't be statically-initialized on Windows.
2943
 */
2944
static void init_threaded_search(void)
2945
0
{
2946
0
  pthread_mutex_init(&cache_mutex, NULL);
2947
0
  pthread_mutex_init(&progress_mutex, NULL);
2948
0
  pthread_cond_init(&progress_cond, NULL);
2949
0
}
2950
2951
static void cleanup_threaded_search(void)
2952
0
{
2953
0
  pthread_cond_destroy(&progress_cond);
2954
0
  pthread_mutex_destroy(&cache_mutex);
2955
0
  pthread_mutex_destroy(&progress_mutex);
2956
0
}
2957
2958
static void *threaded_find_deltas(void *arg)
2959
0
{
2960
0
  struct thread_params *me = arg;
2961
2962
0
  progress_lock();
2963
0
  while (me->remaining) {
2964
0
    progress_unlock();
2965
2966
0
    find_deltas(me->list, &me->remaining,
2967
0
          me->window, me->depth, me->processed);
2968
2969
0
    progress_lock();
2970
0
    me->working = 0;
2971
0
    pthread_cond_signal(&progress_cond);
2972
0
    progress_unlock();
2973
2974
    /*
2975
     * We must not set ->data_ready before we wait on the
2976
     * condition because the main thread may have set it to 1
2977
     * before we get here. In order to be sure that new
2978
     * work is available if we see 1 in ->data_ready, it
2979
     * was initialized to 0 before this thread was spawned
2980
     * and we reset it to 0 right away.
2981
     */
2982
0
    pthread_mutex_lock(&me->mutex);
2983
0
    while (!me->data_ready)
2984
0
      pthread_cond_wait(&me->cond, &me->mutex);
2985
0
    me->data_ready = 0;
2986
0
    pthread_mutex_unlock(&me->mutex);
2987
2988
0
    progress_lock();
2989
0
  }
2990
0
  progress_unlock();
2991
  /* leave ->working 1 so that this doesn't get more work assigned */
2992
0
  return NULL;
2993
0
}
2994
2995
static void ll_find_deltas(struct object_entry **list, unsigned list_size,
2996
         int window, int depth, unsigned *processed)
2997
0
{
2998
0
  struct thread_params *p;
2999
0
  int i, ret, active_threads = 0;
3000
3001
0
  init_threaded_search();
3002
3003
0
  if (delta_search_threads <= 1) {
3004
0
    find_deltas(list, &list_size, window, depth, processed);
3005
0
    cleanup_threaded_search();
3006
0
    return;
3007
0
  }
3008
0
  if (progress > pack_to_stdout)
3009
0
    fprintf_ln(stderr, _("Delta compression using up to %d threads"),
3010
0
         delta_search_threads);
3011
0
  CALLOC_ARRAY(p, delta_search_threads);
3012
3013
  /* Partition the work amongst work threads. */
3014
0
  for (i = 0; i < delta_search_threads; i++) {
3015
0
    unsigned sub_size = list_size / (delta_search_threads - i);
3016
3017
    /* don't use too small segments or no deltas will be found */
3018
0
    if (sub_size < 2*window && i+1 < delta_search_threads)
3019
0
      sub_size = 0;
3020
3021
0
    p[i].window = window;
3022
0
    p[i].depth = depth;
3023
0
    p[i].processed = processed;
3024
0
    p[i].working = 1;
3025
0
    p[i].data_ready = 0;
3026
3027
    /* try to split chunks on "path" boundaries */
3028
0
    while (sub_size && sub_size < list_size &&
3029
0
           list[sub_size]->hash &&
3030
0
           list[sub_size]->hash == list[sub_size-1]->hash)
3031
0
      sub_size++;
3032
3033
0
    p[i].list = list;
3034
0
    p[i].list_size = sub_size;
3035
0
    p[i].remaining = sub_size;
3036
3037
0
    list += sub_size;
3038
0
    list_size -= sub_size;
3039
0
  }
3040
3041
  /* Start work threads. */
3042
0
  for (i = 0; i < delta_search_threads; i++) {
3043
0
    if (!p[i].list_size)
3044
0
      continue;
3045
0
    pthread_mutex_init(&p[i].mutex, NULL);
3046
0
    pthread_cond_init(&p[i].cond, NULL);
3047
0
    ret = pthread_create(&p[i].thread, NULL,
3048
0
             threaded_find_deltas, &p[i]);
3049
0
    if (ret)
3050
0
      die(_("unable to create thread: %s"), strerror(ret));
3051
0
    active_threads++;
3052
0
  }
3053
3054
  /*
3055
   * Now let's wait for work completion.  Each time a thread is done
3056
   * with its work, we steal half of the remaining work from the
3057
   * thread with the largest number of unprocessed objects and give
3058
   * it to that newly idle thread.  This ensures good load balancing
3059
   * until the remaining object list segments are simply too short
3060
   * to be worth splitting anymore.
3061
   */
3062
0
  while (active_threads) {
3063
0
    struct thread_params *target = NULL;
3064
0
    struct thread_params *victim = NULL;
3065
0
    unsigned sub_size = 0;
3066
3067
0
    progress_lock();
3068
0
    for (;;) {
3069
0
      for (i = 0; !target && i < delta_search_threads; i++)
3070
0
        if (!p[i].working)
3071
0
          target = &p[i];
3072
0
      if (target)
3073
0
        break;
3074
0
      pthread_cond_wait(&progress_cond, &progress_mutex);
3075
0
    }
3076
3077
0
    for (i = 0; i < delta_search_threads; i++)
3078
0
      if (p[i].remaining > 2*window &&
3079
0
          (!victim || victim->remaining < p[i].remaining))
3080
0
        victim = &p[i];
3081
0
    if (victim) {
3082
0
      sub_size = victim->remaining / 2;
3083
0
      list = victim->list + victim->list_size - sub_size;
3084
0
      while (sub_size && list[0]->hash &&
3085
0
             list[0]->hash == list[-1]->hash) {
3086
0
        list++;
3087
0
        sub_size--;
3088
0
      }
3089
0
      if (!sub_size) {
3090
        /*
3091
         * It is possible for some "paths" to have
3092
         * so many objects that no hash boundary
3093
         * might be found.  Let's just steal the
3094
         * exact half in that case.
3095
         */
3096
0
        sub_size = victim->remaining / 2;
3097
0
        list -= sub_size;
3098
0
      }
3099
0
      target->list = list;
3100
0
      victim->list_size -= sub_size;
3101
0
      victim->remaining -= sub_size;
3102
0
    }
3103
0
    target->list_size = sub_size;
3104
0
    target->remaining = sub_size;
3105
0
    target->working = 1;
3106
0
    progress_unlock();
3107
3108
0
    pthread_mutex_lock(&target->mutex);
3109
0
    target->data_ready = 1;
3110
0
    pthread_cond_signal(&target->cond);
3111
0
    pthread_mutex_unlock(&target->mutex);
3112
3113
0
    if (!sub_size) {
3114
0
      pthread_join(target->thread, NULL);
3115
0
      pthread_cond_destroy(&target->cond);
3116
0
      pthread_mutex_destroy(&target->mutex);
3117
0
      active_threads--;
3118
0
    }
3119
0
  }
3120
0
  cleanup_threaded_search();
3121
0
  free(p);
3122
0
}
3123
3124
static int obj_is_packed(const struct object_id *oid)
3125
0
{
3126
0
  return packlist_find(&to_pack, oid) ||
3127
0
    (reuse_packfile_bitmap &&
3128
0
     bitmap_walk_contains(bitmap_git, reuse_packfile_bitmap, oid));
3129
0
}
3130
3131
static void add_tag_chain(const struct object_id *oid)
3132
0
{
3133
0
  struct tag *tag;
3134
3135
  /*
3136
   * We catch duplicates already in add_object_entry(), but we'd
3137
   * prefer to do this extra check to avoid having to parse the
3138
   * tag at all if we already know that it's being packed (e.g., if
3139
   * it was included via bitmaps, we would not have parsed it
3140
   * previously).
3141
   */
3142
0
  if (obj_is_packed(oid))
3143
0
    return;
3144
3145
0
  tag = lookup_tag(the_repository, oid);
3146
0
  while (1) {
3147
0
    if (!tag || parse_tag(tag) || !tag->tagged)
3148
0
      die(_("unable to pack objects reachable from tag %s"),
3149
0
          oid_to_hex(oid));
3150
3151
0
    add_object_entry(&tag->object.oid, OBJ_TAG, NULL, 0);
3152
3153
0
    if (tag->tagged->type != OBJ_TAG)
3154
0
      return;
3155
3156
0
    tag = (struct tag *)tag->tagged;
3157
0
  }
3158
0
}
3159
3160
static int add_ref_tag(const char *tag UNUSED, const char *referent UNUSED, const struct object_id *oid,
3161
           int flag UNUSED, void *cb_data UNUSED)
3162
0
{
3163
0
  struct object_id peeled;
3164
3165
0
  if (!peel_iterated_oid(the_repository, oid, &peeled) && obj_is_packed(&peeled))
3166
0
    add_tag_chain(oid);
3167
0
  return 0;
3168
0
}
3169
3170
static void prepare_pack(int window, int depth)
3171
0
{
3172
0
  struct object_entry **delta_list;
3173
0
  uint32_t i, nr_deltas;
3174
0
  unsigned n;
3175
3176
0
  if (use_delta_islands)
3177
0
    resolve_tree_islands(the_repository, progress, &to_pack);
3178
3179
0
  get_object_details();
3180
3181
  /*
3182
   * If we're locally repacking then we need to be doubly careful
3183
   * from now on in order to make sure no stealth corruption gets
3184
   * propagated to the new pack.  Clients receiving streamed packs
3185
   * should validate everything they get anyway so no need to incur
3186
   * the additional cost here in that case.
3187
   */
3188
0
  if (!pack_to_stdout)
3189
0
    do_check_packed_object_crc = 1;
3190
3191
0
  if (!to_pack.nr_objects || !window || !depth)
3192
0
    return;
3193
3194
0
  ALLOC_ARRAY(delta_list, to_pack.nr_objects);
3195
0
  nr_deltas = n = 0;
3196
3197
0
  for (i = 0; i < to_pack.nr_objects; i++) {
3198
0
    struct object_entry *entry = to_pack.objects + i;
3199
3200
0
    if (DELTA(entry))
3201
      /* This happens if we decided to reuse existing
3202
       * delta from a pack.  "reuse_delta &&" is implied.
3203
       */
3204
0
      continue;
3205
3206
0
    if (!entry->type_valid ||
3207
0
        oe_size_less_than(&to_pack, entry, 50))
3208
0
      continue;
3209
3210
0
    if (entry->no_try_delta)
3211
0
      continue;
3212
3213
0
    if (!entry->preferred_base) {
3214
0
      nr_deltas++;
3215
0
      if (oe_type(entry) < 0)
3216
0
        die(_("unable to get type of object %s"),
3217
0
            oid_to_hex(&entry->idx.oid));
3218
0
    } else {
3219
0
      if (oe_type(entry) < 0) {
3220
        /*
3221
         * This object is not found, but we
3222
         * don't have to include it anyway.
3223
         */
3224
0
        continue;
3225
0
      }
3226
0
    }
3227
3228
0
    delta_list[n++] = entry;
3229
0
  }
3230
3231
0
  if (nr_deltas && n > 1) {
3232
0
    unsigned nr_done = 0;
3233
3234
0
    if (progress)
3235
0
      progress_state = start_progress(_("Compressing objects"),
3236
0
              nr_deltas);
3237
0
    QSORT(delta_list, n, type_size_sort);
3238
0
    ll_find_deltas(delta_list, n, window+1, depth, &nr_done);
3239
0
    stop_progress(&progress_state);
3240
0
    if (nr_done != nr_deltas)
3241
0
      die(_("inconsistency with delta count"));
3242
0
  }
3243
0
  free(delta_list);
3244
0
}
3245
3246
static int git_pack_config(const char *k, const char *v,
3247
         const struct config_context *ctx, void *cb)
3248
0
{
3249
0
  if (!strcmp(k, "pack.window")) {
3250
0
    window = git_config_int(k, v, ctx->kvi);
3251
0
    return 0;
3252
0
  }
3253
0
  if (!strcmp(k, "pack.windowmemory")) {
3254
0
    window_memory_limit = git_config_ulong(k, v, ctx->kvi);
3255
0
    return 0;
3256
0
  }
3257
0
  if (!strcmp(k, "pack.depth")) {
3258
0
    depth = git_config_int(k, v, ctx->kvi);
3259
0
    return 0;
3260
0
  }
3261
0
  if (!strcmp(k, "pack.deltacachesize")) {
3262
0
    max_delta_cache_size = git_config_int(k, v, ctx->kvi);
3263
0
    return 0;
3264
0
  }
3265
0
  if (!strcmp(k, "pack.deltacachelimit")) {
3266
0
    cache_max_small_delta_size = git_config_int(k, v, ctx->kvi);
3267
0
    return 0;
3268
0
  }
3269
0
  if (!strcmp(k, "pack.writebitmaphashcache")) {
3270
0
    if (git_config_bool(k, v))
3271
0
      write_bitmap_options |= BITMAP_OPT_HASH_CACHE;
3272
0
    else
3273
0
      write_bitmap_options &= ~BITMAP_OPT_HASH_CACHE;
3274
0
  }
3275
3276
0
  if (!strcmp(k, "pack.writebitmaplookuptable")) {
3277
0
    if (git_config_bool(k, v))
3278
0
      write_bitmap_options |= BITMAP_OPT_LOOKUP_TABLE;
3279
0
    else
3280
0
      write_bitmap_options &= ~BITMAP_OPT_LOOKUP_TABLE;
3281
0
  }
3282
3283
0
  if (!strcmp(k, "pack.usebitmaps")) {
3284
0
    use_bitmap_index_default = git_config_bool(k, v);
3285
0
    return 0;
3286
0
  }
3287
0
  if (!strcmp(k, "pack.allowpackreuse")) {
3288
0
    int res = git_parse_maybe_bool_text(v);
3289
0
    if (res < 0) {
3290
0
      if (!strcasecmp(v, "single"))
3291
0
        allow_pack_reuse = SINGLE_PACK_REUSE;
3292
0
      else if (!strcasecmp(v, "multi"))
3293
0
        allow_pack_reuse = MULTI_PACK_REUSE;
3294
0
      else
3295
0
        die(_("invalid pack.allowPackReuse value: '%s'"), v);
3296
0
    } else if (res) {
3297
0
      allow_pack_reuse = SINGLE_PACK_REUSE;
3298
0
    } else {
3299
0
      allow_pack_reuse = NO_PACK_REUSE;
3300
0
    }
3301
0
    return 0;
3302
0
  }
3303
0
  if (!strcmp(k, "pack.threads")) {
3304
0
    delta_search_threads = git_config_int(k, v, ctx->kvi);
3305
0
    if (delta_search_threads < 0)
3306
0
      die(_("invalid number of threads specified (%d)"),
3307
0
          delta_search_threads);
3308
0
    if (!HAVE_THREADS && delta_search_threads != 1) {
3309
0
      warning(_("no threads support, ignoring %s"), k);
3310
0
      delta_search_threads = 0;
3311
0
    }
3312
0
    return 0;
3313
0
  }
3314
0
  if (!strcmp(k, "pack.indexversion")) {
3315
0
    pack_idx_opts.version = git_config_int(k, v, ctx->kvi);
3316
0
    if (pack_idx_opts.version > 2)
3317
0
      die(_("bad pack.indexVersion=%"PRIu32),
3318
0
          pack_idx_opts.version);
3319
0
    return 0;
3320
0
  }
3321
0
  if (!strcmp(k, "pack.writereverseindex")) {
3322
0
    if (git_config_bool(k, v))
3323
0
      pack_idx_opts.flags |= WRITE_REV;
3324
0
    else
3325
0
      pack_idx_opts.flags &= ~WRITE_REV;
3326
0
    return 0;
3327
0
  }
3328
0
  if (!strcmp(k, "uploadpack.blobpackfileuri")) {
3329
0
    struct configured_exclusion *ex;
3330
0
    const char *oid_end, *pack_end;
3331
    /*
3332
     * Stores the pack hash. This is not a true object ID, but is
3333
     * of the same form.
3334
     */
3335
0
    struct object_id pack_hash;
3336
3337
0
    if (!v)
3338
0
      return config_error_nonbool(k);
3339
3340
0
    ex = xmalloc(sizeof(*ex));
3341
0
    if (parse_oid_hex(v, &ex->e.oid, &oid_end) ||
3342
0
        *oid_end != ' ' ||
3343
0
        parse_oid_hex(oid_end + 1, &pack_hash, &pack_end) ||
3344
0
        *pack_end != ' ')
3345
0
      die(_("value of uploadpack.blobpackfileuri must be "
3346
0
            "of the form '<object-hash> <pack-hash> <uri>' (got '%s')"), v);
3347
0
    if (oidmap_get(&configured_exclusions, &ex->e.oid))
3348
0
      die(_("object already configured in another "
3349
0
            "uploadpack.blobpackfileuri (got '%s')"), v);
3350
0
    ex->pack_hash_hex = xcalloc(1, pack_end - oid_end);
3351
0
    memcpy(ex->pack_hash_hex, oid_end + 1, pack_end - oid_end - 1);
3352
0
    ex->uri = xstrdup(pack_end + 1);
3353
0
    oidmap_put(&configured_exclusions, ex);
3354
0
  }
3355
0
  return git_default_config(k, v, ctx, cb);
3356
0
}
3357
3358
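For reference: the pack.allowPackReuse fallback above tries a boolean parse first and only then matches the "single"/"multi" keywords. A minimal standalone sketch of that ordering follows (hypothetical names; only a few of the boolean spellings git accepts are shown):

/* Illustrative sketch only, not part of pack-objects.c. */
#include <stdio.h>
#include <strings.h>

enum reuse_mode { NO_REUSE, SINGLE_REUSE, MULTI_REUSE };

static int parse_allow_pack_reuse(const char *v, enum reuse_mode *out)
{
  /* boolean spellings win first, as in the config callback above */
  if (!strcasecmp(v, "true") || !strcasecmp(v, "yes")) {
    *out = SINGLE_REUSE;
    return 0;
  }
  if (!strcasecmp(v, "false") || !strcasecmp(v, "no")) {
    *out = NO_REUSE;
    return 0;
  }
  /* then the explicit mode keywords */
  if (!strcasecmp(v, "single")) {
    *out = SINGLE_REUSE;
    return 0;
  }
  if (!strcasecmp(v, "multi")) {
    *out = MULTI_REUSE;
    return 0;
  }
  return -1; /* caller would die("invalid pack.allowPackReuse value") */
}

int main(void)
{
  const char *samples[] = { "true", "no", "single", "multi", "bogus" };
  for (int i = 0; i < 5; i++) {
    enum reuse_mode mode;
    if (parse_allow_pack_reuse(samples[i], &mode))
      printf("%s -> invalid\n", samples[i]);
    else
      printf("%s -> %d\n", samples[i], (int)mode);
  }
  return 0;
}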
/* Counters for trace2 output when in --stdin-packs mode. */
3359
static int stdin_packs_found_nr;
3360
static int stdin_packs_hints_nr;
3361
3362
static int add_object_entry_from_pack(const struct object_id *oid,
3363
              struct packed_git *p,
3364
              uint32_t pos,
3365
              void *_data)
3366
0
{
3367
0
  off_t ofs;
3368
0
  enum object_type type = OBJ_NONE;
3369
3370
0
  display_progress(progress_state, ++nr_seen);
3371
3372
0
  if (have_duplicate_entry(oid, 0))
3373
0
    return 0;
3374
3375
0
  ofs = nth_packed_object_offset(p, pos);
3376
0
  if (!want_object_in_pack(oid, 0, &p, &ofs))
3377
0
    return 0;
3378
3379
0
  if (p) {
3380
0
    struct rev_info *revs = _data;
3381
0
    struct object_info oi = OBJECT_INFO_INIT;
3382
3383
0
    oi.typep = &type;
3384
0
    if (packed_object_info(the_repository, p, ofs, &oi) < 0) {
3385
0
      die(_("could not get type of object %s in pack %s"),
3386
0
          oid_to_hex(oid), p->pack_name);
3387
0
    } else if (type == OBJ_COMMIT) {
3388
      /*
3389
       * commits in included packs are used as starting points for the
3390
       * subsequent revision walk
3391
       */
3392
0
      add_pending_oid(revs, NULL, oid, 0);
3393
0
    }
3394
3395
0
    stdin_packs_found_nr++;
3396
0
  }
3397
3398
0
  create_object_entry(oid, type, 0, 0, 0, p, ofs);
3399
3400
0
  return 0;
3401
0
}
3402
3403
static void show_commit_pack_hint(struct commit *commit UNUSED,
3404
          void *data UNUSED)
3405
0
{
3406
  /* nothing to do; commits don't have a namehash */
3407
0
}
3408
3409
static void show_object_pack_hint(struct object *object, const char *name,
3410
          void *data UNUSED)
3411
0
{
3412
0
  struct object_entry *oe = packlist_find(&to_pack, &object->oid);
3413
0
  if (!oe)
3414
0
    return;
3415
3416
  /*
3417
   * Our 'to_pack' list was constructed by iterating all objects packed in
3418
   * included packs, and so doesn't have a non-zero hash field that you
3419
   * would typically pick up during a reachability traversal.
3420
   *
3421
   * Make a best-effort attempt to fill in the ->hash and ->no_try_delta
3422
   * here using a name in order to perhaps improve the delta selection
3423
   * process.
3424
   */
3425
0
  oe->hash = pack_name_hash(name);
3426
0
  oe->no_try_delta = name && no_try_delta(name);
3427
3428
0
  stdin_packs_hints_nr++;
3429
0
}
3430
3431
static int pack_mtime_cmp(const void *_a, const void *_b)
3432
0
{
3433
0
  struct packed_git *a = ((const struct string_list_item*)_a)->util;
3434
0
  struct packed_git *b = ((const struct string_list_item*)_b)->util;
3435
3436
  /*
3437
   * order packs by descending mtime so that objects are laid out
3438
   * roughly as newest-to-oldest
3439
   */
3440
0
  if (a->mtime < b->mtime)
3441
0
    return 1;
3442
0
  else if (b->mtime < a->mtime)
3443
0
    return -1;
3444
0
  else
3445
0
    return 0;
3446
0
}
3447
3448
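To see the effect of this comparator, the standalone sketch below (toy struct, not git's packed_git) sorts three fake packs with qsort() and prints them newest-first:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Toy stand-in for a pack: only the mtime matters for ordering. */
struct toy_pack { const char *name; time_t mtime; };

/* Same shape as pack_mtime_cmp above: return positive when 'a' is
 * older, so qsort() puts newer packs first (descending mtime). */
static int toy_mtime_cmp(const void *a_, const void *b_)
{
  const struct toy_pack *a = a_, *b = b_;
  if (a->mtime < b->mtime)
    return 1;
  else if (b->mtime < a->mtime)
    return -1;
  else
    return 0;
}

int main(void)
{
  struct toy_pack packs[] = {
    { "pack-old", 100 }, { "pack-new", 300 }, { "pack-mid", 200 },
  };
  qsort(packs, 3, sizeof(packs[0]), toy_mtime_cmp);
  for (int i = 0; i < 3; i++)
    printf("%s\n", packs[i].name); /* pack-new, pack-mid, pack-old */
  return 0;
}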
static void read_packs_list_from_stdin(void)
3449
0
{
3450
0
  struct strbuf buf = STRBUF_INIT;
3451
0
  struct string_list include_packs = STRING_LIST_INIT_DUP;
3452
0
  struct string_list exclude_packs = STRING_LIST_INIT_DUP;
3453
0
  struct string_list_item *item = NULL;
3454
3455
0
  struct packed_git *p;
3456
0
  struct rev_info revs;
3457
3458
0
  repo_init_revisions(the_repository, &revs, NULL);
3459
  /*
3460
   * Use a revision walk to fill in the namehash of objects in the include
3461
   * packs. To save time, we'll avoid traversing through objects that are
3462
   * in excluded packs.
3463
   *
3464
   * That may cause us to avoid populating all of the namehash fields of
3465
   * all included objects, but our goal is best-effort, since this is only
3466
   * an optimization during delta selection.
3467
   */
3468
0
  revs.no_kept_objects = 1;
3469
0
  revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
3470
0
  revs.blob_objects = 1;
3471
0
  revs.tree_objects = 1;
3472
0
  revs.tag_objects = 1;
3473
0
  revs.ignore_missing_links = 1;
3474
3475
0
  while (strbuf_getline(&buf, stdin) != EOF) {
3476
0
    if (!buf.len)
3477
0
      continue;
3478
3479
0
    if (*buf.buf == '^')
3480
0
      string_list_append(&exclude_packs, buf.buf + 1);
3481
0
    else
3482
0
      string_list_append(&include_packs, buf.buf);
3483
3484
0
    strbuf_reset(&buf);
3485
0
  }
3486
3487
0
  string_list_sort(&include_packs);
3488
0
  string_list_remove_duplicates(&include_packs, 0);
3489
0
  string_list_sort(&exclude_packs);
3490
0
  string_list_remove_duplicates(&exclude_packs, 0);
3491
3492
0
  for (p = get_all_packs(the_repository); p; p = p->next) {
3493
0
    const char *pack_name = pack_basename(p);
3494
3495
0
    if ((item = string_list_lookup(&include_packs, pack_name)))
3496
0
      item->util = p;
3497
0
    if ((item = string_list_lookup(&exclude_packs, pack_name)))
3498
0
      item->util = p;
3499
0
  }
3500
3501
  /*
3502
   * Arguments we got on stdin may not even be packs. First
3503
   * check that to avoid segfaulting later on in
3504
   * e.g. pack_mtime_cmp(); excluded packs are handled below.
3505
   *
3506
   * Since we first parsed our STDIN and then sorted the input
3507
   * lines, the pack we error on will be whatever line happens to
3508
   * sort first. This is lazy: it's enough that we report one
3509
   * bad case here; we don't need to report the first/last one,
3510
   * or all of them.
3511
   */
3512
0
  for_each_string_list_item(item, &include_packs) {
3513
0
    struct packed_git *p = item->util;
3514
0
    if (!p)
3515
0
      die(_("could not find pack '%s'"), item->string);
3516
0
    if (!is_pack_valid(p))
3517
0
      die(_("packfile %s cannot be accessed"), p->pack_name);
3518
0
  }
3519
3520
  /*
3521
   * Then, handle all of the excluded packs, marking them as
3522
   * kept in-core so that later calls to add_object_entry()
3523
   * discard any objects that are also found in excluded packs.
3524
   */
3525
0
  for_each_string_list_item(item, &exclude_packs) {
3526
0
    struct packed_git *p = item->util;
3527
0
    if (!p)
3528
0
      die(_("could not find pack '%s'"), item->string);
3529
0
    p->pack_keep_in_core = 1;
3530
0
  }
3531
3532
  /*
3533
   * Order packs by descending mtime; use QSORT directly to access the
3534
   * string_list_item's ->util pointer, which string_list_sort() does not
3535
   * provide.
3536
   */
3537
0
  QSORT(include_packs.items, include_packs.nr, pack_mtime_cmp);
3538
3539
0
  for_each_string_list_item(item, &include_packs) {
3540
0
    struct packed_git *p = item->util;
3541
0
    for_each_object_in_pack(p,
3542
0
          add_object_entry_from_pack,
3543
0
          &revs,
3544
0
          FOR_EACH_OBJECT_PACK_ORDER);
3545
0
  }
3546
3547
0
  if (prepare_revision_walk(&revs))
3548
0
    die(_("revision walk setup failed"));
3549
0
  traverse_commit_list(&revs,
3550
0
           show_commit_pack_hint,
3551
0
           show_object_pack_hint,
3552
0
           NULL);
3553
3554
0
  trace2_data_intmax("pack-objects", the_repository, "stdin_packs_found",
3555
0
         stdin_packs_found_nr);
3556
0
  trace2_data_intmax("pack-objects", the_repository, "stdin_packs_hints",
3557
0
         stdin_packs_hints_nr);
3558
3559
0
  strbuf_release(&buf);
3560
0
  string_list_clear(&include_packs, 0);
3561
0
  string_list_clear(&exclude_packs, 0);
3562
0
}
3563
3564
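The stdin dialect consumed above is one pack basename per line, with a leading '^' marking a pack to exclude; blank lines are skipped. A hypothetical standalone demo of that classification (made-up pack names):

#include <stdio.h>

int main(void)
{
  const char *lines[] = {
    "pack-1234.pack",    /* include this pack's objects */
    "^pack-5678.pack",   /* '^' prefix: exclude this pack */
    "",                  /* blank lines are ignored */
  };
  for (int i = 0; i < 3; i++) {
    const char *line = lines[i];
    if (!*line)
      continue;
    if (*line == '^')
      printf("exclude: %s\n", line + 1);
    else
      printf("include: %s\n", line);
  }
  return 0;
}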
static void add_cruft_object_entry(const struct object_id *oid, enum object_type type,
3565
           struct packed_git *pack, off_t offset,
3566
           const char *name, uint32_t mtime)
3567
0
{
3568
0
  struct object_entry *entry;
3569
3570
0
  display_progress(progress_state, ++nr_seen);
3571
3572
0
  entry = packlist_find(&to_pack, oid);
3573
0
  if (entry) {
3574
0
    if (name) {
3575
0
      entry->hash = pack_name_hash(name);
3576
0
      entry->no_try_delta = no_try_delta(name);
3577
0
    }
3578
0
  } else {
3579
0
    if (!want_object_in_pack(oid, 0, &pack, &offset))
3580
0
      return;
3581
0
    if (!pack && type == OBJ_BLOB && !has_loose_object(oid)) {
3582
      /*
3583
       * If a traversed tree has a missing blob then we want
3584
       * to avoid adding that missing object to our pack.
3585
       *
3586
       * This only applies to missing blobs, not trees,
3587
       * because the traversal needs to parse sub-trees but
3588
       * not blobs.
3589
       *
3590
       * Note we only perform this check when we couldn't
3591
       * already find the object in a pack, so we're really
3592
       * limited to "ensure non-tip blobs which don't exist in
3593
       * packs do exist via loose objects". Confused?
3594
       */
3595
0
      return;
3596
0
    }
3597
3598
0
    entry = create_object_entry(oid, type, pack_name_hash(name),
3599
0
              0, name && no_try_delta(name),
3600
0
              pack, offset);
3601
0
  }
3602
3603
0
  if (mtime > oe_cruft_mtime(&to_pack, entry))
3604
0
    oe_set_cruft_mtime(&to_pack, entry, mtime);
3605
0
  return;
3606
0
}
3607
3608
static void show_cruft_object(struct object *obj, const char *name, void *data UNUSED)
3609
0
{
3610
  /*
3611
   * if we did not record it earlier, it's at least as old as our
3612
   * expiration value. Rather than find it exactly, just use that
3613
   * value.  This may bump it forward from its real mtime, but it
3614
   * will still be "too old" next time we run with the same
3615
   * expiration.
3616
   *
3617
   * if obj does appear in the packing list, this call is a noop (or may
3618
   * set the namehash).
3619
   */
3620
0
  add_cruft_object_entry(&obj->oid, obj->type, NULL, 0, name, cruft_expiration);
3621
0
}
3622
3623
static void show_cruft_commit(struct commit *commit, void *data)
3624
0
{
3625
0
  show_cruft_object((struct object*)commit, NULL, data);
3626
0
}
3627
3628
static int cruft_include_check_obj(struct object *obj, void *data UNUSED)
3629
0
{
3630
0
  return !has_object_kept_pack(&obj->oid, IN_CORE_KEEP_PACKS);
3631
0
}
3632
3633
static int cruft_include_check(struct commit *commit, void *data)
3634
0
{
3635
0
  return cruft_include_check_obj((struct object*)commit, data);
3636
0
}
3637
3638
static void set_cruft_mtime(const struct object *object,
3639
          struct packed_git *pack,
3640
          off_t offset, time_t mtime)
3641
0
{
3642
0
  add_cruft_object_entry(&object->oid, object->type, pack, offset, NULL,
3643
0
             mtime);
3644
0
}
3645
3646
static void mark_pack_kept_in_core(struct string_list *packs, unsigned keep)
3647
0
{
3648
0
  struct string_list_item *item = NULL;
3649
0
  for_each_string_list_item(item, packs) {
3650
0
    struct packed_git *p = item->util;
3651
0
    if (!p)
3652
0
      die(_("could not find pack '%s'"), item->string);
3653
0
    p->pack_keep_in_core = keep;
3654
0
  }
3655
0
}
3656
3657
static void add_unreachable_loose_objects(void);
3658
static void add_objects_in_unpacked_packs(void);
3659
3660
static void enumerate_cruft_objects(void)
3661
0
{
3662
0
  if (progress)
3663
0
    progress_state = start_progress(_("Enumerating cruft objects"), 0);
3664
3665
0
  add_objects_in_unpacked_packs();
3666
0
  add_unreachable_loose_objects();
3667
3668
0
  stop_progress(&progress_state);
3669
0
}
3670
3671
static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs)
3672
0
{
3673
0
  struct packed_git *p;
3674
0
  struct rev_info revs;
3675
0
  int ret;
3676
3677
0
  repo_init_revisions(the_repository, &revs, NULL);
3678
3679
0
  revs.tag_objects = 1;
3680
0
  revs.tree_objects = 1;
3681
0
  revs.blob_objects = 1;
3682
3683
0
  revs.include_check = cruft_include_check;
3684
0
  revs.include_check_obj = cruft_include_check_obj;
3685
3686
0
  revs.ignore_missing_links = 1;
3687
3688
0
  if (progress)
3689
0
    progress_state = start_progress(_("Enumerating cruft objects"), 0);
3690
0
  ret = add_unseen_recent_objects_to_traversal(&revs, cruft_expiration,
3691
0
                 set_cruft_mtime, 1);
3692
0
  stop_progress(&progress_state);
3693
3694
0
  if (ret)
3695
0
    die(_("unable to add cruft objects"));
3696
3697
  /*
3698
   * Re-mark only the fresh packs as kept so that objects in
3699
   * unknown packs do not halt the reachability traversal early.
3700
   */
3701
0
  for (p = get_all_packs(the_repository); p; p = p->next)
3702
0
    p->pack_keep_in_core = 0;
3703
0
  mark_pack_kept_in_core(fresh_packs, 1);
3704
3705
0
  if (prepare_revision_walk(&revs))
3706
0
    die(_("revision walk setup failed"));
3707
0
  if (progress)
3708
0
    progress_state = start_progress(_("Traversing cruft objects"), 0);
3709
0
  nr_seen = 0;
3710
0
  traverse_commit_list(&revs, show_cruft_commit, show_cruft_object, NULL);
3711
3712
0
  stop_progress(&progress_state);
3713
0
}
3714
3715
static void read_cruft_objects(void)
3716
0
{
3717
0
  struct strbuf buf = STRBUF_INIT;
3718
0
  struct string_list discard_packs = STRING_LIST_INIT_DUP;
3719
0
  struct string_list fresh_packs = STRING_LIST_INIT_DUP;
3720
0
  struct packed_git *p;
3721
3722
0
  ignore_packed_keep_in_core = 1;
3723
3724
0
  while (strbuf_getline(&buf, stdin) != EOF) {
3725
0
    if (!buf.len)
3726
0
      continue;
3727
3728
0
    if (*buf.buf == '-')
3729
0
      string_list_append(&discard_packs, buf.buf + 1);
3730
0
    else
3731
0
      string_list_append(&fresh_packs, buf.buf);
3732
0
  }
3733
3734
0
  string_list_sort(&discard_packs);
3735
0
  string_list_sort(&fresh_packs);
3736
3737
0
  for (p = get_all_packs(the_repository); p; p = p->next) {
3738
0
    const char *pack_name = pack_basename(p);
3739
0
    struct string_list_item *item;
3740
3741
0
    item = string_list_lookup(&fresh_packs, pack_name);
3742
0
    if (!item)
3743
0
      item = string_list_lookup(&discard_packs, pack_name);
3744
3745
0
    if (item) {
3746
0
      item->util = p;
3747
0
    } else {
3748
      /*
3749
       * This pack wasn't mentioned in either the "fresh" or
3750
       * "discard" list, so the caller didn't know about it.
3751
       *
3752
       * Mark it as kept so that its objects are ignored by
3753
       * add_unseen_recent_objects_to_traversal(). We'll
3754
       * unmark it before starting the traversal so it doesn't
3755
       * halt the traversal early.
3756
       */
3757
0
      p->pack_keep_in_core = 1;
3758
0
    }
3759
0
  }
3760
3761
0
  mark_pack_kept_in_core(&fresh_packs, 1);
3762
0
  mark_pack_kept_in_core(&discard_packs, 0);
3763
3764
0
  if (cruft_expiration)
3765
0
    enumerate_and_traverse_cruft_objects(&fresh_packs);
3766
0
  else
3767
0
    enumerate_cruft_objects();
3768
3769
0
  strbuf_release(&buf);
3770
0
  string_list_clear(&discard_packs, 0);
3771
0
  string_list_clear(&fresh_packs, 0);
3772
0
}
3773
3774
static void read_object_list_from_stdin(void)
3775
0
{
3776
0
  char line[GIT_MAX_HEXSZ + 1 + PATH_MAX + 2];
3777
0
  struct object_id oid;
3778
0
  const char *p;
3779
3780
0
  for (;;) {
3781
0
    if (!fgets(line, sizeof(line), stdin)) {
3782
0
      if (feof(stdin))
3783
0
        break;
3784
0
      if (!ferror(stdin))
3785
0
        BUG("fgets returned NULL, not EOF, not error!");
3786
0
      if (errno != EINTR)
3787
0
        die_errno("fgets");
3788
0
      clearerr(stdin);
3789
0
      continue;
3790
0
    }
3791
0
    if (line[0] == '-') {
3792
0
      if (get_oid_hex(line+1, &oid))
3793
0
        die(_("expected edge object ID, got garbage:\n %s"),
3794
0
            line);
3795
0
      add_preferred_base(&oid);
3796
0
      continue;
3797
0
    }
3798
0
    if (parse_oid_hex(line, &oid, &p))
3799
0
      die(_("expected object ID, got garbage:\n %s"), line);
3800
3801
0
    add_preferred_base_object(p + 1);
3802
0
    add_object_entry(&oid, OBJ_NONE, p + 1, 0);
3803
0
  }
3804
0
}
3805
3806
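For context, each input line above is either "<oid> <name>" (an object to pack, with its path used for delta heuristics) or "-<oid>" (an edge object registered as a preferred base). A standalone sketch with made-up SHA-1-sized IDs:

#include <ctype.h>
#include <stdio.h>

/* Accept exactly 'len' hex digits; a crude stand-in for parse_oid_hex(). */
static int is_hex_oid(const char *s, size_t len)
{
  for (size_t i = 0; i < len; i++)
    if (!isxdigit((unsigned char)s[i]))
      return 0;
  return 1;
}

int main(void)
{
  const char *lines[] = {
    "-e83c5163316f89bfbde7d9ab23ca2e25604af290",
    "7d9ab23ca2e25604af290e83c5163316f89bfbde path/to/file",
  };
  for (int i = 0; i < 2; i++) {
    const char *line = lines[i];
    if (line[0] == '-' && is_hex_oid(line + 1, 40))
      printf("edge (preferred base): %.40s\n", line + 1);
    else if (is_hex_oid(line, 40) && line[40] == ' ')
      printf("pack object %.40s named '%s'\n", line, line + 41);
  }
  return 0;
}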
static void show_commit(struct commit *commit, void *data UNUSED)
3807
0
{
3808
0
  add_object_entry(&commit->object.oid, OBJ_COMMIT, NULL, 0);
3809
3810
0
  if (write_bitmap_index)
3811
0
    index_commit_for_bitmap(commit);
3812
3813
0
  if (use_delta_islands)
3814
0
    propagate_island_marks(commit);
3815
0
}
3816
3817
static void show_object(struct object *obj, const char *name,
3818
      void *data UNUSED)
3819
0
{
3820
0
  add_preferred_base_object(name);
3821
0
  add_object_entry(&obj->oid, obj->type, name, 0);
3822
3823
0
  if (use_delta_islands) {
3824
0
    const char *p;
3825
0
    unsigned depth;
3826
0
    struct object_entry *ent;
3827
3828
    /* the empty string is a root tree, which is depth 0 */
3829
0
    depth = *name ? 1 : 0;
3830
0
    for (p = strchr(name, '/'); p; p = strchr(p + 1, '/'))
3831
0
      depth++;
3832
3833
0
    ent = packlist_find(&to_pack, &obj->oid);
3834
0
    if (ent && depth > oe_tree_depth(&to_pack, ent))
3835
0
      oe_set_tree_depth(&to_pack, ent, depth);
3836
0
  }
3837
0
}
3838
3839
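The depth computation above encodes a simple rule: the empty name is the root tree (depth 0), any non-empty name starts at depth 1, and each '/' adds one more level. A standalone sketch of just that rule (hypothetical helper name):

#include <stdio.h>
#include <string.h>

static unsigned tree_depth_of_name(const char *name)
{
  unsigned depth = *name ? 1 : 0;
  for (const char *p = strchr(name, '/'); p; p = strchr(p + 1, '/'))
    depth++;
  return depth;
}

int main(void)
{
  printf("%u\n", tree_depth_of_name(""));          /* 0: root tree */
  printf("%u\n", tree_depth_of_name("Makefile"));  /* 1 */
  printf("%u\n", tree_depth_of_name("a/b/c.txt")); /* 3 */
  return 0;
}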
static void show_object__ma_allow_any(struct object *obj, const char *name, void *data)
3840
0
{
3841
0
  assert(arg_missing_action == MA_ALLOW_ANY);
3842
3843
  /*
3844
   * Quietly ignore ALL missing objects.  This avoids problems with
3845
   * staging them now and getting an odd error later.
3846
   */
3847
0
  if (!has_object(the_repository, &obj->oid, 0))
3848
0
    return;
3849
3850
0
  show_object(obj, name, data);
3851
0
}
3852
3853
static void show_object__ma_allow_promisor(struct object *obj, const char *name, void *data)
3854
0
{
3855
0
  assert(arg_missing_action == MA_ALLOW_PROMISOR);
3856
3857
  /*
3858
   * Quietly ignore EXPECTED missing objects.  This avoids problems with
3859
   * staging them now and getting an odd error later.
3860
   */
3861
0
  if (!has_object(the_repository, &obj->oid, 0) && is_promisor_object(&obj->oid))
3862
0
    return;
3863
3864
0
  show_object(obj, name, data);
3865
0
}
3866
3867
static int option_parse_missing_action(const struct option *opt UNUSED,
3868
               const char *arg, int unset)
3869
0
{
3870
0
  assert(arg);
3871
0
  assert(!unset);
3872
3873
0
  if (!strcmp(arg, "error")) {
3874
0
    arg_missing_action = MA_ERROR;
3875
0
    fn_show_object = show_object;
3876
0
    return 0;
3877
0
  }
3878
3879
0
  if (!strcmp(arg, "allow-any")) {
3880
0
    arg_missing_action = MA_ALLOW_ANY;
3881
0
    fetch_if_missing = 0;
3882
0
    fn_show_object = show_object__ma_allow_any;
3883
0
    return 0;
3884
0
  }
3885
3886
0
  if (!strcmp(arg, "allow-promisor")) {
3887
0
    arg_missing_action = MA_ALLOW_PROMISOR;
3888
0
    fetch_if_missing = 0;
3889
0
    fn_show_object = show_object__ma_allow_promisor;
3890
0
    return 0;
3891
0
  }
3892
3893
0
  die(_("invalid value for '%s': '%s'"), "--missing", arg);
3894
0
  return 0;
3895
0
}
3896
3897
static void show_edge(struct commit *commit)
3898
0
{
3899
0
  add_preferred_base(&commit->object.oid);
3900
0
}
3901
3902
static int add_object_in_unpacked_pack(const struct object_id *oid,
3903
               struct packed_git *pack,
3904
               uint32_t pos,
3905
               void *data UNUSED)
3906
0
{
3907
0
  if (cruft) {
3908
0
    off_t offset;
3909
0
    time_t mtime;
3910
3911
0
    if (pack->is_cruft) {
3912
0
      if (load_pack_mtimes(pack) < 0)
3913
0
        die(_("could not load cruft pack .mtimes"));
3914
0
      mtime = nth_packed_mtime(pack, pos);
3915
0
    } else {
3916
0
      mtime = pack->mtime;
3917
0
    }
3918
0
    offset = nth_packed_object_offset(pack, pos);
3919
3920
0
    add_cruft_object_entry(oid, OBJ_NONE, pack, offset,
3921
0
               NULL, mtime);
3922
0
  } else {
3923
0
    add_object_entry(oid, OBJ_NONE, "", 0);
3924
0
  }
3925
0
  return 0;
3926
0
}
3927
3928
static void add_objects_in_unpacked_packs(void)
3929
0
{
3930
0
  if (for_each_packed_object(add_object_in_unpacked_pack, NULL,
3931
0
           FOR_EACH_OBJECT_PACK_ORDER |
3932
0
           FOR_EACH_OBJECT_LOCAL_ONLY |
3933
0
           FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
3934
0
           FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
3935
0
    die(_("cannot open pack index"));
3936
0
}
3937
3938
static int add_loose_object(const struct object_id *oid, const char *path,
3939
          void *data UNUSED)
3940
0
{
3941
0
  enum object_type type = oid_object_info(the_repository, oid, NULL);
3942
3943
0
  if (type < 0) {
3944
0
    warning(_("loose object at %s could not be examined"), path);
3945
0
    return 0;
3946
0
  }
3947
3948
0
  if (cruft) {
3949
0
    struct stat st;
3950
0
    if (stat(path, &st) < 0) {
3951
0
      if (errno == ENOENT)
3952
0
        return 0;
3953
0
      return error_errno("unable to stat %s", oid_to_hex(oid));
3954
0
    }
3955
3956
0
    add_cruft_object_entry(oid, type, NULL, 0, NULL,
3957
0
               st.st_mtime);
3958
0
  } else {
3959
0
    add_object_entry(oid, type, "", 0);
3960
0
  }
3961
0
  return 0;
3962
0
}
3963
3964
/*
3965
 * We actually don't even have to worry about reachability here.
3966
 * add_object_entry will weed out duplicates, so we just add every
3967
 * loose object we find.
3968
 */
3969
static void add_unreachable_loose_objects(void)
3970
0
{
3971
0
  for_each_loose_file_in_objdir(get_object_directory(),
3972
0
              add_loose_object,
3973
0
              NULL, NULL, NULL);
3974
0
}
3975
3976
static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid)
3977
0
{
3978
0
  static struct packed_git *last_found = (void *)1;
3979
0
  struct packed_git *p;
3980
3981
0
  p = (last_found != (void *)1) ? last_found :
3982
0
          get_all_packs(the_repository);
3983
3984
0
  while (p) {
3985
0
    if ((!p->pack_local || p->pack_keep ||
3986
0
        p->pack_keep_in_core) &&
3987
0
      find_pack_entry_one(oid->hash, p)) {
3988
0
      last_found = p;
3989
0
      return 1;
3990
0
    }
3991
0
    if (p == last_found)
3992
0
      p = get_all_packs(the_repository);
3993
0
    else
3994
0
      p = p->next;
3995
0
    if (p == last_found)
3996
0
      p = p->next;
3997
0
  }
3998
0
  return 0;
3999
0
}
4000
4001
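The loop above is a most-recently-used cache over the pack list: probe the remembered pack first, then walk from the head while skipping the cached entry so no pack is checked twice. A standalone sketch of the same traversal over a toy linked list (illustrative names only; the real code's sentinel is (void *)1 rather than NULL):

#include <stdio.h>

struct node { int key; struct node *next; };

static int find_with_mru(struct node *head, int key)
{
  static struct node *last_found;
  struct node *p = last_found ? last_found : head;

  while (p) {
    if (p->key == key) {
      last_found = p;
      return 1;
    }
    if (p == last_found)
      p = head;        /* wrap back to the start of the list */
    else
      p = p->next;
    if (p == last_found)
      p = p->next;     /* already probed the cached node; skip it */
  }
  return 0;
}

int main(void)
{
  struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
  printf("%d\n", find_with_mru(&a, 3)); /* 1: walks a, b, c */
  printf("%d\n", find_with_mru(&a, 3)); /* 1: immediate cache hit on c */
  printf("%d\n", find_with_mru(&a, 9)); /* 0: probes c, then a, b */
  return 0;
}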
/*
4002
 * Store a list of sha1s that should not be discarded
4003
 * because they were either written too recently, or are
4004
 * reachable from another object that was.
4005
 *
4006
 * This is filled by get_object_list.
4007
 */
4008
static struct oid_array recent_objects;
4009
4010
static int loosened_object_can_be_discarded(const struct object_id *oid,
4011
              timestamp_t mtime)
4012
0
{
4013
0
  if (!unpack_unreachable_expiration)
4014
0
    return 0;
4015
0
  if (mtime > unpack_unreachable_expiration)
4016
0
    return 0;
4017
0
  if (oid_array_lookup(&recent_objects, oid) >= 0)
4018
0
    return 0;
4019
0
  return 1;
4020
0
}
4021
4022
static void loosen_unused_packed_objects(void)
4023
0
{
4024
0
  struct packed_git *p;
4025
0
  uint32_t i;
4026
0
  uint32_t loosened_objects_nr = 0;
4027
0
  struct object_id oid;
4028
4029
0
  for (p = get_all_packs(the_repository); p; p = p->next) {
4030
0
    if (!p->pack_local || p->pack_keep || p->pack_keep_in_core)
4031
0
      continue;
4032
4033
0
    if (open_pack_index(p))
4034
0
      die(_("cannot open pack index"));
4035
4036
0
    for (i = 0; i < p->num_objects; i++) {
4037
0
      nth_packed_object_id(&oid, p, i);
4038
0
      if (!packlist_find(&to_pack, &oid) &&
4039
0
          !has_sha1_pack_kept_or_nonlocal(&oid) &&
4040
0
          !loosened_object_can_be_discarded(&oid, p->mtime)) {
4041
0
        if (force_object_loose(&oid, p->mtime))
4042
0
          die(_("unable to force loose object"));
4043
0
        loosened_objects_nr++;
4044
0
      }
4045
0
    }
4046
0
  }
4047
4048
0
  trace2_data_intmax("pack-objects", the_repository,
4049
0
         "loosen_unused_packed_objects/loosened", loosened_objects_nr);
4050
0
}
4051
4052
/*
4053
 * This tracks any options which pack-reuse code expects to be on, or which a
4054
 * reader of the pack might not understand, and which would therefore prevent
4055
 * blind reuse of what we have on disk.
4056
 */
4057
static int pack_options_allow_reuse(void)
4058
0
{
4059
0
  return allow_pack_reuse != NO_PACK_REUSE &&
4060
0
         pack_to_stdout &&
4061
0
         !ignore_packed_keep_on_disk &&
4062
0
         !ignore_packed_keep_in_core &&
4063
0
         (!local || !have_non_local_packs) &&
4064
0
         !incremental;
4065
0
}
4066
4067
static int get_object_list_from_bitmap(struct rev_info *revs)
4068
0
{
4069
0
  if (!(bitmap_git = prepare_bitmap_walk(revs, 0)))
4070
0
    return -1;
4071
4072
0
  if (pack_options_allow_reuse())
4073
0
    reuse_partial_packfile_from_bitmap(bitmap_git,
4074
0
               &reuse_packfiles,
4075
0
               &reuse_packfiles_nr,
4076
0
               &reuse_packfile_bitmap,
4077
0
               allow_pack_reuse == MULTI_PACK_REUSE);
4078
4079
0
  if (reuse_packfiles) {
4080
0
    reuse_packfile_objects = bitmap_popcount(reuse_packfile_bitmap);
4081
0
    if (!reuse_packfile_objects)
4082
0
      BUG("expected non-empty reuse bitmap");
4083
4084
0
    nr_result += reuse_packfile_objects;
4085
0
    nr_seen += reuse_packfile_objects;
4086
0
    display_progress(progress_state, nr_seen);
4087
0
  }
4088
4089
0
  traverse_bitmap_commit_list(bitmap_git, revs,
4090
0
            &add_object_entry_from_bitmap);
4091
0
  return 0;
4092
0
}
4093
4094
static void record_recent_object(struct object *obj,
4095
         const char *name UNUSED,
4096
         void *data UNUSED)
4097
0
{
4098
0
  oid_array_append(&recent_objects, &obj->oid);
4099
0
}
4100
4101
static void record_recent_commit(struct commit *commit, void *data UNUSED)
4102
0
{
4103
0
  oid_array_append(&recent_objects, &commit->object.oid);
4104
0
}
4105
4106
static int mark_bitmap_preferred_tip(const char *refname,
4107
             const char *referent UNUSED,
4108
             const struct object_id *oid,
4109
             int flags UNUSED,
4110
             void *data UNUSED)
4111
0
{
4112
0
  struct object_id peeled;
4113
0
  struct object *object;
4114
4115
0
  if (!peel_iterated_oid(the_repository, oid, &peeled))
4116
0
    oid = &peeled;
4117
4118
0
  object = parse_object_or_die(oid, refname);
4119
0
  if (object->type == OBJ_COMMIT)
4120
0
    object->flags |= NEEDS_BITMAP;
4121
4122
0
  return 0;
4123
0
}
4124
4125
static void mark_bitmap_preferred_tips(void)
4126
0
{
4127
0
  struct string_list_item *item;
4128
0
  const struct string_list *preferred_tips;
4129
4130
0
  preferred_tips = bitmap_preferred_tips(the_repository);
4131
0
  if (!preferred_tips)
4132
0
    return;
4133
4134
0
  for_each_string_list_item(item, preferred_tips) {
4135
0
    refs_for_each_ref_in(get_main_ref_store(the_repository),
4136
0
             item->string, mark_bitmap_preferred_tip,
4137
0
             NULL);
4138
0
  }
4139
0
}
4140
4141
static void get_object_list(struct rev_info *revs, int ac, const char **av)
4142
0
{
4143
0
  struct setup_revision_opt s_r_opt = {
4144
0
    .allow_exclude_promisor_objects = 1,
4145
0
  };
4146
0
  char line[1000];
4147
0
  int flags = 0;
4148
0
  int save_warning;
4149
4150
0
  save_commit_buffer = 0;
4151
0
  setup_revisions(ac, av, revs, &s_r_opt);
4152
4153
  /* make sure shallows are read */
4154
0
  is_repository_shallow(the_repository);
4155
4156
0
  save_warning = warn_on_object_refname_ambiguity;
4157
0
  warn_on_object_refname_ambiguity = 0;
4158
4159
0
  while (fgets(line, sizeof(line), stdin) != NULL) {
4160
0
    int len = strlen(line);
4161
0
    if (len && line[len - 1] == '\n')
4162
0
      line[--len] = 0;
4163
0
    if (!len)
4164
0
      break;
4165
0
    if (*line == '-') {
4166
0
      if (!strcmp(line, "--not")) {
4167
0
        flags ^= UNINTERESTING;
4168
0
        write_bitmap_index = 0;
4169
0
        continue;
4170
0
      }
4171
0
      if (starts_with(line, "--shallow ")) {
4172
0
        struct object_id oid;
4173
0
        if (get_oid_hex(line + 10, &oid))
4174
0
          die("not an object name '%s'", line + 10);
4175
0
        register_shallow(the_repository, &oid);
4176
0
        use_bitmap_index = 0;
4177
0
        continue;
4178
0
      }
4179
0
      die(_("not a rev '%s'"), line);
4180
0
    }
4181
0
    if (handle_revision_arg(line, revs, flags, REVARG_CANNOT_BE_FILENAME))
4182
0
      die(_("bad revision '%s'"), line);
4183
0
  }
4184
4185
0
  warn_on_object_refname_ambiguity = save_warning;
4186
4187
0
  if (use_bitmap_index && !get_object_list_from_bitmap(revs))
4188
0
    return;
4189
4190
0
  if (use_delta_islands)
4191
0
    load_delta_islands(the_repository, progress);
4192
4193
0
  if (write_bitmap_index)
4194
0
    mark_bitmap_preferred_tips();
4195
4196
0
  if (prepare_revision_walk(revs))
4197
0
    die(_("revision walk setup failed"));
4198
0
  mark_edges_uninteresting(revs, show_edge, sparse);
4199
4200
0
  if (!fn_show_object)
4201
0
    fn_show_object = show_object;
4202
0
  traverse_commit_list(revs,
4203
0
           show_commit, fn_show_object,
4204
0
           NULL);
4205
4206
0
  if (unpack_unreachable_expiration) {
4207
0
    revs->ignore_missing_links = 1;
4208
0
    if (add_unseen_recent_objects_to_traversal(revs,
4209
0
        unpack_unreachable_expiration, NULL, 0))
4210
0
      die(_("unable to add recent objects"));
4211
0
    if (prepare_revision_walk(revs))
4212
0
      die(_("revision walk setup failed"));
4213
0
    traverse_commit_list(revs, record_recent_commit,
4214
0
             record_recent_object, NULL);
4215
0
  }
4216
4217
0
  if (keep_unreachable)
4218
0
    add_objects_in_unpacked_packs();
4219
0
  if (pack_loose_unreachable)
4220
0
    add_unreachable_loose_objects();
4221
0
  if (unpack_unreachable)
4222
0
    loosen_unused_packed_objects();
4223
4224
0
  oid_array_clear(&recent_objects);
4225
0
}
4226
4227
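The stdin loop above accepts a small rev-list-style dialect: "--not" flips the UNINTERESTING flag for subsequent lines, "--shallow <oid>" registers a shallow boundary (and disables bitmap use), and anything else is treated as a revision argument. A hypothetical standalone demo of the dispatch (flags are just printed here):

#include <stdio.h>
#include <string.h>

int main(void)
{
  const char *lines[] = { "HEAD", "--not", "v1.0", "--shallow 1234abcd" };
  unsigned flags = 0; /* stand-in for the UNINTERESTING bit */

  for (int i = 0; i < 4; i++) {
    const char *line = lines[i];
    if (!strcmp(line, "--not")) {
      flags ^= 1;     /* everything after this line is negated */
      continue;
    }
    if (!strncmp(line, "--shallow ", 10)) {
      printf("shallow boundary at %s\n", line + 10);
      continue;
    }
    printf("rev '%s'%s\n", line, flags ? " (uninteresting)" : "");
  }
  return 0;
}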
static void add_extra_kept_packs(const struct string_list *names)
4228
0
{
4229
0
  struct packed_git *p;
4230
4231
0
  if (!names->nr)
4232
0
    return;
4233
4234
0
  for (p = get_all_packs(the_repository); p; p = p->next) {
4235
0
    const char *name = basename(p->pack_name);
4236
0
    int i;
4237
4238
0
    if (!p->pack_local)
4239
0
      continue;
4240
4241
0
    for (i = 0; i < names->nr; i++)
4242
0
      if (!fspathcmp(name, names->items[i].string))
4243
0
        break;
4244
4245
0
    if (i < names->nr) {
4246
0
      p->pack_keep_in_core = 1;
4247
0
      ignore_packed_keep_in_core = 1;
4248
0
      continue;
4249
0
    }
4250
0
  }
4251
0
}
4252
4253
static int option_parse_quiet(const struct option *opt, const char *arg,
4254
            int unset)
4255
0
{
4256
0
  int *val = opt->value;
4257
4258
0
  BUG_ON_OPT_ARG(arg);
4259
4260
0
  if (!unset)
4261
0
    *val = 0;
4262
0
  else if (!*val)
4263
0
    *val = 1;
4264
0
  return 0;
4265
0
}
4266
4267
static int option_parse_index_version(const struct option *opt,
4268
              const char *arg, int unset)
4269
0
{
4270
0
  struct pack_idx_option *popts = opt->value;
4271
0
  char *c;
4272
0
  const char *val = arg;
4273
4274
0
  BUG_ON_OPT_NEG(unset);
4275
4276
0
  popts->version = strtoul(val, &c, 10);
4277
0
  if (popts->version > 2)
4278
0
    die(_("unsupported index version %s"), val);
4279
0
  if (*c == ',' && c[1])
4280
0
    popts->off32_limit = strtoul(c+1, &c, 0);
4281
0
  if (*c || popts->off32_limit & 0x80000000)
4282
0
    die(_("bad index version '%s'"), val);
4283
0
  return 0;
4284
0
}
4285
4286
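The argument grammar above is "<version>[,<offset>]", e.g. "2,0x40000000": strtoul() parses the version in base 10, then the optional offset limit in any C numeric base, and the limit must fit in 31 bits. A standalone sketch (hypothetical helper; the real code stores into a pack_idx_option):

#include <stdio.h>
#include <stdlib.h>

static int parse_index_version(const char *val,
                               unsigned long *version,
                               unsigned long *off32_limit)
{
  char *c;

  *version = strtoul(val, &c, 10);
  if (*version > 2)
    return -1;               /* unsupported index version */
  if (*c == ',' && c[1])
    *off32_limit = strtoul(c + 1, &c, 0);
  if (*c || *off32_limit & 0x80000000)
    return -1;               /* trailing junk, or limit needs bit 31 */
  return 0;
}

int main(void)
{
  unsigned long version, limit = 0;
  if (!parse_index_version("2,0x40000000", &version, &limit))
    printf("version=%lu off32_limit=0x%lx\n", version, limit);
  return 0;
}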
static int option_parse_unpack_unreachable(const struct option *opt UNUSED,
4287
             const char *arg, int unset)
4288
0
{
4289
0
  if (unset) {
4290
0
    unpack_unreachable = 0;
4291
0
    unpack_unreachable_expiration = 0;
4292
0
  }
4293
0
  else {
4294
0
    unpack_unreachable = 1;
4295
0
    if (arg)
4296
0
      unpack_unreachable_expiration = approxidate(arg);
4297
0
  }
4298
0
  return 0;
4299
0
}
4300
4301
static int option_parse_cruft_expiration(const struct option *opt UNUSED,
4302
           const char *arg, int unset)
4303
0
{
4304
0
  if (unset) {
4305
0
    cruft = 0;
4306
0
    cruft_expiration = 0;
4307
0
  } else {
4308
0
    cruft = 1;
4309
0
    if (arg)
4310
0
      cruft_expiration = approxidate(arg);
4311
0
  }
4312
0
  return 0;
4313
0
}
4314
4315
int cmd_pack_objects(int argc, const char **argv, const char *prefix)
4316
0
{
4317
0
  int use_internal_rev_list = 0;
4318
0
  int shallow = 0;
4319
0
  int all_progress_implied = 0;
4320
0
  struct strvec rp = STRVEC_INIT;
4321
0
  int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
4322
0
  int rev_list_index = 0;
4323
0
  int stdin_packs = 0;
4324
0
  struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
4325
0
  struct list_objects_filter_options filter_options =
4326
0
    LIST_OBJECTS_FILTER_INIT;
4327
4328
0
  struct option pack_objects_options[] = {
4329
0
    OPT_CALLBACK_F('q', "quiet", &progress, NULL,
4330
0
             N_("do not show progress meter"),
4331
0
             PARSE_OPT_NOARG, option_parse_quiet),
4332
0
    OPT_SET_INT(0, "progress", &progress,
4333
0
          N_("show progress meter"), 1),
4334
0
    OPT_SET_INT(0, "all-progress", &progress,
4335
0
          N_("show progress meter during object writing phase"), 2),
4336
0
    OPT_BOOL(0, "all-progress-implied",
4337
0
       &all_progress_implied,
4338
0
       N_("similar to --all-progress when progress meter is shown")),
4339
0
    OPT_CALLBACK_F(0, "index-version", &pack_idx_opts, N_("<version>[,<offset>]"),
4340
0
      N_("write the pack index file in the specified idx format version"),
4341
0
      PARSE_OPT_NONEG, option_parse_index_version),
4342
0
    OPT_MAGNITUDE(0, "max-pack-size", &pack_size_limit,
4343
0
            N_("maximum size of each output pack file")),
4344
0
    OPT_BOOL(0, "local", &local,
4345
0
       N_("ignore borrowed objects from alternate object store")),
4346
0
    OPT_BOOL(0, "incremental", &incremental,
4347
0
       N_("ignore packed objects")),
4348
0
    OPT_INTEGER(0, "window", &window,
4349
0
          N_("limit pack window by objects")),
4350
0
    OPT_MAGNITUDE(0, "window-memory", &window_memory_limit,
4351
0
            N_("limit pack window by memory in addition to object limit")),
4352
0
    OPT_INTEGER(0, "depth", &depth,
4353
0
          N_("maximum length of delta chain allowed in the resulting pack")),
4354
0
    OPT_BOOL(0, "reuse-delta", &reuse_delta,
4355
0
       N_("reuse existing deltas")),
4356
0
    OPT_BOOL(0, "reuse-object", &reuse_object,
4357
0
       N_("reuse existing objects")),
4358
0
    OPT_BOOL(0, "delta-base-offset", &allow_ofs_delta,
4359
0
       N_("use OFS_DELTA objects")),
4360
0
    OPT_INTEGER(0, "threads", &delta_search_threads,
4361
0
          N_("use threads when searching for best delta matches")),
4362
0
    OPT_BOOL(0, "non-empty", &non_empty,
4363
0
       N_("do not create an empty pack output")),
4364
0
    OPT_BOOL(0, "revs", &use_internal_rev_list,
4365
0
       N_("read revision arguments from standard input")),
4366
0
    OPT_SET_INT_F(0, "unpacked", &rev_list_unpacked,
4367
0
            N_("limit the objects to those that are not yet packed"),
4368
0
            1, PARSE_OPT_NONEG),
4369
0
    OPT_SET_INT_F(0, "all", &rev_list_all,
4370
0
            N_("include objects reachable from any reference"),
4371
0
            1, PARSE_OPT_NONEG),
4372
0
    OPT_SET_INT_F(0, "reflog", &rev_list_reflog,
4373
0
            N_("include objects referred by reflog entries"),
4374
0
            1, PARSE_OPT_NONEG),
4375
0
    OPT_SET_INT_F(0, "indexed-objects", &rev_list_index,
4376
0
            N_("include objects referred to by the index"),
4377
0
            1, PARSE_OPT_NONEG),
4378
0
    OPT_BOOL(0, "stdin-packs", &stdin_packs,
4379
0
       N_("read packs from stdin")),
4380
0
    OPT_BOOL(0, "stdout", &pack_to_stdout,
4381
0
       N_("output pack to stdout")),
4382
0
    OPT_BOOL(0, "include-tag", &include_tag,
4383
0
       N_("include tag objects that refer to objects to be packed")),
4384
0
    OPT_BOOL(0, "keep-unreachable", &keep_unreachable,
4385
0
       N_("keep unreachable objects")),
4386
0
    OPT_BOOL(0, "pack-loose-unreachable", &pack_loose_unreachable,
4387
0
       N_("pack loose unreachable objects")),
4388
0
    OPT_CALLBACK_F(0, "unpack-unreachable", NULL, N_("time"),
4389
0
      N_("unpack unreachable objects newer than <time>"),
4390
0
      PARSE_OPT_OPTARG, option_parse_unpack_unreachable),
4391
0
    OPT_BOOL(0, "cruft", &cruft, N_("create a cruft pack")),
4392
0
    OPT_CALLBACK_F(0, "cruft-expiration", NULL, N_("time"),
4393
0
      N_("expire cruft objects older than <time>"),
4394
0
      PARSE_OPT_OPTARG, option_parse_cruft_expiration),
4395
0
    OPT_BOOL(0, "sparse", &sparse,
4396
0
       N_("use the sparse reachability algorithm")),
4397
0
    OPT_BOOL(0, "thin", &thin,
4398
0
       N_("create thin packs")),
4399
0
    OPT_BOOL(0, "shallow", &shallow,
4400
0
       N_("create packs suitable for shallow fetches")),
4401
0
    OPT_BOOL(0, "honor-pack-keep", &ignore_packed_keep_on_disk,
4402
0
       N_("ignore packs that have companion .keep file")),
4403
0
    OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"),
4404
0
        N_("ignore this pack")),
4405
0
    OPT_INTEGER(0, "compression", &pack_compression_level,
4406
0
          N_("pack compression level")),
4407
0
    OPT_BOOL(0, "keep-true-parents", &grafts_keep_true_parents,
4408
0
       N_("do not hide commits by grafts")),
4409
0
    OPT_BOOL(0, "use-bitmap-index", &use_bitmap_index,
4410
0
       N_("use a bitmap index if available to speed up counting objects")),
4411
0
    OPT_SET_INT(0, "write-bitmap-index", &write_bitmap_index,
4412
0
          N_("write a bitmap index together with the pack index"),
4413
0
          WRITE_BITMAP_TRUE),
4414
0
    OPT_SET_INT_F(0, "write-bitmap-index-quiet",
4415
0
            &write_bitmap_index,
4416
0
            N_("write a bitmap index if possible"),
4417
0
            WRITE_BITMAP_QUIET, PARSE_OPT_HIDDEN),
4418
0
    OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options),
4419
0
    OPT_CALLBACK_F(0, "missing", NULL, N_("action"),
4420
0
      N_("handling for missing objects"), PARSE_OPT_NONEG,
4421
0
      option_parse_missing_action),
4422
0
    OPT_BOOL(0, "exclude-promisor-objects", &exclude_promisor_objects,
4423
0
       N_("do not pack objects in promisor packfiles")),
4424
0
    OPT_BOOL(0, "delta-islands", &use_delta_islands,
4425
0
       N_("respect islands during delta compression")),
4426
0
    OPT_STRING_LIST(0, "uri-protocol", &uri_protocols,
4427
0
        N_("protocol"),
4428
0
        N_("exclude any configured uploadpack.blobpackfileuri with this protocol")),
4429
0
    OPT_END(),
4430
0
  };
4431
4432
0
  if (DFS_NUM_STATES > (1 << OE_DFS_STATE_BITS))
4433
0
    BUG("too many dfs states, increase OE_DFS_STATE_BITS");
4434
4435
0
  disable_replace_refs();
4436
4437
0
  sparse = git_env_bool("GIT_TEST_PACK_SPARSE", -1);
4438
0
  if (the_repository->gitdir) {
4439
0
    prepare_repo_settings(the_repository);
4440
0
    if (sparse < 0)
4441
0
      sparse = the_repository->settings.pack_use_sparse;
4442
0
    if (the_repository->settings.pack_use_multi_pack_reuse)
4443
0
      allow_pack_reuse = MULTI_PACK_REUSE;
4444
0
  }
4445
4446
0
  reset_pack_idx_option(&pack_idx_opts);
4447
0
  pack_idx_opts.flags |= WRITE_REV;
4448
0
  git_config(git_pack_config, NULL);
4449
0
  if (git_env_bool(GIT_TEST_NO_WRITE_REV_INDEX, 0))
4450
0
    pack_idx_opts.flags &= ~WRITE_REV;
4451
4452
0
  progress = isatty(2);
4453
0
  argc = parse_options(argc, argv, prefix, pack_objects_options,
4454
0
           pack_usage, 0);
4455
4456
0
  if (argc) {
4457
0
    base_name = argv[0];
4458
0
    argc--;
4459
0
  }
4460
0
  if (pack_to_stdout != !base_name || argc)
4461
0
    usage_with_options(pack_usage, pack_objects_options);
4462
4463
0
  if (depth < 0)
4464
0
    depth = 0;
4465
0
  if (depth >= (1 << OE_DEPTH_BITS)) {
4466
0
    warning(_("delta chain depth %d is too deep, forcing %d"),
4467
0
      depth, (1 << OE_DEPTH_BITS) - 1);
4468
0
    depth = (1 << OE_DEPTH_BITS) - 1;
4469
0
  }
4470
0
  if (cache_max_small_delta_size >= (1U << OE_Z_DELTA_BITS)) {
4471
0
    warning(_("pack.deltaCacheLimit is too high, forcing %d"),
4472
0
      (1U << OE_Z_DELTA_BITS) - 1);
4473
0
    cache_max_small_delta_size = (1U << OE_Z_DELTA_BITS) - 1;
4474
0
  }
4475
0
  if (window < 0)
4476
0
    window = 0;
4477
4478
0
  strvec_push(&rp, "pack-objects");
4479
0
  if (thin) {
4480
0
    use_internal_rev_list = 1;
4481
0
    strvec_push(&rp, shallow
4482
0
        ? "--objects-edge-aggressive"
4483
0
        : "--objects-edge");
4484
0
  } else
4485
0
    strvec_push(&rp, "--objects");
4486
4487
0
  if (rev_list_all) {
4488
0
    use_internal_rev_list = 1;
4489
0
    strvec_push(&rp, "--all");
4490
0
  }
4491
0
  if (rev_list_reflog) {
4492
0
    use_internal_rev_list = 1;
4493
0
    strvec_push(&rp, "--reflog");
4494
0
  }
4495
0
  if (rev_list_index) {
4496
0
    use_internal_rev_list = 1;
4497
0
    strvec_push(&rp, "--indexed-objects");
4498
0
  }
4499
0
  if (rev_list_unpacked && !stdin_packs) {
4500
0
    use_internal_rev_list = 1;
4501
0
    strvec_push(&rp, "--unpacked");
4502
0
  }
4503
4504
0
  if (exclude_promisor_objects) {
4505
0
    use_internal_rev_list = 1;
4506
0
    fetch_if_missing = 0;
4507
0
    strvec_push(&rp, "--exclude-promisor-objects");
4508
0
  }
4509
0
  if (unpack_unreachable || keep_unreachable || pack_loose_unreachable)
4510
0
    use_internal_rev_list = 1;
4511
4512
0
  if (!reuse_object)
4513
0
    reuse_delta = 0;
4514
0
  if (pack_compression_level == -1)
4515
0
    pack_compression_level = Z_DEFAULT_COMPRESSION;
4516
0
  else if (pack_compression_level < 0 || pack_compression_level > Z_BEST_COMPRESSION)
4517
0
    die(_("bad pack compression level %d"), pack_compression_level);
4518
4519
0
  if (!delta_search_threads) /* --threads=0 means autodetect */
4520
0
    delta_search_threads = online_cpus();
4521
4522
0
  if (!HAVE_THREADS && delta_search_threads != 1)
4523
0
    warning(_("no threads support, ignoring --threads"));
4524
0
  if (!pack_to_stdout && !pack_size_limit)
4525
0
    pack_size_limit = pack_size_limit_cfg;
4526
0
  if (pack_to_stdout && pack_size_limit)
4527
0
    die(_("--max-pack-size cannot be used to build a pack for transfer"));
4528
0
  if (pack_size_limit && pack_size_limit < 1024*1024) {
4529
0
    warning(_("minimum pack size limit is 1 MiB"));
4530
0
    pack_size_limit = 1024*1024;
4531
0
  }
4532
4533
0
  if (!pack_to_stdout && thin)
4534
0
    die(_("--thin cannot be used to build an indexable pack"));
4535
4536
0
  if (keep_unreachable && unpack_unreachable)
4537
0
    die(_("options '%s' and '%s' cannot be used together"), "--keep-unreachable", "--unpack-unreachable");
4538
0
  if (!rev_list_all || !rev_list_reflog || !rev_list_index)
4539
0
    unpack_unreachable_expiration = 0;
4540
4541
0
  if (stdin_packs && filter_options.choice)
4542
0
    die(_("cannot use --filter with --stdin-packs"));
4543
4544
0
  if (stdin_packs && use_internal_rev_list)
4545
0
    die(_("cannot use internal rev list with --stdin-packs"));
4546
4547
0
  if (cruft) {
4548
0
    if (use_internal_rev_list)
4549
0
      die(_("cannot use internal rev list with --cruft"));
4550
0
    if (stdin_packs)
4551
0
      die(_("cannot use --stdin-packs with --cruft"));
4552
0
  }
4553
4554
  /*
4555
   * "soft" reasons not to use bitmaps - for on-disk repack by default we want
4556
   *
4557
   * - to produce a good pack (with a bitmap index, not-yet-packed objects are
4558
   *   packed in suboptimal order).
4559
   *
4560
   * - to use a more robust pack-generation codepath (avoiding possible
4561
   *   bugs in bitmap code and possible bitmap index corruption).
4562
   */
4563
0
  if (!pack_to_stdout)
4564
0
    use_bitmap_index_default = 0;
4565
4566
0
  if (use_bitmap_index < 0)
4567
0
    use_bitmap_index = use_bitmap_index_default;
4568
4569
  /* "hard" reasons not to use bitmaps; these just won't work at all */
4570
0
  if (!use_internal_rev_list || (!pack_to_stdout && write_bitmap_index) || is_repository_shallow(the_repository))
4571
0
    use_bitmap_index = 0;
4572
4573
0
  if (pack_to_stdout || !rev_list_all)
4574
0
    write_bitmap_index = 0;
4575
4576
0
  if (use_delta_islands)
4577
0
    strvec_push(&rp, "--topo-order");
4578
4579
0
  if (progress && all_progress_implied)
4580
0
    progress = 2;
4581
4582
0
  add_extra_kept_packs(&keep_pack_list);
4583
0
  if (ignore_packed_keep_on_disk) {
4584
0
    struct packed_git *p;
4585
0
    for (p = get_all_packs(the_repository); p; p = p->next)
4586
0
      if (p->pack_local && p->pack_keep)
4587
0
        break;
4588
0
    if (!p) /* no keep-able packs found */
4589
0
      ignore_packed_keep_on_disk = 0;
4590
0
  }
4591
0
  if (local) {
4592
    /*
4593
     * unlike ignore_packed_keep_on_disk above, we do not
4594
     * want to unset "local" based on looking at packs, as
4595
     * it also covers non-local objects
4596
     */
4597
0
    struct packed_git *p;
4598
0
    for (p = get_all_packs(the_repository); p; p = p->next) {
4599
0
      if (!p->pack_local) {
4600
0
        have_non_local_packs = 1;
4601
0
        break;
4602
0
      }
4603
0
    }
4604
0
  }
4605
4606
0
  trace2_region_enter("pack-objects", "enumerate-objects",
4607
0
          the_repository);
4608
0
  prepare_packing_data(the_repository, &to_pack);
4609
4610
0
  if (progress && !cruft)
4611
0
    progress_state = start_progress(_("Enumerating objects"), 0);
4612
0
  if (stdin_packs) {
4613
    /* avoids adding objects in excluded packs */
4614
0
    ignore_packed_keep_in_core = 1;
4615
0
    read_packs_list_from_stdin();
4616
0
    if (rev_list_unpacked)
4617
0
      add_unreachable_loose_objects();
4618
0
  } else if (cruft) {
4619
0
    read_cruft_objects();
4620
0
  } else if (!use_internal_rev_list) {
4621
0
    read_object_list_from_stdin();
4622
0
  } else {
4623
0
    struct rev_info revs;
4624
4625
0
    repo_init_revisions(the_repository, &revs, NULL);
4626
0
    list_objects_filter_copy(&revs.filter, &filter_options);
4627
0
    get_object_list(&revs, rp.nr, rp.v);
4628
0
    release_revisions(&revs);
4629
0
  }
4630
0
  cleanup_preferred_base();
4631
0
  if (include_tag && nr_result)
4632
0
    refs_for_each_tag_ref(get_main_ref_store(the_repository),
4633
0
              add_ref_tag, NULL);
4634
0
  stop_progress(&progress_state);
4635
0
  trace2_region_leave("pack-objects", "enumerate-objects",
4636
0
          the_repository);
4637
4638
0
  if (non_empty && !nr_result)
4639
0
    goto cleanup;
4640
0
  if (nr_result) {
4641
0
    trace2_region_enter("pack-objects", "prepare-pack",
4642
0
            the_repository);
4643
0
    prepare_pack(window, depth);
4644
0
    trace2_region_leave("pack-objects", "prepare-pack",
4645
0
            the_repository);
4646
0
  }
4647
4648
0
  trace2_region_enter("pack-objects", "write-pack-file", the_repository);
4649
0
  write_excluded_by_configs();
4650
0
  write_pack_file();
4651
0
  trace2_region_leave("pack-objects", "write-pack-file", the_repository);
4652
4653
0
  if (progress)
4654
0
    fprintf_ln(stderr,
4655
0
         _("Total %"PRIu32" (delta %"PRIu32"),"
4656
0
           " reused %"PRIu32" (delta %"PRIu32"),"
4657
0
           " pack-reused %"PRIu32" (from %"PRIuMAX")"),
4658
0
         written, written_delta, reused, reused_delta,
4659
0
         reuse_packfile_objects,
4660
0
         (uintmax_t)reuse_packfiles_used_nr);
4661
4662
0
  trace2_data_intmax("pack-objects", the_repository, "written", written);
4663
0
  trace2_data_intmax("pack-objects", the_repository, "written/delta", written_delta);
4664
0
  trace2_data_intmax("pack-objects", the_repository, "reused", reused);
4665
0
  trace2_data_intmax("pack-objects", the_repository, "reused/delta", reused_delta);
4666
0
  trace2_data_intmax("pack-objects", the_repository, "pack-reused", reuse_packfile_objects);
4667
0
  trace2_data_intmax("pack-objects", the_repository, "packs-reused", reuse_packfiles_used_nr);
4668
4669
0
cleanup:
4670
0
  clear_packing_data(&to_pack);
4671
0
  list_objects_filter_release(&filter_options);
4672
0
  strvec_clear(&rp);
4673
4674
0
  return 0;
4675
0
}