Coverage Report

Created: 2023-11-19 07:08

/src/git/midx.c
Line
Count
Source (jump to first uncovered line)
1
#include "git-compat-util.h"
2
#include "abspath.h"
3
#include "config.h"
4
#include "csum-file.h"
5
#include "dir.h"
6
#include "gettext.h"
7
#include "hex.h"
8
#include "lockfile.h"
9
#include "packfile.h"
10
#include "object-file.h"
11
#include "object-store-ll.h"
12
#include "hash-lookup.h"
13
#include "midx.h"
14
#include "progress.h"
15
#include "trace2.h"
16
#include "run-command.h"
17
#include "repository.h"
18
#include "chunk-format.h"
19
#include "pack.h"
20
#include "pack-bitmap.h"
21
#include "refs.h"
22
#include "revision.h"
23
#include "list-objects.h"
24
25
0
#define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */
26
0
#define MIDX_VERSION 1
27
0
#define MIDX_BYTE_FILE_VERSION 4
28
0
#define MIDX_BYTE_HASH_VERSION 5
29
0
#define MIDX_BYTE_NUM_CHUNKS 6
30
0
#define MIDX_BYTE_NUM_PACKS 8
31
0
#define MIDX_HEADER_SIZE 12
32
0
#define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + the_hash_algo->rawsz)
33
34
0
#define MIDX_CHUNK_ALIGNMENT 4
35
0
#define MIDX_CHUNKID_PACKNAMES 0x504e414d /* "PNAM" */
36
0
#define MIDX_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
37
0
#define MIDX_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
38
0
#define MIDX_CHUNKID_OBJECTOFFSETS 0x4f4f4646 /* "OOFF" */
39
0
#define MIDX_CHUNKID_LARGEOFFSETS 0x4c4f4646 /* "LOFF" */
40
0
#define MIDX_CHUNKID_REVINDEX 0x52494458 /* "RIDX" */
41
0
#define MIDX_CHUNK_FANOUT_SIZE (sizeof(uint32_t) * 256)
42
0
#define MIDX_CHUNK_OFFSET_WIDTH (2 * sizeof(uint32_t))
43
0
#define MIDX_CHUNK_LARGE_OFFSET_WIDTH (sizeof(uint64_t))
44
0
#define MIDX_LARGE_OFFSET_NEEDED 0x80000000
45
46
0
#define PACK_EXPIRED UINT_MAX
47
48
const unsigned char *get_midx_checksum(struct multi_pack_index *m)
49
0
{
50
0
  return m->data + m->data_len - the_hash_algo->rawsz;
51
0
}
52
53
void get_midx_filename(struct strbuf *out, const char *object_dir)
54
1.22k
{
55
1.22k
  strbuf_addf(out, "%s/pack/multi-pack-index", object_dir);
56
1.22k
}
57
58
void get_midx_rev_filename(struct strbuf *out, struct multi_pack_index *m)
59
0
{
60
0
  get_midx_filename(out, m->object_dir);
61
0
  strbuf_addf(out, "-%s.rev", hash_to_hex(get_midx_checksum(m)));
62
0
}
63
64
static int midx_read_oid_fanout(const unsigned char *chunk_start,
65
        size_t chunk_size, void *data)
66
0
{
67
0
  int i;
68
0
  struct multi_pack_index *m = data;
69
0
  m->chunk_oid_fanout = (uint32_t *)chunk_start;
70
71
0
  if (chunk_size != 4 * 256) {
72
0
    error(_("multi-pack-index OID fanout is of the wrong size"));
73
0
    return 1;
74
0
  }
75
0
  for (i = 0; i < 255; i++) {
76
0
    uint32_t oid_fanout1 = ntohl(m->chunk_oid_fanout[i]);
77
0
    uint32_t oid_fanout2 = ntohl(m->chunk_oid_fanout[i+1]);
78
79
0
    if (oid_fanout1 > oid_fanout2) {
80
0
      error(_("oid fanout out of order: fanout[%d] = %"PRIx32" > %"PRIx32" = fanout[%d]"),
81
0
            i, oid_fanout1, oid_fanout2, i + 1);
82
0
      return 1;
83
0
    }
84
0
  }
85
0
  m->num_objects = ntohl(m->chunk_oid_fanout[255]);
86
0
  return 0;
87
0
}
88
89
static int midx_read_oid_lookup(const unsigned char *chunk_start,
90
        size_t chunk_size, void *data)
91
0
{
92
0
  struct multi_pack_index *m = data;
93
0
  m->chunk_oid_lookup = chunk_start;
94
95
0
  if (chunk_size != st_mult(m->hash_len, m->num_objects)) {
96
0
    error(_("multi-pack-index OID lookup chunk is the wrong size"));
97
0
    return 1;
98
0
  }
99
0
  return 0;
100
0
}
101
102
static int midx_read_object_offsets(const unsigned char *chunk_start,
103
            size_t chunk_size, void *data)
104
0
{
105
0
  struct multi_pack_index *m = data;
106
0
  m->chunk_object_offsets = chunk_start;
107
108
0
  if (chunk_size != st_mult(m->num_objects, MIDX_CHUNK_OFFSET_WIDTH)) {
109
0
    error(_("multi-pack-index object offset chunk is the wrong size"));
110
0
    return 1;
111
0
  }
112
0
  return 0;
113
0
}
114
115
struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local)
116
1.22k
{
117
1.22k
  struct multi_pack_index *m = NULL;
118
1.22k
  int fd;
119
1.22k
  struct stat st;
120
1.22k
  size_t midx_size;
121
1.22k
  void *midx_map = NULL;
122
1.22k
  uint32_t hash_version;
123
1.22k
  struct strbuf midx_name = STRBUF_INIT;
124
1.22k
  uint32_t i;
125
1.22k
  const char *cur_pack_name;
126
1.22k
  struct chunkfile *cf = NULL;
127
128
1.22k
  get_midx_filename(&midx_name, object_dir);
129
130
1.22k
  fd = git_open(midx_name.buf);
131
132
1.22k
  if (fd < 0)
133
1.22k
    goto cleanup_fail;
134
0
  if (fstat(fd, &st)) {
135
0
    error_errno(_("failed to read %s"), midx_name.buf);
136
0
    goto cleanup_fail;
137
0
  }
138
139
0
  midx_size = xsize_t(st.st_size);
140
141
0
  if (midx_size < MIDX_MIN_SIZE) {
142
0
    error(_("multi-pack-index file %s is too small"), midx_name.buf);
143
0
    goto cleanup_fail;
144
0
  }
145
146
0
  strbuf_release(&midx_name);
147
148
0
  midx_map = xmmap(NULL, midx_size, PROT_READ, MAP_PRIVATE, fd, 0);
149
0
  close(fd);
150
151
0
  FLEX_ALLOC_STR(m, object_dir, object_dir);
152
0
  m->data = midx_map;
153
0
  m->data_len = midx_size;
154
0
  m->local = local;
155
156
0
  m->signature = get_be32(m->data);
157
0
  if (m->signature != MIDX_SIGNATURE)
158
0
    die(_("multi-pack-index signature 0x%08x does not match signature 0x%08x"),
159
0
          m->signature, MIDX_SIGNATURE);
160
161
0
  m->version = m->data[MIDX_BYTE_FILE_VERSION];
162
0
  if (m->version != MIDX_VERSION)
163
0
    die(_("multi-pack-index version %d not recognized"),
164
0
          m->version);
165
166
0
  hash_version = m->data[MIDX_BYTE_HASH_VERSION];
167
0
  if (hash_version != oid_version(the_hash_algo)) {
168
0
    error(_("multi-pack-index hash version %u does not match version %u"),
169
0
          hash_version, oid_version(the_hash_algo));
170
0
    goto cleanup_fail;
171
0
  }
172
0
  m->hash_len = the_hash_algo->rawsz;
173
174
0
  m->num_chunks = m->data[MIDX_BYTE_NUM_CHUNKS];
175
176
0
  m->num_packs = get_be32(m->data + MIDX_BYTE_NUM_PACKS);
177
178
0
  cf = init_chunkfile(NULL);
179
180
0
  if (read_table_of_contents(cf, m->data, midx_size,
181
0
           MIDX_HEADER_SIZE, m->num_chunks,
182
0
           MIDX_CHUNK_ALIGNMENT))
183
0
    goto cleanup_fail;
184
185
0
  if (pair_chunk(cf, MIDX_CHUNKID_PACKNAMES, &m->chunk_pack_names, &m->chunk_pack_names_len))
186
0
    die(_("multi-pack-index required pack-name chunk missing or corrupted"));
187
0
  if (read_chunk(cf, MIDX_CHUNKID_OIDFANOUT, midx_read_oid_fanout, m))
188
0
    die(_("multi-pack-index required OID fanout chunk missing or corrupted"));
189
0
  if (read_chunk(cf, MIDX_CHUNKID_OIDLOOKUP, midx_read_oid_lookup, m))
190
0
    die(_("multi-pack-index required OID lookup chunk missing or corrupted"));
191
0
  if (read_chunk(cf, MIDX_CHUNKID_OBJECTOFFSETS, midx_read_object_offsets, m))
192
0
    die(_("multi-pack-index required object offsets chunk missing or corrupted"));
193
194
0
  pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets,
195
0
       &m->chunk_large_offsets_len);
196
197
0
  if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1))
198
0
    pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex,
199
0
         &m->chunk_revindex_len);
200
201
0
  CALLOC_ARRAY(m->pack_names, m->num_packs);
202
0
  CALLOC_ARRAY(m->packs, m->num_packs);
203
204
0
  cur_pack_name = (const char *)m->chunk_pack_names;
205
0
  for (i = 0; i < m->num_packs; i++) {
206
0
    const char *end;
207
0
    size_t avail = m->chunk_pack_names_len -
208
0
        (cur_pack_name - (const char *)m->chunk_pack_names);
209
210
0
    m->pack_names[i] = cur_pack_name;
211
212
0
    end = memchr(cur_pack_name, '\0', avail);
213
0
    if (!end)
214
0
      die(_("multi-pack-index pack-name chunk is too short"));
215
0
    cur_pack_name = end + 1;
216
217
0
    if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0)
218
0
      die(_("multi-pack-index pack names out of order: '%s' before '%s'"),
219
0
            m->pack_names[i - 1],
220
0
            m->pack_names[i]);
221
0
  }
222
223
0
  trace2_data_intmax("midx", the_repository, "load/num_packs", m->num_packs);
224
0
  trace2_data_intmax("midx", the_repository, "load/num_objects", m->num_objects);
225
226
0
  free_chunkfile(cf);
227
0
  return m;
228
229
1.22k
cleanup_fail:
230
1.22k
  free(m);
231
1.22k
  strbuf_release(&midx_name);
232
1.22k
  free_chunkfile(cf);
233
1.22k
  if (midx_map)
234
0
    munmap(midx_map, midx_size);
235
1.22k
  if (0 <= fd)
236
0
    close(fd);
237
1.22k
  return NULL;
238
0
}
239
240
void close_midx(struct multi_pack_index *m)
241
0
{
242
0
  uint32_t i;
243
244
0
  if (!m)
245
0
    return;
246
247
0
  close_midx(m->next);
248
249
0
  munmap((unsigned char *)m->data, m->data_len);
250
251
0
  for (i = 0; i < m->num_packs; i++) {
252
0
    if (m->packs[i])
253
0
      m->packs[i]->multi_pack_index = 0;
254
0
  }
255
0
  FREE_AND_NULL(m->packs);
256
0
  FREE_AND_NULL(m->pack_names);
257
0
  free(m);
258
0
}
259
260
int prepare_midx_pack(struct repository *r, struct multi_pack_index *m, uint32_t pack_int_id)
261
0
{
262
0
  struct strbuf pack_name = STRBUF_INIT;
263
0
  struct packed_git *p;
264
265
0
  if (pack_int_id >= m->num_packs)
266
0
    die(_("bad pack-int-id: %u (%u total packs)"),
267
0
        pack_int_id, m->num_packs);
268
269
0
  if (m->packs[pack_int_id])
270
0
    return 0;
271
272
0
  strbuf_addf(&pack_name, "%s/pack/%s", m->object_dir,
273
0
        m->pack_names[pack_int_id]);
274
275
0
  p = add_packed_git(pack_name.buf, pack_name.len, m->local);
276
0
  strbuf_release(&pack_name);
277
278
0
  if (!p)
279
0
    return 1;
280
281
0
  p->multi_pack_index = 1;
282
0
  m->packs[pack_int_id] = p;
283
0
  install_packed_git(r, p);
284
0
  list_add_tail(&p->mru, &r->objects->packed_git_mru);
285
286
0
  return 0;
287
0
}
288
289
int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32_t *result)
290
0
{
291
0
  return bsearch_hash(oid->hash, m->chunk_oid_fanout, m->chunk_oid_lookup,
292
0
          the_hash_algo->rawsz, result);
293
0
}
294
295
struct object_id *nth_midxed_object_oid(struct object_id *oid,
296
          struct multi_pack_index *m,
297
          uint32_t n)
298
0
{
299
0
  if (n >= m->num_objects)
300
0
    return NULL;
301
302
0
  oidread(oid, m->chunk_oid_lookup + st_mult(m->hash_len, n));
303
0
  return oid;
304
0
}
305
306
off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos)
307
0
{
308
0
  const unsigned char *offset_data;
309
0
  uint32_t offset32;
310
311
0
  offset_data = m->chunk_object_offsets + (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH;
312
0
  offset32 = get_be32(offset_data + sizeof(uint32_t));
313
314
0
  if (m->chunk_large_offsets && offset32 & MIDX_LARGE_OFFSET_NEEDED) {
315
0
    if (sizeof(off_t) < sizeof(uint64_t))
316
0
      die(_("multi-pack-index stores a 64-bit offset, but off_t is too small"));
317
318
0
    offset32 ^= MIDX_LARGE_OFFSET_NEEDED;
319
0
    if (offset32 >= m->chunk_large_offsets_len / sizeof(uint64_t))
320
0
      die(_("multi-pack-index large offset out of bounds"));
321
0
    return get_be64(m->chunk_large_offsets + sizeof(uint64_t) * offset32);
322
0
  }
323
324
0
  return offset32;
325
0
}
326
327
uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos)
328
0
{
329
0
  return get_be32(m->chunk_object_offsets +
330
0
      (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH);
331
0
}
332
333
int fill_midx_entry(struct repository *r,
334
        const struct object_id *oid,
335
        struct pack_entry *e,
336
        struct multi_pack_index *m)
337
0
{
338
0
  uint32_t pos;
339
0
  uint32_t pack_int_id;
340
0
  struct packed_git *p;
341
342
0
  if (!bsearch_midx(oid, m, &pos))
343
0
    return 0;
344
345
0
  if (pos >= m->num_objects)
346
0
    return 0;
347
348
0
  pack_int_id = nth_midxed_pack_int_id(m, pos);
349
350
0
  if (prepare_midx_pack(r, m, pack_int_id))
351
0
    return 0;
352
0
  p = m->packs[pack_int_id];
353
354
  /*
355
  * We are about to tell the caller where they can locate the
356
  * requested object.  We better make sure the packfile is
357
  * still here and can be accessed before supplying that
358
  * answer, as it may have been deleted since the MIDX was
359
  * loaded!
360
  */
361
0
  if (!is_pack_valid(p))
362
0
    return 0;
363
364
0
  if (oidset_size(&p->bad_objects) &&
365
0
      oidset_contains(&p->bad_objects, oid))
366
0
    return 0;
367
368
0
  e->offset = nth_midxed_offset(m, pos);
369
0
  e->p = p;
370
371
0
  return 1;
372
0
}
373
374
/* Match "foo.idx" against either "foo.pack" _or_ "foo.idx". */
375
static int cmp_idx_or_pack_name(const char *idx_or_pack_name,
376
        const char *idx_name)
377
0
{
378
  /* Skip past any initial matching prefix. */
379
0
  while (*idx_name && *idx_name == *idx_or_pack_name) {
380
0
    idx_name++;
381
0
    idx_or_pack_name++;
382
0
  }
383
384
  /*
385
   * If we didn't match completely, we may have matched "pack-1234." and
386
   * be left with "idx" and "pack" respectively, which is also OK. We do
387
   * not have to check for "idx" and "idx", because that would have been
388
   * a complete match (and in that case these strcmps will be false, but
389
   * we'll correctly return 0 from the final strcmp() below.
390
   *
391
   * Technically this matches "fooidx" and "foopack", but we'd never have
392
   * such names in the first place.
393
   */
394
0
  if (!strcmp(idx_name, "idx") && !strcmp(idx_or_pack_name, "pack"))
395
0
    return 0;
396
397
  /*
398
   * This not only checks for a complete match, but also orders based on
399
   * the first non-identical character, which means our ordering will
400
   * match a raw strcmp(). That makes it OK to use this to binary search
401
   * a naively-sorted list.
402
   */
403
0
  return strcmp(idx_or_pack_name, idx_name);
404
0
}
405
406
int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name)
407
0
{
408
0
  uint32_t first = 0, last = m->num_packs;
409
410
0
  while (first < last) {
411
0
    uint32_t mid = first + (last - first) / 2;
412
0
    const char *current;
413
0
    int cmp;
414
415
0
    current = m->pack_names[mid];
416
0
    cmp = cmp_idx_or_pack_name(idx_or_pack_name, current);
417
0
    if (!cmp)
418
0
      return 1;
419
0
    if (cmp > 0) {
420
0
      first = mid + 1;
421
0
      continue;
422
0
    }
423
0
    last = mid;
424
0
  }
425
426
0
  return 0;
427
0
}
428
429
int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, int local)
430
1.22k
{
431
1.22k
  struct multi_pack_index *m;
432
1.22k
  struct multi_pack_index *m_search;
433
434
1.22k
  prepare_repo_settings(r);
435
1.22k
  if (!r->settings.core_multi_pack_index)
436
0
    return 0;
437
438
1.22k
  for (m_search = r->objects->multi_pack_index; m_search; m_search = m_search->next)
439
0
    if (!strcmp(object_dir, m_search->object_dir))
440
0
      return 1;
441
442
1.22k
  m = load_multi_pack_index(object_dir, local);
443
444
1.22k
  if (m) {
445
0
    struct multi_pack_index *mp = r->objects->multi_pack_index;
446
0
    if (mp) {
447
0
      m->next = mp->next;
448
0
      mp->next = m;
449
0
    } else
450
0
      r->objects->multi_pack_index = m;
451
0
    return 1;
452
0
  }
453
454
1.22k
  return 0;
455
1.22k
}
456
457
static size_t write_midx_header(struct hashfile *f,
458
        unsigned char num_chunks,
459
        uint32_t num_packs)
460
0
{
461
0
  hashwrite_be32(f, MIDX_SIGNATURE);
462
0
  hashwrite_u8(f, MIDX_VERSION);
463
0
  hashwrite_u8(f, oid_version(the_hash_algo));
464
0
  hashwrite_u8(f, num_chunks);
465
0
  hashwrite_u8(f, 0); /* unused */
466
0
  hashwrite_be32(f, num_packs);
467
468
0
  return MIDX_HEADER_SIZE;
469
0
}
470
471
struct pack_info {
472
  uint32_t orig_pack_int_id;
473
  char *pack_name;
474
  struct packed_git *p;
475
  unsigned expired : 1;
476
};
477
478
static int pack_info_compare(const void *_a, const void *_b)
479
0
{
480
0
  struct pack_info *a = (struct pack_info *)_a;
481
0
  struct pack_info *b = (struct pack_info *)_b;
482
0
  return strcmp(a->pack_name, b->pack_name);
483
0
}
484
485
static int idx_or_pack_name_cmp(const void *_va, const void *_vb)
486
0
{
487
0
  const char *pack_name = _va;
488
0
  const struct pack_info *compar = _vb;
489
490
0
  return cmp_idx_or_pack_name(pack_name, compar->pack_name);
491
0
}
492
493
struct write_midx_context {
494
  struct pack_info *info;
495
  size_t nr;
496
  size_t alloc;
497
  struct multi_pack_index *m;
498
  struct progress *progress;
499
  unsigned pack_paths_checked;
500
501
  struct pack_midx_entry *entries;
502
  size_t entries_nr;
503
504
  uint32_t *pack_perm;
505
  uint32_t *pack_order;
506
  unsigned large_offsets_needed:1;
507
  uint32_t num_large_offsets;
508
509
  int preferred_pack_idx;
510
511
  struct string_list *to_include;
512
};
513
514
static void add_pack_to_midx(const char *full_path, size_t full_path_len,
515
           const char *file_name, void *data)
516
0
{
517
0
  struct write_midx_context *ctx = data;
518
519
0
  if (ends_with(file_name, ".idx")) {
520
0
    display_progress(ctx->progress, ++ctx->pack_paths_checked);
521
    /*
522
     * Note that at most one of ctx->m and ctx->to_include are set,
523
     * so we are testing midx_contains_pack() and
524
     * string_list_has_string() independently (guarded by the
525
     * appropriate NULL checks).
526
     *
527
     * We could support passing to_include while reusing an existing
528
     * MIDX, but don't currently since the reuse process drags
529
     * forward all packs from an existing MIDX (without checking
530
     * whether or not they appear in the to_include list).
531
     *
532
     * If we added support for that, these next two conditional
533
     * should be performed independently (likely checking
534
     * to_include before the existing MIDX).
535
     */
536
0
    if (ctx->m && midx_contains_pack(ctx->m, file_name))
537
0
      return;
538
0
    else if (ctx->to_include &&
539
0
       !string_list_has_string(ctx->to_include, file_name))
540
0
      return;
541
542
0
    ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);
543
544
0
    ctx->info[ctx->nr].p = add_packed_git(full_path,
545
0
                  full_path_len,
546
0
                  0);
547
548
0
    if (!ctx->info[ctx->nr].p) {
549
0
      warning(_("failed to add packfile '%s'"),
550
0
        full_path);
551
0
      return;
552
0
    }
553
554
0
    if (open_pack_index(ctx->info[ctx->nr].p)) {
555
0
      warning(_("failed to open pack-index '%s'"),
556
0
        full_path);
557
0
      close_pack(ctx->info[ctx->nr].p);
558
0
      FREE_AND_NULL(ctx->info[ctx->nr].p);
559
0
      return;
560
0
    }
561
562
0
    ctx->info[ctx->nr].pack_name = xstrdup(file_name);
563
0
    ctx->info[ctx->nr].orig_pack_int_id = ctx->nr;
564
0
    ctx->info[ctx->nr].expired = 0;
565
0
    ctx->nr++;
566
0
  }
567
0
}
568
569
struct pack_midx_entry {
570
  struct object_id oid;
571
  uint32_t pack_int_id;
572
  time_t pack_mtime;
573
  uint64_t offset;
574
  unsigned preferred : 1;
575
};
576
577
static int midx_oid_compare(const void *_a, const void *_b)
578
0
{
579
0
  const struct pack_midx_entry *a = (const struct pack_midx_entry *)_a;
580
0
  const struct pack_midx_entry *b = (const struct pack_midx_entry *)_b;
581
0
  int cmp = oidcmp(&a->oid, &b->oid);
582
583
0
  if (cmp)
584
0
    return cmp;
585
586
  /* Sort objects in a preferred pack first when multiple copies exist. */
587
0
  if (a->preferred > b->preferred)
588
0
    return -1;
589
0
  if (a->preferred < b->preferred)
590
0
    return 1;
591
592
0
  if (a->pack_mtime > b->pack_mtime)
593
0
    return -1;
594
0
  else if (a->pack_mtime < b->pack_mtime)
595
0
    return 1;
596
597
0
  return a->pack_int_id - b->pack_int_id;
598
0
}
599
600
static int nth_midxed_pack_midx_entry(struct multi_pack_index *m,
601
              struct pack_midx_entry *e,
602
              uint32_t pos)
603
0
{
604
0
  if (pos >= m->num_objects)
605
0
    return 1;
606
607
0
  nth_midxed_object_oid(&e->oid, m, pos);
608
0
  e->pack_int_id = nth_midxed_pack_int_id(m, pos);
609
0
  e->offset = nth_midxed_offset(m, pos);
610
611
  /* consider objects in midx to be from "old" packs */
612
0
  e->pack_mtime = 0;
613
0
  return 0;
614
0
}
615
616
static void fill_pack_entry(uint32_t pack_int_id,
617
          struct packed_git *p,
618
          uint32_t cur_object,
619
          struct pack_midx_entry *entry,
620
          int preferred)
621
0
{
622
0
  if (nth_packed_object_id(&entry->oid, p, cur_object) < 0)
623
0
    die(_("failed to locate object %d in packfile"), cur_object);
624
625
0
  entry->pack_int_id = pack_int_id;
626
0
  entry->pack_mtime = p->mtime;
627
628
0
  entry->offset = nth_packed_object_offset(p, cur_object);
629
0
  entry->preferred = !!preferred;
630
0
}
631
632
struct midx_fanout {
633
  struct pack_midx_entry *entries;
634
  size_t nr, alloc;
635
};
636
637
static void midx_fanout_grow(struct midx_fanout *fanout, size_t nr)
638
0
{
639
0
  if (nr < fanout->nr)
640
0
    BUG("negative growth in midx_fanout_grow() (%"PRIuMAX" < %"PRIuMAX")",
641
0
        (uintmax_t)nr, (uintmax_t)fanout->nr);
642
0
  ALLOC_GROW(fanout->entries, nr, fanout->alloc);
643
0
}
644
645
static void midx_fanout_sort(struct midx_fanout *fanout)
646
0
{
647
0
  QSORT(fanout->entries, fanout->nr, midx_oid_compare);
648
0
}
649
650
static void midx_fanout_add_midx_fanout(struct midx_fanout *fanout,
651
          struct multi_pack_index *m,
652
          uint32_t cur_fanout,
653
          int preferred_pack)
654
0
{
655
0
  uint32_t start = 0, end;
656
0
  uint32_t cur_object;
657
658
0
  if (cur_fanout)
659
0
    start = ntohl(m->chunk_oid_fanout[cur_fanout - 1]);
660
0
  end = ntohl(m->chunk_oid_fanout[cur_fanout]);
661
662
0
  for (cur_object = start; cur_object < end; cur_object++) {
663
0
    if ((preferred_pack > -1) &&
664
0
        (preferred_pack == nth_midxed_pack_int_id(m, cur_object))) {
665
      /*
666
       * Objects from preferred packs are added
667
       * separately.
668
       */
669
0
      continue;
670
0
    }
671
672
0
    midx_fanout_grow(fanout, fanout->nr + 1);
673
0
    nth_midxed_pack_midx_entry(m,
674
0
             &fanout->entries[fanout->nr],
675
0
             cur_object);
676
0
    fanout->entries[fanout->nr].preferred = 0;
677
0
    fanout->nr++;
678
0
  }
679
0
}
680
681
static void midx_fanout_add_pack_fanout(struct midx_fanout *fanout,
682
          struct pack_info *info,
683
          uint32_t cur_pack,
684
          int preferred,
685
          uint32_t cur_fanout)
686
0
{
687
0
  struct packed_git *pack = info[cur_pack].p;
688
0
  uint32_t start = 0, end;
689
0
  uint32_t cur_object;
690
691
0
  if (cur_fanout)
692
0
    start = get_pack_fanout(pack, cur_fanout - 1);
693
0
  end = get_pack_fanout(pack, cur_fanout);
694
695
0
  for (cur_object = start; cur_object < end; cur_object++) {
696
0
    midx_fanout_grow(fanout, fanout->nr + 1);
697
0
    fill_pack_entry(cur_pack,
698
0
        info[cur_pack].p,
699
0
        cur_object,
700
0
        &fanout->entries[fanout->nr],
701
0
        preferred);
702
0
    fanout->nr++;
703
0
  }
704
0
}
705
706
/*
707
 * It is possible to artificially get into a state where there are many
708
 * duplicate copies of objects. That can create high memory pressure if
709
 * we are to create a list of all objects before de-duplication. To reduce
710
 * this memory pressure without a significant performance drop, automatically
711
 * group objects by the first byte of their object id. Use the IDX fanout
712
 * tables to group the data, copy to a local array, then sort.
713
 *
714
 * Copy only the de-duplicated entries (selected by most-recent modified time
715
 * of a packfile containing the object).
716
 */
717
static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
718
              struct pack_info *info,
719
              uint32_t nr_packs,
720
              size_t *nr_objects,
721
              int preferred_pack)
722
0
{
723
0
  uint32_t cur_fanout, cur_pack, cur_object;
724
0
  size_t alloc_objects, total_objects = 0;
725
0
  struct midx_fanout fanout = { 0 };
726
0
  struct pack_midx_entry *deduplicated_entries = NULL;
727
0
  uint32_t start_pack = m ? m->num_packs : 0;
728
729
0
  for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++)
730
0
    total_objects = st_add(total_objects,
731
0
               info[cur_pack].p->num_objects);
732
733
  /*
734
   * As we de-duplicate by fanout value, we expect the fanout
735
   * slices to be evenly distributed, with some noise. Hence,
736
   * allocate slightly more than one 256th.
737
   */
738
0
  alloc_objects = fanout.alloc = total_objects > 3200 ? total_objects / 200 : 16;
739
740
0
  ALLOC_ARRAY(fanout.entries, fanout.alloc);
741
0
  ALLOC_ARRAY(deduplicated_entries, alloc_objects);
742
0
  *nr_objects = 0;
743
744
0
  for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) {
745
0
    fanout.nr = 0;
746
747
0
    if (m)
748
0
      midx_fanout_add_midx_fanout(&fanout, m, cur_fanout,
749
0
                preferred_pack);
750
751
0
    for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++) {
752
0
      int preferred = cur_pack == preferred_pack;
753
0
      midx_fanout_add_pack_fanout(&fanout,
754
0
                info, cur_pack,
755
0
                preferred, cur_fanout);
756
0
    }
757
758
0
    if (-1 < preferred_pack && preferred_pack < start_pack)
759
0
      midx_fanout_add_pack_fanout(&fanout, info,
760
0
                preferred_pack, 1,
761
0
                cur_fanout);
762
763
0
    midx_fanout_sort(&fanout);
764
765
    /*
766
     * The batch is now sorted by OID and then mtime (descending).
767
     * Take only the first duplicate.
768
     */
769
0
    for (cur_object = 0; cur_object < fanout.nr; cur_object++) {
770
0
      if (cur_object && oideq(&fanout.entries[cur_object - 1].oid,
771
0
            &fanout.entries[cur_object].oid))
772
0
        continue;
773
774
0
      ALLOC_GROW(deduplicated_entries, st_add(*nr_objects, 1),
775
0
           alloc_objects);
776
0
      memcpy(&deduplicated_entries[*nr_objects],
777
0
             &fanout.entries[cur_object],
778
0
             sizeof(struct pack_midx_entry));
779
0
      (*nr_objects)++;
780
0
    }
781
0
  }
782
783
0
  free(fanout.entries);
784
0
  return deduplicated_entries;
785
0
}
786
787
static int write_midx_pack_names(struct hashfile *f, void *data)
788
0
{
789
0
  struct write_midx_context *ctx = data;
790
0
  uint32_t i;
791
0
  unsigned char padding[MIDX_CHUNK_ALIGNMENT];
792
0
  size_t written = 0;
793
794
0
  for (i = 0; i < ctx->nr; i++) {
795
0
    size_t writelen;
796
797
0
    if (ctx->info[i].expired)
798
0
      continue;
799
800
0
    if (i && strcmp(ctx->info[i].pack_name, ctx->info[i - 1].pack_name) <= 0)
801
0
      BUG("incorrect pack-file order: %s before %s",
802
0
          ctx->info[i - 1].pack_name,
803
0
          ctx->info[i].pack_name);
804
805
0
    writelen = strlen(ctx->info[i].pack_name) + 1;
806
0
    hashwrite(f, ctx->info[i].pack_name, writelen);
807
0
    written += writelen;
808
0
  }
809
810
  /* add padding to be aligned */
811
0
  i = MIDX_CHUNK_ALIGNMENT - (written % MIDX_CHUNK_ALIGNMENT);
812
0
  if (i < MIDX_CHUNK_ALIGNMENT) {
813
0
    memset(padding, 0, sizeof(padding));
814
0
    hashwrite(f, padding, i);
815
0
  }
816
817
0
  return 0;
818
0
}
819
820
static int write_midx_oid_fanout(struct hashfile *f,
821
         void *data)
822
0
{
823
0
  struct write_midx_context *ctx = data;
824
0
  struct pack_midx_entry *list = ctx->entries;
825
0
  struct pack_midx_entry *last = ctx->entries + ctx->entries_nr;
826
0
  uint32_t count = 0;
827
0
  uint32_t i;
828
829
  /*
830
  * Write the first-level table (the list is sorted,
831
  * but we use a 256-entry lookup to be able to avoid
832
  * having to do eight extra binary search iterations).
833
  */
834
0
  for (i = 0; i < 256; i++) {
835
0
    struct pack_midx_entry *next = list;
836
837
0
    while (next < last && next->oid.hash[0] == i) {
838
0
      count++;
839
0
      next++;
840
0
    }
841
842
0
    hashwrite_be32(f, count);
843
0
    list = next;
844
0
  }
845
846
0
  return 0;
847
0
}
848
849
static int write_midx_oid_lookup(struct hashfile *f,
850
         void *data)
851
0
{
852
0
  struct write_midx_context *ctx = data;
853
0
  unsigned char hash_len = the_hash_algo->rawsz;
854
0
  struct pack_midx_entry *list = ctx->entries;
855
0
  uint32_t i;
856
857
0
  for (i = 0; i < ctx->entries_nr; i++) {
858
0
    struct pack_midx_entry *obj = list++;
859
860
0
    if (i < ctx->entries_nr - 1) {
861
0
      struct pack_midx_entry *next = list;
862
0
      if (oidcmp(&obj->oid, &next->oid) >= 0)
863
0
        BUG("OIDs not in order: %s >= %s",
864
0
            oid_to_hex(&obj->oid),
865
0
            oid_to_hex(&next->oid));
866
0
    }
867
868
0
    hashwrite(f, obj->oid.hash, (int)hash_len);
869
0
  }
870
871
0
  return 0;
872
0
}
873
874
static int write_midx_object_offsets(struct hashfile *f,
875
             void *data)
876
0
{
877
0
  struct write_midx_context *ctx = data;
878
0
  struct pack_midx_entry *list = ctx->entries;
879
0
  uint32_t i, nr_large_offset = 0;
880
881
0
  for (i = 0; i < ctx->entries_nr; i++) {
882
0
    struct pack_midx_entry *obj = list++;
883
884
0
    if (ctx->pack_perm[obj->pack_int_id] == PACK_EXPIRED)
885
0
      BUG("object %s is in an expired pack with int-id %d",
886
0
          oid_to_hex(&obj->oid),
887
0
          obj->pack_int_id);
888
889
0
    hashwrite_be32(f, ctx->pack_perm[obj->pack_int_id]);
890
891
0
    if (ctx->large_offsets_needed && obj->offset >> 31)
892
0
      hashwrite_be32(f, MIDX_LARGE_OFFSET_NEEDED | nr_large_offset++);
893
0
    else if (!ctx->large_offsets_needed && obj->offset >> 32)
894
0
      BUG("object %s requires a large offset (%"PRIx64") but the MIDX is not writing large offsets!",
895
0
          oid_to_hex(&obj->oid),
896
0
          obj->offset);
897
0
    else
898
0
      hashwrite_be32(f, (uint32_t)obj->offset);
899
0
  }
900
901
0
  return 0;
902
0
}
903
904
static int write_midx_large_offsets(struct hashfile *f,
905
            void *data)
906
0
{
907
0
  struct write_midx_context *ctx = data;
908
0
  struct pack_midx_entry *list = ctx->entries;
909
0
  struct pack_midx_entry *end = ctx->entries + ctx->entries_nr;
910
0
  uint32_t nr_large_offset = ctx->num_large_offsets;
911
912
0
  while (nr_large_offset) {
913
0
    struct pack_midx_entry *obj;
914
0
    uint64_t offset;
915
916
0
    if (list >= end)
917
0
      BUG("too many large-offset objects");
918
919
0
    obj = list++;
920
0
    offset = obj->offset;
921
922
0
    if (!(offset >> 31))
923
0
      continue;
924
925
0
    hashwrite_be64(f, offset);
926
927
0
    nr_large_offset--;
928
0
  }
929
930
0
  return 0;
931
0
}
932
933
static int write_midx_revindex(struct hashfile *f,
934
             void *data)
935
0
{
936
0
  struct write_midx_context *ctx = data;
937
0
  uint32_t i;
938
939
0
  for (i = 0; i < ctx->entries_nr; i++)
940
0
    hashwrite_be32(f, ctx->pack_order[i]);
941
942
0
  return 0;
943
0
}
944
945
struct midx_pack_order_data {
946
  uint32_t nr;
947
  uint32_t pack;
948
  off_t offset;
949
};
950
951
static int midx_pack_order_cmp(const void *va, const void *vb)
952
0
{
953
0
  const struct midx_pack_order_data *a = va, *b = vb;
954
0
  if (a->pack < b->pack)
955
0
    return -1;
956
0
  else if (a->pack > b->pack)
957
0
    return 1;
958
0
  else if (a->offset < b->offset)
959
0
    return -1;
960
0
  else if (a->offset > b->offset)
961
0
    return 1;
962
0
  else
963
0
    return 0;
964
0
}
965
966
static uint32_t *midx_pack_order(struct write_midx_context *ctx)
967
0
{
968
0
  struct midx_pack_order_data *data;
969
0
  uint32_t *pack_order;
970
0
  uint32_t i;
971
972
0
  trace2_region_enter("midx", "midx_pack_order", the_repository);
973
974
0
  ALLOC_ARRAY(data, ctx->entries_nr);
975
0
  for (i = 0; i < ctx->entries_nr; i++) {
976
0
    struct pack_midx_entry *e = &ctx->entries[i];
977
0
    data[i].nr = i;
978
0
    data[i].pack = ctx->pack_perm[e->pack_int_id];
979
0
    if (!e->preferred)
980
0
      data[i].pack |= (1U << 31);
981
0
    data[i].offset = e->offset;
982
0
  }
983
984
0
  QSORT(data, ctx->entries_nr, midx_pack_order_cmp);
985
986
0
  ALLOC_ARRAY(pack_order, ctx->entries_nr);
987
0
  for (i = 0; i < ctx->entries_nr; i++)
988
0
    pack_order[i] = data[i].nr;
989
0
  free(data);
990
991
0
  trace2_region_leave("midx", "midx_pack_order", the_repository);
992
993
0
  return pack_order;
994
0
}
995
996
static void write_midx_reverse_index(char *midx_name, unsigned char *midx_hash,
997
             struct write_midx_context *ctx)
998
0
{
999
0
  struct strbuf buf = STRBUF_INIT;
1000
0
  const char *tmp_file;
1001
1002
0
  trace2_region_enter("midx", "write_midx_reverse_index", the_repository);
1003
1004
0
  strbuf_addf(&buf, "%s-%s.rev", midx_name, hash_to_hex(midx_hash));
1005
1006
0
  tmp_file = write_rev_file_order(NULL, ctx->pack_order, ctx->entries_nr,
1007
0
          midx_hash, WRITE_REV);
1008
1009
0
  if (finalize_object_file(tmp_file, buf.buf))
1010
0
    die(_("cannot store reverse index file"));
1011
1012
0
  strbuf_release(&buf);
1013
1014
0
  trace2_region_leave("midx", "write_midx_reverse_index", the_repository);
1015
0
}
1016
1017
static void clear_midx_files_ext(const char *object_dir, const char *ext,
1018
         unsigned char *keep_hash);
1019
1020
static int midx_checksum_valid(struct multi_pack_index *m)
1021
0
{
1022
0
  return hashfile_checksum_valid(m->data, m->data_len);
1023
0
}
1024
1025
static void prepare_midx_packing_data(struct packing_data *pdata,
1026
              struct write_midx_context *ctx)
1027
0
{
1028
0
  uint32_t i;
1029
1030
0
  trace2_region_enter("midx", "prepare_midx_packing_data", the_repository);
1031
1032
0
  memset(pdata, 0, sizeof(struct packing_data));
1033
0
  prepare_packing_data(the_repository, pdata);
1034
1035
0
  for (i = 0; i < ctx->entries_nr; i++) {
1036
0
    struct pack_midx_entry *from = &ctx->entries[ctx->pack_order[i]];
1037
0
    struct object_entry *to = packlist_alloc(pdata, &from->oid);
1038
1039
0
    oe_set_in_pack(pdata, to,
1040
0
             ctx->info[ctx->pack_perm[from->pack_int_id]].p);
1041
0
  }
1042
1043
0
  trace2_region_leave("midx", "prepare_midx_packing_data", the_repository);
1044
0
}
1045
1046
static int add_ref_to_pending(const char *refname,
1047
            const struct object_id *oid,
1048
            int flag, void *cb_data)
1049
0
{
1050
0
  struct rev_info *revs = (struct rev_info*)cb_data;
1051
0
  struct object_id peeled;
1052
0
  struct object *object;
1053
1054
0
  if ((flag & REF_ISSYMREF) && (flag & REF_ISBROKEN)) {
1055
0
    warning("symbolic ref is dangling: %s", refname);
1056
0
    return 0;
1057
0
  }
1058
1059
0
  if (!peel_iterated_oid(oid, &peeled))
1060
0
    oid = &peeled;
1061
1062
0
  object = parse_object_or_die(oid, refname);
1063
0
  if (object->type != OBJ_COMMIT)
1064
0
    return 0;
1065
1066
0
  add_pending_object(revs, object, "");
1067
0
  if (bitmap_is_preferred_refname(revs->repo, refname))
1068
0
    object->flags |= NEEDS_BITMAP;
1069
0
  return 0;
1070
0
}
1071
1072
struct bitmap_commit_cb {
1073
  struct commit **commits;
1074
  size_t commits_nr, commits_alloc;
1075
1076
  struct write_midx_context *ctx;
1077
};
1078
1079
static const struct object_id *bitmap_oid_access(size_t index,
1080
             const void *_entries)
1081
0
{
1082
0
  const struct pack_midx_entry *entries = _entries;
1083
0
  return &entries[index].oid;
1084
0
}
1085
1086
static void bitmap_show_commit(struct commit *commit, void *_data)
1087
0
{
1088
0
  struct bitmap_commit_cb *data = _data;
1089
0
  int pos = oid_pos(&commit->object.oid, data->ctx->entries,
1090
0
        data->ctx->entries_nr,
1091
0
        bitmap_oid_access);
1092
0
  if (pos < 0)
1093
0
    return;
1094
1095
0
  ALLOC_GROW(data->commits, data->commits_nr + 1, data->commits_alloc);
1096
0
  data->commits[data->commits_nr++] = commit;
1097
0
}
1098
1099
static int read_refs_snapshot(const char *refs_snapshot,
1100
            struct rev_info *revs)
1101
0
{
1102
0
  struct strbuf buf = STRBUF_INIT;
1103
0
  struct object_id oid;
1104
0
  FILE *f = xfopen(refs_snapshot, "r");
1105
1106
0
  while (strbuf_getline(&buf, f) != EOF) {
1107
0
    struct object *object;
1108
0
    int preferred = 0;
1109
0
    char *hex = buf.buf;
1110
0
    const char *end = NULL;
1111
1112
0
    if (buf.len && *buf.buf == '+') {
1113
0
      preferred = 1;
1114
0
      hex = &buf.buf[1];
1115
0
    }
1116
1117
0
    if (parse_oid_hex(hex, &oid, &end) < 0)
1118
0
      die(_("could not parse line: %s"), buf.buf);
1119
0
    if (*end)
1120
0
      die(_("malformed line: %s"), buf.buf);
1121
1122
0
    object = parse_object_or_die(&oid, NULL);
1123
0
    if (preferred)
1124
0
      object->flags |= NEEDS_BITMAP;
1125
1126
0
    add_pending_object(revs, object, "");
1127
0
  }
1128
1129
0
  fclose(f);
1130
0
  strbuf_release(&buf);
1131
0
  return 0;
1132
0
}
1133
1134
static struct commit **find_commits_for_midx_bitmap(uint32_t *indexed_commits_nr_p,
1135
                const char *refs_snapshot,
1136
                struct write_midx_context *ctx)
1137
0
{
1138
0
  struct rev_info revs;
1139
0
  struct bitmap_commit_cb cb = {0};
1140
1141
0
  trace2_region_enter("midx", "find_commits_for_midx_bitmap",
1142
0
          the_repository);
1143
1144
0
  cb.ctx = ctx;
1145
1146
0
  repo_init_revisions(the_repository, &revs, NULL);
1147
0
  if (refs_snapshot) {
1148
0
    read_refs_snapshot(refs_snapshot, &revs);
1149
0
  } else {
1150
0
    setup_revisions(0, NULL, &revs, NULL);
1151
0
    for_each_ref(add_ref_to_pending, &revs);
1152
0
  }
1153
1154
  /*
1155
   * Skipping promisor objects here is intentional, since it only excludes
1156
   * them from the list of reachable commits that we want to select from
1157
   * when computing the selection of MIDX'd commits to receive bitmaps.
1158
   *
1159
   * Reachability bitmaps do require that their objects be closed under
1160
   * reachability, but fetching any objects missing from promisors at this
1161
   * point is too late. But, if one of those objects can be reached from
1162
   * an another object that is included in the bitmap, then we will
1163
   * complain later that we don't have reachability closure (and fail
1164
   * appropriately).
1165
   */
1166
0
  fetch_if_missing = 0;
1167
0
  revs.exclude_promisor_objects = 1;
1168
1169
0
  if (prepare_revision_walk(&revs))
1170
0
    die(_("revision walk setup failed"));
1171
1172
0
  traverse_commit_list(&revs, bitmap_show_commit, NULL, &cb);
1173
0
  if (indexed_commits_nr_p)
1174
0
    *indexed_commits_nr_p = cb.commits_nr;
1175
1176
0
  release_revisions(&revs);
1177
1178
0
  trace2_region_leave("midx", "find_commits_for_midx_bitmap",
1179
0
          the_repository);
1180
1181
0
  return cb.commits;
1182
0
}
1183
1184
static int write_midx_bitmap(const char *midx_name,
1185
           const unsigned char *midx_hash,
1186
           struct packing_data *pdata,
1187
           struct commit **commits,
1188
           uint32_t commits_nr,
1189
           uint32_t *pack_order,
1190
           unsigned flags)
1191
0
{
1192
0
  int ret, i;
1193
0
  uint16_t options = 0;
1194
0
  struct pack_idx_entry **index;
1195
0
  char *bitmap_name = xstrfmt("%s-%s.bitmap", midx_name,
1196
0
          hash_to_hex(midx_hash));
1197
1198
0
  trace2_region_enter("midx", "write_midx_bitmap", the_repository);
1199
1200
0
  if (flags & MIDX_WRITE_BITMAP_HASH_CACHE)
1201
0
    options |= BITMAP_OPT_HASH_CACHE;
1202
1203
0
  if (flags & MIDX_WRITE_BITMAP_LOOKUP_TABLE)
1204
0
    options |= BITMAP_OPT_LOOKUP_TABLE;
1205
1206
  /*
1207
   * Build the MIDX-order index based on pdata.objects (which is already
1208
   * in MIDX order; c.f., 'midx_pack_order_cmp()' for the definition of
1209
   * this order).
1210
   */
1211
0
  ALLOC_ARRAY(index, pdata->nr_objects);
1212
0
  for (i = 0; i < pdata->nr_objects; i++)
1213
0
    index[i] = &pdata->objects[i].idx;
1214
1215
0
  bitmap_writer_show_progress(flags & MIDX_PROGRESS);
1216
0
  bitmap_writer_build_type_index(pdata, index, pdata->nr_objects);
1217
1218
  /*
1219
   * bitmap_writer_finish expects objects in lex order, but pack_order
1220
   * gives us exactly that. use it directly instead of re-sorting the
1221
   * array.
1222
   *
1223
   * This changes the order of objects in 'index' between
1224
   * bitmap_writer_build_type_index and bitmap_writer_finish.
1225
   *
1226
   * The same re-ordering takes place in the single-pack bitmap code via
1227
   * write_idx_file(), which is called by finish_tmp_packfile(), which
1228
   * happens between bitmap_writer_build_type_index() and
1229
   * bitmap_writer_finish().
1230
   */
1231
0
  for (i = 0; i < pdata->nr_objects; i++)
1232
0
    index[pack_order[i]] = &pdata->objects[i].idx;
1233
1234
0
  bitmap_writer_select_commits(commits, commits_nr, -1);
1235
0
  ret = bitmap_writer_build(pdata);
1236
0
  if (ret < 0)
1237
0
    goto cleanup;
1238
1239
0
  bitmap_writer_set_checksum(midx_hash);
1240
0
  bitmap_writer_finish(index, pdata->nr_objects, bitmap_name, options);
1241
1242
0
cleanup:
1243
0
  free(index);
1244
0
  free(bitmap_name);
1245
1246
0
  trace2_region_leave("midx", "write_midx_bitmap", the_repository);
1247
1248
0
  return ret;
1249
0
}
1250
1251
static struct multi_pack_index *lookup_multi_pack_index(struct repository *r,
1252
              const char *object_dir)
1253
0
{
1254
0
  struct multi_pack_index *result = NULL;
1255
0
  struct multi_pack_index *cur;
1256
0
  char *obj_dir_real = real_pathdup(object_dir, 1);
1257
0
  struct strbuf cur_path_real = STRBUF_INIT;
1258
1259
  /* Ensure the given object_dir is local, or a known alternate. */
1260
0
  find_odb(r, obj_dir_real);
1261
1262
0
  for (cur = get_multi_pack_index(r); cur; cur = cur->next) {
1263
0
    strbuf_realpath(&cur_path_real, cur->object_dir, 1);
1264
0
    if (!strcmp(obj_dir_real, cur_path_real.buf)) {
1265
0
      result = cur;
1266
0
      goto cleanup;
1267
0
    }
1268
0
  }
1269
1270
0
cleanup:
1271
0
  free(obj_dir_real);
1272
0
  strbuf_release(&cur_path_real);
1273
0
  return result;
1274
0
}
1275
1276
static int write_midx_internal(const char *object_dir,
1277
             struct string_list *packs_to_include,
1278
             struct string_list *packs_to_drop,
1279
             const char *preferred_pack_name,
1280
             const char *refs_snapshot,
1281
             unsigned flags)
1282
0
{
1283
0
  struct strbuf midx_name = STRBUF_INIT;
1284
0
  unsigned char midx_hash[GIT_MAX_RAWSZ];
1285
0
  uint32_t i;
1286
0
  struct hashfile *f = NULL;
1287
0
  struct lock_file lk;
1288
0
  struct write_midx_context ctx = { 0 };
1289
0
  int pack_name_concat_len = 0;
1290
0
  int dropped_packs = 0;
1291
0
  int result = 0;
1292
0
  struct chunkfile *cf;
1293
1294
0
  trace2_region_enter("midx", "write_midx_internal", the_repository);
1295
1296
0
  get_midx_filename(&midx_name, object_dir);
1297
0
  if (safe_create_leading_directories(midx_name.buf))
1298
0
    die_errno(_("unable to create leading directories of %s"),
1299
0
        midx_name.buf);
1300
1301
0
  if (!packs_to_include) {
1302
    /*
1303
     * Only reference an existing MIDX when not filtering which
1304
     * packs to include, since all packs and objects are copied
1305
     * blindly from an existing MIDX if one is present.
1306
     */
1307
0
    ctx.m = lookup_multi_pack_index(the_repository, object_dir);
1308
0
  }
1309
1310
0
  if (ctx.m && !midx_checksum_valid(ctx.m)) {
1311
0
    warning(_("ignoring existing multi-pack-index; checksum mismatch"));
1312
0
    ctx.m = NULL;
1313
0
  }
1314
1315
0
  ctx.nr = 0;
1316
0
  ctx.alloc = ctx.m ? ctx.m->num_packs : 16;
1317
0
  ctx.info = NULL;
1318
0
  ALLOC_ARRAY(ctx.info, ctx.alloc);
1319
1320
0
  if (ctx.m) {
1321
0
    for (i = 0; i < ctx.m->num_packs; i++) {
1322
0
      ALLOC_GROW(ctx.info, ctx.nr + 1, ctx.alloc);
1323
1324
0
      ctx.info[ctx.nr].orig_pack_int_id = i;
1325
0
      ctx.info[ctx.nr].pack_name = xstrdup(ctx.m->pack_names[i]);
1326
0
      ctx.info[ctx.nr].p = ctx.m->packs[i];
1327
0
      ctx.info[ctx.nr].expired = 0;
1328
1329
0
      if (flags & MIDX_WRITE_REV_INDEX) {
1330
        /*
1331
         * If generating a reverse index, need to have
1332
         * packed_git's loaded to compare their
1333
         * mtimes and object count.
1334
         */
1335
0
        if (prepare_midx_pack(the_repository, ctx.m, i)) {
1336
0
          error(_("could not load pack"));
1337
0
          result = 1;
1338
0
          goto cleanup;
1339
0
        }
1340
1341
0
        if (open_pack_index(ctx.m->packs[i]))
1342
0
          die(_("could not open index for %s"),
1343
0
              ctx.m->packs[i]->pack_name);
1344
0
        ctx.info[ctx.nr].p = ctx.m->packs[i];
1345
0
      }
1346
1347
0
      ctx.nr++;
1348
0
    }
1349
0
  }
1350
1351
0
  ctx.pack_paths_checked = 0;
1352
0
  if (flags & MIDX_PROGRESS)
1353
0
    ctx.progress = start_delayed_progress(_("Adding packfiles to multi-pack-index"), 0);
1354
0
  else
1355
0
    ctx.progress = NULL;
1356
1357
0
  ctx.to_include = packs_to_include;
1358
1359
0
  for_each_file_in_pack_dir(object_dir, add_pack_to_midx, &ctx);
1360
0
  stop_progress(&ctx.progress);
1361
1362
0
  if ((ctx.m && ctx.nr == ctx.m->num_packs) &&
1363
0
      !(packs_to_include || packs_to_drop)) {
1364
0
    struct bitmap_index *bitmap_git;
1365
0
    int bitmap_exists;
1366
0
    int want_bitmap = flags & MIDX_WRITE_BITMAP;
1367
1368
0
    bitmap_git = prepare_midx_bitmap_git(ctx.m);
1369
0
    bitmap_exists = bitmap_git && bitmap_is_midx(bitmap_git);
1370
0
    free_bitmap_index(bitmap_git);
1371
1372
0
    if (bitmap_exists || !want_bitmap) {
1373
      /*
1374
       * The correct MIDX already exists, and so does a
1375
       * corresponding bitmap (or one wasn't requested).
1376
       */
1377
0
      if (!want_bitmap)
1378
0
        clear_midx_files_ext(object_dir, ".bitmap",
1379
0
                 NULL);
1380
0
      goto cleanup;
1381
0
    }
1382
0
  }
1383
1384
0
  if (preferred_pack_name) {
1385
0
    ctx.preferred_pack_idx = -1;
1386
1387
0
    for (i = 0; i < ctx.nr; i++) {
1388
0
      if (!cmp_idx_or_pack_name(preferred_pack_name,
1389
0
              ctx.info[i].pack_name)) {
1390
0
        ctx.preferred_pack_idx = i;
1391
0
        break;
1392
0
      }
1393
0
    }
1394
1395
0
    if (ctx.preferred_pack_idx == -1)
1396
0
      warning(_("unknown preferred pack: '%s'"),
1397
0
        preferred_pack_name);
1398
0
  } else if (ctx.nr &&
1399
0
       (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP))) {
1400
0
    struct packed_git *oldest = ctx.info[ctx.preferred_pack_idx].p;
1401
0
    ctx.preferred_pack_idx = 0;
1402
1403
0
    if (packs_to_drop && packs_to_drop->nr)
1404
0
      BUG("cannot write a MIDX bitmap during expiration");
1405
1406
    /*
1407
     * set a preferred pack when writing a bitmap to ensure that
1408
     * the pack from which the first object is selected in pseudo
1409
     * pack-order has all of its objects selected from that pack
1410
     * (and not another pack containing a duplicate)
1411
     */
1412
0
    for (i = 1; i < ctx.nr; i++) {
1413
0
      struct packed_git *p = ctx.info[i].p;
1414
1415
0
      if (!oldest->num_objects || p->mtime < oldest->mtime) {
1416
0
        oldest = p;
1417
0
        ctx.preferred_pack_idx = i;
1418
0
      }
1419
0
    }
1420
1421
0
    if (!oldest->num_objects) {
1422
      /*
1423
       * If all packs are empty; unset the preferred index.
1424
       * This is acceptable since there will be no duplicate
1425
       * objects to resolve, so the preferred value doesn't
1426
       * matter.
1427
       */
1428
0
      ctx.preferred_pack_idx = -1;
1429
0
    }
1430
0
  } else {
1431
    /*
1432
     * otherwise don't mark any pack as preferred to avoid
1433
     * interfering with expiration logic below
1434
     */
1435
0
    ctx.preferred_pack_idx = -1;
1436
0
  }
1437
1438
0
  if (ctx.preferred_pack_idx > -1) {
1439
0
    struct packed_git *preferred = ctx.info[ctx.preferred_pack_idx].p;
1440
0
    if (!preferred->num_objects) {
1441
0
      error(_("cannot select preferred pack %s with no objects"),
1442
0
            preferred->pack_name);
1443
0
      result = 1;
1444
0
      goto cleanup;
1445
0
    }
1446
0
  }
1447
1448
0
  ctx.entries = get_sorted_entries(ctx.m, ctx.info, ctx.nr, &ctx.entries_nr,
1449
0
           ctx.preferred_pack_idx);
1450
1451
0
  ctx.large_offsets_needed = 0;
1452
0
  for (i = 0; i < ctx.entries_nr; i++) {
1453
0
    if (ctx.entries[i].offset > 0x7fffffff)
1454
0
      ctx.num_large_offsets++;
1455
0
    if (ctx.entries[i].offset > 0xffffffff)
1456
0
      ctx.large_offsets_needed = 1;
1457
0
  }
1458
1459
0
  QSORT(ctx.info, ctx.nr, pack_info_compare);
1460
1461
0
  if (packs_to_drop && packs_to_drop->nr) {
1462
0
    int drop_index = 0;
1463
0
    int missing_drops = 0;
1464
1465
0
    for (i = 0; i < ctx.nr && drop_index < packs_to_drop->nr; i++) {
1466
0
      int cmp = strcmp(ctx.info[i].pack_name,
1467
0
           packs_to_drop->items[drop_index].string);
1468
1469
0
      if (!cmp) {
1470
0
        drop_index++;
1471
0
        ctx.info[i].expired = 1;
1472
0
      } else if (cmp > 0) {
1473
0
        error(_("did not see pack-file %s to drop"),
1474
0
              packs_to_drop->items[drop_index].string);
1475
0
        drop_index++;
1476
0
        missing_drops++;
1477
0
        i--;
1478
0
      } else {
1479
0
        ctx.info[i].expired = 0;
1480
0
      }
1481
0
    }
1482
1483
0
    if (missing_drops) {
1484
0
      result = 1;
1485
0
      goto cleanup;
1486
0
    }
1487
0
  }
1488
1489
  /*
1490
   * pack_perm stores a permutation between pack-int-ids from the
1491
   * previous multi-pack-index to the new one we are writing:
1492
   *
1493
   * pack_perm[old_id] = new_id
1494
   */
1495
0
  ALLOC_ARRAY(ctx.pack_perm, ctx.nr);
1496
0
  for (i = 0; i < ctx.nr; i++) {
1497
0
    if (ctx.info[i].expired) {
1498
0
      dropped_packs++;
1499
0
      ctx.pack_perm[ctx.info[i].orig_pack_int_id] = PACK_EXPIRED;
1500
0
    } else {
1501
0
      ctx.pack_perm[ctx.info[i].orig_pack_int_id] = i - dropped_packs;
1502
0
    }
1503
0
  }
1504
1505
0
  for (i = 0; i < ctx.nr; i++) {
1506
0
    if (!ctx.info[i].expired)
1507
0
      pack_name_concat_len += strlen(ctx.info[i].pack_name) + 1;
1508
0
  }
1509
1510
  /* Check that the preferred pack wasn't expired (if given). */
1511
0
  if (preferred_pack_name) {
1512
0
    struct pack_info *preferred = bsearch(preferred_pack_name,
1513
0
                  ctx.info, ctx.nr,
1514
0
                  sizeof(*ctx.info),
1515
0
                  idx_or_pack_name_cmp);
1516
0
    if (preferred) {
1517
0
      uint32_t perm = ctx.pack_perm[preferred->orig_pack_int_id];
1518
0
      if (perm == PACK_EXPIRED)
1519
0
        warning(_("preferred pack '%s' is expired"),
1520
0
          preferred_pack_name);
1521
0
    }
1522
0
  }
1523
1524
0
  if (pack_name_concat_len % MIDX_CHUNK_ALIGNMENT)
1525
0
    pack_name_concat_len += MIDX_CHUNK_ALIGNMENT -
1526
0
          (pack_name_concat_len % MIDX_CHUNK_ALIGNMENT);
1527
1528
0
  hold_lock_file_for_update(&lk, midx_name.buf, LOCK_DIE_ON_ERROR);
1529
0
  f = hashfd(get_lock_file_fd(&lk), get_lock_file_path(&lk));
1530
1531
0
  if (ctx.nr - dropped_packs == 0) {
1532
0
    error(_("no pack files to index."));
1533
0
    result = 1;
1534
0
    goto cleanup;
1535
0
  }
1536
1537
0
  if (!ctx.entries_nr) {
1538
0
    if (flags & MIDX_WRITE_BITMAP)
1539
0
      warning(_("refusing to write multi-pack .bitmap without any objects"));
1540
0
    flags &= ~(MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP);
1541
0
  }
1542
1543
0
  cf = init_chunkfile(f);
1544
1545
0
  add_chunk(cf, MIDX_CHUNKID_PACKNAMES, pack_name_concat_len,
1546
0
      write_midx_pack_names);
1547
0
  add_chunk(cf, MIDX_CHUNKID_OIDFANOUT, MIDX_CHUNK_FANOUT_SIZE,
1548
0
      write_midx_oid_fanout);
1549
0
  add_chunk(cf, MIDX_CHUNKID_OIDLOOKUP,
1550
0
      st_mult(ctx.entries_nr, the_hash_algo->rawsz),
1551
0
      write_midx_oid_lookup);
1552
0
  add_chunk(cf, MIDX_CHUNKID_OBJECTOFFSETS,
1553
0
      st_mult(ctx.entries_nr, MIDX_CHUNK_OFFSET_WIDTH),
1554
0
      write_midx_object_offsets);
1555
1556
0
  if (ctx.large_offsets_needed)
1557
0
    add_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS,
1558
0
      st_mult(ctx.num_large_offsets,
1559
0
        MIDX_CHUNK_LARGE_OFFSET_WIDTH),
1560
0
      write_midx_large_offsets);
1561
1562
0
  if (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP)) {
1563
0
    ctx.pack_order = midx_pack_order(&ctx);
1564
0
    add_chunk(cf, MIDX_CHUNKID_REVINDEX,
1565
0
        st_mult(ctx.entries_nr, sizeof(uint32_t)),
1566
0
        write_midx_revindex);
1567
0
  }
1568
1569
0
  write_midx_header(f, get_num_chunks(cf), ctx.nr - dropped_packs);
1570
0
  write_chunkfile(cf, &ctx);
1571
1572
0
  finalize_hashfile(f, midx_hash, FSYNC_COMPONENT_PACK_METADATA,
1573
0
        CSUM_FSYNC | CSUM_HASH_IN_STREAM);
1574
0
  free_chunkfile(cf);
1575
1576
0
  if (flags & MIDX_WRITE_REV_INDEX &&
1577
0
      git_env_bool("GIT_TEST_MIDX_WRITE_REV", 0))
1578
0
    write_midx_reverse_index(midx_name.buf, midx_hash, &ctx);
1579
1580
0
  if (flags & MIDX_WRITE_BITMAP) {
1581
0
    struct packing_data pdata;
1582
0
    struct commit **commits;
1583
0
    uint32_t commits_nr;
1584
1585
0
    if (!ctx.entries_nr)
1586
0
      BUG("cannot write a bitmap without any objects");
1587
1588
0
    prepare_midx_packing_data(&pdata, &ctx);
1589
1590
0
    commits = find_commits_for_midx_bitmap(&commits_nr, refs_snapshot, &ctx);
1591
1592
    /*
1593
     * The previous steps translated the information from
1594
     * 'entries' into information suitable for constructing
1595
     * bitmaps. We no longer need that array, so clear it to
1596
     * reduce memory pressure.
1597
     */
1598
0
    FREE_AND_NULL(ctx.entries);
1599
0
    ctx.entries_nr = 0;
1600
1601
0
    if (write_midx_bitmap(midx_name.buf, midx_hash, &pdata,
1602
0
              commits, commits_nr, ctx.pack_order,
1603
0
              flags) < 0) {
1604
0
      error(_("could not write multi-pack bitmap"));
1605
0
      result = 1;
1606
0
      goto cleanup;
1607
0
    }
1608
0
  }
1609
  /*
1610
   * NOTE: Do not use ctx.entries beyond this point, since it might
1611
   * have been freed in the previous if block.
1612
   */
1613
1614
0
  if (ctx.m)
1615
0
    close_object_store(the_repository->objects);
1616
1617
0
  if (commit_lock_file(&lk) < 0)
1618
0
    die_errno(_("could not write multi-pack-index"));
1619
1620
0
  clear_midx_files_ext(object_dir, ".bitmap", midx_hash);
1621
0
  clear_midx_files_ext(object_dir, ".rev", midx_hash);
1622
1623
0
cleanup:
1624
0
  for (i = 0; i < ctx.nr; i++) {
1625
0
    if (ctx.info[i].p) {
1626
0
      close_pack(ctx.info[i].p);
1627
0
      free(ctx.info[i].p);
1628
0
    }
1629
0
    free(ctx.info[i].pack_name);
1630
0
  }
1631
1632
0
  free(ctx.info);
1633
0
  free(ctx.entries);
1634
0
  free(ctx.pack_perm);
1635
0
  free(ctx.pack_order);
1636
0
  strbuf_release(&midx_name);
1637
1638
0
  trace2_region_leave("midx", "write_midx_internal", the_repository);
1639
1640
0
  return result;
1641
0
}
1642
1643
int write_midx_file(const char *object_dir,
1644
        const char *preferred_pack_name,
1645
        const char *refs_snapshot,
1646
        unsigned flags)
1647
0
{
1648
0
  return write_midx_internal(object_dir, NULL, NULL, preferred_pack_name,
1649
0
           refs_snapshot, flags);
1650
0
}
1651
1652
int write_midx_file_only(const char *object_dir,
1653
       struct string_list *packs_to_include,
1654
       const char *preferred_pack_name,
1655
       const char *refs_snapshot,
1656
       unsigned flags)
1657
0
{
1658
0
  return write_midx_internal(object_dir, packs_to_include, NULL,
1659
0
           preferred_pack_name, refs_snapshot, flags);
1660
0
}
1661
1662
struct clear_midx_data {
1663
  char *keep;
1664
  const char *ext;
1665
};
1666
1667
static void clear_midx_file_ext(const char *full_path, size_t full_path_len UNUSED,
1668
        const char *file_name, void *_data)
1669
0
{
1670
0
  struct clear_midx_data *data = _data;
1671
1672
0
  if (!(starts_with(file_name, "multi-pack-index-") &&
1673
0
        ends_with(file_name, data->ext)))
1674
0
    return;
1675
0
  if (data->keep && !strcmp(data->keep, file_name))
1676
0
    return;
1677
1678
0
  if (unlink(full_path))
1679
0
    die_errno(_("failed to remove %s"), full_path);
1680
0
}
1681
1682
static void clear_midx_files_ext(const char *object_dir, const char *ext,
1683
         unsigned char *keep_hash)
1684
0
{
1685
0
  struct clear_midx_data data;
1686
0
  memset(&data, 0, sizeof(struct clear_midx_data));
1687
1688
0
  if (keep_hash)
1689
0
    data.keep = xstrfmt("multi-pack-index-%s%s",
1690
0
            hash_to_hex(keep_hash), ext);
1691
0
  data.ext = ext;
1692
1693
0
  for_each_file_in_pack_dir(object_dir,
1694
0
          clear_midx_file_ext,
1695
0
          &data);
1696
1697
0
  free(data.keep);
1698
0
}
1699
1700
void clear_midx_file(struct repository *r)
1701
0
{
1702
0
  struct strbuf midx = STRBUF_INIT;
1703
1704
0
  get_midx_filename(&midx, r->objects->odb->path);
1705
1706
0
  if (r->objects && r->objects->multi_pack_index) {
1707
0
    close_midx(r->objects->multi_pack_index);
1708
0
    r->objects->multi_pack_index = NULL;
1709
0
  }
1710
1711
0
  if (remove_path(midx.buf))
1712
0
    die(_("failed to clear multi-pack-index at %s"), midx.buf);
1713
1714
0
  clear_midx_files_ext(r->objects->odb->path, ".bitmap", NULL);
1715
0
  clear_midx_files_ext(r->objects->odb->path, ".rev", NULL);
1716
1717
0
  strbuf_release(&midx);
1718
0
}
1719
1720
static int verify_midx_error;
1721
1722
__attribute__((format (printf, 1, 2)))
1723
static void midx_report(const char *fmt, ...)
1724
0
{
1725
0
  va_list ap;
1726
0
  verify_midx_error = 1;
1727
0
  va_start(ap, fmt);
1728
0
  vfprintf(stderr, fmt, ap);
1729
0
  fprintf(stderr, "\n");
1730
0
  va_end(ap);
1731
0
}
1732
1733
struct pair_pos_vs_id
1734
{
1735
  uint32_t pos;
1736
  uint32_t pack_int_id;
1737
};
1738
1739
static int compare_pair_pos_vs_id(const void *_a, const void *_b)
1740
0
{
1741
0
  struct pair_pos_vs_id *a = (struct pair_pos_vs_id *)_a;
1742
0
  struct pair_pos_vs_id *b = (struct pair_pos_vs_id *)_b;
1743
1744
0
  return b->pack_int_id - a->pack_int_id;
1745
0
}
1746
1747
/*
1748
 * Limit calls to display_progress() for performance reasons.
1749
 * The interval here was arbitrarily chosen.
1750
 */
1751
0
#define SPARSE_PROGRESS_INTERVAL (1 << 12)
1752
#define midx_display_sparse_progress(progress, n) \
1753
0
  do { \
1754
0
    uint64_t _n = (n); \
1755
0
    if ((_n & (SPARSE_PROGRESS_INTERVAL - 1)) == 0) \
1756
0
      display_progress(progress, _n); \
1757
0
  } while (0)
1758
1759
int verify_midx_file(struct repository *r, const char *object_dir, unsigned flags)
1760
0
{
1761
0
  struct pair_pos_vs_id *pairs = NULL;
1762
0
  uint32_t i;
1763
0
  struct progress *progress = NULL;
1764
0
  struct multi_pack_index *m = load_multi_pack_index(object_dir, 1);
1765
0
  verify_midx_error = 0;
1766
1767
0
  if (!m) {
1768
0
    int result = 0;
1769
0
    struct stat sb;
1770
0
    struct strbuf filename = STRBUF_INIT;
1771
1772
0
    get_midx_filename(&filename, object_dir);
1773
1774
0
    if (!stat(filename.buf, &sb)) {
1775
0
      error(_("multi-pack-index file exists, but failed to parse"));
1776
0
      result = 1;
1777
0
    }
1778
0
    strbuf_release(&filename);
1779
0
    return result;
1780
0
  }
1781
1782
0
  if (!midx_checksum_valid(m))
1783
0
    midx_report(_("incorrect checksum"));
1784
1785
0
  if (flags & MIDX_PROGRESS)
1786
0
    progress = start_delayed_progress(_("Looking for referenced packfiles"),
1787
0
            m->num_packs);
1788
0
  for (i = 0; i < m->num_packs; i++) {
1789
0
    if (prepare_midx_pack(r, m, i))
1790
0
      midx_report("failed to load pack in position %d", i);
1791
1792
0
    display_progress(progress, i + 1);
1793
0
  }
1794
0
  stop_progress(&progress);
1795
1796
0
  if (m->num_objects == 0) {
1797
0
    midx_report(_("the midx contains no oid"));
1798
    /*
1799
     * Remaining tests assume that we have objects, so we can
1800
     * return here.
1801
     */
1802
0
    goto cleanup;
1803
0
  }
1804
1805
0
  if (flags & MIDX_PROGRESS)
1806
0
    progress = start_sparse_progress(_("Verifying OID order in multi-pack-index"),
1807
0
             m->num_objects - 1);
1808
0
  for (i = 0; i < m->num_objects - 1; i++) {
1809
0
    struct object_id oid1, oid2;
1810
1811
0
    nth_midxed_object_oid(&oid1, m, i);
1812
0
    nth_midxed_object_oid(&oid2, m, i + 1);
1813
1814
0
    if (oidcmp(&oid1, &oid2) >= 0)
1815
0
      midx_report(_("oid lookup out of order: oid[%d] = %s >= %s = oid[%d]"),
1816
0
            i, oid_to_hex(&oid1), oid_to_hex(&oid2), i + 1);
1817
1818
0
    midx_display_sparse_progress(progress, i + 1);
1819
0
  }
1820
0
  stop_progress(&progress);
1821
1822
  /*
1823
   * Create an array mapping each object to its packfile id.  Sort it
1824
   * to group the objects by packfile.  Use this permutation to visit
1825
   * each of the objects and only require 1 packfile to be open at a
1826
   * time.
1827
   */
1828
0
  ALLOC_ARRAY(pairs, m->num_objects);
1829
0
  for (i = 0; i < m->num_objects; i++) {
1830
0
    pairs[i].pos = i;
1831
0
    pairs[i].pack_int_id = nth_midxed_pack_int_id(m, i);
1832
0
  }
1833
1834
0
  if (flags & MIDX_PROGRESS)
1835
0
    progress = start_sparse_progress(_("Sorting objects by packfile"),
1836
0
             m->num_objects);
1837
0
  display_progress(progress, 0); /* TODO: Measure QSORT() progress */
1838
0
  QSORT(pairs, m->num_objects, compare_pair_pos_vs_id);
1839
0
  stop_progress(&progress);
1840
1841
0
  if (flags & MIDX_PROGRESS)
1842
0
    progress = start_sparse_progress(_("Verifying object offsets"), m->num_objects);
1843
0
  for (i = 0; i < m->num_objects; i++) {
1844
0
    struct object_id oid;
1845
0
    struct pack_entry e;
1846
0
    off_t m_offset, p_offset;
1847
1848
0
    if (i > 0 && pairs[i-1].pack_int_id != pairs[i].pack_int_id &&
1849
0
        m->packs[pairs[i-1].pack_int_id])
1850
0
    {
1851
0
      close_pack_fd(m->packs[pairs[i-1].pack_int_id]);
1852
0
      close_pack_index(m->packs[pairs[i-1].pack_int_id]);
1853
0
    }
1854
1855
0
    nth_midxed_object_oid(&oid, m, pairs[i].pos);
1856
1857
0
    if (!fill_midx_entry(r, &oid, &e, m)) {
1858
0
      midx_report(_("failed to load pack entry for oid[%d] = %s"),
1859
0
            pairs[i].pos, oid_to_hex(&oid));
1860
0
      continue;
1861
0
    }
1862
1863
0
    if (open_pack_index(e.p)) {
1864
0
      midx_report(_("failed to load pack-index for packfile %s"),
1865
0
            e.p->pack_name);
1866
0
      break;
1867
0
    }
1868
1869
0
    m_offset = e.offset;
1870
0
    p_offset = find_pack_entry_one(oid.hash, e.p);
1871
1872
0
    if (m_offset != p_offset)
1873
0
      midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64" != %"PRIx64),
1874
0
            pairs[i].pos, oid_to_hex(&oid), m_offset, p_offset);
1875
1876
0
    midx_display_sparse_progress(progress, i + 1);
1877
0
  }
1878
0
  stop_progress(&progress);
1879
1880
0
cleanup:
1881
0
  free(pairs);
1882
0
  close_midx(m);
1883
1884
0
  return verify_midx_error;
1885
0
}
1886
1887
int expire_midx_packs(struct repository *r, const char *object_dir, unsigned flags)
1888
0
{
1889
0
  uint32_t i, *count, result = 0;
1890
0
  struct string_list packs_to_drop = STRING_LIST_INIT_DUP;
1891
0
  struct multi_pack_index *m = lookup_multi_pack_index(r, object_dir);
1892
0
  struct progress *progress = NULL;
1893
1894
0
  if (!m)
1895
0
    return 0;
1896
1897
0
  CALLOC_ARRAY(count, m->num_packs);
1898
1899
0
  if (flags & MIDX_PROGRESS)
1900
0
    progress = start_delayed_progress(_("Counting referenced objects"),
1901
0
            m->num_objects);
1902
0
  for (i = 0; i < m->num_objects; i++) {
1903
0
    int pack_int_id = nth_midxed_pack_int_id(m, i);
1904
0
    count[pack_int_id]++;
1905
0
    display_progress(progress, i + 1);
1906
0
  }
1907
0
  stop_progress(&progress);
1908
1909
0
  if (flags & MIDX_PROGRESS)
1910
0
    progress = start_delayed_progress(_("Finding and deleting unreferenced packfiles"),
1911
0
            m->num_packs);
1912
0
  for (i = 0; i < m->num_packs; i++) {
1913
0
    char *pack_name;
1914
0
    display_progress(progress, i + 1);
1915
1916
0
    if (count[i])
1917
0
      continue;
1918
1919
0
    if (prepare_midx_pack(r, m, i))
1920
0
      continue;
1921
1922
0
    if (m->packs[i]->pack_keep || m->packs[i]->is_cruft)
1923
0
      continue;
1924
1925
0
    pack_name = xstrdup(m->packs[i]->pack_name);
1926
0
    close_pack(m->packs[i]);
1927
1928
0
    string_list_insert(&packs_to_drop, m->pack_names[i]);
1929
0
    unlink_pack_path(pack_name, 0);
1930
0
    free(pack_name);
1931
0
  }
1932
0
  stop_progress(&progress);
1933
1934
0
  free(count);
1935
1936
0
  if (packs_to_drop.nr)
1937
0
    result = write_midx_internal(object_dir, NULL, &packs_to_drop, NULL, NULL, flags);
1938
1939
0
  string_list_clear(&packs_to_drop, 0);
1940
1941
0
  return result;
1942
0
}
1943
1944
struct repack_info {
1945
  timestamp_t mtime;
1946
  uint32_t referenced_objects;
1947
  uint32_t pack_int_id;
1948
};
1949
1950
static int compare_by_mtime(const void *a_, const void *b_)
1951
0
{
1952
0
  const struct repack_info *a, *b;
1953
1954
0
  a = (const struct repack_info *)a_;
1955
0
  b = (const struct repack_info *)b_;
1956
1957
0
  if (a->mtime < b->mtime)
1958
0
    return -1;
1959
0
  if (a->mtime > b->mtime)
1960
0
    return 1;
1961
0
  return 0;
1962
0
}
1963
1964
static int fill_included_packs_all(struct repository *r,
1965
           struct multi_pack_index *m,
1966
           unsigned char *include_pack)
1967
0
{
1968
0
  uint32_t i, count = 0;
1969
0
  int pack_kept_objects = 0;
1970
1971
0
  repo_config_get_bool(r, "repack.packkeptobjects", &pack_kept_objects);
1972
1973
0
  for (i = 0; i < m->num_packs; i++) {
1974
0
    if (prepare_midx_pack(r, m, i))
1975
0
      continue;
1976
0
    if (!pack_kept_objects && m->packs[i]->pack_keep)
1977
0
      continue;
1978
0
    if (m->packs[i]->is_cruft)
1979
0
      continue;
1980
1981
0
    include_pack[i] = 1;
1982
0
    count++;
1983
0
  }
1984
1985
0
  return count < 2;
1986
0
}
1987
1988
static int fill_included_packs_batch(struct repository *r,
1989
             struct multi_pack_index *m,
1990
             unsigned char *include_pack,
1991
             size_t batch_size)
1992
0
{
1993
0
  uint32_t i, packs_to_repack;
1994
0
  size_t total_size;
1995
0
  struct repack_info *pack_info;
1996
0
  int pack_kept_objects = 0;
1997
1998
0
  CALLOC_ARRAY(pack_info, m->num_packs);
1999
2000
0
  repo_config_get_bool(r, "repack.packkeptobjects", &pack_kept_objects);
2001
2002
0
  for (i = 0; i < m->num_packs; i++) {
2003
0
    pack_info[i].pack_int_id = i;
2004
2005
0
    if (prepare_midx_pack(r, m, i))
2006
0
      continue;
2007
2008
0
    pack_info[i].mtime = m->packs[i]->mtime;
2009
0
  }
2010
2011
0
  for (i = 0; i < m->num_objects; i++) {
2012
0
    uint32_t pack_int_id = nth_midxed_pack_int_id(m, i);
2013
0
    pack_info[pack_int_id].referenced_objects++;
2014
0
  }
2015
2016
0
  QSORT(pack_info, m->num_packs, compare_by_mtime);
2017
2018
0
  total_size = 0;
2019
0
  packs_to_repack = 0;
2020
0
  for (i = 0; total_size < batch_size && i < m->num_packs; i++) {
2021
0
    int pack_int_id = pack_info[i].pack_int_id;
2022
0
    struct packed_git *p = m->packs[pack_int_id];
2023
0
    size_t expected_size;
2024
2025
0
    if (!p)
2026
0
      continue;
2027
0
    if (!pack_kept_objects && p->pack_keep)
2028
0
      continue;
2029
0
    if (p->is_cruft)
2030
0
      continue;
2031
0
    if (open_pack_index(p) || !p->num_objects)
2032
0
      continue;
2033
2034
0
    expected_size = st_mult(p->pack_size,
2035
0
          pack_info[i].referenced_objects);
2036
0
    expected_size /= p->num_objects;
2037
2038
0
    if (expected_size >= batch_size)
2039
0
      continue;
2040
2041
0
    packs_to_repack++;
2042
0
    total_size += expected_size;
2043
0
    include_pack[pack_int_id] = 1;
2044
0
  }
2045
2046
0
  free(pack_info);
2047
2048
0
  if (packs_to_repack < 2)
2049
0
    return 1;
2050
2051
0
  return 0;
2052
0
}
2053
2054
int midx_repack(struct repository *r, const char *object_dir, size_t batch_size, unsigned flags)
2055
0
{
2056
0
  int result = 0;
2057
0
  uint32_t i;
2058
0
  unsigned char *include_pack;
2059
0
  struct child_process cmd = CHILD_PROCESS_INIT;
2060
0
  FILE *cmd_in;
2061
0
  struct strbuf base_name = STRBUF_INIT;
2062
0
  struct multi_pack_index *m = lookup_multi_pack_index(r, object_dir);
2063
2064
  /*
2065
   * When updating the default for these configuration
2066
   * variables in builtin/repack.c, these must be adjusted
2067
   * to match.
2068
   */
2069
0
  int delta_base_offset = 1;
2070
0
  int use_delta_islands = 0;
2071
2072
0
  if (!m)
2073
0
    return 0;
2074
2075
0
  CALLOC_ARRAY(include_pack, m->num_packs);
2076
2077
0
  if (batch_size) {
2078
0
    if (fill_included_packs_batch(r, m, include_pack, batch_size))
2079
0
      goto cleanup;
2080
0
  } else if (fill_included_packs_all(r, m, include_pack))
2081
0
    goto cleanup;
2082
2083
0
  repo_config_get_bool(r, "repack.usedeltabaseoffset", &delta_base_offset);
2084
0
  repo_config_get_bool(r, "repack.usedeltaislands", &use_delta_islands);
2085
2086
0
  strvec_push(&cmd.args, "pack-objects");
2087
2088
0
  strbuf_addstr(&base_name, object_dir);
2089
0
  strbuf_addstr(&base_name, "/pack/pack");
2090
0
  strvec_push(&cmd.args, base_name.buf);
2091
2092
0
  if (delta_base_offset)
2093
0
    strvec_push(&cmd.args, "--delta-base-offset");
2094
0
  if (use_delta_islands)
2095
0
    strvec_push(&cmd.args, "--delta-islands");
2096
2097
0
  if (flags & MIDX_PROGRESS)
2098
0
    strvec_push(&cmd.args, "--progress");
2099
0
  else
2100
0
    strvec_push(&cmd.args, "-q");
2101
2102
0
  strbuf_release(&base_name);
2103
2104
0
  cmd.git_cmd = 1;
2105
0
  cmd.in = cmd.out = -1;
2106
2107
0
  if (start_command(&cmd)) {
2108
0
    error(_("could not start pack-objects"));
2109
0
    result = 1;
2110
0
    goto cleanup;
2111
0
  }
2112
2113
0
  cmd_in = xfdopen(cmd.in, "w");
2114
2115
0
  for (i = 0; i < m->num_objects; i++) {
2116
0
    struct object_id oid;
2117
0
    uint32_t pack_int_id = nth_midxed_pack_int_id(m, i);
2118
2119
0
    if (!include_pack[pack_int_id])
2120
0
      continue;
2121
2122
0
    nth_midxed_object_oid(&oid, m, i);
2123
0
    fprintf(cmd_in, "%s\n", oid_to_hex(&oid));
2124
0
  }
2125
0
  fclose(cmd_in);
2126
2127
0
  if (finish_command(&cmd)) {
2128
0
    error(_("could not finish pack-objects"));
2129
0
    result = 1;
2130
0
    goto cleanup;
2131
0
  }
2132
2133
0
  result = write_midx_internal(object_dir, NULL, NULL, NULL, NULL, flags);
2134
2135
0
cleanup:
2136
0
  free(include_pack);
2137
0
  return result;
2138
0
}