Coverage Report

Created: 2024-09-08 06:23

/src/git/midx.c
Line
Count
Source (jump to first uncovered line)
1
#define USE_THE_REPOSITORY_VARIABLE
2
3
#include "git-compat-util.h"
4
#include "config.h"
5
#include "dir.h"
6
#include "hex.h"
7
#include "packfile.h"
8
#include "object-file.h"
9
#include "hash-lookup.h"
10
#include "midx.h"
11
#include "progress.h"
12
#include "trace2.h"
13
#include "chunk-format.h"
14
#include "pack-bitmap.h"
15
#include "pack-revindex.h"
16
17
int midx_checksum_valid(struct multi_pack_index *m);
18
void clear_midx_files_ext(const char *object_dir, const char *ext,
19
        const char *keep_hash);
20
void clear_incremental_midx_files_ext(const char *object_dir, const char *ext,
21
              char **keep_hashes,
22
              uint32_t hashes_nr);
23
int cmp_idx_or_pack_name(const char *idx_or_pack_name,
24
       const char *idx_name);
25
26
const unsigned char *get_midx_checksum(struct multi_pack_index *m)
27
0
{
28
0
  return m->data + m->data_len - the_hash_algo->rawsz;
29
0
}
30
31
/*
 * Append the path of the plain multi-pack-index file under object_dir to
 * out (no hash, no extension).
 */
void get_midx_filename(struct strbuf *out, const char *object_dir)
{
	const unsigned char *no_hash = NULL;
	const char *no_ext = NULL;

	get_midx_filename_ext(out, object_dir, no_hash, no_ext);
}
35
36
/*
 * Append "<object_dir>/pack/multi-pack-index" to out.  When ext is given,
 * also append "-<hex of hash>.<ext>"; hash is only read in that case.
 */
void get_midx_filename_ext(struct strbuf *out, const char *object_dir,
			   const unsigned char *hash, const char *ext)
{
	strbuf_addf(out, "%s/pack/multi-pack-index", object_dir);
	if (!ext)
		return;

	strbuf_addf(out, "-%s.%s", hash_to_hex(hash), ext);
}
43
44
static int midx_read_oid_fanout(const unsigned char *chunk_start,
45
        size_t chunk_size, void *data)
46
0
{
47
0
  int i;
48
0
  struct multi_pack_index *m = data;
49
0
  m->chunk_oid_fanout = (uint32_t *)chunk_start;
50
51
0
  if (chunk_size != 4 * 256) {
52
0
    error(_("multi-pack-index OID fanout is of the wrong size"));
53
0
    return 1;
54
0
  }
55
0
  for (i = 0; i < 255; i++) {
56
0
    uint32_t oid_fanout1 = ntohl(m->chunk_oid_fanout[i]);
57
0
    uint32_t oid_fanout2 = ntohl(m->chunk_oid_fanout[i+1]);
58
59
0
    if (oid_fanout1 > oid_fanout2) {
60
0
      error(_("oid fanout out of order: fanout[%d] = %"PRIx32" > %"PRIx32" = fanout[%d]"),
61
0
            i, oid_fanout1, oid_fanout2, i + 1);
62
0
      return 1;
63
0
    }
64
0
  }
65
0
  m->num_objects = ntohl(m->chunk_oid_fanout[255]);
66
0
  return 0;
67
0
}
68
69
static int midx_read_oid_lookup(const unsigned char *chunk_start,
70
        size_t chunk_size, void *data)
71
0
{
72
0
  struct multi_pack_index *m = data;
73
0
  m->chunk_oid_lookup = chunk_start;
74
75
0
  if (chunk_size != st_mult(m->hash_len, m->num_objects)) {
76
0
    error(_("multi-pack-index OID lookup chunk is the wrong size"));
77
0
    return 1;
78
0
  }
79
0
  return 0;
80
0
}
81
82
static int midx_read_object_offsets(const unsigned char *chunk_start,
83
            size_t chunk_size, void *data)
84
0
{
85
0
  struct multi_pack_index *m = data;
86
0
  m->chunk_object_offsets = chunk_start;
87
88
0
  if (chunk_size != st_mult(m->num_objects, MIDX_CHUNK_OFFSET_WIDTH)) {
89
0
    error(_("multi-pack-index object offset chunk is the wrong size"));
90
0
    return 1;
91
0
  }
92
0
  return 0;
93
0
}
94
95
0
#define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + the_hash_algo->rawsz)
96
97
static struct multi_pack_index *load_multi_pack_index_one(const char *object_dir,
98
                const char *midx_name,
99
                int local)
100
0
{
101
0
  struct multi_pack_index *m = NULL;
102
0
  int fd;
103
0
  struct stat st;
104
0
  size_t midx_size;
105
0
  void *midx_map = NULL;
106
0
  uint32_t hash_version;
107
0
  uint32_t i;
108
0
  const char *cur_pack_name;
109
0
  struct chunkfile *cf = NULL;
110
111
0
  fd = git_open(midx_name);
112
113
0
  if (fd < 0)
114
0
    goto cleanup_fail;
115
0
  if (fstat(fd, &st)) {
116
0
    error_errno(_("failed to read %s"), midx_name);
117
0
    goto cleanup_fail;
118
0
  }
119
120
0
  midx_size = xsize_t(st.st_size);
121
122
0
  if (midx_size < MIDX_MIN_SIZE) {
123
0
    error(_("multi-pack-index file %s is too small"), midx_name);
124
0
    goto cleanup_fail;
125
0
  }
126
127
0
  midx_map = xmmap(NULL, midx_size, PROT_READ, MAP_PRIVATE, fd, 0);
128
0
  close(fd);
129
130
0
  FLEX_ALLOC_STR(m, object_dir, object_dir);
131
0
  m->data = midx_map;
132
0
  m->data_len = midx_size;
133
0
  m->local = local;
134
135
0
  m->signature = get_be32(m->data);
136
0
  if (m->signature != MIDX_SIGNATURE)
137
0
    die(_("multi-pack-index signature 0x%08x does not match signature 0x%08x"),
138
0
          m->signature, MIDX_SIGNATURE);
139
140
0
  m->version = m->data[MIDX_BYTE_FILE_VERSION];
141
0
  if (m->version != MIDX_VERSION)
142
0
    die(_("multi-pack-index version %d not recognized"),
143
0
          m->version);
144
145
0
  hash_version = m->data[MIDX_BYTE_HASH_VERSION];
146
0
  if (hash_version != oid_version(the_hash_algo)) {
147
0
    error(_("multi-pack-index hash version %u does not match version %u"),
148
0
          hash_version, oid_version(the_hash_algo));
149
0
    goto cleanup_fail;
150
0
  }
151
0
  m->hash_len = the_hash_algo->rawsz;
152
153
0
  m->num_chunks = m->data[MIDX_BYTE_NUM_CHUNKS];
154
155
0
  m->num_packs = get_be32(m->data + MIDX_BYTE_NUM_PACKS);
156
157
0
  m->preferred_pack_idx = -1;
158
159
0
  cf = init_chunkfile(NULL);
160
161
0
  if (read_table_of_contents(cf, m->data, midx_size,
162
0
           MIDX_HEADER_SIZE, m->num_chunks,
163
0
           MIDX_CHUNK_ALIGNMENT))
164
0
    goto cleanup_fail;
165
166
0
  if (pair_chunk(cf, MIDX_CHUNKID_PACKNAMES, &m->chunk_pack_names, &m->chunk_pack_names_len))
167
0
    die(_("multi-pack-index required pack-name chunk missing or corrupted"));
168
0
  if (read_chunk(cf, MIDX_CHUNKID_OIDFANOUT, midx_read_oid_fanout, m))
169
0
    die(_("multi-pack-index required OID fanout chunk missing or corrupted"));
170
0
  if (read_chunk(cf, MIDX_CHUNKID_OIDLOOKUP, midx_read_oid_lookup, m))
171
0
    die(_("multi-pack-index required OID lookup chunk missing or corrupted"));
172
0
  if (read_chunk(cf, MIDX_CHUNKID_OBJECTOFFSETS, midx_read_object_offsets, m))
173
0
    die(_("multi-pack-index required object offsets chunk missing or corrupted"));
174
175
0
  pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets,
176
0
       &m->chunk_large_offsets_len);
177
0
  if (git_env_bool("GIT_TEST_MIDX_READ_BTMP", 1))
178
0
    pair_chunk(cf, MIDX_CHUNKID_BITMAPPEDPACKS,
179
0
         (const unsigned char **)&m->chunk_bitmapped_packs,
180
0
         &m->chunk_bitmapped_packs_len);
181
182
0
  if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1))
183
0
    pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex,
184
0
         &m->chunk_revindex_len);
185
186
0
  CALLOC_ARRAY(m->pack_names, m->num_packs);
187
0
  CALLOC_ARRAY(m->packs, m->num_packs);
188
189
0
  cur_pack_name = (const char *)m->chunk_pack_names;
190
0
  for (i = 0; i < m->num_packs; i++) {
191
0
    const char *end;
192
0
    size_t avail = m->chunk_pack_names_len -
193
0
        (cur_pack_name - (const char *)m->chunk_pack_names);
194
195
0
    m->pack_names[i] = cur_pack_name;
196
197
0
    end = memchr(cur_pack_name, '\0', avail);
198
0
    if (!end)
199
0
      die(_("multi-pack-index pack-name chunk is too short"));
200
0
    cur_pack_name = end + 1;
201
202
0
    if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0)
203
0
      die(_("multi-pack-index pack names out of order: '%s' before '%s'"),
204
0
            m->pack_names[i - 1],
205
0
            m->pack_names[i]);
206
0
  }
207
208
0
  trace2_data_intmax("midx", the_repository, "load/num_packs", m->num_packs);
209
0
  trace2_data_intmax("midx", the_repository, "load/num_objects", m->num_objects);
210
211
0
  free_chunkfile(cf);
212
0
  return m;
213
214
0
cleanup_fail:
215
0
  free(m);
216
0
  free_chunkfile(cf);
217
0
  if (midx_map)
218
0
    munmap(midx_map, midx_size);
219
0
  if (0 <= fd)
220
0
    close(fd);
221
0
  return NULL;
222
0
}
223
224
/* Append "<object_dir>/pack/multi-pack-index.d" to buf. */
void get_midx_chain_dirname(struct strbuf *buf, const char *object_dir)
{
	const char *dir = object_dir;

	strbuf_addf(buf, "%s/pack/multi-pack-index.d", dir);
}
228
229
/*
 * Append the path of the chain file, which sits inside the chain
 * directory of object_dir, to buf.
 */
void get_midx_chain_filename(struct strbuf *buf, const char *object_dir)
{
	static const char chain_basename[] = "/multi-pack-index-chain";

	get_midx_chain_dirname(buf, object_dir);
	strbuf_addstr(buf, chain_basename);
}
234
235
/*
 * Append the path of one chain-layer file to buf:
 * "<chain dir>/multi-pack-index-<hex of hash>.<ext>".
 */
void get_split_midx_filename_ext(struct strbuf *buf, const char *object_dir,
				 const unsigned char *hash, const char *ext)
{
	const char *hex;

	get_midx_chain_dirname(buf, object_dir);
	hex = hash_to_hex(hash);
	strbuf_addf(buf, "/multi-pack-index-%s.%s", hex, ext);
}
241
242
static int open_multi_pack_index_chain(const char *chain_file,
243
               int *fd, struct stat *st)
244
0
{
245
0
  *fd = git_open(chain_file);
246
0
  if (*fd < 0)
247
0
    return 0;
248
0
  if (fstat(*fd, st)) {
249
0
    close(*fd);
250
0
    return 0;
251
0
  }
252
0
  if (st->st_size < the_hash_algo->hexsz) {
253
0
    close(*fd);
254
0
    if (!st->st_size) {
255
      /* treat empty files the same as missing */
256
0
      errno = ENOENT;
257
0
    } else {
258
0
      warning(_("multi-pack-index chain file too small"));
259
0
      errno = EINVAL;
260
0
    }
261
0
    return 0;
262
0
  }
263
0
  return 1;
264
0
}
265
266
static int add_midx_to_chain(struct multi_pack_index *midx,
267
           struct multi_pack_index *midx_chain)
268
0
{
269
0
  if (midx_chain) {
270
0
    if (unsigned_add_overflows(midx_chain->num_packs,
271
0
             midx_chain->num_packs_in_base)) {
272
0
      warning(_("pack count in base MIDX too high: %"PRIuMAX),
273
0
        (uintmax_t)midx_chain->num_packs_in_base);
274
0
      return 0;
275
0
    }
276
0
    if (unsigned_add_overflows(midx_chain->num_objects,
277
0
             midx_chain->num_objects_in_base)) {
278
0
      warning(_("object count in base MIDX too high: %"PRIuMAX),
279
0
        (uintmax_t)midx_chain->num_objects_in_base);
280
0
      return 0;
281
0
    }
282
0
    midx->num_packs_in_base = midx_chain->num_packs +
283
0
      midx_chain->num_packs_in_base;
284
0
    midx->num_objects_in_base = midx_chain->num_objects +
285
0
      midx_chain->num_objects_in_base;
286
0
  }
287
288
0
  midx->base_midx = midx_chain;
289
0
  midx->has_chain = 1;
290
291
0
  return 1;
292
0
}
293
294
static struct multi_pack_index *load_midx_chain_fd_st(const char *object_dir,
295
                  int local,
296
                  int fd, struct stat *st,
297
                  int *incomplete_chain)
298
0
{
299
0
  struct multi_pack_index *midx_chain = NULL;
300
0
  struct strbuf buf = STRBUF_INIT;
301
0
  int valid = 1;
302
0
  uint32_t i, count;
303
0
  FILE *fp = xfdopen(fd, "r");
304
305
0
  count = st->st_size / (the_hash_algo->hexsz + 1);
306
307
0
  for (i = 0; i < count; i++) {
308
0
    struct multi_pack_index *m;
309
0
    struct object_id layer;
310
311
0
    if (strbuf_getline_lf(&buf, fp) == EOF)
312
0
      break;
313
314
0
    if (get_oid_hex(buf.buf, &layer)) {
315
0
      warning(_("invalid multi-pack-index chain: line '%s' "
316
0
          "not a hash"),
317
0
        buf.buf);
318
0
      valid = 0;
319
0
      break;
320
0
    }
321
322
0
    valid = 0;
323
324
0
    strbuf_reset(&buf);
325
0
    get_split_midx_filename_ext(&buf, object_dir, layer.hash,
326
0
              MIDX_EXT_MIDX);
327
0
    m = load_multi_pack_index_one(object_dir, buf.buf, local);
328
329
0
    if (m) {
330
0
      if (add_midx_to_chain(m, midx_chain)) {
331
0
        midx_chain = m;
332
0
        valid = 1;
333
0
      } else {
334
0
        close_midx(m);
335
0
      }
336
0
    }
337
0
    if (!valid) {
338
0
      warning(_("unable to find all multi-pack index files"));
339
0
      break;
340
0
    }
341
0
  }
342
343
0
  fclose(fp);
344
0
  strbuf_release(&buf);
345
346
0
  *incomplete_chain = !valid;
347
0
  return midx_chain;
348
0
}
349
350
static struct multi_pack_index *load_multi_pack_index_chain(const char *object_dir,
351
                  int local)
352
0
{
353
0
  struct strbuf chain_file = STRBUF_INIT;
354
0
  struct stat st;
355
0
  int fd;
356
0
  struct multi_pack_index *m = NULL;
357
358
0
  get_midx_chain_filename(&chain_file, object_dir);
359
0
  if (open_multi_pack_index_chain(chain_file.buf, &fd, &st)) {
360
0
    int incomplete;
361
    /* ownership of fd is taken over by load function */
362
0
    m = load_midx_chain_fd_st(object_dir, local, fd, &st,
363
0
            &incomplete);
364
0
  }
365
366
0
  strbuf_release(&chain_file);
367
0
  return m;
368
0
}
369
370
struct multi_pack_index *load_multi_pack_index(const char *object_dir,
371
                 int local)
372
0
{
373
0
  struct strbuf midx_name = STRBUF_INIT;
374
0
  struct multi_pack_index *m;
375
376
0
  get_midx_filename(&midx_name, object_dir);
377
378
0
  m = load_multi_pack_index_one(object_dir, midx_name.buf, local);
379
0
  if (!m)
380
0
    m = load_multi_pack_index_chain(object_dir, local);
381
382
0
  strbuf_release(&midx_name);
383
384
0
  return m;
385
0
}
386
387
void close_midx(struct multi_pack_index *m)
388
0
{
389
0
  uint32_t i;
390
391
0
  if (!m)
392
0
    return;
393
394
0
  close_midx(m->next);
395
0
  close_midx(m->base_midx);
396
397
0
  munmap((unsigned char *)m->data, m->data_len);
398
399
0
  for (i = 0; i < m->num_packs; i++) {
400
0
    if (m->packs[i])
401
0
      m->packs[i]->multi_pack_index = 0;
402
0
  }
403
0
  FREE_AND_NULL(m->packs);
404
0
  FREE_AND_NULL(m->pack_names);
405
0
  free(m);
406
0
}
407
408
static uint32_t midx_for_object(struct multi_pack_index **_m, uint32_t pos)
409
0
{
410
0
  struct multi_pack_index *m = *_m;
411
0
  while (m && pos < m->num_objects_in_base)
412
0
    m = m->base_midx;
413
414
0
  if (!m)
415
0
    BUG("NULL multi-pack-index for object position: %"PRIu32, pos);
416
417
0
  if (pos >= m->num_objects + m->num_objects_in_base)
418
0
    die(_("invalid MIDX object position, MIDX is likely corrupt"));
419
420
0
  *_m = m;
421
422
0
  return pos - m->num_objects_in_base;
423
0
}
424
425
static uint32_t midx_for_pack(struct multi_pack_index **_m,
426
            uint32_t pack_int_id)
427
0
{
428
0
  struct multi_pack_index *m = *_m;
429
0
  while (m && pack_int_id < m->num_packs_in_base)
430
0
    m = m->base_midx;
431
432
0
  if (!m)
433
0
    BUG("NULL multi-pack-index for pack ID: %"PRIu32, pack_int_id);
434
435
0
  if (pack_int_id >= m->num_packs + m->num_packs_in_base)
436
0
    die(_("bad pack-int-id: %u (%u total packs)"),
437
0
        pack_int_id, m->num_packs + m->num_packs_in_base);
438
439
0
  *_m = m;
440
441
0
  return pack_int_id - m->num_packs_in_base;
442
0
}
443
444
int prepare_midx_pack(struct repository *r, struct multi_pack_index *m,
445
          uint32_t pack_int_id)
446
0
{
447
0
  struct strbuf pack_name = STRBUF_INIT;
448
0
  struct packed_git *p;
449
450
0
  pack_int_id = midx_for_pack(&m, pack_int_id);
451
452
0
  if (m->packs[pack_int_id])
453
0
    return 0;
454
455
0
  strbuf_addf(&pack_name, "%s/pack/%s", m->object_dir,
456
0
        m->pack_names[pack_int_id]);
457
458
0
  p = add_packed_git(pack_name.buf, pack_name.len, m->local);
459
0
  strbuf_release(&pack_name);
460
461
0
  if (!p)
462
0
    return 1;
463
464
0
  p->multi_pack_index = 1;
465
0
  m->packs[pack_int_id] = p;
466
0
  install_packed_git(r, p);
467
0
  list_add_tail(&p->mru, &r->objects->packed_git_mru);
468
469
0
  return 0;
470
0
}
471
472
struct packed_git *nth_midxed_pack(struct multi_pack_index *m,
473
           uint32_t pack_int_id)
474
0
{
475
0
  uint32_t local_pack_int_id = midx_for_pack(&m, pack_int_id);
476
0
  return m->packs[local_pack_int_id];
477
0
}
478
479
0
#define MIDX_CHUNK_BITMAPPED_PACKS_WIDTH (2 * sizeof(uint32_t))
480
481
int nth_bitmapped_pack(struct repository *r, struct multi_pack_index *m,
482
           struct bitmapped_pack *bp, uint32_t pack_int_id)
483
0
{
484
0
  uint32_t local_pack_int_id = midx_for_pack(&m, pack_int_id);
485
486
0
  if (!m->chunk_bitmapped_packs)
487
0
    return error(_("MIDX does not contain the BTMP chunk"));
488
489
0
  if (prepare_midx_pack(r, m, pack_int_id))
490
0
    return error(_("could not load bitmapped pack %"PRIu32), pack_int_id);
491
492
0
  bp->p = m->packs[local_pack_int_id];
493
0
  bp->bitmap_pos = get_be32((char *)m->chunk_bitmapped_packs +
494
0
          MIDX_CHUNK_BITMAPPED_PACKS_WIDTH * local_pack_int_id);
495
0
  bp->bitmap_nr = get_be32((char *)m->chunk_bitmapped_packs +
496
0
         MIDX_CHUNK_BITMAPPED_PACKS_WIDTH * local_pack_int_id +
497
0
         sizeof(uint32_t));
498
0
  bp->pack_int_id = pack_int_id;
499
0
  bp->from_midx = m;
500
501
0
  return 0;
502
0
}
503
504
int bsearch_one_midx(const struct object_id *oid, struct multi_pack_index *m,
505
         uint32_t *result)
506
0
{
507
0
  int ret = bsearch_hash(oid->hash, m->chunk_oid_fanout,
508
0
             m->chunk_oid_lookup, the_hash_algo->rawsz,
509
0
             result);
510
0
  if (result)
511
0
    *result += m->num_objects_in_base;
512
0
  return ret;
513
0
}
514
515
int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m,
516
     uint32_t *result)
517
0
{
518
0
  for (; m; m = m->base_midx)
519
0
    if (bsearch_one_midx(oid, m, result))
520
0
      return 1;
521
0
  return 0;
522
0
}
523
524
/* Report whether oid is found in m or any of its base layers. */
int midx_has_oid(struct multi_pack_index *m, const struct object_id *oid)
{
	uint32_t *no_position = NULL;

	return bsearch_midx(oid, m, no_position);
}
528
529
struct object_id *nth_midxed_object_oid(struct object_id *oid,
530
          struct multi_pack_index *m,
531
          uint32_t n)
532
0
{
533
0
  if (n >= m->num_objects + m->num_objects_in_base)
534
0
    return NULL;
535
536
0
  n = midx_for_object(&m, n);
537
538
0
  oidread(oid, m->chunk_oid_lookup + st_mult(m->hash_len, n),
539
0
    the_repository->hash_algo);
540
0
  return oid;
541
0
}
542
543
off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos)
544
0
{
545
0
  const unsigned char *offset_data;
546
0
  uint32_t offset32;
547
548
0
  pos = midx_for_object(&m, pos);
549
550
0
  offset_data = m->chunk_object_offsets + (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH;
551
0
  offset32 = get_be32(offset_data + sizeof(uint32_t));
552
553
0
  if (m->chunk_large_offsets && offset32 & MIDX_LARGE_OFFSET_NEEDED) {
554
0
    if (sizeof(off_t) < sizeof(uint64_t))
555
0
      die(_("multi-pack-index stores a 64-bit offset, but off_t is too small"));
556
557
0
    offset32 ^= MIDX_LARGE_OFFSET_NEEDED;
558
0
    if (offset32 >= m->chunk_large_offsets_len / sizeof(uint64_t))
559
0
      die(_("multi-pack-index large offset out of bounds"));
560
0
    return get_be64(m->chunk_large_offsets + sizeof(uint64_t) * offset32);
561
0
  }
562
563
0
  return offset32;
564
0
}
565
566
uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos)
567
0
{
568
0
  pos = midx_for_object(&m, pos);
569
570
0
  return m->num_packs_in_base + get_be32(m->chunk_object_offsets +
571
0
                 (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH);
572
0
}
573
574
int fill_midx_entry(struct repository *r,
575
        const struct object_id *oid,
576
        struct pack_entry *e,
577
        struct multi_pack_index *m)
578
0
{
579
0
  uint32_t pos;
580
0
  uint32_t pack_int_id;
581
0
  struct packed_git *p;
582
583
0
  if (!bsearch_midx(oid, m, &pos))
584
0
    return 0;
585
586
0
  midx_for_object(&m, pos);
587
0
  pack_int_id = nth_midxed_pack_int_id(m, pos);
588
589
0
  if (prepare_midx_pack(r, m, pack_int_id))
590
0
    return 0;
591
0
  p = m->packs[pack_int_id - m->num_packs_in_base];
592
593
  /*
594
  * We are about to tell the caller where they can locate the
595
  * requested object.  We better make sure the packfile is
596
  * still here and can be accessed before supplying that
597
  * answer, as it may have been deleted since the MIDX was
598
  * loaded!
599
  */
600
0
  if (!is_pack_valid(p))
601
0
    return 0;
602
603
0
  if (oidset_size(&p->bad_objects) &&
604
0
      oidset_contains(&p->bad_objects, oid))
605
0
    return 0;
606
607
0
  e->offset = nth_midxed_offset(m, pos);
608
0
  e->p = p;
609
610
0
  return 1;
611
0
}
612
613
/* Match "foo.idx" against either "foo.pack" _or_ "foo.idx". */
614
int cmp_idx_or_pack_name(const char *idx_or_pack_name,
			 const char *idx_name)
{
	size_t common = 0;
	const char *idx_rest, *other_rest;

	/* Measure the prefix the two names share. */
	while (idx_name[common] &&
	       idx_name[common] == idx_or_pack_name[common])
		common++;

	idx_rest = idx_name + common;
	other_rest = idx_or_pack_name + common;

	/*
	 * After a shared "pack-1234." the leftovers may be "idx" and "pack",
	 * which counts as a match.  Two identical names leave two empty
	 * leftovers instead; that case falls through to the strcmp() below,
	 * which returns 0 for it.
	 *
	 * Technically this also accepts "fooidx" vs "foopack", but such
	 * names never occur in practice.
	 */
	if (!strcmp(idx_rest, "idx") && !strcmp(other_rest, "pack"))
		return 0;

	/*
	 * Comparing the leftovers orders by the first differing character,
	 * exactly as strcmp() on the full names would, so the result can
	 * drive a binary search over a plainly sorted list.
	 */
	return strcmp(other_rest, idx_rest);
}
644
645
static int midx_contains_pack_1(struct multi_pack_index *m,
646
        const char *idx_or_pack_name)
647
0
{
648
0
  uint32_t first = 0, last = m->num_packs;
649
650
0
  while (first < last) {
651
0
    uint32_t mid = first + (last - first) / 2;
652
0
    const char *current;
653
0
    int cmp;
654
655
0
    current = m->pack_names[mid];
656
0
    cmp = cmp_idx_or_pack_name(idx_or_pack_name, current);
657
0
    if (!cmp)
658
0
      return 1;
659
0
    if (cmp > 0) {
660
0
      first = mid + 1;
661
0
      continue;
662
0
    }
663
0
    last = mid;
664
0
  }
665
666
0
  return 0;
667
0
}
668
669
int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name)
670
0
{
671
0
  for (; m; m = m->base_midx)
672
0
    if (midx_contains_pack_1(m, idx_or_pack_name))
673
0
      return 1;
674
0
  return 0;
675
0
}
676
677
int midx_preferred_pack(struct multi_pack_index *m, uint32_t *pack_int_id)
678
0
{
679
0
  if (m->preferred_pack_idx == -1) {
680
0
    uint32_t midx_pos;
681
0
    if (load_midx_revindex(m) < 0) {
682
0
      m->preferred_pack_idx = -2;
683
0
      return -1;
684
0
    }
685
686
0
    midx_pos = pack_pos_to_midx(m, m->num_objects_in_base);
687
688
0
    m->preferred_pack_idx = nth_midxed_pack_int_id(m, midx_pos);
689
690
0
  } else if (m->preferred_pack_idx == -2)
691
0
    return -1; /* no revindex */
692
693
0
  *pack_int_id = m->preferred_pack_idx;
694
0
  return 0;
695
0
}
696
697
int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, int local)
698
0
{
699
0
  struct multi_pack_index *m;
700
0
  struct multi_pack_index *m_search;
701
702
0
  prepare_repo_settings(r);
703
0
  if (!r->settings.core_multi_pack_index)
704
0
    return 0;
705
706
0
  for (m_search = r->objects->multi_pack_index; m_search; m_search = m_search->next)
707
0
    if (!strcmp(object_dir, m_search->object_dir))
708
0
      return 1;
709
710
0
  m = load_multi_pack_index(object_dir, local);
711
712
0
  if (m) {
713
0
    struct multi_pack_index *mp = r->objects->multi_pack_index;
714
0
    if (mp) {
715
0
      m->next = mp->next;
716
0
      mp->next = m;
717
0
    } else
718
0
      r->objects->multi_pack_index = m;
719
0
    return 1;
720
0
  }
721
722
0
  return 0;
723
0
}
724
725
int midx_checksum_valid(struct multi_pack_index *m)
726
0
{
727
0
  return hashfile_checksum_valid(m->data, m->data_len);
728
0
}
729
730
struct clear_midx_data {
731
  char **keep;
732
  uint32_t keep_nr;
733
  const char *ext;
734
};
735
736
static void clear_midx_file_ext(const char *full_path, size_t full_path_len UNUSED,
737
        const char *file_name, void *_data)
738
0
{
739
0
  struct clear_midx_data *data = _data;
740
0
  uint32_t i;
741
742
0
  if (!(starts_with(file_name, "multi-pack-index-") &&
743
0
        ends_with(file_name, data->ext)))
744
0
    return;
745
0
  for (i = 0; i < data->keep_nr; i++) {
746
0
    if (!strcmp(data->keep[i], file_name))
747
0
      return;
748
0
  }
749
0
  if (unlink(full_path))
750
0
    die_errno(_("failed to remove %s"), full_path);
751
0
}
752
753
void clear_midx_files_ext(const char *object_dir, const char *ext,
754
        const char *keep_hash)
755
0
{
756
0
  struct clear_midx_data data;
757
0
  memset(&data, 0, sizeof(struct clear_midx_data));
758
759
0
  if (keep_hash) {
760
0
    ALLOC_ARRAY(data.keep, 1);
761
762
0
    data.keep[0] = xstrfmt("multi-pack-index-%s.%s", keep_hash, ext);
763
0
    data.keep_nr = 1;
764
0
  }
765
0
  data.ext = ext;
766
767
0
  for_each_file_in_pack_dir(object_dir,
768
0
          clear_midx_file_ext,
769
0
          &data);
770
771
0
  if (keep_hash)
772
0
    free(data.keep[0]);
773
0
  free(data.keep);
774
0
}
775
776
void clear_incremental_midx_files_ext(const char *object_dir, const char *ext,
777
              char **keep_hashes,
778
              uint32_t hashes_nr)
779
0
{
780
0
  struct clear_midx_data data;
781
0
  uint32_t i;
782
783
0
  memset(&data, 0, sizeof(struct clear_midx_data));
784
785
0
  ALLOC_ARRAY(data.keep, hashes_nr);
786
0
  for (i = 0; i < hashes_nr; i++)
787
0
    data.keep[i] = xstrfmt("multi-pack-index-%s.%s", keep_hashes[i],
788
0
               ext);
789
0
  data.keep_nr = hashes_nr;
790
0
  data.ext = ext;
791
792
0
  for_each_file_in_pack_subdir(object_dir, "multi-pack-index.d",
793
0
             clear_midx_file_ext, &data);
794
795
0
  for (i = 0; i < hashes_nr; i++)
796
0
    free(data.keep[i]);
797
0
  free(data.keep);
798
0
}
799
800
void clear_midx_file(struct repository *r)
801
0
{
802
0
  struct strbuf midx = STRBUF_INIT;
803
804
0
  get_midx_filename(&midx, r->objects->odb->path);
805
806
0
  if (r->objects && r->objects->multi_pack_index) {
807
0
    close_midx(r->objects->multi_pack_index);
808
0
    r->objects->multi_pack_index = NULL;
809
0
  }
810
811
0
  if (remove_path(midx.buf))
812
0
    die(_("failed to clear multi-pack-index at %s"), midx.buf);
813
814
0
  clear_midx_files_ext(r->objects->odb->path, MIDX_EXT_BITMAP, NULL);
815
0
  clear_midx_files_ext(r->objects->odb->path, MIDX_EXT_REV, NULL);
816
817
0
  strbuf_release(&midx);
818
0
}
819
820
/* Set to 1 by midx_report(); reset and returned by verify_midx_file(). */
static int verify_midx_error;

/*
 * Print a printf-style message followed by a newline on stderr and record
 * that verification found a problem.
 */
__attribute__((format (printf, 1, 2)))
static void midx_report(const char *fmt, ...)
{
	va_list args;

	verify_midx_error = 1;

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);

	fprintf(stderr, "\n");
}
832
833
/* An object position together with the id of the pack holding it. */
struct pair_pos_vs_id
{
	uint32_t pos;
	uint32_t pack_int_id;
};

/*
 * qsort() comparator that orders pairs by descending pack_int_id.
 *
 * The ids are compared explicitly rather than subtracted: the difference
 * of two uint32_t values does not fit an int once it exceeds INT_MAX, and
 * converting it would report the wrong order.
 */
static int compare_pair_pos_vs_id(const void *_a, const void *_b)
{
	const struct pair_pos_vs_id *a = _a;
	const struct pair_pos_vs_id *b = _b;

	return (b->pack_int_id > a->pack_int_id) -
	       (b->pack_int_id < a->pack_int_id);
}
846
847
/*
 * display_progress() is only called when the counter is a multiple of
 * SPARSE_PROGRESS_INTERVAL, to keep its cost down.  The interval itself
 * was picked arbitrarily.
 */
#define SPARSE_PROGRESS_INTERVAL (1 << 12)
#define midx_display_sparse_progress(progress, n) \
	do { \
		uint64_t _n = (n); \
		if (!(_n % SPARSE_PROGRESS_INTERVAL)) \
			display_progress(progress, _n); \
	} while (0)
858
859
int verify_midx_file(struct repository *r, const char *object_dir, unsigned flags)
860
0
{
861
0
  struct pair_pos_vs_id *pairs = NULL;
862
0
  uint32_t i;
863
0
  struct progress *progress = NULL;
864
0
  struct multi_pack_index *m = load_multi_pack_index(object_dir, 1);
865
0
  struct multi_pack_index *curr;
866
0
  verify_midx_error = 0;
867
868
0
  if (!m) {
869
0
    int result = 0;
870
0
    struct stat sb;
871
0
    struct strbuf filename = STRBUF_INIT;
872
873
0
    get_midx_filename(&filename, object_dir);
874
875
0
    if (!stat(filename.buf, &sb)) {
876
0
      error(_("multi-pack-index file exists, but failed to parse"));
877
0
      result = 1;
878
0
    }
879
0
    strbuf_release(&filename);
880
0
    return result;
881
0
  }
882
883
0
  if (!midx_checksum_valid(m))
884
0
    midx_report(_("incorrect checksum"));
885
886
0
  if (flags & MIDX_PROGRESS)
887
0
    progress = start_delayed_progress(_("Looking for referenced packfiles"),
888
0
              m->num_packs + m->num_packs_in_base);
889
0
  for (i = 0; i < m->num_packs + m->num_packs_in_base; i++) {
890
0
    if (prepare_midx_pack(r, m, i))
891
0
      midx_report("failed to load pack in position %d", i);
892
893
0
    display_progress(progress, i + 1);
894
0
  }
895
0
  stop_progress(&progress);
896
897
0
  if (m->num_objects == 0) {
898
0
    midx_report(_("the midx contains no oid"));
899
    /*
900
     * Remaining tests assume that we have objects, so we can
901
     * return here.
902
     */
903
0
    goto cleanup;
904
0
  }
905
906
0
  if (flags & MIDX_PROGRESS)
907
0
    progress = start_sparse_progress(_("Verifying OID order in multi-pack-index"),
908
0
             m->num_objects - 1);
909
910
0
  for (curr = m; curr; curr = curr->base_midx) {
911
0
    for (i = 0; i < m->num_objects - 1; i++) {
912
0
      struct object_id oid1, oid2;
913
914
0
      nth_midxed_object_oid(&oid1, m, m->num_objects_in_base + i);
915
0
      nth_midxed_object_oid(&oid2, m, m->num_objects_in_base + i + 1);
916
917
0
      if (oidcmp(&oid1, &oid2) >= 0)
918
0
        midx_report(_("oid lookup out of order: oid[%d] = %s >= %s = oid[%d]"),
919
0
              i, oid_to_hex(&oid1), oid_to_hex(&oid2), i + 1);
920
921
0
      midx_display_sparse_progress(progress, i + 1);
922
0
    }
923
0
  }
924
0
  stop_progress(&progress);
925
926
  /*
927
   * Create an array mapping each object to its packfile id.  Sort it
928
   * to group the objects by packfile.  Use this permutation to visit
929
   * each of the objects and only require 1 packfile to be open at a
930
   * time.
931
   */
932
0
  ALLOC_ARRAY(pairs, m->num_objects + m->num_objects_in_base);
933
0
  for (i = 0; i < m->num_objects + m->num_objects_in_base; i++) {
934
0
    pairs[i].pos = i;
935
0
    pairs[i].pack_int_id = nth_midxed_pack_int_id(m, i);
936
0
  }
937
938
0
  if (flags & MIDX_PROGRESS)
939
0
    progress = start_sparse_progress(_("Sorting objects by packfile"),
940
0
             m->num_objects);
941
0
  display_progress(progress, 0); /* TODO: Measure QSORT() progress */
942
0
  QSORT(pairs, m->num_objects, compare_pair_pos_vs_id);
943
0
  stop_progress(&progress);
944
945
0
  if (flags & MIDX_PROGRESS)
946
0
    progress = start_sparse_progress(_("Verifying object offsets"), m->num_objects);
947
0
  for (i = 0; i < m->num_objects + m->num_objects_in_base; i++) {
948
0
    struct object_id oid;
949
0
    struct pack_entry e;
950
0
    off_t m_offset, p_offset;
951
952
0
    if (i > 0 && pairs[i-1].pack_int_id != pairs[i].pack_int_id &&
953
0
        nth_midxed_pack(m, pairs[i-1].pack_int_id)) {
954
0
      uint32_t pack_int_id = pairs[i-1].pack_int_id;
955
0
      struct packed_git *p = nth_midxed_pack(m, pack_int_id);
956
957
0
      close_pack_fd(p);
958
0
      close_pack_index(p);
959
0
    }
960
961
0
    nth_midxed_object_oid(&oid, m, pairs[i].pos);
962
963
0
    if (!fill_midx_entry(r, &oid, &e, m)) {
964
0
      midx_report(_("failed to load pack entry for oid[%d] = %s"),
965
0
            pairs[i].pos, oid_to_hex(&oid));
966
0
      continue;
967
0
    }
968
969
0
    if (open_pack_index(e.p)) {
970
0
      midx_report(_("failed to load pack-index for packfile %s"),
971
0
            e.p->pack_name);
972
0
      break;
973
0
    }
974
975
0
    m_offset = e.offset;
976
0
    p_offset = find_pack_entry_one(oid.hash, e.p);
977
978
0
    if (m_offset != p_offset)
979
0
      midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64" != %"PRIx64),
980
0
            pairs[i].pos, oid_to_hex(&oid), m_offset, p_offset);
981
982
0
    midx_display_sparse_progress(progress, i + 1);
983
0
  }
984
0
  stop_progress(&progress);
985
986
0
cleanup:
987
0
  free(pairs);
988
0
  close_midx(m);
989
990
0
  return verify_midx_error;
991
0
}