Coverage Report

Created: 2026-02-14 06:27

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/git/midx.c
Line
Count
Source
1
#define DISABLE_SIGN_COMPARE_WARNINGS
2
3
#include "git-compat-util.h"
4
#include "config.h"
5
#include "dir.h"
6
#include "hex.h"
7
#include "packfile.h"
8
#include "hash-lookup.h"
9
#include "midx.h"
10
#include "progress.h"
11
#include "trace2.h"
12
#include "chunk-format.h"
13
#include "pack-bitmap.h"
14
#include "pack-revindex.h"
15
16
0
#define MIDX_PACK_ERROR ((void *)(intptr_t)-1)
17
18
int midx_checksum_valid(struct multi_pack_index *m);
19
void clear_midx_files_ext(struct odb_source *source, const char *ext,
20
        const char *keep_hash);
21
void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext,
22
              char **keep_hashes,
23
              uint32_t hashes_nr);
24
int cmp_idx_or_pack_name(const char *idx_or_pack_name,
25
       const char *idx_name);
26
27
const unsigned char *get_midx_checksum(struct multi_pack_index *m)
28
0
{
29
0
  return m->data + m->data_len - m->source->odb->repo->hash_algo->rawsz;
30
0
}
31
32
void get_midx_filename(struct odb_source *source, struct strbuf *out)
33
0
{
34
0
  get_midx_filename_ext(source, out, NULL, NULL);
35
0
}
36
37
void get_midx_filename_ext(struct odb_source *source, struct strbuf *out,
38
         const unsigned char *hash, const char *ext)
39
0
{
40
0
  strbuf_addf(out, "%s/pack/multi-pack-index", source->path);
41
0
  if (ext)
42
0
    strbuf_addf(out, "-%s.%s", hash_to_hex_algop(hash, source->odb->repo->hash_algo), ext);
43
0
}
44
45
static int midx_read_oid_fanout(const unsigned char *chunk_start,
46
        size_t chunk_size, void *data)
47
0
{
48
0
  int i;
49
0
  struct multi_pack_index *m = data;
50
0
  m->chunk_oid_fanout = (uint32_t *)chunk_start;
51
52
0
  if (chunk_size != 4 * 256) {
53
0
    error(_("multi-pack-index OID fanout is of the wrong size"));
54
0
    return 1;
55
0
  }
56
0
  for (i = 0; i < 255; i++) {
57
0
    uint32_t oid_fanout1 = ntohl(m->chunk_oid_fanout[i]);
58
0
    uint32_t oid_fanout2 = ntohl(m->chunk_oid_fanout[i+1]);
59
60
0
    if (oid_fanout1 > oid_fanout2) {
61
0
      error(_("oid fanout out of order: fanout[%d] = %"PRIx32" > %"PRIx32" = fanout[%d]"),
62
0
            i, oid_fanout1, oid_fanout2, i + 1);
63
0
      return 1;
64
0
    }
65
0
  }
66
0
  m->num_objects = ntohl(m->chunk_oid_fanout[255]);
67
0
  return 0;
68
0
}
69
70
static int midx_read_oid_lookup(const unsigned char *chunk_start,
71
        size_t chunk_size, void *data)
72
0
{
73
0
  struct multi_pack_index *m = data;
74
0
  m->chunk_oid_lookup = chunk_start;
75
76
0
  if (chunk_size != st_mult(m->hash_len, m->num_objects)) {
77
0
    error(_("multi-pack-index OID lookup chunk is the wrong size"));
78
0
    return 1;
79
0
  }
80
0
  return 0;
81
0
}
82
83
static int midx_read_object_offsets(const unsigned char *chunk_start,
84
            size_t chunk_size, void *data)
85
0
{
86
0
  struct multi_pack_index *m = data;
87
0
  m->chunk_object_offsets = chunk_start;
88
89
0
  if (chunk_size != st_mult(m->num_objects, MIDX_CHUNK_OFFSET_WIDTH)) {
90
0
    error(_("multi-pack-index object offset chunk is the wrong size"));
91
0
    return 1;
92
0
  }
93
0
  return 0;
94
0
}
95
96
struct multi_pack_index *get_multi_pack_index(struct odb_source *source)
97
0
{
98
0
  packfile_store_prepare(source->packfiles);
99
0
  return source->packfiles->midx;
100
0
}
101
102
static struct multi_pack_index *load_multi_pack_index_one(struct odb_source *source,
103
                const char *midx_name)
104
0
{
105
0
  struct repository *r = source->odb->repo;
106
0
  struct multi_pack_index *m = NULL;
107
0
  int fd;
108
0
  struct stat st;
109
0
  size_t midx_size;
110
0
  void *midx_map = NULL;
111
0
  uint32_t hash_version;
112
0
  uint32_t i;
113
0
  const char *cur_pack_name;
114
0
  struct chunkfile *cf = NULL;
115
116
0
  fd = git_open(midx_name);
117
118
0
  if (fd < 0)
119
0
    goto cleanup_fail;
120
0
  if (fstat(fd, &st)) {
121
0
    error_errno(_("failed to read %s"), midx_name);
122
0
    goto cleanup_fail;
123
0
  }
124
125
0
  midx_size = xsize_t(st.st_size);
126
127
0
  if (midx_size < (MIDX_HEADER_SIZE + r->hash_algo->rawsz)) {
128
0
    error(_("multi-pack-index file %s is too small"), midx_name);
129
0
    goto cleanup_fail;
130
0
  }
131
132
0
  midx_map = xmmap(NULL, midx_size, PROT_READ, MAP_PRIVATE, fd, 0);
133
0
  close(fd);
134
135
0
  CALLOC_ARRAY(m, 1);
136
0
  m->data = midx_map;
137
0
  m->data_len = midx_size;
138
0
  m->source = source;
139
140
0
  m->signature = get_be32(m->data);
141
0
  if (m->signature != MIDX_SIGNATURE)
142
0
    die(_("multi-pack-index signature 0x%08x does not match signature 0x%08x"),
143
0
          m->signature, MIDX_SIGNATURE);
144
145
0
  m->version = m->data[MIDX_BYTE_FILE_VERSION];
146
0
  if (m->version != MIDX_VERSION)
147
0
    die(_("multi-pack-index version %d not recognized"),
148
0
          m->version);
149
150
0
  hash_version = m->data[MIDX_BYTE_HASH_VERSION];
151
0
  if (hash_version != oid_version(r->hash_algo)) {
152
0
    error(_("multi-pack-index hash version %u does not match version %u"),
153
0
          hash_version, oid_version(r->hash_algo));
154
0
    goto cleanup_fail;
155
0
  }
156
0
  m->hash_len = r->hash_algo->rawsz;
157
158
0
  m->num_chunks = m->data[MIDX_BYTE_NUM_CHUNKS];
159
160
0
  m->num_packs = get_be32(m->data + MIDX_BYTE_NUM_PACKS);
161
162
0
  m->preferred_pack_idx = -1;
163
164
0
  cf = init_chunkfile(NULL);
165
166
0
  if (read_table_of_contents(cf, m->data, midx_size,
167
0
           MIDX_HEADER_SIZE, m->num_chunks,
168
0
           MIDX_CHUNK_ALIGNMENT))
169
0
    goto cleanup_fail;
170
171
0
  if (pair_chunk(cf, MIDX_CHUNKID_PACKNAMES, &m->chunk_pack_names, &m->chunk_pack_names_len))
172
0
    die(_("multi-pack-index required pack-name chunk missing or corrupted"));
173
0
  if (read_chunk(cf, MIDX_CHUNKID_OIDFANOUT, midx_read_oid_fanout, m))
174
0
    die(_("multi-pack-index required OID fanout chunk missing or corrupted"));
175
0
  if (read_chunk(cf, MIDX_CHUNKID_OIDLOOKUP, midx_read_oid_lookup, m))
176
0
    die(_("multi-pack-index required OID lookup chunk missing or corrupted"));
177
0
  if (read_chunk(cf, MIDX_CHUNKID_OBJECTOFFSETS, midx_read_object_offsets, m))
178
0
    die(_("multi-pack-index required object offsets chunk missing or corrupted"));
179
180
0
  pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets,
181
0
       &m->chunk_large_offsets_len);
182
0
  if (git_env_bool("GIT_TEST_MIDX_READ_BTMP", 1))
183
0
    pair_chunk(cf, MIDX_CHUNKID_BITMAPPEDPACKS,
184
0
         (const unsigned char **)&m->chunk_bitmapped_packs,
185
0
         &m->chunk_bitmapped_packs_len);
186
187
0
  if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1))
188
0
    pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex,
189
0
         &m->chunk_revindex_len);
190
191
0
  CALLOC_ARRAY(m->pack_names, m->num_packs);
192
0
  CALLOC_ARRAY(m->packs, m->num_packs);
193
194
0
  cur_pack_name = (const char *)m->chunk_pack_names;
195
0
  for (i = 0; i < m->num_packs; i++) {
196
0
    const char *end;
197
0
    size_t avail = m->chunk_pack_names_len -
198
0
        (cur_pack_name - (const char *)m->chunk_pack_names);
199
200
0
    m->pack_names[i] = cur_pack_name;
201
202
0
    end = memchr(cur_pack_name, '\0', avail);
203
0
    if (!end)
204
0
      die(_("multi-pack-index pack-name chunk is too short"));
205
0
    cur_pack_name = end + 1;
206
207
0
    if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0)
208
0
      die(_("multi-pack-index pack names out of order: '%s' before '%s'"),
209
0
            m->pack_names[i - 1],
210
0
            m->pack_names[i]);
211
0
  }
212
213
0
  trace2_data_intmax("midx", r, "load/num_packs", m->num_packs);
214
0
  trace2_data_intmax("midx", r, "load/num_objects", m->num_objects);
215
216
0
  free_chunkfile(cf);
217
0
  return m;
218
219
0
cleanup_fail:
220
0
  free(m);
221
0
  free_chunkfile(cf);
222
0
  if (midx_map)
223
0
    munmap(midx_map, midx_size);
224
0
  if (0 <= fd)
225
0
    close(fd);
226
0
  return NULL;
227
0
}
228
229
void get_midx_chain_dirname(struct odb_source *source, struct strbuf *buf)
230
0
{
231
0
  strbuf_addf(buf, "%s/pack/multi-pack-index.d", source->path);
232
0
}
233
234
/*
 * Append the path of the chain file, i.e. the chain directory followed
 * by "/multi-pack-index-chain", to `buf`.
 */
void get_midx_chain_filename(struct odb_source *source, struct strbuf *buf)
{
	static const char chain_basename[] = "/multi-pack-index-chain";

	get_midx_chain_dirname(source, buf);
	strbuf_addstr(buf, chain_basename);
}
239
240
void get_split_midx_filename_ext(struct odb_source *source, struct strbuf *buf,
241
         const unsigned char *hash, const char *ext)
242
0
{
243
0
  get_midx_chain_dirname(source, buf);
244
0
  strbuf_addf(buf, "/multi-pack-index-%s.%s",
245
0
        hash_to_hex_algop(hash, source->odb->repo->hash_algo), ext);
246
0
}
247
248
static int open_multi_pack_index_chain(const struct git_hash_algo *hash_algo,
249
               const char *chain_file, int *fd,
250
               struct stat *st)
251
0
{
252
0
  *fd = git_open(chain_file);
253
0
  if (*fd < 0)
254
0
    return 0;
255
0
  if (fstat(*fd, st)) {
256
0
    close(*fd);
257
0
    return 0;
258
0
  }
259
0
  if (st->st_size < hash_algo->hexsz) {
260
0
    close(*fd);
261
0
    if (!st->st_size) {
262
      /* treat empty files the same as missing */
263
0
      errno = ENOENT;
264
0
    } else {
265
0
      warning(_("multi-pack-index chain file too small"));
266
0
      errno = EINVAL;
267
0
    }
268
0
    return 0;
269
0
  }
270
0
  return 1;
271
0
}
272
273
static int add_midx_to_chain(struct multi_pack_index *midx,
274
           struct multi_pack_index *midx_chain)
275
0
{
276
0
  if (midx_chain) {
277
0
    if (unsigned_add_overflows(midx_chain->num_packs,
278
0
             midx_chain->num_packs_in_base)) {
279
0
      warning(_("pack count in base MIDX too high: %"PRIuMAX),
280
0
        (uintmax_t)midx_chain->num_packs_in_base);
281
0
      return 0;
282
0
    }
283
0
    if (unsigned_add_overflows(midx_chain->num_objects,
284
0
             midx_chain->num_objects_in_base)) {
285
0
      warning(_("object count in base MIDX too high: %"PRIuMAX),
286
0
        (uintmax_t)midx_chain->num_objects_in_base);
287
0
      return 0;
288
0
    }
289
0
    midx->num_packs_in_base = midx_chain->num_packs +
290
0
      midx_chain->num_packs_in_base;
291
0
    midx->num_objects_in_base = midx_chain->num_objects +
292
0
      midx_chain->num_objects_in_base;
293
0
  }
294
295
0
  midx->base_midx = midx_chain;
296
0
  midx->has_chain = 1;
297
298
0
  return 1;
299
0
}
300
301
static struct multi_pack_index *load_midx_chain_fd_st(struct odb_source *source,
302
                  int fd, struct stat *st,
303
                  int *incomplete_chain)
304
0
{
305
0
  const struct git_hash_algo *hash_algo = source->odb->repo->hash_algo;
306
0
  struct multi_pack_index *midx_chain = NULL;
307
0
  struct strbuf buf = STRBUF_INIT;
308
0
  int valid = 1;
309
0
  uint32_t i, count;
310
0
  FILE *fp = xfdopen(fd, "r");
311
312
0
  count = st->st_size / (hash_algo->hexsz + 1);
313
314
0
  for (i = 0; i < count; i++) {
315
0
    struct multi_pack_index *m;
316
0
    struct object_id layer;
317
318
0
    if (strbuf_getline_lf(&buf, fp) == EOF)
319
0
      break;
320
321
0
    if (get_oid_hex_algop(buf.buf, &layer, hash_algo)) {
322
0
      warning(_("invalid multi-pack-index chain: line '%s' "
323
0
          "not a hash"),
324
0
        buf.buf);
325
0
      valid = 0;
326
0
      break;
327
0
    }
328
329
0
    valid = 0;
330
331
0
    strbuf_reset(&buf);
332
0
    get_split_midx_filename_ext(source, &buf,
333
0
              layer.hash, MIDX_EXT_MIDX);
334
0
    m = load_multi_pack_index_one(source, buf.buf);
335
336
0
    if (m) {
337
0
      if (add_midx_to_chain(m, midx_chain)) {
338
0
        midx_chain = m;
339
0
        valid = 1;
340
0
      } else {
341
0
        close_midx(m);
342
0
      }
343
0
    }
344
0
    if (!valid) {
345
0
      warning(_("unable to find all multi-pack index files"));
346
0
      break;
347
0
    }
348
0
  }
349
350
0
  fclose(fp);
351
0
  strbuf_release(&buf);
352
353
0
  *incomplete_chain = !valid;
354
0
  return midx_chain;
355
0
}
356
357
static struct multi_pack_index *load_multi_pack_index_chain(struct odb_source *source)
358
0
{
359
0
  struct strbuf chain_file = STRBUF_INIT;
360
0
  struct stat st;
361
0
  int fd;
362
0
  struct multi_pack_index *m = NULL;
363
364
0
  get_midx_chain_filename(source, &chain_file);
365
0
  if (open_multi_pack_index_chain(source->odb->repo->hash_algo, chain_file.buf, &fd, &st)) {
366
0
    int incomplete;
367
    /* ownership of fd is taken over by load function */
368
0
    m = load_midx_chain_fd_st(source, fd, &st, &incomplete);
369
0
  }
370
371
0
  strbuf_release(&chain_file);
372
0
  return m;
373
0
}
374
375
struct multi_pack_index *load_multi_pack_index(struct odb_source *source)
376
0
{
377
0
  struct strbuf midx_name = STRBUF_INIT;
378
0
  struct multi_pack_index *m;
379
380
0
  get_midx_filename(source, &midx_name);
381
382
0
  m = load_multi_pack_index_one(source, midx_name.buf);
383
0
  if (!m)
384
0
    m = load_multi_pack_index_chain(source);
385
386
0
  strbuf_release(&midx_name);
387
388
0
  return m;
389
0
}
390
391
void close_midx(struct multi_pack_index *m)
392
0
{
393
0
  uint32_t i;
394
395
0
  if (!m)
396
0
    return;
397
398
0
  close_midx(m->base_midx);
399
400
0
  munmap((unsigned char *)m->data, m->data_len);
401
402
0
  for (i = 0; i < m->num_packs; i++) {
403
0
    if (m->packs[i] && m->packs[i] != MIDX_PACK_ERROR)
404
0
      m->packs[i]->multi_pack_index = 0;
405
0
  }
406
0
  FREE_AND_NULL(m->packs);
407
0
  FREE_AND_NULL(m->pack_names);
408
0
  free(m);
409
0
}
410
411
static uint32_t midx_for_object(struct multi_pack_index **_m, uint32_t pos)
412
0
{
413
0
  struct multi_pack_index *m = *_m;
414
0
  while (m && pos < m->num_objects_in_base)
415
0
    m = m->base_midx;
416
417
0
  if (!m)
418
0
    BUG("NULL multi-pack-index for object position: %"PRIu32, pos);
419
420
0
  if (pos >= m->num_objects + m->num_objects_in_base)
421
0
    die(_("invalid MIDX object position, MIDX is likely corrupt"));
422
423
0
  *_m = m;
424
425
0
  return pos - m->num_objects_in_base;
426
0
}
427
428
static uint32_t midx_for_pack(struct multi_pack_index **_m,
429
            uint32_t pack_int_id)
430
0
{
431
0
  struct multi_pack_index *m = *_m;
432
0
  while (m && pack_int_id < m->num_packs_in_base)
433
0
    m = m->base_midx;
434
435
0
  if (!m)
436
0
    BUG("NULL multi-pack-index for pack ID: %"PRIu32, pack_int_id);
437
438
0
  if (pack_int_id >= m->num_packs + m->num_packs_in_base)
439
0
    die(_("bad pack-int-id: %u (%u total packs)"),
440
0
        pack_int_id, m->num_packs + m->num_packs_in_base);
441
442
0
  *_m = m;
443
444
0
  return pack_int_id - m->num_packs_in_base;
445
0
}
446
447
int prepare_midx_pack(struct multi_pack_index *m,
448
          uint32_t pack_int_id)
449
0
{
450
0
  struct strbuf pack_name = STRBUF_INIT;
451
0
  struct packed_git *p;
452
453
0
  pack_int_id = midx_for_pack(&m, pack_int_id);
454
455
0
  if (m->packs[pack_int_id] == MIDX_PACK_ERROR)
456
0
    return 1;
457
0
  if (m->packs[pack_int_id])
458
0
    return 0;
459
460
0
  strbuf_addf(&pack_name, "%s/pack/%s", m->source->path,
461
0
        m->pack_names[pack_int_id]);
462
0
  p = packfile_store_load_pack(m->source->packfiles,
463
0
             pack_name.buf, m->source->local);
464
0
  strbuf_release(&pack_name);
465
466
0
  if (!p) {
467
0
    m->packs[pack_int_id] = MIDX_PACK_ERROR;
468
0
    return 1;
469
0
  }
470
471
0
  p->multi_pack_index = 1;
472
0
  m->packs[pack_int_id] = p;
473
474
0
  return 0;
475
0
}
476
477
struct packed_git *nth_midxed_pack(struct multi_pack_index *m,
478
           uint32_t pack_int_id)
479
0
{
480
0
  uint32_t local_pack_int_id = midx_for_pack(&m, pack_int_id);
481
0
  if (m->packs[local_pack_int_id] == MIDX_PACK_ERROR)
482
0
    return NULL;
483
0
  return m->packs[local_pack_int_id];
484
0
}
485
486
0
#define MIDX_CHUNK_BITMAPPED_PACKS_WIDTH (2 * sizeof(uint32_t))
487
488
int nth_bitmapped_pack(struct multi_pack_index *m,
489
           struct bitmapped_pack *bp, uint32_t pack_int_id)
490
0
{
491
0
  uint32_t local_pack_int_id = midx_for_pack(&m, pack_int_id);
492
493
0
  if (!m->chunk_bitmapped_packs)
494
0
    return error(_("MIDX does not contain the BTMP chunk"));
495
496
0
  if (prepare_midx_pack(m, pack_int_id))
497
0
    return error(_("could not load bitmapped pack %"PRIu32), pack_int_id);
498
499
0
  bp->p = m->packs[local_pack_int_id];
500
0
  bp->bitmap_pos = get_be32((char *)m->chunk_bitmapped_packs +
501
0
          MIDX_CHUNK_BITMAPPED_PACKS_WIDTH * local_pack_int_id);
502
0
  bp->bitmap_nr = get_be32((char *)m->chunk_bitmapped_packs +
503
0
         MIDX_CHUNK_BITMAPPED_PACKS_WIDTH * local_pack_int_id +
504
0
         sizeof(uint32_t));
505
0
  bp->pack_int_id = pack_int_id;
506
0
  bp->from_midx = m;
507
508
0
  return 0;
509
0
}
510
511
int bsearch_one_midx(const struct object_id *oid, struct multi_pack_index *m,
512
         uint32_t *result)
513
0
{
514
0
  int ret = bsearch_hash(oid->hash, m->chunk_oid_fanout,
515
0
             m->chunk_oid_lookup,
516
0
             m->source->odb->repo->hash_algo->rawsz,
517
0
             result);
518
0
  if (result)
519
0
    *result += m->num_objects_in_base;
520
0
  return ret;
521
0
}
522
523
int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m,
524
     uint32_t *result)
525
0
{
526
0
  for (; m; m = m->base_midx)
527
0
    if (bsearch_one_midx(oid, m, result))
528
0
      return 1;
529
0
  return 0;
530
0
}
531
532
int midx_has_oid(struct multi_pack_index *m, const struct object_id *oid)
533
0
{
534
0
  return bsearch_midx(oid, m, NULL);
535
0
}
536
537
struct object_id *nth_midxed_object_oid(struct object_id *oid,
538
          struct multi_pack_index *m,
539
          uint32_t n)
540
0
{
541
0
  if (n >= m->num_objects + m->num_objects_in_base)
542
0
    return NULL;
543
544
0
  n = midx_for_object(&m, n);
545
546
0
  oidread(oid, m->chunk_oid_lookup + st_mult(m->hash_len, n),
547
0
    m->source->odb->repo->hash_algo);
548
0
  return oid;
549
0
}
550
551
off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos)
552
0
{
553
0
  const unsigned char *offset_data;
554
0
  uint32_t offset32;
555
556
0
  pos = midx_for_object(&m, pos);
557
558
0
  offset_data = m->chunk_object_offsets + (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH;
559
0
  offset32 = get_be32(offset_data + sizeof(uint32_t));
560
561
0
  if (m->chunk_large_offsets && offset32 & MIDX_LARGE_OFFSET_NEEDED) {
562
0
    if (sizeof(off_t) < sizeof(uint64_t))
563
0
      die(_("multi-pack-index stores a 64-bit offset, but off_t is too small"));
564
565
0
    offset32 ^= MIDX_LARGE_OFFSET_NEEDED;
566
0
    if (offset32 >= m->chunk_large_offsets_len / sizeof(uint64_t))
567
0
      die(_("multi-pack-index large offset out of bounds"));
568
0
    return get_be64(m->chunk_large_offsets + sizeof(uint64_t) * offset32);
569
0
  }
570
571
0
  return offset32;
572
0
}
573
574
uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos)
575
0
{
576
0
  pos = midx_for_object(&m, pos);
577
578
0
  return m->num_packs_in_base + get_be32(m->chunk_object_offsets +
579
0
                 (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH);
580
0
}
581
582
int fill_midx_entry(struct multi_pack_index *m,
583
        const struct object_id *oid,
584
        struct pack_entry *e)
585
0
{
586
0
  uint32_t pos;
587
0
  uint32_t pack_int_id;
588
0
  struct packed_git *p;
589
590
0
  if (!bsearch_midx(oid, m, &pos))
591
0
    return 0;
592
593
0
  midx_for_object(&m, pos);
594
0
  pack_int_id = nth_midxed_pack_int_id(m, pos);
595
596
0
  if (prepare_midx_pack(m, pack_int_id))
597
0
    return 0;
598
0
  p = m->packs[pack_int_id - m->num_packs_in_base];
599
600
  /*
601
  * We are about to tell the caller where they can locate the
602
  * requested object.  We better make sure the packfile is
603
  * still here and can be accessed before supplying that
604
  * answer, as it may have been deleted since the MIDX was
605
  * loaded!
606
  */
607
0
  if (!is_pack_valid(p))
608
0
    return 0;
609
610
0
  if (oidset_size(&p->bad_objects) &&
611
0
      oidset_contains(&p->bad_objects, oid))
612
0
    return 0;
613
614
0
  e->offset = nth_midxed_offset(m, pos);
615
0
  e->p = p;
616
617
0
  return 1;
618
0
}
619
620
/*
 * Compare `idx_or_pack_name` ("foo.idx" or "foo.pack") against
 * `idx_name` ("foo.idx").  Returns 0 when they name the same pack, and
 * otherwise a value ordered like strcmp(idx_or_pack_name, idx_name).
 */
int cmp_idx_or_pack_name(const char *idx_or_pack_name,
			 const char *idx_name)
{
	size_t common = 0;

	/* Measure the shared prefix, then step both strings past it. */
	while (idx_name[common] &&
	       idx_name[common] == idx_or_pack_name[common])
		common++;
	idx_name += common;
	idx_or_pack_name += common;

	/*
	 * A remainder of "idx" versus "pack" means the names differed only
	 * in their extension (e.g. after "pack-1234."), which counts as a
	 * match.  Two identical names leave both remainders empty and are
	 * handled by the final strcmp().
	 *
	 * Strictly speaking "fooidx" and "foopack" match as well, but such
	 * names never occur.
	 */
	if (strcmp(idx_name, "idx") == 0 &&
	    strcmp(idx_or_pack_name, "pack") == 0)
		return 0;

	/*
	 * Comparing the remainders orders by the first differing byte,
	 * exactly as a strcmp() of the full names would, so this function
	 * can drive a binary search over a plainly sorted list.
	 */
	return strcmp(idx_or_pack_name, idx_name);
}
651
652
static int midx_contains_pack_1(struct multi_pack_index *m,
653
        const char *idx_or_pack_name)
654
0
{
655
0
  uint32_t first = 0, last = m->num_packs;
656
657
0
  while (first < last) {
658
0
    uint32_t mid = first + (last - first) / 2;
659
0
    const char *current;
660
0
    int cmp;
661
662
0
    current = m->pack_names[mid];
663
0
    cmp = cmp_idx_or_pack_name(idx_or_pack_name, current);
664
0
    if (!cmp)
665
0
      return 1;
666
0
    if (cmp > 0) {
667
0
      first = mid + 1;
668
0
      continue;
669
0
    }
670
0
    last = mid;
671
0
  }
672
673
0
  return 0;
674
0
}
675
676
int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name)
677
0
{
678
0
  for (; m; m = m->base_midx)
679
0
    if (midx_contains_pack_1(m, idx_or_pack_name))
680
0
      return 1;
681
0
  return 0;
682
0
}
683
684
int midx_preferred_pack(struct multi_pack_index *m, uint32_t *pack_int_id)
685
0
{
686
0
  if (m->preferred_pack_idx == -1) {
687
0
    uint32_t midx_pos;
688
0
    if (load_midx_revindex(m)) {
689
0
      m->preferred_pack_idx = -2;
690
0
      return -1;
691
0
    }
692
693
0
    midx_pos = pack_pos_to_midx(m, m->num_objects_in_base);
694
695
0
    m->preferred_pack_idx = nth_midxed_pack_int_id(m, midx_pos);
696
697
0
  } else if (m->preferred_pack_idx == -2)
698
0
    return -1; /* no revindex */
699
700
0
  *pack_int_id = m->preferred_pack_idx;
701
0
  return 0;
702
0
}
703
704
int prepare_multi_pack_index_one(struct odb_source *source)
705
0
{
706
0
  struct repository *r = source->odb->repo;
707
708
0
  prepare_repo_settings(r);
709
0
  if (!r->settings.core_multi_pack_index)
710
0
    return 0;
711
712
0
  if (source->packfiles->midx)
713
0
    return 1;
714
715
0
  source->packfiles->midx = load_multi_pack_index(source);
716
717
0
  return !!source->packfiles->midx;
718
0
}
719
720
int midx_checksum_valid(struct multi_pack_index *m)
721
0
{
722
0
  return hashfile_checksum_valid(m->source->odb->repo->hash_algo,
723
0
               m->data, m->data_len);
724
0
}
725
726
struct clear_midx_data {
727
  char **keep;
728
  uint32_t keep_nr;
729
  const char *ext;
730
};
731
732
static void clear_midx_file_ext(const char *full_path, size_t full_path_len UNUSED,
733
        const char *file_name, void *_data)
734
0
{
735
0
  struct clear_midx_data *data = _data;
736
0
  uint32_t i;
737
738
0
  if (!(starts_with(file_name, "multi-pack-index-") &&
739
0
        ends_with(file_name, data->ext)))
740
0
    return;
741
0
  for (i = 0; i < data->keep_nr; i++) {
742
0
    if (!strcmp(data->keep[i], file_name))
743
0
      return;
744
0
  }
745
0
  if (unlink(full_path))
746
0
    die_errno(_("failed to remove %s"), full_path);
747
0
}
748
749
void clear_midx_files_ext(struct odb_source *source, const char *ext,
750
        const char *keep_hash)
751
0
{
752
0
  struct clear_midx_data data;
753
0
  memset(&data, 0, sizeof(struct clear_midx_data));
754
755
0
  if (keep_hash) {
756
0
    ALLOC_ARRAY(data.keep, 1);
757
758
0
    data.keep[0] = xstrfmt("multi-pack-index-%s.%s", keep_hash, ext);
759
0
    data.keep_nr = 1;
760
0
  }
761
0
  data.ext = ext;
762
763
0
  for_each_file_in_pack_dir(source->path,
764
0
          clear_midx_file_ext,
765
0
          &data);
766
767
0
  if (keep_hash)
768
0
    free(data.keep[0]);
769
0
  free(data.keep);
770
0
}
771
772
void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext,
773
              char **keep_hashes,
774
              uint32_t hashes_nr)
775
0
{
776
0
  struct clear_midx_data data;
777
0
  uint32_t i;
778
779
0
  memset(&data, 0, sizeof(struct clear_midx_data));
780
781
0
  ALLOC_ARRAY(data.keep, hashes_nr);
782
0
  for (i = 0; i < hashes_nr; i++)
783
0
    data.keep[i] = xstrfmt("multi-pack-index-%s.%s", keep_hashes[i],
784
0
               ext);
785
0
  data.keep_nr = hashes_nr;
786
0
  data.ext = ext;
787
788
0
  for_each_file_in_pack_subdir(source->path, "multi-pack-index.d",
789
0
             clear_midx_file_ext, &data);
790
791
0
  for (i = 0; i < hashes_nr; i++)
792
0
    free(data.keep[i]);
793
0
  free(data.keep);
794
0
}
795
796
void clear_midx_file(struct repository *r)
797
0
{
798
0
  struct strbuf midx = STRBUF_INIT;
799
800
0
  get_midx_filename(r->objects->sources, &midx);
801
802
0
  if (r->objects) {
803
0
    struct odb_source *source;
804
805
0
    for (source = r->objects->sources; source; source = source->next) {
806
0
      if (source->packfiles->midx)
807
0
        close_midx(source->packfiles->midx);
808
0
      source->packfiles->midx = NULL;
809
0
    }
810
0
  }
811
812
0
  if (remove_path(midx.buf))
813
0
    die(_("failed to clear multi-pack-index at %s"), midx.buf);
814
815
0
  clear_midx_files_ext(r->objects->sources, MIDX_EXT_BITMAP, NULL);
816
0
  clear_midx_files_ext(r->objects->sources, MIDX_EXT_REV, NULL);
817
818
0
  strbuf_release(&midx);
819
0
}
820
821
static int verify_midx_error;
822
823
__attribute__((format (printf, 1, 2)))
824
static void midx_report(const char *fmt, ...)
825
0
{
826
0
  va_list ap;
827
0
  verify_midx_error = 1;
828
0
  va_start(ap, fmt);
829
0
  vfprintf(stderr, fmt, ap);
830
0
  fprintf(stderr, "\n");
831
0
  va_end(ap);
832
0
}
833
834
/* An object's MIDX position paired with the ID of the pack holding it. */
struct pair_pos_vs_id
{
	uint32_t pos;
	uint32_t pack_int_id;
};

/*
 * qsort()-style comparator ordering pairs by pack_int_id, largest first.
 *
 * The IDs are compared explicitly rather than subtracted: the difference
 * of two uint32_t values does not fit in an int once it exceeds INT_MAX,
 * and the converted result would then carry the wrong sign.
 */
static int compare_pair_pos_vs_id(const void *_a, const void *_b)
{
	const struct pair_pos_vs_id *a = _a;
	const struct pair_pos_vs_id *b = _b;

	if (b->pack_int_id > a->pack_int_id)
		return 1;
	if (b->pack_int_id < a->pack_int_id)
		return -1;
	return 0;
}
847
848
/*
 * display_progress() is comparatively expensive, so only forward every
 * SPARSE_PROGRESS_INTERVAL-th value to it.  The interval itself is an
 * arbitrary choice.
 */
#define SPARSE_PROGRESS_INTERVAL (1 << 12)
#define midx_display_sparse_progress(progress, n) \
	do { \
		uint64_t _n = (n); \
		if (!(_n % SPARSE_PROGRESS_INTERVAL)) \
			display_progress(progress, _n); \
	} while (0)
859
860
int verify_midx_file(struct odb_source *source, unsigned flags)
861
0
{
862
0
  struct repository *r = source->odb->repo;
863
0
  struct pair_pos_vs_id *pairs = NULL;
864
0
  uint32_t i;
865
0
  struct progress *progress = NULL;
866
0
  struct multi_pack_index *m = load_multi_pack_index(source);
867
0
  struct multi_pack_index *curr;
868
0
  verify_midx_error = 0;
869
870
0
  if (!m) {
871
0
    int result = 0;
872
0
    struct stat sb;
873
0
    struct strbuf filename = STRBUF_INIT;
874
875
0
    get_midx_filename(source, &filename);
876
877
0
    if (!stat(filename.buf, &sb)) {
878
0
      error(_("multi-pack-index file exists, but failed to parse"));
879
0
      result = 1;
880
0
    }
881
0
    strbuf_release(&filename);
882
0
    return result;
883
0
  }
884
885
0
  if (!midx_checksum_valid(m))
886
0
    midx_report(_("incorrect checksum"));
887
888
0
  if (flags & MIDX_PROGRESS)
889
0
    progress = start_delayed_progress(r,
890
0
              _("Looking for referenced packfiles"),
891
0
              m->num_packs + m->num_packs_in_base);
892
0
  for (i = 0; i < m->num_packs + m->num_packs_in_base; i++) {
893
0
    if (prepare_midx_pack(m, i))
894
0
      midx_report("failed to load pack in position %d", i);
895
896
0
    display_progress(progress, i + 1);
897
0
  }
898
0
  stop_progress(&progress);
899
900
0
  if (m->num_objects == 0) {
901
0
    midx_report(_("the midx contains no oid"));
902
    /*
903
     * Remaining tests assume that we have objects, so we can
904
     * return here.
905
     */
906
0
    goto cleanup;
907
0
  }
908
909
0
  if (flags & MIDX_PROGRESS)
910
0
    progress = start_sparse_progress(r,
911
0
             _("Verifying OID order in multi-pack-index"),
912
0
             m->num_objects - 1);
913
914
0
  for (curr = m; curr; curr = curr->base_midx) {
915
0
    for (i = 0; i < m->num_objects - 1; i++) {
916
0
      struct object_id oid1, oid2;
917
918
0
      nth_midxed_object_oid(&oid1, m, m->num_objects_in_base + i);
919
0
      nth_midxed_object_oid(&oid2, m, m->num_objects_in_base + i + 1);
920
921
0
      if (oidcmp(&oid1, &oid2) >= 0)
922
0
        midx_report(_("oid lookup out of order: oid[%d] = %s >= %s = oid[%d]"),
923
0
              i, oid_to_hex(&oid1), oid_to_hex(&oid2), i + 1);
924
925
0
      midx_display_sparse_progress(progress, i + 1);
926
0
    }
927
0
  }
928
0
  stop_progress(&progress);
929
930
  /*
931
   * Create an array mapping each object to its packfile id.  Sort it
932
   * to group the objects by packfile.  Use this permutation to visit
933
   * each of the objects and only require 1 packfile to be open at a
934
   * time.
935
   */
936
0
  ALLOC_ARRAY(pairs, m->num_objects + m->num_objects_in_base);
937
0
  for (i = 0; i < m->num_objects + m->num_objects_in_base; i++) {
938
0
    pairs[i].pos = i;
939
0
    pairs[i].pack_int_id = nth_midxed_pack_int_id(m, i);
940
0
  }
941
942
0
  if (flags & MIDX_PROGRESS)
943
0
    progress = start_sparse_progress(r,
944
0
             _("Sorting objects by packfile"),
945
0
             m->num_objects);
946
0
  display_progress(progress, 0); /* TODO: Measure QSORT() progress */
947
0
  QSORT(pairs, m->num_objects, compare_pair_pos_vs_id);
948
0
  stop_progress(&progress);
949
950
0
  if (flags & MIDX_PROGRESS)
951
0
    progress = start_sparse_progress(r,
952
0
             _("Verifying object offsets"),
953
0
             m->num_objects);
954
0
  for (i = 0; i < m->num_objects + m->num_objects_in_base; i++) {
955
0
    struct object_id oid;
956
0
    struct pack_entry e;
957
0
    off_t m_offset, p_offset;
958
959
0
    if (i > 0 && pairs[i-1].pack_int_id != pairs[i].pack_int_id &&
960
0
        nth_midxed_pack(m, pairs[i-1].pack_int_id)) {
961
0
      uint32_t pack_int_id = pairs[i-1].pack_int_id;
962
0
      struct packed_git *p = nth_midxed_pack(m, pack_int_id);
963
964
0
      close_pack_fd(p);
965
0
      close_pack_index(p);
966
0
    }
967
968
0
    nth_midxed_object_oid(&oid, m, pairs[i].pos);
969
970
0
    if (!fill_midx_entry(m, &oid, &e)) {
971
0
      midx_report(_("failed to load pack entry for oid[%d] = %s"),
972
0
            pairs[i].pos, oid_to_hex(&oid));
973
0
      continue;
974
0
    }
975
976
0
    if (open_pack_index(e.p)) {
977
0
      midx_report(_("failed to load pack-index for packfile %s"),
978
0
            e.p->pack_name);
979
0
      break;
980
0
    }
981
982
0
    m_offset = e.offset;
983
0
    p_offset = find_pack_entry_one(&oid, e.p);
984
985
0
    if (m_offset != p_offset)
986
0
      midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64" != %"PRIx64),
987
0
            pairs[i].pos, oid_to_hex(&oid), m_offset, p_offset);
988
989
0
    midx_display_sparse_progress(progress, i + 1);
990
0
  }
991
0
  stop_progress(&progress);
992
993
0
cleanup:
994
0
  free(pairs);
995
0
  close_midx(m);
996
997
0
  return verify_midx_error;
998
0
}