Coverage Report

Created: 2026-03-31 06:24

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/git/midx.c
Line
Count
Source
1
#define DISABLE_SIGN_COMPARE_WARNINGS
2
3
#include "git-compat-util.h"
4
#include "config.h"
5
#include "dir.h"
6
#include "hex.h"
7
#include "packfile.h"
8
#include "hash-lookup.h"
9
#include "midx.h"
10
#include "progress.h"
11
#include "trace2.h"
12
#include "chunk-format.h"
13
#include "pack-bitmap.h"
14
#include "pack-revindex.h"
15
16
0
/* Sentinel stored in a MIDX's packs[] slot for a pack that failed to load. */
#define MIDX_PACK_ERROR ((void *)(intptr_t)-1)

/*
 * Prototypes for non-static functions that are defined further down in
 * this file.
 */
int midx_checksum_valid(struct multi_pack_index *m);

void clear_midx_files_ext(struct odb_source *source,
			  const char *ext,
			  const char *keep_hash);

void clear_incremental_midx_files_ext(struct odb_source *source,
				      const char *ext,
				      char **keep_hashes,
				      uint32_t hashes_nr);

int cmp_idx_or_pack_name(const char *idx_or_pack_name,
			 const char *idx_name);
26
27
const char *midx_get_checksum_hex(const struct multi_pack_index *m)
28
0
{
29
0
  return hash_to_hex_algop(midx_get_checksum_hash(m),
30
0
         m->source->odb->repo->hash_algo);
31
0
}
32
33
const unsigned char *midx_get_checksum_hash(const struct multi_pack_index *m)
34
0
{
35
0
  return m->data + m->data_len - m->source->odb->repo->hash_algo->rawsz;
36
0
}
37
38
void get_midx_filename(struct odb_source *source, struct strbuf *out)
39
0
{
40
0
  get_midx_filename_ext(source, out, NULL, NULL);
41
0
}
42
43
void get_midx_filename_ext(struct odb_source *source, struct strbuf *out,
44
         const unsigned char *hash, const char *ext)
45
0
{
46
0
  strbuf_addf(out, "%s/pack/multi-pack-index", source->path);
47
0
  if (ext)
48
0
    strbuf_addf(out, "-%s.%s", hash_to_hex_algop(hash, source->odb->repo->hash_algo), ext);
49
0
}
50
51
static int midx_read_oid_fanout(const unsigned char *chunk_start,
52
        size_t chunk_size, void *data)
53
0
{
54
0
  int i;
55
0
  struct multi_pack_index *m = data;
56
0
  m->chunk_oid_fanout = (uint32_t *)chunk_start;
57
58
0
  if (chunk_size != 4 * 256) {
59
0
    error(_("multi-pack-index OID fanout is of the wrong size"));
60
0
    return 1;
61
0
  }
62
0
  for (i = 0; i < 255; i++) {
63
0
    uint32_t oid_fanout1 = ntohl(m->chunk_oid_fanout[i]);
64
0
    uint32_t oid_fanout2 = ntohl(m->chunk_oid_fanout[i+1]);
65
66
0
    if (oid_fanout1 > oid_fanout2) {
67
0
      error(_("oid fanout out of order: fanout[%d] = %"PRIx32" > %"PRIx32" = fanout[%d]"),
68
0
            i, oid_fanout1, oid_fanout2, i + 1);
69
0
      return 1;
70
0
    }
71
0
  }
72
0
  m->num_objects = ntohl(m->chunk_oid_fanout[255]);
73
0
  return 0;
74
0
}
75
76
static int midx_read_oid_lookup(const unsigned char *chunk_start,
77
        size_t chunk_size, void *data)
78
0
{
79
0
  struct multi_pack_index *m = data;
80
0
  m->chunk_oid_lookup = chunk_start;
81
82
0
  if (chunk_size != st_mult(m->hash_len, m->num_objects)) {
83
0
    error(_("multi-pack-index OID lookup chunk is the wrong size"));
84
0
    return 1;
85
0
  }
86
0
  return 0;
87
0
}
88
89
static int midx_read_object_offsets(const unsigned char *chunk_start,
90
            size_t chunk_size, void *data)
91
0
{
92
0
  struct multi_pack_index *m = data;
93
0
  m->chunk_object_offsets = chunk_start;
94
95
0
  if (chunk_size != st_mult(m->num_objects, MIDX_CHUNK_OFFSET_WIDTH)) {
96
0
    error(_("multi-pack-index object offset chunk is the wrong size"));
97
0
    return 1;
98
0
  }
99
0
  return 0;
100
0
}
101
102
struct multi_pack_index *get_multi_pack_index(struct odb_source *source)
103
0
{
104
0
  struct odb_source_files *files = odb_source_files_downcast(source);
105
0
  packfile_store_prepare(files->packed);
106
0
  return files->packed->midx;
107
0
}
108
109
static struct multi_pack_index *load_multi_pack_index_one(struct odb_source *source,
110
                const char *midx_name)
111
0
{
112
0
  struct repository *r = source->odb->repo;
113
0
  struct multi_pack_index *m = NULL;
114
0
  int fd;
115
0
  struct stat st;
116
0
  size_t midx_size;
117
0
  void *midx_map = NULL;
118
0
  uint32_t hash_version;
119
0
  uint32_t i;
120
0
  const char *cur_pack_name;
121
0
  struct chunkfile *cf = NULL;
122
123
0
  fd = git_open(midx_name);
124
125
0
  if (fd < 0)
126
0
    goto cleanup_fail;
127
0
  if (fstat(fd, &st)) {
128
0
    error_errno(_("failed to read %s"), midx_name);
129
0
    goto cleanup_fail;
130
0
  }
131
132
0
  midx_size = xsize_t(st.st_size);
133
134
0
  if (midx_size < (MIDX_HEADER_SIZE + r->hash_algo->rawsz)) {
135
0
    error(_("multi-pack-index file %s is too small"), midx_name);
136
0
    goto cleanup_fail;
137
0
  }
138
139
0
  midx_map = xmmap(NULL, midx_size, PROT_READ, MAP_PRIVATE, fd, 0);
140
0
  close(fd);
141
142
0
  CALLOC_ARRAY(m, 1);
143
0
  m->data = midx_map;
144
0
  m->data_len = midx_size;
145
0
  m->source = source;
146
147
0
  m->signature = get_be32(m->data);
148
0
  if (m->signature != MIDX_SIGNATURE)
149
0
    die(_("multi-pack-index signature 0x%08x does not match signature 0x%08x"),
150
0
          m->signature, MIDX_SIGNATURE);
151
152
0
  m->version = m->data[MIDX_BYTE_FILE_VERSION];
153
0
  if (m->version != MIDX_VERSION_V1 && m->version != MIDX_VERSION_V2)
154
0
    die(_("multi-pack-index version %d not recognized"),
155
0
          m->version);
156
157
0
  hash_version = m->data[MIDX_BYTE_HASH_VERSION];
158
0
  if (hash_version != oid_version(r->hash_algo)) {
159
0
    error(_("multi-pack-index hash version %u does not match version %u"),
160
0
          hash_version, oid_version(r->hash_algo));
161
0
    goto cleanup_fail;
162
0
  }
163
0
  m->hash_len = r->hash_algo->rawsz;
164
165
0
  m->num_chunks = m->data[MIDX_BYTE_NUM_CHUNKS];
166
167
0
  m->num_packs = get_be32(m->data + MIDX_BYTE_NUM_PACKS);
168
169
0
  m->preferred_pack_idx = -1;
170
171
0
  cf = init_chunkfile(NULL);
172
173
0
  if (read_table_of_contents(cf, m->data, midx_size,
174
0
           MIDX_HEADER_SIZE, m->num_chunks,
175
0
           MIDX_CHUNK_ALIGNMENT))
176
0
    goto cleanup_fail;
177
178
0
  if (pair_chunk(cf, MIDX_CHUNKID_PACKNAMES, &m->chunk_pack_names, &m->chunk_pack_names_len))
179
0
    die(_("multi-pack-index required pack-name chunk missing or corrupted"));
180
0
  if (read_chunk(cf, MIDX_CHUNKID_OIDFANOUT, midx_read_oid_fanout, m))
181
0
    die(_("multi-pack-index required OID fanout chunk missing or corrupted"));
182
0
  if (read_chunk(cf, MIDX_CHUNKID_OIDLOOKUP, midx_read_oid_lookup, m))
183
0
    die(_("multi-pack-index required OID lookup chunk missing or corrupted"));
184
0
  if (read_chunk(cf, MIDX_CHUNKID_OBJECTOFFSETS, midx_read_object_offsets, m))
185
0
    die(_("multi-pack-index required object offsets chunk missing or corrupted"));
186
187
0
  pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets,
188
0
       &m->chunk_large_offsets_len);
189
0
  if (git_env_bool("GIT_TEST_MIDX_READ_BTMP", 1))
190
0
    pair_chunk(cf, MIDX_CHUNKID_BITMAPPEDPACKS,
191
0
         (const unsigned char **)&m->chunk_bitmapped_packs,
192
0
         &m->chunk_bitmapped_packs_len);
193
194
0
  if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1))
195
0
    pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex,
196
0
         &m->chunk_revindex_len);
197
198
0
  CALLOC_ARRAY(m->pack_names, m->num_packs);
199
0
  CALLOC_ARRAY(m->packs, m->num_packs);
200
201
0
  cur_pack_name = (const char *)m->chunk_pack_names;
202
0
  for (i = 0; i < m->num_packs; i++) {
203
0
    const char *end;
204
0
    size_t avail = m->chunk_pack_names_len -
205
0
        (cur_pack_name - (const char *)m->chunk_pack_names);
206
207
0
    m->pack_names[i] = cur_pack_name;
208
209
0
    end = memchr(cur_pack_name, '\0', avail);
210
0
    if (!end)
211
0
      die(_("multi-pack-index pack-name chunk is too short"));
212
0
    cur_pack_name = end + 1;
213
214
0
    if (m->version == MIDX_VERSION_V1 &&
215
0
        i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0)
216
0
      die(_("multi-pack-index pack names out of order: '%s' before '%s'"),
217
0
            m->pack_names[i - 1],
218
0
            m->pack_names[i]);
219
0
  }
220
221
0
  trace2_data_intmax("midx", r, "load/num_packs", m->num_packs);
222
0
  trace2_data_intmax("midx", r, "load/num_objects", m->num_objects);
223
224
0
  free_chunkfile(cf);
225
0
  return m;
226
227
0
cleanup_fail:
228
0
  free(m);
229
0
  free_chunkfile(cf);
230
0
  if (midx_map)
231
0
    munmap(midx_map, midx_size);
232
0
  if (0 <= fd)
233
0
    close(fd);
234
0
  return NULL;
235
0
}
236
237
void get_midx_chain_dirname(struct odb_source *source, struct strbuf *buf)
238
0
{
239
0
  strbuf_addf(buf, "%s/pack/multi-pack-index.d", source->path);
240
0
}
241
242
/*
 * Append the path of the source's "multi-pack-index-chain" file, which
 * lives inside the chain directory, to buf.
 */
void get_midx_chain_filename(struct odb_source *source, struct strbuf *buf)
{
	static const char chain_basename[] = "/multi-pack-index-chain";

	get_midx_chain_dirname(source, buf);
	strbuf_addstr(buf, chain_basename);
}
247
248
void get_split_midx_filename_ext(struct odb_source *source, struct strbuf *buf,
249
         const unsigned char *hash, const char *ext)
250
0
{
251
0
  get_midx_chain_dirname(source, buf);
252
0
  strbuf_addf(buf, "/multi-pack-index-%s.%s",
253
0
        hash_to_hex_algop(hash, source->odb->repo->hash_algo), ext);
254
0
}
255
256
static int open_multi_pack_index_chain(const struct git_hash_algo *hash_algo,
257
               const char *chain_file, int *fd,
258
               struct stat *st)
259
0
{
260
0
  *fd = git_open(chain_file);
261
0
  if (*fd < 0)
262
0
    return 0;
263
0
  if (fstat(*fd, st)) {
264
0
    close(*fd);
265
0
    return 0;
266
0
  }
267
0
  if (st->st_size < hash_algo->hexsz) {
268
0
    close(*fd);
269
0
    if (!st->st_size) {
270
      /* treat empty files the same as missing */
271
0
      errno = ENOENT;
272
0
    } else {
273
0
      warning(_("multi-pack-index chain file too small"));
274
0
      errno = EINVAL;
275
0
    }
276
0
    return 0;
277
0
  }
278
0
  return 1;
279
0
}
280
281
static int add_midx_to_chain(struct multi_pack_index *midx,
282
           struct multi_pack_index *midx_chain)
283
0
{
284
0
  if (midx_chain) {
285
0
    if (unsigned_add_overflows(midx_chain->num_packs,
286
0
             midx_chain->num_packs_in_base)) {
287
0
      warning(_("pack count in base MIDX too high: %"PRIuMAX),
288
0
        (uintmax_t)midx_chain->num_packs_in_base);
289
0
      return 0;
290
0
    }
291
0
    if (unsigned_add_overflows(midx_chain->num_objects,
292
0
             midx_chain->num_objects_in_base)) {
293
0
      warning(_("object count in base MIDX too high: %"PRIuMAX),
294
0
        (uintmax_t)midx_chain->num_objects_in_base);
295
0
      return 0;
296
0
    }
297
0
    midx->num_packs_in_base = midx_chain->num_packs +
298
0
      midx_chain->num_packs_in_base;
299
0
    midx->num_objects_in_base = midx_chain->num_objects +
300
0
      midx_chain->num_objects_in_base;
301
0
  }
302
303
0
  midx->base_midx = midx_chain;
304
0
  midx->has_chain = 1;
305
306
0
  return 1;
307
0
}
308
309
static struct multi_pack_index *load_midx_chain_fd_st(struct odb_source *source,
310
                  int fd, struct stat *st,
311
                  int *incomplete_chain)
312
0
{
313
0
  const struct git_hash_algo *hash_algo = source->odb->repo->hash_algo;
314
0
  struct multi_pack_index *midx_chain = NULL;
315
0
  struct strbuf buf = STRBUF_INIT;
316
0
  int valid = 1;
317
0
  uint32_t i, count;
318
0
  FILE *fp = xfdopen(fd, "r");
319
320
0
  count = st->st_size / (hash_algo->hexsz + 1);
321
322
0
  for (i = 0; i < count; i++) {
323
0
    struct multi_pack_index *m;
324
0
    struct object_id layer;
325
326
0
    if (strbuf_getline_lf(&buf, fp) == EOF)
327
0
      break;
328
329
0
    if (get_oid_hex_algop(buf.buf, &layer, hash_algo)) {
330
0
      warning(_("invalid multi-pack-index chain: line '%s' "
331
0
          "not a hash"),
332
0
        buf.buf);
333
0
      valid = 0;
334
0
      break;
335
0
    }
336
337
0
    valid = 0;
338
339
0
    strbuf_reset(&buf);
340
0
    get_split_midx_filename_ext(source, &buf,
341
0
              layer.hash, MIDX_EXT_MIDX);
342
0
    m = load_multi_pack_index_one(source, buf.buf);
343
344
0
    if (m) {
345
0
      if (add_midx_to_chain(m, midx_chain)) {
346
0
        midx_chain = m;
347
0
        valid = 1;
348
0
      } else {
349
0
        close_midx(m);
350
0
      }
351
0
    }
352
0
    if (!valid) {
353
0
      warning(_("unable to find all multi-pack index files"));
354
0
      break;
355
0
    }
356
0
  }
357
358
0
  fclose(fp);
359
0
  strbuf_release(&buf);
360
361
0
  *incomplete_chain = !valid;
362
0
  return midx_chain;
363
0
}
364
365
static struct multi_pack_index *load_multi_pack_index_chain(struct odb_source *source)
366
0
{
367
0
  struct strbuf chain_file = STRBUF_INIT;
368
0
  struct stat st;
369
0
  int fd;
370
0
  struct multi_pack_index *m = NULL;
371
372
0
  get_midx_chain_filename(source, &chain_file);
373
0
  if (open_multi_pack_index_chain(source->odb->repo->hash_algo, chain_file.buf, &fd, &st)) {
374
0
    int incomplete;
375
    /* ownership of fd is taken over by load function */
376
0
    m = load_midx_chain_fd_st(source, fd, &st, &incomplete);
377
0
  }
378
379
0
  strbuf_release(&chain_file);
380
0
  return m;
381
0
}
382
383
struct multi_pack_index *load_multi_pack_index(struct odb_source *source)
384
0
{
385
0
  struct strbuf midx_name = STRBUF_INIT;
386
0
  struct multi_pack_index *m;
387
388
0
  get_midx_filename(source, &midx_name);
389
390
0
  m = load_multi_pack_index_one(source, midx_name.buf);
391
0
  if (!m)
392
0
    m = load_multi_pack_index_chain(source);
393
394
0
  strbuf_release(&midx_name);
395
396
0
  return m;
397
0
}
398
399
void close_midx(struct multi_pack_index *m)
400
0
{
401
0
  uint32_t i;
402
403
0
  if (!m)
404
0
    return;
405
406
0
  close_midx(m->base_midx);
407
408
0
  munmap((unsigned char *)m->data, m->data_len);
409
410
0
  for (i = 0; i < m->num_packs; i++) {
411
0
    if (m->packs[i] && m->packs[i] != MIDX_PACK_ERROR)
412
0
      m->packs[i]->multi_pack_index = 0;
413
0
  }
414
0
  FREE_AND_NULL(m->packs);
415
0
  FREE_AND_NULL(m->pack_names);
416
0
  FREE_AND_NULL(m->pack_names_sorted);
417
0
  free(m);
418
0
}
419
420
static uint32_t midx_for_object(struct multi_pack_index **_m, uint32_t pos)
421
0
{
422
0
  struct multi_pack_index *m = *_m;
423
0
  while (m && pos < m->num_objects_in_base)
424
0
    m = m->base_midx;
425
426
0
  if (!m)
427
0
    BUG("NULL multi-pack-index for object position: %"PRIu32, pos);
428
429
0
  if (pos >= m->num_objects + m->num_objects_in_base)
430
0
    die(_("invalid MIDX object position, MIDX is likely corrupt"));
431
432
0
  *_m = m;
433
434
0
  return pos - m->num_objects_in_base;
435
0
}
436
437
static uint32_t midx_for_pack(struct multi_pack_index **_m,
438
            uint32_t pack_int_id)
439
0
{
440
0
  struct multi_pack_index *m = *_m;
441
0
  while (m && pack_int_id < m->num_packs_in_base)
442
0
    m = m->base_midx;
443
444
0
  if (!m)
445
0
    BUG("NULL multi-pack-index for pack ID: %"PRIu32, pack_int_id);
446
447
0
  if (pack_int_id >= m->num_packs + m->num_packs_in_base)
448
0
    die(_("bad pack-int-id: %u (%u total packs)"),
449
0
        pack_int_id, m->num_packs + m->num_packs_in_base);
450
451
0
  *_m = m;
452
453
0
  return pack_int_id - m->num_packs_in_base;
454
0
}
455
456
int prepare_midx_pack(struct multi_pack_index *m,
457
          uint32_t pack_int_id)
458
0
{
459
0
  struct odb_source_files *files = odb_source_files_downcast(m->source);
460
0
  struct strbuf pack_name = STRBUF_INIT;
461
0
  struct packed_git *p;
462
463
0
  pack_int_id = midx_for_pack(&m, pack_int_id);
464
465
0
  if (m->packs[pack_int_id] == MIDX_PACK_ERROR)
466
0
    return 1;
467
0
  if (m->packs[pack_int_id])
468
0
    return 0;
469
470
0
  strbuf_addf(&pack_name, "%s/pack/%s", files->base.path,
471
0
        m->pack_names[pack_int_id]);
472
0
  p = packfile_store_load_pack(files->packed,
473
0
             pack_name.buf, files->base.local);
474
0
  strbuf_release(&pack_name);
475
476
0
  if (!p) {
477
0
    m->packs[pack_int_id] = MIDX_PACK_ERROR;
478
0
    return 1;
479
0
  }
480
481
0
  p->multi_pack_index = 1;
482
0
  m->packs[pack_int_id] = p;
483
484
0
  return 0;
485
0
}
486
487
struct packed_git *nth_midxed_pack(struct multi_pack_index *m,
488
           uint32_t pack_int_id)
489
0
{
490
0
  uint32_t local_pack_int_id = midx_for_pack(&m, pack_int_id);
491
0
  if (m->packs[local_pack_int_id] == MIDX_PACK_ERROR)
492
0
    return NULL;
493
0
  return m->packs[local_pack_int_id];
494
0
}
495
496
0
#define MIDX_CHUNK_BITMAPPED_PACKS_WIDTH (2 * sizeof(uint32_t))
497
498
int nth_bitmapped_pack(struct multi_pack_index *m,
499
           struct bitmapped_pack *bp, uint32_t pack_int_id)
500
0
{
501
0
  uint32_t local_pack_int_id = midx_for_pack(&m, pack_int_id);
502
503
0
  if (!m->chunk_bitmapped_packs)
504
0
    return error(_("MIDX does not contain the BTMP chunk"));
505
506
0
  if (prepare_midx_pack(m, pack_int_id))
507
0
    return error(_("could not load bitmapped pack %"PRIu32), pack_int_id);
508
509
0
  bp->p = m->packs[local_pack_int_id];
510
0
  bp->bitmap_pos = get_be32((char *)m->chunk_bitmapped_packs +
511
0
          MIDX_CHUNK_BITMAPPED_PACKS_WIDTH * local_pack_int_id);
512
0
  bp->bitmap_nr = get_be32((char *)m->chunk_bitmapped_packs +
513
0
         MIDX_CHUNK_BITMAPPED_PACKS_WIDTH * local_pack_int_id +
514
0
         sizeof(uint32_t));
515
0
  bp->pack_int_id = pack_int_id;
516
0
  bp->from_midx = m;
517
518
0
  return 0;
519
0
}
520
521
int bsearch_one_midx(const struct object_id *oid, struct multi_pack_index *m,
522
         uint32_t *result)
523
0
{
524
0
  int ret = bsearch_hash(oid->hash, m->chunk_oid_fanout,
525
0
             m->chunk_oid_lookup,
526
0
             m->source->odb->repo->hash_algo->rawsz,
527
0
             result);
528
0
  if (result)
529
0
    *result += m->num_objects_in_base;
530
0
  return ret;
531
0
}
532
533
int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m,
534
     uint32_t *result)
535
0
{
536
0
  for (; m; m = m->base_midx)
537
0
    if (bsearch_one_midx(oid, m, result))
538
0
      return 1;
539
0
  return 0;
540
0
}
541
542
/* Return non-zero when oid is present in the MIDX or any of its base layers. */
int midx_has_oid(struct multi_pack_index *m, const struct object_id *oid)
{
	uint32_t *no_position = NULL;

	return bsearch_midx(oid, m, no_position);
}
546
547
struct object_id *nth_midxed_object_oid(struct object_id *oid,
548
          struct multi_pack_index *m,
549
          uint32_t n)
550
0
{
551
0
  if (n >= m->num_objects + m->num_objects_in_base)
552
0
    return NULL;
553
554
0
  n = midx_for_object(&m, n);
555
556
0
  oidread(oid, m->chunk_oid_lookup + st_mult(m->hash_len, n),
557
0
    m->source->odb->repo->hash_algo);
558
0
  return oid;
559
0
}
560
561
off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos)
562
0
{
563
0
  const unsigned char *offset_data;
564
0
  uint32_t offset32;
565
566
0
  pos = midx_for_object(&m, pos);
567
568
0
  offset_data = m->chunk_object_offsets + (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH;
569
0
  offset32 = get_be32(offset_data + sizeof(uint32_t));
570
571
0
  if (m->chunk_large_offsets && offset32 & MIDX_LARGE_OFFSET_NEEDED) {
572
0
    if (sizeof(off_t) < sizeof(uint64_t))
573
0
      die(_("multi-pack-index stores a 64-bit offset, but off_t is too small"));
574
575
0
    offset32 ^= MIDX_LARGE_OFFSET_NEEDED;
576
0
    if (offset32 >= m->chunk_large_offsets_len / sizeof(uint64_t))
577
0
      die(_("multi-pack-index large offset out of bounds"));
578
0
    return get_be64(m->chunk_large_offsets + sizeof(uint64_t) * offset32);
579
0
  }
580
581
0
  return offset32;
582
0
}
583
584
uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos)
585
0
{
586
0
  pos = midx_for_object(&m, pos);
587
588
0
  return m->num_packs_in_base + get_be32(m->chunk_object_offsets +
589
0
                 (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH);
590
0
}
591
592
int fill_midx_entry(struct multi_pack_index *m,
593
        const struct object_id *oid,
594
        struct pack_entry *e)
595
0
{
596
0
  uint32_t pos;
597
0
  uint32_t pack_int_id;
598
0
  struct packed_git *p;
599
600
0
  if (!bsearch_midx(oid, m, &pos))
601
0
    return 0;
602
603
0
  midx_for_object(&m, pos);
604
0
  pack_int_id = nth_midxed_pack_int_id(m, pos);
605
606
0
  if (prepare_midx_pack(m, pack_int_id))
607
0
    return 0;
608
0
  p = m->packs[pack_int_id - m->num_packs_in_base];
609
610
  /*
611
  * We are about to tell the caller where they can locate the
612
  * requested object.  We better make sure the packfile is
613
  * still here and can be accessed before supplying that
614
  * answer, as it may have been deleted since the MIDX was
615
  * loaded!
616
  */
617
0
  if (!is_pack_valid(p))
618
0
    return 0;
619
620
0
  if (oidset_size(&p->bad_objects) &&
621
0
      oidset_contains(&p->bad_objects, oid))
622
0
    return 0;
623
624
0
  e->offset = nth_midxed_offset(m, pos);
625
0
  e->p = p;
626
627
0
  return 1;
628
0
}
629
630
/*
 * Compare idx_or_pack_name ("foo.idx" or "foo.pack") against idx_name
 * ("foo.idx"). Returns 0 when they name the same pack, otherwise the
 * result of strcmp() on the parts that follow their common prefix.
 */
int cmp_idx_or_pack_name(const char *idx_or_pack_name,
			 const char *idx_name)
{
	const char *name_rest = idx_or_pack_name;
	const char *idx_rest = idx_name;

	/* Advance both cursors over the shared leading characters. */
	for (; *idx_rest && *idx_rest == *name_rest; idx_rest++, name_rest++)
		; /* nothing */

	/*
	 * A "pack-1234." prefix followed by "pack" on one side and "idx" on
	 * the other is a match as well. ("idx" vs "idx" never gets here as
	 * a mismatch: the loop above consumes it and the strcmp() below
	 * returns 0.) Strictly speaking this also equates "fooidx" with
	 * "foopack", but such names do not occur.
	 */
	if (strcmp(idx_rest, "idx") == 0 && strcmp(name_rest, "pack") == 0)
		return 0;

	/*
	 * Ordering on the first differing character agrees with a plain
	 * strcmp() of the full names, so this comparison can drive a binary
	 * search over a naively sorted list.
	 */
	return strcmp(name_rest, idx_rest);
}
661
662
663
static int midx_pack_names_cmp(const void *a, const void *b, void *m_)
664
0
{
665
0
  struct multi_pack_index *m = m_;
666
0
  return strcmp(m->pack_names[*(const size_t *)a],
667
0
          m->pack_names[*(const size_t *)b]);
668
0
}
669
670
static int midx_contains_pack_1(struct multi_pack_index *m,
671
        const char *idx_or_pack_name)
672
0
{
673
0
  uint32_t first = 0, last = m->num_packs;
674
675
0
  if (m->version == MIDX_VERSION_V2 && !m->pack_names_sorted) {
676
0
    uint32_t i;
677
678
0
    ALLOC_ARRAY(m->pack_names_sorted, m->num_packs);
679
680
0
    for (i = 0; i < m->num_packs; i++)
681
0
      m->pack_names_sorted[i] = i;
682
683
0
    QSORT_S(m->pack_names_sorted, m->num_packs, midx_pack_names_cmp,
684
0
      m);
685
0
  }
686
687
0
  while (first < last) {
688
0
    uint32_t mid = first + (last - first) / 2;
689
0
    const char *current;
690
0
    int cmp;
691
692
0
    if (m->pack_names_sorted)
693
0
      current = m->pack_names[m->pack_names_sorted[mid]];
694
0
    else
695
0
      current = m->pack_names[mid];
696
0
    cmp = cmp_idx_or_pack_name(idx_or_pack_name, current);
697
0
    if (!cmp)
698
0
      return 1;
699
0
    if (cmp > 0) {
700
0
      first = mid + 1;
701
0
      continue;
702
0
    }
703
0
    last = mid;
704
0
  }
705
706
0
  return 0;
707
0
}
708
709
int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name)
710
0
{
711
0
  for (; m; m = m->base_midx)
712
0
    if (midx_contains_pack_1(m, idx_or_pack_name))
713
0
      return 1;
714
0
  return 0;
715
0
}
716
717
int midx_preferred_pack(struct multi_pack_index *m, uint32_t *pack_int_id)
718
0
{
719
0
  if (m->preferred_pack_idx == -1) {
720
0
    uint32_t midx_pos;
721
0
    if (load_midx_revindex(m)) {
722
0
      m->preferred_pack_idx = -2;
723
0
      return -1;
724
0
    }
725
726
0
    midx_pos = pack_pos_to_midx(m, m->num_objects_in_base);
727
728
0
    m->preferred_pack_idx = nth_midxed_pack_int_id(m, midx_pos);
729
730
0
  } else if (m->preferred_pack_idx == -2)
731
0
    return -1; /* no revindex */
732
733
0
  *pack_int_id = m->preferred_pack_idx;
734
0
  return 0;
735
0
}
736
737
int prepare_multi_pack_index_one(struct odb_source *source)
738
0
{
739
0
  struct odb_source_files *files = odb_source_files_downcast(source);
740
0
  struct repository *r = source->odb->repo;
741
742
0
  prepare_repo_settings(r);
743
0
  if (!r->settings.core_multi_pack_index)
744
0
    return 0;
745
746
0
  if (files->packed->midx)
747
0
    return 1;
748
749
0
  files->packed->midx = load_multi_pack_index(source);
750
751
0
  return !!files->packed->midx;
752
0
}
753
754
int midx_checksum_valid(struct multi_pack_index *m)
755
0
{
756
0
  return hashfile_checksum_valid(m->source->odb->repo->hash_algo,
757
0
               m->data, m->data_len);
758
0
}
759
760
struct clear_midx_data {
761
  char **keep;
762
  uint32_t keep_nr;
763
  const char *ext;
764
};
765
766
static void clear_midx_file_ext(const char *full_path, size_t full_path_len UNUSED,
767
        const char *file_name, void *_data)
768
0
{
769
0
  struct clear_midx_data *data = _data;
770
0
  uint32_t i;
771
772
0
  if (!(starts_with(file_name, "multi-pack-index-") &&
773
0
        ends_with(file_name, data->ext)))
774
0
    return;
775
0
  for (i = 0; i < data->keep_nr; i++) {
776
0
    if (!strcmp(data->keep[i], file_name))
777
0
      return;
778
0
  }
779
0
  if (unlink(full_path))
780
0
    die_errno(_("failed to remove %s"), full_path);
781
0
}
782
783
void clear_midx_files_ext(struct odb_source *source, const char *ext,
784
        const char *keep_hash)
785
0
{
786
0
  struct clear_midx_data data;
787
0
  memset(&data, 0, sizeof(struct clear_midx_data));
788
789
0
  if (keep_hash) {
790
0
    ALLOC_ARRAY(data.keep, 1);
791
792
0
    data.keep[0] = xstrfmt("multi-pack-index-%s.%s", keep_hash, ext);
793
0
    data.keep_nr = 1;
794
0
  }
795
0
  data.ext = ext;
796
797
0
  for_each_file_in_pack_dir(source->path,
798
0
          clear_midx_file_ext,
799
0
          &data);
800
801
0
  if (keep_hash)
802
0
    free(data.keep[0]);
803
0
  free(data.keep);
804
0
}
805
806
void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext,
807
              char **keep_hashes,
808
              uint32_t hashes_nr)
809
0
{
810
0
  struct clear_midx_data data;
811
0
  uint32_t i;
812
813
0
  memset(&data, 0, sizeof(struct clear_midx_data));
814
815
0
  ALLOC_ARRAY(data.keep, hashes_nr);
816
0
  for (i = 0; i < hashes_nr; i++)
817
0
    data.keep[i] = xstrfmt("multi-pack-index-%s.%s", keep_hashes[i],
818
0
               ext);
819
0
  data.keep_nr = hashes_nr;
820
0
  data.ext = ext;
821
822
0
  for_each_file_in_pack_subdir(source->path, "multi-pack-index.d",
823
0
             clear_midx_file_ext, &data);
824
825
0
  for (i = 0; i < hashes_nr; i++)
826
0
    free(data.keep[i]);
827
0
  free(data.keep);
828
0
}
829
830
void clear_midx_file(struct repository *r)
831
0
{
832
0
  struct strbuf midx = STRBUF_INIT;
833
834
0
  get_midx_filename(r->objects->sources, &midx);
835
836
0
  if (r->objects) {
837
0
    struct odb_source *source;
838
839
0
    for (source = r->objects->sources; source; source = source->next) {
840
0
      struct odb_source_files *files = odb_source_files_downcast(source);
841
0
      if (files->packed->midx)
842
0
        close_midx(files->packed->midx);
843
0
      files->packed->midx = NULL;
844
0
    }
845
0
  }
846
847
0
  if (remove_path(midx.buf))
848
0
    die(_("failed to clear multi-pack-index at %s"), midx.buf);
849
850
0
  clear_midx_files_ext(r->objects->sources, MIDX_EXT_BITMAP, NULL);
851
0
  clear_midx_files_ext(r->objects->sources, MIDX_EXT_REV, NULL);
852
853
0
  strbuf_release(&midx);
854
0
}
855
856
/* Set to 1 by midx_report(); reset and returned by verify_midx_file(). */
static int verify_midx_error;

/*
 * Record that verification found a problem and print the printf-style
 * message, followed by a newline, to stderr.
 */
__attribute__((format (printf, 1, 2)))
static void midx_report(const char *fmt, ...)
{
	va_list args;

	verify_midx_error = 1;

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);

	fputc('\n', stderr);
}
868
869
/* An object's MIDX position paired with the id of the pack holding it. */
struct pair_pos_vs_id
{
	uint32_t pos;
	uint32_t pack_int_id;
};

/*
 * qsort() callback ordering pairs by descending pack_int_id.
 *
 * The ids are compared explicitly rather than subtracted: the difference
 * of two uint32_t values does not fit in an int once they are more than
 * INT_MAX apart, which would flip the sign of the result.
 */
static int compare_pair_pos_vs_id(const void *_a, const void *_b)
{
	const struct pair_pos_vs_id *a = _a;
	const struct pair_pos_vs_id *b = _b;

	if (b->pack_int_id > a->pack_int_id)
		return 1;
	if (b->pack_int_id < a->pack_int_id)
		return -1;
	return 0;
}
882
883
/*
 * Calling display_progress() for every item is needlessly expensive, so
 * only every SPARSE_PROGRESS_INTERVAL-th value of n is forwarded to it.
 * The interval was chosen arbitrarily.
 */
#define SPARSE_PROGRESS_INTERVAL (1 << 12)
#define midx_display_sparse_progress(progress, n) \
	do { \
		uint64_t sparse_n_ = (n); \
		if (!(sparse_n_ % SPARSE_PROGRESS_INTERVAL)) \
			display_progress(progress, sparse_n_); \
	} while (0)
894
895
int verify_midx_file(struct odb_source *source, unsigned flags)
896
0
{
897
0
  struct repository *r = source->odb->repo;
898
0
  struct pair_pos_vs_id *pairs = NULL;
899
0
  uint32_t i;
900
0
  struct progress *progress = NULL;
901
0
  struct multi_pack_index *m = load_multi_pack_index(source);
902
0
  struct multi_pack_index *curr;
903
0
  verify_midx_error = 0;
904
905
0
  if (!m) {
906
0
    int result = 0;
907
0
    struct stat sb;
908
0
    struct strbuf filename = STRBUF_INIT;
909
910
0
    get_midx_filename(source, &filename);
911
912
0
    if (!stat(filename.buf, &sb)) {
913
0
      error(_("multi-pack-index file exists, but failed to parse"));
914
0
      result = 1;
915
0
    }
916
0
    strbuf_release(&filename);
917
0
    return result;
918
0
  }
919
920
0
  if (!midx_checksum_valid(m))
921
0
    midx_report(_("incorrect checksum"));
922
923
0
  if (flags & MIDX_PROGRESS)
924
0
    progress = start_delayed_progress(r,
925
0
              _("Looking for referenced packfiles"),
926
0
              m->num_packs + m->num_packs_in_base);
927
0
  for (i = 0; i < m->num_packs + m->num_packs_in_base; i++) {
928
0
    if (prepare_midx_pack(m, i))
929
0
      midx_report("failed to load pack in position %d", i);
930
931
0
    display_progress(progress, i + 1);
932
0
  }
933
0
  stop_progress(&progress);
934
935
0
  if (m->num_objects == 0) {
936
0
    midx_report(_("the midx contains no oid"));
937
    /*
938
     * Remaining tests assume that we have objects, so we can
939
     * return here.
940
     */
941
0
    goto cleanup;
942
0
  }
943
944
0
  if (flags & MIDX_PROGRESS)
945
0
    progress = start_sparse_progress(r,
946
0
             _("Verifying OID order in multi-pack-index"),
947
0
             m->num_objects - 1);
948
949
0
  for (curr = m; curr; curr = curr->base_midx) {
950
0
    for (i = 0; i < m->num_objects - 1; i++) {
951
0
      struct object_id oid1, oid2;
952
953
0
      nth_midxed_object_oid(&oid1, m, m->num_objects_in_base + i);
954
0
      nth_midxed_object_oid(&oid2, m, m->num_objects_in_base + i + 1);
955
956
0
      if (oidcmp(&oid1, &oid2) >= 0)
957
0
        midx_report(_("oid lookup out of order: oid[%d] = %s >= %s = oid[%d]"),
958
0
              i, oid_to_hex(&oid1), oid_to_hex(&oid2), i + 1);
959
960
0
      midx_display_sparse_progress(progress, i + 1);
961
0
    }
962
0
  }
963
0
  stop_progress(&progress);
964
965
  /*
966
   * Create an array mapping each object to its packfile id.  Sort it
967
   * to group the objects by packfile.  Use this permutation to visit
968
   * each of the objects and only require 1 packfile to be open at a
969
   * time.
970
   */
971
0
  ALLOC_ARRAY(pairs, m->num_objects + m->num_objects_in_base);
972
0
  for (i = 0; i < m->num_objects + m->num_objects_in_base; i++) {
973
0
    pairs[i].pos = i;
974
0
    pairs[i].pack_int_id = nth_midxed_pack_int_id(m, i);
975
0
  }
976
977
0
  if (flags & MIDX_PROGRESS)
978
0
    progress = start_sparse_progress(r,
979
0
             _("Sorting objects by packfile"),
980
0
             m->num_objects);
981
0
  display_progress(progress, 0); /* TODO: Measure QSORT() progress */
982
0
  QSORT(pairs, m->num_objects, compare_pair_pos_vs_id);
983
0
  stop_progress(&progress);
984
985
0
  if (flags & MIDX_PROGRESS)
986
0
    progress = start_sparse_progress(r,
987
0
             _("Verifying object offsets"),
988
0
             m->num_objects);
989
0
  for (i = 0; i < m->num_objects + m->num_objects_in_base; i++) {
990
0
    struct object_id oid;
991
0
    struct pack_entry e;
992
0
    off_t m_offset, p_offset;
993
994
0
    if (i > 0 && pairs[i-1].pack_int_id != pairs[i].pack_int_id &&
995
0
        nth_midxed_pack(m, pairs[i-1].pack_int_id)) {
996
0
      uint32_t pack_int_id = pairs[i-1].pack_int_id;
997
0
      struct packed_git *p = nth_midxed_pack(m, pack_int_id);
998
999
0
      close_pack_fd(p);
1000
0
      close_pack_index(p);
1001
0
    }
1002
1003
0
    nth_midxed_object_oid(&oid, m, pairs[i].pos);
1004
1005
0
    if (!fill_midx_entry(m, &oid, &e)) {
1006
0
      midx_report(_("failed to load pack entry for oid[%d] = %s"),
1007
0
            pairs[i].pos, oid_to_hex(&oid));
1008
0
      continue;
1009
0
    }
1010
1011
0
    if (open_pack_index(e.p)) {
1012
0
      midx_report(_("failed to load pack-index for packfile %s"),
1013
0
            e.p->pack_name);
1014
0
      break;
1015
0
    }
1016
1017
0
    m_offset = e.offset;
1018
0
    p_offset = find_pack_entry_one(&oid, e.p);
1019
1020
0
    if (m_offset != p_offset)
1021
0
      midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64" != %"PRIx64),
1022
0
            pairs[i].pos, oid_to_hex(&oid), m_offset, p_offset);
1023
1024
0
    midx_display_sparse_progress(progress, i + 1);
1025
0
  }
1026
0
  stop_progress(&progress);
1027
1028
0
cleanup:
1029
0
  free(pairs);
1030
0
  close_midx(m);
1031
1032
0
  return verify_midx_error;
1033
0
}