Coverage Report

Created: 2023-11-19 07:08

/src/git/packfile.c
Line
Count
Source (jump to first uncovered line)
1
#include "git-compat-util.h"
2
#include "environment.h"
3
#include "gettext.h"
4
#include "hex.h"
5
#include "list.h"
6
#include "pack.h"
7
#include "repository.h"
8
#include "dir.h"
9
#include "mergesort.h"
10
#include "packfile.h"
11
#include "delta.h"
12
#include "streaming.h"
13
#include "hash-lookup.h"
14
#include "commit.h"
15
#include "object.h"
16
#include "tag.h"
17
#include "trace.h"
18
#include "tree-walk.h"
19
#include "tree.h"
20
#include "object-file.h"
21
#include "object-store-ll.h"
22
#include "midx.h"
23
#include "commit-graph.h"
24
#include "pack-revindex.h"
25
#include "promisor-remote.h"
26
27
char *odb_pack_name(struct strbuf *buf,
28
        const unsigned char *hash,
29
        const char *ext)
30
0
{
31
0
  strbuf_reset(buf);
32
0
  strbuf_addf(buf, "%s/pack/pack-%s.%s", get_object_directory(),
33
0
        hash_to_hex(hash), ext);
34
0
  return buf->buf;
35
0
}
36
37
char *sha1_pack_name(const unsigned char *sha1)
38
0
{
39
0
  static struct strbuf buf = STRBUF_INIT;
40
0
  return odb_pack_name(&buf, sha1, "pack");
41
0
}
42
43
char *sha1_pack_index_name(const unsigned char *sha1)
44
0
{
45
0
  static struct strbuf buf = STRBUF_INIT;
46
0
  return odb_pack_name(&buf, sha1, "idx");
47
0
}
48
49
/* Statistics and limits shared by the pack window / descriptor code. */

/* Stamp handed out as "last_used" whenever use_pack() switches windows. */
static unsigned int pack_used_ctr;
/* Number of windows use_pack() has mapped so far. */
static unsigned int pack_mmap_calls;
/* Highest value pack_open_windows has reached. */
static unsigned int peak_pack_open_windows;
/* Number of pack windows currently mapped. */
static unsigned int pack_open_windows;
/* Number of pack file descriptors currently counted as open. */
static unsigned int pack_open_fds;
/* Descriptor budget; computed on first use in open_packed_git_1(). */
static unsigned int pack_max_fds;
/* Highest value pack_mapped has reached. */
static size_t peak_pack_mapped;
/* Total bytes of pack data currently mapped through windows. */
static size_t pack_mapped;
57
58
/*
 * printf conversion used by pack_report() for size values; the matching
 * argument must be passed through sz_fmt() so it is a uintmax_t.
 */
#define SZ_FMT PRIuMAX
59
0
/* Widen a size_t to uintmax_t so it can be printed with SZ_FMT. */
static inline uintmax_t sz_fmt(size_t s)
{
	return (uintmax_t)s;
}
60
61
void pack_report(void)
62
0
{
63
0
  fprintf(stderr,
64
0
    "pack_report: getpagesize()            = %10" SZ_FMT "\n"
65
0
    "pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n"
66
0
    "pack_report: core.packedGitLimit      = %10" SZ_FMT "\n",
67
0
    sz_fmt(getpagesize()),
68
0
    sz_fmt(packed_git_window_size),
69
0
    sz_fmt(packed_git_limit));
70
0
  fprintf(stderr,
71
0
    "pack_report: pack_used_ctr            = %10u\n"
72
0
    "pack_report: pack_mmap_calls          = %10u\n"
73
0
    "pack_report: pack_open_windows        = %10u / %10u\n"
74
0
    "pack_report: pack_mapped              = "
75
0
      "%10" SZ_FMT " / %10" SZ_FMT "\n",
76
0
    pack_used_ctr,
77
0
    pack_mmap_calls,
78
0
    pack_open_windows, peak_pack_open_windows,
79
0
    sz_fmt(pack_mapped), sz_fmt(peak_pack_mapped));
80
0
}
81
82
/*
83
 * Open and mmap the index file at path, perform a couple of
84
 * consistency checks, then record its information to p.  Return 0 on
85
 * success.
86
 */
87
static int check_packed_git_idx(const char *path, struct packed_git *p)
88
0
{
89
0
  void *idx_map;
90
0
  size_t idx_size;
91
0
  int fd = git_open(path), ret;
92
0
  struct stat st;
93
0
  const unsigned int hashsz = the_hash_algo->rawsz;
94
95
0
  if (fd < 0)
96
0
    return -1;
97
0
  if (fstat(fd, &st)) {
98
0
    close(fd);
99
0
    return -1;
100
0
  }
101
0
  idx_size = xsize_t(st.st_size);
102
0
  if (idx_size < 4 * 256 + hashsz + hashsz) {
103
0
    close(fd);
104
0
    return error("index file %s is too small", path);
105
0
  }
106
0
  idx_map = xmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0);
107
0
  close(fd);
108
109
0
  ret = load_idx(path, hashsz, idx_map, idx_size, p);
110
111
0
  if (ret)
112
0
    munmap(idx_map, idx_size);
113
114
0
  return ret;
115
0
}
116
117
int load_idx(const char *path, const unsigned int hashsz, void *idx_map,
118
       size_t idx_size, struct packed_git *p)
119
350
{
120
350
  struct pack_idx_header *hdr = idx_map;
121
350
  uint32_t version, nr, i, *index;
122
123
350
  if (idx_size < 4 * 256 + hashsz + hashsz)
124
21
    return error("index file %s is too small", path);
125
329
  if (!idx_map)
126
0
    return error("empty data");
127
128
329
  if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) {
129
209
    version = ntohl(hdr->idx_version);
130
209
    if (version < 2 || version > 2)
131
50
      return error("index file %s is version %"PRIu32
132
209
             " and is not supported by this binary"
133
209
             " (try upgrading GIT to a newer version)",
134
209
             path, version);
135
209
  } else
136
120
    version = 1;
137
138
279
  nr = 0;
139
279
  index = idx_map;
140
279
  if (version > 1)
141
159
    index += 2;  /* skip index header */
142
56.4k
  for (i = 0; i < 256; i++) {
143
56.2k
    uint32_t n = ntohl(index[i]);
144
56.2k
    if (n < nr)
145
62
      return error("non-monotonic index %s", path);
146
56.1k
    nr = n;
147
56.1k
  }
148
149
217
  if (version == 1) {
150
    /*
151
     * Total size:
152
     *  - 256 index entries 4 bytes each
153
     *  - 24-byte entries * nr (object ID + 4-byte offset)
154
     *  - hash of the packfile
155
     *  - file checksum
156
     */
157
62
    if (idx_size != st_add(4 * 256 + hashsz + hashsz, st_mult(nr, hashsz + 4)))
158
61
      return error("wrong index v1 file size in %s", path);
159
155
  } else if (version == 2) {
160
    /*
161
     * Minimum size:
162
     *  - 8 bytes of header
163
     *  - 256 index entries 4 bytes each
164
     *  - object ID entry * nr
165
     *  - 4-byte crc entry * nr
166
     *  - 4-byte offset entry * nr
167
     *  - hash of the packfile
168
     *  - file checksum
169
     * And after the 4-byte offset table might be a
170
     * variable sized table containing 8-byte entries
171
     * for offsets larger than 2^31.
172
     */
173
155
    size_t min_size = st_add(8 + 4*256 + hashsz + hashsz, st_mult(nr, hashsz + 4 + 4));
174
155
    size_t max_size = min_size;
175
155
    if (nr)
176
141
      max_size = st_add(max_size, st_mult(nr - 1, 8));
177
155
    if (idx_size < min_size || idx_size > max_size)
178
121
      return error("wrong index v2 file size in %s", path);
179
34
    if (idx_size != min_size &&
180
        /*
181
         * make sure we can deal with large pack offsets.
182
         * 31-bit signed offset won't be enough, neither
183
         * 32-bit unsigned one will be.
184
         */
185
34
        (sizeof(off_t) <= 4))
186
0
      return error("pack too large for current definition of off_t in %s", path);
187
34
    p->crc_offset = st_add(8 + 4 * 256, st_mult(nr, hashsz));
188
34
  }
189
190
35
  p->index_version = version;
191
35
  p->index_data = idx_map;
192
35
  p->index_size = idx_size;
193
35
  p->num_objects = nr;
194
35
  return 0;
195
217
}
196
197
int open_pack_index(struct packed_git *p)
198
0
{
199
0
  char *idx_name;
200
0
  size_t len;
201
0
  int ret;
202
203
0
  if (p->index_data)
204
0
    return 0;
205
206
0
  if (!strip_suffix(p->pack_name, ".pack", &len))
207
0
    BUG("pack_name does not end in .pack");
208
0
  idx_name = xstrfmt("%.*s.idx", (int)len, p->pack_name);
209
0
  ret = check_packed_git_idx(idx_name, p);
210
0
  free(idx_name);
211
0
  return ret;
212
0
}
213
214
uint32_t get_pack_fanout(struct packed_git *p, uint32_t value)
215
0
{
216
0
  const uint32_t *level1_ofs = p->index_data;
217
218
0
  if (!level1_ofs) {
219
0
    if (open_pack_index(p))
220
0
      return 0;
221
0
    level1_ofs = p->index_data;
222
0
  }
223
224
0
  if (p->index_version > 1) {
225
0
    level1_ofs += 2;
226
0
  }
227
228
0
  return ntohl(level1_ofs[value]);
229
0
}
230
231
static struct packed_git *alloc_packed_git(int extra)
232
0
{
233
0
  struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));
234
0
  memset(p, 0, sizeof(*p));
235
0
  p->pack_fd = -1;
236
0
  return p;
237
0
}
238
239
struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path)
240
0
{
241
0
  const char *path = sha1_pack_name(sha1);
242
0
  size_t alloc = st_add(strlen(path), 1);
243
0
  struct packed_git *p = alloc_packed_git(alloc);
244
245
0
  memcpy(p->pack_name, path, alloc); /* includes NUL */
246
0
  hashcpy(p->hash, sha1);
247
0
  if (check_packed_git_idx(idx_path, p)) {
248
0
    free(p);
249
0
    return NULL;
250
0
  }
251
252
0
  return p;
253
0
}
254
255
static void scan_windows(struct packed_git *p,
256
  struct packed_git **lru_p,
257
  struct pack_window **lru_w,
258
  struct pack_window **lru_l)
259
0
{
260
0
  struct pack_window *w, *w_l;
261
262
0
  for (w_l = NULL, w = p->windows; w; w = w->next) {
263
0
    if (!w->inuse_cnt) {
264
0
      if (!*lru_w || w->last_used < (*lru_w)->last_used) {
265
0
        *lru_p = p;
266
0
        *lru_w = w;
267
0
        *lru_l = w_l;
268
0
      }
269
0
    }
270
0
    w_l = w;
271
0
  }
272
0
}
273
274
static int unuse_one_window(struct packed_git *current)
275
0
{
276
0
  struct packed_git *p, *lru_p = NULL;
277
0
  struct pack_window *lru_w = NULL, *lru_l = NULL;
278
279
0
  if (current)
280
0
    scan_windows(current, &lru_p, &lru_w, &lru_l);
281
0
  for (p = the_repository->objects->packed_git; p; p = p->next)
282
0
    scan_windows(p, &lru_p, &lru_w, &lru_l);
283
0
  if (lru_p) {
284
0
    munmap(lru_w->base, lru_w->len);
285
0
    pack_mapped -= lru_w->len;
286
0
    if (lru_l)
287
0
      lru_l->next = lru_w->next;
288
0
    else
289
0
      lru_p->windows = lru_w->next;
290
0
    free(lru_w);
291
0
    pack_open_windows--;
292
0
    return 1;
293
0
  }
294
0
  return 0;
295
0
}
296
297
void close_pack_windows(struct packed_git *p)
298
0
{
299
0
  while (p->windows) {
300
0
    struct pack_window *w = p->windows;
301
302
0
    if (w->inuse_cnt)
303
0
      die("pack '%s' still has open windows to it",
304
0
          p->pack_name);
305
0
    munmap(w->base, w->len);
306
0
    pack_mapped -= w->len;
307
0
    pack_open_windows--;
308
0
    p->windows = w->next;
309
0
    free(w);
310
0
  }
311
0
}
312
313
int close_pack_fd(struct packed_git *p)
314
0
{
315
0
  if (p->pack_fd < 0)
316
0
    return 0;
317
318
0
  close(p->pack_fd);
319
0
  pack_open_fds--;
320
0
  p->pack_fd = -1;
321
322
0
  return 1;
323
0
}
324
325
void close_pack_index(struct packed_git *p)
326
0
{
327
0
  if (p->index_data) {
328
0
    munmap((void *)p->index_data, p->index_size);
329
0
    p->index_data = NULL;
330
0
  }
331
0
}
332
333
static void close_pack_revindex(struct packed_git *p)
334
0
{
335
0
  if (!p->revindex_map)
336
0
    return;
337
338
0
  munmap((void *)p->revindex_map, p->revindex_size);
339
0
  p->revindex_map = NULL;
340
0
  p->revindex_data = NULL;
341
0
}
342
343
static void close_pack_mtimes(struct packed_git *p)
344
0
{
345
0
  if (!p->mtimes_map)
346
0
    return;
347
348
0
  munmap((void *)p->mtimes_map, p->mtimes_size);
349
0
  p->mtimes_map = NULL;
350
0
}
351
352
void close_pack(struct packed_git *p)
353
0
{
354
0
  close_pack_windows(p);
355
0
  close_pack_fd(p);
356
0
  close_pack_index(p);
357
0
  close_pack_revindex(p);
358
0
  close_pack_mtimes(p);
359
0
  oidset_clear(&p->bad_objects);
360
0
}
361
362
void close_object_store(struct raw_object_store *o)
363
8.96k
{
364
8.96k
  struct packed_git *p;
365
366
8.96k
  for (p = o->packed_git; p; p = p->next)
367
0
    if (p->do_not_close)
368
0
      BUG("want to close pack marked 'do-not-close'");
369
0
    else
370
0
      close_pack(p);
371
372
8.96k
  if (o->multi_pack_index) {
373
0
    close_midx(o->multi_pack_index);
374
0
    o->multi_pack_index = NULL;
375
0
  }
376
377
8.96k
  close_commit_graph(o);
378
8.96k
}
379
380
void unlink_pack_path(const char *pack_name, int force_delete)
381
0
{
382
0
  static const char *exts[] = {".idx", ".pack", ".rev", ".keep", ".bitmap", ".promisor", ".mtimes"};
383
0
  int i;
384
0
  struct strbuf buf = STRBUF_INIT;
385
0
  size_t plen;
386
387
0
  strbuf_addstr(&buf, pack_name);
388
0
  strip_suffix_mem(buf.buf, &buf.len, ".pack");
389
0
  plen = buf.len;
390
391
0
  if (!force_delete) {
392
0
    strbuf_addstr(&buf, ".keep");
393
0
    if (!access(buf.buf, F_OK)) {
394
0
      strbuf_release(&buf);
395
0
      return;
396
0
    }
397
0
  }
398
399
0
  for (i = 0; i < ARRAY_SIZE(exts); i++) {
400
0
    strbuf_setlen(&buf, plen);
401
0
    strbuf_addstr(&buf, exts[i]);
402
0
    unlink(buf.buf);
403
0
  }
404
405
0
  strbuf_release(&buf);
406
0
}
407
408
/*
409
 * The LRU pack is the one with the oldest MRU window, preferring packs
410
 * with no used windows, or the oldest mtime if it has no windows allocated.
411
 */
412
static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struct pack_window **mru_w, int *accept_windows_inuse)
413
0
{
414
0
  struct pack_window *w, *this_mru_w;
415
0
  int has_windows_inuse = 0;
416
417
  /*
418
   * Reject this pack if it has windows and the previously selected
419
   * one does not.  If this pack does not have windows, reject
420
   * it if the pack file is newer than the previously selected one.
421
   */
422
0
  if (*lru_p && !*mru_w && (p->windows || p->mtime > (*lru_p)->mtime))
423
0
    return;
424
425
0
  for (w = this_mru_w = p->windows; w; w = w->next) {
426
    /*
427
     * Reject this pack if any of its windows are in use,
428
     * but the previously selected pack did not have any
429
     * inuse windows.  Otherwise, record that this pack
430
     * has windows in use.
431
     */
432
0
    if (w->inuse_cnt) {
433
0
      if (*accept_windows_inuse)
434
0
        has_windows_inuse = 1;
435
0
      else
436
0
        return;
437
0
    }
438
439
0
    if (w->last_used > this_mru_w->last_used)
440
0
      this_mru_w = w;
441
442
    /*
443
     * Reject this pack if it has windows that have been
444
     * used more recently than the previously selected pack.
445
     * If the previously selected pack had windows inuse and
446
     * we have not encountered a window in this pack that is
447
     * inuse, skip this check since we prefer a pack with no
448
     * inuse windows to one that has inuse windows.
449
     */
450
0
    if (*mru_w && *accept_windows_inuse == has_windows_inuse &&
451
0
        this_mru_w->last_used > (*mru_w)->last_used)
452
0
      return;
453
0
  }
454
455
  /*
456
   * Select this pack.
457
   */
458
0
  *mru_w = this_mru_w;
459
0
  *lru_p = p;
460
0
  *accept_windows_inuse = has_windows_inuse;
461
0
}
462
463
static int close_one_pack(void)
464
0
{
465
0
  struct packed_git *p, *lru_p = NULL;
466
0
  struct pack_window *mru_w = NULL;
467
0
  int accept_windows_inuse = 1;
468
469
0
  for (p = the_repository->objects->packed_git; p; p = p->next) {
470
0
    if (p->pack_fd == -1)
471
0
      continue;
472
0
    find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse);
473
0
  }
474
475
0
  if (lru_p)
476
0
    return close_pack_fd(lru_p);
477
478
0
  return 0;
479
0
}
480
481
/*
 * Return the number of file descriptors this process may have open,
 * trying in order: getrlimit(RLIMIT_NOFILE), sysconf(_SC_OPEN_MAX)
 * and the OPEN_MAX constant.  Falls back to 1 when none of them is
 * usable.
 */
static unsigned int get_max_fd_limit(void)
{
#ifdef RLIMIT_NOFILE
	{
		struct rlimit nofile;

		if (getrlimit(RLIMIT_NOFILE, &nofile) == 0)
			return nofile.rlim_cur;
	}
#endif

#ifdef _SC_OPEN_MAX
	{
		long from_sysconf = sysconf(_SC_OPEN_MAX);

		if (from_sysconf > 0)
			return from_sysconf;
		/*
		 * A result of -1 means either that sysconf() does not
		 * know _SC_OPEN_MAX (an error) or that there is no
		 * limit.  Clearing errno beforehand would tell the two
		 * apart, and the "no limit" case could then return a
		 * huge number for the caller to cap.  Letting both
		 * cases fall through to the OPEN_MAX code below is a
		 * lot simpler.
		 */
	}
#endif

#ifdef OPEN_MAX
	return OPEN_MAX;
#else
	return 1; /* see the caller ;-) */
#endif
}
521
522
const char *pack_basename(struct packed_git *p)
523
0
{
524
0
  const char *ret = strrchr(p->pack_name, '/');
525
0
  if (ret)
526
0
    ret = ret + 1; /* skip past slash */
527
0
  else
528
0
    ret = p->pack_name; /* we only have a base */
529
0
  return ret;
530
0
}
531
532
/*
533
 * Do not call this directly as this leaks p->pack_fd on error return;
534
 * call open_packed_git() instead.
535
 */
536
static int open_packed_git_1(struct packed_git *p)
537
0
{
538
0
  struct stat st;
539
0
  struct pack_header hdr;
540
0
  unsigned char hash[GIT_MAX_RAWSZ];
541
0
  unsigned char *idx_hash;
542
0
  ssize_t read_result;
543
0
  const unsigned hashsz = the_hash_algo->rawsz;
544
545
0
  if (open_pack_index(p))
546
0
    return error("packfile %s index unavailable", p->pack_name);
547
548
0
  if (!pack_max_fds) {
549
0
    unsigned int max_fds = get_max_fd_limit();
550
551
    /* Save 3 for stdin/stdout/stderr, 22 for work */
552
0
    if (25 < max_fds)
553
0
      pack_max_fds = max_fds - 25;
554
0
    else
555
0
      pack_max_fds = 1;
556
0
  }
557
558
0
  while (pack_max_fds <= pack_open_fds && close_one_pack())
559
0
    ; /* nothing */
560
561
0
  p->pack_fd = git_open(p->pack_name);
562
0
  if (p->pack_fd < 0 || fstat(p->pack_fd, &st))
563
0
    return -1;
564
0
  pack_open_fds++;
565
566
  /* If we created the struct before we had the pack we lack size. */
567
0
  if (!p->pack_size) {
568
0
    if (!S_ISREG(st.st_mode))
569
0
      return error("packfile %s not a regular file", p->pack_name);
570
0
    p->pack_size = st.st_size;
571
0
  } else if (p->pack_size != st.st_size)
572
0
    return error("packfile %s size changed", p->pack_name);
573
574
  /* Verify we recognize this pack file format. */
575
0
  read_result = read_in_full(p->pack_fd, &hdr, sizeof(hdr));
576
0
  if (read_result < 0)
577
0
    return error_errno("error reading from %s", p->pack_name);
578
0
  if (read_result != sizeof(hdr))
579
0
    return error("file %s is far too short to be a packfile", p->pack_name);
580
0
  if (hdr.hdr_signature != htonl(PACK_SIGNATURE))
581
0
    return error("file %s is not a GIT packfile", p->pack_name);
582
0
  if (!pack_version_ok(hdr.hdr_version))
583
0
    return error("packfile %s is version %"PRIu32" and not"
584
0
      " supported (try upgrading GIT to a newer version)",
585
0
      p->pack_name, ntohl(hdr.hdr_version));
586
587
  /* Verify the pack matches its index. */
588
0
  if (p->num_objects != ntohl(hdr.hdr_entries))
589
0
    return error("packfile %s claims to have %"PRIu32" objects"
590
0
           " while index indicates %"PRIu32" objects",
591
0
           p->pack_name, ntohl(hdr.hdr_entries),
592
0
           p->num_objects);
593
0
  read_result = pread_in_full(p->pack_fd, hash, hashsz,
594
0
          p->pack_size - hashsz);
595
0
  if (read_result < 0)
596
0
    return error_errno("error reading from %s", p->pack_name);
597
0
  if (read_result != hashsz)
598
0
    return error("packfile %s signature is unavailable", p->pack_name);
599
0
  idx_hash = ((unsigned char *)p->index_data) + p->index_size - hashsz * 2;
600
0
  if (!hasheq(hash, idx_hash))
601
0
    return error("packfile %s does not match index", p->pack_name);
602
0
  return 0;
603
0
}
604
605
/*
 * Open p's pack file via open_packed_git_1().  Returns 0 on success;
 * on failure closes any descriptor that was left open and returns -1.
 */
static int open_packed_git(struct packed_git *p)
{
	if (open_packed_git_1(p)) {
		close_pack_fd(p);
		return -1;
	}

	return 0;
}
612
613
static int in_window(struct pack_window *win, off_t offset)
614
0
{
615
  /* We must promise at least one full hash after the
616
   * offset is available from this window, otherwise the offset
617
   * is not actually in this window and a different window (which
618
   * has that one hash excess) must be used.  This is to support
619
   * the object header and delta base parsing routines below.
620
   */
621
0
  off_t win_off = win->offset;
622
0
  return win_off <= offset
623
0
    && (offset + the_hash_algo->rawsz) <= (win_off + win->len);
624
0
}
625
626
unsigned char *use_pack(struct packed_git *p,
627
    struct pack_window **w_cursor,
628
    off_t offset,
629
    unsigned long *left)
630
0
{
631
0
  struct pack_window *win = *w_cursor;
632
633
  /* Since packfiles end in a hash of their content and it's
634
   * pointless to ask for an offset into the middle of that
635
   * hash, and the in_window function above wouldn't match
636
   * don't allow an offset too close to the end of the file.
637
   */
638
0
  if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p))
639
0
    die("packfile %s cannot be accessed", p->pack_name);
640
0
  if (offset > (p->pack_size - the_hash_algo->rawsz))
641
0
    die("offset beyond end of packfile (truncated pack?)");
642
0
  if (offset < 0)
643
0
    die(_("offset before end of packfile (broken .idx?)"));
644
645
0
  if (!win || !in_window(win, offset)) {
646
0
    if (win)
647
0
      win->inuse_cnt--;
648
0
    for (win = p->windows; win; win = win->next) {
649
0
      if (in_window(win, offset))
650
0
        break;
651
0
    }
652
0
    if (!win) {
653
0
      size_t window_align = packed_git_window_size / 2;
654
0
      off_t len;
655
656
0
      if (p->pack_fd == -1 && open_packed_git(p))
657
0
        die("packfile %s cannot be accessed", p->pack_name);
658
659
0
      CALLOC_ARRAY(win, 1);
660
0
      win->offset = (offset / window_align) * window_align;
661
0
      len = p->pack_size - win->offset;
662
0
      if (len > packed_git_window_size)
663
0
        len = packed_git_window_size;
664
0
      win->len = (size_t)len;
665
0
      pack_mapped += win->len;
666
0
      while (packed_git_limit < pack_mapped
667
0
        && unuse_one_window(p))
668
0
        ; /* nothing */
669
0
      win->base = xmmap_gently(NULL, win->len,
670
0
        PROT_READ, MAP_PRIVATE,
671
0
        p->pack_fd, win->offset);
672
0
      if (win->base == MAP_FAILED)
673
0
        die_errno(_("packfile %s cannot be mapped%s"),
674
0
            p->pack_name, mmap_os_err());
675
0
      if (!win->offset && win->len == p->pack_size
676
0
        && !p->do_not_close)
677
0
        close_pack_fd(p);
678
0
      pack_mmap_calls++;
679
0
      pack_open_windows++;
680
0
      if (pack_mapped > peak_pack_mapped)
681
0
        peak_pack_mapped = pack_mapped;
682
0
      if (pack_open_windows > peak_pack_open_windows)
683
0
        peak_pack_open_windows = pack_open_windows;
684
0
      win->next = p->windows;
685
0
      p->windows = win;
686
0
    }
687
0
  }
688
0
  if (win != *w_cursor) {
689
0
    win->last_used = pack_used_ctr++;
690
0
    win->inuse_cnt++;
691
0
    *w_cursor = win;
692
0
  }
693
0
  offset -= win->offset;
694
0
  if (left)
695
0
    *left = win->len - xsize_t(offset);
696
0
  return win->base + offset;
697
0
}
698
699
void unuse_pack(struct pack_window **w_cursor)
700
0
{
701
0
  struct pack_window *w = *w_cursor;
702
0
  if (w) {
703
0
    w->inuse_cnt--;
704
0
    *w_cursor = NULL;
705
0
  }
706
0
}
707
708
struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
709
0
{
710
0
  struct stat st;
711
0
  size_t alloc;
712
0
  struct packed_git *p;
713
714
  /*
715
   * Make sure a corresponding .pack file exists and that
716
   * the index looks sane.
717
   */
718
0
  if (!strip_suffix_mem(path, &path_len, ".idx"))
719
0
    return NULL;
720
721
  /*
722
   * ".promisor" is long enough to hold any suffix we're adding (and
723
   * the use xsnprintf double-checks that)
724
   */
725
0
  alloc = st_add3(path_len, strlen(".promisor"), 1);
726
0
  p = alloc_packed_git(alloc);
727
0
  memcpy(p->pack_name, path, path_len);
728
729
0
  xsnprintf(p->pack_name + path_len, alloc - path_len, ".keep");
730
0
  if (!access(p->pack_name, F_OK))
731
0
    p->pack_keep = 1;
732
733
0
  xsnprintf(p->pack_name + path_len, alloc - path_len, ".promisor");
734
0
  if (!access(p->pack_name, F_OK))
735
0
    p->pack_promisor = 1;
736
737
0
  xsnprintf(p->pack_name + path_len, alloc - path_len, ".mtimes");
738
0
  if (!access(p->pack_name, F_OK))
739
0
    p->is_cruft = 1;
740
741
0
  xsnprintf(p->pack_name + path_len, alloc - path_len, ".pack");
742
0
  if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) {
743
0
    free(p);
744
0
    return NULL;
745
0
  }
746
747
  /* ok, it looks sane as far as we can check without
748
   * actually mapping the pack file.
749
   */
750
0
  p->pack_size = st.st_size;
751
0
  p->pack_local = local;
752
0
  p->mtime = st.st_mtime;
753
0
  if (path_len < the_hash_algo->hexsz ||
754
0
      get_hash_hex(path + path_len - the_hash_algo->hexsz, p->hash))
755
0
    hashclr(p->hash);
756
0
  return p;
757
0
}
758
759
void install_packed_git(struct repository *r, struct packed_git *pack)
760
0
{
761
0
  if (pack->pack_fd != -1)
762
0
    pack_open_fds++;
763
764
0
  pack->next = r->objects->packed_git;
765
0
  r->objects->packed_git = pack;
766
767
0
  hashmap_entry_init(&pack->packmap_ent, strhash(pack->pack_name));
768
0
  hashmap_add(&r->objects->pack_map, &pack->packmap_ent);
769
0
}
770
771
/*
 * Optional callback for reporting unexpected files in a pack
 * directory.  It is called with a PACKDIR_FILE_* bit mask and the
 * file's path.  While it is NULL, garbage reporting in this file is
 * skipped.
 */
void (*report_garbage)(unsigned seen_bits, const char *path);
772
773
static void report_helper(const struct string_list *list,
774
        int seen_bits, int first, int last)
775
0
{
776
0
  if (seen_bits == (PACKDIR_FILE_PACK|PACKDIR_FILE_IDX))
777
0
    return;
778
779
0
  for (; first < last; first++)
780
0
    report_garbage(seen_bits, list->items[first].string);
781
0
}
782
783
static void report_pack_garbage(struct string_list *list)
784
1.22k
{
785
1.22k
  int i, baselen = -1, first = 0, seen_bits = 0;
786
787
1.22k
  if (!report_garbage)
788
1.22k
    return;
789
790
0
  string_list_sort(list);
791
792
0
  for (i = 0; i < list->nr; i++) {
793
0
    const char *path = list->items[i].string;
794
0
    if (baselen != -1 &&
795
0
        strncmp(path, list->items[first].string, baselen)) {
796
0
      report_helper(list, seen_bits, first, i);
797
0
      baselen = -1;
798
0
      seen_bits = 0;
799
0
    }
800
0
    if (baselen == -1) {
801
0
      const char *dot = strrchr(path, '.');
802
0
      if (!dot) {
803
0
        report_garbage(PACKDIR_FILE_GARBAGE, path);
804
0
        continue;
805
0
      }
806
0
      baselen = dot - path + 1;
807
0
      first = i;
808
0
    }
809
0
    if (!strcmp(path + baselen, "pack"))
810
0
      seen_bits |= 1;
811
0
    else if (!strcmp(path + baselen, "idx"))
812
0
      seen_bits |= 2;
813
0
  }
814
0
  report_helper(list, seen_bits, first, list->nr);
815
0
}
816
817
void for_each_file_in_pack_dir(const char *objdir,
818
             each_file_in_pack_dir_fn fn,
819
             void *data)
820
1.22k
{
821
1.22k
  struct strbuf path = STRBUF_INIT;
822
1.22k
  size_t dirnamelen;
823
1.22k
  DIR *dir;
824
1.22k
  struct dirent *de;
825
826
1.22k
  strbuf_addstr(&path, objdir);
827
1.22k
  strbuf_addstr(&path, "/pack");
828
1.22k
  dir = opendir(path.buf);
829
1.22k
  if (!dir) {
830
0
    if (errno != ENOENT)
831
0
      error_errno("unable to open object pack directory: %s",
832
0
            path.buf);
833
0
    strbuf_release(&path);
834
0
    return;
835
0
  }
836
1.22k
  strbuf_addch(&path, '/');
837
1.22k
  dirnamelen = path.len;
838
1.22k
  while ((de = readdir_skip_dot_and_dotdot(dir)) != NULL) {
839
0
    strbuf_setlen(&path, dirnamelen);
840
0
    strbuf_addstr(&path, de->d_name);
841
842
0
    fn(path.buf, path.len, de->d_name, data);
843
0
  }
844
845
1.22k
  closedir(dir);
846
1.22k
  strbuf_release(&path);
847
1.22k
}
848
849
/* State handed to prepare_pack() for each file of one pack directory. */
struct prepare_pack_data {
	/* repository that newly found packs are installed into */
	struct repository *r;
	/* file names collected for report_pack_garbage() */
	struct string_list *garbage;
	/* value passed as `local` to add_packed_git() */
	int local;
	/* multi-pack-index of this object directory, or NULL */
	struct multi_pack_index *m;
};
855
856
static void prepare_pack(const char *full_name, size_t full_name_len,
857
       const char *file_name, void *_data)
858
0
{
859
0
  struct prepare_pack_data *data = (struct prepare_pack_data *)_data;
860
0
  struct packed_git *p;
861
0
  size_t base_len = full_name_len;
862
863
0
  if (strip_suffix_mem(full_name, &base_len, ".idx") &&
864
0
      !(data->m && midx_contains_pack(data->m, file_name))) {
865
0
    struct hashmap_entry hent;
866
0
    char *pack_name = xstrfmt("%.*s.pack", (int)base_len, full_name);
867
0
    unsigned int hash = strhash(pack_name);
868
0
    hashmap_entry_init(&hent, hash);
869
870
    /* Don't reopen a pack we already have. */
871
0
    if (!hashmap_get(&data->r->objects->pack_map, &hent, pack_name)) {
872
0
      p = add_packed_git(full_name, full_name_len, data->local);
873
0
      if (p)
874
0
        install_packed_git(data->r, p);
875
0
    }
876
0
    free(pack_name);
877
0
  }
878
879
0
  if (!report_garbage)
880
0
    return;
881
882
0
  if (!strcmp(file_name, "multi-pack-index"))
883
0
    return;
884
0
  if (starts_with(file_name, "multi-pack-index") &&
885
0
      (ends_with(file_name, ".bitmap") || ends_with(file_name, ".rev")))
886
0
    return;
887
0
  if (ends_with(file_name, ".idx") ||
888
0
      ends_with(file_name, ".rev") ||
889
0
      ends_with(file_name, ".pack") ||
890
0
      ends_with(file_name, ".bitmap") ||
891
0
      ends_with(file_name, ".keep") ||
892
0
      ends_with(file_name, ".promisor") ||
893
0
      ends_with(file_name, ".mtimes"))
894
0
    string_list_append(data->garbage, full_name);
895
0
  else
896
0
    report_garbage(PACKDIR_FILE_GARBAGE, full_name);
897
0
}
898
899
static void prepare_packed_git_one(struct repository *r, char *objdir, int local)
900
1.22k
{
901
1.22k
  struct prepare_pack_data data;
902
1.22k
  struct string_list garbage = STRING_LIST_INIT_DUP;
903
904
1.22k
  data.m = r->objects->multi_pack_index;
905
906
  /* look for the multi-pack-index for this object directory */
907
1.22k
  while (data.m && strcmp(data.m->object_dir, objdir))
908
0
    data.m = data.m->next;
909
910
1.22k
  data.r = r;
911
1.22k
  data.garbage = &garbage;
912
1.22k
  data.local = local;
913
914
1.22k
  for_each_file_in_pack_dir(objdir, prepare_pack, &data);
915
916
1.22k
  report_pack_garbage(data.garbage);
917
1.22k
  string_list_clear(data.garbage, 0);
918
1.22k
}
919
920
/* Forward declaration; the definition follows further down in this file. */
static void prepare_packed_git(struct repository *r);
921
/*
922
 * Give a fast, rough count of the number of objects in the repository. This
923
 * ignores loose objects completely. If you have a lot of them, then either
924
 * you should repack because your performance will be awful, or they are
925
 * all unreachable objects about to be pruned, in which case they're not really
926
 * interesting as a measure of repo size in the first place.
927
 */
928
unsigned long repo_approximate_object_count(struct repository *r)
929
7.22k
{
930
7.22k
  if (!r->objects->approximate_object_count_valid) {
931
1.22k
    unsigned long count;
932
1.22k
    struct multi_pack_index *m;
933
1.22k
    struct packed_git *p;
934
935
1.22k
    prepare_packed_git(r);
936
1.22k
    count = 0;
937
1.22k
    for (m = get_multi_pack_index(r); m; m = m->next)
938
0
      count += m->num_objects;
939
1.22k
    for (p = r->objects->packed_git; p; p = p->next) {
940
0
      if (open_pack_index(p))
941
0
        continue;
942
0
      count += p->num_objects;
943
0
    }
944
1.22k
    r->objects->approximate_object_count = count;
945
1.22k
    r->objects->approximate_object_count_valid = 1;
946
1.22k
  }
947
7.22k
  return r->objects->approximate_object_count;
948
7.22k
}
949
950
/*
 * Instantiates sort_packs(), which rearrange_packed_git() uses to sort
 * the packed_git list.  NOTE(review): presumably a static list sort
 * over the `next` links, generated by mergesort.h -- confirm.
 */
DEFINE_LIST_SORT(static, sort_packs, struct packed_git, next);
951
952
static int sort_pack(const struct packed_git *a, const struct packed_git *b)
953
0
{
954
0
  int st;
955
956
  /*
957
   * Local packs tend to contain objects specific to our
958
   * variant of the project than remote ones.  In addition,
959
   * remote ones could be on a network mounted filesystem.
960
   * Favor local ones for these reasons.
961
   */
962
0
  st = a->pack_local - b->pack_local;
963
0
  if (st)
964
0
    return -st;
965
966
  /*
967
   * Younger packs tend to contain more recent objects,
968
   * and more recent objects tend to get accessed more
969
   * often.
970
   */
971
0
  if (a->mtime < b->mtime)
972
0
    return 1;
973
0
  else if (a->mtime == b->mtime)
974
0
    return 0;
975
0
  return -1;
976
0
}
977
978
static void rearrange_packed_git(struct repository *r)
979
1.22k
{
980
1.22k
  sort_packs(&r->objects->packed_git, sort_pack);
981
1.22k
}
982
983
static void prepare_packed_git_mru(struct repository *r)
984
1.22k
{
985
1.22k
  struct packed_git *p;
986
987
1.22k
  INIT_LIST_HEAD(&r->objects->packed_git_mru);
988
989
1.22k
  for (p = r->objects->packed_git; p; p = p->next)
990
0
    list_add_tail(&p->mru, &r->objects->packed_git_mru);
991
1.22k
}
992
993
static void prepare_packed_git(struct repository *r)
994
147k
{
995
147k
  struct object_directory *odb;
996
997
147k
  if (r->objects->packed_git_initialized)
998
146k
    return;
999
1000
1.22k
  prepare_alt_odb(r);
1001
2.44k
  for (odb = r->objects->odb; odb; odb = odb->next) {
1002
1.22k
    int local = (odb == r->objects->odb);
1003
1.22k
    prepare_multi_pack_index_one(r, odb->path, local);
1004
1.22k
    prepare_packed_git_one(r, odb->path, local);
1005
1.22k
  }
1006
1.22k
  rearrange_packed_git(r);
1007
1008
1.22k
  prepare_packed_git_mru(r);
1009
1.22k
  r->objects->packed_git_initialized = 1;
1010
1.22k
}
1011
1012
void reprepare_packed_git(struct repository *r)
1013
0
{
1014
0
  struct object_directory *odb;
1015
1016
0
  obj_read_lock();
1017
1018
  /*
1019
   * Reprepare alt odbs, in case the alternates file was modified
1020
   * during the course of this process. This only _adds_ odbs to
1021
   * the linked list, so existing odbs will continue to exist for
1022
   * the lifetime of the process.
1023
   */
1024
0
  r->objects->loaded_alternates = 0;
1025
0
  prepare_alt_odb(r);
1026
1027
0
  for (odb = r->objects->odb; odb; odb = odb->next)
1028
0
    odb_clear_loose_cache(odb);
1029
1030
0
  r->objects->approximate_object_count_valid = 0;
1031
0
  r->objects->packed_git_initialized = 0;
1032
0
  prepare_packed_git(r);
1033
0
  obj_read_unlock();
1034
0
}
1035
1036
struct packed_git *get_packed_git(struct repository *r)
1037
7.22k
{
1038
7.22k
  prepare_packed_git(r);
1039
7.22k
  return r->objects->packed_git;
1040
7.22k
}
1041
1042
struct multi_pack_index *get_multi_pack_index(struct repository *r)
1043
8.44k
{
1044
8.44k
  prepare_packed_git(r);
1045
8.44k
  return r->objects->multi_pack_index;
1046
8.44k
}
1047
1048
struct multi_pack_index *get_local_multi_pack_index(struct repository *r)
1049
0
{
1050
0
  struct multi_pack_index *m = get_multi_pack_index(r);
1051
1052
  /* no need to iterate; we always put the local one first (if any) */
1053
0
  if (m && m->local)
1054
0
    return m;
1055
1056
0
  return NULL;
1057
0
}
1058
1059
struct packed_git *get_all_packs(struct repository *r)
1060
0
{
1061
0
  struct multi_pack_index *m;
1062
1063
0
  prepare_packed_git(r);
1064
0
  for (m = r->objects->multi_pack_index; m; m = m->next) {
1065
0
    uint32_t i;
1066
0
    for (i = 0; i < m->num_packs; i++)
1067
0
      prepare_midx_pack(r, m, i);
1068
0
  }
1069
1070
0
  return r->objects->packed_git;
1071
0
}
1072
1073
struct list_head *get_packed_git_mru(struct repository *r)
1074
0
{
1075
0
  prepare_packed_git(r);
1076
0
  return &r->objects->packed_git_mru;
1077
0
}
1078
1079
unsigned long unpack_object_header_buffer(const unsigned char *buf,
1080
    unsigned long len, enum object_type *type, unsigned long *sizep)
1081
138
{
1082
138
  unsigned shift;
1083
138
  size_t size, c;
1084
138
  unsigned long used = 0;
1085
1086
138
  c = buf[used++];
1087
138
  *type = (c >> 4) & 7;
1088
138
  size = c & 15;
1089
138
  shift = 4;
1090
788
  while (c & 0x80) {
1091
748
    if (len <= used || (bitsizeof(long) - 7) < shift) {
1092
98
      error("bad object header");
1093
98
      size = used = 0;
1094
98
      break;
1095
98
    }
1096
650
    c = buf[used++];
1097
650
    size = st_add(size, st_left_shift(c & 0x7f, shift));
1098
650
    shift += 7;
1099
650
  }
1100
138
  *sizep = cast_size_t_to_ulong(size);
1101
138
  return used;
1102
138
}
1103
1104
unsigned long get_size_from_delta(struct packed_git *p,
1105
          struct pack_window **w_curs,
1106
          off_t curpos)
1107
0
{
1108
0
  const unsigned char *data;
1109
0
  unsigned char delta_head[20], *in;
1110
0
  git_zstream stream;
1111
0
  int st;
1112
1113
0
  memset(&stream, 0, sizeof(stream));
1114
0
  stream.next_out = delta_head;
1115
0
  stream.avail_out = sizeof(delta_head);
1116
1117
0
  git_inflate_init(&stream);
1118
0
  do {
1119
0
    in = use_pack(p, w_curs, curpos, &stream.avail_in);
1120
0
    stream.next_in = in;
1121
    /*
1122
     * Note: the window section returned by use_pack() must be
1123
     * available throughout git_inflate()'s unlocked execution. To
1124
     * ensure no other thread will modify the window in the
1125
     * meantime, we rely on the packed_window.inuse_cnt. This
1126
     * counter is incremented before window reading and checked
1127
     * before window disposal.
1128
     *
1129
     * Other worrying sections could be the call to close_pack_fd(),
1130
     * which can close packs even with in-use windows, and to
1131
     * reprepare_packed_git(). Regarding the former, mmap doc says:
1132
     * "closing the file descriptor does not unmap the region". And
1133
     * for the latter, it won't re-open already available packs.
1134
     */
1135
0
    obj_read_unlock();
1136
0
    st = git_inflate(&stream, Z_FINISH);
1137
0
    obj_read_lock();
1138
0
    curpos += stream.next_in - in;
1139
0
  } while ((st == Z_OK || st == Z_BUF_ERROR) &&
1140
0
     stream.total_out < sizeof(delta_head));
1141
0
  git_inflate_end(&stream);
1142
0
  if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head)) {
1143
0
    error("delta data unpack-initial failed");
1144
0
    return 0;
1145
0
  }
1146
1147
  /* Examine the initial part of the delta to figure out
1148
   * the result size.
1149
   */
1150
0
  data = delta_head;
1151
1152
  /* ignore base size */
1153
0
  get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
1154
1155
  /* Read the result size */
1156
0
  return get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
1157
0
}
1158
1159
int unpack_object_header(struct packed_git *p,
1160
       struct pack_window **w_curs,
1161
       off_t *curpos,
1162
       unsigned long *sizep)
1163
0
{
1164
0
  unsigned char *base;
1165
0
  unsigned long left;
1166
0
  unsigned long used;
1167
0
  enum object_type type;
1168
1169
  /* use_pack() assures us we have [base, base + 20) available
1170
   * as a range that we can look at.  (Its actually the hash
1171
   * size that is assured.)  With our object header encoding
1172
   * the maximum deflated object size is 2^137, which is just
1173
   * insane, so we know won't exceed what we have been given.
1174
   */
1175
0
  base = use_pack(p, w_curs, *curpos, &left);
1176
0
  used = unpack_object_header_buffer(base, left, &type, sizep);
1177
0
  if (!used) {
1178
0
    type = OBJ_BAD;
1179
0
  } else
1180
0
    *curpos += used;
1181
1182
0
  return type;
1183
0
}
1184
1185
void mark_bad_packed_object(struct packed_git *p, const struct object_id *oid)
1186
0
{
1187
0
  oidset_insert(&p->bad_objects, oid);
1188
0
}
1189
1190
const struct packed_git *has_packed_and_bad(struct repository *r,
1191
              const struct object_id *oid)
1192
0
{
1193
0
  struct packed_git *p;
1194
1195
0
  for (p = r->objects->packed_git; p; p = p->next)
1196
0
    if (oidset_contains(&p->bad_objects, oid))
1197
0
      return p;
1198
0
  return NULL;
1199
0
}
1200
1201
off_t get_delta_base(struct packed_git *p,
1202
         struct pack_window **w_curs,
1203
         off_t *curpos,
1204
         enum object_type type,
1205
         off_t delta_obj_offset)
1206
0
{
1207
0
  unsigned char *base_info = use_pack(p, w_curs, *curpos, NULL);
1208
0
  off_t base_offset;
1209
1210
  /* use_pack() assured us we have [base_info, base_info + 20)
1211
   * as a range that we can look at without walking off the
1212
   * end of the mapped window.  Its actually the hash size
1213
   * that is assured.  An OFS_DELTA longer than the hash size
1214
   * is stupid, as then a REF_DELTA would be smaller to store.
1215
   */
1216
0
  if (type == OBJ_OFS_DELTA) {
1217
0
    unsigned used = 0;
1218
0
    unsigned char c = base_info[used++];
1219
0
    base_offset = c & 127;
1220
0
    while (c & 128) {
1221
0
      base_offset += 1;
1222
0
      if (!base_offset || MSB(base_offset, 7))
1223
0
        return 0;  /* overflow */
1224
0
      c = base_info[used++];
1225
0
      base_offset = (base_offset << 7) + (c & 127);
1226
0
    }
1227
0
    base_offset = delta_obj_offset - base_offset;
1228
0
    if (base_offset <= 0 || base_offset >= delta_obj_offset)
1229
0
      return 0;  /* out of bound */
1230
0
    *curpos += used;
1231
0
  } else if (type == OBJ_REF_DELTA) {
1232
    /* The base entry _must_ be in the same pack */
1233
0
    base_offset = find_pack_entry_one(base_info, p);
1234
0
    *curpos += the_hash_algo->rawsz;
1235
0
  } else
1236
0
    die("I am totally screwed");
1237
0
  return base_offset;
1238
0
}
1239
1240
/*
1241
 * Like get_delta_base above, but we return the sha1 instead of the pack
1242
 * offset. This means it is cheaper for REF deltas (we do not have to do
1243
 * the final object lookup), but more expensive for OFS deltas (we
1244
 * have to load the revidx to convert the offset back into a sha1).
1245
 */
1246
static int get_delta_base_oid(struct packed_git *p,
1247
            struct pack_window **w_curs,
1248
            off_t curpos,
1249
            struct object_id *oid,
1250
            enum object_type type,
1251
            off_t delta_obj_offset)
1252
0
{
1253
0
  if (type == OBJ_REF_DELTA) {
1254
0
    unsigned char *base = use_pack(p, w_curs, curpos, NULL);
1255
0
    oidread(oid, base);
1256
0
    return 0;
1257
0
  } else if (type == OBJ_OFS_DELTA) {
1258
0
    uint32_t base_pos;
1259
0
    off_t base_offset = get_delta_base(p, w_curs, &curpos,
1260
0
               type, delta_obj_offset);
1261
1262
0
    if (!base_offset)
1263
0
      return -1;
1264
1265
0
    if (offset_to_pack_pos(p, base_offset, &base_pos) < 0)
1266
0
      return -1;
1267
1268
0
    return nth_packed_object_id(oid, p,
1269
0
              pack_pos_to_index(p, base_pos));
1270
0
  } else
1271
0
    return -1;
1272
0
}
1273
1274
static int retry_bad_packed_offset(struct repository *r,
1275
           struct packed_git *p,
1276
           off_t obj_offset)
1277
0
{
1278
0
  int type;
1279
0
  uint32_t pos;
1280
0
  struct object_id oid;
1281
0
  if (offset_to_pack_pos(p, obj_offset, &pos) < 0)
1282
0
    return OBJ_BAD;
1283
0
  nth_packed_object_id(&oid, p, pack_pos_to_index(p, pos));
1284
0
  mark_bad_packed_object(p, &oid);
1285
0
  type = oid_object_info(r, &oid, NULL);
1286
0
  if (type <= OBJ_NONE)
1287
0
    return OBJ_BAD;
1288
0
  return type;
1289
0
}
1290
1291
0
#define POI_STACK_PREALLOC 64
1292
1293
static enum object_type packed_to_object_type(struct repository *r,
1294
                struct packed_git *p,
1295
                off_t obj_offset,
1296
                enum object_type type,
1297
                struct pack_window **w_curs,
1298
                off_t curpos)
1299
0
{
1300
0
  off_t small_poi_stack[POI_STACK_PREALLOC];
1301
0
  off_t *poi_stack = small_poi_stack;
1302
0
  int poi_stack_nr = 0, poi_stack_alloc = POI_STACK_PREALLOC;
1303
1304
0
  while (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1305
0
    off_t base_offset;
1306
0
    unsigned long size;
1307
    /* Push the object we're going to leave behind */
1308
0
    if (poi_stack_nr >= poi_stack_alloc && poi_stack == small_poi_stack) {
1309
0
      poi_stack_alloc = alloc_nr(poi_stack_nr);
1310
0
      ALLOC_ARRAY(poi_stack, poi_stack_alloc);
1311
0
      COPY_ARRAY(poi_stack, small_poi_stack, poi_stack_nr);
1312
0
    } else {
1313
0
      ALLOC_GROW(poi_stack, poi_stack_nr+1, poi_stack_alloc);
1314
0
    }
1315
0
    poi_stack[poi_stack_nr++] = obj_offset;
1316
    /* If parsing the base offset fails, just unwind */
1317
0
    base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset);
1318
0
    if (!base_offset)
1319
0
      goto unwind;
1320
0
    curpos = obj_offset = base_offset;
1321
0
    type = unpack_object_header(p, w_curs, &curpos, &size);
1322
0
    if (type <= OBJ_NONE) {
1323
      /* If getting the base itself fails, we first
1324
       * retry the base, otherwise unwind */
1325
0
      type = retry_bad_packed_offset(r, p, base_offset);
1326
0
      if (type > OBJ_NONE)
1327
0
        goto out;
1328
0
      goto unwind;
1329
0
    }
1330
0
  }
1331
1332
0
  switch (type) {
1333
0
  case OBJ_BAD:
1334
0
  case OBJ_COMMIT:
1335
0
  case OBJ_TREE:
1336
0
  case OBJ_BLOB:
1337
0
  case OBJ_TAG:
1338
0
    break;
1339
0
  default:
1340
0
    error("unknown object type %i at offset %"PRIuMAX" in %s",
1341
0
          type, (uintmax_t)obj_offset, p->pack_name);
1342
0
    type = OBJ_BAD;
1343
0
  }
1344
1345
0
out:
1346
0
  if (poi_stack != small_poi_stack)
1347
0
    free(poi_stack);
1348
0
  return type;
1349
1350
0
unwind:
1351
0
  while (poi_stack_nr) {
1352
0
    obj_offset = poi_stack[--poi_stack_nr];
1353
0
    type = retry_bad_packed_offset(r, p, obj_offset);
1354
0
    if (type > OBJ_NONE)
1355
0
      goto out;
1356
0
  }
1357
0
  type = OBJ_BAD;
1358
0
  goto out;
1359
0
}
1360
1361
static struct hashmap delta_base_cache;
1362
static size_t delta_base_cached;
1363
1364
static LIST_HEAD(delta_base_cache_lru);
1365
1366
struct delta_base_cache_key {
1367
  struct packed_git *p;
1368
  off_t base_offset;
1369
};
1370
1371
struct delta_base_cache_entry {
1372
  struct hashmap_entry ent;
1373
  struct delta_base_cache_key key;
1374
  struct list_head lru;
1375
  void *data;
1376
  unsigned long size;
1377
  enum object_type type;
1378
};
1379
1380
/*
 * Hash a (pack, offset) pair for the delta base cache: add the pack
 * pointer and the offset, both truncated to unsigned int, then fold
 * the higher bits of that sum back in.
 */
static unsigned int pack_entry_hash(struct packed_git *p, off_t base_offset)
{
  unsigned int seed = (unsigned int)(intptr_t)p + (unsigned int)base_offset;

  return seed + (seed >> 8) + (seed >> 16);
}
1388
1389
static struct delta_base_cache_entry *
1390
get_delta_base_cache_entry(struct packed_git *p, off_t base_offset)
1391
0
{
1392
0
  struct hashmap_entry entry, *e;
1393
0
  struct delta_base_cache_key key;
1394
1395
0
  if (!delta_base_cache.cmpfn)
1396
0
    return NULL;
1397
1398
0
  hashmap_entry_init(&entry, pack_entry_hash(p, base_offset));
1399
0
  key.p = p;
1400
0
  key.base_offset = base_offset;
1401
0
  e = hashmap_get(&delta_base_cache, &entry, &key);
1402
0
  return e ? container_of(e, struct delta_base_cache_entry, ent) : NULL;
1403
0
}
1404
1405
static int delta_base_cache_key_eq(const struct delta_base_cache_key *a,
1406
           const struct delta_base_cache_key *b)
1407
0
{
1408
0
  return a->p == b->p && a->base_offset == b->base_offset;
1409
0
}
1410
1411
/*
 * Comparison callback installed on delta_base_cache. Returns 0 when
 * the entries match and nonzero otherwise. When a lookup key is
 * supplied, entry `va` is compared against that key instead of
 * against entry `vb`.
 */
static int delta_base_cache_hash_cmp(const void *cmp_data UNUSED,
             const struct hashmap_entry *va,
             const struct hashmap_entry *vb,
             const void *vkey)
{
  const struct delta_base_cache_entry *lhs =
    container_of(va, const struct delta_base_cache_entry, ent);
  const struct delta_base_cache_entry *rhs =
    container_of(vb, const struct delta_base_cache_entry, ent);
  const struct delta_base_cache_key *wanted = vkey;

  if (!wanted)
    wanted = &rhs->key;
  return !delta_base_cache_key_eq(&lhs->key, wanted);
}
1427
1428
/* Return 1 when (`p`, `base_offset`) has a delta base cache entry, else 0. */
static int in_delta_base_cache(struct packed_git *p, off_t base_offset)
{
  return get_delta_base_cache_entry(p, base_offset) != NULL;
}
1432
1433
/*
1434
 * Remove the entry from the cache, but do _not_ free the associated
1435
 * entry data. The caller takes ownership of the "data" buffer, and
1436
 * should copy out any fields it wants before detaching.
1437
 */
1438
static void detach_delta_base_cache_entry(struct delta_base_cache_entry *ent)
1439
0
{
1440
0
  hashmap_remove(&delta_base_cache, &ent->ent, &ent->key);
1441
0
  list_del(&ent->lru);
1442
0
  delta_base_cached -= ent->size;
1443
0
  free(ent);
1444
0
}
1445
1446
static void *cache_or_unpack_entry(struct repository *r, struct packed_git *p,
1447
           off_t base_offset, unsigned long *base_size,
1448
           enum object_type *type)
1449
0
{
1450
0
  struct delta_base_cache_entry *ent;
1451
1452
0
  ent = get_delta_base_cache_entry(p, base_offset);
1453
0
  if (!ent)
1454
0
    return unpack_entry(r, p, base_offset, type, base_size);
1455
1456
0
  if (type)
1457
0
    *type = ent->type;
1458
0
  if (base_size)
1459
0
    *base_size = ent->size;
1460
0
  return xmemdupz(ent->data, ent->size);
1461
0
}
1462
1463
static inline void release_delta_base_cache(struct delta_base_cache_entry *ent)
1464
0
{
1465
0
  free(ent->data);
1466
0
  detach_delta_base_cache_entry(ent);
1467
0
}
1468
1469
void clear_delta_base_cache(void)
1470
0
{
1471
0
  struct list_head *lru, *tmp;
1472
0
  list_for_each_safe(lru, tmp, &delta_base_cache_lru) {
1473
0
    struct delta_base_cache_entry *entry =
1474
0
      list_entry(lru, struct delta_base_cache_entry, lru);
1475
0
    release_delta_base_cache(entry);
1476
0
  }
1477
0
}
1478
1479
static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
1480
  void *base, unsigned long base_size, enum object_type type)
1481
0
{
1482
0
  struct delta_base_cache_entry *ent;
1483
0
  struct list_head *lru, *tmp;
1484
1485
  /*
1486
   * Check required to avoid redundant entries when more than one thread
1487
   * is unpacking the same object, in unpack_entry() (since its phases I
1488
   * and III might run concurrently across multiple threads).
1489
   */
1490
0
  if (in_delta_base_cache(p, base_offset)) {
1491
0
    free(base);
1492
0
    return;
1493
0
  }
1494
1495
0
  delta_base_cached += base_size;
1496
1497
0
  list_for_each_safe(lru, tmp, &delta_base_cache_lru) {
1498
0
    struct delta_base_cache_entry *f =
1499
0
      list_entry(lru, struct delta_base_cache_entry, lru);
1500
0
    if (delta_base_cached <= delta_base_cache_limit)
1501
0
      break;
1502
0
    release_delta_base_cache(f);
1503
0
  }
1504
1505
0
  ent = xmalloc(sizeof(*ent));
1506
0
  ent->key.p = p;
1507
0
  ent->key.base_offset = base_offset;
1508
0
  ent->type = type;
1509
0
  ent->data = base;
1510
0
  ent->size = base_size;
1511
0
  list_add_tail(&ent->lru, &delta_base_cache_lru);
1512
1513
0
  if (!delta_base_cache.cmpfn)
1514
0
    hashmap_init(&delta_base_cache, delta_base_cache_hash_cmp, NULL, 0);
1515
0
  hashmap_entry_init(&ent->ent, pack_entry_hash(p, base_offset));
1516
0
  hashmap_add(&delta_base_cache, &ent->ent);
1517
0
}
1518
1519
int packed_object_info(struct repository *r, struct packed_git *p,
1520
           off_t obj_offset, struct object_info *oi)
1521
0
{
1522
0
  struct pack_window *w_curs = NULL;
1523
0
  unsigned long size;
1524
0
  off_t curpos = obj_offset;
1525
0
  enum object_type type;
1526
1527
  /*
1528
   * We always get the representation type, but only convert it to
1529
   * a "real" type later if the caller is interested.
1530
   */
1531
0
  if (oi->contentp) {
1532
0
    *oi->contentp = cache_or_unpack_entry(r, p, obj_offset, oi->sizep,
1533
0
                  &type);
1534
0
    if (!*oi->contentp)
1535
0
      type = OBJ_BAD;
1536
0
  } else {
1537
0
    type = unpack_object_header(p, &w_curs, &curpos, &size);
1538
0
  }
1539
1540
0
  if (!oi->contentp && oi->sizep) {
1541
0
    if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1542
0
      off_t tmp_pos = curpos;
1543
0
      off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos,
1544
0
                 type, obj_offset);
1545
0
      if (!base_offset) {
1546
0
        type = OBJ_BAD;
1547
0
        goto out;
1548
0
      }
1549
0
      *oi->sizep = get_size_from_delta(p, &w_curs, tmp_pos);
1550
0
      if (*oi->sizep == 0) {
1551
0
        type = OBJ_BAD;
1552
0
        goto out;
1553
0
      }
1554
0
    } else {
1555
0
      *oi->sizep = size;
1556
0
    }
1557
0
  }
1558
1559
0
  if (oi->disk_sizep) {
1560
0
    uint32_t pos;
1561
0
    if (offset_to_pack_pos(p, obj_offset, &pos) < 0) {
1562
0
      error("could not find object at offset %"PRIuMAX" "
1563
0
            "in pack %s", (uintmax_t)obj_offset, p->pack_name);
1564
0
      type = OBJ_BAD;
1565
0
      goto out;
1566
0
    }
1567
1568
0
    *oi->disk_sizep = pack_pos_to_offset(p, pos + 1) - obj_offset;
1569
0
  }
1570
1571
0
  if (oi->typep || oi->type_name) {
1572
0
    enum object_type ptot;
1573
0
    ptot = packed_to_object_type(r, p, obj_offset,
1574
0
               type, &w_curs, curpos);
1575
0
    if (oi->typep)
1576
0
      *oi->typep = ptot;
1577
0
    if (oi->type_name) {
1578
0
      const char *tn = type_name(ptot);
1579
0
      if (tn)
1580
0
        strbuf_addstr(oi->type_name, tn);
1581
0
    }
1582
0
    if (ptot < 0) {
1583
0
      type = OBJ_BAD;
1584
0
      goto out;
1585
0
    }
1586
0
  }
1587
1588
0
  if (oi->delta_base_oid) {
1589
0
    if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1590
0
      if (get_delta_base_oid(p, &w_curs, curpos,
1591
0
                 oi->delta_base_oid,
1592
0
                 type, obj_offset) < 0) {
1593
0
        type = OBJ_BAD;
1594
0
        goto out;
1595
0
      }
1596
0
    } else
1597
0
      oidclr(oi->delta_base_oid);
1598
0
  }
1599
1600
0
  oi->whence = in_delta_base_cache(p, obj_offset) ? OI_DBCACHED :
1601
0
                OI_PACKED;
1602
1603
0
out:
1604
0
  unuse_pack(&w_curs);
1605
0
  return type;
1606
0
}
1607
1608
static void *unpack_compressed_entry(struct packed_git *p,
1609
            struct pack_window **w_curs,
1610
            off_t curpos,
1611
            unsigned long size)
1612
0
{
1613
0
  int st;
1614
0
  git_zstream stream;
1615
0
  unsigned char *buffer, *in;
1616
1617
0
  buffer = xmallocz_gently(size);
1618
0
  if (!buffer)
1619
0
    return NULL;
1620
0
  memset(&stream, 0, sizeof(stream));
1621
0
  stream.next_out = buffer;
1622
0
  stream.avail_out = size + 1;
1623
1624
0
  git_inflate_init(&stream);
1625
0
  do {
1626
0
    in = use_pack(p, w_curs, curpos, &stream.avail_in);
1627
0
    stream.next_in = in;
1628
    /*
1629
     * Note: we must ensure the window section returned by
1630
     * use_pack() will be available throughout git_inflate()'s
1631
     * unlocked execution. Please refer to the comment at
1632
     * get_size_from_delta() to see how this is done.
1633
     */
1634
0
    obj_read_unlock();
1635
0
    st = git_inflate(&stream, Z_FINISH);
1636
0
    obj_read_lock();
1637
0
    if (!stream.avail_out)
1638
0
      break; /* the payload is larger than it should be */
1639
0
    curpos += stream.next_in - in;
1640
0
  } while (st == Z_OK || st == Z_BUF_ERROR);
1641
0
  git_inflate_end(&stream);
1642
0
  if ((st != Z_STREAM_END) || stream.total_out != size) {
1643
0
    free(buffer);
1644
0
    return NULL;
1645
0
  }
1646
1647
  /* versions of zlib can clobber unconsumed portion of outbuf */
1648
0
  buffer[size] = '\0';
1649
1650
0
  return buffer;
1651
0
}
1652
1653
static void write_pack_access_log(struct packed_git *p, off_t obj_offset)
1654
0
{
1655
0
  static struct trace_key pack_access = TRACE_KEY_INIT(PACK_ACCESS);
1656
0
  trace_printf_key(&pack_access, "%s %"PRIuMAX"\n",
1657
0
       p->pack_name, (uintmax_t)obj_offset);
1658
0
}
1659
1660
/*
 * When nonzero, unpack_entry() checks each entry with check_pack_crc()
 * before reading it, for packs whose index version is above 1.
 */
int do_check_packed_object_crc;

/* Number of delta-chain frames unpack_entry() keeps on the C stack. */
#define UNPACK_ENTRY_STACK_PREALLOC 64

/* One delta entry that unpack_entry() passed on its way to the base. */
struct unpack_entry_stack_ent {
  off_t obj_offset;     /* offset at which the delta entry starts */
  off_t curpos;         /* position after its header and base reference */
  unsigned long size;   /* size taken from the entry's object header */
};
1668
1669
void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
1670
       enum object_type *final_type, unsigned long *final_size)
1671
0
{
1672
0
  struct pack_window *w_curs = NULL;
1673
0
  off_t curpos = obj_offset;
1674
0
  void *data = NULL;
1675
0
  unsigned long size;
1676
0
  enum object_type type;
1677
0
  struct unpack_entry_stack_ent small_delta_stack[UNPACK_ENTRY_STACK_PREALLOC];
1678
0
  struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
1679
0
  int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
1680
0
  int base_from_cache = 0;
1681
1682
0
  write_pack_access_log(p, obj_offset);
1683
1684
  /* PHASE 1: drill down to the innermost base object */
1685
0
  for (;;) {
1686
0
    off_t base_offset;
1687
0
    int i;
1688
0
    struct delta_base_cache_entry *ent;
1689
1690
0
    ent = get_delta_base_cache_entry(p, curpos);
1691
0
    if (ent) {
1692
0
      type = ent->type;
1693
0
      data = ent->data;
1694
0
      size = ent->size;
1695
0
      detach_delta_base_cache_entry(ent);
1696
0
      base_from_cache = 1;
1697
0
      break;
1698
0
    }
1699
1700
0
    if (do_check_packed_object_crc && p->index_version > 1) {
1701
0
      uint32_t pack_pos, index_pos;
1702
0
      off_t len;
1703
1704
0
      if (offset_to_pack_pos(p, obj_offset, &pack_pos) < 0) {
1705
0
        error("could not find object at offset %"PRIuMAX" in pack %s",
1706
0
              (uintmax_t)obj_offset, p->pack_name);
1707
0
        data = NULL;
1708
0
        goto out;
1709
0
      }
1710
1711
0
      len = pack_pos_to_offset(p, pack_pos + 1) - obj_offset;
1712
0
      index_pos = pack_pos_to_index(p, pack_pos);
1713
0
      if (check_pack_crc(p, &w_curs, obj_offset, len, index_pos)) {
1714
0
        struct object_id oid;
1715
0
        nth_packed_object_id(&oid, p, index_pos);
1716
0
        error("bad packed object CRC for %s",
1717
0
              oid_to_hex(&oid));
1718
0
        mark_bad_packed_object(p, &oid);
1719
0
        data = NULL;
1720
0
        goto out;
1721
0
      }
1722
0
    }
1723
1724
0
    type = unpack_object_header(p, &w_curs, &curpos, &size);
1725
0
    if (type != OBJ_OFS_DELTA && type != OBJ_REF_DELTA)
1726
0
      break;
1727
1728
0
    base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
1729
0
    if (!base_offset) {
1730
0
      error("failed to validate delta base reference "
1731
0
            "at offset %"PRIuMAX" from %s",
1732
0
            (uintmax_t)curpos, p->pack_name);
1733
      /* bail to phase 2, in hopes of recovery */
1734
0
      data = NULL;
1735
0
      break;
1736
0
    }
1737
1738
    /* push object, proceed to base */
1739
0
    if (delta_stack_nr >= delta_stack_alloc
1740
0
        && delta_stack == small_delta_stack) {
1741
0
      delta_stack_alloc = alloc_nr(delta_stack_nr);
1742
0
      ALLOC_ARRAY(delta_stack, delta_stack_alloc);
1743
0
      COPY_ARRAY(delta_stack, small_delta_stack,
1744
0
           delta_stack_nr);
1745
0
    } else {
1746
0
      ALLOC_GROW(delta_stack, delta_stack_nr+1, delta_stack_alloc);
1747
0
    }
1748
0
    i = delta_stack_nr++;
1749
0
    delta_stack[i].obj_offset = obj_offset;
1750
0
    delta_stack[i].curpos = curpos;
1751
0
    delta_stack[i].size = size;
1752
1753
0
    curpos = obj_offset = base_offset;
1754
0
  }
1755
1756
  /* PHASE 2: handle the base */
1757
0
  switch (type) {
1758
0
  case OBJ_OFS_DELTA:
1759
0
  case OBJ_REF_DELTA:
1760
0
    if (data)
1761
0
      BUG("unpack_entry: left loop at a valid delta");
1762
0
    break;
1763
0
  case OBJ_COMMIT:
1764
0
  case OBJ_TREE:
1765
0
  case OBJ_BLOB:
1766
0
  case OBJ_TAG:
1767
0
    if (!base_from_cache)
1768
0
      data = unpack_compressed_entry(p, &w_curs, curpos, size);
1769
0
    break;
1770
0
  default:
1771
0
    data = NULL;
1772
0
    error("unknown object type %i at offset %"PRIuMAX" in %s",
1773
0
          type, (uintmax_t)obj_offset, p->pack_name);
1774
0
  }
1775
1776
  /* PHASE 3: apply deltas in order */
1777
1778
  /* invariants:
1779
   *   'data' holds the base data, or NULL if there was corruption
1780
   */
1781
0
  while (delta_stack_nr) {
1782
0
    void *delta_data;
1783
0
    void *base = data;
1784
0
    void *external_base = NULL;
1785
0
    unsigned long delta_size, base_size = size;
1786
0
    int i;
1787
0
    off_t base_obj_offset = obj_offset;
1788
1789
0
    data = NULL;
1790
1791
0
    if (!base) {
1792
      /*
1793
       * We're probably in deep shit, but let's try to fetch
1794
       * the required base anyway from another pack or loose.
1795
       * This is costly but should happen only in the presence
1796
       * of a corrupted pack, and is better than failing outright.
1797
       */
1798
0
      uint32_t pos;
1799
0
      struct object_id base_oid;
1800
0
      if (!(offset_to_pack_pos(p, obj_offset, &pos))) {
1801
0
        struct object_info oi = OBJECT_INFO_INIT;
1802
1803
0
        nth_packed_object_id(&base_oid, p,
1804
0
                 pack_pos_to_index(p, pos));
1805
0
        error("failed to read delta base object %s"
1806
0
              " at offset %"PRIuMAX" from %s",
1807
0
              oid_to_hex(&base_oid), (uintmax_t)obj_offset,
1808
0
              p->pack_name);
1809
0
        mark_bad_packed_object(p, &base_oid);
1810
1811
0
        oi.typep = &type;
1812
0
        oi.sizep = &base_size;
1813
0
        oi.contentp = &base;
1814
0
        if (oid_object_info_extended(r, &base_oid, &oi, 0) < 0)
1815
0
          base = NULL;
1816
1817
0
        external_base = base;
1818
0
      }
1819
0
    }
1820
1821
0
    i = --delta_stack_nr;
1822
0
    obj_offset = delta_stack[i].obj_offset;
1823
0
    curpos = delta_stack[i].curpos;
1824
0
    delta_size = delta_stack[i].size;
1825
1826
0
    if (!base)
1827
0
      continue;
1828
1829
0
    delta_data = unpack_compressed_entry(p, &w_curs, curpos, delta_size);
1830
1831
0
    if (!delta_data) {
1832
0
      error("failed to unpack compressed delta "
1833
0
            "at offset %"PRIuMAX" from %s",
1834
0
            (uintmax_t)curpos, p->pack_name);
1835
0
      data = NULL;
1836
0
    } else {
1837
0
      data = patch_delta(base, base_size, delta_data,
1838
0
             delta_size, &size);
1839
1840
      /*
1841
       * We could not apply the delta; warn the user, but
1842
       * keep going. Our failure will be noticed either in
1843
       * the next iteration of the loop, or if this is the
1844
       * final delta, in the caller when we return NULL.
1845
       * Those code paths will take care of making a more
1846
       * explicit warning and retrying with another copy of
1847
       * the object.
1848
       */
1849
0
      if (!data)
1850
0
        error("failed to apply delta");
1851
0
    }
1852
1853
    /*
1854
     * We delay adding `base` to the cache until the end of the loop
1855
     * because unpack_compressed_entry() momentarily releases the
1856
     * obj_read_mutex, giving another thread the chance to access
1857
     * the cache. Therefore, if `base` was already there, this other
1858
     * thread could free() it (e.g. to make space for another entry)
1859
     * before we are done using it.
1860
     */
1861
0
    if (!external_base)
1862
0
      add_delta_base_cache(p, base_obj_offset, base, base_size, type);
1863
1864
0
    free(delta_data);
1865
0
    free(external_base);
1866
0
  }
1867
1868
0
  if (final_type)
1869
0
    *final_type = type;
1870
0
  if (final_size)
1871
0
    *final_size = size;
1872
1873
0
out:
1874
0
  unuse_pack(&w_curs);
1875
1876
0
  if (delta_stack != small_delta_stack)
1877
0
    free(delta_stack);
1878
1879
0
  return data;
1880
0
}
1881
1882
int bsearch_pack(const struct object_id *oid, const struct packed_git *p, uint32_t *result)
1883
0
{
1884
0
  const unsigned char *index_fanout = p->index_data;
1885
0
  const unsigned char *index_lookup;
1886
0
  const unsigned int hashsz = the_hash_algo->rawsz;
1887
0
  int index_lookup_width;
1888
1889
0
  if (!index_fanout)
1890
0
    BUG("bsearch_pack called without a valid pack-index");
1891
1892
0
  index_lookup = index_fanout + 4 * 256;
1893
0
  if (p->index_version == 1) {
1894
0
    index_lookup_width = hashsz + 4;
1895
0
    index_lookup += 4;
1896
0
  } else {
1897
0
    index_lookup_width = hashsz;
1898
0
    index_fanout += 8;
1899
0
    index_lookup += 8;
1900
0
  }
1901
1902
0
  return bsearch_hash(oid->hash, (const uint32_t*)index_fanout,
1903
0
          index_lookup, index_lookup_width, result);
1904
0
}
1905
1906
int nth_packed_object_id(struct object_id *oid,
1907
       struct packed_git *p,
1908
       uint32_t n)
1909
0
{
1910
0
  const unsigned char *index = p->index_data;
1911
0
  const unsigned int hashsz = the_hash_algo->rawsz;
1912
0
  if (!index) {
1913
0
    if (open_pack_index(p))
1914
0
      return -1;
1915
0
    index = p->index_data;
1916
0
  }
1917
0
  if (n >= p->num_objects)
1918
0
    return -1;
1919
0
  index += 4 * 256;
1920
0
  if (p->index_version == 1) {
1921
0
    oidread(oid, index + st_add(st_mult(hashsz + 4, n), 4));
1922
0
  } else {
1923
0
    index += 8;
1924
0
    oidread(oid, index + st_mult(hashsz, n));
1925
0
  }
1926
0
  return 0;
1927
0
}
1928
1929
void check_pack_index_ptr(const struct packed_git *p, const void *vptr)
1930
0
{
1931
0
  const unsigned char *ptr = vptr;
1932
0
  const unsigned char *start = p->index_data;
1933
0
  const unsigned char *end = start + p->index_size;
1934
0
  if (ptr < start)
1935
0
    die(_("offset before start of pack index for %s (corrupt index?)"),
1936
0
        p->pack_name);
1937
  /* No need to check for underflow; .idx files must be at least 8 bytes */
1938
0
  if (ptr >= end - 8)
1939
0
    die(_("offset beyond end of pack index for %s (truncated index?)"),
1940
0
        p->pack_name);
1941
0
}
1942
1943
off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)
1944
0
{
1945
0
  const unsigned char *index = p->index_data;
1946
0
  const unsigned int hashsz = the_hash_algo->rawsz;
1947
0
  index += 4 * 256;
1948
0
  if (p->index_version == 1) {
1949
0
    return ntohl(*((uint32_t *)(index + st_mult(hashsz + 4, n))));
1950
0
  } else {
1951
0
    uint32_t off;
1952
0
    index += st_add(8, st_mult(p->num_objects, hashsz + 4));
1953
0
    off = ntohl(*((uint32_t *)(index + st_mult(4, n))));
1954
0
    if (!(off & 0x80000000))
1955
0
      return off;
1956
0
    index += st_add(st_mult(p->num_objects, 4),
1957
0
        st_mult(off & 0x7fffffff, 8));
1958
0
    check_pack_index_ptr(p, index);
1959
0
    return get_be64(index);
1960
0
  }
1961
0
}
1962
1963
off_t find_pack_entry_one(const unsigned char *sha1,
1964
          struct packed_git *p)
1965
0
{
1966
0
  const unsigned char *index = p->index_data;
1967
0
  struct object_id oid;
1968
0
  uint32_t result;
1969
1970
0
  if (!index) {
1971
0
    if (open_pack_index(p))
1972
0
      return 0;
1973
0
  }
1974
1975
0
  hashcpy(oid.hash, sha1);
1976
0
  if (bsearch_pack(&oid, p, &result))
1977
0
    return nth_packed_object_offset(p, result);
1978
0
  return 0;
1979
0
}
1980
1981
int is_pack_valid(struct packed_git *p)
1982
0
{
1983
  /* An already open pack is known to be valid. */
1984
0
  if (p->pack_fd != -1)
1985
0
    return 1;
1986
1987
  /* If the pack has one window completely covering the
1988
   * file size, the pack is known to be valid even if
1989
   * the descriptor is not currently open.
1990
   */
1991
0
  if (p->windows) {
1992
0
    struct pack_window *w = p->windows;
1993
1994
0
    if (!w->offset && w->len == p->pack_size)
1995
0
      return 1;
1996
0
  }
1997
1998
  /* Force the pack to open to prove its valid. */
1999
0
  return !open_packed_git(p);
2000
0
}
2001
2002
struct packed_git *find_sha1_pack(const unsigned char *sha1,
2003
          struct packed_git *packs)
2004
0
{
2005
0
  struct packed_git *p;
2006
2007
0
  for (p = packs; p; p = p->next) {
2008
0
    if (find_pack_entry_one(sha1, p))
2009
0
      return p;
2010
0
  }
2011
0
  return NULL;
2012
2013
0
}
2014
2015
static int fill_pack_entry(const struct object_id *oid,
2016
         struct pack_entry *e,
2017
         struct packed_git *p)
2018
0
{
2019
0
  off_t offset;
2020
2021
0
  if (oidset_size(&p->bad_objects) &&
2022
0
      oidset_contains(&p->bad_objects, oid))
2023
0
    return 0;
2024
2025
0
  offset = find_pack_entry_one(oid->hash, p);
2026
0
  if (!offset)
2027
0
    return 0;
2028
2029
  /*
2030
   * We are about to tell the caller where they can locate the
2031
   * requested object.  We better make sure the packfile is
2032
   * still here and can be accessed before supplying that
2033
   * answer, as it may have been deleted since the index was
2034
   * loaded!
2035
   */
2036
0
  if (!is_pack_valid(p))
2037
0
    return 0;
2038
0
  e->offset = offset;
2039
0
  e->p = p;
2040
0
  return 1;
2041
0
}
2042
2043
int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e)
2044
130k
{
2045
130k
  struct list_head *pos;
2046
130k
  struct multi_pack_index *m;
2047
2048
130k
  prepare_packed_git(r);
2049
130k
  if (!r->objects->packed_git && !r->objects->multi_pack_index)
2050
130k
    return 0;
2051
2052
0
  for (m = r->objects->multi_pack_index; m; m = m->next) {
2053
0
    if (fill_midx_entry(r, oid, e, m))
2054
0
      return 1;
2055
0
  }
2056
2057
0
  list_for_each(pos, &r->objects->packed_git_mru) {
2058
0
    struct packed_git *p = list_entry(pos, struct packed_git, mru);
2059
0
    if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) {
2060
0
      list_move(&p->mru, &r->objects->packed_git_mru);
2061
0
      return 1;
2062
0
    }
2063
0
  }
2064
0
  return 0;
2065
0
}
2066
2067
static void maybe_invalidate_kept_pack_cache(struct repository *r,
2068
               unsigned flags)
2069
0
{
2070
0
  if (!r->objects->kept_pack_cache.packs)
2071
0
    return;
2072
0
  if (r->objects->kept_pack_cache.flags == flags)
2073
0
    return;
2074
0
  FREE_AND_NULL(r->objects->kept_pack_cache.packs);
2075
0
  r->objects->kept_pack_cache.flags = 0;
2076
0
}
2077
2078
static struct packed_git **kept_pack_cache(struct repository *r, unsigned flags)
2079
0
{
2080
0
  maybe_invalidate_kept_pack_cache(r, flags);
2081
2082
0
  if (!r->objects->kept_pack_cache.packs) {
2083
0
    struct packed_git **packs = NULL;
2084
0
    size_t nr = 0, alloc = 0;
2085
0
    struct packed_git *p;
2086
2087
    /*
2088
     * We want "all" packs here, because we need to cover ones that
2089
     * are used by a midx, as well. We need to look in every one of
2090
     * them (instead of the midx itself) to cover duplicates. It's
2091
     * possible that an object is found in two packs that the midx
2092
     * covers, one kept and one not kept, but the midx returns only
2093
     * the non-kept version.
2094
     */
2095
0
    for (p = get_all_packs(r); p; p = p->next) {
2096
0
      if ((p->pack_keep && (flags & ON_DISK_KEEP_PACKS)) ||
2097
0
          (p->pack_keep_in_core && (flags & IN_CORE_KEEP_PACKS))) {
2098
0
        ALLOC_GROW(packs, nr + 1, alloc);
2099
0
        packs[nr++] = p;
2100
0
      }
2101
0
    }
2102
0
    ALLOC_GROW(packs, nr + 1, alloc);
2103
0
    packs[nr] = NULL;
2104
2105
0
    r->objects->kept_pack_cache.packs = packs;
2106
0
    r->objects->kept_pack_cache.flags = flags;
2107
0
  }
2108
2109
0
  return r->objects->kept_pack_cache.packs;
2110
0
}
2111
2112
/*
 * Look for `oid` only in the packs of `r` that are kept according to
 * `flags`.  Returns 1 with `e` filled in on the first hit, else 0.
 */
int find_kept_pack_entry(struct repository *r,
       const struct object_id *oid,
       unsigned flags,
       struct pack_entry *e)
{
  struct packed_git **kept = kept_pack_cache(r, flags);
  size_t i;

  for (i = 0; kept[i]; i++)
    if (fill_pack_entry(oid, e, kept[i]))
      return 1;

  return 0;
}
2127
2128
int has_object_pack(const struct object_id *oid)
2129
0
{
2130
0
  struct pack_entry e;
2131
0
  return find_pack_entry(the_repository, oid, &e);
2132
0
}
2133
2134
int has_object_kept_pack(const struct object_id *oid, unsigned flags)
2135
0
{
2136
0
  struct pack_entry e;
2137
0
  return find_kept_pack_entry(the_repository, oid, flags, &e);
2138
0
}
2139
2140
/*
 * Return 1 if stat() succeeds on the pack index path derived from
 * `sha1`, and 0 otherwise.
 */
int has_pack_index(const unsigned char *sha1)
{
  struct stat st;
  const char *idx_path = sha1_pack_index_name(sha1);

  return stat(idx_path, &st) == 0;
}
2147
2148
int for_each_object_in_pack(struct packed_git *p,
2149
          each_packed_object_fn cb, void *data,
2150
          enum for_each_object_flags flags)
2151
0
{
2152
0
  uint32_t i;
2153
0
  int r = 0;
2154
2155
0
  if (flags & FOR_EACH_OBJECT_PACK_ORDER) {
2156
0
    if (load_pack_revindex(the_repository, p))
2157
0
      return -1;
2158
0
  }
2159
2160
0
  for (i = 0; i < p->num_objects; i++) {
2161
0
    uint32_t index_pos;
2162
0
    struct object_id oid;
2163
2164
    /*
2165
     * We are iterating "i" from 0 up to num_objects, but its
2166
     * meaning may be different, depending on the requested output
2167
     * order:
2168
     *
2169
     *   - in object-name order, it is the same as the index order
2170
     *     used by nth_packed_object_id(), so we can pass it
2171
     *     directly
2172
     *
2173
     *   - in pack-order, it is pack position, which we must
2174
     *     convert to an index position in order to get the oid.
2175
     */
2176
0
    if (flags & FOR_EACH_OBJECT_PACK_ORDER)
2177
0
      index_pos = pack_pos_to_index(p, i);
2178
0
    else
2179
0
      index_pos = i;
2180
2181
0
    if (nth_packed_object_id(&oid, p, index_pos) < 0)
2182
0
      return error("unable to get sha1 of object %u in %s",
2183
0
             index_pos, p->pack_name);
2184
2185
0
    r = cb(&oid, p, index_pos, data);
2186
0
    if (r)
2187
0
      break;
2188
0
  }
2189
0
  return r;
2190
0
}
2191
2192
/*
 * Invoke `cb` for every object in every pack of the_repository that is
 * not filtered out by `flags`.
 *
 * Returns the first non-zero value produced by for_each_object_in_pack()
 * (which stops the walk); otherwise 1 if the index of any pack could
 * not be opened and 0 if everything went fine.
 */
int for_each_packed_object(each_packed_object_fn cb, void *data,
         enum for_each_object_flags flags)
{
  struct packed_git *p;
  int pack_errors = 0;

  prepare_packed_git(the_repository);
  for (p = get_all_packs(the_repository); p; p = p->next) {
    int ret;

    /* Apply the pack-level filters requested by the caller. */
    if (((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) ||
        ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) && !p->pack_promisor) ||
        ((flags & FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
         p->pack_keep_in_core) ||
        ((flags & FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
         p->pack_keep))
      continue;

    /* Remember the failure but keep going with the other packs. */
    if (open_pack_index(p)) {
      pack_errors = 1;
      continue;
    }

    ret = for_each_object_in_pack(p, cb, data, flags);
    if (ret)
      return ret;
  }

  return pack_errors;
}
2222
2223
static int add_promisor_object(const struct object_id *oid,
2224
             struct packed_git *pack UNUSED,
2225
             uint32_t pos UNUSED,
2226
             void *set_)
2227
0
{
2228
0
  struct oidset *set = set_;
2229
0
  struct object *obj;
2230
0
  int we_parsed_object;
2231
2232
0
  obj = lookup_object(the_repository, oid);
2233
0
  if (obj && obj->parsed) {
2234
0
    we_parsed_object = 0;
2235
0
  } else {
2236
0
    we_parsed_object = 1;
2237
0
    obj = parse_object(the_repository, oid);
2238
0
  }
2239
2240
0
  if (!obj)
2241
0
    return 1;
2242
2243
0
  oidset_insert(set, oid);
2244
2245
  /*
2246
   * If this is a tree, commit, or tag, the objects it refers
2247
   * to are also promisor objects. (Blobs refer to no objects->)
2248
   */
2249
0
  if (obj->type == OBJ_TREE) {
2250
0
    struct tree *tree = (struct tree *)obj;
2251
0
    struct tree_desc desc;
2252
0
    struct name_entry entry;
2253
0
    if (init_tree_desc_gently(&desc, tree->buffer, tree->size, 0))
2254
      /*
2255
       * Error messages are given when packs are
2256
       * verified, so do not print any here.
2257
       */
2258
0
      return 0;
2259
0
    while (tree_entry_gently(&desc, &entry))
2260
0
      oidset_insert(set, &entry.oid);
2261
0
    if (we_parsed_object)
2262
0
      free_tree_buffer(tree);
2263
0
  } else if (obj->type == OBJ_COMMIT) {
2264
0
    struct commit *commit = (struct commit *) obj;
2265
0
    struct commit_list *parents = commit->parents;
2266
2267
0
    oidset_insert(set, get_commit_tree_oid(commit));
2268
0
    for (; parents; parents = parents->next)
2269
0
      oidset_insert(set, &parents->item->object.oid);
2270
0
  } else if (obj->type == OBJ_TAG) {
2271
0
    struct tag *tag = (struct tag *) obj;
2272
0
    oidset_insert(set, get_tagged_oid(tag));
2273
0
  }
2274
0
  return 0;
2275
0
}
2276
2277
int is_promisor_object(const struct object_id *oid)
2278
0
{
2279
0
  static struct oidset promisor_objects;
2280
0
  static int promisor_objects_prepared;
2281
2282
0
  if (!promisor_objects_prepared) {
2283
0
    if (repo_has_promisor_remote(the_repository)) {
2284
0
      for_each_packed_object(add_promisor_object,
2285
0
                 &promisor_objects,
2286
0
                 FOR_EACH_OBJECT_PROMISOR_ONLY |
2287
0
                 FOR_EACH_OBJECT_PACK_ORDER);
2288
0
    }
2289
0
    promisor_objects_prepared = 1;
2290
0
  }
2291
0
  return oidset_contains(&promisor_objects, oid);
2292
0
}