Line | Count | Source (jump to first uncovered line) |
1 | | #define USE_THE_REPOSITORY_VARIABLE |
2 | | |
3 | | #include "git-compat-util.h" |
4 | | #include "config.h" |
5 | | #include "dir.h" |
6 | | #include "hex.h" |
7 | | #include "packfile.h" |
8 | | #include "object-file.h" |
9 | | #include "hash-lookup.h" |
10 | | #include "midx.h" |
11 | | #include "progress.h" |
12 | | #include "trace2.h" |
13 | | #include "chunk-format.h" |
14 | | #include "pack-bitmap.h" |
15 | | #include "pack-revindex.h" |
16 | | |
17 | | int midx_checksum_valid(struct multi_pack_index *m); |
18 | | void clear_midx_files_ext(const char *object_dir, const char *ext, |
19 | | const char *keep_hash); |
20 | | void clear_incremental_midx_files_ext(const char *object_dir, const char *ext, |
21 | | char **keep_hashes, |
22 | | uint32_t hashes_nr); |
23 | | int cmp_idx_or_pack_name(const char *idx_or_pack_name, |
24 | | const char *idx_name); |
25 | | |
26 | | const unsigned char *get_midx_checksum(struct multi_pack_index *m) |
27 | 0 | { |
28 | 0 | return m->data + m->data_len - the_hash_algo->rawsz; |
29 | 0 | } |
30 | | |
31 | | void get_midx_filename(struct strbuf *out, const char *object_dir) |
32 | 55.4k | { |
33 | 55.4k | get_midx_filename_ext(out, object_dir, NULL, NULL); |
34 | 55.4k | } |
35 | | |
/*
 * Append "<object_dir>/pack/multi-pack-index" to `out`. When `ext` is
 * non-NULL, "-<hex of hash>.<ext>" is appended as well; `hash` is only
 * consulted in that case.
 */
void get_midx_filename_ext(struct strbuf *out, const char *object_dir,
			   const unsigned char *hash, const char *ext)
{
	strbuf_addf(out, "%s/pack/multi-pack-index", object_dir);
	if (!ext)
		return;

	strbuf_addf(out, "-%s.%s", hash_to_hex(hash), ext);
}
43 | | |
44 | | static int midx_read_oid_fanout(const unsigned char *chunk_start, |
45 | | size_t chunk_size, void *data) |
46 | 0 | { |
47 | 0 | int i; |
48 | 0 | struct multi_pack_index *m = data; |
49 | 0 | m->chunk_oid_fanout = (uint32_t *)chunk_start; |
50 | |
|
51 | 0 | if (chunk_size != 4 * 256) { |
52 | 0 | error(_("multi-pack-index OID fanout is of the wrong size")); |
53 | 0 | return 1; |
54 | 0 | } |
55 | 0 | for (i = 0; i < 255; i++) { |
56 | 0 | uint32_t oid_fanout1 = ntohl(m->chunk_oid_fanout[i]); |
57 | 0 | uint32_t oid_fanout2 = ntohl(m->chunk_oid_fanout[i+1]); |
58 | |
|
59 | 0 | if (oid_fanout1 > oid_fanout2) { |
60 | 0 | error(_("oid fanout out of order: fanout[%d] = %"PRIx32" > %"PRIx32" = fanout[%d]"), |
61 | 0 | i, oid_fanout1, oid_fanout2, i + 1); |
62 | 0 | return 1; |
63 | 0 | } |
64 | 0 | } |
65 | 0 | m->num_objects = ntohl(m->chunk_oid_fanout[255]); |
66 | 0 | return 0; |
67 | 0 | } |
68 | | |
69 | | static int midx_read_oid_lookup(const unsigned char *chunk_start, |
70 | | size_t chunk_size, void *data) |
71 | 0 | { |
72 | 0 | struct multi_pack_index *m = data; |
73 | 0 | m->chunk_oid_lookup = chunk_start; |
74 | |
|
75 | 0 | if (chunk_size != st_mult(m->hash_len, m->num_objects)) { |
76 | 0 | error(_("multi-pack-index OID lookup chunk is the wrong size")); |
77 | 0 | return 1; |
78 | 0 | } |
79 | 0 | return 0; |
80 | 0 | } |
81 | | |
82 | | static int midx_read_object_offsets(const unsigned char *chunk_start, |
83 | | size_t chunk_size, void *data) |
84 | 0 | { |
85 | 0 | struct multi_pack_index *m = data; |
86 | 0 | m->chunk_object_offsets = chunk_start; |
87 | |
|
88 | 0 | if (chunk_size != st_mult(m->num_objects, MIDX_CHUNK_OFFSET_WIDTH)) { |
89 | 0 | error(_("multi-pack-index object offset chunk is the wrong size")); |
90 | 0 | return 1; |
91 | 0 | } |
92 | 0 | return 0; |
93 | 0 | } |
94 | | |
95 | 0 | #define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + the_hash_algo->rawsz) |
96 | | |
97 | | static struct multi_pack_index *load_multi_pack_index_one(const char *object_dir, |
98 | | const char *midx_name, |
99 | | int local) |
100 | 55.4k | { |
101 | 55.4k | struct multi_pack_index *m = NULL; |
102 | 55.4k | int fd; |
103 | 55.4k | struct stat st; |
104 | 55.4k | size_t midx_size; |
105 | 55.4k | void *midx_map = NULL; |
106 | 55.4k | uint32_t hash_version; |
107 | 55.4k | uint32_t i; |
108 | 55.4k | const char *cur_pack_name; |
109 | 55.4k | struct chunkfile *cf = NULL; |
110 | | |
111 | 55.4k | fd = git_open(midx_name); |
112 | | |
113 | 55.4k | if (fd < 0) |
114 | 55.4k | goto cleanup_fail; |
115 | 0 | if (fstat(fd, &st)) { |
116 | 0 | error_errno(_("failed to read %s"), midx_name); |
117 | 0 | goto cleanup_fail; |
118 | 0 | } |
119 | | |
120 | 0 | midx_size = xsize_t(st.st_size); |
121 | |
|
122 | 0 | if (midx_size < MIDX_MIN_SIZE) { |
123 | 0 | error(_("multi-pack-index file %s is too small"), midx_name); |
124 | 0 | goto cleanup_fail; |
125 | 0 | } |
126 | | |
127 | 0 | midx_map = xmmap(NULL, midx_size, PROT_READ, MAP_PRIVATE, fd, 0); |
128 | 0 | close(fd); |
129 | |
|
130 | 0 | FLEX_ALLOC_STR(m, object_dir, object_dir); |
131 | 0 | m->data = midx_map; |
132 | 0 | m->data_len = midx_size; |
133 | 0 | m->local = local; |
134 | |
|
135 | 0 | m->signature = get_be32(m->data); |
136 | 0 | if (m->signature != MIDX_SIGNATURE) |
137 | 0 | die(_("multi-pack-index signature 0x%08x does not match signature 0x%08x"), |
138 | 0 | m->signature, MIDX_SIGNATURE); |
139 | | |
140 | 0 | m->version = m->data[MIDX_BYTE_FILE_VERSION]; |
141 | 0 | if (m->version != MIDX_VERSION) |
142 | 0 | die(_("multi-pack-index version %d not recognized"), |
143 | 0 | m->version); |
144 | | |
145 | 0 | hash_version = m->data[MIDX_BYTE_HASH_VERSION]; |
146 | 0 | if (hash_version != oid_version(the_hash_algo)) { |
147 | 0 | error(_("multi-pack-index hash version %u does not match version %u"), |
148 | 0 | hash_version, oid_version(the_hash_algo)); |
149 | 0 | goto cleanup_fail; |
150 | 0 | } |
151 | 0 | m->hash_len = the_hash_algo->rawsz; |
152 | |
|
153 | 0 | m->num_chunks = m->data[MIDX_BYTE_NUM_CHUNKS]; |
154 | |
|
155 | 0 | m->num_packs = get_be32(m->data + MIDX_BYTE_NUM_PACKS); |
156 | |
|
157 | 0 | m->preferred_pack_idx = -1; |
158 | |
|
159 | 0 | cf = init_chunkfile(NULL); |
160 | |
|
161 | 0 | if (read_table_of_contents(cf, m->data, midx_size, |
162 | 0 | MIDX_HEADER_SIZE, m->num_chunks, |
163 | 0 | MIDX_CHUNK_ALIGNMENT)) |
164 | 0 | goto cleanup_fail; |
165 | | |
166 | 0 | if (pair_chunk(cf, MIDX_CHUNKID_PACKNAMES, &m->chunk_pack_names, &m->chunk_pack_names_len)) |
167 | 0 | die(_("multi-pack-index required pack-name chunk missing or corrupted")); |
168 | 0 | if (read_chunk(cf, MIDX_CHUNKID_OIDFANOUT, midx_read_oid_fanout, m)) |
169 | 0 | die(_("multi-pack-index required OID fanout chunk missing or corrupted")); |
170 | 0 | if (read_chunk(cf, MIDX_CHUNKID_OIDLOOKUP, midx_read_oid_lookup, m)) |
171 | 0 | die(_("multi-pack-index required OID lookup chunk missing or corrupted")); |
172 | 0 | if (read_chunk(cf, MIDX_CHUNKID_OBJECTOFFSETS, midx_read_object_offsets, m)) |
173 | 0 | die(_("multi-pack-index required object offsets chunk missing or corrupted")); |
174 | | |
175 | 0 | pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets, |
176 | 0 | &m->chunk_large_offsets_len); |
177 | 0 | if (git_env_bool("GIT_TEST_MIDX_READ_BTMP", 1)) |
178 | 0 | pair_chunk(cf, MIDX_CHUNKID_BITMAPPEDPACKS, |
179 | 0 | (const unsigned char **)&m->chunk_bitmapped_packs, |
180 | 0 | &m->chunk_bitmapped_packs_len); |
181 | |
|
182 | 0 | if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1)) |
183 | 0 | pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex, |
184 | 0 | &m->chunk_revindex_len); |
185 | |
|
186 | 0 | CALLOC_ARRAY(m->pack_names, m->num_packs); |
187 | 0 | CALLOC_ARRAY(m->packs, m->num_packs); |
188 | |
|
189 | 0 | cur_pack_name = (const char *)m->chunk_pack_names; |
190 | 0 | for (i = 0; i < m->num_packs; i++) { |
191 | 0 | const char *end; |
192 | 0 | size_t avail = m->chunk_pack_names_len - |
193 | 0 | (cur_pack_name - (const char *)m->chunk_pack_names); |
194 | |
|
195 | 0 | m->pack_names[i] = cur_pack_name; |
196 | |
|
197 | 0 | end = memchr(cur_pack_name, '\0', avail); |
198 | 0 | if (!end) |
199 | 0 | die(_("multi-pack-index pack-name chunk is too short")); |
200 | 0 | cur_pack_name = end + 1; |
201 | |
|
202 | 0 | if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0) |
203 | 0 | die(_("multi-pack-index pack names out of order: '%s' before '%s'"), |
204 | 0 | m->pack_names[i - 1], |
205 | 0 | m->pack_names[i]); |
206 | 0 | } |
207 | | |
208 | 0 | trace2_data_intmax("midx", the_repository, "load/num_packs", m->num_packs); |
209 | 0 | trace2_data_intmax("midx", the_repository, "load/num_objects", m->num_objects); |
210 | |
|
211 | 0 | free_chunkfile(cf); |
212 | 0 | return m; |
213 | | |
214 | 55.4k | cleanup_fail: |
215 | 55.4k | free(m); |
216 | 55.4k | free_chunkfile(cf); |
217 | 55.4k | if (midx_map) |
218 | 0 | munmap(midx_map, midx_size); |
219 | 55.4k | if (0 <= fd) |
220 | 0 | close(fd); |
221 | 55.4k | return NULL; |
222 | 0 | } |
223 | | |
/*
 * Append "<object_dir>/pack/multi-pack-index.d", the directory used for
 * the multi-pack-index chain, to `buf`.
 */
void get_midx_chain_dirname(struct strbuf *buf, const char *object_dir)
{
	strbuf_addf(buf, "%s/pack/multi-pack-index.d", object_dir);
}
228 | | |
/*
 * Append the path of the "multi-pack-index-chain" file, which lives
 * inside the chain directory of `object_dir`, to `buf`.
 */
void get_midx_chain_filename(struct strbuf *buf, const char *object_dir)
{
	/* Directory part first, then the fixed file name. */
	get_midx_chain_dirname(buf, object_dir);
	strbuf_addstr(buf, "/multi-pack-index-chain");
}
234 | | |
/*
 * Append the path of one file inside the chain directory to `buf`:
 * "<chain dir>/multi-pack-index-<hex of hash>.<ext>".
 */
void get_split_midx_filename_ext(struct strbuf *buf, const char *object_dir,
				 const unsigned char *hash, const char *ext)
{
	const char *hex;

	get_midx_chain_dirname(buf, object_dir);

	hex = hash_to_hex(hash);
	strbuf_addf(buf, "/multi-pack-index-%s.%s", hex, ext);
}
241 | | |
242 | | static int open_multi_pack_index_chain(const char *chain_file, |
243 | | int *fd, struct stat *st) |
244 | 55.4k | { |
245 | 55.4k | *fd = git_open(chain_file); |
246 | 55.4k | if (*fd < 0) |
247 | 55.4k | return 0; |
248 | 0 | if (fstat(*fd, st)) { |
249 | 0 | close(*fd); |
250 | 0 | return 0; |
251 | 0 | } |
252 | 0 | if (st->st_size < the_hash_algo->hexsz) { |
253 | 0 | close(*fd); |
254 | 0 | if (!st->st_size) { |
255 | | /* treat empty files the same as missing */ |
256 | 0 | errno = ENOENT; |
257 | 0 | } else { |
258 | 0 | warning(_("multi-pack-index chain file too small")); |
259 | 0 | errno = EINVAL; |
260 | 0 | } |
261 | 0 | return 0; |
262 | 0 | } |
263 | 0 | return 1; |
264 | 0 | } |
265 | | |
266 | | static int add_midx_to_chain(struct multi_pack_index *midx, |
267 | | struct multi_pack_index *midx_chain) |
268 | 0 | { |
269 | 0 | if (midx_chain) { |
270 | 0 | if (unsigned_add_overflows(midx_chain->num_packs, |
271 | 0 | midx_chain->num_packs_in_base)) { |
272 | 0 | warning(_("pack count in base MIDX too high: %"PRIuMAX), |
273 | 0 | (uintmax_t)midx_chain->num_packs_in_base); |
274 | 0 | return 0; |
275 | 0 | } |
276 | 0 | if (unsigned_add_overflows(midx_chain->num_objects, |
277 | 0 | midx_chain->num_objects_in_base)) { |
278 | 0 | warning(_("object count in base MIDX too high: %"PRIuMAX), |
279 | 0 | (uintmax_t)midx_chain->num_objects_in_base); |
280 | 0 | return 0; |
281 | 0 | } |
282 | 0 | midx->num_packs_in_base = midx_chain->num_packs + |
283 | 0 | midx_chain->num_packs_in_base; |
284 | 0 | midx->num_objects_in_base = midx_chain->num_objects + |
285 | 0 | midx_chain->num_objects_in_base; |
286 | 0 | } |
287 | | |
288 | 0 | midx->base_midx = midx_chain; |
289 | 0 | midx->has_chain = 1; |
290 | |
|
291 | 0 | return 1; |
292 | 0 | } |
293 | | |
294 | | static struct multi_pack_index *load_midx_chain_fd_st(const char *object_dir, |
295 | | int local, |
296 | | int fd, struct stat *st, |
297 | | int *incomplete_chain) |
298 | 0 | { |
299 | 0 | struct multi_pack_index *midx_chain = NULL; |
300 | 0 | struct strbuf buf = STRBUF_INIT; |
301 | 0 | int valid = 1; |
302 | 0 | uint32_t i, count; |
303 | 0 | FILE *fp = xfdopen(fd, "r"); |
304 | |
|
305 | 0 | count = st->st_size / (the_hash_algo->hexsz + 1); |
306 | |
|
307 | 0 | for (i = 0; i < count; i++) { |
308 | 0 | struct multi_pack_index *m; |
309 | 0 | struct object_id layer; |
310 | |
|
311 | 0 | if (strbuf_getline_lf(&buf, fp) == EOF) |
312 | 0 | break; |
313 | | |
314 | 0 | if (get_oid_hex(buf.buf, &layer)) { |
315 | 0 | warning(_("invalid multi-pack-index chain: line '%s' " |
316 | 0 | "not a hash"), |
317 | 0 | buf.buf); |
318 | 0 | valid = 0; |
319 | 0 | break; |
320 | 0 | } |
321 | | |
322 | 0 | valid = 0; |
323 | |
|
324 | 0 | strbuf_reset(&buf); |
325 | 0 | get_split_midx_filename_ext(&buf, object_dir, layer.hash, |
326 | 0 | MIDX_EXT_MIDX); |
327 | 0 | m = load_multi_pack_index_one(object_dir, buf.buf, local); |
328 | |
|
329 | 0 | if (m) { |
330 | 0 | if (add_midx_to_chain(m, midx_chain)) { |
331 | 0 | midx_chain = m; |
332 | 0 | valid = 1; |
333 | 0 | } else { |
334 | 0 | close_midx(m); |
335 | 0 | } |
336 | 0 | } |
337 | 0 | if (!valid) { |
338 | 0 | warning(_("unable to find all multi-pack index files")); |
339 | 0 | break; |
340 | 0 | } |
341 | 0 | } |
342 | |
|
343 | 0 | fclose(fp); |
344 | 0 | strbuf_release(&buf); |
345 | |
|
346 | 0 | *incomplete_chain = !valid; |
347 | 0 | return midx_chain; |
348 | 0 | } |
349 | | |
350 | | static struct multi_pack_index *load_multi_pack_index_chain(const char *object_dir, |
351 | | int local) |
352 | 55.4k | { |
353 | 55.4k | struct strbuf chain_file = STRBUF_INIT; |
354 | 55.4k | struct stat st; |
355 | 55.4k | int fd; |
356 | 55.4k | struct multi_pack_index *m = NULL; |
357 | | |
358 | 55.4k | get_midx_chain_filename(&chain_file, object_dir); |
359 | 55.4k | if (open_multi_pack_index_chain(chain_file.buf, &fd, &st)) { |
360 | 0 | int incomplete; |
361 | | /* ownership of fd is taken over by load function */ |
362 | 0 | m = load_midx_chain_fd_st(object_dir, local, fd, &st, |
363 | 0 | &incomplete); |
364 | 0 | } |
365 | | |
366 | 55.4k | strbuf_release(&chain_file); |
367 | 55.4k | return m; |
368 | 55.4k | } |
369 | | |
370 | | struct multi_pack_index *load_multi_pack_index(const char *object_dir, |
371 | | int local) |
372 | 55.4k | { |
373 | 55.4k | struct strbuf midx_name = STRBUF_INIT; |
374 | 55.4k | struct multi_pack_index *m; |
375 | | |
376 | 55.4k | get_midx_filename(&midx_name, object_dir); |
377 | | |
378 | 55.4k | m = load_multi_pack_index_one(object_dir, midx_name.buf, local); |
379 | 55.4k | if (!m) |
380 | 55.4k | m = load_multi_pack_index_chain(object_dir, local); |
381 | | |
382 | 55.4k | strbuf_release(&midx_name); |
383 | | |
384 | 55.4k | return m; |
385 | 55.4k | } |
386 | | |
387 | | void close_midx(struct multi_pack_index *m) |
388 | 0 | { |
389 | 0 | uint32_t i; |
390 | |
|
391 | 0 | if (!m) |
392 | 0 | return; |
393 | | |
394 | 0 | close_midx(m->next); |
395 | 0 | close_midx(m->base_midx); |
396 | |
|
397 | 0 | munmap((unsigned char *)m->data, m->data_len); |
398 | |
|
399 | 0 | for (i = 0; i < m->num_packs; i++) { |
400 | 0 | if (m->packs[i]) |
401 | 0 | m->packs[i]->multi_pack_index = 0; |
402 | 0 | } |
403 | 0 | FREE_AND_NULL(m->packs); |
404 | 0 | FREE_AND_NULL(m->pack_names); |
405 | 0 | free(m); |
406 | 0 | } |
407 | | |
408 | | static uint32_t midx_for_object(struct multi_pack_index **_m, uint32_t pos) |
409 | 0 | { |
410 | 0 | struct multi_pack_index *m = *_m; |
411 | 0 | while (m && pos < m->num_objects_in_base) |
412 | 0 | m = m->base_midx; |
413 | |
|
414 | 0 | if (!m) |
415 | 0 | BUG("NULL multi-pack-index for object position: %"PRIu32, pos); |
416 | | |
417 | 0 | if (pos >= m->num_objects + m->num_objects_in_base) |
418 | 0 | die(_("invalid MIDX object position, MIDX is likely corrupt")); |
419 | | |
420 | 0 | *_m = m; |
421 | |
|
422 | 0 | return pos - m->num_objects_in_base; |
423 | 0 | } |
424 | | |
425 | | static uint32_t midx_for_pack(struct multi_pack_index **_m, |
426 | | uint32_t pack_int_id) |
427 | 0 | { |
428 | 0 | struct multi_pack_index *m = *_m; |
429 | 0 | while (m && pack_int_id < m->num_packs_in_base) |
430 | 0 | m = m->base_midx; |
431 | |
|
432 | 0 | if (!m) |
433 | 0 | BUG("NULL multi-pack-index for pack ID: %"PRIu32, pack_int_id); |
434 | | |
435 | 0 | if (pack_int_id >= m->num_packs + m->num_packs_in_base) |
436 | 0 | die(_("bad pack-int-id: %u (%u total packs)"), |
437 | 0 | pack_int_id, m->num_packs + m->num_packs_in_base); |
438 | | |
439 | 0 | *_m = m; |
440 | |
|
441 | 0 | return pack_int_id - m->num_packs_in_base; |
442 | 0 | } |
443 | | |
444 | | int prepare_midx_pack(struct repository *r, struct multi_pack_index *m, |
445 | | uint32_t pack_int_id) |
446 | 0 | { |
447 | 0 | struct strbuf pack_name = STRBUF_INIT; |
448 | 0 | struct packed_git *p; |
449 | |
|
450 | 0 | pack_int_id = midx_for_pack(&m, pack_int_id); |
451 | |
|
452 | 0 | if (m->packs[pack_int_id]) |
453 | 0 | return 0; |
454 | | |
455 | 0 | strbuf_addf(&pack_name, "%s/pack/%s", m->object_dir, |
456 | 0 | m->pack_names[pack_int_id]); |
457 | |
|
458 | 0 | p = add_packed_git(pack_name.buf, pack_name.len, m->local); |
459 | 0 | strbuf_release(&pack_name); |
460 | |
|
461 | 0 | if (!p) |
462 | 0 | return 1; |
463 | | |
464 | 0 | p->multi_pack_index = 1; |
465 | 0 | m->packs[pack_int_id] = p; |
466 | 0 | install_packed_git(r, p); |
467 | 0 | list_add_tail(&p->mru, &r->objects->packed_git_mru); |
468 | |
|
469 | 0 | return 0; |
470 | 0 | } |
471 | | |
472 | | struct packed_git *nth_midxed_pack(struct multi_pack_index *m, |
473 | | uint32_t pack_int_id) |
474 | 0 | { |
475 | 0 | uint32_t local_pack_int_id = midx_for_pack(&m, pack_int_id); |
476 | 0 | return m->packs[local_pack_int_id]; |
477 | 0 | } |
478 | | |
479 | 0 | #define MIDX_CHUNK_BITMAPPED_PACKS_WIDTH (2 * sizeof(uint32_t)) |
480 | | |
481 | | int nth_bitmapped_pack(struct repository *r, struct multi_pack_index *m, |
482 | | struct bitmapped_pack *bp, uint32_t pack_int_id) |
483 | 0 | { |
484 | 0 | uint32_t local_pack_int_id = midx_for_pack(&m, pack_int_id); |
485 | |
|
486 | 0 | if (!m->chunk_bitmapped_packs) |
487 | 0 | return error(_("MIDX does not contain the BTMP chunk")); |
488 | | |
489 | 0 | if (prepare_midx_pack(r, m, pack_int_id)) |
490 | 0 | return error(_("could not load bitmapped pack %"PRIu32), pack_int_id); |
491 | | |
492 | 0 | bp->p = m->packs[local_pack_int_id]; |
493 | 0 | bp->bitmap_pos = get_be32((char *)m->chunk_bitmapped_packs + |
494 | 0 | MIDX_CHUNK_BITMAPPED_PACKS_WIDTH * local_pack_int_id); |
495 | 0 | bp->bitmap_nr = get_be32((char *)m->chunk_bitmapped_packs + |
496 | 0 | MIDX_CHUNK_BITMAPPED_PACKS_WIDTH * local_pack_int_id + |
497 | 0 | sizeof(uint32_t)); |
498 | 0 | bp->pack_int_id = pack_int_id; |
499 | 0 | bp->from_midx = m; |
500 | |
|
501 | 0 | return 0; |
502 | 0 | } |
503 | | |
504 | | int bsearch_one_midx(const struct object_id *oid, struct multi_pack_index *m, |
505 | | uint32_t *result) |
506 | 0 | { |
507 | 0 | int ret = bsearch_hash(oid->hash, m->chunk_oid_fanout, |
508 | 0 | m->chunk_oid_lookup, the_hash_algo->rawsz, |
509 | 0 | result); |
510 | 0 | if (result) |
511 | 0 | *result += m->num_objects_in_base; |
512 | 0 | return ret; |
513 | 0 | } |
514 | | |
515 | | int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, |
516 | | uint32_t *result) |
517 | 0 | { |
518 | 0 | for (; m; m = m->base_midx) |
519 | 0 | if (bsearch_one_midx(oid, m, result)) |
520 | 0 | return 1; |
521 | 0 | return 0; |
522 | 0 | } |
523 | | |
524 | | int midx_has_oid(struct multi_pack_index *m, const struct object_id *oid) |
525 | 0 | { |
526 | 0 | return bsearch_midx(oid, m, NULL); |
527 | 0 | } |
528 | | |
529 | | struct object_id *nth_midxed_object_oid(struct object_id *oid, |
530 | | struct multi_pack_index *m, |
531 | | uint32_t n) |
532 | 0 | { |
533 | 0 | if (n >= m->num_objects + m->num_objects_in_base) |
534 | 0 | return NULL; |
535 | | |
536 | 0 | n = midx_for_object(&m, n); |
537 | |
|
538 | 0 | oidread(oid, m->chunk_oid_lookup + st_mult(m->hash_len, n), |
539 | 0 | the_repository->hash_algo); |
540 | 0 | return oid; |
541 | 0 | } |
542 | | |
543 | | off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos) |
544 | 0 | { |
545 | 0 | const unsigned char *offset_data; |
546 | 0 | uint32_t offset32; |
547 | |
|
548 | 0 | pos = midx_for_object(&m, pos); |
549 | |
|
550 | 0 | offset_data = m->chunk_object_offsets + (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH; |
551 | 0 | offset32 = get_be32(offset_data + sizeof(uint32_t)); |
552 | |
|
553 | 0 | if (m->chunk_large_offsets && offset32 & MIDX_LARGE_OFFSET_NEEDED) { |
554 | 0 | if (sizeof(off_t) < sizeof(uint64_t)) |
555 | 0 | die(_("multi-pack-index stores a 64-bit offset, but off_t is too small")); |
556 | | |
557 | 0 | offset32 ^= MIDX_LARGE_OFFSET_NEEDED; |
558 | 0 | if (offset32 >= m->chunk_large_offsets_len / sizeof(uint64_t)) |
559 | 0 | die(_("multi-pack-index large offset out of bounds")); |
560 | 0 | return get_be64(m->chunk_large_offsets + sizeof(uint64_t) * offset32); |
561 | 0 | } |
562 | | |
563 | 0 | return offset32; |
564 | 0 | } |
565 | | |
566 | | uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos) |
567 | 0 | { |
568 | 0 | pos = midx_for_object(&m, pos); |
569 | |
|
570 | 0 | return m->num_packs_in_base + get_be32(m->chunk_object_offsets + |
571 | 0 | (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH); |
572 | 0 | } |
573 | | |
574 | | int fill_midx_entry(struct repository *r, |
575 | | const struct object_id *oid, |
576 | | struct pack_entry *e, |
577 | | struct multi_pack_index *m) |
578 | 0 | { |
579 | 0 | uint32_t pos; |
580 | 0 | uint32_t pack_int_id; |
581 | 0 | struct packed_git *p; |
582 | |
|
583 | 0 | if (!bsearch_midx(oid, m, &pos)) |
584 | 0 | return 0; |
585 | | |
586 | 0 | midx_for_object(&m, pos); |
587 | 0 | pack_int_id = nth_midxed_pack_int_id(m, pos); |
588 | |
|
589 | 0 | if (prepare_midx_pack(r, m, pack_int_id)) |
590 | 0 | return 0; |
591 | 0 | p = m->packs[pack_int_id - m->num_packs_in_base]; |
592 | | |
593 | | /* |
594 | | * We are about to tell the caller where they can locate the |
595 | | * requested object. We better make sure the packfile is |
596 | | * still here and can be accessed before supplying that |
597 | | * answer, as it may have been deleted since the MIDX was |
598 | | * loaded! |
599 | | */ |
600 | 0 | if (!is_pack_valid(p)) |
601 | 0 | return 0; |
602 | | |
603 | 0 | if (oidset_size(&p->bad_objects) && |
604 | 0 | oidset_contains(&p->bad_objects, oid)) |
605 | 0 | return 0; |
606 | | |
607 | 0 | e->offset = nth_midxed_offset(m, pos); |
608 | 0 | e->p = p; |
609 | |
|
610 | 0 | return 1; |
611 | 0 | } |
612 | | |
/*
 * Compare `idx_or_pack_name` ("foo.idx" or "foo.pack") against
 * `idx_name` ("foo.idx").
 *
 * Returns 0 when the names are identical or differ only in a trailing
 * "pack" versus "idx". Otherwise returns the result of strcmp() on the
 * parts that follow the common prefix, which has the same sign as a
 * strcmp() of the full names. That makes the function usable for a
 * binary search over a list sorted with plain strcmp().
 */
int cmp_idx_or_pack_name(const char *idx_or_pack_name,
			 const char *idx_name)
{
	size_t common = 0;
	const char *idx_rest, *other_rest;

	/* Measure the common prefix of the two names. */
	while (idx_name[common] &&
	       idx_name[common] == idx_or_pack_name[common])
		common++;

	idx_rest = idx_name + common;
	other_rest = idx_or_pack_name + common;

	/*
	 * After a shared "pack-1234." the leftovers may be "idx" and
	 * "pack", which counts as a match. Identical names need no special
	 * case: both leftovers are then empty and the final strcmp()
	 * returns 0.
	 *
	 * Technically this also matches "fooidx" against "foopack", but
	 * such names never occur in practice.
	 */
	if (!strcmp(idx_rest, "idx") && !strcmp(other_rest, "pack"))
		return 0;

	/* Order by the first differing character, exactly like strcmp(). */
	return strcmp(other_rest, idx_rest);
}
644 | | |
645 | | static int midx_contains_pack_1(struct multi_pack_index *m, |
646 | | const char *idx_or_pack_name) |
647 | 0 | { |
648 | 0 | uint32_t first = 0, last = m->num_packs; |
649 | |
|
650 | 0 | while (first < last) { |
651 | 0 | uint32_t mid = first + (last - first) / 2; |
652 | 0 | const char *current; |
653 | 0 | int cmp; |
654 | |
|
655 | 0 | current = m->pack_names[mid]; |
656 | 0 | cmp = cmp_idx_or_pack_name(idx_or_pack_name, current); |
657 | 0 | if (!cmp) |
658 | 0 | return 1; |
659 | 0 | if (cmp > 0) { |
660 | 0 | first = mid + 1; |
661 | 0 | continue; |
662 | 0 | } |
663 | 0 | last = mid; |
664 | 0 | } |
665 | | |
666 | 0 | return 0; |
667 | 0 | } |
668 | | |
669 | | int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name) |
670 | 0 | { |
671 | 0 | for (; m; m = m->base_midx) |
672 | 0 | if (midx_contains_pack_1(m, idx_or_pack_name)) |
673 | 0 | return 1; |
674 | 0 | return 0; |
675 | 0 | } |
676 | | |
677 | | int midx_preferred_pack(struct multi_pack_index *m, uint32_t *pack_int_id) |
678 | 0 | { |
679 | 0 | if (m->preferred_pack_idx == -1) { |
680 | 0 | uint32_t midx_pos; |
681 | 0 | if (load_midx_revindex(m) < 0) { |
682 | 0 | m->preferred_pack_idx = -2; |
683 | 0 | return -1; |
684 | 0 | } |
685 | | |
686 | 0 | midx_pos = pack_pos_to_midx(m, m->num_objects_in_base); |
687 | |
|
688 | 0 | m->preferred_pack_idx = nth_midxed_pack_int_id(m, midx_pos); |
689 | |
|
690 | 0 | } else if (m->preferred_pack_idx == -2) |
691 | 0 | return -1; /* no revindex */ |
692 | | |
693 | 0 | *pack_int_id = m->preferred_pack_idx; |
694 | 0 | return 0; |
695 | 0 | } |
696 | | |
697 | | int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, int local) |
698 | 55.4k | { |
699 | 55.4k | struct multi_pack_index *m; |
700 | 55.4k | struct multi_pack_index *m_search; |
701 | | |
702 | 55.4k | prepare_repo_settings(r); |
703 | 55.4k | if (!r->settings.core_multi_pack_index) |
704 | 0 | return 0; |
705 | | |
706 | 55.4k | for (m_search = r->objects->multi_pack_index; m_search; m_search = m_search->next) |
707 | 0 | if (!strcmp(object_dir, m_search->object_dir)) |
708 | 0 | return 1; |
709 | | |
710 | 55.4k | m = load_multi_pack_index(object_dir, local); |
711 | | |
712 | 55.4k | if (m) { |
713 | 0 | struct multi_pack_index *mp = r->objects->multi_pack_index; |
714 | 0 | if (mp) { |
715 | 0 | m->next = mp->next; |
716 | 0 | mp->next = m; |
717 | 0 | } else |
718 | 0 | r->objects->multi_pack_index = m; |
719 | 0 | return 1; |
720 | 0 | } |
721 | | |
722 | 55.4k | return 0; |
723 | 55.4k | } |
724 | | |
725 | | int midx_checksum_valid(struct multi_pack_index *m) |
726 | 0 | { |
727 | 0 | return hashfile_checksum_valid(m->data, m->data_len); |
728 | 0 | } |
729 | | |
/* State handed to clear_midx_file_ext() for every directory entry. */
struct clear_midx_data {
	/* File names that must survive; compared against the bare name. */
	char **keep;
	/* Number of entries in `keep`. */
	uint32_t keep_nr;
	/* Only file names ending in this string are candidates for removal. */
	const char *ext;
};
735 | | |
736 | | static void clear_midx_file_ext(const char *full_path, size_t full_path_len UNUSED, |
737 | | const char *file_name, void *_data) |
738 | 0 | { |
739 | 0 | struct clear_midx_data *data = _data; |
740 | 0 | uint32_t i; |
741 | |
|
742 | 0 | if (!(starts_with(file_name, "multi-pack-index-") && |
743 | 0 | ends_with(file_name, data->ext))) |
744 | 0 | return; |
745 | 0 | for (i = 0; i < data->keep_nr; i++) { |
746 | 0 | if (!strcmp(data->keep[i], file_name)) |
747 | 0 | return; |
748 | 0 | } |
749 | 0 | if (unlink(full_path)) |
750 | 0 | die_errno(_("failed to remove %s"), full_path); |
751 | 0 | } |
752 | | |
753 | | void clear_midx_files_ext(const char *object_dir, const char *ext, |
754 | | const char *keep_hash) |
755 | 0 | { |
756 | 0 | struct clear_midx_data data; |
757 | 0 | memset(&data, 0, sizeof(struct clear_midx_data)); |
758 | |
|
759 | 0 | if (keep_hash) { |
760 | 0 | ALLOC_ARRAY(data.keep, 1); |
761 | |
|
762 | 0 | data.keep[0] = xstrfmt("multi-pack-index-%s.%s", keep_hash, ext); |
763 | 0 | data.keep_nr = 1; |
764 | 0 | } |
765 | 0 | data.ext = ext; |
766 | |
|
767 | 0 | for_each_file_in_pack_dir(object_dir, |
768 | 0 | clear_midx_file_ext, |
769 | 0 | &data); |
770 | |
|
771 | 0 | if (keep_hash) |
772 | 0 | free(data.keep[0]); |
773 | 0 | free(data.keep); |
774 | 0 | } |
775 | | |
776 | | void clear_incremental_midx_files_ext(const char *object_dir, const char *ext, |
777 | | char **keep_hashes, |
778 | | uint32_t hashes_nr) |
779 | 0 | { |
780 | 0 | struct clear_midx_data data; |
781 | 0 | uint32_t i; |
782 | |
|
783 | 0 | memset(&data, 0, sizeof(struct clear_midx_data)); |
784 | |
|
785 | 0 | ALLOC_ARRAY(data.keep, hashes_nr); |
786 | 0 | for (i = 0; i < hashes_nr; i++) |
787 | 0 | data.keep[i] = xstrfmt("multi-pack-index-%s.%s", keep_hashes[i], |
788 | 0 | ext); |
789 | 0 | data.keep_nr = hashes_nr; |
790 | 0 | data.ext = ext; |
791 | |
|
792 | 0 | for_each_file_in_pack_subdir(object_dir, "multi-pack-index.d", |
793 | 0 | clear_midx_file_ext, &data); |
794 | |
|
795 | 0 | for (i = 0; i < hashes_nr; i++) |
796 | 0 | free(data.keep[i]); |
797 | 0 | free(data.keep); |
798 | 0 | } |
799 | | |
800 | | void clear_midx_file(struct repository *r) |
801 | 0 | { |
802 | 0 | struct strbuf midx = STRBUF_INIT; |
803 | |
|
804 | 0 | get_midx_filename(&midx, r->objects->odb->path); |
805 | |
|
806 | 0 | if (r->objects && r->objects->multi_pack_index) { |
807 | 0 | close_midx(r->objects->multi_pack_index); |
808 | 0 | r->objects->multi_pack_index = NULL; |
809 | 0 | } |
810 | |
|
811 | 0 | if (remove_path(midx.buf)) |
812 | 0 | die(_("failed to clear multi-pack-index at %s"), midx.buf); |
813 | | |
814 | 0 | clear_midx_files_ext(r->objects->odb->path, MIDX_EXT_BITMAP, NULL); |
815 | 0 | clear_midx_files_ext(r->objects->odb->path, MIDX_EXT_REV, NULL); |
816 | |
|
817 | 0 | strbuf_release(&midx); |
818 | 0 | } |
819 | | |
/* Set to 1 by midx_report(); reset and returned by verify_midx_file(). */
static int verify_midx_error;

/*
 * Print a printf-style message followed by a newline to stderr and
 * record that verification found a problem.
 */
__attribute__((format (printf, 1, 2)))
static void midx_report(const char *fmt, ...)
{
	va_list args;

	verify_midx_error = 1;

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);

	fputc('\n', stderr);
}
832 | | |
/* An object's position in the index paired with the id of its pack. */
struct pair_pos_vs_id
{
	uint32_t pos;
	uint32_t pack_int_id;
};

/*
 * qsort() comparator ordering pairs by descending `pack_int_id`.
 *
 * The ids are unsigned 32-bit values, so their difference cannot be
 * returned directly: converted to int it has the wrong sign whenever the
 * ids are 2^31 or more apart. Compare explicitly instead.
 */
static int compare_pair_pos_vs_id(const void *_a, const void *_b)
{
	const struct pair_pos_vs_id *a = _a;
	const struct pair_pos_vs_id *b = _b;

	return (b->pack_int_id > a->pack_int_id) -
	       (b->pack_int_id < a->pack_int_id);
}
846 | | |
/*
 * For performance reasons, only every SPARSE_PROGRESS_INTERVAL-th value
 * is forwarded to display_progress(). The interval itself was chosen
 * arbitrarily.
 */
#define SPARSE_PROGRESS_INTERVAL (1 << 12)
#define midx_display_sparse_progress(progress, n) \
	do { \
		uint64_t sparse_n_ = (n); \
		if (!(sparse_n_ % SPARSE_PROGRESS_INTERVAL)) \
			display_progress(progress, sparse_n_); \
	} while (0)
858 | | |
859 | | int verify_midx_file(struct repository *r, const char *object_dir, unsigned flags) |
860 | 0 | { |
861 | 0 | struct pair_pos_vs_id *pairs = NULL; |
862 | 0 | uint32_t i; |
863 | 0 | struct progress *progress = NULL; |
864 | 0 | struct multi_pack_index *m = load_multi_pack_index(object_dir, 1); |
865 | 0 | struct multi_pack_index *curr; |
866 | 0 | verify_midx_error = 0; |
867 | |
|
868 | 0 | if (!m) { |
869 | 0 | int result = 0; |
870 | 0 | struct stat sb; |
871 | 0 | struct strbuf filename = STRBUF_INIT; |
872 | |
|
873 | 0 | get_midx_filename(&filename, object_dir); |
874 | |
|
875 | 0 | if (!stat(filename.buf, &sb)) { |
876 | 0 | error(_("multi-pack-index file exists, but failed to parse")); |
877 | 0 | result = 1; |
878 | 0 | } |
879 | 0 | strbuf_release(&filename); |
880 | 0 | return result; |
881 | 0 | } |
882 | | |
883 | 0 | if (!midx_checksum_valid(m)) |
884 | 0 | midx_report(_("incorrect checksum")); |
885 | |
|
886 | 0 | if (flags & MIDX_PROGRESS) |
887 | 0 | progress = start_delayed_progress(_("Looking for referenced packfiles"), |
888 | 0 | m->num_packs + m->num_packs_in_base); |
889 | 0 | for (i = 0; i < m->num_packs + m->num_packs_in_base; i++) { |
890 | 0 | if (prepare_midx_pack(r, m, i)) |
891 | 0 | midx_report("failed to load pack in position %d", i); |
892 | |
|
893 | 0 | display_progress(progress, i + 1); |
894 | 0 | } |
895 | 0 | stop_progress(&progress); |
896 | |
|
897 | 0 | if (m->num_objects == 0) { |
898 | 0 | midx_report(_("the midx contains no oid")); |
899 | | /* |
900 | | * Remaining tests assume that we have objects, so we can |
901 | | * return here. |
902 | | */ |
903 | 0 | goto cleanup; |
904 | 0 | } |
905 | | |
906 | 0 | if (flags & MIDX_PROGRESS) |
907 | 0 | progress = start_sparse_progress(_("Verifying OID order in multi-pack-index"), |
908 | 0 | m->num_objects - 1); |
909 | |
|
910 | 0 | for (curr = m; curr; curr = curr->base_midx) { |
911 | 0 | for (i = 0; i < m->num_objects - 1; i++) { |
912 | 0 | struct object_id oid1, oid2; |
913 | |
|
914 | 0 | nth_midxed_object_oid(&oid1, m, m->num_objects_in_base + i); |
915 | 0 | nth_midxed_object_oid(&oid2, m, m->num_objects_in_base + i + 1); |
916 | |
|
917 | 0 | if (oidcmp(&oid1, &oid2) >= 0) |
918 | 0 | midx_report(_("oid lookup out of order: oid[%d] = %s >= %s = oid[%d]"), |
919 | 0 | i, oid_to_hex(&oid1), oid_to_hex(&oid2), i + 1); |
920 | |
|
921 | 0 | midx_display_sparse_progress(progress, i + 1); |
922 | 0 | } |
923 | 0 | } |
924 | 0 | stop_progress(&progress); |
925 | | |
926 | | /* |
927 | | * Create an array mapping each object to its packfile id. Sort it |
928 | | * to group the objects by packfile. Use this permutation to visit |
929 | | * each of the objects and only require 1 packfile to be open at a |
930 | | * time. |
931 | | */ |
932 | 0 | ALLOC_ARRAY(pairs, m->num_objects + m->num_objects_in_base); |
933 | 0 | for (i = 0; i < m->num_objects + m->num_objects_in_base; i++) { |
934 | 0 | pairs[i].pos = i; |
935 | 0 | pairs[i].pack_int_id = nth_midxed_pack_int_id(m, i); |
936 | 0 | } |
937 | |
|
938 | 0 | if (flags & MIDX_PROGRESS) |
939 | 0 | progress = start_sparse_progress(_("Sorting objects by packfile"), |
940 | 0 | m->num_objects); |
941 | 0 | display_progress(progress, 0); /* TODO: Measure QSORT() progress */ |
942 | 0 | QSORT(pairs, m->num_objects, compare_pair_pos_vs_id); |
943 | 0 | stop_progress(&progress); |
944 | |
|
945 | 0 | if (flags & MIDX_PROGRESS) |
946 | 0 | progress = start_sparse_progress(_("Verifying object offsets"), m->num_objects); |
947 | 0 | for (i = 0; i < m->num_objects + m->num_objects_in_base; i++) { |
948 | 0 | struct object_id oid; |
949 | 0 | struct pack_entry e; |
950 | 0 | off_t m_offset, p_offset; |
951 | |
|
952 | 0 | if (i > 0 && pairs[i-1].pack_int_id != pairs[i].pack_int_id && |
953 | 0 | nth_midxed_pack(m, pairs[i-1].pack_int_id)) { |
954 | 0 | uint32_t pack_int_id = pairs[i-1].pack_int_id; |
955 | 0 | struct packed_git *p = nth_midxed_pack(m, pack_int_id); |
956 | |
|
957 | 0 | close_pack_fd(p); |
958 | 0 | close_pack_index(p); |
959 | 0 | } |
960 | |
|
961 | 0 | nth_midxed_object_oid(&oid, m, pairs[i].pos); |
962 | |
|
963 | 0 | if (!fill_midx_entry(r, &oid, &e, m)) { |
964 | 0 | midx_report(_("failed to load pack entry for oid[%d] = %s"), |
965 | 0 | pairs[i].pos, oid_to_hex(&oid)); |
966 | 0 | continue; |
967 | 0 | } |
968 | | |
969 | 0 | if (open_pack_index(e.p)) { |
970 | 0 | midx_report(_("failed to load pack-index for packfile %s"), |
971 | 0 | e.p->pack_name); |
972 | 0 | break; |
973 | 0 | } |
974 | | |
975 | 0 | m_offset = e.offset; |
976 | 0 | p_offset = find_pack_entry_one(oid.hash, e.p); |
977 | |
|
978 | 0 | if (m_offset != p_offset) |
979 | 0 | midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64" != %"PRIx64), |
980 | 0 | pairs[i].pos, oid_to_hex(&oid), m_offset, p_offset); |
981 | |
|
982 | 0 | midx_display_sparse_progress(progress, i + 1); |
983 | 0 | } |
984 | 0 | stop_progress(&progress); |
985 | |
|
986 | 0 | cleanup: |
987 | 0 | free(pairs); |
988 | 0 | close_midx(m); |
989 | |
|
990 | 0 | return verify_midx_error; |
991 | 0 | } |