Line | Count | Source |
1 | | #define DISABLE_SIGN_COMPARE_WARNINGS |
2 | | |
3 | | #include "git-compat-util.h" |
4 | | #include "config.h" |
5 | | #include "dir.h" |
6 | | #include "hex.h" |
7 | | #include "packfile.h" |
8 | | #include "hash-lookup.h" |
9 | | #include "midx.h" |
10 | | #include "progress.h" |
11 | | #include "trace2.h" |
12 | | #include "chunk-format.h" |
13 | | #include "pack-bitmap.h" |
14 | | #include "pack-revindex.h" |
15 | | |
16 | 0 | #define MIDX_PACK_ERROR ((void *)(intptr_t)-1) |
17 | | |
18 | | int midx_checksum_valid(struct multi_pack_index *m); |
19 | | void clear_midx_files_ext(struct odb_source *source, const char *ext, |
20 | | const char *keep_hash); |
21 | | void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext, |
22 | | char **keep_hashes, |
23 | | uint32_t hashes_nr); |
24 | | int cmp_idx_or_pack_name(const char *idx_or_pack_name, |
25 | | const char *idx_name); |
26 | | |
27 | | const unsigned char *get_midx_checksum(struct multi_pack_index *m) |
28 | 0 | { |
29 | 0 | return m->data + m->data_len - m->source->odb->repo->hash_algo->rawsz; |
30 | 0 | } |
31 | | |
32 | | void get_midx_filename(struct odb_source *source, struct strbuf *out) |
33 | 0 | { |
34 | 0 | get_midx_filename_ext(source, out, NULL, NULL); |
35 | 0 | } |
36 | | |
37 | | void get_midx_filename_ext(struct odb_source *source, struct strbuf *out, |
38 | | const unsigned char *hash, const char *ext) |
39 | 0 | { |
40 | 0 | strbuf_addf(out, "%s/pack/multi-pack-index", source->path); |
41 | 0 | if (ext) |
42 | 0 | strbuf_addf(out, "-%s.%s", hash_to_hex_algop(hash, source->odb->repo->hash_algo), ext); |
43 | 0 | } |
44 | | |
45 | | static int midx_read_oid_fanout(const unsigned char *chunk_start, |
46 | | size_t chunk_size, void *data) |
47 | 0 | { |
48 | 0 | int i; |
49 | 0 | struct multi_pack_index *m = data; |
50 | 0 | m->chunk_oid_fanout = (uint32_t *)chunk_start; |
51 | |
|
52 | 0 | if (chunk_size != 4 * 256) { |
53 | 0 | error(_("multi-pack-index OID fanout is of the wrong size")); |
54 | 0 | return 1; |
55 | 0 | } |
56 | 0 | for (i = 0; i < 255; i++) { |
57 | 0 | uint32_t oid_fanout1 = ntohl(m->chunk_oid_fanout[i]); |
58 | 0 | uint32_t oid_fanout2 = ntohl(m->chunk_oid_fanout[i+1]); |
59 | |
|
60 | 0 | if (oid_fanout1 > oid_fanout2) { |
61 | 0 | error(_("oid fanout out of order: fanout[%d] = %"PRIx32" > %"PRIx32" = fanout[%d]"), |
62 | 0 | i, oid_fanout1, oid_fanout2, i + 1); |
63 | 0 | return 1; |
64 | 0 | } |
65 | 0 | } |
66 | 0 | m->num_objects = ntohl(m->chunk_oid_fanout[255]); |
67 | 0 | return 0; |
68 | 0 | } |
69 | | |
70 | | static int midx_read_oid_lookup(const unsigned char *chunk_start, |
71 | | size_t chunk_size, void *data) |
72 | 0 | { |
73 | 0 | struct multi_pack_index *m = data; |
74 | 0 | m->chunk_oid_lookup = chunk_start; |
75 | |
|
76 | 0 | if (chunk_size != st_mult(m->hash_len, m->num_objects)) { |
77 | 0 | error(_("multi-pack-index OID lookup chunk is the wrong size")); |
78 | 0 | return 1; |
79 | 0 | } |
80 | 0 | return 0; |
81 | 0 | } |
82 | | |
83 | | static int midx_read_object_offsets(const unsigned char *chunk_start, |
84 | | size_t chunk_size, void *data) |
85 | 0 | { |
86 | 0 | struct multi_pack_index *m = data; |
87 | 0 | m->chunk_object_offsets = chunk_start; |
88 | |
|
89 | 0 | if (chunk_size != st_mult(m->num_objects, MIDX_CHUNK_OFFSET_WIDTH)) { |
90 | 0 | error(_("multi-pack-index object offset chunk is the wrong size")); |
91 | 0 | return 1; |
92 | 0 | } |
93 | 0 | return 0; |
94 | 0 | } |
95 | | |
96 | | struct multi_pack_index *get_multi_pack_index(struct odb_source *source) |
97 | 0 | { |
98 | 0 | packfile_store_prepare(source->odb->packfiles); |
99 | 0 | return source->midx; |
100 | 0 | } |
101 | | |
102 | | static struct multi_pack_index *load_multi_pack_index_one(struct odb_source *source, |
103 | | const char *midx_name) |
104 | 0 | { |
105 | 0 | struct repository *r = source->odb->repo; |
106 | 0 | struct multi_pack_index *m = NULL; |
107 | 0 | int fd; |
108 | 0 | struct stat st; |
109 | 0 | size_t midx_size; |
110 | 0 | void *midx_map = NULL; |
111 | 0 | uint32_t hash_version; |
112 | 0 | uint32_t i; |
113 | 0 | const char *cur_pack_name; |
114 | 0 | struct chunkfile *cf = NULL; |
115 | |
|
116 | 0 | fd = git_open(midx_name); |
117 | |
|
118 | 0 | if (fd < 0) |
119 | 0 | goto cleanup_fail; |
120 | 0 | if (fstat(fd, &st)) { |
121 | 0 | error_errno(_("failed to read %s"), midx_name); |
122 | 0 | goto cleanup_fail; |
123 | 0 | } |
124 | | |
125 | 0 | midx_size = xsize_t(st.st_size); |
126 | |
|
127 | 0 | if (midx_size < (MIDX_HEADER_SIZE + r->hash_algo->rawsz)) { |
128 | 0 | error(_("multi-pack-index file %s is too small"), midx_name); |
129 | 0 | goto cleanup_fail; |
130 | 0 | } |
131 | | |
132 | 0 | midx_map = xmmap(NULL, midx_size, PROT_READ, MAP_PRIVATE, fd, 0); |
133 | 0 | close(fd); |
134 | |
|
135 | 0 | CALLOC_ARRAY(m, 1); |
136 | 0 | m->data = midx_map; |
137 | 0 | m->data_len = midx_size; |
138 | 0 | m->source = source; |
139 | |
|
140 | 0 | m->signature = get_be32(m->data); |
141 | 0 | if (m->signature != MIDX_SIGNATURE) |
142 | 0 | die(_("multi-pack-index signature 0x%08x does not match signature 0x%08x"), |
143 | 0 | m->signature, MIDX_SIGNATURE); |
144 | | |
145 | 0 | m->version = m->data[MIDX_BYTE_FILE_VERSION]; |
146 | 0 | if (m->version != MIDX_VERSION) |
147 | 0 | die(_("multi-pack-index version %d not recognized"), |
148 | 0 | m->version); |
149 | | |
150 | 0 | hash_version = m->data[MIDX_BYTE_HASH_VERSION]; |
151 | 0 | if (hash_version != oid_version(r->hash_algo)) { |
152 | 0 | error(_("multi-pack-index hash version %u does not match version %u"), |
153 | 0 | hash_version, oid_version(r->hash_algo)); |
154 | 0 | goto cleanup_fail; |
155 | 0 | } |
156 | 0 | m->hash_len = r->hash_algo->rawsz; |
157 | |
|
158 | 0 | m->num_chunks = m->data[MIDX_BYTE_NUM_CHUNKS]; |
159 | |
|
160 | 0 | m->num_packs = get_be32(m->data + MIDX_BYTE_NUM_PACKS); |
161 | |
|
162 | 0 | m->preferred_pack_idx = -1; |
163 | |
|
164 | 0 | cf = init_chunkfile(NULL); |
165 | |
|
166 | 0 | if (read_table_of_contents(cf, m->data, midx_size, |
167 | 0 | MIDX_HEADER_SIZE, m->num_chunks, |
168 | 0 | MIDX_CHUNK_ALIGNMENT)) |
169 | 0 | goto cleanup_fail; |
170 | | |
171 | 0 | if (pair_chunk(cf, MIDX_CHUNKID_PACKNAMES, &m->chunk_pack_names, &m->chunk_pack_names_len)) |
172 | 0 | die(_("multi-pack-index required pack-name chunk missing or corrupted")); |
173 | 0 | if (read_chunk(cf, MIDX_CHUNKID_OIDFANOUT, midx_read_oid_fanout, m)) |
174 | 0 | die(_("multi-pack-index required OID fanout chunk missing or corrupted")); |
175 | 0 | if (read_chunk(cf, MIDX_CHUNKID_OIDLOOKUP, midx_read_oid_lookup, m)) |
176 | 0 | die(_("multi-pack-index required OID lookup chunk missing or corrupted")); |
177 | 0 | if (read_chunk(cf, MIDX_CHUNKID_OBJECTOFFSETS, midx_read_object_offsets, m)) |
178 | 0 | die(_("multi-pack-index required object offsets chunk missing or corrupted")); |
179 | | |
180 | 0 | pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets, |
181 | 0 | &m->chunk_large_offsets_len); |
182 | 0 | if (git_env_bool("GIT_TEST_MIDX_READ_BTMP", 1)) |
183 | 0 | pair_chunk(cf, MIDX_CHUNKID_BITMAPPEDPACKS, |
184 | 0 | (const unsigned char **)&m->chunk_bitmapped_packs, |
185 | 0 | &m->chunk_bitmapped_packs_len); |
186 | |
|
187 | 0 | if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1)) |
188 | 0 | pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex, |
189 | 0 | &m->chunk_revindex_len); |
190 | |
|
191 | 0 | CALLOC_ARRAY(m->pack_names, m->num_packs); |
192 | 0 | CALLOC_ARRAY(m->packs, m->num_packs); |
193 | |
|
194 | 0 | cur_pack_name = (const char *)m->chunk_pack_names; |
195 | 0 | for (i = 0; i < m->num_packs; i++) { |
196 | 0 | const char *end; |
197 | 0 | size_t avail = m->chunk_pack_names_len - |
198 | 0 | (cur_pack_name - (const char *)m->chunk_pack_names); |
199 | |
|
200 | 0 | m->pack_names[i] = cur_pack_name; |
201 | |
|
202 | 0 | end = memchr(cur_pack_name, '\0', avail); |
203 | 0 | if (!end) |
204 | 0 | die(_("multi-pack-index pack-name chunk is too short")); |
205 | 0 | cur_pack_name = end + 1; |
206 | |
|
207 | 0 | if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0) |
208 | 0 | die(_("multi-pack-index pack names out of order: '%s' before '%s'"), |
209 | 0 | m->pack_names[i - 1], |
210 | 0 | m->pack_names[i]); |
211 | 0 | } |
212 | | |
213 | 0 | trace2_data_intmax("midx", r, "load/num_packs", m->num_packs); |
214 | 0 | trace2_data_intmax("midx", r, "load/num_objects", m->num_objects); |
215 | |
|
216 | 0 | free_chunkfile(cf); |
217 | 0 | return m; |
218 | | |
219 | 0 | cleanup_fail: |
220 | 0 | free(m); |
221 | 0 | free_chunkfile(cf); |
222 | 0 | if (midx_map) |
223 | 0 | munmap(midx_map, midx_size); |
224 | 0 | if (0 <= fd) |
225 | 0 | close(fd); |
226 | 0 | return NULL; |
227 | 0 | } |
228 | | |
229 | | void get_midx_chain_dirname(struct odb_source *source, struct strbuf *buf) |
230 | 0 | { |
231 | 0 | strbuf_addf(buf, "%s/pack/multi-pack-index.d", source->path); |
232 | 0 | } |
233 | | |
/*
 * Append the path of the chain file (inside the MIDX chain directory of
 * `source`) to `buf`.
 */
void get_midx_chain_filename(struct odb_source *source, struct strbuf *buf)
{
	static const char chain_basename[] = "/multi-pack-index-chain";

	get_midx_chain_dirname(source, buf);
	strbuf_addstr(buf, chain_basename);
}
239 | | |
240 | | void get_split_midx_filename_ext(struct odb_source *source, struct strbuf *buf, |
241 | | const unsigned char *hash, const char *ext) |
242 | 0 | { |
243 | 0 | get_midx_chain_dirname(source, buf); |
244 | 0 | strbuf_addf(buf, "/multi-pack-index-%s.%s", |
245 | 0 | hash_to_hex_algop(hash, source->odb->repo->hash_algo), ext); |
246 | 0 | } |
247 | | |
248 | | static int open_multi_pack_index_chain(const struct git_hash_algo *hash_algo, |
249 | | const char *chain_file, int *fd, |
250 | | struct stat *st) |
251 | 0 | { |
252 | 0 | *fd = git_open(chain_file); |
253 | 0 | if (*fd < 0) |
254 | 0 | return 0; |
255 | 0 | if (fstat(*fd, st)) { |
256 | 0 | close(*fd); |
257 | 0 | return 0; |
258 | 0 | } |
259 | 0 | if (st->st_size < hash_algo->hexsz) { |
260 | 0 | close(*fd); |
261 | 0 | if (!st->st_size) { |
262 | | /* treat empty files the same as missing */ |
263 | 0 | errno = ENOENT; |
264 | 0 | } else { |
265 | 0 | warning(_("multi-pack-index chain file too small")); |
266 | 0 | errno = EINVAL; |
267 | 0 | } |
268 | 0 | return 0; |
269 | 0 | } |
270 | 0 | return 1; |
271 | 0 | } |
272 | | |
273 | | static int add_midx_to_chain(struct multi_pack_index *midx, |
274 | | struct multi_pack_index *midx_chain) |
275 | 0 | { |
276 | 0 | if (midx_chain) { |
277 | 0 | if (unsigned_add_overflows(midx_chain->num_packs, |
278 | 0 | midx_chain->num_packs_in_base)) { |
279 | 0 | warning(_("pack count in base MIDX too high: %"PRIuMAX), |
280 | 0 | (uintmax_t)midx_chain->num_packs_in_base); |
281 | 0 | return 0; |
282 | 0 | } |
283 | 0 | if (unsigned_add_overflows(midx_chain->num_objects, |
284 | 0 | midx_chain->num_objects_in_base)) { |
285 | 0 | warning(_("object count in base MIDX too high: %"PRIuMAX), |
286 | 0 | (uintmax_t)midx_chain->num_objects_in_base); |
287 | 0 | return 0; |
288 | 0 | } |
289 | 0 | midx->num_packs_in_base = midx_chain->num_packs + |
290 | 0 | midx_chain->num_packs_in_base; |
291 | 0 | midx->num_objects_in_base = midx_chain->num_objects + |
292 | 0 | midx_chain->num_objects_in_base; |
293 | 0 | } |
294 | | |
295 | 0 | midx->base_midx = midx_chain; |
296 | 0 | midx->has_chain = 1; |
297 | |
|
298 | 0 | return 1; |
299 | 0 | } |
300 | | |
301 | | static struct multi_pack_index *load_midx_chain_fd_st(struct odb_source *source, |
302 | | int fd, struct stat *st, |
303 | | int *incomplete_chain) |
304 | 0 | { |
305 | 0 | const struct git_hash_algo *hash_algo = source->odb->repo->hash_algo; |
306 | 0 | struct multi_pack_index *midx_chain = NULL; |
307 | 0 | struct strbuf buf = STRBUF_INIT; |
308 | 0 | int valid = 1; |
309 | 0 | uint32_t i, count; |
310 | 0 | FILE *fp = xfdopen(fd, "r"); |
311 | |
|
312 | 0 | count = st->st_size / (hash_algo->hexsz + 1); |
313 | |
|
314 | 0 | for (i = 0; i < count; i++) { |
315 | 0 | struct multi_pack_index *m; |
316 | 0 | struct object_id layer; |
317 | |
|
318 | 0 | if (strbuf_getline_lf(&buf, fp) == EOF) |
319 | 0 | break; |
320 | | |
321 | 0 | if (get_oid_hex_algop(buf.buf, &layer, hash_algo)) { |
322 | 0 | warning(_("invalid multi-pack-index chain: line '%s' " |
323 | 0 | "not a hash"), |
324 | 0 | buf.buf); |
325 | 0 | valid = 0; |
326 | 0 | break; |
327 | 0 | } |
328 | | |
329 | 0 | valid = 0; |
330 | |
|
331 | 0 | strbuf_reset(&buf); |
332 | 0 | get_split_midx_filename_ext(source, &buf, |
333 | 0 | layer.hash, MIDX_EXT_MIDX); |
334 | 0 | m = load_multi_pack_index_one(source, buf.buf); |
335 | |
|
336 | 0 | if (m) { |
337 | 0 | if (add_midx_to_chain(m, midx_chain)) { |
338 | 0 | midx_chain = m; |
339 | 0 | valid = 1; |
340 | 0 | } else { |
341 | 0 | close_midx(m); |
342 | 0 | } |
343 | 0 | } |
344 | 0 | if (!valid) { |
345 | 0 | warning(_("unable to find all multi-pack index files")); |
346 | 0 | break; |
347 | 0 | } |
348 | 0 | } |
349 | |
|
350 | 0 | fclose(fp); |
351 | 0 | strbuf_release(&buf); |
352 | |
|
353 | 0 | *incomplete_chain = !valid; |
354 | 0 | return midx_chain; |
355 | 0 | } |
356 | | |
357 | | static struct multi_pack_index *load_multi_pack_index_chain(struct odb_source *source) |
358 | 0 | { |
359 | 0 | struct strbuf chain_file = STRBUF_INIT; |
360 | 0 | struct stat st; |
361 | 0 | int fd; |
362 | 0 | struct multi_pack_index *m = NULL; |
363 | |
|
364 | 0 | get_midx_chain_filename(source, &chain_file); |
365 | 0 | if (open_multi_pack_index_chain(source->odb->repo->hash_algo, chain_file.buf, &fd, &st)) { |
366 | 0 | int incomplete; |
367 | | /* ownership of fd is taken over by load function */ |
368 | 0 | m = load_midx_chain_fd_st(source, fd, &st, &incomplete); |
369 | 0 | } |
370 | |
|
371 | 0 | strbuf_release(&chain_file); |
372 | 0 | return m; |
373 | 0 | } |
374 | | |
375 | | struct multi_pack_index *load_multi_pack_index(struct odb_source *source) |
376 | 0 | { |
377 | 0 | struct strbuf midx_name = STRBUF_INIT; |
378 | 0 | struct multi_pack_index *m; |
379 | |
|
380 | 0 | get_midx_filename(source, &midx_name); |
381 | |
|
382 | 0 | m = load_multi_pack_index_one(source, midx_name.buf); |
383 | 0 | if (!m) |
384 | 0 | m = load_multi_pack_index_chain(source); |
385 | |
|
386 | 0 | strbuf_release(&midx_name); |
387 | |
|
388 | 0 | return m; |
389 | 0 | } |
390 | | |
391 | | void close_midx(struct multi_pack_index *m) |
392 | 0 | { |
393 | 0 | uint32_t i; |
394 | |
|
395 | 0 | if (!m) |
396 | 0 | return; |
397 | | |
398 | 0 | close_midx(m->base_midx); |
399 | |
|
400 | 0 | munmap((unsigned char *)m->data, m->data_len); |
401 | |
|
402 | 0 | for (i = 0; i < m->num_packs; i++) { |
403 | 0 | if (m->packs[i] && m->packs[i] != MIDX_PACK_ERROR) |
404 | 0 | m->packs[i]->multi_pack_index = 0; |
405 | 0 | } |
406 | 0 | FREE_AND_NULL(m->packs); |
407 | 0 | FREE_AND_NULL(m->pack_names); |
408 | 0 | free(m); |
409 | 0 | } |
410 | | |
411 | | static uint32_t midx_for_object(struct multi_pack_index **_m, uint32_t pos) |
412 | 0 | { |
413 | 0 | struct multi_pack_index *m = *_m; |
414 | 0 | while (m && pos < m->num_objects_in_base) |
415 | 0 | m = m->base_midx; |
416 | |
|
417 | 0 | if (!m) |
418 | 0 | BUG("NULL multi-pack-index for object position: %"PRIu32, pos); |
419 | | |
420 | 0 | if (pos >= m->num_objects + m->num_objects_in_base) |
421 | 0 | die(_("invalid MIDX object position, MIDX is likely corrupt")); |
422 | | |
423 | 0 | *_m = m; |
424 | |
|
425 | 0 | return pos - m->num_objects_in_base; |
426 | 0 | } |
427 | | |
428 | | static uint32_t midx_for_pack(struct multi_pack_index **_m, |
429 | | uint32_t pack_int_id) |
430 | 0 | { |
431 | 0 | struct multi_pack_index *m = *_m; |
432 | 0 | while (m && pack_int_id < m->num_packs_in_base) |
433 | 0 | m = m->base_midx; |
434 | |
|
435 | 0 | if (!m) |
436 | 0 | BUG("NULL multi-pack-index for pack ID: %"PRIu32, pack_int_id); |
437 | | |
438 | 0 | if (pack_int_id >= m->num_packs + m->num_packs_in_base) |
439 | 0 | die(_("bad pack-int-id: %u (%u total packs)"), |
440 | 0 | pack_int_id, m->num_packs + m->num_packs_in_base); |
441 | | |
442 | 0 | *_m = m; |
443 | |
|
444 | 0 | return pack_int_id - m->num_packs_in_base; |
445 | 0 | } |
446 | | |
447 | | int prepare_midx_pack(struct multi_pack_index *m, |
448 | | uint32_t pack_int_id) |
449 | 0 | { |
450 | 0 | struct repository *r = m->source->odb->repo; |
451 | 0 | struct strbuf pack_name = STRBUF_INIT; |
452 | 0 | struct packed_git *p; |
453 | |
|
454 | 0 | pack_int_id = midx_for_pack(&m, pack_int_id); |
455 | |
|
456 | 0 | if (m->packs[pack_int_id] == MIDX_PACK_ERROR) |
457 | 0 | return 1; |
458 | 0 | if (m->packs[pack_int_id]) |
459 | 0 | return 0; |
460 | | |
461 | 0 | strbuf_addf(&pack_name, "%s/pack/%s", m->source->path, |
462 | 0 | m->pack_names[pack_int_id]); |
463 | 0 | p = packfile_store_load_pack(r->objects->packfiles, |
464 | 0 | pack_name.buf, m->source->local); |
465 | 0 | strbuf_release(&pack_name); |
466 | |
|
467 | 0 | if (!p) { |
468 | 0 | m->packs[pack_int_id] = MIDX_PACK_ERROR; |
469 | 0 | return 1; |
470 | 0 | } |
471 | | |
472 | 0 | p->multi_pack_index = 1; |
473 | 0 | m->packs[pack_int_id] = p; |
474 | |
|
475 | 0 | return 0; |
476 | 0 | } |
477 | | |
478 | | struct packed_git *nth_midxed_pack(struct multi_pack_index *m, |
479 | | uint32_t pack_int_id) |
480 | 0 | { |
481 | 0 | uint32_t local_pack_int_id = midx_for_pack(&m, pack_int_id); |
482 | 0 | if (m->packs[local_pack_int_id] == MIDX_PACK_ERROR) |
483 | 0 | return NULL; |
484 | 0 | return m->packs[local_pack_int_id]; |
485 | 0 | } |
486 | | |
487 | 0 | #define MIDX_CHUNK_BITMAPPED_PACKS_WIDTH (2 * sizeof(uint32_t)) |
488 | | |
489 | | int nth_bitmapped_pack(struct multi_pack_index *m, |
490 | | struct bitmapped_pack *bp, uint32_t pack_int_id) |
491 | 0 | { |
492 | 0 | uint32_t local_pack_int_id = midx_for_pack(&m, pack_int_id); |
493 | |
|
494 | 0 | if (!m->chunk_bitmapped_packs) |
495 | 0 | return error(_("MIDX does not contain the BTMP chunk")); |
496 | | |
497 | 0 | if (prepare_midx_pack(m, pack_int_id)) |
498 | 0 | return error(_("could not load bitmapped pack %"PRIu32), pack_int_id); |
499 | | |
500 | 0 | bp->p = m->packs[local_pack_int_id]; |
501 | 0 | bp->bitmap_pos = get_be32((char *)m->chunk_bitmapped_packs + |
502 | 0 | MIDX_CHUNK_BITMAPPED_PACKS_WIDTH * local_pack_int_id); |
503 | 0 | bp->bitmap_nr = get_be32((char *)m->chunk_bitmapped_packs + |
504 | 0 | MIDX_CHUNK_BITMAPPED_PACKS_WIDTH * local_pack_int_id + |
505 | 0 | sizeof(uint32_t)); |
506 | 0 | bp->pack_int_id = pack_int_id; |
507 | 0 | bp->from_midx = m; |
508 | |
|
509 | 0 | return 0; |
510 | 0 | } |
511 | | |
512 | | int bsearch_one_midx(const struct object_id *oid, struct multi_pack_index *m, |
513 | | uint32_t *result) |
514 | 0 | { |
515 | 0 | int ret = bsearch_hash(oid->hash, m->chunk_oid_fanout, |
516 | 0 | m->chunk_oid_lookup, |
517 | 0 | m->source->odb->repo->hash_algo->rawsz, |
518 | 0 | result); |
519 | 0 | if (result) |
520 | 0 | *result += m->num_objects_in_base; |
521 | 0 | return ret; |
522 | 0 | } |
523 | | |
524 | | int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, |
525 | | uint32_t *result) |
526 | 0 | { |
527 | 0 | for (; m; m = m->base_midx) |
528 | 0 | if (bsearch_one_midx(oid, m, result)) |
529 | 0 | return 1; |
530 | 0 | return 0; |
531 | 0 | } |
532 | | |
533 | | int midx_has_oid(struct multi_pack_index *m, const struct object_id *oid) |
534 | 0 | { |
535 | 0 | return bsearch_midx(oid, m, NULL); |
536 | 0 | } |
537 | | |
538 | | struct object_id *nth_midxed_object_oid(struct object_id *oid, |
539 | | struct multi_pack_index *m, |
540 | | uint32_t n) |
541 | 0 | { |
542 | 0 | if (n >= m->num_objects + m->num_objects_in_base) |
543 | 0 | return NULL; |
544 | | |
545 | 0 | n = midx_for_object(&m, n); |
546 | |
|
547 | 0 | oidread(oid, m->chunk_oid_lookup + st_mult(m->hash_len, n), |
548 | 0 | m->source->odb->repo->hash_algo); |
549 | 0 | return oid; |
550 | 0 | } |
551 | | |
552 | | off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos) |
553 | 0 | { |
554 | 0 | const unsigned char *offset_data; |
555 | 0 | uint32_t offset32; |
556 | |
|
557 | 0 | pos = midx_for_object(&m, pos); |
558 | |
|
559 | 0 | offset_data = m->chunk_object_offsets + (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH; |
560 | 0 | offset32 = get_be32(offset_data + sizeof(uint32_t)); |
561 | |
|
562 | 0 | if (m->chunk_large_offsets && offset32 & MIDX_LARGE_OFFSET_NEEDED) { |
563 | 0 | if (sizeof(off_t) < sizeof(uint64_t)) |
564 | 0 | die(_("multi-pack-index stores a 64-bit offset, but off_t is too small")); |
565 | | |
566 | 0 | offset32 ^= MIDX_LARGE_OFFSET_NEEDED; |
567 | 0 | if (offset32 >= m->chunk_large_offsets_len / sizeof(uint64_t)) |
568 | 0 | die(_("multi-pack-index large offset out of bounds")); |
569 | 0 | return get_be64(m->chunk_large_offsets + sizeof(uint64_t) * offset32); |
570 | 0 | } |
571 | | |
572 | 0 | return offset32; |
573 | 0 | } |
574 | | |
575 | | uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos) |
576 | 0 | { |
577 | 0 | pos = midx_for_object(&m, pos); |
578 | |
|
579 | 0 | return m->num_packs_in_base + get_be32(m->chunk_object_offsets + |
580 | 0 | (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH); |
581 | 0 | } |
582 | | |
583 | | int fill_midx_entry(struct multi_pack_index *m, |
584 | | const struct object_id *oid, |
585 | | struct pack_entry *e) |
586 | 0 | { |
587 | 0 | uint32_t pos; |
588 | 0 | uint32_t pack_int_id; |
589 | 0 | struct packed_git *p; |
590 | |
|
591 | 0 | if (!bsearch_midx(oid, m, &pos)) |
592 | 0 | return 0; |
593 | | |
594 | 0 | midx_for_object(&m, pos); |
595 | 0 | pack_int_id = nth_midxed_pack_int_id(m, pos); |
596 | |
|
597 | 0 | if (prepare_midx_pack(m, pack_int_id)) |
598 | 0 | return 0; |
599 | 0 | p = m->packs[pack_int_id - m->num_packs_in_base]; |
600 | | |
601 | | /* |
602 | | * We are about to tell the caller where they can locate the |
603 | | * requested object. We better make sure the packfile is |
604 | | * still here and can be accessed before supplying that |
605 | | * answer, as it may have been deleted since the MIDX was |
606 | | * loaded! |
607 | | */ |
608 | 0 | if (!is_pack_valid(p)) |
609 | 0 | return 0; |
610 | | |
611 | 0 | if (oidset_size(&p->bad_objects) && |
612 | 0 | oidset_contains(&p->bad_objects, oid)) |
613 | 0 | return 0; |
614 | | |
615 | 0 | e->offset = nth_midxed_offset(m, pos); |
616 | 0 | e->p = p; |
617 | |
|
618 | 0 | return 1; |
619 | 0 | } |
620 | | |
/*
 * Compare `idx_or_pack_name` ("foo.idx" or "foo.pack") against the index
 * name `idx_name` ("foo.idx"). Returns 0 when they name the same pack,
 * otherwise a strcmp()-style ordering.
 */
int cmp_idx_or_pack_name(const char *idx_or_pack_name,
			 const char *idx_name)
{
	size_t common = 0;

	/* Measure the shared prefix, then step both names past it. */
	while (idx_name[common] &&
	       idx_name[common] == idx_or_pack_name[common])
		common++;
	idx_name += common;
	idx_or_pack_name += common;

	/*
	 * After a shared "pack-1234." prefix the remainders "idx" and
	 * "pack" also count as a match. Two "idx" suffixes never reach
	 * this point with leftovers: they match completely and the final
	 * strcmp() below yields 0 for them.
	 *
	 * Strictly speaking "fooidx" and "foopack" match as well, but such
	 * names do not occur.
	 */
	if (!strcmp(idx_name, "idx") && !strcmp(idx_or_pack_name, "pack"))
		return 0;

	/*
	 * Comparing the remainders orders by the first differing
	 * character, exactly like strcmp() on the full names, so this
	 * function can drive a binary search over a plainly sorted list.
	 */
	return strcmp(idx_or_pack_name, idx_name);
}
652 | | |
653 | | static int midx_contains_pack_1(struct multi_pack_index *m, |
654 | | const char *idx_or_pack_name) |
655 | 0 | { |
656 | 0 | uint32_t first = 0, last = m->num_packs; |
657 | |
|
658 | 0 | while (first < last) { |
659 | 0 | uint32_t mid = first + (last - first) / 2; |
660 | 0 | const char *current; |
661 | 0 | int cmp; |
662 | |
|
663 | 0 | current = m->pack_names[mid]; |
664 | 0 | cmp = cmp_idx_or_pack_name(idx_or_pack_name, current); |
665 | 0 | if (!cmp) |
666 | 0 | return 1; |
667 | 0 | if (cmp > 0) { |
668 | 0 | first = mid + 1; |
669 | 0 | continue; |
670 | 0 | } |
671 | 0 | last = mid; |
672 | 0 | } |
673 | | |
674 | 0 | return 0; |
675 | 0 | } |
676 | | |
677 | | int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name) |
678 | 0 | { |
679 | 0 | for (; m; m = m->base_midx) |
680 | 0 | if (midx_contains_pack_1(m, idx_or_pack_name)) |
681 | 0 | return 1; |
682 | 0 | return 0; |
683 | 0 | } |
684 | | |
685 | | int midx_preferred_pack(struct multi_pack_index *m, uint32_t *pack_int_id) |
686 | 0 | { |
687 | 0 | if (m->preferred_pack_idx == -1) { |
688 | 0 | uint32_t midx_pos; |
689 | 0 | if (load_midx_revindex(m)) { |
690 | 0 | m->preferred_pack_idx = -2; |
691 | 0 | return -1; |
692 | 0 | } |
693 | | |
694 | 0 | midx_pos = pack_pos_to_midx(m, m->num_objects_in_base); |
695 | |
|
696 | 0 | m->preferred_pack_idx = nth_midxed_pack_int_id(m, midx_pos); |
697 | |
|
698 | 0 | } else if (m->preferred_pack_idx == -2) |
699 | 0 | return -1; /* no revindex */ |
700 | | |
701 | 0 | *pack_int_id = m->preferred_pack_idx; |
702 | 0 | return 0; |
703 | 0 | } |
704 | | |
705 | | int prepare_multi_pack_index_one(struct odb_source *source) |
706 | 0 | { |
707 | 0 | struct repository *r = source->odb->repo; |
708 | |
|
709 | 0 | prepare_repo_settings(r); |
710 | 0 | if (!r->settings.core_multi_pack_index) |
711 | 0 | return 0; |
712 | | |
713 | 0 | if (source->midx) |
714 | 0 | return 1; |
715 | | |
716 | 0 | source->midx = load_multi_pack_index(source); |
717 | |
|
718 | 0 | return !!source->midx; |
719 | 0 | } |
720 | | |
721 | | int midx_checksum_valid(struct multi_pack_index *m) |
722 | 0 | { |
723 | 0 | return hashfile_checksum_valid(m->source->odb->repo->hash_algo, |
724 | 0 | m->data, m->data_len); |
725 | 0 | } |
726 | | |
727 | | struct clear_midx_data { |
728 | | char **keep; |
729 | | uint32_t keep_nr; |
730 | | const char *ext; |
731 | | }; |
732 | | |
733 | | static void clear_midx_file_ext(const char *full_path, size_t full_path_len UNUSED, |
734 | | const char *file_name, void *_data) |
735 | 0 | { |
736 | 0 | struct clear_midx_data *data = _data; |
737 | 0 | uint32_t i; |
738 | |
|
739 | 0 | if (!(starts_with(file_name, "multi-pack-index-") && |
740 | 0 | ends_with(file_name, data->ext))) |
741 | 0 | return; |
742 | 0 | for (i = 0; i < data->keep_nr; i++) { |
743 | 0 | if (!strcmp(data->keep[i], file_name)) |
744 | 0 | return; |
745 | 0 | } |
746 | 0 | if (unlink(full_path)) |
747 | 0 | die_errno(_("failed to remove %s"), full_path); |
748 | 0 | } |
749 | | |
750 | | void clear_midx_files_ext(struct odb_source *source, const char *ext, |
751 | | const char *keep_hash) |
752 | 0 | { |
753 | 0 | struct clear_midx_data data; |
754 | 0 | memset(&data, 0, sizeof(struct clear_midx_data)); |
755 | |
|
756 | 0 | if (keep_hash) { |
757 | 0 | ALLOC_ARRAY(data.keep, 1); |
758 | |
|
759 | 0 | data.keep[0] = xstrfmt("multi-pack-index-%s.%s", keep_hash, ext); |
760 | 0 | data.keep_nr = 1; |
761 | 0 | } |
762 | 0 | data.ext = ext; |
763 | |
|
764 | 0 | for_each_file_in_pack_dir(source->path, |
765 | 0 | clear_midx_file_ext, |
766 | 0 | &data); |
767 | |
|
768 | 0 | if (keep_hash) |
769 | 0 | free(data.keep[0]); |
770 | 0 | free(data.keep); |
771 | 0 | } |
772 | | |
773 | | void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext, |
774 | | char **keep_hashes, |
775 | | uint32_t hashes_nr) |
776 | 0 | { |
777 | 0 | struct clear_midx_data data; |
778 | 0 | uint32_t i; |
779 | |
|
780 | 0 | memset(&data, 0, sizeof(struct clear_midx_data)); |
781 | |
|
782 | 0 | ALLOC_ARRAY(data.keep, hashes_nr); |
783 | 0 | for (i = 0; i < hashes_nr; i++) |
784 | 0 | data.keep[i] = xstrfmt("multi-pack-index-%s.%s", keep_hashes[i], |
785 | 0 | ext); |
786 | 0 | data.keep_nr = hashes_nr; |
787 | 0 | data.ext = ext; |
788 | |
|
789 | 0 | for_each_file_in_pack_subdir(source->path, "multi-pack-index.d", |
790 | 0 | clear_midx_file_ext, &data); |
791 | |
|
792 | 0 | for (i = 0; i < hashes_nr; i++) |
793 | 0 | free(data.keep[i]); |
794 | 0 | free(data.keep); |
795 | 0 | } |
796 | | |
797 | | void clear_midx_file(struct repository *r) |
798 | 0 | { |
799 | 0 | struct strbuf midx = STRBUF_INIT; |
800 | |
|
801 | 0 | get_midx_filename(r->objects->sources, &midx); |
802 | |
|
803 | 0 | if (r->objects) { |
804 | 0 | struct odb_source *source; |
805 | |
|
806 | 0 | for (source = r->objects->sources; source; source = source->next) { |
807 | 0 | if (source->midx) |
808 | 0 | close_midx(source->midx); |
809 | 0 | source->midx = NULL; |
810 | 0 | } |
811 | 0 | } |
812 | |
|
813 | 0 | if (remove_path(midx.buf)) |
814 | 0 | die(_("failed to clear multi-pack-index at %s"), midx.buf); |
815 | | |
816 | 0 | clear_midx_files_ext(r->objects->sources, MIDX_EXT_BITMAP, NULL); |
817 | 0 | clear_midx_files_ext(r->objects->sources, MIDX_EXT_REV, NULL); |
818 | |
|
819 | 0 | strbuf_release(&midx); |
820 | 0 | } |
821 | | |
/* Set to 1 by midx_report(); read back by the verification code. */
static int verify_midx_error;

/*
 * Print a printf-style message followed by a newline to stderr and flag
 * that a verification problem was found.
 */
__attribute__((format (printf, 1, 2)))
static void midx_report(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);
	fputc('\n', stderr);

	verify_midx_error = 1;
}
834 | | |
/* Associates an object position in the MIDX with the id of its pack. */
struct pair_pos_vs_id
{
	uint32_t pos;
	uint32_t pack_int_id;
};

/*
 * qsort() comparator ordering pairs by descending `pack_int_id`.
 *
 * The ids are compared explicitly rather than subtracted: the difference
 * of two uint32_t values does not fit in an int, so a subtraction yields
 * the wrong sign once the ids are 2^31 or more apart.
 */
static int compare_pair_pos_vs_id(const void *_a, const void *_b)
{
	const struct pair_pos_vs_id *a = _a;
	const struct pair_pos_vs_id *b = _b;

	if (b->pack_int_id > a->pack_int_id)
		return 1;
	if (b->pack_int_id < a->pack_int_id)
		return -1;
	return 0;
}
848 | | |
/*
 * Calling display_progress() for every item is needlessly expensive, so
 * only forward every SPARSE_PROGRESS_INTERVAL-th count. The interval is
 * an arbitrary choice.
 */
#define SPARSE_PROGRESS_INTERVAL (1 << 12)
#define midx_display_sparse_progress(progress, n) \
	do { \
		uint64_t sparse_n_ = (n); \
		if (!(sparse_n_ % SPARSE_PROGRESS_INTERVAL)) \
			display_progress(progress, sparse_n_); \
	} while (0)
860 | | |
861 | | int verify_midx_file(struct odb_source *source, unsigned flags) |
862 | 0 | { |
863 | 0 | struct repository *r = source->odb->repo; |
864 | 0 | struct pair_pos_vs_id *pairs = NULL; |
865 | 0 | uint32_t i; |
866 | 0 | struct progress *progress = NULL; |
867 | 0 | struct multi_pack_index *m = load_multi_pack_index(source); |
868 | 0 | struct multi_pack_index *curr; |
869 | 0 | verify_midx_error = 0; |
870 | |
|
871 | 0 | if (!m) { |
872 | 0 | int result = 0; |
873 | 0 | struct stat sb; |
874 | 0 | struct strbuf filename = STRBUF_INIT; |
875 | |
|
876 | 0 | get_midx_filename(source, &filename); |
877 | |
|
878 | 0 | if (!stat(filename.buf, &sb)) { |
879 | 0 | error(_("multi-pack-index file exists, but failed to parse")); |
880 | 0 | result = 1; |
881 | 0 | } |
882 | 0 | strbuf_release(&filename); |
883 | 0 | return result; |
884 | 0 | } |
885 | | |
886 | 0 | if (!midx_checksum_valid(m)) |
887 | 0 | midx_report(_("incorrect checksum")); |
888 | |
|
889 | 0 | if (flags & MIDX_PROGRESS) |
890 | 0 | progress = start_delayed_progress(r, |
891 | 0 | _("Looking for referenced packfiles"), |
892 | 0 | m->num_packs + m->num_packs_in_base); |
893 | 0 | for (i = 0; i < m->num_packs + m->num_packs_in_base; i++) { |
894 | 0 | if (prepare_midx_pack(m, i)) |
895 | 0 | midx_report("failed to load pack in position %d", i); |
896 | |
|
897 | 0 | display_progress(progress, i + 1); |
898 | 0 | } |
899 | 0 | stop_progress(&progress); |
900 | |
|
901 | 0 | if (m->num_objects == 0) { |
902 | 0 | midx_report(_("the midx contains no oid")); |
903 | | /* |
904 | | * Remaining tests assume that we have objects, so we can |
905 | | * return here. |
906 | | */ |
907 | 0 | goto cleanup; |
908 | 0 | } |
909 | | |
910 | 0 | if (flags & MIDX_PROGRESS) |
911 | 0 | progress = start_sparse_progress(r, |
912 | 0 | _("Verifying OID order in multi-pack-index"), |
913 | 0 | m->num_objects - 1); |
914 | |
|
915 | 0 | for (curr = m; curr; curr = curr->base_midx) { |
916 | 0 | for (i = 0; i < m->num_objects - 1; i++) { |
917 | 0 | struct object_id oid1, oid2; |
918 | |
|
919 | 0 | nth_midxed_object_oid(&oid1, m, m->num_objects_in_base + i); |
920 | 0 | nth_midxed_object_oid(&oid2, m, m->num_objects_in_base + i + 1); |
921 | |
|
922 | 0 | if (oidcmp(&oid1, &oid2) >= 0) |
923 | 0 | midx_report(_("oid lookup out of order: oid[%d] = %s >= %s = oid[%d]"), |
924 | 0 | i, oid_to_hex(&oid1), oid_to_hex(&oid2), i + 1); |
925 | |
|
926 | 0 | midx_display_sparse_progress(progress, i + 1); |
927 | 0 | } |
928 | 0 | } |
929 | 0 | stop_progress(&progress); |
930 | | |
931 | | /* |
932 | | * Create an array mapping each object to its packfile id. Sort it |
933 | | * to group the objects by packfile. Use this permutation to visit |
934 | | * each of the objects and only require 1 packfile to be open at a |
935 | | * time. |
936 | | */ |
937 | 0 | ALLOC_ARRAY(pairs, m->num_objects + m->num_objects_in_base); |
938 | 0 | for (i = 0; i < m->num_objects + m->num_objects_in_base; i++) { |
939 | 0 | pairs[i].pos = i; |
940 | 0 | pairs[i].pack_int_id = nth_midxed_pack_int_id(m, i); |
941 | 0 | } |
942 | |
|
943 | 0 | if (flags & MIDX_PROGRESS) |
944 | 0 | progress = start_sparse_progress(r, |
945 | 0 | _("Sorting objects by packfile"), |
946 | 0 | m->num_objects); |
947 | 0 | display_progress(progress, 0); /* TODO: Measure QSORT() progress */ |
948 | 0 | QSORT(pairs, m->num_objects, compare_pair_pos_vs_id); |
949 | 0 | stop_progress(&progress); |
950 | |
|
951 | 0 | if (flags & MIDX_PROGRESS) |
952 | 0 | progress = start_sparse_progress(r, |
953 | 0 | _("Verifying object offsets"), |
954 | 0 | m->num_objects); |
955 | 0 | for (i = 0; i < m->num_objects + m->num_objects_in_base; i++) { |
956 | 0 | struct object_id oid; |
957 | 0 | struct pack_entry e; |
958 | 0 | off_t m_offset, p_offset; |
959 | |
|
960 | 0 | if (i > 0 && pairs[i-1].pack_int_id != pairs[i].pack_int_id && |
961 | 0 | nth_midxed_pack(m, pairs[i-1].pack_int_id)) { |
962 | 0 | uint32_t pack_int_id = pairs[i-1].pack_int_id; |
963 | 0 | struct packed_git *p = nth_midxed_pack(m, pack_int_id); |
964 | |
|
965 | 0 | close_pack_fd(p); |
966 | 0 | close_pack_index(p); |
967 | 0 | } |
968 | |
|
969 | 0 | nth_midxed_object_oid(&oid, m, pairs[i].pos); |
970 | |
|
971 | 0 | if (!fill_midx_entry(m, &oid, &e)) { |
972 | 0 | midx_report(_("failed to load pack entry for oid[%d] = %s"), |
973 | 0 | pairs[i].pos, oid_to_hex(&oid)); |
974 | 0 | continue; |
975 | 0 | } |
976 | | |
977 | 0 | if (open_pack_index(e.p)) { |
978 | 0 | midx_report(_("failed to load pack-index for packfile %s"), |
979 | 0 | e.p->pack_name); |
980 | 0 | break; |
981 | 0 | } |
982 | | |
983 | 0 | m_offset = e.offset; |
984 | 0 | p_offset = find_pack_entry_one(&oid, e.p); |
985 | |
|
986 | 0 | if (m_offset != p_offset) |
987 | 0 | midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64" != %"PRIx64), |
988 | 0 | pairs[i].pos, oid_to_hex(&oid), m_offset, p_offset); |
989 | |
|
990 | 0 | midx_display_sparse_progress(progress, i + 1); |
991 | 0 | } |
992 | 0 | stop_progress(&progress); |
993 | |
|
994 | 0 | cleanup: |
995 | 0 | free(pairs); |
996 | 0 | close_midx(m); |
997 | |
|
998 | 0 | return verify_midx_error; |
999 | 0 | } |