Line | Count | Source |
1 | | #define DISABLE_SIGN_COMPARE_WARNINGS |
2 | | |
3 | | #include "git-compat-util.h" |
4 | | #include "config.h" |
5 | | #include "dir.h" |
6 | | #include "hex.h" |
7 | | #include "packfile.h" |
8 | | #include "hash-lookup.h" |
9 | | #include "midx.h" |
10 | | #include "progress.h" |
11 | | #include "trace2.h" |
12 | | #include "chunk-format.h" |
13 | | #include "pack-bitmap.h" |
14 | | #include "pack-revindex.h" |
15 | | |
/*
 * Sentinel stored in a multi_pack_index's packs[] slot by
 * prepare_midx_pack() when loading that pack failed. It is distinct from
 * both NULL (not loaded yet) and any real packed_git pointer.
 */
#define MIDX_PACK_ERROR ((void *)(intptr_t)-1)
17 | | |
/*
 * Prototypes for non-static functions that are defined further down in
 * this file.
 * NOTE(review): presumably declared here because no included header
 * provides them -- confirm before moving or removing.
 */
int midx_checksum_valid(struct multi_pack_index *m);
void clear_midx_files_ext(struct odb_source *source, const char *ext,
const char *keep_hash);
void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext,
char **keep_hashes,
uint32_t hashes_nr);
int cmp_idx_or_pack_name(const char *idx_or_pack_name,
const char *idx_name);
26 | | |
27 | | const unsigned char *get_midx_checksum(struct multi_pack_index *m) |
28 | 0 | { |
29 | 0 | return m->data + m->data_len - m->source->odb->repo->hash_algo->rawsz; |
30 | 0 | } |
31 | | |
32 | | void get_midx_filename(struct odb_source *source, struct strbuf *out) |
33 | 0 | { |
34 | 0 | get_midx_filename_ext(source, out, NULL, NULL); |
35 | 0 | } |
36 | | |
37 | | void get_midx_filename_ext(struct odb_source *source, struct strbuf *out, |
38 | | const unsigned char *hash, const char *ext) |
39 | 0 | { |
40 | 0 | strbuf_addf(out, "%s/pack/multi-pack-index", source->path); |
41 | 0 | if (ext) |
42 | 0 | strbuf_addf(out, "-%s.%s", hash_to_hex_algop(hash, source->odb->repo->hash_algo), ext); |
43 | 0 | } |
44 | | |
45 | | static int midx_read_oid_fanout(const unsigned char *chunk_start, |
46 | | size_t chunk_size, void *data) |
47 | 0 | { |
48 | 0 | int i; |
49 | 0 | struct multi_pack_index *m = data; |
50 | 0 | m->chunk_oid_fanout = (uint32_t *)chunk_start; |
51 | |
|
52 | 0 | if (chunk_size != 4 * 256) { |
53 | 0 | error(_("multi-pack-index OID fanout is of the wrong size")); |
54 | 0 | return 1; |
55 | 0 | } |
56 | 0 | for (i = 0; i < 255; i++) { |
57 | 0 | uint32_t oid_fanout1 = ntohl(m->chunk_oid_fanout[i]); |
58 | 0 | uint32_t oid_fanout2 = ntohl(m->chunk_oid_fanout[i+1]); |
59 | |
|
60 | 0 | if (oid_fanout1 > oid_fanout2) { |
61 | 0 | error(_("oid fanout out of order: fanout[%d] = %"PRIx32" > %"PRIx32" = fanout[%d]"), |
62 | 0 | i, oid_fanout1, oid_fanout2, i + 1); |
63 | 0 | return 1; |
64 | 0 | } |
65 | 0 | } |
66 | 0 | m->num_objects = ntohl(m->chunk_oid_fanout[255]); |
67 | 0 | return 0; |
68 | 0 | } |
69 | | |
70 | | static int midx_read_oid_lookup(const unsigned char *chunk_start, |
71 | | size_t chunk_size, void *data) |
72 | 0 | { |
73 | 0 | struct multi_pack_index *m = data; |
74 | 0 | m->chunk_oid_lookup = chunk_start; |
75 | |
|
76 | 0 | if (chunk_size != st_mult(m->hash_len, m->num_objects)) { |
77 | 0 | error(_("multi-pack-index OID lookup chunk is the wrong size")); |
78 | 0 | return 1; |
79 | 0 | } |
80 | 0 | return 0; |
81 | 0 | } |
82 | | |
83 | | static int midx_read_object_offsets(const unsigned char *chunk_start, |
84 | | size_t chunk_size, void *data) |
85 | 0 | { |
86 | 0 | struct multi_pack_index *m = data; |
87 | 0 | m->chunk_object_offsets = chunk_start; |
88 | |
|
89 | 0 | if (chunk_size != st_mult(m->num_objects, MIDX_CHUNK_OFFSET_WIDTH)) { |
90 | 0 | error(_("multi-pack-index object offset chunk is the wrong size")); |
91 | 0 | return 1; |
92 | 0 | } |
93 | 0 | return 0; |
94 | 0 | } |
95 | | |
96 | | struct multi_pack_index *get_multi_pack_index(struct odb_source *source) |
97 | 0 | { |
98 | 0 | packfile_store_prepare(source->packfiles); |
99 | 0 | return source->packfiles->midx; |
100 | 0 | } |
101 | | |
102 | | static struct multi_pack_index *load_multi_pack_index_one(struct odb_source *source, |
103 | | const char *midx_name) |
104 | 0 | { |
105 | 0 | struct repository *r = source->odb->repo; |
106 | 0 | struct multi_pack_index *m = NULL; |
107 | 0 | int fd; |
108 | 0 | struct stat st; |
109 | 0 | size_t midx_size; |
110 | 0 | void *midx_map = NULL; |
111 | 0 | uint32_t hash_version; |
112 | 0 | uint32_t i; |
113 | 0 | const char *cur_pack_name; |
114 | 0 | struct chunkfile *cf = NULL; |
115 | |
|
116 | 0 | fd = git_open(midx_name); |
117 | |
|
118 | 0 | if (fd < 0) |
119 | 0 | goto cleanup_fail; |
120 | 0 | if (fstat(fd, &st)) { |
121 | 0 | error_errno(_("failed to read %s"), midx_name); |
122 | 0 | goto cleanup_fail; |
123 | 0 | } |
124 | | |
125 | 0 | midx_size = xsize_t(st.st_size); |
126 | |
|
127 | 0 | if (midx_size < (MIDX_HEADER_SIZE + r->hash_algo->rawsz)) { |
128 | 0 | error(_("multi-pack-index file %s is too small"), midx_name); |
129 | 0 | goto cleanup_fail; |
130 | 0 | } |
131 | | |
132 | 0 | midx_map = xmmap(NULL, midx_size, PROT_READ, MAP_PRIVATE, fd, 0); |
133 | 0 | close(fd); |
134 | |
|
135 | 0 | CALLOC_ARRAY(m, 1); |
136 | 0 | m->data = midx_map; |
137 | 0 | m->data_len = midx_size; |
138 | 0 | m->source = source; |
139 | |
|
140 | 0 | m->signature = get_be32(m->data); |
141 | 0 | if (m->signature != MIDX_SIGNATURE) |
142 | 0 | die(_("multi-pack-index signature 0x%08x does not match signature 0x%08x"), |
143 | 0 | m->signature, MIDX_SIGNATURE); |
144 | | |
145 | 0 | m->version = m->data[MIDX_BYTE_FILE_VERSION]; |
146 | 0 | if (m->version != MIDX_VERSION) |
147 | 0 | die(_("multi-pack-index version %d not recognized"), |
148 | 0 | m->version); |
149 | | |
150 | 0 | hash_version = m->data[MIDX_BYTE_HASH_VERSION]; |
151 | 0 | if (hash_version != oid_version(r->hash_algo)) { |
152 | 0 | error(_("multi-pack-index hash version %u does not match version %u"), |
153 | 0 | hash_version, oid_version(r->hash_algo)); |
154 | 0 | goto cleanup_fail; |
155 | 0 | } |
156 | 0 | m->hash_len = r->hash_algo->rawsz; |
157 | |
|
158 | 0 | m->num_chunks = m->data[MIDX_BYTE_NUM_CHUNKS]; |
159 | |
|
160 | 0 | m->num_packs = get_be32(m->data + MIDX_BYTE_NUM_PACKS); |
161 | |
|
162 | 0 | m->preferred_pack_idx = -1; |
163 | |
|
164 | 0 | cf = init_chunkfile(NULL); |
165 | |
|
166 | 0 | if (read_table_of_contents(cf, m->data, midx_size, |
167 | 0 | MIDX_HEADER_SIZE, m->num_chunks, |
168 | 0 | MIDX_CHUNK_ALIGNMENT)) |
169 | 0 | goto cleanup_fail; |
170 | | |
171 | 0 | if (pair_chunk(cf, MIDX_CHUNKID_PACKNAMES, &m->chunk_pack_names, &m->chunk_pack_names_len)) |
172 | 0 | die(_("multi-pack-index required pack-name chunk missing or corrupted")); |
173 | 0 | if (read_chunk(cf, MIDX_CHUNKID_OIDFANOUT, midx_read_oid_fanout, m)) |
174 | 0 | die(_("multi-pack-index required OID fanout chunk missing or corrupted")); |
175 | 0 | if (read_chunk(cf, MIDX_CHUNKID_OIDLOOKUP, midx_read_oid_lookup, m)) |
176 | 0 | die(_("multi-pack-index required OID lookup chunk missing or corrupted")); |
177 | 0 | if (read_chunk(cf, MIDX_CHUNKID_OBJECTOFFSETS, midx_read_object_offsets, m)) |
178 | 0 | die(_("multi-pack-index required object offsets chunk missing or corrupted")); |
179 | | |
180 | 0 | pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets, |
181 | 0 | &m->chunk_large_offsets_len); |
182 | 0 | if (git_env_bool("GIT_TEST_MIDX_READ_BTMP", 1)) |
183 | 0 | pair_chunk(cf, MIDX_CHUNKID_BITMAPPEDPACKS, |
184 | 0 | (const unsigned char **)&m->chunk_bitmapped_packs, |
185 | 0 | &m->chunk_bitmapped_packs_len); |
186 | |
|
187 | 0 | if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1)) |
188 | 0 | pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex, |
189 | 0 | &m->chunk_revindex_len); |
190 | |
|
191 | 0 | CALLOC_ARRAY(m->pack_names, m->num_packs); |
192 | 0 | CALLOC_ARRAY(m->packs, m->num_packs); |
193 | |
|
194 | 0 | cur_pack_name = (const char *)m->chunk_pack_names; |
195 | 0 | for (i = 0; i < m->num_packs; i++) { |
196 | 0 | const char *end; |
197 | 0 | size_t avail = m->chunk_pack_names_len - |
198 | 0 | (cur_pack_name - (const char *)m->chunk_pack_names); |
199 | |
|
200 | 0 | m->pack_names[i] = cur_pack_name; |
201 | |
|
202 | 0 | end = memchr(cur_pack_name, '\0', avail); |
203 | 0 | if (!end) |
204 | 0 | die(_("multi-pack-index pack-name chunk is too short")); |
205 | 0 | cur_pack_name = end + 1; |
206 | |
|
207 | 0 | if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0) |
208 | 0 | die(_("multi-pack-index pack names out of order: '%s' before '%s'"), |
209 | 0 | m->pack_names[i - 1], |
210 | 0 | m->pack_names[i]); |
211 | 0 | } |
212 | | |
213 | 0 | trace2_data_intmax("midx", r, "load/num_packs", m->num_packs); |
214 | 0 | trace2_data_intmax("midx", r, "load/num_objects", m->num_objects); |
215 | |
|
216 | 0 | free_chunkfile(cf); |
217 | 0 | return m; |
218 | | |
219 | 0 | cleanup_fail: |
220 | 0 | free(m); |
221 | 0 | free_chunkfile(cf); |
222 | 0 | if (midx_map) |
223 | 0 | munmap(midx_map, midx_size); |
224 | 0 | if (0 <= fd) |
225 | 0 | close(fd); |
226 | 0 | return NULL; |
227 | 0 | } |
228 | | |
229 | | void get_midx_chain_dirname(struct odb_source *source, struct strbuf *buf) |
230 | 0 | { |
231 | 0 | strbuf_addf(buf, "%s/pack/multi-pack-index.d", source->path); |
232 | 0 | } |
233 | | |
/*
 * Append the path of the MIDX chain file of `source` to `buf`: the chain
 * directory followed by "/multi-pack-index-chain".
 */
void get_midx_chain_filename(struct odb_source *source, struct strbuf *buf)
{
	/* Directory part first ... */
	get_midx_chain_dirname(source, buf);
	/* ... then the fixed file name inside it. */
	strbuf_addstr(buf, "/multi-pack-index-chain");
}
239 | | |
240 | | void get_split_midx_filename_ext(struct odb_source *source, struct strbuf *buf, |
241 | | const unsigned char *hash, const char *ext) |
242 | 0 | { |
243 | 0 | get_midx_chain_dirname(source, buf); |
244 | 0 | strbuf_addf(buf, "/multi-pack-index-%s.%s", |
245 | 0 | hash_to_hex_algop(hash, source->odb->repo->hash_algo), ext); |
246 | 0 | } |
247 | | |
248 | | static int open_multi_pack_index_chain(const struct git_hash_algo *hash_algo, |
249 | | const char *chain_file, int *fd, |
250 | | struct stat *st) |
251 | 0 | { |
252 | 0 | *fd = git_open(chain_file); |
253 | 0 | if (*fd < 0) |
254 | 0 | return 0; |
255 | 0 | if (fstat(*fd, st)) { |
256 | 0 | close(*fd); |
257 | 0 | return 0; |
258 | 0 | } |
259 | 0 | if (st->st_size < hash_algo->hexsz) { |
260 | 0 | close(*fd); |
261 | 0 | if (!st->st_size) { |
262 | | /* treat empty files the same as missing */ |
263 | 0 | errno = ENOENT; |
264 | 0 | } else { |
265 | 0 | warning(_("multi-pack-index chain file too small")); |
266 | 0 | errno = EINVAL; |
267 | 0 | } |
268 | 0 | return 0; |
269 | 0 | } |
270 | 0 | return 1; |
271 | 0 | } |
272 | | |
273 | | static int add_midx_to_chain(struct multi_pack_index *midx, |
274 | | struct multi_pack_index *midx_chain) |
275 | 0 | { |
276 | 0 | if (midx_chain) { |
277 | 0 | if (unsigned_add_overflows(midx_chain->num_packs, |
278 | 0 | midx_chain->num_packs_in_base)) { |
279 | 0 | warning(_("pack count in base MIDX too high: %"PRIuMAX), |
280 | 0 | (uintmax_t)midx_chain->num_packs_in_base); |
281 | 0 | return 0; |
282 | 0 | } |
283 | 0 | if (unsigned_add_overflows(midx_chain->num_objects, |
284 | 0 | midx_chain->num_objects_in_base)) { |
285 | 0 | warning(_("object count in base MIDX too high: %"PRIuMAX), |
286 | 0 | (uintmax_t)midx_chain->num_objects_in_base); |
287 | 0 | return 0; |
288 | 0 | } |
289 | 0 | midx->num_packs_in_base = midx_chain->num_packs + |
290 | 0 | midx_chain->num_packs_in_base; |
291 | 0 | midx->num_objects_in_base = midx_chain->num_objects + |
292 | 0 | midx_chain->num_objects_in_base; |
293 | 0 | } |
294 | | |
295 | 0 | midx->base_midx = midx_chain; |
296 | 0 | midx->has_chain = 1; |
297 | |
|
298 | 0 | return 1; |
299 | 0 | } |
300 | | |
301 | | static struct multi_pack_index *load_midx_chain_fd_st(struct odb_source *source, |
302 | | int fd, struct stat *st, |
303 | | int *incomplete_chain) |
304 | 0 | { |
305 | 0 | const struct git_hash_algo *hash_algo = source->odb->repo->hash_algo; |
306 | 0 | struct multi_pack_index *midx_chain = NULL; |
307 | 0 | struct strbuf buf = STRBUF_INIT; |
308 | 0 | int valid = 1; |
309 | 0 | uint32_t i, count; |
310 | 0 | FILE *fp = xfdopen(fd, "r"); |
311 | |
|
312 | 0 | count = st->st_size / (hash_algo->hexsz + 1); |
313 | |
|
314 | 0 | for (i = 0; i < count; i++) { |
315 | 0 | struct multi_pack_index *m; |
316 | 0 | struct object_id layer; |
317 | |
|
318 | 0 | if (strbuf_getline_lf(&buf, fp) == EOF) |
319 | 0 | break; |
320 | | |
321 | 0 | if (get_oid_hex_algop(buf.buf, &layer, hash_algo)) { |
322 | 0 | warning(_("invalid multi-pack-index chain: line '%s' " |
323 | 0 | "not a hash"), |
324 | 0 | buf.buf); |
325 | 0 | valid = 0; |
326 | 0 | break; |
327 | 0 | } |
328 | | |
329 | 0 | valid = 0; |
330 | |
|
331 | 0 | strbuf_reset(&buf); |
332 | 0 | get_split_midx_filename_ext(source, &buf, |
333 | 0 | layer.hash, MIDX_EXT_MIDX); |
334 | 0 | m = load_multi_pack_index_one(source, buf.buf); |
335 | |
|
336 | 0 | if (m) { |
337 | 0 | if (add_midx_to_chain(m, midx_chain)) { |
338 | 0 | midx_chain = m; |
339 | 0 | valid = 1; |
340 | 0 | } else { |
341 | 0 | close_midx(m); |
342 | 0 | } |
343 | 0 | } |
344 | 0 | if (!valid) { |
345 | 0 | warning(_("unable to find all multi-pack index files")); |
346 | 0 | break; |
347 | 0 | } |
348 | 0 | } |
349 | |
|
350 | 0 | fclose(fp); |
351 | 0 | strbuf_release(&buf); |
352 | |
|
353 | 0 | *incomplete_chain = !valid; |
354 | 0 | return midx_chain; |
355 | 0 | } |
356 | | |
357 | | static struct multi_pack_index *load_multi_pack_index_chain(struct odb_source *source) |
358 | 0 | { |
359 | 0 | struct strbuf chain_file = STRBUF_INIT; |
360 | 0 | struct stat st; |
361 | 0 | int fd; |
362 | 0 | struct multi_pack_index *m = NULL; |
363 | |
|
364 | 0 | get_midx_chain_filename(source, &chain_file); |
365 | 0 | if (open_multi_pack_index_chain(source->odb->repo->hash_algo, chain_file.buf, &fd, &st)) { |
366 | 0 | int incomplete; |
367 | | /* ownership of fd is taken over by load function */ |
368 | 0 | m = load_midx_chain_fd_st(source, fd, &st, &incomplete); |
369 | 0 | } |
370 | |
|
371 | 0 | strbuf_release(&chain_file); |
372 | 0 | return m; |
373 | 0 | } |
374 | | |
375 | | struct multi_pack_index *load_multi_pack_index(struct odb_source *source) |
376 | 0 | { |
377 | 0 | struct strbuf midx_name = STRBUF_INIT; |
378 | 0 | struct multi_pack_index *m; |
379 | |
|
380 | 0 | get_midx_filename(source, &midx_name); |
381 | |
|
382 | 0 | m = load_multi_pack_index_one(source, midx_name.buf); |
383 | 0 | if (!m) |
384 | 0 | m = load_multi_pack_index_chain(source); |
385 | |
|
386 | 0 | strbuf_release(&midx_name); |
387 | |
|
388 | 0 | return m; |
389 | 0 | } |
390 | | |
391 | | void close_midx(struct multi_pack_index *m) |
392 | 0 | { |
393 | 0 | uint32_t i; |
394 | |
|
395 | 0 | if (!m) |
396 | 0 | return; |
397 | | |
398 | 0 | close_midx(m->base_midx); |
399 | |
|
400 | 0 | munmap((unsigned char *)m->data, m->data_len); |
401 | |
|
402 | 0 | for (i = 0; i < m->num_packs; i++) { |
403 | 0 | if (m->packs[i] && m->packs[i] != MIDX_PACK_ERROR) |
404 | 0 | m->packs[i]->multi_pack_index = 0; |
405 | 0 | } |
406 | 0 | FREE_AND_NULL(m->packs); |
407 | 0 | FREE_AND_NULL(m->pack_names); |
408 | 0 | free(m); |
409 | 0 | } |
410 | | |
411 | | static uint32_t midx_for_object(struct multi_pack_index **_m, uint32_t pos) |
412 | 0 | { |
413 | 0 | struct multi_pack_index *m = *_m; |
414 | 0 | while (m && pos < m->num_objects_in_base) |
415 | 0 | m = m->base_midx; |
416 | |
|
417 | 0 | if (!m) |
418 | 0 | BUG("NULL multi-pack-index for object position: %"PRIu32, pos); |
419 | | |
420 | 0 | if (pos >= m->num_objects + m->num_objects_in_base) |
421 | 0 | die(_("invalid MIDX object position, MIDX is likely corrupt")); |
422 | | |
423 | 0 | *_m = m; |
424 | |
|
425 | 0 | return pos - m->num_objects_in_base; |
426 | 0 | } |
427 | | |
428 | | static uint32_t midx_for_pack(struct multi_pack_index **_m, |
429 | | uint32_t pack_int_id) |
430 | 0 | { |
431 | 0 | struct multi_pack_index *m = *_m; |
432 | 0 | while (m && pack_int_id < m->num_packs_in_base) |
433 | 0 | m = m->base_midx; |
434 | |
|
435 | 0 | if (!m) |
436 | 0 | BUG("NULL multi-pack-index for pack ID: %"PRIu32, pack_int_id); |
437 | | |
438 | 0 | if (pack_int_id >= m->num_packs + m->num_packs_in_base) |
439 | 0 | die(_("bad pack-int-id: %u (%u total packs)"), |
440 | 0 | pack_int_id, m->num_packs + m->num_packs_in_base); |
441 | | |
442 | 0 | *_m = m; |
443 | |
|
444 | 0 | return pack_int_id - m->num_packs_in_base; |
445 | 0 | } |
446 | | |
447 | | int prepare_midx_pack(struct multi_pack_index *m, |
448 | | uint32_t pack_int_id) |
449 | 0 | { |
450 | 0 | struct strbuf pack_name = STRBUF_INIT; |
451 | 0 | struct packed_git *p; |
452 | |
|
453 | 0 | pack_int_id = midx_for_pack(&m, pack_int_id); |
454 | |
|
455 | 0 | if (m->packs[pack_int_id] == MIDX_PACK_ERROR) |
456 | 0 | return 1; |
457 | 0 | if (m->packs[pack_int_id]) |
458 | 0 | return 0; |
459 | | |
460 | 0 | strbuf_addf(&pack_name, "%s/pack/%s", m->source->path, |
461 | 0 | m->pack_names[pack_int_id]); |
462 | 0 | p = packfile_store_load_pack(m->source->packfiles, |
463 | 0 | pack_name.buf, m->source->local); |
464 | 0 | strbuf_release(&pack_name); |
465 | |
|
466 | 0 | if (!p) { |
467 | 0 | m->packs[pack_int_id] = MIDX_PACK_ERROR; |
468 | 0 | return 1; |
469 | 0 | } |
470 | | |
471 | 0 | p->multi_pack_index = 1; |
472 | 0 | m->packs[pack_int_id] = p; |
473 | |
|
474 | 0 | return 0; |
475 | 0 | } |
476 | | |
477 | | struct packed_git *nth_midxed_pack(struct multi_pack_index *m, |
478 | | uint32_t pack_int_id) |
479 | 0 | { |
480 | 0 | uint32_t local_pack_int_id = midx_for_pack(&m, pack_int_id); |
481 | 0 | if (m->packs[local_pack_int_id] == MIDX_PACK_ERROR) |
482 | 0 | return NULL; |
483 | 0 | return m->packs[local_pack_int_id]; |
484 | 0 | } |
485 | | |
/*
 * Size in bytes of one per-pack record in the bitmapped-packs chunk: two
 * 32-bit values, read by nth_bitmapped_pack() as bitmap_pos followed by
 * bitmap_nr.
 */
#define MIDX_CHUNK_BITMAPPED_PACKS_WIDTH (2 * sizeof(uint32_t))
487 | | |
488 | | int nth_bitmapped_pack(struct multi_pack_index *m, |
489 | | struct bitmapped_pack *bp, uint32_t pack_int_id) |
490 | 0 | { |
491 | 0 | uint32_t local_pack_int_id = midx_for_pack(&m, pack_int_id); |
492 | |
|
493 | 0 | if (!m->chunk_bitmapped_packs) |
494 | 0 | return error(_("MIDX does not contain the BTMP chunk")); |
495 | | |
496 | 0 | if (prepare_midx_pack(m, pack_int_id)) |
497 | 0 | return error(_("could not load bitmapped pack %"PRIu32), pack_int_id); |
498 | | |
499 | 0 | bp->p = m->packs[local_pack_int_id]; |
500 | 0 | bp->bitmap_pos = get_be32((char *)m->chunk_bitmapped_packs + |
501 | 0 | MIDX_CHUNK_BITMAPPED_PACKS_WIDTH * local_pack_int_id); |
502 | 0 | bp->bitmap_nr = get_be32((char *)m->chunk_bitmapped_packs + |
503 | 0 | MIDX_CHUNK_BITMAPPED_PACKS_WIDTH * local_pack_int_id + |
504 | 0 | sizeof(uint32_t)); |
505 | 0 | bp->pack_int_id = pack_int_id; |
506 | 0 | bp->from_midx = m; |
507 | |
|
508 | 0 | return 0; |
509 | 0 | } |
510 | | |
511 | | int bsearch_one_midx(const struct object_id *oid, struct multi_pack_index *m, |
512 | | uint32_t *result) |
513 | 0 | { |
514 | 0 | int ret = bsearch_hash(oid->hash, m->chunk_oid_fanout, |
515 | 0 | m->chunk_oid_lookup, |
516 | 0 | m->source->odb->repo->hash_algo->rawsz, |
517 | 0 | result); |
518 | 0 | if (result) |
519 | 0 | *result += m->num_objects_in_base; |
520 | 0 | return ret; |
521 | 0 | } |
522 | | |
523 | | int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, |
524 | | uint32_t *result) |
525 | 0 | { |
526 | 0 | for (; m; m = m->base_midx) |
527 | 0 | if (bsearch_one_midx(oid, m, result)) |
528 | 0 | return 1; |
529 | 0 | return 0; |
530 | 0 | } |
531 | | |
532 | | int midx_has_oid(struct multi_pack_index *m, const struct object_id *oid) |
533 | 0 | { |
534 | 0 | return bsearch_midx(oid, m, NULL); |
535 | 0 | } |
536 | | |
537 | | struct object_id *nth_midxed_object_oid(struct object_id *oid, |
538 | | struct multi_pack_index *m, |
539 | | uint32_t n) |
540 | 0 | { |
541 | 0 | if (n >= m->num_objects + m->num_objects_in_base) |
542 | 0 | return NULL; |
543 | | |
544 | 0 | n = midx_for_object(&m, n); |
545 | |
|
546 | 0 | oidread(oid, m->chunk_oid_lookup + st_mult(m->hash_len, n), |
547 | 0 | m->source->odb->repo->hash_algo); |
548 | 0 | return oid; |
549 | 0 | } |
550 | | |
551 | | off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos) |
552 | 0 | { |
553 | 0 | const unsigned char *offset_data; |
554 | 0 | uint32_t offset32; |
555 | |
|
556 | 0 | pos = midx_for_object(&m, pos); |
557 | |
|
558 | 0 | offset_data = m->chunk_object_offsets + (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH; |
559 | 0 | offset32 = get_be32(offset_data + sizeof(uint32_t)); |
560 | |
|
561 | 0 | if (m->chunk_large_offsets && offset32 & MIDX_LARGE_OFFSET_NEEDED) { |
562 | 0 | if (sizeof(off_t) < sizeof(uint64_t)) |
563 | 0 | die(_("multi-pack-index stores a 64-bit offset, but off_t is too small")); |
564 | | |
565 | 0 | offset32 ^= MIDX_LARGE_OFFSET_NEEDED; |
566 | 0 | if (offset32 >= m->chunk_large_offsets_len / sizeof(uint64_t)) |
567 | 0 | die(_("multi-pack-index large offset out of bounds")); |
568 | 0 | return get_be64(m->chunk_large_offsets + sizeof(uint64_t) * offset32); |
569 | 0 | } |
570 | | |
571 | 0 | return offset32; |
572 | 0 | } |
573 | | |
574 | | uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos) |
575 | 0 | { |
576 | 0 | pos = midx_for_object(&m, pos); |
577 | |
|
578 | 0 | return m->num_packs_in_base + get_be32(m->chunk_object_offsets + |
579 | 0 | (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH); |
580 | 0 | } |
581 | | |
582 | | int fill_midx_entry(struct multi_pack_index *m, |
583 | | const struct object_id *oid, |
584 | | struct pack_entry *e) |
585 | 0 | { |
586 | 0 | uint32_t pos; |
587 | 0 | uint32_t pack_int_id; |
588 | 0 | struct packed_git *p; |
589 | |
|
590 | 0 | if (!bsearch_midx(oid, m, &pos)) |
591 | 0 | return 0; |
592 | | |
593 | 0 | midx_for_object(&m, pos); |
594 | 0 | pack_int_id = nth_midxed_pack_int_id(m, pos); |
595 | |
|
596 | 0 | if (prepare_midx_pack(m, pack_int_id)) |
597 | 0 | return 0; |
598 | 0 | p = m->packs[pack_int_id - m->num_packs_in_base]; |
599 | | |
600 | | /* |
601 | | * We are about to tell the caller where they can locate the |
602 | | * requested object. We better make sure the packfile is |
603 | | * still here and can be accessed before supplying that |
604 | | * answer, as it may have been deleted since the MIDX was |
605 | | * loaded! |
606 | | */ |
607 | 0 | if (!is_pack_valid(p)) |
608 | 0 | return 0; |
609 | | |
610 | 0 | if (oidset_size(&p->bad_objects) && |
611 | 0 | oidset_contains(&p->bad_objects, oid)) |
612 | 0 | return 0; |
613 | | |
614 | 0 | e->offset = nth_midxed_offset(m, pos); |
615 | 0 | e->p = p; |
616 | |
|
617 | 0 | return 1; |
618 | 0 | } |
619 | | |
/*
 * Compare `idx_or_pack_name` (ending in ".idx" or ".pack") against
 * `idx_name` (ending in ".idx").
 *
 * Returns 0 when the names are identical or differ only in that the first
 * ends in "pack" where the second ends in "idx". Otherwise returns the
 * strcmp() ordering of the parts after the common prefix, which has the
 * same sign as a plain strcmp() of the full names, so the function can
 * drive a binary search over a naively sorted list.
 */
int cmp_idx_or_pack_name(const char *idx_or_pack_name,
			 const char *idx_name)
{
	const char *lhs = idx_or_pack_name;
	const char *rhs = idx_name;

	/* Advance both cursors over the shared prefix. */
	for (; *rhs && *rhs == *lhs; lhs++, rhs++)
		; /* nothing */

	/*
	 * A shared "pack-1234." prefix leaves "pack" vs. "idx", which counts
	 * as a match. Identical names leave two empty strings and fall
	 * through to the strcmp() below, which returns 0 for them.
	 *
	 * This also accepts "foopack" vs. "fooidx", but such names never
	 * occur in practice.
	 */
	if (!strcmp(rhs, "idx") && !strcmp(lhs, "pack"))
		return 0;

	/* Order by the first differing character, like a raw strcmp(). */
	return strcmp(lhs, rhs);
}
651 | | |
652 | | static int midx_contains_pack_1(struct multi_pack_index *m, |
653 | | const char *idx_or_pack_name) |
654 | 0 | { |
655 | 0 | uint32_t first = 0, last = m->num_packs; |
656 | |
|
657 | 0 | while (first < last) { |
658 | 0 | uint32_t mid = first + (last - first) / 2; |
659 | 0 | const char *current; |
660 | 0 | int cmp; |
661 | |
|
662 | 0 | current = m->pack_names[mid]; |
663 | 0 | cmp = cmp_idx_or_pack_name(idx_or_pack_name, current); |
664 | 0 | if (!cmp) |
665 | 0 | return 1; |
666 | 0 | if (cmp > 0) { |
667 | 0 | first = mid + 1; |
668 | 0 | continue; |
669 | 0 | } |
670 | 0 | last = mid; |
671 | 0 | } |
672 | | |
673 | 0 | return 0; |
674 | 0 | } |
675 | | |
676 | | int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name) |
677 | 0 | { |
678 | 0 | for (; m; m = m->base_midx) |
679 | 0 | if (midx_contains_pack_1(m, idx_or_pack_name)) |
680 | 0 | return 1; |
681 | 0 | return 0; |
682 | 0 | } |
683 | | |
684 | | int midx_preferred_pack(struct multi_pack_index *m, uint32_t *pack_int_id) |
685 | 0 | { |
686 | 0 | if (m->preferred_pack_idx == -1) { |
687 | 0 | uint32_t midx_pos; |
688 | 0 | if (load_midx_revindex(m)) { |
689 | 0 | m->preferred_pack_idx = -2; |
690 | 0 | return -1; |
691 | 0 | } |
692 | | |
693 | 0 | midx_pos = pack_pos_to_midx(m, m->num_objects_in_base); |
694 | |
|
695 | 0 | m->preferred_pack_idx = nth_midxed_pack_int_id(m, midx_pos); |
696 | |
|
697 | 0 | } else if (m->preferred_pack_idx == -2) |
698 | 0 | return -1; /* no revindex */ |
699 | | |
700 | 0 | *pack_int_id = m->preferred_pack_idx; |
701 | 0 | return 0; |
702 | 0 | } |
703 | | |
704 | | int prepare_multi_pack_index_one(struct odb_source *source) |
705 | 0 | { |
706 | 0 | struct repository *r = source->odb->repo; |
707 | |
|
708 | 0 | prepare_repo_settings(r); |
709 | 0 | if (!r->settings.core_multi_pack_index) |
710 | 0 | return 0; |
711 | | |
712 | 0 | if (source->packfiles->midx) |
713 | 0 | return 1; |
714 | | |
715 | 0 | source->packfiles->midx = load_multi_pack_index(source); |
716 | |
|
717 | 0 | return !!source->packfiles->midx; |
718 | 0 | } |
719 | | |
720 | | int midx_checksum_valid(struct multi_pack_index *m) |
721 | 0 | { |
722 | 0 | return hashfile_checksum_valid(m->source->odb->repo->hash_algo, |
723 | 0 | m->data, m->data_len); |
724 | 0 | } |
725 | | |
/*
 * Callback state for clear_midx_file_ext(): which MIDX auxiliary files to
 * consider and which of them to spare.
 */
struct clear_midx_data {
	/* File names (not full paths) that must not be removed. */
	char **keep;
	/* Number of entries in `keep`. */
	uint32_t keep_nr;
	/* Only files whose name ends with this string are considered. */
	const char *ext;
};
731 | | |
732 | | static void clear_midx_file_ext(const char *full_path, size_t full_path_len UNUSED, |
733 | | const char *file_name, void *_data) |
734 | 0 | { |
735 | 0 | struct clear_midx_data *data = _data; |
736 | 0 | uint32_t i; |
737 | |
|
738 | 0 | if (!(starts_with(file_name, "multi-pack-index-") && |
739 | 0 | ends_with(file_name, data->ext))) |
740 | 0 | return; |
741 | 0 | for (i = 0; i < data->keep_nr; i++) { |
742 | 0 | if (!strcmp(data->keep[i], file_name)) |
743 | 0 | return; |
744 | 0 | } |
745 | 0 | if (unlink(full_path)) |
746 | 0 | die_errno(_("failed to remove %s"), full_path); |
747 | 0 | } |
748 | | |
749 | | void clear_midx_files_ext(struct odb_source *source, const char *ext, |
750 | | const char *keep_hash) |
751 | 0 | { |
752 | 0 | struct clear_midx_data data; |
753 | 0 | memset(&data, 0, sizeof(struct clear_midx_data)); |
754 | |
|
755 | 0 | if (keep_hash) { |
756 | 0 | ALLOC_ARRAY(data.keep, 1); |
757 | |
|
758 | 0 | data.keep[0] = xstrfmt("multi-pack-index-%s.%s", keep_hash, ext); |
759 | 0 | data.keep_nr = 1; |
760 | 0 | } |
761 | 0 | data.ext = ext; |
762 | |
|
763 | 0 | for_each_file_in_pack_dir(source->path, |
764 | 0 | clear_midx_file_ext, |
765 | 0 | &data); |
766 | |
|
767 | 0 | if (keep_hash) |
768 | 0 | free(data.keep[0]); |
769 | 0 | free(data.keep); |
770 | 0 | } |
771 | | |
772 | | void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext, |
773 | | char **keep_hashes, |
774 | | uint32_t hashes_nr) |
775 | 0 | { |
776 | 0 | struct clear_midx_data data; |
777 | 0 | uint32_t i; |
778 | |
|
779 | 0 | memset(&data, 0, sizeof(struct clear_midx_data)); |
780 | |
|
781 | 0 | ALLOC_ARRAY(data.keep, hashes_nr); |
782 | 0 | for (i = 0; i < hashes_nr; i++) |
783 | 0 | data.keep[i] = xstrfmt("multi-pack-index-%s.%s", keep_hashes[i], |
784 | 0 | ext); |
785 | 0 | data.keep_nr = hashes_nr; |
786 | 0 | data.ext = ext; |
787 | |
|
788 | 0 | for_each_file_in_pack_subdir(source->path, "multi-pack-index.d", |
789 | 0 | clear_midx_file_ext, &data); |
790 | |
|
791 | 0 | for (i = 0; i < hashes_nr; i++) |
792 | 0 | free(data.keep[i]); |
793 | 0 | free(data.keep); |
794 | 0 | } |
795 | | |
796 | | void clear_midx_file(struct repository *r) |
797 | 0 | { |
798 | 0 | struct strbuf midx = STRBUF_INIT; |
799 | |
|
800 | 0 | get_midx_filename(r->objects->sources, &midx); |
801 | |
|
802 | 0 | if (r->objects) { |
803 | 0 | struct odb_source *source; |
804 | |
|
805 | 0 | for (source = r->objects->sources; source; source = source->next) { |
806 | 0 | if (source->packfiles->midx) |
807 | 0 | close_midx(source->packfiles->midx); |
808 | 0 | source->packfiles->midx = NULL; |
809 | 0 | } |
810 | 0 | } |
811 | |
|
812 | 0 | if (remove_path(midx.buf)) |
813 | 0 | die(_("failed to clear multi-pack-index at %s"), midx.buf); |
814 | | |
815 | 0 | clear_midx_files_ext(r->objects->sources, MIDX_EXT_BITMAP, NULL); |
816 | 0 | clear_midx_files_ext(r->objects->sources, MIDX_EXT_REV, NULL); |
817 | |
|
818 | 0 | strbuf_release(&midx); |
819 | 0 | } |
820 | | |
/* Set to 1 by midx_report(); reset and returned by verify_midx_file(). */
static int verify_midx_error;

/*
 * Record that verification found a problem and print the printf-style
 * message, followed by a newline, to stderr.
 */
__attribute__((format (printf, 1, 2)))
static void midx_report(const char *fmt, ...)
{
	va_list args;

	verify_midx_error = 1;

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);

	fprintf(stderr, "\n");
}
833 | | |
/* Pairs an object's MIDX position with the id of the pack holding it. */
struct pair_pos_vs_id
{
	uint32_t pos;
	uint32_t pack_int_id;
};

/*
 * qsort() comparator ordering pairs by descending pack_int_id.
 *
 * The ids are compared explicitly instead of being subtracted: the
 * difference of two uint32_t values does not fit in an int, so returning
 * it yields the wrong sign whenever the ids are more than INT_MAX apart.
 */
static int compare_pair_pos_vs_id(const void *_a, const void *_b)
{
	const struct pair_pos_vs_id *a = _a;
	const struct pair_pos_vs_id *b = _b;

	if (b->pack_int_id > a->pack_int_id)
		return 1;
	if (b->pack_int_id < a->pack_int_id)
		return -1;
	return 0;
}
847 | | |
/*
 * Limit calls to display_progress() for performance reasons.
 * The interval here was arbitrarily chosen.
 *
 * midx_display_sparse_progress() evaluates `n` exactly once and forwards
 * it to display_progress() only when it is a multiple of
 * SPARSE_PROGRESS_INTERVAL (which must therefore stay a power of two for
 * the bit-mask test to work).
 */
#define SPARSE_PROGRESS_INTERVAL (1 << 12)
#define midx_display_sparse_progress(progress, n) \
	do { \
		uint64_t _n = (n); \
		if ((_n & (SPARSE_PROGRESS_INTERVAL - 1)) == 0) \
			display_progress(progress, _n); \
	} while (0)
859 | | |
860 | | int verify_midx_file(struct odb_source *source, unsigned flags) |
861 | 0 | { |
862 | 0 | struct repository *r = source->odb->repo; |
863 | 0 | struct pair_pos_vs_id *pairs = NULL; |
864 | 0 | uint32_t i; |
865 | 0 | struct progress *progress = NULL; |
866 | 0 | struct multi_pack_index *m = load_multi_pack_index(source); |
867 | 0 | struct multi_pack_index *curr; |
868 | 0 | verify_midx_error = 0; |
869 | |
|
870 | 0 | if (!m) { |
871 | 0 | int result = 0; |
872 | 0 | struct stat sb; |
873 | 0 | struct strbuf filename = STRBUF_INIT; |
874 | |
|
875 | 0 | get_midx_filename(source, &filename); |
876 | |
|
877 | 0 | if (!stat(filename.buf, &sb)) { |
878 | 0 | error(_("multi-pack-index file exists, but failed to parse")); |
879 | 0 | result = 1; |
880 | 0 | } |
881 | 0 | strbuf_release(&filename); |
882 | 0 | return result; |
883 | 0 | } |
884 | | |
885 | 0 | if (!midx_checksum_valid(m)) |
886 | 0 | midx_report(_("incorrect checksum")); |
887 | |
|
888 | 0 | if (flags & MIDX_PROGRESS) |
889 | 0 | progress = start_delayed_progress(r, |
890 | 0 | _("Looking for referenced packfiles"), |
891 | 0 | m->num_packs + m->num_packs_in_base); |
892 | 0 | for (i = 0; i < m->num_packs + m->num_packs_in_base; i++) { |
893 | 0 | if (prepare_midx_pack(m, i)) |
894 | 0 | midx_report("failed to load pack in position %d", i); |
895 | |
|
896 | 0 | display_progress(progress, i + 1); |
897 | 0 | } |
898 | 0 | stop_progress(&progress); |
899 | |
|
900 | 0 | if (m->num_objects == 0) { |
901 | 0 | midx_report(_("the midx contains no oid")); |
902 | | /* |
903 | | * Remaining tests assume that we have objects, so we can |
904 | | * return here. |
905 | | */ |
906 | 0 | goto cleanup; |
907 | 0 | } |
908 | | |
909 | 0 | if (flags & MIDX_PROGRESS) |
910 | 0 | progress = start_sparse_progress(r, |
911 | 0 | _("Verifying OID order in multi-pack-index"), |
912 | 0 | m->num_objects - 1); |
913 | |
|
914 | 0 | for (curr = m; curr; curr = curr->base_midx) { |
915 | 0 | for (i = 0; i < m->num_objects - 1; i++) { |
916 | 0 | struct object_id oid1, oid2; |
917 | |
|
918 | 0 | nth_midxed_object_oid(&oid1, m, m->num_objects_in_base + i); |
919 | 0 | nth_midxed_object_oid(&oid2, m, m->num_objects_in_base + i + 1); |
920 | |
|
921 | 0 | if (oidcmp(&oid1, &oid2) >= 0) |
922 | 0 | midx_report(_("oid lookup out of order: oid[%d] = %s >= %s = oid[%d]"), |
923 | 0 | i, oid_to_hex(&oid1), oid_to_hex(&oid2), i + 1); |
924 | |
|
925 | 0 | midx_display_sparse_progress(progress, i + 1); |
926 | 0 | } |
927 | 0 | } |
928 | 0 | stop_progress(&progress); |
929 | | |
930 | | /* |
931 | | * Create an array mapping each object to its packfile id. Sort it |
932 | | * to group the objects by packfile. Use this permutation to visit |
933 | | * each of the objects and only require 1 packfile to be open at a |
934 | | * time. |
935 | | */ |
936 | 0 | ALLOC_ARRAY(pairs, m->num_objects + m->num_objects_in_base); |
937 | 0 | for (i = 0; i < m->num_objects + m->num_objects_in_base; i++) { |
938 | 0 | pairs[i].pos = i; |
939 | 0 | pairs[i].pack_int_id = nth_midxed_pack_int_id(m, i); |
940 | 0 | } |
941 | |
|
942 | 0 | if (flags & MIDX_PROGRESS) |
943 | 0 | progress = start_sparse_progress(r, |
944 | 0 | _("Sorting objects by packfile"), |
945 | 0 | m->num_objects); |
946 | 0 | display_progress(progress, 0); /* TODO: Measure QSORT() progress */ |
947 | 0 | QSORT(pairs, m->num_objects, compare_pair_pos_vs_id); |
948 | 0 | stop_progress(&progress); |
949 | |
|
950 | 0 | if (flags & MIDX_PROGRESS) |
951 | 0 | progress = start_sparse_progress(r, |
952 | 0 | _("Verifying object offsets"), |
953 | 0 | m->num_objects); |
954 | 0 | for (i = 0; i < m->num_objects + m->num_objects_in_base; i++) { |
955 | 0 | struct object_id oid; |
956 | 0 | struct pack_entry e; |
957 | 0 | off_t m_offset, p_offset; |
958 | |
|
959 | 0 | if (i > 0 && pairs[i-1].pack_int_id != pairs[i].pack_int_id && |
960 | 0 | nth_midxed_pack(m, pairs[i-1].pack_int_id)) { |
961 | 0 | uint32_t pack_int_id = pairs[i-1].pack_int_id; |
962 | 0 | struct packed_git *p = nth_midxed_pack(m, pack_int_id); |
963 | |
|
964 | 0 | close_pack_fd(p); |
965 | 0 | close_pack_index(p); |
966 | 0 | } |
967 | |
|
968 | 0 | nth_midxed_object_oid(&oid, m, pairs[i].pos); |
969 | |
|
970 | 0 | if (!fill_midx_entry(m, &oid, &e)) { |
971 | 0 | midx_report(_("failed to load pack entry for oid[%d] = %s"), |
972 | 0 | pairs[i].pos, oid_to_hex(&oid)); |
973 | 0 | continue; |
974 | 0 | } |
975 | | |
976 | 0 | if (open_pack_index(e.p)) { |
977 | 0 | midx_report(_("failed to load pack-index for packfile %s"), |
978 | 0 | e.p->pack_name); |
979 | 0 | break; |
980 | 0 | } |
981 | | |
982 | 0 | m_offset = e.offset; |
983 | 0 | p_offset = find_pack_entry_one(&oid, e.p); |
984 | |
|
985 | 0 | if (m_offset != p_offset) |
986 | 0 | midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64" != %"PRIx64), |
987 | 0 | pairs[i].pos, oid_to_hex(&oid), m_offset, p_offset); |
988 | |
|
989 | 0 | midx_display_sparse_progress(progress, i + 1); |
990 | 0 | } |
991 | 0 | stop_progress(&progress); |
992 | |
|
993 | 0 | cleanup: |
994 | 0 | free(pairs); |
995 | 0 | close_midx(m); |
996 | |
|
997 | 0 | return verify_midx_error; |
998 | 0 | } |