/src/git/builtin/index-pack.c
Line | Count | Source (jump to first uncovered line) |
1 | | #include "builtin.h" |
2 | | #include "config.h" |
3 | | #include "delta.h" |
4 | | #include "environment.h" |
5 | | #include "gettext.h" |
6 | | #include "hex.h" |
7 | | #include "pack.h" |
8 | | #include "csum-file.h" |
9 | | #include "blob.h" |
10 | | #include "commit.h" |
11 | | #include "tree.h" |
12 | | #include "progress.h" |
13 | | #include "fsck.h" |
14 | | #include "strbuf.h" |
15 | | #include "streaming.h" |
16 | | #include "thread-utils.h" |
17 | | #include "packfile.h" |
18 | | #include "pack-revindex.h" |
19 | | #include "object-file.h" |
20 | | #include "object-store-ll.h" |
21 | | #include "oid-array.h" |
22 | | #include "replace-object.h" |
23 | | #include "promisor-remote.h" |
24 | | #include "setup.h" |
25 | | |
/* Usage synopsis for the "git index-pack" command. */
static const char index_pack_usage[] =
"git index-pack [-v] [-o <index-file>] [--keep | --keep=<msg>] [--[no-]rev-index] [--verify] [--strict[=<msg-id>=<severity>...]] [--fsck-objects[=<msg-id>=<severity>...]] (<pack-file> | --stdin [--fix-thin] [<pack-file>])";
28 | | |
/*
 * One entry per object found in the pack.
 *
 * unpack_raw_entry() fills in idx.offset, idx.crc32, size, hdr_size and
 * type while the pack is being read.
 */
struct object_entry {
	struct pack_idx_entry idx;
	unsigned long size;		/* size taken from the entry header */
	unsigned char hdr_size;		/* bytes of header preceding the zlib data */
	signed char type;		/* type as stored in the entry header */
	signed char real_type;		/* NOTE(review): presumably the type after delta resolution - confirm */
};
36 | | |
/*
 * Per-object delta statistics.
 * NOTE(review): presumably only filled in when show_stat is set; the
 * code that writes these fields is outside this excerpt - confirm.
 */
struct object_stat {
	unsigned delta_depth;
	int base_object_no;
};
41 | | |
/*
 * A node in the tree of delta bases: an object together with the ranges
 * of ref-delta and ofs-delta entries that use it as their base.
 */
struct base_data {
	/* Initialized by make_base(). */
	struct base_data *base;		/* parent in the delta chain */
	struct object_entry *obj;
	/* Inclusive index ranges into ref_deltas[]; empty when last < first. */
	int ref_first, ref_last;
	/* Inclusive index ranges into ofs_deltas[]; empty when last < first. */
	int ofs_first, ofs_last;
	/*
	 * Threads should increment retain_data if they are about to call
	 * patch_delta() using this struct's data as a base, and decrement this
	 * when they are done. While retain_data is nonzero, this struct's data
	 * will not be freed even if the delta base cache limit is exceeded.
	 */
	int retain_data;
	/*
	 * The number of direct children that have not been fully processed
	 * (entered work_head, entered done_head, left done_head). When this
	 * number reaches zero, this struct base_data can be freed.
	 */
	int children_remaining;

	/* Not initialized by make_base(). */
	struct list_head list;		/* links this node into work_head or done_head */
	void *data;			/* reconstructed contents, or NULL if absent/pruned */
	unsigned long size;		/* size of "data"; counted in base_cache_used */
};
67 | | |
/*
 * Stack of struct base_data that have unprocessed children.
 * threaded_second_pass() uses this as a source of work (the other being the
 * objects array).
 *
 * prune_base_data() walks this list from the tail when it has to release
 * cached data.
 *
 * Guarded by work_mutex.
 */
static LIST_HEAD(work_head);
76 | | |
/*
 * Stack of struct base_data that have children, all of whom have been
 * processed or are being processed, and at least one child is being processed.
 * These struct base_data must be kept around until the last child is
 * processed.
 *
 * prune_base_data() releases cached data from this list before it turns
 * to work_head.
 *
 * Guarded by work_mutex.
 */
static LIST_HEAD(done_head);
86 | | |
/*
 * All threads share one delta base cache.
 *
 * base_cache_used is the sum of the sizes of all base_data.data buffers
 * currently held; prune_base_data() frees buffers once it exceeds
 * base_cache_limit.
 *
 * base_cache_used is guarded by work_mutex, and base_cache_limit is read-only
 * in a thread.
 */
static size_t base_cache_used;
static size_t base_cache_limit;
95 | | |
/*
 * Per-thread state. Each worker has its own descriptor on the pack file,
 * which unpack_data() reads from with xpread().
 */
struct thread_local {
	pthread_t thread;
	int pack_fd;
};
100 | | |
/*
 * Object flags used by index-pack: FLAG_LINK is set by mark_link() and
 * FLAG_CHECKED is set once an object has been checked (see check_object()
 * and sha1_object()).
 *
 * Remember to update object flag allocation in object.h
 */
#define FLAG_LINK (1u<<20)
#define FLAG_CHECKED (1u<<21)
104 | | |
/*
 * An OFS_DELTA object (index obj_no in objects[]), keyed by the pack
 * offset of its base so that find_ofs_delta() can binary-search for it.
 */
struct ofs_delta_entry {
	off_t offset;
	int obj_no;
};
109 | | |
/*
 * A REF_DELTA object (index obj_no in objects[]), keyed by the object ID
 * of its base so that find_ref_delta() can binary-search for it.
 */
struct ref_delta_entry {
	struct object_id oid;
	int obj_no;
};
114 | | |
/*
 * Object tables for the pack being indexed. ofs_deltas[] and ref_deltas[]
 * are binary-searched by find_ofs_delta() / find_ref_delta(), so they are
 * expected to be sorted by the time those are called.
 */
static struct object_entry *objects;
static struct object_stat *obj_stat;
static struct ofs_delta_entry *ofs_deltas;
static struct ref_delta_entry *ref_deltas;
/* State returned by get_thread_data() while threads are not active. */
static struct thread_local nothread_data;
static int nr_objects;		/* read from the pack header */
static int nr_ofs_deltas;
static int nr_ref_deltas;
static int ref_deltas_alloc;
static int nr_resolved_deltas;
static int nr_threads;

/* Option state. */
static int from_stdin;
static int strict;
static int do_fsck_object;
static struct fsck_options fsck_options = FSCK_OPTIONS_MISSING_GITMODULES;
static int verbose;
static const char *progress_title;
static int show_resolving_progress;
static int show_stat;
static int check_self_contained_and_connected;

static struct progress *progress;

/*
 * Input buffering, managed by fill(), use() and flush():
 * input_buffer[input_offset .. input_offset + input_len) holds the bytes
 * that have been read but not yet consumed.
 */
/* We always read in 4kB chunks. */
static unsigned char input_buffer[4096];
static unsigned int input_offset, input_len;
static off_t consumed_bytes;	/* total bytes passed to use() so far */
static off_t max_input_size;	/* 0 means no limit */
static unsigned deepest_delta;
static git_hash_ctx input_ctx;	/* running hash over the flushed input */
static uint32_t input_crc32;	/* CRC of the entry currently being read */
static int input_fd, output_fd;	/* output_fd is -1 when not reading stdin */
static const char *curr_pack;

/* Allocated by init_thread(), one element per worker thread. */
static struct thread_local *thread_data;
static int nr_dispatched;
/* Nonzero between init_thread() and cleanup_thread(). */
static int threads_active;
153 | | |
/*
 * The *_lock()/*_unlock() macros go through lock_mutex()/unlock_mutex(),
 * which do nothing unless threads_active is set.
 */

/* Initialized as a recursive mutex by init_thread(). */
static pthread_mutex_t read_mutex;
#define read_lock() lock_mutex(&read_mutex)
#define read_unlock() unlock_mutex(&read_mutex)

/* NOTE(review): the data guarded by counter_mutex is not visible in this excerpt. */
static pthread_mutex_t counter_mutex;
#define counter_lock() lock_mutex(&counter_mutex)
#define counter_unlock() unlock_mutex(&counter_mutex)

/* Guards work_head, done_head and base_cache_used. */
static pthread_mutex_t work_mutex;
#define work_lock() lock_mutex(&work_mutex)
#define work_unlock() unlock_mutex(&work_mutex)

/* Only initialized and destroyed when show_stat is set. */
static pthread_mutex_t deepest_delta_mutex;
#define deepest_delta_lock() lock_mutex(&deepest_delta_mutex)
#define deepest_delta_unlock() unlock_mutex(&deepest_delta_mutex)

/* Thread-specific key under which each thread's struct thread_local is stored. */
static pthread_key_t key;
171 | | |
172 | | static inline void lock_mutex(pthread_mutex_t *mutex) |
173 | 0 | { |
174 | 0 | if (threads_active) |
175 | 0 | pthread_mutex_lock(mutex); |
176 | 0 | } |
177 | | |
178 | | static inline void unlock_mutex(pthread_mutex_t *mutex) |
179 | 0 | { |
180 | 0 | if (threads_active) |
181 | 0 | pthread_mutex_unlock(mutex); |
182 | 0 | } |
183 | | |
184 | | /* |
185 | | * Mutex and conditional variable can't be statically-initialized on Windows. |
186 | | */ |
187 | | static void init_thread(void) |
188 | 0 | { |
189 | 0 | int i; |
190 | 0 | init_recursive_mutex(&read_mutex); |
191 | 0 | pthread_mutex_init(&counter_mutex, NULL); |
192 | 0 | pthread_mutex_init(&work_mutex, NULL); |
193 | 0 | if (show_stat) |
194 | 0 | pthread_mutex_init(&deepest_delta_mutex, NULL); |
195 | 0 | pthread_key_create(&key, NULL); |
196 | 0 | CALLOC_ARRAY(thread_data, nr_threads); |
197 | 0 | for (i = 0; i < nr_threads; i++) { |
198 | 0 | thread_data[i].pack_fd = xopen(curr_pack, O_RDONLY); |
199 | 0 | } |
200 | |
|
201 | 0 | threads_active = 1; |
202 | 0 | } |
203 | | |
204 | | static void cleanup_thread(void) |
205 | 0 | { |
206 | 0 | int i; |
207 | 0 | if (!threads_active) |
208 | 0 | return; |
209 | 0 | threads_active = 0; |
210 | 0 | pthread_mutex_destroy(&read_mutex); |
211 | 0 | pthread_mutex_destroy(&counter_mutex); |
212 | 0 | pthread_mutex_destroy(&work_mutex); |
213 | 0 | if (show_stat) |
214 | 0 | pthread_mutex_destroy(&deepest_delta_mutex); |
215 | 0 | for (i = 0; i < nr_threads; i++) |
216 | 0 | close(thread_data[i].pack_fd); |
217 | 0 | pthread_key_delete(key); |
218 | 0 | free(thread_data); |
219 | 0 | } |
220 | | |
221 | | static int mark_link(struct object *obj, enum object_type type, |
222 | | void *data UNUSED, |
223 | | struct fsck_options *options UNUSED) |
224 | 0 | { |
225 | 0 | if (!obj) |
226 | 0 | return -1; |
227 | | |
228 | 0 | if (type != OBJ_ANY && obj->type != type) |
229 | 0 | die(_("object type mismatch at %s"), oid_to_hex(&obj->oid)); |
230 | | |
231 | 0 | obj->flags |= FLAG_LINK; |
232 | 0 | return 0; |
233 | 0 | } |
234 | | |
235 | | /* The content of each linked object must have been checked |
236 | | or it must be already present in the object database */ |
237 | | static unsigned check_object(struct object *obj) |
238 | 0 | { |
239 | 0 | if (!obj) |
240 | 0 | return 0; |
241 | | |
242 | 0 | if (!(obj->flags & FLAG_LINK)) |
243 | 0 | return 0; |
244 | | |
245 | 0 | if (!(obj->flags & FLAG_CHECKED)) { |
246 | 0 | unsigned long size; |
247 | 0 | int type = oid_object_info(the_repository, &obj->oid, &size); |
248 | 0 | if (type <= 0) |
249 | 0 | die(_("did not receive expected object %s"), |
250 | 0 | oid_to_hex(&obj->oid)); |
251 | 0 | if (type != obj->type) |
252 | 0 | die(_("object %s: expected type %s, found %s"), |
253 | 0 | oid_to_hex(&obj->oid), |
254 | 0 | type_name(obj->type), type_name(type)); |
255 | 0 | obj->flags |= FLAG_CHECKED; |
256 | 0 | return 1; |
257 | 0 | } |
258 | | |
259 | 0 | return 0; |
260 | 0 | } |
261 | | |
262 | | static unsigned check_objects(void) |
263 | 0 | { |
264 | 0 | unsigned i, max, foreign_nr = 0; |
265 | |
|
266 | 0 | max = get_max_object_index(); |
267 | |
|
268 | 0 | if (verbose) |
269 | 0 | progress = start_delayed_progress(_("Checking objects"), max); |
270 | |
|
271 | 0 | for (i = 0; i < max; i++) { |
272 | 0 | foreign_nr += check_object(get_indexed_object(i)); |
273 | 0 | display_progress(progress, i + 1); |
274 | 0 | } |
275 | |
|
276 | 0 | stop_progress(&progress); |
277 | 0 | return foreign_nr; |
278 | 0 | } |
279 | | |
280 | | |
281 | | /* Discard current buffer used content. */ |
282 | | static void flush(void) |
283 | 0 | { |
284 | 0 | if (input_offset) { |
285 | 0 | if (output_fd >= 0) |
286 | 0 | write_or_die(output_fd, input_buffer, input_offset); |
287 | 0 | the_hash_algo->update_fn(&input_ctx, input_buffer, input_offset); |
288 | 0 | memmove(input_buffer, input_buffer + input_offset, input_len); |
289 | 0 | input_offset = 0; |
290 | 0 | } |
291 | 0 | } |
292 | | |
293 | | /* |
294 | | * Make sure at least "min" bytes are available in the buffer, and |
295 | | * return the pointer to the buffer. |
296 | | */ |
297 | | static void *fill(int min) |
298 | 0 | { |
299 | 0 | if (min <= input_len) |
300 | 0 | return input_buffer + input_offset; |
301 | 0 | if (min > sizeof(input_buffer)) |
302 | 0 | die(Q_("cannot fill %d byte", |
303 | 0 | "cannot fill %d bytes", |
304 | 0 | min), |
305 | 0 | min); |
306 | 0 | flush(); |
307 | 0 | do { |
308 | 0 | ssize_t ret = xread(input_fd, input_buffer + input_len, |
309 | 0 | sizeof(input_buffer) - input_len); |
310 | 0 | if (ret <= 0) { |
311 | 0 | if (!ret) |
312 | 0 | die(_("early EOF")); |
313 | 0 | die_errno(_("read error on input")); |
314 | 0 | } |
315 | 0 | input_len += ret; |
316 | 0 | if (from_stdin) |
317 | 0 | display_throughput(progress, consumed_bytes + input_len); |
318 | 0 | } while (input_len < min); |
319 | 0 | return input_buffer; |
320 | 0 | } |
321 | | |
322 | | static void use(int bytes) |
323 | 0 | { |
324 | 0 | if (bytes > input_len) |
325 | 0 | die(_("used more bytes than were available")); |
326 | 0 | input_crc32 = crc32(input_crc32, input_buffer + input_offset, bytes); |
327 | 0 | input_len -= bytes; |
328 | 0 | input_offset += bytes; |
329 | | |
330 | | /* make sure off_t is sufficiently large not to wrap */ |
331 | 0 | if (signed_add_overflows(consumed_bytes, bytes)) |
332 | 0 | die(_("pack too large for current definition of off_t")); |
333 | 0 | consumed_bytes += bytes; |
334 | 0 | if (max_input_size && consumed_bytes > max_input_size) { |
335 | 0 | struct strbuf size_limit = STRBUF_INIT; |
336 | 0 | strbuf_humanise_bytes(&size_limit, max_input_size); |
337 | 0 | die(_("pack exceeds maximum allowed size (%s)"), |
338 | 0 | size_limit.buf); |
339 | 0 | } |
340 | 0 | } |
341 | | |
342 | | static const char *open_pack_file(const char *pack_name) |
343 | 0 | { |
344 | 0 | if (from_stdin) { |
345 | 0 | input_fd = 0; |
346 | 0 | if (!pack_name) { |
347 | 0 | struct strbuf tmp_file = STRBUF_INIT; |
348 | 0 | output_fd = odb_mkstemp(&tmp_file, |
349 | 0 | "pack/tmp_pack_XXXXXX"); |
350 | 0 | pack_name = strbuf_detach(&tmp_file, NULL); |
351 | 0 | } else { |
352 | 0 | output_fd = xopen(pack_name, O_CREAT|O_EXCL|O_RDWR, 0600); |
353 | 0 | } |
354 | 0 | nothread_data.pack_fd = output_fd; |
355 | 0 | } else { |
356 | 0 | input_fd = xopen(pack_name, O_RDONLY); |
357 | 0 | output_fd = -1; |
358 | 0 | nothread_data.pack_fd = input_fd; |
359 | 0 | } |
360 | 0 | the_hash_algo->init_fn(&input_ctx); |
361 | 0 | return pack_name; |
362 | 0 | } |
363 | | |
364 | | static void parse_pack_header(void) |
365 | 0 | { |
366 | 0 | struct pack_header *hdr = fill(sizeof(struct pack_header)); |
367 | | |
368 | | /* Header consistency check */ |
369 | 0 | if (hdr->hdr_signature != htonl(PACK_SIGNATURE)) |
370 | 0 | die(_("pack signature mismatch")); |
371 | 0 | if (!pack_version_ok(hdr->hdr_version)) |
372 | 0 | die(_("pack version %"PRIu32" unsupported"), |
373 | 0 | ntohl(hdr->hdr_version)); |
374 | | |
375 | 0 | nr_objects = ntohl(hdr->hdr_entries); |
376 | 0 | use(sizeof(struct pack_header)); |
377 | 0 | } |
378 | | |
379 | | __attribute__((format (printf, 2, 3))) |
380 | | static NORETURN void bad_object(off_t offset, const char *format, ...) |
381 | 0 | { |
382 | 0 | va_list params; |
383 | 0 | char buf[1024]; |
384 | |
|
385 | 0 | va_start(params, format); |
386 | 0 | vsnprintf(buf, sizeof(buf), format, params); |
387 | 0 | va_end(params); |
388 | 0 | die(_("pack has bad object at offset %"PRIuMAX": %s"), |
389 | 0 | (uintmax_t)offset, buf); |
390 | 0 | } |
391 | | |
392 | | static inline struct thread_local *get_thread_data(void) |
393 | 0 | { |
394 | 0 | if (HAVE_THREADS) { |
395 | 0 | if (threads_active) |
396 | 0 | return pthread_getspecific(key); |
397 | 0 | assert(!threads_active && |
398 | 0 | "This should only be reached when all threads are gone"); |
399 | 0 | } |
400 | 0 | return ¬hread_data; |
401 | 0 | } |
402 | | |
403 | | static void set_thread_data(struct thread_local *data) |
404 | 0 | { |
405 | 0 | if (threads_active) |
406 | 0 | pthread_setspecific(key, data); |
407 | 0 | } |
408 | | |
409 | | static void free_base_data(struct base_data *c) |
410 | 0 | { |
411 | 0 | if (c->data) { |
412 | 0 | FREE_AND_NULL(c->data); |
413 | 0 | base_cache_used -= c->size; |
414 | 0 | } |
415 | 0 | } |
416 | | |
417 | | static void prune_base_data(struct base_data *retain) |
418 | 0 | { |
419 | 0 | struct list_head *pos; |
420 | |
|
421 | 0 | if (base_cache_used <= base_cache_limit) |
422 | 0 | return; |
423 | | |
424 | 0 | list_for_each_prev(pos, &done_head) { |
425 | 0 | struct base_data *b = list_entry(pos, struct base_data, list); |
426 | 0 | if (b->retain_data || b == retain) |
427 | 0 | continue; |
428 | 0 | if (b->data) { |
429 | 0 | free_base_data(b); |
430 | 0 | if (base_cache_used <= base_cache_limit) |
431 | 0 | return; |
432 | 0 | } |
433 | 0 | } |
434 | | |
435 | 0 | list_for_each_prev(pos, &work_head) { |
436 | 0 | struct base_data *b = list_entry(pos, struct base_data, list); |
437 | 0 | if (b->retain_data || b == retain) |
438 | 0 | continue; |
439 | 0 | if (b->data) { |
440 | 0 | free_base_data(b); |
441 | 0 | if (base_cache_used <= base_cache_limit) |
442 | 0 | return; |
443 | 0 | } |
444 | 0 | } |
445 | 0 | } |
446 | | |
447 | | static int is_delta_type(enum object_type type) |
448 | 0 | { |
449 | 0 | return (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA); |
450 | 0 | } |
451 | | |
452 | | static void *unpack_entry_data(off_t offset, unsigned long size, |
453 | | enum object_type type, struct object_id *oid) |
454 | 0 | { |
455 | 0 | static char fixed_buf[8192]; |
456 | 0 | int status; |
457 | 0 | git_zstream stream; |
458 | 0 | void *buf; |
459 | 0 | git_hash_ctx c; |
460 | 0 | char hdr[32]; |
461 | 0 | int hdrlen; |
462 | |
|
463 | 0 | if (!is_delta_type(type)) { |
464 | 0 | hdrlen = format_object_header(hdr, sizeof(hdr), type, size); |
465 | 0 | the_hash_algo->init_fn(&c); |
466 | 0 | the_hash_algo->update_fn(&c, hdr, hdrlen); |
467 | 0 | } else |
468 | 0 | oid = NULL; |
469 | 0 | if (type == OBJ_BLOB && size > big_file_threshold) |
470 | 0 | buf = fixed_buf; |
471 | 0 | else |
472 | 0 | buf = xmallocz(size); |
473 | |
|
474 | 0 | memset(&stream, 0, sizeof(stream)); |
475 | 0 | git_inflate_init(&stream); |
476 | 0 | stream.next_out = buf; |
477 | 0 | stream.avail_out = buf == fixed_buf ? sizeof(fixed_buf) : size; |
478 | |
|
479 | 0 | do { |
480 | 0 | unsigned char *last_out = stream.next_out; |
481 | 0 | stream.next_in = fill(1); |
482 | 0 | stream.avail_in = input_len; |
483 | 0 | status = git_inflate(&stream, 0); |
484 | 0 | use(input_len - stream.avail_in); |
485 | 0 | if (oid) |
486 | 0 | the_hash_algo->update_fn(&c, last_out, stream.next_out - last_out); |
487 | 0 | if (buf == fixed_buf) { |
488 | 0 | stream.next_out = buf; |
489 | 0 | stream.avail_out = sizeof(fixed_buf); |
490 | 0 | } |
491 | 0 | } while (status == Z_OK); |
492 | 0 | if (stream.total_out != size || status != Z_STREAM_END) |
493 | 0 | bad_object(offset, _("inflate returned %d"), status); |
494 | 0 | git_inflate_end(&stream); |
495 | 0 | if (oid) |
496 | 0 | the_hash_algo->final_oid_fn(oid, &c); |
497 | 0 | return buf == fixed_buf ? NULL : buf; |
498 | 0 | } |
499 | | |
500 | | static void *unpack_raw_entry(struct object_entry *obj, |
501 | | off_t *ofs_offset, |
502 | | struct object_id *ref_oid, |
503 | | struct object_id *oid) |
504 | 0 | { |
505 | 0 | unsigned char *p; |
506 | 0 | unsigned long size, c; |
507 | 0 | off_t base_offset; |
508 | 0 | unsigned shift; |
509 | 0 | void *data; |
510 | |
|
511 | 0 | obj->idx.offset = consumed_bytes; |
512 | 0 | input_crc32 = crc32(0, NULL, 0); |
513 | |
|
514 | 0 | p = fill(1); |
515 | 0 | c = *p; |
516 | 0 | use(1); |
517 | 0 | obj->type = (c >> 4) & 7; |
518 | 0 | size = (c & 15); |
519 | 0 | shift = 4; |
520 | 0 | while (c & 0x80) { |
521 | 0 | p = fill(1); |
522 | 0 | c = *p; |
523 | 0 | use(1); |
524 | 0 | size += (c & 0x7f) << shift; |
525 | 0 | shift += 7; |
526 | 0 | } |
527 | 0 | obj->size = size; |
528 | |
|
529 | 0 | switch (obj->type) { |
530 | 0 | case OBJ_REF_DELTA: |
531 | 0 | oidread(ref_oid, fill(the_hash_algo->rawsz), |
532 | 0 | the_repository->hash_algo); |
533 | 0 | use(the_hash_algo->rawsz); |
534 | 0 | break; |
535 | 0 | case OBJ_OFS_DELTA: |
536 | 0 | p = fill(1); |
537 | 0 | c = *p; |
538 | 0 | use(1); |
539 | 0 | base_offset = c & 127; |
540 | 0 | while (c & 128) { |
541 | 0 | base_offset += 1; |
542 | 0 | if (!base_offset || MSB(base_offset, 7)) |
543 | 0 | bad_object(obj->idx.offset, _("offset value overflow for delta base object")); |
544 | 0 | p = fill(1); |
545 | 0 | c = *p; |
546 | 0 | use(1); |
547 | 0 | base_offset = (base_offset << 7) + (c & 127); |
548 | 0 | } |
549 | 0 | *ofs_offset = obj->idx.offset - base_offset; |
550 | 0 | if (*ofs_offset <= 0 || *ofs_offset >= obj->idx.offset) |
551 | 0 | bad_object(obj->idx.offset, _("delta base offset is out of bound")); |
552 | 0 | break; |
553 | 0 | case OBJ_COMMIT: |
554 | 0 | case OBJ_TREE: |
555 | 0 | case OBJ_BLOB: |
556 | 0 | case OBJ_TAG: |
557 | 0 | break; |
558 | 0 | default: |
559 | 0 | bad_object(obj->idx.offset, _("unknown object type %d"), obj->type); |
560 | 0 | } |
561 | 0 | obj->hdr_size = consumed_bytes - obj->idx.offset; |
562 | |
|
563 | 0 | data = unpack_entry_data(obj->idx.offset, obj->size, obj->type, oid); |
564 | 0 | obj->idx.crc32 = input_crc32; |
565 | 0 | return data; |
566 | 0 | } |
567 | | |
568 | | static void *unpack_data(struct object_entry *obj, |
569 | | int (*consume)(const unsigned char *, unsigned long, void *), |
570 | | void *cb_data) |
571 | 0 | { |
572 | 0 | off_t from = obj[0].idx.offset + obj[0].hdr_size; |
573 | 0 | off_t len = obj[1].idx.offset - from; |
574 | 0 | unsigned char *data, *inbuf; |
575 | 0 | git_zstream stream; |
576 | 0 | int status; |
577 | |
|
578 | 0 | data = xmallocz(consume ? 64*1024 : obj->size); |
579 | 0 | inbuf = xmalloc((len < 64*1024) ? (int)len : 64*1024); |
580 | |
|
581 | 0 | memset(&stream, 0, sizeof(stream)); |
582 | 0 | git_inflate_init(&stream); |
583 | 0 | stream.next_out = data; |
584 | 0 | stream.avail_out = consume ? 64*1024 : obj->size; |
585 | |
|
586 | 0 | do { |
587 | 0 | ssize_t n = (len < 64*1024) ? (ssize_t)len : 64*1024; |
588 | 0 | n = xpread(get_thread_data()->pack_fd, inbuf, n, from); |
589 | 0 | if (n < 0) |
590 | 0 | die_errno(_("cannot pread pack file")); |
591 | 0 | if (!n) |
592 | 0 | die(Q_("premature end of pack file, %"PRIuMAX" byte missing", |
593 | 0 | "premature end of pack file, %"PRIuMAX" bytes missing", |
594 | 0 | len), |
595 | 0 | (uintmax_t)len); |
596 | 0 | from += n; |
597 | 0 | len -= n; |
598 | 0 | stream.next_in = inbuf; |
599 | 0 | stream.avail_in = n; |
600 | 0 | if (!consume) |
601 | 0 | status = git_inflate(&stream, 0); |
602 | 0 | else { |
603 | 0 | do { |
604 | 0 | status = git_inflate(&stream, 0); |
605 | 0 | if (consume(data, stream.next_out - data, cb_data)) { |
606 | 0 | free(inbuf); |
607 | 0 | free(data); |
608 | 0 | return NULL; |
609 | 0 | } |
610 | 0 | stream.next_out = data; |
611 | 0 | stream.avail_out = 64*1024; |
612 | 0 | } while (status == Z_OK && stream.avail_in); |
613 | 0 | } |
614 | 0 | } while (len && status == Z_OK && !stream.avail_in); |
615 | | |
616 | | /* This has been inflated OK when first encountered, so... */ |
617 | 0 | if (status != Z_STREAM_END || stream.total_out != obj->size) |
618 | 0 | die(_("serious inflate inconsistency")); |
619 | | |
620 | 0 | git_inflate_end(&stream); |
621 | 0 | free(inbuf); |
622 | 0 | if (consume) { |
623 | 0 | FREE_AND_NULL(data); |
624 | 0 | } |
625 | 0 | return data; |
626 | 0 | } |
627 | | |
/*
 * Inflate the data of "obj" from the pack into a newly allocated buffer
 * and return it.
 */
static void *get_data_from_pack(struct object_entry *obj)
{
	void *inflated = unpack_data(obj, NULL, NULL);

	return inflated;
}
632 | | |
633 | | static int compare_ofs_delta_bases(off_t offset1, off_t offset2, |
634 | | enum object_type type1, |
635 | | enum object_type type2) |
636 | 0 | { |
637 | 0 | int cmp = type1 - type2; |
638 | 0 | if (cmp) |
639 | 0 | return cmp; |
640 | 0 | return offset1 < offset2 ? -1 : |
641 | 0 | offset1 > offset2 ? 1 : |
642 | 0 | 0; |
643 | 0 | } |
644 | | |
645 | | static int find_ofs_delta(const off_t offset) |
646 | 0 | { |
647 | 0 | int first = 0, last = nr_ofs_deltas; |
648 | |
|
649 | 0 | while (first < last) { |
650 | 0 | int next = first + (last - first) / 2; |
651 | 0 | struct ofs_delta_entry *delta = &ofs_deltas[next]; |
652 | 0 | int cmp; |
653 | |
|
654 | 0 | cmp = compare_ofs_delta_bases(offset, delta->offset, |
655 | 0 | OBJ_OFS_DELTA, |
656 | 0 | objects[delta->obj_no].type); |
657 | 0 | if (!cmp) |
658 | 0 | return next; |
659 | 0 | if (cmp < 0) { |
660 | 0 | last = next; |
661 | 0 | continue; |
662 | 0 | } |
663 | 0 | first = next+1; |
664 | 0 | } |
665 | 0 | return -first-1; |
666 | 0 | } |
667 | | |
668 | | static void find_ofs_delta_children(off_t offset, |
669 | | int *first_index, int *last_index) |
670 | 0 | { |
671 | 0 | int first = find_ofs_delta(offset); |
672 | 0 | int last = first; |
673 | 0 | int end = nr_ofs_deltas - 1; |
674 | |
|
675 | 0 | if (first < 0) { |
676 | 0 | *first_index = 0; |
677 | 0 | *last_index = -1; |
678 | 0 | return; |
679 | 0 | } |
680 | 0 | while (first > 0 && ofs_deltas[first - 1].offset == offset) |
681 | 0 | --first; |
682 | 0 | while (last < end && ofs_deltas[last + 1].offset == offset) |
683 | 0 | ++last; |
684 | 0 | *first_index = first; |
685 | 0 | *last_index = last; |
686 | 0 | } |
687 | | |
688 | | static int compare_ref_delta_bases(const struct object_id *oid1, |
689 | | const struct object_id *oid2, |
690 | | enum object_type type1, |
691 | | enum object_type type2) |
692 | 0 | { |
693 | 0 | int cmp = type1 - type2; |
694 | 0 | if (cmp) |
695 | 0 | return cmp; |
696 | 0 | return oidcmp(oid1, oid2); |
697 | 0 | } |
698 | | |
699 | | static int find_ref_delta(const struct object_id *oid) |
700 | 0 | { |
701 | 0 | int first = 0, last = nr_ref_deltas; |
702 | |
|
703 | 0 | while (first < last) { |
704 | 0 | int next = first + (last - first) / 2; |
705 | 0 | struct ref_delta_entry *delta = &ref_deltas[next]; |
706 | 0 | int cmp; |
707 | |
|
708 | 0 | cmp = compare_ref_delta_bases(oid, &delta->oid, |
709 | 0 | OBJ_REF_DELTA, |
710 | 0 | objects[delta->obj_no].type); |
711 | 0 | if (!cmp) |
712 | 0 | return next; |
713 | 0 | if (cmp < 0) { |
714 | 0 | last = next; |
715 | 0 | continue; |
716 | 0 | } |
717 | 0 | first = next+1; |
718 | 0 | } |
719 | 0 | return -first-1; |
720 | 0 | } |
721 | | |
722 | | static void find_ref_delta_children(const struct object_id *oid, |
723 | | int *first_index, int *last_index) |
724 | 0 | { |
725 | 0 | int first = find_ref_delta(oid); |
726 | 0 | int last = first; |
727 | 0 | int end = nr_ref_deltas - 1; |
728 | |
|
729 | 0 | if (first < 0) { |
730 | 0 | *first_index = 0; |
731 | 0 | *last_index = -1; |
732 | 0 | return; |
733 | 0 | } |
734 | 0 | while (first > 0 && oideq(&ref_deltas[first - 1].oid, oid)) |
735 | 0 | --first; |
736 | 0 | while (last < end && oideq(&ref_deltas[last + 1].oid, oid)) |
737 | 0 | ++last; |
738 | 0 | *first_index = first; |
739 | 0 | *last_index = last; |
740 | 0 | } |
741 | | |
/*
 * Callback state for compare_objects(): the pack entry being compared,
 * the stream of the already-existing object, and a scratch buffer for
 * reading from that stream.
 */
struct compare_data {
	struct object_entry *entry;
	struct git_istream *st;
	unsigned char *buf;		/* grown on demand by compare_objects() */
	unsigned long buf_size;		/* allocated size of "buf" */
};
748 | | |
749 | | static int compare_objects(const unsigned char *buf, unsigned long size, |
750 | | void *cb_data) |
751 | 0 | { |
752 | 0 | struct compare_data *data = cb_data; |
753 | |
|
754 | 0 | if (data->buf_size < size) { |
755 | 0 | free(data->buf); |
756 | 0 | data->buf = xmalloc(size); |
757 | 0 | data->buf_size = size; |
758 | 0 | } |
759 | |
|
760 | 0 | while (size) { |
761 | 0 | ssize_t len = read_istream(data->st, data->buf, size); |
762 | 0 | if (len == 0) |
763 | 0 | die(_("SHA1 COLLISION FOUND WITH %s !"), |
764 | 0 | oid_to_hex(&data->entry->idx.oid)); |
765 | 0 | if (len < 0) |
766 | 0 | die(_("unable to read %s"), |
767 | 0 | oid_to_hex(&data->entry->idx.oid)); |
768 | 0 | if (memcmp(buf, data->buf, len)) |
769 | 0 | die(_("SHA1 COLLISION FOUND WITH %s !"), |
770 | 0 | oid_to_hex(&data->entry->idx.oid)); |
771 | 0 | size -= len; |
772 | 0 | buf += len; |
773 | 0 | } |
774 | 0 | return 0; |
775 | 0 | } |
776 | | |
777 | | static int check_collison(struct object_entry *entry) |
778 | 0 | { |
779 | 0 | struct compare_data data; |
780 | 0 | enum object_type type; |
781 | 0 | unsigned long size; |
782 | |
|
783 | 0 | if (entry->size <= big_file_threshold || entry->type != OBJ_BLOB) |
784 | 0 | return -1; |
785 | | |
786 | 0 | memset(&data, 0, sizeof(data)); |
787 | 0 | data.entry = entry; |
788 | 0 | data.st = open_istream(the_repository, &entry->idx.oid, &type, &size, |
789 | 0 | NULL); |
790 | 0 | if (!data.st) |
791 | 0 | return -1; |
792 | 0 | if (size != entry->size || type != entry->type) |
793 | 0 | die(_("SHA1 COLLISION FOUND WITH %s !"), |
794 | 0 | oid_to_hex(&entry->idx.oid)); |
795 | 0 | unpack_data(entry, compare_objects, &data); |
796 | 0 | close_istream(data.st); |
797 | 0 | free(data.buf); |
798 | 0 | return 0; |
799 | 0 | } |
800 | | |
/*
 * Post-process an object whose ID has just been computed.
 *
 * If the repository already has an object with this ID, verify that the
 * existing object has the same type, size and contents (dying on a
 * mismatch). Then, when strict or do_fsck_object is set, look the object
 * up / parse it, optionally fsck it, and mark it FLAG_CHECKED.
 *
 * "data" may be NULL, in which case "obj_entry" must be given so that the
 * contents can be re-read from the pack when they are needed.
 */
static void sha1_object(const void *data, struct object_entry *obj_entry,
			unsigned long size, enum object_type type,
			const struct object_id *oid)
{
	void *new_data = NULL;	/* buffer we read ourselves and must free */
	int collision_test_needed = 0;

	assert(data || obj_entry);

	if (startup_info->have_repository) {
		read_lock();
		collision_test_needed =
			repo_has_object_file_with_flags(the_repository, oid,
							OBJECT_INFO_QUICK);
		read_unlock();
	}

	/*
	 * Without in-memory data, try the streaming comparison first; it
	 * returns 0 when it handled the check, so nothing more is needed.
	 */
	if (collision_test_needed && !data) {
		read_lock();
		if (!check_collison(obj_entry))
			collision_test_needed = 0;
		read_unlock();
	}
	/* Otherwise compare against the existing object in memory. */
	if (collision_test_needed) {
		void *has_data;
		enum object_type has_type;
		unsigned long has_size;
		read_lock();
		has_type = oid_object_info(the_repository, oid, &has_size);
		if (has_type < 0)
			die(_("cannot read existing object info %s"), oid_to_hex(oid));
		if (has_type != type || has_size != size)
			die(_("SHA1 COLLISION FOUND WITH %s !"), oid_to_hex(oid));
		has_data = repo_read_object_file(the_repository, oid,
						 &has_type, &has_size);
		read_unlock();
		if (!data)
			data = new_data = get_data_from_pack(obj_entry);
		if (!has_data)
			die(_("cannot read existing object %s"), oid_to_hex(oid));
		if (size != has_size || type != has_type ||
		    memcmp(data, has_data, size) != 0)
			die(_("SHA1 COLLISION FOUND WITH %s !"), oid_to_hex(oid));
		free(has_data);
	}

	if (strict || do_fsck_object) {
		read_lock();
		if (type == OBJ_BLOB) {
			struct blob *blob = lookup_blob(the_repository, oid);
			if (blob)
				blob->object.flags |= FLAG_CHECKED;
			else
				die(_("invalid blob object %s"), oid_to_hex(oid));
			if (do_fsck_object &&
			    fsck_object(&blob->object, (void *)data, size, &fsck_options))
				die(_("fsck error in packed object"));
		} else {
			struct object *obj;
			int eaten;
			void *buf = (void *) data;

			assert(data && "data can only be NULL for large _blobs_");

			/*
			 * we do not need to free the memory here, as the
			 * buf is deleted by the caller.
			 */
			obj = parse_object_buffer(the_repository, oid, type,
						  size, buf,
						  &eaten);
			if (!obj)
				die(_("invalid %s"), type_name(type));
			if (do_fsck_object &&
			    fsck_object(obj, buf, size, &fsck_options))
				die(_("fsck error in packed object"));
			if (strict && fsck_walk(obj, NULL, &fsck_options))
				die(_("Not all child objects of %s are reachable"), oid_to_hex(&obj->oid));

			/*
			 * The buffer belongs to the caller, so drop the
			 * parsed object's references to it.
			 */
			if (obj->type == OBJ_TREE) {
				struct tree *item = (struct tree *) obj;
				item->buffer = NULL;
				obj->parsed = 0;
			}
			if (obj->type == OBJ_COMMIT) {
				struct commit *commit = (struct commit *) obj;
				if (detach_commit_buffer(commit, NULL) != data)
					BUG("parse_object_buffer transmogrified our buffer");
			}
			obj->flags |= FLAG_CHECKED;
		}
		read_unlock();
	}

	free(new_data);
}
897 | | |
898 | | /* |
899 | | * Ensure that this node has been reconstructed and return its contents. |
900 | | * |
901 | | * In the typical and best case, this node would already be reconstructed |
902 | | * (through the invocation to resolve_delta() in threaded_second_pass()) and it |
903 | | * would not be pruned. However, if pruning of this node was necessary due to |
904 | | * reaching delta_base_cache_limit, this function will find the closest |
905 | | * ancestor with reconstructed data that has not been pruned (or if there is |
906 | | * none, the ultimate base object), and reconstruct each node in the delta |
907 | | * chain in order to generate the reconstructed data for this node. |
908 | | */ |
909 | | static void *get_base_data(struct base_data *c) |
910 | 0 | { |
911 | 0 | if (!c->data) { |
912 | 0 | struct object_entry *obj = c->obj; |
913 | 0 | struct base_data **delta = NULL; |
914 | 0 | int delta_nr = 0, delta_alloc = 0; |
915 | |
|
916 | 0 | while (is_delta_type(c->obj->type) && !c->data) { |
917 | 0 | ALLOC_GROW(delta, delta_nr + 1, delta_alloc); |
918 | 0 | delta[delta_nr++] = c; |
919 | 0 | c = c->base; |
920 | 0 | } |
921 | 0 | if (!delta_nr) { |
922 | 0 | c->data = get_data_from_pack(obj); |
923 | 0 | c->size = obj->size; |
924 | 0 | base_cache_used += c->size; |
925 | 0 | prune_base_data(c); |
926 | 0 | } |
927 | 0 | for (; delta_nr > 0; delta_nr--) { |
928 | 0 | void *base, *raw; |
929 | 0 | c = delta[delta_nr - 1]; |
930 | 0 | obj = c->obj; |
931 | 0 | base = get_base_data(c->base); |
932 | 0 | raw = get_data_from_pack(obj); |
933 | 0 | c->data = patch_delta( |
934 | 0 | base, c->base->size, |
935 | 0 | raw, obj->size, |
936 | 0 | &c->size); |
937 | 0 | free(raw); |
938 | 0 | if (!c->data) |
939 | 0 | bad_object(obj->idx.offset, _("failed to apply delta")); |
940 | 0 | base_cache_used += c->size; |
941 | 0 | prune_base_data(c); |
942 | 0 | } |
943 | 0 | free(delta); |
944 | 0 | } |
945 | 0 | return c->data; |
946 | 0 | } |
947 | | |
948 | | static struct base_data *make_base(struct object_entry *obj, |
949 | | struct base_data *parent) |
950 | 0 | { |
951 | 0 | struct base_data *base = xcalloc(1, sizeof(struct base_data)); |
952 | 0 | base->base = parent; |
953 | 0 | base->obj = obj; |
954 | 0 | find_ref_delta_children(&obj->idx.oid, |
955 | 0 | &base->ref_first, &base->ref_last); |
956 | 0 | find_ofs_delta_children(obj->idx.offset, |
957 | 0 | &base->ofs_first, &base->ofs_last); |
958 | 0 | base->children_remaining = base->ref_last - base->ref_first + |
959 | 0 | base->ofs_last - base->ofs_first + 2; |
960 | 0 | return base; |
961 | 0 | } |
962 | | |
963 | | static struct base_data *resolve_delta(struct object_entry *delta_obj, |
964 | | struct base_data *base) |
965 | 0 | { |
966 | 0 | void *delta_data, *result_data; |
967 | 0 | struct base_data *result; |
968 | 0 | unsigned long result_size; |
969 | |
|
970 | 0 | if (show_stat) { |
971 | 0 | int i = delta_obj - objects; |
972 | 0 | int j = base->obj - objects; |
973 | 0 | obj_stat[i].delta_depth = obj_stat[j].delta_depth + 1; |
974 | 0 | deepest_delta_lock(); |
975 | 0 | if (deepest_delta < obj_stat[i].delta_depth) |
976 | 0 | deepest_delta = obj_stat[i].delta_depth; |
977 | 0 | deepest_delta_unlock(); |
978 | 0 | obj_stat[i].base_object_no = j; |
979 | 0 | } |
980 | 0 | delta_data = get_data_from_pack(delta_obj); |
981 | 0 | assert(base->data); |
982 | 0 | result_data = patch_delta(base->data, base->size, |
983 | 0 | delta_data, delta_obj->size, &result_size); |
984 | 0 | free(delta_data); |
985 | 0 | if (!result_data) |
986 | 0 | bad_object(delta_obj->idx.offset, _("failed to apply delta")); |
987 | 0 | hash_object_file(the_hash_algo, result_data, result_size, |
988 | 0 | delta_obj->real_type, &delta_obj->idx.oid); |
989 | 0 | sha1_object(result_data, NULL, result_size, delta_obj->real_type, |
990 | 0 | &delta_obj->idx.oid); |
991 | |
|
992 | 0 | result = make_base(delta_obj, base); |
993 | 0 | result->data = result_data; |
994 | 0 | result->size = result_size; |
995 | |
|
996 | 0 | counter_lock(); |
997 | 0 | nr_resolved_deltas++; |
998 | 0 | counter_unlock(); |
999 | |
|
1000 | 0 | return result; |
1001 | 0 | } |
1002 | | |
1003 | | static int compare_ofs_delta_entry(const void *a, const void *b) |
1004 | 0 | { |
1005 | 0 | const struct ofs_delta_entry *delta_a = a; |
1006 | 0 | const struct ofs_delta_entry *delta_b = b; |
1007 | |
|
1008 | 0 | return delta_a->offset < delta_b->offset ? -1 : |
1009 | 0 | delta_a->offset > delta_b->offset ? 1 : |
1010 | 0 | 0; |
1011 | 0 | } |
1012 | | |
1013 | | static int compare_ref_delta_entry(const void *a, const void *b) |
1014 | 0 | { |
1015 | 0 | const struct ref_delta_entry *delta_a = a; |
1016 | 0 | const struct ref_delta_entry *delta_b = b; |
1017 | |
|
1018 | 0 | return oidcmp(&delta_a->oid, &delta_b->oid); |
1019 | 0 | } |
1020 | | |
1021 | | static void *threaded_second_pass(void *data) |
1022 | 0 | { |
1023 | 0 | if (data) |
1024 | 0 | set_thread_data(data); |
1025 | 0 | for (;;) { |
1026 | 0 | struct base_data *parent = NULL; |
1027 | 0 | struct object_entry *child_obj; |
1028 | 0 | struct base_data *child; |
1029 | |
|
1030 | 0 | counter_lock(); |
1031 | 0 | display_progress(progress, nr_resolved_deltas); |
1032 | 0 | counter_unlock(); |
1033 | |
|
1034 | 0 | work_lock(); |
1035 | 0 | if (list_empty(&work_head)) { |
1036 | | /* |
1037 | | * Take an object from the object array. |
1038 | | */ |
1039 | 0 | while (nr_dispatched < nr_objects && |
1040 | 0 | is_delta_type(objects[nr_dispatched].type)) |
1041 | 0 | nr_dispatched++; |
1042 | 0 | if (nr_dispatched >= nr_objects) { |
1043 | 0 | work_unlock(); |
1044 | 0 | break; |
1045 | 0 | } |
1046 | 0 | child_obj = &objects[nr_dispatched++]; |
1047 | 0 | } else { |
1048 | | /* |
1049 | | * Peek at the top of the stack, and take a child from |
1050 | | * it. |
1051 | | */ |
1052 | 0 | parent = list_first_entry(&work_head, struct base_data, |
1053 | 0 | list); |
1054 | |
|
1055 | 0 | if (parent->ref_first <= parent->ref_last) { |
1056 | 0 | int offset = ref_deltas[parent->ref_first++].obj_no; |
1057 | 0 | child_obj = objects + offset; |
1058 | 0 | if (child_obj->real_type != OBJ_REF_DELTA) |
1059 | 0 | die("REF_DELTA at offset %"PRIuMAX" already resolved (duplicate base %s?)", |
1060 | 0 | (uintmax_t) child_obj->idx.offset, |
1061 | 0 | oid_to_hex(&parent->obj->idx.oid)); |
1062 | 0 | child_obj->real_type = parent->obj->real_type; |
1063 | 0 | } else { |
1064 | 0 | child_obj = objects + |
1065 | 0 | ofs_deltas[parent->ofs_first++].obj_no; |
1066 | 0 | assert(child_obj->real_type == OBJ_OFS_DELTA); |
1067 | 0 | child_obj->real_type = parent->obj->real_type; |
1068 | 0 | } |
1069 | | |
1070 | 0 | if (parent->ref_first > parent->ref_last && |
1071 | 0 | parent->ofs_first > parent->ofs_last) { |
1072 | | /* |
1073 | | * This parent has run out of children, so move |
1074 | | * it to done_head. |
1075 | | */ |
1076 | 0 | list_del(&parent->list); |
1077 | 0 | list_add(&parent->list, &done_head); |
1078 | 0 | } |
1079 | | |
1080 | | /* |
1081 | | * Ensure that the parent has data, since we will need |
1082 | | * it later. |
1083 | | * |
1084 | | * NEEDSWORK: If parent data needs to be reloaded, this |
1085 | | * prolongs the time that the current thread spends in |
1086 | | * the mutex. A mitigating factor is that parent data |
1087 | | * needs to be reloaded only if the delta base cache |
1088 | | * limit is exceeded, so in the typical case, this does |
1089 | | * not happen. |
1090 | | */ |
1091 | 0 | get_base_data(parent); |
1092 | 0 | parent->retain_data++; |
1093 | 0 | } |
1094 | 0 | work_unlock(); |
1095 | |
|
1096 | 0 | if (parent) { |
1097 | 0 | child = resolve_delta(child_obj, parent); |
1098 | 0 | if (!child->children_remaining) |
1099 | 0 | FREE_AND_NULL(child->data); |
1100 | 0 | } else { |
1101 | 0 | child = make_base(child_obj, NULL); |
1102 | 0 | if (child->children_remaining) { |
1103 | | /* |
1104 | | * Since this child has its own delta children, |
1105 | | * we will need this data in the future. |
1106 | | * Inflate now so that future iterations will |
1107 | | * have access to this object's data while |
1108 | | * outside the work mutex. |
1109 | | */ |
1110 | 0 | child->data = get_data_from_pack(child_obj); |
1111 | 0 | child->size = child_obj->size; |
1112 | 0 | } |
1113 | 0 | } |
1114 | |
|
1115 | 0 | work_lock(); |
1116 | 0 | if (parent) |
1117 | 0 | parent->retain_data--; |
1118 | 0 | if (child->data) { |
1119 | | /* |
1120 | | * This child has its own children, so add it to |
1121 | | * work_head. |
1122 | | */ |
1123 | 0 | list_add(&child->list, &work_head); |
1124 | 0 | base_cache_used += child->size; |
1125 | 0 | prune_base_data(NULL); |
1126 | 0 | free_base_data(child); |
1127 | 0 | } else { |
1128 | | /* |
1129 | | * This child does not have its own children. It may be |
1130 | | * the last descendant of its ancestors; free those |
1131 | | * that we can. |
1132 | | */ |
1133 | 0 | struct base_data *p = parent; |
1134 | |
|
1135 | 0 | while (p) { |
1136 | 0 | struct base_data *next_p; |
1137 | |
|
1138 | 0 | p->children_remaining--; |
1139 | 0 | if (p->children_remaining) |
1140 | 0 | break; |
1141 | | |
1142 | 0 | next_p = p->base; |
1143 | 0 | free_base_data(p); |
1144 | 0 | list_del(&p->list); |
1145 | 0 | free(p); |
1146 | |
|
1147 | 0 | p = next_p; |
1148 | 0 | } |
1149 | 0 | FREE_AND_NULL(child); |
1150 | 0 | } |
1151 | 0 | work_unlock(); |
1152 | 0 | } |
1153 | 0 | return NULL; |
1154 | 0 | } |
1155 | | |
1156 | | /* |
1157 | | * First pass: |
1158 | | * - find locations of all objects; |
1159 | | * - calculate SHA1 of all non-delta objects; |
1160 | | * - remember base (SHA1 or offset) for all deltas. |
1161 | | */ |
1162 | | static void parse_pack_objects(unsigned char *hash) |
1163 | 0 | { |
1164 | 0 | int i, nr_delays = 0; |
1165 | 0 | struct ofs_delta_entry *ofs_delta = ofs_deltas; |
1166 | 0 | struct object_id ref_delta_oid; |
1167 | 0 | struct stat st; |
1168 | 0 | git_hash_ctx tmp_ctx; |
1169 | |
|
1170 | 0 | if (verbose) |
1171 | 0 | progress = start_progress( |
1172 | 0 | progress_title ? progress_title : |
1173 | 0 | from_stdin ? _("Receiving objects") : _("Indexing objects"), |
1174 | 0 | nr_objects); |
1175 | 0 | for (i = 0; i < nr_objects; i++) { |
1176 | 0 | struct object_entry *obj = &objects[i]; |
1177 | 0 | void *data = unpack_raw_entry(obj, &ofs_delta->offset, |
1178 | 0 | &ref_delta_oid, |
1179 | 0 | &obj->idx.oid); |
1180 | 0 | obj->real_type = obj->type; |
1181 | 0 | if (obj->type == OBJ_OFS_DELTA) { |
1182 | 0 | nr_ofs_deltas++; |
1183 | 0 | ofs_delta->obj_no = i; |
1184 | 0 | ofs_delta++; |
1185 | 0 | } else if (obj->type == OBJ_REF_DELTA) { |
1186 | 0 | ALLOC_GROW(ref_deltas, nr_ref_deltas + 1, ref_deltas_alloc); |
1187 | 0 | oidcpy(&ref_deltas[nr_ref_deltas].oid, &ref_delta_oid); |
1188 | 0 | ref_deltas[nr_ref_deltas].obj_no = i; |
1189 | 0 | nr_ref_deltas++; |
1190 | 0 | } else if (!data) { |
1191 | | /* large blobs, check later */ |
1192 | 0 | obj->real_type = OBJ_BAD; |
1193 | 0 | nr_delays++; |
1194 | 0 | } else |
1195 | 0 | sha1_object(data, NULL, obj->size, obj->type, |
1196 | 0 | &obj->idx.oid); |
1197 | 0 | free(data); |
1198 | 0 | display_progress(progress, i+1); |
1199 | 0 | } |
1200 | 0 | objects[i].idx.offset = consumed_bytes; |
1201 | 0 | stop_progress(&progress); |
1202 | | |
1203 | | /* Check pack integrity */ |
1204 | 0 | flush(); |
1205 | 0 | the_hash_algo->init_fn(&tmp_ctx); |
1206 | 0 | the_hash_algo->clone_fn(&tmp_ctx, &input_ctx); |
1207 | 0 | the_hash_algo->final_fn(hash, &tmp_ctx); |
1208 | 0 | if (!hasheq(fill(the_hash_algo->rawsz), hash, the_repository->hash_algo)) |
1209 | 0 | die(_("pack is corrupted (SHA1 mismatch)")); |
1210 | 0 | use(the_hash_algo->rawsz); |
1211 | | |
1212 | | /* If input_fd is a file, we should have reached its end now. */ |
1213 | 0 | if (fstat(input_fd, &st)) |
1214 | 0 | die_errno(_("cannot fstat packfile")); |
1215 | 0 | if (S_ISREG(st.st_mode) && |
1216 | 0 | lseek(input_fd, 0, SEEK_CUR) - input_len != st.st_size) |
1217 | 0 | die(_("pack has junk at the end")); |
1218 | | |
1219 | 0 | for (i = 0; i < nr_objects; i++) { |
1220 | 0 | struct object_entry *obj = &objects[i]; |
1221 | 0 | if (obj->real_type != OBJ_BAD) |
1222 | 0 | continue; |
1223 | 0 | obj->real_type = obj->type; |
1224 | 0 | sha1_object(NULL, obj, obj->size, obj->type, |
1225 | 0 | &obj->idx.oid); |
1226 | 0 | nr_delays--; |
1227 | 0 | } |
1228 | 0 | if (nr_delays) |
1229 | 0 | die(_("confusion beyond insanity in parse_pack_objects()")); |
1230 | 0 | } |
1231 | | |
1232 | | /* |
1233 | | * Second pass: |
1234 | | * - for all non-delta objects, look if it is used as a base for |
1235 | | * deltas; |
1236 | | * - if used as a base, uncompress the object and apply all deltas, |
1237 | | * recursively checking if the resulting object is used as a base |
1238 | | * for some more deltas. |
1239 | | */ |
1240 | | static void resolve_deltas(void) |
1241 | 0 | { |
1242 | 0 | int i; |
1243 | |
|
1244 | 0 | if (!nr_ofs_deltas && !nr_ref_deltas) |
1245 | 0 | return; |
1246 | | |
1247 | | /* Sort deltas by base SHA1/offset for fast searching */ |
1248 | 0 | QSORT(ofs_deltas, nr_ofs_deltas, compare_ofs_delta_entry); |
1249 | 0 | QSORT(ref_deltas, nr_ref_deltas, compare_ref_delta_entry); |
1250 | |
|
1251 | 0 | if (verbose || show_resolving_progress) |
1252 | 0 | progress = start_progress(_("Resolving deltas"), |
1253 | 0 | nr_ref_deltas + nr_ofs_deltas); |
1254 | |
|
1255 | 0 | nr_dispatched = 0; |
1256 | 0 | base_cache_limit = delta_base_cache_limit * nr_threads; |
1257 | 0 | if (nr_threads > 1 || getenv("GIT_FORCE_THREADS")) { |
1258 | 0 | init_thread(); |
1259 | 0 | work_lock(); |
1260 | 0 | for (i = 0; i < nr_threads; i++) { |
1261 | 0 | int ret = pthread_create(&thread_data[i].thread, NULL, |
1262 | 0 | threaded_second_pass, thread_data + i); |
1263 | 0 | if (ret) |
1264 | 0 | die(_("unable to create thread: %s"), |
1265 | 0 | strerror(ret)); |
1266 | 0 | } |
1267 | 0 | work_unlock(); |
1268 | 0 | for (i = 0; i < nr_threads; i++) |
1269 | 0 | pthread_join(thread_data[i].thread, NULL); |
1270 | 0 | cleanup_thread(); |
1271 | 0 | return; |
1272 | 0 | } |
1273 | 0 | threaded_second_pass(¬hread_data); |
1274 | 0 | } |
1275 | | |
1276 | | /* |
1277 | | * Third pass: |
1278 | | * - append objects to convert thin pack to full pack if required |
1279 | | * - write the final pack hash |
1280 | | */ |
1281 | | static void fix_unresolved_deltas(struct hashfile *f); |
1282 | | static void conclude_pack(int fix_thin_pack, const char *curr_pack, unsigned char *pack_hash) |
1283 | 0 | { |
1284 | 0 | if (nr_ref_deltas + nr_ofs_deltas == nr_resolved_deltas) { |
1285 | 0 | stop_progress(&progress); |
1286 | | /* Flush remaining pack final hash. */ |
1287 | 0 | flush(); |
1288 | 0 | return; |
1289 | 0 | } |
1290 | | |
1291 | 0 | if (fix_thin_pack) { |
1292 | 0 | struct hashfile *f; |
1293 | 0 | unsigned char read_hash[GIT_MAX_RAWSZ], tail_hash[GIT_MAX_RAWSZ]; |
1294 | 0 | struct strbuf msg = STRBUF_INIT; |
1295 | 0 | int nr_unresolved = nr_ofs_deltas + nr_ref_deltas - nr_resolved_deltas; |
1296 | 0 | int nr_objects_initial = nr_objects; |
1297 | 0 | if (nr_unresolved <= 0) |
1298 | 0 | die(_("confusion beyond insanity")); |
1299 | 0 | REALLOC_ARRAY(objects, nr_objects + nr_unresolved + 1); |
1300 | 0 | memset(objects + nr_objects + 1, 0, |
1301 | 0 | nr_unresolved * sizeof(*objects)); |
1302 | 0 | f = hashfd(output_fd, curr_pack); |
1303 | 0 | fix_unresolved_deltas(f); |
1304 | 0 | strbuf_addf(&msg, Q_("completed with %d local object", |
1305 | 0 | "completed with %d local objects", |
1306 | 0 | nr_objects - nr_objects_initial), |
1307 | 0 | nr_objects - nr_objects_initial); |
1308 | 0 | stop_progress_msg(&progress, msg.buf); |
1309 | 0 | strbuf_release(&msg); |
1310 | 0 | finalize_hashfile(f, tail_hash, FSYNC_COMPONENT_PACK, 0); |
1311 | 0 | hashcpy(read_hash, pack_hash, the_repository->hash_algo); |
1312 | 0 | fixup_pack_header_footer(output_fd, pack_hash, |
1313 | 0 | curr_pack, nr_objects, |
1314 | 0 | read_hash, consumed_bytes-the_hash_algo->rawsz); |
1315 | 0 | if (!hasheq(read_hash, tail_hash, the_repository->hash_algo)) |
1316 | 0 | die(_("Unexpected tail checksum for %s " |
1317 | 0 | "(disk corruption?)"), curr_pack); |
1318 | 0 | } |
1319 | 0 | if (nr_ofs_deltas + nr_ref_deltas != nr_resolved_deltas) |
1320 | 0 | die(Q_("pack has %d unresolved delta", |
1321 | 0 | "pack has %d unresolved deltas", |
1322 | 0 | nr_ofs_deltas + nr_ref_deltas - nr_resolved_deltas), |
1323 | 0 | nr_ofs_deltas + nr_ref_deltas - nr_resolved_deltas); |
1324 | 0 | } |
1325 | | |
1326 | | static int write_compressed(struct hashfile *f, void *in, unsigned int size) |
1327 | 0 | { |
1328 | 0 | git_zstream stream; |
1329 | 0 | int status; |
1330 | 0 | unsigned char outbuf[4096]; |
1331 | |
|
1332 | 0 | git_deflate_init(&stream, zlib_compression_level); |
1333 | 0 | stream.next_in = in; |
1334 | 0 | stream.avail_in = size; |
1335 | |
|
1336 | 0 | do { |
1337 | 0 | stream.next_out = outbuf; |
1338 | 0 | stream.avail_out = sizeof(outbuf); |
1339 | 0 | status = git_deflate(&stream, Z_FINISH); |
1340 | 0 | hashwrite(f, outbuf, sizeof(outbuf) - stream.avail_out); |
1341 | 0 | } while (status == Z_OK); |
1342 | |
|
1343 | 0 | if (status != Z_STREAM_END) |
1344 | 0 | die(_("unable to deflate appended object (%d)"), status); |
1345 | 0 | size = stream.total_out; |
1346 | 0 | git_deflate_end(&stream); |
1347 | 0 | return size; |
1348 | 0 | } |
1349 | | |
1350 | | static struct object_entry *append_obj_to_pack(struct hashfile *f, |
1351 | | const unsigned char *sha1, void *buf, |
1352 | | unsigned long size, enum object_type type) |
1353 | 0 | { |
1354 | 0 | struct object_entry *obj = &objects[nr_objects++]; |
1355 | 0 | unsigned char header[10]; |
1356 | 0 | unsigned long s = size; |
1357 | 0 | int n = 0; |
1358 | 0 | unsigned char c = (type << 4) | (s & 15); |
1359 | 0 | s >>= 4; |
1360 | 0 | while (s) { |
1361 | 0 | header[n++] = c | 0x80; |
1362 | 0 | c = s & 0x7f; |
1363 | 0 | s >>= 7; |
1364 | 0 | } |
1365 | 0 | header[n++] = c; |
1366 | 0 | crc32_begin(f); |
1367 | 0 | hashwrite(f, header, n); |
1368 | 0 | obj[0].size = size; |
1369 | 0 | obj[0].hdr_size = n; |
1370 | 0 | obj[0].type = type; |
1371 | 0 | obj[0].real_type = type; |
1372 | 0 | obj[1].idx.offset = obj[0].idx.offset + n; |
1373 | 0 | obj[1].idx.offset += write_compressed(f, buf, size); |
1374 | 0 | obj[0].idx.crc32 = crc32_end(f); |
1375 | 0 | hashflush(f); |
1376 | 0 | oidread(&obj->idx.oid, sha1, the_repository->hash_algo); |
1377 | 0 | return obj; |
1378 | 0 | } |
1379 | | |
1380 | | static int delta_pos_compare(const void *_a, const void *_b) |
1381 | 0 | { |
1382 | 0 | struct ref_delta_entry *a = *(struct ref_delta_entry **)_a; |
1383 | 0 | struct ref_delta_entry *b = *(struct ref_delta_entry **)_b; |
1384 | 0 | return a->obj_no - b->obj_no; |
1385 | 0 | } |
1386 | | |
1387 | | static void fix_unresolved_deltas(struct hashfile *f) |
1388 | 0 | { |
1389 | 0 | struct ref_delta_entry **sorted_by_pos; |
1390 | 0 | int i; |
1391 | | |
1392 | | /* |
1393 | | * Since many unresolved deltas may well be themselves base objects |
1394 | | * for more unresolved deltas, we really want to include the |
1395 | | * smallest number of base objects that would cover as much delta |
1396 | | * as possible by picking the |
1397 | | * trunc deltas first, allowing for other deltas to resolve without |
1398 | | * additional base objects. Since most base objects are to be found |
1399 | | * before deltas depending on them, a good heuristic is to start |
1400 | | * resolving deltas in the same order as their position in the pack. |
1401 | | */ |
1402 | 0 | ALLOC_ARRAY(sorted_by_pos, nr_ref_deltas); |
1403 | 0 | for (i = 0; i < nr_ref_deltas; i++) |
1404 | 0 | sorted_by_pos[i] = &ref_deltas[i]; |
1405 | 0 | QSORT(sorted_by_pos, nr_ref_deltas, delta_pos_compare); |
1406 | |
|
1407 | 0 | if (repo_has_promisor_remote(the_repository)) { |
1408 | | /* |
1409 | | * Prefetch the delta bases. |
1410 | | */ |
1411 | 0 | struct oid_array to_fetch = OID_ARRAY_INIT; |
1412 | 0 | for (i = 0; i < nr_ref_deltas; i++) { |
1413 | 0 | struct ref_delta_entry *d = sorted_by_pos[i]; |
1414 | 0 | if (!oid_object_info_extended(the_repository, &d->oid, |
1415 | 0 | NULL, |
1416 | 0 | OBJECT_INFO_FOR_PREFETCH)) |
1417 | 0 | continue; |
1418 | 0 | oid_array_append(&to_fetch, &d->oid); |
1419 | 0 | } |
1420 | 0 | promisor_remote_get_direct(the_repository, |
1421 | 0 | to_fetch.oid, to_fetch.nr); |
1422 | 0 | oid_array_clear(&to_fetch); |
1423 | 0 | } |
1424 | |
|
1425 | 0 | for (i = 0; i < nr_ref_deltas; i++) { |
1426 | 0 | struct ref_delta_entry *d = sorted_by_pos[i]; |
1427 | 0 | enum object_type type; |
1428 | 0 | void *data; |
1429 | 0 | unsigned long size; |
1430 | |
|
1431 | 0 | if (objects[d->obj_no].real_type != OBJ_REF_DELTA) |
1432 | 0 | continue; |
1433 | 0 | data = repo_read_object_file(the_repository, &d->oid, &type, |
1434 | 0 | &size); |
1435 | 0 | if (!data) |
1436 | 0 | continue; |
1437 | | |
1438 | 0 | if (check_object_signature(the_repository, &d->oid, data, size, |
1439 | 0 | type) < 0) |
1440 | 0 | die(_("local object %s is corrupt"), oid_to_hex(&d->oid)); |
1441 | | |
1442 | | /* |
1443 | | * Add this as an object to the objects array and call |
1444 | | * threaded_second_pass() (which will pick up the added |
1445 | | * object). |
1446 | | */ |
1447 | 0 | append_obj_to_pack(f, d->oid.hash, data, size, type); |
1448 | 0 | free(data); |
1449 | 0 | threaded_second_pass(NULL); |
1450 | |
|
1451 | 0 | display_progress(progress, nr_resolved_deltas); |
1452 | 0 | } |
1453 | 0 | free(sorted_by_pos); |
1454 | 0 | } |
1455 | | |
1456 | | static const char *derive_filename(const char *pack_name, const char *strip, |
1457 | | const char *suffix, struct strbuf *buf) |
1458 | 0 | { |
1459 | 0 | size_t len; |
1460 | 0 | if (!strip_suffix(pack_name, strip, &len) || !len || |
1461 | 0 | pack_name[len - 1] != '.') |
1462 | 0 | die(_("packfile name '%s' does not end with '.%s'"), |
1463 | 0 | pack_name, strip); |
1464 | 0 | strbuf_add(buf, pack_name, len); |
1465 | 0 | strbuf_addstr(buf, suffix); |
1466 | 0 | return buf->buf; |
1467 | 0 | } |
1468 | | |
1469 | | static void write_special_file(const char *suffix, const char *msg, |
1470 | | const char *pack_name, const unsigned char *hash, |
1471 | | const char **report) |
1472 | 0 | { |
1473 | 0 | struct strbuf name_buf = STRBUF_INIT; |
1474 | 0 | const char *filename; |
1475 | 0 | int fd; |
1476 | 0 | int msg_len = strlen(msg); |
1477 | |
|
1478 | 0 | if (pack_name) |
1479 | 0 | filename = derive_filename(pack_name, "pack", suffix, &name_buf); |
1480 | 0 | else |
1481 | 0 | filename = odb_pack_name(&name_buf, hash, suffix); |
1482 | |
|
1483 | 0 | fd = odb_pack_keep(filename); |
1484 | 0 | if (fd < 0) { |
1485 | 0 | if (errno != EEXIST) |
1486 | 0 | die_errno(_("cannot write %s file '%s'"), |
1487 | 0 | suffix, filename); |
1488 | 0 | } else { |
1489 | 0 | if (msg_len > 0) { |
1490 | 0 | write_or_die(fd, msg, msg_len); |
1491 | 0 | write_or_die(fd, "\n", 1); |
1492 | 0 | } |
1493 | 0 | if (close(fd) != 0) |
1494 | 0 | die_errno(_("cannot close written %s file '%s'"), |
1495 | 0 | suffix, filename); |
1496 | 0 | if (report) |
1497 | 0 | *report = suffix; |
1498 | 0 | } |
1499 | 0 | strbuf_release(&name_buf); |
1500 | 0 | } |
1501 | | |
/*
 * Move the temporary file `curr_name` to its final location.
 *
 * When *final_name is NULL it is first set to the name produced by
 * odb_pack_name() from `hash` and `ext`, using `name` as storage. When
 * *final_name and curr_name are the very same pointer nothing is renamed;
 * the file is only made read-only (mode 0444) if make_read_only_if_same
 * is set. Dies when the rename fails.
 */
static void rename_tmp_packfile(const char **final_name,
				const char *curr_name,
				struct strbuf *name, unsigned char *hash,
				const char *ext, int make_read_only_if_same)
{
	if (*final_name == curr_name) {
		/* Already in place; the chmod result is not checked. */
		if (make_read_only_if_same)
			chmod(*final_name, 0444);
		return;
	}

	if (!*final_name)
		*final_name = odb_pack_name(name, hash, ext);
	if (finalize_object_file(curr_name, *final_name))
		die(_("unable to rename temporary '*.%s' file to '%s'"),
		    ext, *final_name);
}
1517 | | |
1518 | | static void final(const char *final_pack_name, const char *curr_pack_name, |
1519 | | const char *final_index_name, const char *curr_index_name, |
1520 | | const char *final_rev_index_name, const char *curr_rev_index_name, |
1521 | | const char *keep_msg, const char *promisor_msg, |
1522 | | unsigned char *hash) |
1523 | 0 | { |
1524 | 0 | const char *report = "pack"; |
1525 | 0 | struct strbuf pack_name = STRBUF_INIT; |
1526 | 0 | struct strbuf index_name = STRBUF_INIT; |
1527 | 0 | struct strbuf rev_index_name = STRBUF_INIT; |
1528 | |
|
1529 | 0 | if (!from_stdin) { |
1530 | 0 | close(input_fd); |
1531 | 0 | } else { |
1532 | 0 | fsync_component_or_die(FSYNC_COMPONENT_PACK, output_fd, curr_pack_name); |
1533 | 0 | if (close(output_fd)) |
1534 | 0 | die_errno(_("error while closing pack file")); |
1535 | 0 | } |
1536 | | |
1537 | 0 | if (keep_msg) |
1538 | 0 | write_special_file("keep", keep_msg, final_pack_name, hash, |
1539 | 0 | &report); |
1540 | 0 | if (promisor_msg) |
1541 | 0 | write_special_file("promisor", promisor_msg, final_pack_name, |
1542 | 0 | hash, NULL); |
1543 | |
|
1544 | 0 | rename_tmp_packfile(&final_pack_name, curr_pack_name, &pack_name, |
1545 | 0 | hash, "pack", from_stdin); |
1546 | 0 | if (curr_rev_index_name) |
1547 | 0 | rename_tmp_packfile(&final_rev_index_name, curr_rev_index_name, |
1548 | 0 | &rev_index_name, hash, "rev", 1); |
1549 | 0 | rename_tmp_packfile(&final_index_name, curr_index_name, &index_name, |
1550 | 0 | hash, "idx", 1); |
1551 | |
|
1552 | 0 | if (do_fsck_object) { |
1553 | 0 | struct packed_git *p; |
1554 | 0 | p = add_packed_git(final_index_name, strlen(final_index_name), 0); |
1555 | 0 | if (p) |
1556 | 0 | install_packed_git(the_repository, p); |
1557 | 0 | } |
1558 | |
|
1559 | 0 | if (!from_stdin) { |
1560 | 0 | printf("%s\n", hash_to_hex(hash)); |
1561 | 0 | } else { |
1562 | 0 | struct strbuf buf = STRBUF_INIT; |
1563 | |
|
1564 | 0 | strbuf_addf(&buf, "%s\t%s\n", report, hash_to_hex(hash)); |
1565 | 0 | write_or_die(1, buf.buf, buf.len); |
1566 | 0 | strbuf_release(&buf); |
1567 | | |
1568 | | /* Write the last part of the buffer to stdout */ |
1569 | 0 | write_in_full(1, input_buffer + input_offset, input_len); |
1570 | 0 | } |
1571 | |
|
1572 | 0 | strbuf_release(&rev_index_name); |
1573 | 0 | strbuf_release(&index_name); |
1574 | 0 | strbuf_release(&pack_name); |
1575 | 0 | } |
1576 | | |
1577 | | static int git_index_pack_config(const char *k, const char *v, |
1578 | | const struct config_context *ctx, void *cb) |
1579 | 0 | { |
1580 | 0 | struct pack_idx_option *opts = cb; |
1581 | |
|
1582 | 0 | if (!strcmp(k, "pack.indexversion")) { |
1583 | 0 | opts->version = git_config_int(k, v, ctx->kvi); |
1584 | 0 | if (opts->version > 2) |
1585 | 0 | die(_("bad pack.indexVersion=%"PRIu32), opts->version); |
1586 | 0 | return 0; |
1587 | 0 | } |
1588 | 0 | if (!strcmp(k, "pack.threads")) { |
1589 | 0 | nr_threads = git_config_int(k, v, ctx->kvi); |
1590 | 0 | if (nr_threads < 0) |
1591 | 0 | die(_("invalid number of threads specified (%d)"), |
1592 | 0 | nr_threads); |
1593 | 0 | if (!HAVE_THREADS && nr_threads != 1) { |
1594 | 0 | warning(_("no threads support, ignoring %s"), k); |
1595 | 0 | nr_threads = 1; |
1596 | 0 | } |
1597 | 0 | return 0; |
1598 | 0 | } |
1599 | 0 | if (!strcmp(k, "pack.writereverseindex")) { |
1600 | 0 | if (git_config_bool(k, v)) |
1601 | 0 | opts->flags |= WRITE_REV; |
1602 | 0 | else |
1603 | 0 | opts->flags &= ~WRITE_REV; |
1604 | 0 | } |
1605 | 0 | return git_default_config(k, v, ctx, cb); |
1606 | 0 | } |
1607 | | |
/*
 * qsort-style comparator for uint32_t values: returns -1, 0 or 1 when the
 * first value is smaller than, equal to or greater than the second.
 */
static int cmp_uint32(const void *a_, const void *b_)
{
	const uint32_t lhs = *(const uint32_t *)a_;
	const uint32_t rhs = *(const uint32_t *)b_;

	if (lhs < rhs)
		return -1;
	return lhs > rhs;
}
1615 | | |
1616 | | static void read_v2_anomalous_offsets(struct packed_git *p, |
1617 | | struct pack_idx_option *opts) |
1618 | 0 | { |
1619 | 0 | const uint32_t *idx1, *idx2; |
1620 | 0 | uint32_t i; |
1621 | | |
1622 | | /* The address of the 4-byte offset table */ |
1623 | 0 | idx1 = (((const uint32_t *)((const uint8_t *)p->index_data + p->crc_offset)) |
1624 | 0 | + (size_t)p->num_objects /* CRC32 table */ |
1625 | 0 | ); |
1626 | | |
1627 | | /* The address of the 8-byte offset table */ |
1628 | 0 | idx2 = idx1 + p->num_objects; |
1629 | |
|
1630 | 0 | for (i = 0; i < p->num_objects; i++) { |
1631 | 0 | uint32_t off = ntohl(idx1[i]); |
1632 | 0 | if (!(off & 0x80000000)) |
1633 | 0 | continue; |
1634 | 0 | off = off & 0x7fffffff; |
1635 | 0 | check_pack_index_ptr(p, &idx2[off * 2]); |
1636 | 0 | if (idx2[off * 2]) |
1637 | 0 | continue; |
1638 | | /* |
1639 | | * The real offset is ntohl(idx2[off * 2]) in high 4 |
1640 | | * octets, and ntohl(idx2[off * 2 + 1]) in low 4 |
1641 | | * octets. But idx2[off * 2] is Zero!!! |
1642 | | */ |
1643 | 0 | ALLOC_GROW(opts->anomaly, opts->anomaly_nr + 1, opts->anomaly_alloc); |
1644 | 0 | opts->anomaly[opts->anomaly_nr++] = ntohl(idx2[off * 2 + 1]); |
1645 | 0 | } |
1646 | |
|
1647 | 0 | QSORT(opts->anomaly, opts->anomaly_nr, cmp_uint32); |
1648 | 0 | } |
1649 | | |
1650 | | static void read_idx_option(struct pack_idx_option *opts, const char *pack_name) |
1651 | 0 | { |
1652 | 0 | struct packed_git *p = add_packed_git(pack_name, strlen(pack_name), 1); |
1653 | |
|
1654 | 0 | if (!p) |
1655 | 0 | die(_("Cannot open existing pack file '%s'"), pack_name); |
1656 | 0 | if (open_pack_index(p)) |
1657 | 0 | die(_("Cannot open existing pack idx file for '%s'"), pack_name); |
1658 | | |
1659 | | /* Read the attributes from the existing idx file */ |
1660 | 0 | opts->version = p->index_version; |
1661 | |
|
1662 | 0 | if (opts->version == 2) |
1663 | 0 | read_v2_anomalous_offsets(p, opts); |
1664 | | |
1665 | | /* |
1666 | | * Get rid of the idx file as we do not need it anymore. |
1667 | | * NEEDSWORK: extract this bit from free_pack_by_name() in |
1668 | | * object-file.c, perhaps? It shouldn't matter very much as we |
1669 | | * know we haven't installed this pack (hence we never have |
1670 | | * read anything from it). |
1671 | | */ |
1672 | 0 | close_pack_index(p); |
1673 | 0 | free(p); |
1674 | 0 | } |
1675 | | |
1676 | | static void show_pack_info(int stat_only) |
1677 | 0 | { |
1678 | 0 | int i, baseobjects = nr_objects - nr_ref_deltas - nr_ofs_deltas; |
1679 | 0 | unsigned long *chain_histogram = NULL; |
1680 | |
|
1681 | 0 | if (deepest_delta) |
1682 | 0 | CALLOC_ARRAY(chain_histogram, deepest_delta); |
1683 | |
|
1684 | 0 | for (i = 0; i < nr_objects; i++) { |
1685 | 0 | struct object_entry *obj = &objects[i]; |
1686 | |
|
1687 | 0 | if (is_delta_type(obj->type)) |
1688 | 0 | chain_histogram[obj_stat[i].delta_depth - 1]++; |
1689 | 0 | if (stat_only) |
1690 | 0 | continue; |
1691 | 0 | printf("%s %-6s %"PRIuMAX" %"PRIuMAX" %"PRIuMAX, |
1692 | 0 | oid_to_hex(&obj->idx.oid), |
1693 | 0 | type_name(obj->real_type), (uintmax_t)obj->size, |
1694 | 0 | (uintmax_t)(obj[1].idx.offset - obj->idx.offset), |
1695 | 0 | (uintmax_t)obj->idx.offset); |
1696 | 0 | if (is_delta_type(obj->type)) { |
1697 | 0 | struct object_entry *bobj = &objects[obj_stat[i].base_object_no]; |
1698 | 0 | printf(" %u %s", obj_stat[i].delta_depth, |
1699 | 0 | oid_to_hex(&bobj->idx.oid)); |
1700 | 0 | } |
1701 | 0 | putchar('\n'); |
1702 | 0 | } |
1703 | |
|
1704 | 0 | if (baseobjects) |
1705 | 0 | printf_ln(Q_("non delta: %d object", |
1706 | 0 | "non delta: %d objects", |
1707 | 0 | baseobjects), |
1708 | 0 | baseobjects); |
1709 | 0 | for (i = 0; i < deepest_delta; i++) { |
1710 | 0 | if (!chain_histogram[i]) |
1711 | 0 | continue; |
1712 | 0 | printf_ln(Q_("chain length = %d: %lu object", |
1713 | 0 | "chain length = %d: %lu objects", |
1714 | 0 | chain_histogram[i]), |
1715 | 0 | i + 1, |
1716 | 0 | chain_histogram[i]); |
1717 | 0 | } |
1718 | 0 | free(chain_histogram); |
1719 | 0 | } |
1720 | | |
/*
 * Entry point for the index-pack builtin.
 *
 * Parses the command line into local flags and file-level option
 * variables, opens and parses the pack (named on the command line or read
 * from stdin), resolves deltas, and then writes the pack index plus,
 * when rev_index is set, a reverse index. Unless --verify (or one of the
 * --verify-stat variants) was given, final() is called with the resulting
 * file names.
 *
 * Returns 1 when --check-self-contained-and-connected was given and
 * foreign_nr is nonzero, and 0 otherwise. Invalid invocations go through
 * usage() or die(); NOTE(review): both are assumed not to return.
 */
1721 | | int cmd_index_pack(int argc, const char **argv, const char *prefix) |
1722 | 0 | { |
1723 | 0 | int i, fix_thin_pack = 0, verify = 0, stat_only = 0, rev_index; |
1724 | 0 | const char *curr_index; |
1725 | 0 | const char *curr_rev_index = NULL; |
1726 | 0 | const char *index_name = NULL, *pack_name = NULL, *rev_index_name = NULL; |
1727 | 0 | const char *keep_msg = NULL; |
1728 | 0 | const char *promisor_msg = NULL; |
1729 | 0 | struct strbuf index_name_buf = STRBUF_INIT; |
1730 | 0 | struct strbuf rev_index_name_buf = STRBUF_INIT; |
1731 | 0 | struct pack_idx_entry **idx_objects; |
1732 | 0 | struct pack_idx_option opts; |
1733 | 0 | unsigned char pack_hash[GIT_MAX_RAWSZ]; |
1734 | 0 | unsigned foreign_nr = 1; /* zero is a "good" value, assume bad */ |
1735 | 0 | int report_end_of_input = 0; |
1736 | 0 | int hash_algo = 0; |
1737 | | |
1738 | | /* |
1739 | | * index-pack never needs to fetch missing objects except when |
1740 | | * REF_DELTA bases are missing (which are explicitly handled). It only |
1741 | | * accesses the repo to do hash collision checks and to check which |
1742 | | * REF_DELTA bases need to be fetched. |
1743 | | */ |
1744 | 0 | fetch_if_missing = 0; |
1745 | 

1746 | 0 | if (argc == 2 && !strcmp(argv[1], "-h")) |
1747 | 0 | usage(index_pack_usage); |
1748 | | |
1749 | 0 | disable_replace_refs(); |
1750 | 0 | fsck_options.walk = mark_link; |
1751 | 

/*
 * Start from reset index options with WRITE_REV requested. git_config()
 * is handed &opts, so configuration can presumably adjust them (TODO
 * confirm against git_index_pack_config).
 */
1752 | 0 | reset_pack_idx_option(&opts); |
1753 | 0 | opts.flags |= WRITE_REV; |
1754 | 0 | git_config(git_index_pack_config, &opts); |
/*
 * NOTE(review): judging by the error message, the working directory was
 * changed before this function ran and prefix leads back to where the
 * command was invoked; confirm against the setup code.
 */
1755 | 0 | if (prefix && chdir(prefix)) |
1756 | 0 | die(_("Cannot come back to cwd")); |
1757 | | |
/*
 * Initial value of rev_index: forced off when GIT_TEST_NO_WRITE_REV_INDEX
 * is true, otherwise on if either rev flag is set in opts. The
 * --rev-index and --no-rev-index options parsed below override it.
 */
1758 | 0 | if (git_env_bool(GIT_TEST_NO_WRITE_REV_INDEX, 0)) |
1759 | 0 | rev_index = 0; |
1760 | 0 | else |
1761 | 0 | rev_index = !!(opts.flags & (WRITE_REV_VERIFY | WRITE_REV)); |
1762 | 

/*
 * Walk argv. Arguments beginning with a dash are options; at most one
 * other argument is accepted and becomes pack_name.
 */
1763 | 0 | for (i = 1; i < argc; i++) { |
1764 | 0 | const char *arg = argv[i]; |
1765 | 

1766 | 0 | if (*arg == '-') { |
1767 | 0 | if (!strcmp(arg, "--stdin")) { |
1768 | 0 | from_stdin = 1; |
1769 | 0 | } else if (!strcmp(arg, "--fix-thin")) { |
1770 | 0 | fix_thin_pack = 1; |
1771 | 0 | } else if (skip_to_optional_arg(arg, "--strict", &arg)) { |
/*
 * arg was passed by address above and is then handed to
 * fsck_set_msg_types(); presumably it now points at the optional value
 * that followed the option name (TODO confirm skip_to_optional_arg).
 */
1772 | 0 | strict = 1; |
1773 | 0 | do_fsck_object = 1; |
1774 | 0 | fsck_set_msg_types(&fsck_options, arg); |
1775 | 0 | } else if (!strcmp(arg, "--check-self-contained-and-connected")) { |
1776 | 0 | strict = 1; |
1777 | 0 | check_self_contained_and_connected = 1; |
1778 | 0 | } else if (skip_to_optional_arg(arg, "--fsck-objects", &arg)) { |
1779 | 0 | do_fsck_object = 1; |
1780 | 0 | fsck_set_msg_types(&fsck_options, arg); |
1781 | 0 | } else if (!strcmp(arg, "--verify")) { |
1782 | 0 | verify = 1; |
1783 | 0 | } else if (!strcmp(arg, "--verify-stat")) { |
1784 | 0 | verify = 1; |
1785 | 0 | show_stat = 1; |
1786 | 0 | } else if (!strcmp(arg, "--verify-stat-only")) { |
1787 | 0 | verify = 1; |
1788 | 0 | show_stat = 1; |
1789 | 0 | stat_only = 1; |
1790 | 0 | } else if (skip_to_optional_arg(arg, "--keep", &keep_msg)) { |
1791 | 0 | ; /* nothing to do */ |
1792 | 0 | } else if (skip_to_optional_arg(arg, "--promisor", &promisor_msg)) { |
1793 | 0 | ; /* already parsed */ |
1794 | 0 | } else if (starts_with(arg, "--threads=")) { |
/*
 * The value starts at arg+10, just past "--threads=". Base 0 lets
 * strtoul() accept decimal, octal and hexadecimal. An empty value or
 * trailing characters are rejected.
 * NOTE(review): the unsigned long result is stored into nr_threads
 * before the "< 0" test, so that test depends on the declared type of
 * nr_threads, which is not visible in this chunk.
 */
1795 | 0 | char *end; |
1796 | 0 | nr_threads = strtoul(arg+10, &end, 0); |
1797 | 0 | if (!arg[10] || *end || nr_threads < 0) |
1798 | 0 | usage(index_pack_usage); |
1799 | 0 | if (!HAVE_THREADS && nr_threads != 1) { |
1800 | 0 | warning(_("no threads support, ignoring %s"), arg); |
1801 | 0 | nr_threads = 1; |
1802 | 0 | } |
1803 | 0 | } else if (starts_with(arg, "--pack_header=")) { |
/*
 * The value has the form <version>,<entries>. A pack header carrying
 * PACK_SIGNATURE and these two numbers (in network byte order) is
 * written at the start of input_buffer and input_len is set to its
 * size. NOTE(review): presumably this stands in for a header that the
 * invoking process already consumed from the stream; confirm.
 */
1804 | 0 | struct pack_header *hdr; |
1805 | 0 | char *c; |
1806 | 

1807 | 0 | hdr = (struct pack_header *)input_buffer; |
1808 | 0 | hdr->hdr_signature = htonl(PACK_SIGNATURE); |
1809 | 0 | hdr->hdr_version = htonl(strtoul(arg + 14, &c, 10)); |
1810 | 0 | if (*c != ',') |
1811 | 0 | die(_("bad %s"), arg); |
1812 | 0 | hdr->hdr_entries = htonl(strtoul(c + 1, &c, 10)); |
1813 | 0 | if (*c) |
1814 | 0 | die(_("bad %s"), arg); |
1815 | 0 | input_len = sizeof(*hdr); |
1816 | 0 | } else if (!strcmp(arg, "-v")) { |
1817 | 0 | verbose = 1; |
1818 | 0 | } else if (!strcmp(arg, "--progress-title")) { |
/* Takes its value from the next argv element; may be given only once. */
1819 | 0 | if (progress_title || (i+1) >= argc) |
1820 | 0 | usage(index_pack_usage); |
1821 | 0 | progress_title = argv[++i]; |
1822 | 0 | } else if (!strcmp(arg, "--show-resolving-progress")) { |
1823 | 0 | show_resolving_progress = 1; |
1824 | 0 | } else if (!strcmp(arg, "--report-end-of-input")) { |
1825 | 0 | report_end_of_input = 1; |
1826 | 0 | } else if (!strcmp(arg, "-o")) { |
/* Takes its value from the next argv element; may be given only once. */
1827 | 0 | if (index_name || (i+1) >= argc) |
1828 | 0 | usage(index_pack_usage); |
1829 | 0 | index_name = argv[++i]; |
1830 | 0 | } else if (starts_with(arg, "--index-version=")) { |
/*
 * The value has the form <version>[,<off32-limit>]. Versions above 2,
 * trailing characters, and limits with bit 0x80000000 set are rejected.
 */
1831 | 0 | char *c; |
1832 | 0 | opts.version = strtoul(arg + 16, &c, 10); |
1833 | 0 | if (opts.version > 2) |
1834 | 0 | die(_("bad %s"), arg); |
1835 | 0 | if (*c == ',') |
1836 | 0 | opts.off32_limit = strtoul(c+1, &c, 0); |
1837 | 0 | if (*c || opts.off32_limit & 0x80000000) |
1838 | 0 | die(_("bad %s"), arg); |
1839 | 0 | } else if (skip_prefix(arg, "--max-input-size=", &arg)) { |
/*
 * NOTE(review): the end pointer is NULL, so a value with trailing
 * non-digit characters is accepted without complaint.
 */
1840 | 0 | max_input_size = strtoumax(arg, NULL, 10); |
1841 | 0 | } else if (skip_prefix(arg, "--object-format=", &arg)) { |
1842 | 0 | hash_algo = hash_algo_by_name(arg); |
1843 | 0 | if (hash_algo == GIT_HASH_UNKNOWN) |
1844 | 0 | die(_("unknown hash algorithm '%s'"), arg); |
1845 | 0 | repo_set_hash_algo(the_repository, hash_algo); |
1846 | 0 | } else if (!strcmp(arg, "--rev-index")) { |
1847 | 0 | rev_index = 1; |
1848 | 0 | } else if (!strcmp(arg, "--no-rev-index")) { |
1849 | 0 | rev_index = 0; |
1850 | 0 | } else |
1851 | 0 | usage(index_pack_usage); |
1852 | 0 | continue; |
1853 | 0 | } |
1854 | | |
/* Not an option: this is the pack file name, allowed only once. */
1855 | 0 | if (pack_name) |
1856 | 0 | usage(index_pack_usage); |
1857 | 0 | pack_name = arg; |
1858 | 0 | } |
1859 | | |
/*
 * Cross-option validation. hash_algo is still 0 here unless
 * --object-format was parsed above. When no -o was given, the index name
 * is derived from the pack name by swapping the "pack" suffix for "idx".
 */
1860 | 0 | if (!pack_name && !from_stdin) |
1861 | 0 | usage(index_pack_usage); |
1862 | 0 | if (fix_thin_pack && !from_stdin) |
1863 | 0 | die(_("the option '%s' requires '%s'"), "--fix-thin", "--stdin"); |
1864 | 0 | if (from_stdin && !startup_info->have_repository) |
1865 | 0 | die(_("--stdin requires a git repository")); |
1866 | 0 | if (from_stdin && hash_algo) |
1867 | 0 | die(_("options '%s' and '%s' cannot be used together"), "--object-format", "--stdin"); |
1868 | 0 | if (!index_name && pack_name) |
1869 | 0 | index_name = derive_filename(pack_name, "pack", "idx", &index_name_buf); |
1870 | | |
1871 | | /* |
1872 | | * Packfiles and indices do not carry enough information to be able to |
1873 | | * identify their object hash. So when we are neither in a repository |
1874 | | * nor has the user told us which object hash to use we have no other |
1875 | | * choice but to guess the object hash. |
1876 | | */ |
1877 | 0 | if (!the_repository->hash_algo) |
1878 | 0 | repo_set_hash_algo(the_repository, GIT_HASH_SHA1); |
1879 | 

/*
 * Recompute the rev flags from the final rev_index value: clear both,
 * then set WRITE_REV_VERIFY under verify or WRITE_REV otherwise. The rev
 * file name is derived from the index name when one is known.
 */
1880 | 0 | opts.flags &= ~(WRITE_REV | WRITE_REV_VERIFY); |
1881 | 0 | if (rev_index) { |
1882 | 0 | opts.flags |= verify ? WRITE_REV_VERIFY : WRITE_REV; |
1883 | 0 | if (index_name) |
1884 | 0 | rev_index_name = derive_filename(index_name, |
1885 | 0 | "idx", "rev", |
1886 | 0 | &rev_index_name_buf); |
1887 | 0 | } |
1888 | 

/*
 * Verify mode needs an index name. NOTE(review): read_idx_option() is
 * presumably loading parameters of the existing index into opts so the
 * regenerated one can be compared with it; confirm.
 */
1889 | 0 | if (verify) { |
1890 | 0 | if (!index_name) |
1891 | 0 | die(_("--verify with no packfile name given")); |
1892 | 0 | read_idx_option(&opts, index_name); |
1893 | 0 | opts.flags |= WRITE_IDX_VERIFY | WRITE_IDX_STRICT; |
1894 | 0 | } |
1895 | 0 | if (strict) |
1896 | 0 | opts.flags |= WRITE_IDX_STRICT; |
1897 | 

/* Pick a default thread count only when none was set (nr_threads is 0). */
1898 | 0 | if (HAVE_THREADS && !nr_threads) { |
1899 | 0 | nr_threads = online_cpus(); |
1900 | | /* |
1901 | | * Experiments show that going above 20 threads doesn't help, |
1902 | | * no matter how many cores you have. Below that, we tend to |
1903 | | * max at half the number of online_cpus(), presumably because |
1904 | | * half of those are hyperthreads rather than full cores. We'll |
1905 | | * never reduce the level below "3", though, to match a |
1906 | | * historical value that nobody complained about. |
1907 | | */ |
1908 | 0 | if (nr_threads < 4) |
1909 | 0 | ; /* too few cores to consider capping */ |
1910 | 0 | else if (nr_threads < 6) |
1911 | 0 | nr_threads = 3; /* historic cap */ |
1912 | 0 | else if (nr_threads < 40) |
1913 | 0 | nr_threads /= 2; |
1914 | 0 | else |
1915 | 0 | nr_threads = 20; /* hard cap */ |
1916 | 0 | } |
1917 | 

/*
 * Main pipeline: open the pack, parse its header, allocate the per-object
 * tables, parse all objects, then resolve deltas and conclude the pack.
 * objects and obj_stat get one slot more than nr_objects; NOTE(review):
 * presumably so that the entry after the last object can be read (the
 * stat output reads obj[1].idx.offset); confirm in parse_pack_objects().
 */
1918 | 0 | curr_pack = open_pack_file(pack_name); |
1919 | 0 | parse_pack_header(); |
1920 | 0 | CALLOC_ARRAY(objects, st_add(nr_objects, 1)); |
1921 | 0 | if (show_stat) |
1922 | 0 | CALLOC_ARRAY(obj_stat, st_add(nr_objects, 1)); |
1923 | 0 | CALLOC_ARRAY(ofs_deltas, nr_objects); |
1924 | 0 | parse_pack_objects(pack_hash); |
/*
 * With --report-end-of-input, write a single NUL byte to file descriptor
 * 2 once parsing is done. The result of the write is not checked.
 */
1925 | 0 | if (report_end_of_input) |
1926 | 0 | write_in_full(2, "\0", 1); |
1927 | 0 | resolve_deltas(); |
1928 | 0 | conclude_pack(fix_thin_pack, curr_pack, pack_hash); |
1929 | 0 | free(ofs_deltas); |
1930 | 0 | free(ref_deltas); |
/*
 * foreign_nr is only updated when strict is set; otherwise it keeps its
 * pessimistic initial value of 1.
 */
1931 | 0 | if (strict) |
1932 | 0 | foreign_nr = check_objects(); |
1933 | 

1934 | 0 | if (show_stat) |
1935 | 0 | show_pack_info(stat_only); |
1936 | 

/*
 * Build a temporary array of pointers to the idx member of every object
 * entry; it is passed to the index and rev-index writers and freed right
 * after.
 */
1937 | 0 | ALLOC_ARRAY(idx_objects, nr_objects); |
1938 | 0 | for (i = 0; i < nr_objects; i++) |
1939 | 0 | idx_objects[i] = &objects[i].idx; |
1940 | 0 | curr_index = write_idx_file(index_name, idx_objects, nr_objects, &opts, pack_hash); |
1941 | 0 | if (rev_index) |
1942 | 0 | curr_rev_index = write_rev_file(rev_index_name, idx_objects, |
1943 | 0 | nr_objects, pack_hash, |
1944 | 0 | opts.flags); |
1945 | 0 | free(idx_objects); |
1946 | 

/* In verify mode final() is skipped and input_fd is simply closed. */
1947 | 0 | if (!verify) |
1948 | 0 | final(pack_name, curr_pack, |
1949 | 0 | index_name, curr_index, |
1950 | 0 | rev_index_name, curr_rev_index, |
1951 | 0 | keep_msg, promisor_msg, |
1952 | 0 | pack_hash); |
1953 | 0 | else |
1954 | 0 | close(input_fd); |
1955 | 

1956 | 0 | if (do_fsck_object && fsck_finish(&fsck_options)) |
1957 | 0 | die(_("fsck error in pack objects")); |
1958 | | |
/*
 * Cleanup. The curr_* strings are freed only when the matching name was
 * not supplied. NOTE(review): presumably the callee allocated a
 * temporary name in that case; confirm against open_pack_file(),
 * write_idx_file() and write_rev_file().
 */
1959 | 0 | free(opts.anomaly); |
1960 | 0 | free(objects); |
1961 | 0 | strbuf_release(&index_name_buf); |
1962 | 0 | strbuf_release(&rev_index_name_buf); |
1963 | 0 | if (!pack_name) |
1964 | 0 | free((void *) curr_pack); |
1965 | 0 | if (!index_name) |
1966 | 0 | free((void *) curr_index); |
1967 | 0 | if (!rev_index_name) |
1968 | 0 | free((void *) curr_rev_index); |
1969 | | |
1970 | | /* |
1971 | | * Let the caller know this pack is not self contained |
1972 | | */ |
1973 | 0 | if (check_self_contained_and_connected && foreign_nr) |
1974 | 0 | return 1; |
1975 | | |
1976 | 0 | return 0; |
1977 | 0 | } |