Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2011, Google Inc. |
3 | | */ |
4 | | |
5 | | #define USE_THE_REPOSITORY_VARIABLE |
6 | | |
7 | | #include "git-compat-util.h" |
8 | | #include "convert.h" |
9 | | #include "environment.h" |
10 | | #include "streaming.h" |
11 | | #include "repository.h" |
12 | | #include "object-file.h" |
13 | | #include "odb.h" |
14 | | #include "replace-object.h" |
15 | | #include "packfile.h" |
16 | | |
/*
 * Method signatures stored in every git_istream.
 *
 * open:  prepare the stream for reading the object "oid" from "r" and
 *        report its type through "type"; returns 0 on success and
 *        non-zero on failure.
 * close: release backend resources; the return value is handed back to
 *        the caller of close_istream().
 * read:  copy up to "sz" bytes into "buf"; returns the number of bytes
 *        produced, 0 once the stream is exhausted, or -1 on error.
 */
typedef int (*open_istream_fn)(struct git_istream *st,
			       struct repository *r,
			       const struct object_id *oid,
			       enum object_type *type);
typedef int (*close_istream_fn)(struct git_istream *st);
typedef ssize_t (*read_istream_fn)(struct git_istream *st, char *buf, size_t sz);
23 | | |
/* Size in bytes of each staging buffer used by a filtered stream. */
#define FILTER_BUFFER (16 * 1024)

/*
 * Bookkeeping for a stream whose bytes are run through a stream_filter
 * before being handed to the reader.  Data flows
 * upstream -> ibuf -> filter -> obuf -> caller.
 */
struct filtered_istream {
	struct git_istream *upstream;	/* stream the unfiltered bytes come from */
	struct stream_filter *filter;	/* filter applied to those bytes */
	char ibuf[FILTER_BUFFER];	/* bytes read from upstream, awaiting the filter */
	char obuf[FILTER_BUFFER];	/* filter output, awaiting the reader */
	int i_end;			/* one past the last valid byte in ibuf */
	int i_ptr;			/* next ibuf byte to feed to the filter */
	int o_end;			/* one past the last valid byte in obuf */
	int o_ptr;			/* next obuf byte to hand to the reader */
	int input_finished;		/* non-zero once upstream reported end of data */
};
35 | | |
36 | | struct git_istream { |
37 | | open_istream_fn open; |
38 | | close_istream_fn close; |
39 | | read_istream_fn read; |
40 | | |
41 | | unsigned long size; /* inflated size of full object */ |
42 | | git_zstream z; |
43 | | enum { z_unused, z_used, z_done, z_error } z_state; |
44 | | |
45 | | union { |
46 | | struct { |
47 | | char *buf; /* from odb_read_object_info_extended() */ |
48 | | unsigned long read_ptr; |
49 | | } incore; |
50 | | |
51 | | struct { |
52 | | void *mapped; |
53 | | unsigned long mapsize; |
54 | | char hdr[32]; |
55 | | int hdr_avail; |
56 | | int hdr_used; |
57 | | } loose; |
58 | | |
59 | | struct { |
60 | | struct packed_git *pack; |
61 | | off_t pos; |
62 | | } in_pack; |
63 | | |
64 | | struct filtered_istream filtered; |
65 | | } u; |
66 | | }; |
67 | | |
68 | | /***************************************************************** |
69 | | * |
70 | | * Common helpers |
71 | | * |
72 | | *****************************************************************/ |
73 | | |
74 | | static void close_deflated_stream(struct git_istream *st) |
75 | 0 | { |
76 | 0 | if (st->z_state == z_used) |
77 | 0 | git_inflate_end(&st->z); |
78 | 0 | } |
79 | | |
80 | | |
81 | | /***************************************************************** |
82 | | * |
83 | | * Filtered stream |
84 | | * |
85 | | *****************************************************************/ |
86 | | |
87 | | static int close_istream_filtered(struct git_istream *st) |
88 | 0 | { |
89 | 0 | free_stream_filter(st->u.filtered.filter); |
90 | 0 | return close_istream(st->u.filtered.upstream); |
91 | 0 | } |
92 | | |
93 | | static ssize_t read_istream_filtered(struct git_istream *st, char *buf, |
94 | | size_t sz) |
95 | 0 | { |
96 | 0 | struct filtered_istream *fs = &(st->u.filtered); |
97 | 0 | size_t filled = 0; |
98 | |
|
99 | 0 | while (sz) { |
100 | | /* do we already have filtered output? */ |
101 | 0 | if (fs->o_ptr < fs->o_end) { |
102 | 0 | size_t to_move = fs->o_end - fs->o_ptr; |
103 | 0 | if (sz < to_move) |
104 | 0 | to_move = sz; |
105 | 0 | memcpy(buf + filled, fs->obuf + fs->o_ptr, to_move); |
106 | 0 | fs->o_ptr += to_move; |
107 | 0 | sz -= to_move; |
108 | 0 | filled += to_move; |
109 | 0 | continue; |
110 | 0 | } |
111 | 0 | fs->o_end = fs->o_ptr = 0; |
112 | | |
113 | | /* do we have anything to feed the filter with? */ |
114 | 0 | if (fs->i_ptr < fs->i_end) { |
115 | 0 | size_t to_feed = fs->i_end - fs->i_ptr; |
116 | 0 | size_t to_receive = FILTER_BUFFER; |
117 | 0 | if (stream_filter(fs->filter, |
118 | 0 | fs->ibuf + fs->i_ptr, &to_feed, |
119 | 0 | fs->obuf, &to_receive)) |
120 | 0 | return -1; |
121 | 0 | fs->i_ptr = fs->i_end - to_feed; |
122 | 0 | fs->o_end = FILTER_BUFFER - to_receive; |
123 | 0 | continue; |
124 | 0 | } |
125 | | |
126 | | /* tell the filter to drain upon no more input */ |
127 | 0 | if (fs->input_finished) { |
128 | 0 | size_t to_receive = FILTER_BUFFER; |
129 | 0 | if (stream_filter(fs->filter, |
130 | 0 | NULL, NULL, |
131 | 0 | fs->obuf, &to_receive)) |
132 | 0 | return -1; |
133 | 0 | fs->o_end = FILTER_BUFFER - to_receive; |
134 | 0 | if (!fs->o_end) |
135 | 0 | break; |
136 | 0 | continue; |
137 | 0 | } |
138 | 0 | fs->i_end = fs->i_ptr = 0; |
139 | | |
140 | | /* refill the input from the upstream */ |
141 | 0 | if (!fs->input_finished) { |
142 | 0 | fs->i_end = read_istream(fs->upstream, fs->ibuf, FILTER_BUFFER); |
143 | 0 | if (fs->i_end < 0) |
144 | 0 | return -1; |
145 | 0 | if (fs->i_end) |
146 | 0 | continue; |
147 | 0 | } |
148 | 0 | fs->input_finished = 1; |
149 | 0 | } |
150 | 0 | return filled; |
151 | 0 | } |
152 | | |
153 | | static struct git_istream *attach_stream_filter(struct git_istream *st, |
154 | | struct stream_filter *filter) |
155 | 0 | { |
156 | 0 | struct git_istream *ifs = xmalloc(sizeof(*ifs)); |
157 | 0 | struct filtered_istream *fs = &(ifs->u.filtered); |
158 | |
|
159 | 0 | ifs->close = close_istream_filtered; |
160 | 0 | ifs->read = read_istream_filtered; |
161 | 0 | fs->upstream = st; |
162 | 0 | fs->filter = filter; |
163 | 0 | fs->i_end = fs->i_ptr = 0; |
164 | 0 | fs->o_end = fs->o_ptr = 0; |
165 | 0 | fs->input_finished = 0; |
166 | 0 | ifs->size = -1; /* unknown */ |
167 | 0 | return ifs; |
168 | 0 | } |
169 | | |
170 | | /***************************************************************** |
171 | | * |
172 | | * Loose object stream |
173 | | * |
174 | | *****************************************************************/ |
175 | | |
176 | | static ssize_t read_istream_loose(struct git_istream *st, char *buf, size_t sz) |
177 | 0 | { |
178 | 0 | size_t total_read = 0; |
179 | |
|
180 | 0 | switch (st->z_state) { |
181 | 0 | case z_done: |
182 | 0 | return 0; |
183 | 0 | case z_error: |
184 | 0 | return -1; |
185 | 0 | default: |
186 | 0 | break; |
187 | 0 | } |
188 | | |
189 | 0 | if (st->u.loose.hdr_used < st->u.loose.hdr_avail) { |
190 | 0 | size_t to_copy = st->u.loose.hdr_avail - st->u.loose.hdr_used; |
191 | 0 | if (sz < to_copy) |
192 | 0 | to_copy = sz; |
193 | 0 | memcpy(buf, st->u.loose.hdr + st->u.loose.hdr_used, to_copy); |
194 | 0 | st->u.loose.hdr_used += to_copy; |
195 | 0 | total_read += to_copy; |
196 | 0 | } |
197 | |
|
198 | 0 | while (total_read < sz) { |
199 | 0 | int status; |
200 | |
|
201 | 0 | st->z.next_out = (unsigned char *)buf + total_read; |
202 | 0 | st->z.avail_out = sz - total_read; |
203 | 0 | status = git_inflate(&st->z, Z_FINISH); |
204 | |
|
205 | 0 | total_read = st->z.next_out - (unsigned char *)buf; |
206 | |
|
207 | 0 | if (status == Z_STREAM_END) { |
208 | 0 | git_inflate_end(&st->z); |
209 | 0 | st->z_state = z_done; |
210 | 0 | break; |
211 | 0 | } |
212 | 0 | if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) { |
213 | 0 | git_inflate_end(&st->z); |
214 | 0 | st->z_state = z_error; |
215 | 0 | return -1; |
216 | 0 | } |
217 | 0 | } |
218 | 0 | return total_read; |
219 | 0 | } |
220 | | |
221 | | static int close_istream_loose(struct git_istream *st) |
222 | 0 | { |
223 | 0 | close_deflated_stream(st); |
224 | 0 | munmap(st->u.loose.mapped, st->u.loose.mapsize); |
225 | 0 | return 0; |
226 | 0 | } |
227 | | |
228 | | static int open_istream_loose(struct git_istream *st, struct repository *r, |
229 | | const struct object_id *oid, |
230 | | enum object_type *type) |
231 | 0 | { |
232 | 0 | struct object_info oi = OBJECT_INFO_INIT; |
233 | 0 | struct odb_source *source; |
234 | |
|
235 | 0 | oi.sizep = &st->size; |
236 | 0 | oi.typep = type; |
237 | |
|
238 | 0 | odb_prepare_alternates(r->objects); |
239 | 0 | for (source = r->objects->sources; source; source = source->next) { |
240 | 0 | st->u.loose.mapped = odb_source_loose_map_object(source, oid, |
241 | 0 | &st->u.loose.mapsize); |
242 | 0 | if (st->u.loose.mapped) |
243 | 0 | break; |
244 | 0 | } |
245 | 0 | if (!st->u.loose.mapped) |
246 | 0 | return -1; |
247 | | |
248 | 0 | switch (unpack_loose_header(&st->z, st->u.loose.mapped, |
249 | 0 | st->u.loose.mapsize, st->u.loose.hdr, |
250 | 0 | sizeof(st->u.loose.hdr))) { |
251 | 0 | case ULHR_OK: |
252 | 0 | break; |
253 | 0 | case ULHR_BAD: |
254 | 0 | case ULHR_TOO_LONG: |
255 | 0 | goto error; |
256 | 0 | } |
257 | 0 | if (parse_loose_header(st->u.loose.hdr, &oi) < 0 || *type < 0) |
258 | 0 | goto error; |
259 | | |
260 | 0 | st->u.loose.hdr_used = strlen(st->u.loose.hdr) + 1; |
261 | 0 | st->u.loose.hdr_avail = st->z.total_out; |
262 | 0 | st->z_state = z_used; |
263 | 0 | st->close = close_istream_loose; |
264 | 0 | st->read = read_istream_loose; |
265 | |
|
266 | 0 | return 0; |
267 | 0 | error: |
268 | 0 | git_inflate_end(&st->z); |
269 | 0 | munmap(st->u.loose.mapped, st->u.loose.mapsize); |
270 | 0 | return -1; |
271 | 0 | } |
272 | | |
273 | | |
274 | | /***************************************************************** |
275 | | * |
276 | | * Non-delta packed object stream |
277 | | * |
278 | | *****************************************************************/ |
279 | | |
280 | | static ssize_t read_istream_pack_non_delta(struct git_istream *st, char *buf, |
281 | | size_t sz) |
282 | 0 | { |
283 | 0 | size_t total_read = 0; |
284 | |
|
285 | 0 | switch (st->z_state) { |
286 | 0 | case z_unused: |
287 | 0 | memset(&st->z, 0, sizeof(st->z)); |
288 | 0 | git_inflate_init(&st->z); |
289 | 0 | st->z_state = z_used; |
290 | 0 | break; |
291 | 0 | case z_done: |
292 | 0 | return 0; |
293 | 0 | case z_error: |
294 | 0 | return -1; |
295 | 0 | case z_used: |
296 | 0 | break; |
297 | 0 | } |
298 | | |
299 | 0 | while (total_read < sz) { |
300 | 0 | int status; |
301 | 0 | struct pack_window *window = NULL; |
302 | 0 | unsigned char *mapped; |
303 | |
|
304 | 0 | mapped = use_pack(st->u.in_pack.pack, &window, |
305 | 0 | st->u.in_pack.pos, &st->z.avail_in); |
306 | |
|
307 | 0 | st->z.next_out = (unsigned char *)buf + total_read; |
308 | 0 | st->z.avail_out = sz - total_read; |
309 | 0 | st->z.next_in = mapped; |
310 | 0 | status = git_inflate(&st->z, Z_FINISH); |
311 | |
|
312 | 0 | st->u.in_pack.pos += st->z.next_in - mapped; |
313 | 0 | total_read = st->z.next_out - (unsigned char *)buf; |
314 | 0 | unuse_pack(&window); |
315 | |
|
316 | 0 | if (status == Z_STREAM_END) { |
317 | 0 | git_inflate_end(&st->z); |
318 | 0 | st->z_state = z_done; |
319 | 0 | break; |
320 | 0 | } |
321 | | |
322 | | /* |
323 | | * Unlike the loose object case, we do not have to worry here |
324 | | * about running out of input bytes and spinning infinitely. If |
325 | | * we get Z_BUF_ERROR due to too few input bytes, then we'll |
326 | | * replenish them in the next use_pack() call when we loop. If |
327 | | * we truly hit the end of the pack (i.e., because it's corrupt |
328 | | * or truncated), then use_pack() catches that and will die(). |
329 | | */ |
330 | 0 | if (status != Z_OK && status != Z_BUF_ERROR) { |
331 | 0 | git_inflate_end(&st->z); |
332 | 0 | st->z_state = z_error; |
333 | 0 | return -1; |
334 | 0 | } |
335 | 0 | } |
336 | 0 | return total_read; |
337 | 0 | } |
338 | | |
339 | | static int close_istream_pack_non_delta(struct git_istream *st) |
340 | 0 | { |
341 | 0 | close_deflated_stream(st); |
342 | 0 | return 0; |
343 | 0 | } |
344 | | |
345 | | static int open_istream_pack_non_delta(struct git_istream *st, |
346 | | struct repository *r UNUSED, |
347 | | const struct object_id *oid UNUSED, |
348 | | enum object_type *type UNUSED) |
349 | 0 | { |
350 | 0 | struct pack_window *window; |
351 | 0 | enum object_type in_pack_type; |
352 | |
|
353 | 0 | window = NULL; |
354 | |
|
355 | 0 | in_pack_type = unpack_object_header(st->u.in_pack.pack, |
356 | 0 | &window, |
357 | 0 | &st->u.in_pack.pos, |
358 | 0 | &st->size); |
359 | 0 | unuse_pack(&window); |
360 | 0 | switch (in_pack_type) { |
361 | 0 | default: |
362 | 0 | return -1; /* we do not do deltas for now */ |
363 | 0 | case OBJ_COMMIT: |
364 | 0 | case OBJ_TREE: |
365 | 0 | case OBJ_BLOB: |
366 | 0 | case OBJ_TAG: |
367 | 0 | break; |
368 | 0 | } |
369 | 0 | st->z_state = z_unused; |
370 | 0 | st->close = close_istream_pack_non_delta; |
371 | 0 | st->read = read_istream_pack_non_delta; |
372 | |
|
373 | 0 | return 0; |
374 | 0 | } |
375 | | |
376 | | |
377 | | /***************************************************************** |
378 | | * |
379 | | * In-core stream |
380 | | * |
381 | | *****************************************************************/ |
382 | | |
383 | | static int close_istream_incore(struct git_istream *st) |
384 | 0 | { |
385 | 0 | free(st->u.incore.buf); |
386 | 0 | return 0; |
387 | 0 | } |
388 | | |
389 | | static ssize_t read_istream_incore(struct git_istream *st, char *buf, size_t sz) |
390 | 0 | { |
391 | 0 | size_t read_size = sz; |
392 | 0 | size_t remainder = st->size - st->u.incore.read_ptr; |
393 | |
|
394 | 0 | if (remainder <= read_size) |
395 | 0 | read_size = remainder; |
396 | 0 | if (read_size) { |
397 | 0 | memcpy(buf, st->u.incore.buf + st->u.incore.read_ptr, read_size); |
398 | 0 | st->u.incore.read_ptr += read_size; |
399 | 0 | } |
400 | 0 | return read_size; |
401 | 0 | } |
402 | | |
403 | | static int open_istream_incore(struct git_istream *st, struct repository *r, |
404 | | const struct object_id *oid, enum object_type *type) |
405 | 0 | { |
406 | 0 | struct object_info oi = OBJECT_INFO_INIT; |
407 | |
|
408 | 0 | st->u.incore.read_ptr = 0; |
409 | 0 | st->close = close_istream_incore; |
410 | 0 | st->read = read_istream_incore; |
411 | |
|
412 | 0 | oi.typep = type; |
413 | 0 | oi.sizep = &st->size; |
414 | 0 | oi.contentp = (void **)&st->u.incore.buf; |
415 | 0 | return odb_read_object_info_extended(r->objects, oid, &oi, |
416 | 0 | OBJECT_INFO_DIE_IF_CORRUPT); |
417 | 0 | } |
418 | | |
419 | | /***************************************************************************** |
420 | | * Static helper variables and functions for users of the streaming interface |
421 | | *****************************************************************************/ |
422 | | |
423 | | static int istream_source(struct git_istream *st, |
424 | | struct repository *r, |
425 | | const struct object_id *oid, |
426 | | enum object_type *type) |
427 | 0 | { |
428 | 0 | unsigned long size; |
429 | 0 | int status; |
430 | 0 | struct object_info oi = OBJECT_INFO_INIT; |
431 | |
|
432 | 0 | oi.typep = type; |
433 | 0 | oi.sizep = &size; |
434 | 0 | status = odb_read_object_info_extended(r->objects, oid, &oi, 0); |
435 | 0 | if (status < 0) |
436 | 0 | return status; |
437 | | |
438 | 0 | switch (oi.whence) { |
439 | 0 | case OI_LOOSE: |
440 | 0 | st->open = open_istream_loose; |
441 | 0 | return 0; |
442 | 0 | case OI_PACKED: |
443 | 0 | if (!oi.u.packed.is_delta && |
444 | 0 | repo_settings_get_big_file_threshold(the_repository) < size) { |
445 | 0 | st->u.in_pack.pack = oi.u.packed.pack; |
446 | 0 | st->u.in_pack.pos = oi.u.packed.offset; |
447 | 0 | st->open = open_istream_pack_non_delta; |
448 | 0 | return 0; |
449 | 0 | } |
450 | | /* fallthru */ |
451 | 0 | default: |
452 | 0 | st->open = open_istream_incore; |
453 | 0 | return 0; |
454 | 0 | } |
455 | 0 | } |
456 | | |
457 | | /**************************************************************** |
458 | | * Users of streaming interface |
459 | | ****************************************************************/ |
460 | | |
461 | | int close_istream(struct git_istream *st) |
462 | 0 | { |
463 | 0 | int r = st->close(st); |
464 | 0 | free(st); |
465 | 0 | return r; |
466 | 0 | } |
467 | | |
468 | | ssize_t read_istream(struct git_istream *st, void *buf, size_t sz) |
469 | 0 | { |
470 | 0 | return st->read(st, buf, sz); |
471 | 0 | } |
472 | | |
473 | | struct git_istream *open_istream(struct repository *r, |
474 | | const struct object_id *oid, |
475 | | enum object_type *type, |
476 | | unsigned long *size, |
477 | | struct stream_filter *filter) |
478 | 0 | { |
479 | 0 | struct git_istream *st = xmalloc(sizeof(*st)); |
480 | 0 | const struct object_id *real = lookup_replace_object(r, oid); |
481 | 0 | int ret = istream_source(st, r, real, type); |
482 | |
|
483 | 0 | if (ret) { |
484 | 0 | free(st); |
485 | 0 | return NULL; |
486 | 0 | } |
487 | | |
488 | 0 | if (st->open(st, r, real, type)) { |
489 | 0 | if (open_istream_incore(st, r, real, type)) { |
490 | 0 | free(st); |
491 | 0 | return NULL; |
492 | 0 | } |
493 | 0 | } |
494 | 0 | if (filter) { |
495 | | /* Add "&& !is_null_stream_filter(filter)" for performance */ |
496 | 0 | struct git_istream *nst = attach_stream_filter(st, filter); |
497 | 0 | if (!nst) { |
498 | 0 | close_istream(st); |
499 | 0 | return NULL; |
500 | 0 | } |
501 | 0 | st = nst; |
502 | 0 | } |
503 | | |
504 | 0 | *size = st->size; |
505 | 0 | return st; |
506 | 0 | } |
507 | | |
508 | | int stream_blob_to_fd(int fd, const struct object_id *oid, struct stream_filter *filter, |
509 | | int can_seek) |
510 | 0 | { |
511 | 0 | struct git_istream *st; |
512 | 0 | enum object_type type; |
513 | 0 | unsigned long sz; |
514 | 0 | ssize_t kept = 0; |
515 | 0 | int result = -1; |
516 | |
|
517 | 0 | st = open_istream(the_repository, oid, &type, &sz, filter); |
518 | 0 | if (!st) { |
519 | 0 | if (filter) |
520 | 0 | free_stream_filter(filter); |
521 | 0 | return result; |
522 | 0 | } |
523 | 0 | if (type != OBJ_BLOB) |
524 | 0 | goto close_and_exit; |
525 | 0 | for (;;) { |
526 | 0 | char buf[1024 * 16]; |
527 | 0 | ssize_t wrote, holeto; |
528 | 0 | ssize_t readlen = read_istream(st, buf, sizeof(buf)); |
529 | |
|
530 | 0 | if (readlen < 0) |
531 | 0 | goto close_and_exit; |
532 | 0 | if (!readlen) |
533 | 0 | break; |
534 | 0 | if (can_seek && sizeof(buf) == readlen) { |
535 | 0 | for (holeto = 0; holeto < readlen; holeto++) |
536 | 0 | if (buf[holeto]) |
537 | 0 | break; |
538 | 0 | if (readlen == holeto) { |
539 | 0 | kept += holeto; |
540 | 0 | continue; |
541 | 0 | } |
542 | 0 | } |
543 | | |
544 | 0 | if (kept && lseek(fd, kept, SEEK_CUR) == (off_t) -1) |
545 | 0 | goto close_and_exit; |
546 | 0 | else |
547 | 0 | kept = 0; |
548 | 0 | wrote = write_in_full(fd, buf, readlen); |
549 | |
|
550 | 0 | if (wrote < 0) |
551 | 0 | goto close_and_exit; |
552 | 0 | } |
553 | 0 | if (kept && (lseek(fd, kept - 1, SEEK_CUR) == (off_t) -1 || |
554 | 0 | xwrite(fd, "", 1) != 1)) |
555 | 0 | goto close_and_exit; |
556 | 0 | result = 0; |
557 | |
|
558 | 0 | close_and_exit: |
559 | 0 | close_istream(st); |
560 | 0 | return result; |
561 | 0 | } |