Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2011, Google Inc. |
3 | | */ |
4 | | |
5 | | #define USE_THE_REPOSITORY_VARIABLE |
6 | | |
7 | | #include "git-compat-util.h" |
8 | | #include "convert.h" |
9 | | #include "environment.h" |
10 | | #include "streaming.h" |
11 | | #include "repository.h" |
12 | | #include "object-file.h" |
13 | | #include "object-store-ll.h" |
14 | | #include "replace-object.h" |
15 | | #include "packfile.h" |
16 | | |
/*
 * Function-pointer types for the open/close/read slots stored in
 * struct git_istream.
 */
typedef int (*open_istream_fn)(struct git_istream *st,
			       struct repository *r,
			       const struct object_id *oid,
			       enum object_type *type);
typedef int (*close_istream_fn)(struct git_istream *st);
typedef ssize_t (*read_istream_fn)(struct git_istream *st, char *buf, size_t sz);

/* Size in bytes of each staging buffer (ibuf/obuf) of a filtered stream. */
#define FILTER_BUFFER (16 * 1024)
25 | | |
26 | | struct filtered_istream { |
27 | | struct git_istream *upstream; |
28 | | struct stream_filter *filter; |
29 | | char ibuf[FILTER_BUFFER]; |
30 | | char obuf[FILTER_BUFFER]; |
31 | | int i_end, i_ptr; |
32 | | int o_end, o_ptr; |
33 | | int input_finished; |
34 | | }; |
35 | | |
36 | | struct git_istream { |
37 | | open_istream_fn open; |
38 | | close_istream_fn close; |
39 | | read_istream_fn read; |
40 | | |
41 | | unsigned long size; /* inflated size of full object */ |
42 | | git_zstream z; |
43 | | enum { z_unused, z_used, z_done, z_error } z_state; |
44 | | |
45 | | union { |
46 | | struct { |
47 | | char *buf; /* from oid_object_info_extended() */ |
48 | | unsigned long read_ptr; |
49 | | } incore; |
50 | | |
51 | | struct { |
52 | | void *mapped; |
53 | | unsigned long mapsize; |
54 | | char hdr[32]; |
55 | | int hdr_avail; |
56 | | int hdr_used; |
57 | | } loose; |
58 | | |
59 | | struct { |
60 | | struct packed_git *pack; |
61 | | off_t pos; |
62 | | } in_pack; |
63 | | |
64 | | struct filtered_istream filtered; |
65 | | } u; |
66 | | }; |
67 | | |
68 | | /***************************************************************** |
69 | | * |
70 | | * Common helpers |
71 | | * |
72 | | *****************************************************************/ |
73 | | |
74 | | static void close_deflated_stream(struct git_istream *st) |
75 | 0 | { |
76 | 0 | if (st->z_state == z_used) |
77 | 0 | git_inflate_end(&st->z); |
78 | 0 | } |
79 | | |
80 | | |
81 | | /***************************************************************** |
82 | | * |
83 | | * Filtered stream |
84 | | * |
85 | | *****************************************************************/ |
86 | | |
87 | | static int close_istream_filtered(struct git_istream *st) |
88 | 0 | { |
89 | 0 | free_stream_filter(st->u.filtered.filter); |
90 | 0 | return close_istream(st->u.filtered.upstream); |
91 | 0 | } |
92 | | |
93 | | static ssize_t read_istream_filtered(struct git_istream *st, char *buf, |
94 | | size_t sz) |
95 | 0 | { |
96 | 0 | struct filtered_istream *fs = &(st->u.filtered); |
97 | 0 | size_t filled = 0; |
98 | |
|
99 | 0 | while (sz) { |
100 | | /* do we already have filtered output? */ |
101 | 0 | if (fs->o_ptr < fs->o_end) { |
102 | 0 | size_t to_move = fs->o_end - fs->o_ptr; |
103 | 0 | if (sz < to_move) |
104 | 0 | to_move = sz; |
105 | 0 | memcpy(buf + filled, fs->obuf + fs->o_ptr, to_move); |
106 | 0 | fs->o_ptr += to_move; |
107 | 0 | sz -= to_move; |
108 | 0 | filled += to_move; |
109 | 0 | continue; |
110 | 0 | } |
111 | 0 | fs->o_end = fs->o_ptr = 0; |
112 | | |
113 | | /* do we have anything to feed the filter with? */ |
114 | 0 | if (fs->i_ptr < fs->i_end) { |
115 | 0 | size_t to_feed = fs->i_end - fs->i_ptr; |
116 | 0 | size_t to_receive = FILTER_BUFFER; |
117 | 0 | if (stream_filter(fs->filter, |
118 | 0 | fs->ibuf + fs->i_ptr, &to_feed, |
119 | 0 | fs->obuf, &to_receive)) |
120 | 0 | return -1; |
121 | 0 | fs->i_ptr = fs->i_end - to_feed; |
122 | 0 | fs->o_end = FILTER_BUFFER - to_receive; |
123 | 0 | continue; |
124 | 0 | } |
125 | | |
126 | | /* tell the filter to drain upon no more input */ |
127 | 0 | if (fs->input_finished) { |
128 | 0 | size_t to_receive = FILTER_BUFFER; |
129 | 0 | if (stream_filter(fs->filter, |
130 | 0 | NULL, NULL, |
131 | 0 | fs->obuf, &to_receive)) |
132 | 0 | return -1; |
133 | 0 | fs->o_end = FILTER_BUFFER - to_receive; |
134 | 0 | if (!fs->o_end) |
135 | 0 | break; |
136 | 0 | continue; |
137 | 0 | } |
138 | 0 | fs->i_end = fs->i_ptr = 0; |
139 | | |
140 | | /* refill the input from the upstream */ |
141 | 0 | if (!fs->input_finished) { |
142 | 0 | fs->i_end = read_istream(fs->upstream, fs->ibuf, FILTER_BUFFER); |
143 | 0 | if (fs->i_end < 0) |
144 | 0 | return -1; |
145 | 0 | if (fs->i_end) |
146 | 0 | continue; |
147 | 0 | } |
148 | 0 | fs->input_finished = 1; |
149 | 0 | } |
150 | 0 | return filled; |
151 | 0 | } |
152 | | |
153 | | static struct git_istream *attach_stream_filter(struct git_istream *st, |
154 | | struct stream_filter *filter) |
155 | 0 | { |
156 | 0 | struct git_istream *ifs = xmalloc(sizeof(*ifs)); |
157 | 0 | struct filtered_istream *fs = &(ifs->u.filtered); |
158 | |
|
159 | 0 | ifs->close = close_istream_filtered; |
160 | 0 | ifs->read = read_istream_filtered; |
161 | 0 | fs->upstream = st; |
162 | 0 | fs->filter = filter; |
163 | 0 | fs->i_end = fs->i_ptr = 0; |
164 | 0 | fs->o_end = fs->o_ptr = 0; |
165 | 0 | fs->input_finished = 0; |
166 | 0 | ifs->size = -1; /* unknown */ |
167 | 0 | return ifs; |
168 | 0 | } |
169 | | |
170 | | /***************************************************************** |
171 | | * |
172 | | * Loose object stream |
173 | | * |
174 | | *****************************************************************/ |
175 | | |
176 | | static ssize_t read_istream_loose(struct git_istream *st, char *buf, size_t sz) |
177 | 0 | { |
178 | 0 | size_t total_read = 0; |
179 | |
|
180 | 0 | switch (st->z_state) { |
181 | 0 | case z_done: |
182 | 0 | return 0; |
183 | 0 | case z_error: |
184 | 0 | return -1; |
185 | 0 | default: |
186 | 0 | break; |
187 | 0 | } |
188 | | |
189 | 0 | if (st->u.loose.hdr_used < st->u.loose.hdr_avail) { |
190 | 0 | size_t to_copy = st->u.loose.hdr_avail - st->u.loose.hdr_used; |
191 | 0 | if (sz < to_copy) |
192 | 0 | to_copy = sz; |
193 | 0 | memcpy(buf, st->u.loose.hdr + st->u.loose.hdr_used, to_copy); |
194 | 0 | st->u.loose.hdr_used += to_copy; |
195 | 0 | total_read += to_copy; |
196 | 0 | } |
197 | |
|
198 | 0 | while (total_read < sz) { |
199 | 0 | int status; |
200 | |
|
201 | 0 | st->z.next_out = (unsigned char *)buf + total_read; |
202 | 0 | st->z.avail_out = sz - total_read; |
203 | 0 | status = git_inflate(&st->z, Z_FINISH); |
204 | |
|
205 | 0 | total_read = st->z.next_out - (unsigned char *)buf; |
206 | |
|
207 | 0 | if (status == Z_STREAM_END) { |
208 | 0 | git_inflate_end(&st->z); |
209 | 0 | st->z_state = z_done; |
210 | 0 | break; |
211 | 0 | } |
212 | 0 | if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) { |
213 | 0 | git_inflate_end(&st->z); |
214 | 0 | st->z_state = z_error; |
215 | 0 | return -1; |
216 | 0 | } |
217 | 0 | } |
218 | 0 | return total_read; |
219 | 0 | } |
220 | | |
221 | | static int close_istream_loose(struct git_istream *st) |
222 | 0 | { |
223 | 0 | close_deflated_stream(st); |
224 | 0 | munmap(st->u.loose.mapped, st->u.loose.mapsize); |
225 | 0 | return 0; |
226 | 0 | } |
227 | | |
228 | | static int open_istream_loose(struct git_istream *st, struct repository *r, |
229 | | const struct object_id *oid, |
230 | | enum object_type *type) |
231 | 0 | { |
232 | 0 | struct object_info oi = OBJECT_INFO_INIT; |
233 | 0 | oi.sizep = &st->size; |
234 | 0 | oi.typep = type; |
235 | |
|
236 | 0 | st->u.loose.mapped = map_loose_object(r, oid, &st->u.loose.mapsize); |
237 | 0 | if (!st->u.loose.mapped) |
238 | 0 | return -1; |
239 | 0 | switch (unpack_loose_header(&st->z, st->u.loose.mapped, |
240 | 0 | st->u.loose.mapsize, st->u.loose.hdr, |
241 | 0 | sizeof(st->u.loose.hdr), NULL)) { |
242 | 0 | case ULHR_OK: |
243 | 0 | break; |
244 | 0 | case ULHR_BAD: |
245 | 0 | case ULHR_TOO_LONG: |
246 | 0 | goto error; |
247 | 0 | } |
248 | 0 | if (parse_loose_header(st->u.loose.hdr, &oi) < 0 || *type < 0) |
249 | 0 | goto error; |
250 | | |
251 | 0 | st->u.loose.hdr_used = strlen(st->u.loose.hdr) + 1; |
252 | 0 | st->u.loose.hdr_avail = st->z.total_out; |
253 | 0 | st->z_state = z_used; |
254 | 0 | st->close = close_istream_loose; |
255 | 0 | st->read = read_istream_loose; |
256 | |
|
257 | 0 | return 0; |
258 | 0 | error: |
259 | 0 | git_inflate_end(&st->z); |
260 | 0 | munmap(st->u.loose.mapped, st->u.loose.mapsize); |
261 | 0 | return -1; |
262 | 0 | } |
263 | | |
264 | | |
265 | | /***************************************************************** |
266 | | * |
267 | | * Non-delta packed object stream |
268 | | * |
269 | | *****************************************************************/ |
270 | | |
271 | | static ssize_t read_istream_pack_non_delta(struct git_istream *st, char *buf, |
272 | | size_t sz) |
273 | 0 | { |
274 | 0 | size_t total_read = 0; |
275 | |
|
276 | 0 | switch (st->z_state) { |
277 | 0 | case z_unused: |
278 | 0 | memset(&st->z, 0, sizeof(st->z)); |
279 | 0 | git_inflate_init(&st->z); |
280 | 0 | st->z_state = z_used; |
281 | 0 | break; |
282 | 0 | case z_done: |
283 | 0 | return 0; |
284 | 0 | case z_error: |
285 | 0 | return -1; |
286 | 0 | case z_used: |
287 | 0 | break; |
288 | 0 | } |
289 | | |
290 | 0 | while (total_read < sz) { |
291 | 0 | int status; |
292 | 0 | struct pack_window *window = NULL; |
293 | 0 | unsigned char *mapped; |
294 | |
|
295 | 0 | mapped = use_pack(st->u.in_pack.pack, &window, |
296 | 0 | st->u.in_pack.pos, &st->z.avail_in); |
297 | |
|
298 | 0 | st->z.next_out = (unsigned char *)buf + total_read; |
299 | 0 | st->z.avail_out = sz - total_read; |
300 | 0 | st->z.next_in = mapped; |
301 | 0 | status = git_inflate(&st->z, Z_FINISH); |
302 | |
|
303 | 0 | st->u.in_pack.pos += st->z.next_in - mapped; |
304 | 0 | total_read = st->z.next_out - (unsigned char *)buf; |
305 | 0 | unuse_pack(&window); |
306 | |
|
307 | 0 | if (status == Z_STREAM_END) { |
308 | 0 | git_inflate_end(&st->z); |
309 | 0 | st->z_state = z_done; |
310 | 0 | break; |
311 | 0 | } |
312 | | |
313 | | /* |
314 | | * Unlike the loose object case, we do not have to worry here |
315 | | * about running out of input bytes and spinning infinitely. If |
316 | | * we get Z_BUF_ERROR due to too few input bytes, then we'll |
317 | | * replenish them in the next use_pack() call when we loop. If |
318 | | * we truly hit the end of the pack (i.e., because it's corrupt |
319 | | * or truncated), then use_pack() catches that and will die(). |
320 | | */ |
321 | 0 | if (status != Z_OK && status != Z_BUF_ERROR) { |
322 | 0 | git_inflate_end(&st->z); |
323 | 0 | st->z_state = z_error; |
324 | 0 | return -1; |
325 | 0 | } |
326 | 0 | } |
327 | 0 | return total_read; |
328 | 0 | } |
329 | | |
/*
 * Close callback of a packed non-delta stream: release the inflate
 * state if it is still active.  Always returns 0.
 */
static int close_istream_pack_non_delta(struct git_istream *st)
{
	close_deflated_stream(st);

	return 0;
}
335 | | |
336 | | static int open_istream_pack_non_delta(struct git_istream *st, |
337 | | struct repository *r UNUSED, |
338 | | const struct object_id *oid UNUSED, |
339 | | enum object_type *type UNUSED) |
340 | 0 | { |
341 | 0 | struct pack_window *window; |
342 | 0 | enum object_type in_pack_type; |
343 | |
|
344 | 0 | window = NULL; |
345 | |
|
346 | 0 | in_pack_type = unpack_object_header(st->u.in_pack.pack, |
347 | 0 | &window, |
348 | 0 | &st->u.in_pack.pos, |
349 | 0 | &st->size); |
350 | 0 | unuse_pack(&window); |
351 | 0 | switch (in_pack_type) { |
352 | 0 | default: |
353 | 0 | return -1; /* we do not do deltas for now */ |
354 | 0 | case OBJ_COMMIT: |
355 | 0 | case OBJ_TREE: |
356 | 0 | case OBJ_BLOB: |
357 | 0 | case OBJ_TAG: |
358 | 0 | break; |
359 | 0 | } |
360 | 0 | st->z_state = z_unused; |
361 | 0 | st->close = close_istream_pack_non_delta; |
362 | 0 | st->read = read_istream_pack_non_delta; |
363 | |
|
364 | 0 | return 0; |
365 | 0 | } |
366 | | |
367 | | |
368 | | /***************************************************************** |
369 | | * |
370 | | * In-core stream |
371 | | * |
372 | | *****************************************************************/ |
373 | | |
374 | | static int close_istream_incore(struct git_istream *st) |
375 | 0 | { |
376 | 0 | free(st->u.incore.buf); |
377 | 0 | return 0; |
378 | 0 | } |
379 | | |
380 | | static ssize_t read_istream_incore(struct git_istream *st, char *buf, size_t sz) |
381 | 0 | { |
382 | 0 | size_t read_size = sz; |
383 | 0 | size_t remainder = st->size - st->u.incore.read_ptr; |
384 | |
|
385 | 0 | if (remainder <= read_size) |
386 | 0 | read_size = remainder; |
387 | 0 | if (read_size) { |
388 | 0 | memcpy(buf, st->u.incore.buf + st->u.incore.read_ptr, read_size); |
389 | 0 | st->u.incore.read_ptr += read_size; |
390 | 0 | } |
391 | 0 | return read_size; |
392 | 0 | } |
393 | | |
394 | | static int open_istream_incore(struct git_istream *st, struct repository *r, |
395 | | const struct object_id *oid, enum object_type *type) |
396 | 0 | { |
397 | 0 | struct object_info oi = OBJECT_INFO_INIT; |
398 | |
|
399 | 0 | st->u.incore.read_ptr = 0; |
400 | 0 | st->close = close_istream_incore; |
401 | 0 | st->read = read_istream_incore; |
402 | |
|
403 | 0 | oi.typep = type; |
404 | 0 | oi.sizep = &st->size; |
405 | 0 | oi.contentp = (void **)&st->u.incore.buf; |
406 | 0 | return oid_object_info_extended(r, oid, &oi, |
407 | 0 | OBJECT_INFO_DIE_IF_CORRUPT); |
408 | 0 | } |
409 | | |
410 | | /***************************************************************************** |
411 | | * static helpers variables and functions for users of streaming interface |
412 | | *****************************************************************************/ |
413 | | |
414 | | static int istream_source(struct git_istream *st, |
415 | | struct repository *r, |
416 | | const struct object_id *oid, |
417 | | enum object_type *type) |
418 | 0 | { |
419 | 0 | unsigned long size; |
420 | 0 | int status; |
421 | 0 | struct object_info oi = OBJECT_INFO_INIT; |
422 | |
|
423 | 0 | oi.typep = type; |
424 | 0 | oi.sizep = &size; |
425 | 0 | status = oid_object_info_extended(r, oid, &oi, 0); |
426 | 0 | if (status < 0) |
427 | 0 | return status; |
428 | | |
429 | 0 | switch (oi.whence) { |
430 | 0 | case OI_LOOSE: |
431 | 0 | st->open = open_istream_loose; |
432 | 0 | return 0; |
433 | 0 | case OI_PACKED: |
434 | 0 | if (!oi.u.packed.is_delta && big_file_threshold < size) { |
435 | 0 | st->u.in_pack.pack = oi.u.packed.pack; |
436 | 0 | st->u.in_pack.pos = oi.u.packed.offset; |
437 | 0 | st->open = open_istream_pack_non_delta; |
438 | 0 | return 0; |
439 | 0 | } |
440 | | /* fallthru */ |
441 | 0 | default: |
442 | 0 | st->open = open_istream_incore; |
443 | 0 | return 0; |
444 | 0 | } |
445 | 0 | } |
446 | | |
447 | | /**************************************************************** |
448 | | * Users of streaming interface |
449 | | ****************************************************************/ |
450 | | |
451 | | int close_istream(struct git_istream *st) |
452 | 0 | { |
453 | 0 | int r = st->close(st); |
454 | 0 | free(st); |
455 | 0 | return r; |
456 | 0 | } |
457 | | |
458 | | ssize_t read_istream(struct git_istream *st, void *buf, size_t sz) |
459 | 0 | { |
460 | 0 | return st->read(st, buf, sz); |
461 | 0 | } |
462 | | |
463 | | struct git_istream *open_istream(struct repository *r, |
464 | | const struct object_id *oid, |
465 | | enum object_type *type, |
466 | | unsigned long *size, |
467 | | struct stream_filter *filter) |
468 | 0 | { |
469 | 0 | struct git_istream *st = xmalloc(sizeof(*st)); |
470 | 0 | const struct object_id *real = lookup_replace_object(r, oid); |
471 | 0 | int ret = istream_source(st, r, real, type); |
472 | |
|
473 | 0 | if (ret) { |
474 | 0 | free(st); |
475 | 0 | return NULL; |
476 | 0 | } |
477 | | |
478 | 0 | if (st->open(st, r, real, type)) { |
479 | 0 | if (open_istream_incore(st, r, real, type)) { |
480 | 0 | free(st); |
481 | 0 | return NULL; |
482 | 0 | } |
483 | 0 | } |
484 | 0 | if (filter) { |
485 | | /* Add "&& !is_null_stream_filter(filter)" for performance */ |
486 | 0 | struct git_istream *nst = attach_stream_filter(st, filter); |
487 | 0 | if (!nst) { |
488 | 0 | close_istream(st); |
489 | 0 | return NULL; |
490 | 0 | } |
491 | 0 | st = nst; |
492 | 0 | } |
493 | | |
494 | 0 | *size = st->size; |
495 | 0 | return st; |
496 | 0 | } |
497 | | |
498 | | int stream_blob_to_fd(int fd, const struct object_id *oid, struct stream_filter *filter, |
499 | | int can_seek) |
500 | 0 | { |
501 | 0 | struct git_istream *st; |
502 | 0 | enum object_type type; |
503 | 0 | unsigned long sz; |
504 | 0 | ssize_t kept = 0; |
505 | 0 | int result = -1; |
506 | |
|
507 | 0 | st = open_istream(the_repository, oid, &type, &sz, filter); |
508 | 0 | if (!st) { |
509 | 0 | if (filter) |
510 | 0 | free_stream_filter(filter); |
511 | 0 | return result; |
512 | 0 | } |
513 | 0 | if (type != OBJ_BLOB) |
514 | 0 | goto close_and_exit; |
515 | 0 | for (;;) { |
516 | 0 | char buf[1024 * 16]; |
517 | 0 | ssize_t wrote, holeto; |
518 | 0 | ssize_t readlen = read_istream(st, buf, sizeof(buf)); |
519 | |
|
520 | 0 | if (readlen < 0) |
521 | 0 | goto close_and_exit; |
522 | 0 | if (!readlen) |
523 | 0 | break; |
524 | 0 | if (can_seek && sizeof(buf) == readlen) { |
525 | 0 | for (holeto = 0; holeto < readlen; holeto++) |
526 | 0 | if (buf[holeto]) |
527 | 0 | break; |
528 | 0 | if (readlen == holeto) { |
529 | 0 | kept += holeto; |
530 | 0 | continue; |
531 | 0 | } |
532 | 0 | } |
533 | | |
534 | 0 | if (kept && lseek(fd, kept, SEEK_CUR) == (off_t) -1) |
535 | 0 | goto close_and_exit; |
536 | 0 | else |
537 | 0 | kept = 0; |
538 | 0 | wrote = write_in_full(fd, buf, readlen); |
539 | |
|
540 | 0 | if (wrote < 0) |
541 | 0 | goto close_and_exit; |
542 | 0 | } |
543 | 0 | if (kept && (lseek(fd, kept - 1, SEEK_CUR) == (off_t) -1 || |
544 | 0 | xwrite(fd, "", 1) != 1)) |
545 | 0 | goto close_and_exit; |
546 | 0 | result = 0; |
547 | |
|
548 | 0 | close_and_exit: |
549 | 0 | close_istream(st); |
550 | 0 | return result; |
551 | 0 | } |