/src/git/reftable/reader.c
Line | Count | Source |
1 | | /* |
2 | | Copyright 2020 Google LLC |
3 | | |
4 | | Use of this source code is governed by a BSD-style |
5 | | license that can be found in the LICENSE file or at |
6 | | https://developers.google.com/open-source/licenses/bsd |
7 | | */ |
8 | | |
9 | | #include "reader.h" |
10 | | |
11 | | #include "system.h" |
12 | | #include "block.h" |
13 | | #include "constants.h" |
14 | | #include "iter.h" |
15 | | #include "record.h" |
16 | | #include "reftable-error.h" |
17 | | |
18 | | uint64_t block_source_size(struct reftable_block_source *source) |
19 | 0 | { |
20 | 0 | return source->ops->size(source->arg); |
21 | 0 | } |
22 | | |
23 | | int block_source_read_block(struct reftable_block_source *source, |
24 | | struct reftable_block *dest, uint64_t off, |
25 | | uint32_t size) |
26 | 0 | { |
27 | 0 | int result = source->ops->read_block(source->arg, dest, off, size); |
28 | 0 | dest->source = *source; |
29 | 0 | return result; |
30 | 0 | } |
31 | | |
32 | | void block_source_close(struct reftable_block_source *source) |
33 | 0 | { |
34 | 0 | if (!source->ops) { |
35 | 0 | return; |
36 | 0 | } |
37 | | |
38 | 0 | source->ops->close(source->arg); |
39 | 0 | source->ops = NULL; |
40 | 0 | } |
41 | | |
42 | | static struct reftable_reader_offsets * |
43 | | reader_offsets_for(struct reftable_reader *r, uint8_t typ) |
44 | 0 | { |
45 | 0 | switch (typ) { |
46 | 0 | case BLOCK_TYPE_REF: |
47 | 0 | return &r->ref_offsets; |
48 | 0 | case BLOCK_TYPE_LOG: |
49 | 0 | return &r->log_offsets; |
50 | 0 | case BLOCK_TYPE_OBJ: |
51 | 0 | return &r->obj_offsets; |
52 | 0 | } |
53 | 0 | abort(); |
54 | 0 | } |
55 | | |
56 | | static int reader_get_block(struct reftable_reader *r, |
57 | | struct reftable_block *dest, uint64_t off, |
58 | | uint32_t sz) |
59 | 0 | { |
60 | 0 | if (off >= r->size) |
61 | 0 | return 0; |
62 | | |
63 | 0 | if (off + sz > r->size) { |
64 | 0 | sz = r->size - off; |
65 | 0 | } |
66 | |
67 | 0 | return block_source_read_block(&r->source, dest, off, sz); |
68 | 0 | } |
69 | | |
70 | | uint32_t reftable_reader_hash_id(struct reftable_reader *r) |
71 | 0 | { |
72 | 0 | return r->hash_id; |
73 | 0 | } |
74 | | |
75 | | const char *reader_name(struct reftable_reader *r) |
76 | 0 | { |
77 | 0 | return r->name; |
78 | 0 | } |
79 | | |
80 | | static int parse_footer(struct reftable_reader *r, uint8_t *footer, |
81 | | uint8_t *header) |
82 | 0 | { |
83 | 0 | uint8_t *f = footer; |
84 | 0 | uint8_t first_block_typ; |
85 | 0 | int err = 0; |
86 | 0 | uint32_t computed_crc; |
87 | 0 | uint32_t file_crc; |
88 | |
89 | 0 | if (memcmp(f, "REFT", 4)) { |
90 | 0 | err = REFTABLE_FORMAT_ERROR; |
91 | 0 | goto done; |
92 | 0 | } |
93 | 0 | f += 4; |
94 | |
95 | 0 | if (memcmp(footer, header, header_size(r->version))) { |
96 | 0 | err = REFTABLE_FORMAT_ERROR; |
97 | 0 | goto done; |
98 | 0 | } |
99 | | |
100 | 0 | f++; |
101 | 0 | r->block_size = get_be24(f); |
102 | |
103 | 0 | f += 3; |
104 | 0 | r->min_update_index = get_be64(f); |
105 | 0 | f += 8; |
106 | 0 | r->max_update_index = get_be64(f); |
107 | 0 | f += 8; |
108 | |
109 | 0 | if (r->version == 1) { |
110 | 0 | r->hash_id = GIT_SHA1_FORMAT_ID; |
111 | 0 | } else { |
112 | 0 | r->hash_id = get_be32(f); |
113 | 0 | switch (r->hash_id) { |
114 | 0 | case GIT_SHA1_FORMAT_ID: |
115 | 0 | break; |
116 | 0 | case GIT_SHA256_FORMAT_ID: |
117 | 0 | break; |
118 | 0 | default: |
119 | 0 | err = REFTABLE_FORMAT_ERROR; |
120 | 0 | goto done; |
121 | 0 | } |
122 | 0 | f += 4; |
123 | 0 | } |
124 | | |
125 | 0 | r->ref_offsets.index_offset = get_be64(f); |
126 | 0 | f += 8; |
127 | |
128 | 0 | r->obj_offsets.offset = get_be64(f); |
129 | 0 | f += 8; |
130 | |
131 | 0 | r->object_id_len = r->obj_offsets.offset & ((1 << 5) - 1); |
132 | 0 | r->obj_offsets.offset >>= 5; |
133 | |
134 | 0 | r->obj_offsets.index_offset = get_be64(f); |
135 | 0 | f += 8; |
136 | 0 | r->log_offsets.offset = get_be64(f); |
137 | 0 | f += 8; |
138 | 0 | r->log_offsets.index_offset = get_be64(f); |
139 | 0 | f += 8; |
140 | |
141 | 0 | computed_crc = crc32(0, footer, f - footer); |
142 | 0 | file_crc = get_be32(f); |
143 | 0 | f += 4; |
144 | 0 | if (computed_crc != file_crc) { |
145 | 0 | err = REFTABLE_FORMAT_ERROR; |
146 | 0 | goto done; |
147 | 0 | } |
148 | | |
149 | 0 | first_block_typ = header[header_size(r->version)]; |
150 | 0 | r->ref_offsets.is_present = (first_block_typ == BLOCK_TYPE_REF); |
151 | 0 | r->ref_offsets.offset = 0; |
152 | 0 | r->log_offsets.is_present = (first_block_typ == BLOCK_TYPE_LOG || |
153 | 0 | r->log_offsets.offset > 0); |
154 | 0 | r->obj_offsets.is_present = r->obj_offsets.offset > 0; |
155 | 0 | if (r->obj_offsets.is_present && !r->object_id_len) { |
156 | 0 | err = REFTABLE_FORMAT_ERROR; |
157 | 0 | goto done; |
158 | 0 | } |
159 | | |
160 | 0 | err = 0; |
161 | 0 | done: |
162 | 0 | return err; |
163 | 0 | } |
164 | | |
165 | | struct table_iter { |
166 | | struct reftable_reader *r; |
167 | | uint8_t typ; |
168 | | uint64_t block_off; |
169 | | struct block_reader br; |
170 | | struct block_iter bi; |
171 | | int is_finished; |
172 | | }; |
173 | | |
174 | | static int table_iter_init(struct table_iter *ti, struct reftable_reader *r) |
175 | 0 | { |
176 | 0 | struct block_iter bi = BLOCK_ITER_INIT; |
177 | 0 | memset(ti, 0, sizeof(*ti)); |
178 | 0 | reftable_reader_incref(r); |
179 | 0 | ti->r = r; |
180 | 0 | ti->bi = bi; |
181 | 0 | return 0; |
182 | 0 | } |
183 | | |
184 | | static int table_iter_next_in_block(struct table_iter *ti, |
185 | | struct reftable_record *rec) |
186 | 0 | { |
187 | 0 | int res = block_iter_next(&ti->bi, rec); |
188 | 0 | if (res == 0 && reftable_record_type(rec) == BLOCK_TYPE_REF) { |
189 | 0 | rec->u.ref.update_index += ti->r->min_update_index; |
190 | 0 | } |
191 | |
192 | 0 | return res; |
193 | 0 | } |
194 | | |
195 | | static void table_iter_block_done(struct table_iter *ti) |
196 | 0 | { |
197 | 0 | block_reader_release(&ti->br); |
198 | 0 | block_iter_reset(&ti->bi); |
199 | 0 | } |
200 | | |
201 | | static int32_t extract_block_size(uint8_t *data, uint8_t *typ, uint64_t off, |
202 | | int version) |
203 | 0 | { |
204 | 0 | int32_t result = 0; |
205 | |
206 | 0 | if (off == 0) { |
207 | 0 | data += header_size(version); |
208 | 0 | } |
209 | |
210 | 0 | *typ = data[0]; |
211 | 0 | if (reftable_is_block_type(*typ)) { |
212 | 0 | result = get_be24(data + 1); |
213 | 0 | } |
214 | 0 | return result; |
215 | 0 | } |
216 | | |
217 | | int reader_init_block_reader(struct reftable_reader *r, struct block_reader *br, |
218 | | uint64_t next_off, uint8_t want_typ) |
219 | 0 | { |
220 | 0 | int32_t guess_block_size = r->block_size ? r->block_size : |
221 | 0 | DEFAULT_BLOCK_SIZE; |
222 | 0 | struct reftable_block block = { NULL }; |
223 | 0 | uint8_t block_typ = 0; |
224 | 0 | int err = 0; |
225 | 0 | uint32_t header_off = next_off ? 0 : header_size(r->version); |
226 | 0 | int32_t block_size = 0; |
227 | |
228 | 0 | if (next_off >= r->size) |
229 | 0 | return 1; |
230 | | |
231 | 0 | err = reader_get_block(r, &block, next_off, guess_block_size); |
232 | 0 | if (err < 0) |
233 | 0 | goto done; |
234 | | |
235 | 0 | block_size = extract_block_size(block.data, &block_typ, next_off, |
236 | 0 | r->version); |
237 | 0 | if (block_size < 0) { |
238 | 0 | err = block_size; |
239 | 0 | goto done; |
240 | 0 | } |
241 | 0 | if (want_typ != BLOCK_TYPE_ANY && block_typ != want_typ) { |
242 | 0 | err = 1; |
243 | 0 | goto done; |
244 | 0 | } |
245 | | |
246 | 0 | if (block_size > guess_block_size) { |
247 | 0 | reftable_block_done(&block); |
248 | 0 | err = reader_get_block(r, &block, next_off, block_size); |
249 | 0 | if (err < 0) { |
250 | 0 | goto done; |
251 | 0 | } |
252 | 0 | } |
253 | | |
254 | 0 | err = block_reader_init(br, &block, header_off, r->block_size, |
255 | 0 | hash_size(r->hash_id)); |
256 | 0 | done: |
257 | 0 | reftable_block_done(&block); |
258 | |
259 | 0 | return err; |
260 | 0 | } |
261 | | |
262 | | static void table_iter_close(struct table_iter *ti) |
263 | 0 | { |
264 | 0 | table_iter_block_done(ti); |
265 | 0 | block_iter_close(&ti->bi); |
266 | 0 | reftable_reader_decref(ti->r); |
267 | 0 | } |
268 | | |
269 | | static int table_iter_next_block(struct table_iter *ti) |
270 | 0 | { |
271 | 0 | uint64_t next_block_off = ti->block_off + ti->br.full_block_size; |
272 | 0 | int err; |
273 | |
274 | 0 | err = reader_init_block_reader(ti->r, &ti->br, next_block_off, ti->typ); |
275 | 0 | if (err > 0) |
276 | 0 | ti->is_finished = 1; |
277 | 0 | if (err) |
278 | 0 | return err; |
279 | | |
280 | 0 | ti->block_off = next_block_off; |
281 | 0 | ti->is_finished = 0; |
282 | 0 | block_iter_seek_start(&ti->bi, &ti->br); |
283 | |
284 | 0 | return 0; |
285 | 0 | } |
286 | | |
287 | | static int table_iter_next(struct table_iter *ti, struct reftable_record *rec) |
288 | 0 | { |
289 | 0 | if (reftable_record_type(rec) != ti->typ) |
290 | 0 | return REFTABLE_API_ERROR; |
291 | | |
292 | 0 | while (1) { |
293 | 0 | int err; |
294 | |
295 | 0 | if (ti->is_finished) |
296 | 0 | return 1; |
297 | | |
298 | | /* |
299 | | * Check whether the current block still has more records. If |
300 | | * so, return it. If the iterator returns positive then the |
301 | | * current block has been exhausted. |
302 | | */ |
303 | 0 | err = table_iter_next_in_block(ti, rec); |
304 | 0 | if (err <= 0) |
305 | 0 | return err; |
306 | | |
307 | | /* |
308 | | * Otherwise, we need to continue to the next block in the |
309 | | * table and retry. If there are no more blocks then the |
310 | | * iterator is drained. |
311 | | */ |
312 | 0 | err = table_iter_next_block(ti); |
313 | 0 | if (err) { |
314 | 0 | ti->is_finished = 1; |
315 | 0 | return err; |
316 | 0 | } |
317 | 0 | } |
318 | 0 | } |
319 | | |
320 | | static int table_iter_seek_to(struct table_iter *ti, uint64_t off, uint8_t typ) |
321 | 0 | { |
322 | 0 | int err; |
323 | |
324 | 0 | err = reader_init_block_reader(ti->r, &ti->br, off, typ); |
325 | 0 | if (err != 0) |
326 | 0 | return err; |
327 | | |
328 | 0 | ti->typ = block_reader_type(&ti->br); |
329 | 0 | ti->block_off = off; |
330 | 0 | block_iter_seek_start(&ti->bi, &ti->br); |
331 | 0 | return 0; |
332 | 0 | } |
333 | | |
334 | | static int table_iter_seek_start(struct table_iter *ti, uint8_t typ, int index) |
335 | 0 | { |
336 | 0 | struct reftable_reader_offsets *offs = reader_offsets_for(ti->r, typ); |
337 | 0 | uint64_t off = offs->offset; |
338 | 0 | if (index) { |
339 | 0 | off = offs->index_offset; |
340 | 0 | if (off == 0) { |
341 | 0 | return 1; |
342 | 0 | } |
343 | 0 | typ = BLOCK_TYPE_INDEX; |
344 | 0 | } |
345 | | |
346 | 0 | return table_iter_seek_to(ti, off, typ); |
347 | 0 | } |
348 | | |
349 | | static int table_iter_seek_linear(struct table_iter *ti, |
350 | | struct reftable_record *want) |
351 | 0 | { |
352 | 0 | struct strbuf want_key = STRBUF_INIT; |
353 | 0 | struct strbuf got_key = STRBUF_INIT; |
354 | 0 | struct reftable_record rec; |
355 | 0 | int err; |
356 | |
357 | 0 | reftable_record_init(&rec, reftable_record_type(want)); |
358 | 0 | reftable_record_key(want, &want_key); |
359 | | |
360 | | /* |
361 | | * First we need to locate the block that must contain our record. To |
362 | | * do so we scan through blocks linearly until we find the first block |
363 | | * whose first key is bigger than our wanted key. Once we have found |
364 | | * that block we know that the key must be contained in the preceding |
365 | | * block. |
366 | | * |
367 | | * This algorithm is somewhat unfortunate because it means that we |
368 | | * always have to seek one block too far and then back up. But as we |
369 | | * can only decode the _first_ key of a block but not its _last_ key we |
370 | | * have no other way to do this. |
371 | | */ |
372 | 0 | while (1) { |
373 | 0 | struct table_iter next = *ti; |
374 | | |
375 | | /* |
376 | | * We must be careful to not modify underlying data of `ti` |
377 | | * because we may find that `next` does not contain our desired |
378 | | * block, but that `ti` does. In that case, we would discard |
379 | | * `next` and continue with `ti`. |
380 | | * |
381 | | * This also means that we cannot reuse allocated memory for |
382 | | * `next` here. While it would be great if we could, it should |
383 | | * in practice not be too bad given that we should only ever |
384 | | * end up doing linear seeks with at most three blocks. As soon |
385 | | * as we have more than three blocks we would have an index, so |
386 | | * we would not do a linear search there anymore. |
387 | | */ |
388 | 0 | memset(&next.br.block, 0, sizeof(next.br.block)); |
389 | 0 | next.br.zstream = NULL; |
390 | 0 | next.br.uncompressed_data = NULL; |
391 | 0 | next.br.uncompressed_cap = 0; |
392 | |
393 | 0 | err = table_iter_next_block(&next); |
394 | 0 | if (err < 0) |
395 | 0 | goto done; |
396 | 0 | if (err > 0) |
397 | 0 | break; |
398 | | |
399 | 0 | err = block_reader_first_key(&next.br, &got_key); |
400 | 0 | if (err < 0) |
401 | 0 | goto done; |
402 | | |
403 | 0 | if (strbuf_cmp(&got_key, &want_key) > 0) { |
404 | 0 | table_iter_block_done(&next); |
405 | 0 | break; |
406 | 0 | } |
407 | | |
408 | 0 | table_iter_block_done(ti); |
409 | 0 | *ti = next; |
410 | 0 | } |
411 | | |
412 | | /* |
413 | | * We have located the block that must contain our record, so we seek |
414 | | * the wanted key inside of it. If the block does not contain our key |
415 | | * we know that the corresponding record does not exist. |
416 | | */ |
417 | 0 | err = block_iter_seek_key(&ti->bi, &ti->br, &want_key); |
418 | 0 | if (err < 0) |
419 | 0 | goto done; |
420 | 0 | err = 0; |
421 | |
422 | 0 | done: |
423 | 0 | reftable_record_release(&rec); |
424 | 0 | strbuf_release(&want_key); |
425 | 0 | strbuf_release(&got_key); |
426 | 0 | return err; |
427 | 0 | } |
428 | | |
429 | | static int table_iter_seek_indexed(struct table_iter *ti, |
430 | | struct reftable_record *rec) |
431 | 0 | { |
432 | 0 | struct reftable_record want_index = { |
433 | 0 | .type = BLOCK_TYPE_INDEX, .u.idx = { .last_key = STRBUF_INIT } |
434 | 0 | }; |
435 | 0 | struct reftable_record index_result = { |
436 | 0 | .type = BLOCK_TYPE_INDEX, |
437 | 0 | .u.idx = { .last_key = STRBUF_INIT }, |
438 | 0 | }; |
439 | 0 | int err; |
440 | |
441 | 0 | reftable_record_key(rec, &want_index.u.idx.last_key); |
442 | | |
443 | | /* |
444 | | * The index may consist of multiple levels, where each level may have |
445 | | * multiple index blocks. We start by doing a linear search in the |
446 | | * highest layer that identifies the relevant index block as well as |
447 | | * the record inside that block that corresponds to our wanted key. |
448 | | */ |
449 | 0 | err = table_iter_seek_linear(ti, &want_index); |
450 | 0 | if (err < 0) |
451 | 0 | goto done; |
452 | | |
453 | | /* |
454 | | * Traverse down the levels until we find a non-index entry. |
455 | | */ |
456 | 0 | while (1) { |
457 | | /* |
458 | | * In case we seek a record that does not exist the index iter |
459 | | * will tell us that the iterator is over. This works because |
460 | | * the last index entry of the current level will contain the |
461 | | * last key it knows about. So in case our seeked key is larger |
462 | | * than the last indexed key we know that it won't exist. |
463 | | * |
464 | | * There is one subtlety in the layout of the index section |
465 | | * that makes this work as expected: the highest-level index is |
466 | | * at end of the section and will point backwards and thus we |
467 | | * start reading from the end of the index section, not the |
468 | | * beginning. |
469 | | * |
470 | | * If that wasn't the case and the order was reversed then the |
471 | | * linear seek would seek into the lower levels and traverse |
472 | | * all levels of the index only to find out that the key does |
473 | | * not exist. |
474 | | */ |
475 | 0 | err = table_iter_next(ti, &index_result); |
476 | 0 | if (err != 0) |
477 | 0 | goto done; |
478 | | |
479 | 0 | err = table_iter_seek_to(ti, index_result.u.idx.offset, 0); |
480 | 0 | if (err != 0) |
481 | 0 | goto done; |
482 | | |
483 | 0 | err = block_iter_seek_key(&ti->bi, &ti->br, &want_index.u.idx.last_key); |
484 | 0 | if (err < 0) |
485 | 0 | goto done; |
486 | | |
487 | 0 | if (ti->typ == reftable_record_type(rec)) { |
488 | 0 | err = 0; |
489 | 0 | break; |
490 | 0 | } |
491 | | |
492 | 0 | if (ti->typ != BLOCK_TYPE_INDEX) { |
493 | 0 | err = REFTABLE_FORMAT_ERROR; |
494 | 0 | goto done; |
495 | 0 | } |
496 | 0 | } |
497 | | |
498 | 0 | done: |
499 | 0 | reftable_record_release(&want_index); |
500 | 0 | reftable_record_release(&index_result); |
501 | 0 | return err; |
502 | 0 | } |
503 | | |
504 | | static int table_iter_seek(struct table_iter *ti, |
505 | | struct reftable_record *want) |
506 | 0 | { |
507 | 0 | uint8_t typ = reftable_record_type(want); |
508 | 0 | struct reftable_reader_offsets *offs = reader_offsets_for(ti->r, typ); |
509 | 0 | int err; |
510 | |
511 | 0 | err = table_iter_seek_start(ti, reftable_record_type(want), |
512 | 0 | !!offs->index_offset); |
513 | 0 | if (err < 0) |
514 | 0 | goto out; |
515 | | |
516 | 0 | if (offs->index_offset) |
517 | 0 | err = table_iter_seek_indexed(ti, want); |
518 | 0 | else |
519 | 0 | err = table_iter_seek_linear(ti, want); |
520 | 0 | if (err) |
521 | 0 | goto out; |
522 | | |
523 | 0 | out: |
524 | 0 | return err; |
525 | 0 | } |
526 | | |
527 | | static int table_iter_seek_void(void *ti, struct reftable_record *want) |
528 | 0 | { |
529 | 0 | return table_iter_seek(ti, want); |
530 | 0 | } |
531 | | |
532 | | static int table_iter_next_void(void *ti, struct reftable_record *rec) |
533 | 0 | { |
534 | 0 | return table_iter_next(ti, rec); |
535 | 0 | } |
536 | | |
537 | | static void table_iter_close_void(void *ti) |
538 | 0 | { |
539 | 0 | table_iter_close(ti); |
540 | 0 | } |
541 | | |
542 | | static struct reftable_iterator_vtable table_iter_vtable = { |
543 | | .seek = &table_iter_seek_void, |
544 | | .next = &table_iter_next_void, |
545 | | .close = &table_iter_close_void, |
546 | | }; |
547 | | |
548 | | static void iterator_from_table_iter(struct reftable_iterator *it, |
549 | | struct table_iter *ti) |
550 | 0 | { |
551 | 0 | assert(!it->ops); |
552 | 0 | it->iter_arg = ti; |
553 | 0 | it->ops = &table_iter_vtable; |
554 | 0 | } |
555 | | |
556 | | void reader_init_iter(struct reftable_reader *r, |
557 | | struct reftable_iterator *it, |
558 | | uint8_t typ) |
559 | 0 | { |
560 | 0 | struct reftable_reader_offsets *offs = reader_offsets_for(r, typ); |
561 | |
562 | 0 | if (offs->is_present) { |
563 | 0 | struct table_iter *ti; |
564 | 0 | REFTABLE_ALLOC_ARRAY(ti, 1); |
565 | 0 | table_iter_init(ti, r); |
566 | 0 | iterator_from_table_iter(it, ti); |
567 | 0 | } else { |
568 | 0 | iterator_set_empty(it); |
569 | 0 | } |
570 | 0 | } |
571 | | |
572 | | void reftable_reader_init_ref_iterator(struct reftable_reader *r, |
573 | | struct reftable_iterator *it) |
574 | 0 | { |
575 | 0 | reader_init_iter(r, it, BLOCK_TYPE_REF); |
576 | 0 | } |
577 | | |
578 | | void reftable_reader_init_log_iterator(struct reftable_reader *r, |
579 | | struct reftable_iterator *it) |
580 | 0 | { |
581 | 0 | reader_init_iter(r, it, BLOCK_TYPE_LOG); |
582 | 0 | } |
583 | | |
584 | | int reftable_reader_new(struct reftable_reader **out, |
585 | | struct reftable_block_source *source, char const *name) |
586 | 0 | { |
587 | 0 | struct reftable_block footer = { 0 }; |
588 | 0 | struct reftable_block header = { 0 }; |
589 | 0 | struct reftable_reader *r; |
590 | 0 | uint64_t file_size = block_source_size(source); |
591 | 0 | uint32_t read_size; |
592 | 0 | int err; |
593 | |
594 | 0 | REFTABLE_CALLOC_ARRAY(r, 1); |
595 | | |
596 | | /* |
597 | | * We need one extra byte to read the type of first block. We also |
598 | | * pretend to always be reading v2 of the format because it is larger. |
599 | | */ |
600 | 0 | read_size = header_size(2) + 1; |
601 | 0 | if (read_size > file_size) { |
602 | 0 | err = REFTABLE_FORMAT_ERROR; |
603 | 0 | goto done; |
604 | 0 | } |
605 | | |
606 | 0 | err = block_source_read_block(source, &header, 0, read_size); |
607 | 0 | if (err != read_size) { |
608 | 0 | err = REFTABLE_IO_ERROR; |
609 | 0 | goto done; |
610 | 0 | } |
611 | | |
612 | 0 | if (memcmp(header.data, "REFT", 4)) { |
613 | 0 | err = REFTABLE_FORMAT_ERROR; |
614 | 0 | goto done; |
615 | 0 | } |
616 | 0 | r->version = header.data[4]; |
617 | 0 | if (r->version != 1 && r->version != 2) { |
618 | 0 | err = REFTABLE_FORMAT_ERROR; |
619 | 0 | goto done; |
620 | 0 | } |
621 | | |
622 | 0 | r->size = file_size - footer_size(r->version); |
623 | 0 | r->source = *source; |
624 | 0 | r->name = xstrdup(name); |
625 | 0 | r->hash_id = 0; |
626 | 0 | r->refcount = 1; |
627 | |
628 | 0 | err = block_source_read_block(source, &footer, r->size, |
629 | 0 | footer_size(r->version)); |
630 | 0 | if (err != footer_size(r->version)) { |
631 | 0 | err = REFTABLE_IO_ERROR; |
632 | 0 | goto done; |
633 | 0 | } |
634 | | |
635 | 0 | err = parse_footer(r, footer.data, header.data); |
636 | 0 | if (err) |
637 | 0 | goto done; |
638 | | |
639 | 0 | *out = r; |
640 | |
641 | 0 | done: |
642 | 0 | reftable_block_done(&footer); |
643 | 0 | reftable_block_done(&header); |
644 | 0 | if (err) { |
645 | 0 | reftable_free(r); |
646 | 0 | block_source_close(source); |
647 | 0 | } |
648 | 0 | return err; |
649 | 0 | } |
650 | | |
651 | | void reftable_reader_incref(struct reftable_reader *r) |
652 | 0 | { |
653 | 0 | if (!r->refcount) |
654 | 0 | BUG("cannot increment ref counter of dead reader"); |
655 | 0 | r->refcount++; |
656 | 0 | } |
657 | | |
658 | | void reftable_reader_decref(struct reftable_reader *r) |
659 | 0 | { |
660 | 0 | if (!r) |
661 | 0 | return; |
662 | 0 | if (!r->refcount) |
663 | 0 | BUG("cannot decrement ref counter of dead reader"); |
664 | 0 | if (--r->refcount) |
665 | 0 | return; |
666 | 0 | block_source_close(&r->source); |
667 | 0 | FREE_AND_NULL(r->name); |
668 | 0 | reftable_free(r); |
669 | 0 | } |
670 | | |
671 | | static int reftable_reader_refs_for_indexed(struct reftable_reader *r, |
672 | | struct reftable_iterator *it, |
673 | | uint8_t *oid) |
674 | 0 | { |
675 | 0 | struct reftable_record want = { |
676 | 0 | .type = BLOCK_TYPE_OBJ, |
677 | 0 | .u.obj = { |
678 | 0 | .hash_prefix = oid, |
679 | 0 | .hash_prefix_len = r->object_id_len, |
680 | 0 | }, |
681 | 0 | }; |
682 | 0 | struct reftable_iterator oit = { NULL }; |
683 | 0 | struct reftable_record got = { |
684 | 0 | .type = BLOCK_TYPE_OBJ, |
685 | 0 | .u.obj = { 0 }, |
686 | 0 | }; |
687 | 0 | int err = 0; |
688 | 0 | struct indexed_table_ref_iter *itr = NULL; |
689 | | |
690 | | /* Look through the reverse index. */ |
691 | 0 | reader_init_iter(r, &oit, BLOCK_TYPE_OBJ); |
692 | 0 | err = iterator_seek(&oit, &want); |
693 | 0 | if (err != 0) |
694 | 0 | goto done; |
695 | | |
696 | | /* read out the reftable_obj_record */ |
697 | 0 | err = iterator_next(&oit, &got); |
698 | 0 | if (err < 0) |
699 | 0 | goto done; |
700 | | |
701 | 0 | if (err > 0 || memcmp(want.u.obj.hash_prefix, got.u.obj.hash_prefix, |
702 | 0 | r->object_id_len)) { |
703 | | /* didn't find it; return empty iterator */ |
704 | 0 | iterator_set_empty(it); |
705 | 0 | err = 0; |
706 | 0 | goto done; |
707 | 0 | } |
708 | | |
709 | 0 | err = new_indexed_table_ref_iter(&itr, r, oid, hash_size(r->hash_id), |
710 | 0 | got.u.obj.offsets, |
711 | 0 | got.u.obj.offset_len); |
712 | 0 | if (err < 0) |
713 | 0 | goto done; |
714 | 0 | got.u.obj.offsets = NULL; |
715 | 0 | iterator_from_indexed_table_ref_iter(it, itr); |
716 | |
717 | 0 | done: |
718 | 0 | reftable_iterator_destroy(&oit); |
719 | 0 | reftable_record_release(&got); |
720 | 0 | return err; |
721 | 0 | } |
722 | | |
723 | | static int reftable_reader_refs_for_unindexed(struct reftable_reader *r, |
724 | | struct reftable_iterator *it, |
725 | | uint8_t *oid) |
726 | 0 | { |
727 | 0 | struct table_iter *ti; |
728 | 0 | struct filtering_ref_iterator *filter = NULL; |
729 | 0 | struct filtering_ref_iterator empty = FILTERING_REF_ITERATOR_INIT; |
730 | 0 | int oid_len = hash_size(r->hash_id); |
731 | 0 | int err; |
732 | |
733 | 0 | REFTABLE_ALLOC_ARRAY(ti, 1); |
734 | 0 | table_iter_init(ti, r); |
735 | 0 | err = table_iter_seek_start(ti, BLOCK_TYPE_REF, 0); |
736 | 0 | if (err < 0) { |
737 | 0 | reftable_free(ti); |
738 | 0 | return err; |
739 | 0 | } |
740 | | |
741 | 0 | filter = reftable_malloc(sizeof(struct filtering_ref_iterator)); |
742 | 0 | *filter = empty; |
743 | |
744 | 0 | strbuf_add(&filter->oid, oid, oid_len); |
745 | 0 | iterator_from_table_iter(&filter->it, ti); |
746 | |
747 | 0 | iterator_from_filtering_ref_iterator(it, filter); |
748 | 0 | return 0; |
749 | 0 | } |
750 | | |
751 | | int reftable_reader_refs_for(struct reftable_reader *r, |
752 | | struct reftable_iterator *it, uint8_t *oid) |
753 | 0 | { |
754 | 0 | if (r->obj_offsets.is_present) |
755 | 0 | return reftable_reader_refs_for_indexed(r, it, oid); |
756 | 0 | return reftable_reader_refs_for_unindexed(r, it, oid); |
757 | 0 | } |
758 | | |
759 | | uint64_t reftable_reader_max_update_index(struct reftable_reader *r) |
760 | 0 | { |
761 | 0 | return r->max_update_index; |
762 | 0 | } |
763 | | |
764 | | uint64_t reftable_reader_min_update_index(struct reftable_reader *r) |
765 | 0 | { |
766 | 0 | return r->min_update_index; |
767 | 0 | } |
768 | | |
769 | | int reftable_reader_print_blocks(const char *tablename) |
770 | 0 | { |
771 | 0 | struct { |
772 | 0 | const char *name; |
773 | 0 | int type; |
774 | 0 | } sections[] = { |
775 | 0 | { |
776 | 0 | .name = "ref", |
777 | 0 | .type = BLOCK_TYPE_REF, |
778 | 0 | }, |
779 | 0 | { |
780 | 0 | .name = "obj", |
781 | 0 | .type = BLOCK_TYPE_OBJ, |
782 | 0 | }, |
783 | 0 | { |
784 | 0 | .name = "log", |
785 | 0 | .type = BLOCK_TYPE_LOG, |
786 | 0 | }, |
787 | 0 | }; |
788 | 0 | struct reftable_block_source src = { 0 }; |
789 | 0 | struct reftable_reader *r = NULL; |
790 | 0 | struct table_iter ti = { 0 }; |
791 | 0 | size_t i; |
792 | 0 | int err; |
793 | |
794 | 0 | err = reftable_block_source_from_file(&src, tablename); |
795 | 0 | if (err < 0) |
796 | 0 | goto done; |
797 | | |
798 | 0 | err = reftable_reader_new(&r, &src, tablename); |
799 | 0 | if (err < 0) |
800 | 0 | goto done; |
801 | | |
802 | 0 | table_iter_init(&ti, r); |
803 | |
804 | 0 | printf("header:\n"); |
805 | 0 | printf(" block_size: %d\n", r->block_size); |
806 | |
807 | 0 | for (i = 0; i < ARRAY_SIZE(sections); i++) { |
808 | 0 | err = table_iter_seek_start(&ti, sections[i].type, 0); |
809 | 0 | if (err < 0) |
810 | 0 | goto done; |
811 | 0 | if (err > 0) |
812 | 0 | continue; |
813 | | |
814 | 0 | printf("%s:\n", sections[i].name); |
815 | |
816 | 0 | while (1) { |
817 | 0 | printf(" - length: %u\n", ti.br.block_len); |
818 | 0 | printf(" restarts: %u\n", ti.br.restart_count); |
819 | |
820 | 0 | err = table_iter_next_block(&ti); |
821 | 0 | if (err < 0) |
822 | 0 | goto done; |
823 | 0 | if (err > 0) |
824 | 0 | break; |
825 | 0 | } |
826 | 0 | } |
827 | | |
828 | 0 | done: |
829 | 0 | reftable_reader_decref(r); |
830 | 0 | table_iter_close(&ti); |
831 | 0 | return err; |
832 | 0 | } |
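Note on usage: the listing above covers only the reader's internals. As a rough orientation for how the covered entry points (reftable_reader_new, reftable_reader_init_ref_iterator, reftable_reader_decref) fit together, here is a minimal caller sketch. It is illustrative only: the helper name dump_refs is hypothetical, the include paths are assumptions modelled on the "reftable-error.h" include seen above, and reftable_iterator_next_ref / reftable_ref_record_release come from the companion reftable iterator and record headers rather than from this file.

#include <stdio.h>

#include "reftable-blocksource.h"
#include "reftable-reader.h"
#include "reftable-iterator.h"
#include "reftable-record.h"

/* Print every ref name stored in a single reftable file. */
static int dump_refs(const char *tablename)
{
	struct reftable_block_source src = { 0 };
	struct reftable_reader *reader = NULL;
	struct reftable_iterator it = { 0 };
	struct reftable_ref_record ref = { 0 };
	int err;

	err = reftable_block_source_from_file(&src, tablename);
	if (err < 0)
		return err;

	/* The reader takes ownership of the block source, even on error. */
	err = reftable_reader_new(&reader, &src, tablename);
	if (err < 0)
		return err;

	reftable_reader_init_ref_iterator(reader, &it);
	while (1) {
		err = reftable_iterator_next_ref(&it, &ref);
		if (err > 0) {
			err = 0; /* iterator drained */
			break;
		}
		if (err < 0)
			break;
		printf("%s\n", ref.refname);
	}

	reftable_ref_record_release(&ref);
	reftable_iterator_destroy(&it);
	reftable_reader_decref(reader);
	return err;
}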