/src/git/reftable/block.c
Line | Count | Source |
1 | | /* |
2 | | Copyright 2020 Google LLC |
3 | | |
4 | | Use of this source code is governed by a BSD-style |
5 | | license that can be found in the LICENSE file or at |
6 | | https://developers.google.com/open-source/licenses/bsd |
7 | | */ |
8 | | |
9 | | #include "block.h" |
10 | | |
11 | | #include "blocksource.h" |
12 | | #include "constants.h" |
13 | | #include "record.h" |
14 | | #include "reftable-error.h" |
15 | | #include "system.h" |
16 | | #include <zlib.h> |
17 | | |
18 | | int header_size(int version) |
19 | 0 | { |
20 | 0 | switch (version) { |
21 | 0 | case 1: |
22 | 0 | return 24; |
23 | 0 | case 2: |
24 | 0 | return 28; |
25 | 0 | } |
26 | 0 | abort(); |
27 | 0 | } |
28 | | |
29 | | int footer_size(int version) |
30 | 0 | { |
31 | 0 | switch (version) { |
32 | 0 | case 1: |
33 | 0 | return 68; |
34 | 0 | case 2: |
35 | 0 | return 72; |
36 | 0 | } |
37 | 0 | abort(); |
38 | 0 | } |
39 | | |
40 | | static int block_writer_register_restart(struct block_writer *w, int n, |
41 | | int is_restart, struct strbuf *key) |
42 | 0 | { |
43 | 0 | int rlen = w->restart_len; |
44 | 0 | if (rlen >= MAX_RESTARTS) { |
45 | 0 | is_restart = 0; |
46 | 0 | } |
47 | |
48 | 0 | if (is_restart) { |
49 | 0 | rlen++; |
50 | 0 | } |
51 | 0 | if (2 + 3 * rlen + n > w->block_size - w->next) |
52 | 0 | return -1; |
53 | 0 | if (is_restart) { |
54 | 0 | REFTABLE_ALLOC_GROW(w->restarts, w->restart_len + 1, w->restart_cap); |
55 | 0 | w->restarts[w->restart_len++] = w->next; |
56 | 0 | } |
57 | |
58 | 0 | w->next += n; |
59 | |
60 | 0 | strbuf_reset(&w->last_key); |
61 | 0 | strbuf_addbuf(&w->last_key, key); |
62 | 0 | w->entries++; |
63 | 0 | return 0; |
64 | 0 | } |
65 | | |
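A worked example of the space check in block_writer_register_restart() above (numbers invented for illustration, not taken from the source): with block_size = 4096, next = 1000, restart_len = 10 and an incoming record of n = 40 bytes that starts a new restart, the reserved trailer grows to 2 + 3 * 11 = 35 bytes, and the record is accepted because 35 + 40 = 75 does not exceed the remaining 4096 - 1000 = 3096 bytes.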
66 | | void block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *buf, |
67 | | uint32_t block_size, uint32_t header_off, int hash_size) |
68 | 0 | { |
69 | 0 | bw->buf = buf; |
70 | 0 | bw->hash_size = hash_size; |
71 | 0 | bw->block_size = block_size; |
72 | 0 | bw->header_off = header_off; |
73 | 0 | bw->buf[header_off] = typ; |
74 | 0 | bw->next = header_off + 4; |
75 | 0 | bw->restart_interval = 16; |
76 | 0 | bw->entries = 0; |
77 | 0 | bw->restart_len = 0; |
78 | 0 | bw->last_key.len = 0; |
79 | 0 | if (!bw->zstream) { |
80 | 0 | REFTABLE_CALLOC_ARRAY(bw->zstream, 1); |
81 | 0 | deflateInit(bw->zstream, 9); |
82 | 0 | } |
83 | 0 | } |
84 | | |
85 | | uint8_t block_writer_type(struct block_writer *bw) |
86 | 0 | { |
87 | 0 | return bw->buf[bw->header_off]; |
88 | 0 | } |
89 | | |
90 | | /* Adds the reftable_record to the block. Returns -1 if it does not fit, 0 on |
91 | | success. Returns REFTABLE_API_ERROR if attempting to write a record with |
92 | | an empty key. */ |
93 | | int block_writer_add(struct block_writer *w, struct reftable_record *rec) |
94 | 0 | { |
95 | 0 | struct strbuf empty = STRBUF_INIT; |
96 | 0 | struct strbuf last = |
97 | 0 | w->entries % w->restart_interval == 0 ? empty : w->last_key; |
98 | 0 | struct string_view out = { |
99 | 0 | .buf = w->buf + w->next, |
100 | 0 | .len = w->block_size - w->next, |
101 | 0 | }; |
102 | |
103 | 0 | struct string_view start = out; |
104 | |
105 | 0 | int is_restart = 0; |
106 | 0 | struct strbuf key = STRBUF_INIT; |
107 | 0 | int n = 0; |
108 | 0 | int err = -1; |
109 | |
110 | 0 | reftable_record_key(rec, &key); |
111 | 0 | if (!key.len) { |
112 | 0 | err = REFTABLE_API_ERROR; |
113 | 0 | goto done; |
114 | 0 | } |
115 | | |
116 | 0 | n = reftable_encode_key(&is_restart, out, last, key, |
117 | 0 | reftable_record_val_type(rec)); |
118 | 0 | if (n < 0) |
119 | 0 | goto done; |
120 | 0 | string_view_consume(&out, n); |
121 | |
122 | 0 | n = reftable_record_encode(rec, out, w->hash_size); |
123 | 0 | if (n < 0) |
124 | 0 | goto done; |
125 | 0 | string_view_consume(&out, n); |
126 | |
127 | 0 | err = block_writer_register_restart(w, start.len - out.len, is_restart, |
128 | 0 | &key); |
129 | 0 | done: |
130 | 0 | strbuf_release(&key); |
131 | 0 | return err; |
132 | 0 | } |
133 | | |
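A minimal sketch of how a caller might drive the writer API above (hypothetical values; the real call sites live elsewhere in the reftable code, and rec is assumed to be an already populated struct reftable_record):

	uint8_t buf[4096];
	struct block_writer bw = { 0 }; /* zstream must start out NULL */
	int n;

	/* 24 is the version-1 file header size; only the first block carries it. */
	block_writer_init(&bw, BLOCK_TYPE_REF, buf, sizeof(buf), 24, 20 /* SHA-1 */);

	if (block_writer_add(&bw, &rec) < 0) {
		/* record does not fit: finish this block and retry in a fresh one */
	}

	n = block_writer_finish(&bw); /* bytes used, or a negative error code */
	block_writer_release(&bw);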
134 | | int block_writer_finish(struct block_writer *w) |
135 | 0 | { |
136 | 0 | int i; |
137 | 0 | for (i = 0; i < w->restart_len; i++) { |
138 | 0 | put_be24(w->buf + w->next, w->restarts[i]); |
139 | 0 | w->next += 3; |
140 | 0 | } |
141 | |
142 | 0 | put_be16(w->buf + w->next, w->restart_len); |
143 | 0 | w->next += 2; |
144 | 0 | put_be24(w->buf + 1 + w->header_off, w->next); |
145 | | |
146 | | /* |
147 | | * Log records are stored zlib-compressed. Note that the compression |
148 | | * also spans over the restart points we have just written. |
149 | | */ |
150 | 0 | if (block_writer_type(w) == BLOCK_TYPE_LOG) { |
151 | 0 | int block_header_skip = 4 + w->header_off; |
152 | 0 | uLongf src_len = w->next - block_header_skip, compressed_len; |
153 | 0 | int ret; |
154 | |
155 | 0 | ret = deflateReset(w->zstream); |
156 | 0 | if (ret != Z_OK) |
157 | 0 | return REFTABLE_ZLIB_ERROR; |
158 | | |
159 | | /* |
160 | | * Precompute the upper bound of how many bytes the compressed |
161 | | * data may end up with. Combined with `Z_FINISH`, `deflate()` |
162 | | * is guaranteed to return `Z_STREAM_END`. |
163 | | */ |
164 | 0 | compressed_len = deflateBound(w->zstream, src_len); |
165 | 0 | REFTABLE_ALLOC_GROW(w->compressed, compressed_len, w->compressed_cap); |
166 | |
167 | 0 | w->zstream->next_out = w->compressed; |
168 | 0 | w->zstream->avail_out = compressed_len; |
169 | 0 | w->zstream->next_in = w->buf + block_header_skip; |
170 | 0 | w->zstream->avail_in = src_len; |
171 | | |
172 | | /* |
173 | | * We want to perform all compression in a single step, which |
174 | | * is why we can pass Z_FINISH here. As we have precomputed the |
175 | | * deflated buffer's size via `deflateBound()` this function is |
176 | | * guaranteed to succeed according to the zlib documentation. |
177 | | */ |
178 | 0 | ret = deflate(w->zstream, Z_FINISH); |
179 | 0 | if (ret != Z_STREAM_END) |
180 | 0 | return REFTABLE_ZLIB_ERROR; |
181 | | |
182 | | /* |
183 | | * Overwrite the uncompressed data we have already written and |
184 | | * adjust the `next` pointer to point right after the |
185 | | * compressed data. |
186 | | */ |
187 | 0 | memcpy(w->buf + block_header_skip, w->compressed, |
188 | 0 | w->zstream->total_out); |
189 | 0 | w->next = w->zstream->total_out + block_header_skip; |
190 | 0 | } |
191 | | |
192 | 0 | return w->next; |
193 | 0 | } |
194 | | |
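For orientation, the byte layout that block_writer_finish() leaves behind follows directly from the put_be24()/put_be16() calls above (a summary sketch, not authoritative format documentation):

	/*
	 * [header_off bytes of file header, first block of the table only]
	 * [1 byte   block type: 'r', 'o', 'i' or 'g']
	 * [3 bytes  big-endian block length; for log blocks the uncompressed length]
	 * [prefix-compressed records]
	 * [3 bytes  big-endian restart offset]  x restart_len
	 * [2 bytes  big-endian restart count]
	 *
	 * For log ('g') blocks, everything after the 4-byte block header is
	 * additionally zlib-deflated in place.
	 */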
195 | | int block_reader_init(struct block_reader *br, struct reftable_block *block, |
196 | | uint32_t header_off, uint32_t table_block_size, |
197 | | int hash_size) |
198 | 0 | { |
199 | 0 | uint32_t full_block_size = table_block_size; |
200 | 0 | uint8_t typ = block->data[header_off]; |
201 | 0 | uint32_t sz = get_be24(block->data + header_off + 1); |
202 | 0 | int err = 0; |
203 | 0 | uint16_t restart_count = 0; |
204 | 0 | uint32_t restart_start = 0; |
205 | 0 | uint8_t *restart_bytes = NULL; |
206 | |
207 | 0 | reftable_block_done(&br->block); |
208 | |
209 | 0 | if (!reftable_is_block_type(typ)) { |
210 | 0 | err = REFTABLE_FORMAT_ERROR; |
211 | 0 | goto done; |
212 | 0 | } |
213 | | |
214 | 0 | if (typ == BLOCK_TYPE_LOG) { |
215 | 0 | uint32_t block_header_skip = 4 + header_off; |
216 | 0 | uLong dst_len = sz - block_header_skip; |
217 | 0 | uLong src_len = block->len - block_header_skip; |
218 | | |
219 | | /* Log blocks specify the *uncompressed* size in their header. */ |
220 | 0 | REFTABLE_ALLOC_GROW(br->uncompressed_data, sz, |
221 | 0 | br->uncompressed_cap); |
222 | | |
223 | | /* Copy over the block header verbatim. It's not compressed. */ |
224 | 0 | memcpy(br->uncompressed_data, block->data, block_header_skip); |
225 | |
226 | 0 | if (!br->zstream) { |
227 | 0 | REFTABLE_CALLOC_ARRAY(br->zstream, 1); |
228 | 0 | err = inflateInit(br->zstream); |
229 | 0 | } else { |
230 | 0 | err = inflateReset(br->zstream); |
231 | 0 | } |
232 | 0 | if (err != Z_OK) { |
233 | 0 | err = REFTABLE_ZLIB_ERROR; |
234 | 0 | goto done; |
235 | 0 | } |
236 | | |
237 | 0 | br->zstream->next_in = block->data + block_header_skip; |
238 | 0 | br->zstream->avail_in = src_len; |
239 | 0 | br->zstream->next_out = br->uncompressed_data + block_header_skip; |
240 | 0 | br->zstream->avail_out = dst_len; |
241 | | |
242 | | /* |
243 | | * We know both input as well as output size, and we know that |
244 | | * the sizes should never be bigger than `uInt_MAX` because |
245 | | * blocks can at most be 16MB large. We can thus use `Z_FINISH` |
246 | | * here to instruct zlib to inflate the data in one go, which |
247 | | * is more efficient than using `Z_NO_FLUSH`. |
248 | | */ |
249 | 0 | err = inflate(br->zstream, Z_FINISH); |
250 | 0 | if (err != Z_STREAM_END) { |
251 | 0 | err = REFTABLE_ZLIB_ERROR; |
252 | 0 | goto done; |
253 | 0 | } |
254 | 0 | err = 0; |
255 | |
256 | 0 | if (br->zstream->total_out + block_header_skip != sz) { |
257 | 0 | err = REFTABLE_FORMAT_ERROR; |
258 | 0 | goto done; |
259 | 0 | } |
260 | | |
261 | | /* We're done with the input data. */ |
262 | 0 | reftable_block_done(block); |
263 | 0 | block->data = br->uncompressed_data; |
264 | 0 | block->len = sz; |
265 | 0 | full_block_size = src_len + block_header_skip - br->zstream->avail_in; |
266 | 0 | } else if (full_block_size == 0) { |
267 | 0 | full_block_size = sz; |
268 | 0 | } else if (sz < full_block_size && sz < block->len && |
269 | 0 | block->data[sz] != 0) { |
270 | | /* If the block is smaller than the full block size, it is |
271 | | padded (data followed by '\0') or the next block is |
272 | | unaligned. */ |
273 | 0 | full_block_size = sz; |
274 | 0 | } |
275 | | |
276 | 0 | restart_count = get_be16(block->data + sz - 2); |
277 | 0 | restart_start = sz - 2 - 3 * restart_count; |
278 | 0 | restart_bytes = block->data + restart_start; |
279 | | |
280 | | /* transfer ownership. */ |
281 | 0 | br->block = *block; |
282 | 0 | block->data = NULL; |
283 | 0 | block->len = 0; |
284 | |
285 | 0 | br->hash_size = hash_size; |
286 | 0 | br->block_len = restart_start; |
287 | 0 | br->full_block_size = full_block_size; |
288 | 0 | br->header_off = header_off; |
289 | 0 | br->restart_count = restart_count; |
290 | 0 | br->restart_bytes = restart_bytes; |
291 | |
292 | 0 | done: |
293 | 0 | return err; |
294 | 0 | } |
295 | | |
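The read path then looks roughly as follows (a sketch assuming block has already been filled by a block source; error handling trimmed):

	struct block_reader br = { 0 };
	struct block_iter it = { .last_key = STRBUF_INIT, .scratch = STRBUF_INIT };
	struct reftable_record rec;
	int err;

	err = block_reader_init(&br, &block, 24 /* header_off */, 4096, 20 /* SHA-1 */);
	reftable_record_init(&rec, block_reader_type(&br));

	block_iter_seek_start(&it, &br);
	while ((err = block_iter_next(&it, &rec)) == 0)
		; /* use rec */

	block_iter_close(&it);
	reftable_record_release(&rec);
	block_reader_release(&br);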
296 | | void block_reader_release(struct block_reader *br) |
297 | 0 | { |
298 | 0 | inflateEnd(br->zstream); |
299 | 0 | reftable_free(br->zstream); |
300 | 0 | reftable_free(br->uncompressed_data); |
301 | 0 | reftable_block_done(&br->block); |
302 | 0 | } |
303 | | |
304 | | uint8_t block_reader_type(const struct block_reader *r) |
305 | 0 | { |
306 | 0 | return r->block.data[r->header_off]; |
307 | 0 | } |
308 | | |
309 | | int block_reader_first_key(const struct block_reader *br, struct strbuf *key) |
310 | 0 | { |
311 | 0 | int off = br->header_off + 4, n; |
312 | 0 | struct string_view in = { |
313 | 0 | .buf = br->block.data + off, |
314 | 0 | .len = br->block_len - off, |
315 | 0 | }; |
316 | 0 | uint8_t extra = 0; |
317 | |
318 | 0 | strbuf_reset(key); |
319 | |
320 | 0 | n = reftable_decode_key(key, &extra, in); |
321 | 0 | if (n < 0) |
322 | 0 | return n; |
323 | 0 | if (!key->len) |
324 | 0 | return REFTABLE_FORMAT_ERROR; |
325 | | |
326 | 0 | return 0; |
327 | 0 | } |
328 | | |
329 | | static uint32_t block_reader_restart_offset(const struct block_reader *br, size_t idx) |
330 | 0 | { |
331 | 0 | return get_be24(br->restart_bytes + 3 * idx); |
332 | 0 | } |
333 | | |
334 | | void block_iter_seek_start(struct block_iter *it, const struct block_reader *br) |
335 | 0 | { |
336 | 0 | it->block = br->block.data; |
337 | 0 | it->block_len = br->block_len; |
338 | 0 | it->hash_size = br->hash_size; |
339 | 0 | strbuf_reset(&it->last_key); |
340 | 0 | it->next_off = br->header_off + 4; |
341 | 0 | } |
342 | | |
343 | | struct restart_needle_less_args { |
344 | | int error; |
345 | | struct strbuf needle; |
346 | | const struct block_reader *reader; |
347 | | }; |
348 | | |
349 | | static int restart_needle_less(size_t idx, void *_args) |
350 | 0 | { |
351 | 0 | struct restart_needle_less_args *args = _args; |
352 | 0 | uint32_t off = block_reader_restart_offset(args->reader, idx); |
353 | 0 | struct string_view in = { |
354 | 0 | .buf = args->reader->block.data + off, |
355 | 0 | .len = args->reader->block_len - off, |
356 | 0 | }; |
357 | 0 | uint64_t prefix_len, suffix_len; |
358 | 0 | uint8_t extra; |
359 | 0 | int n; |
360 | | |
361 | | /* |
362 | | * Records at restart points are stored without prefix compression, so |
363 | | * there is no need to fully decode the record key here. This removes |
364 | | * the need for allocating memory. |
365 | | */ |
366 | 0 | n = reftable_decode_keylen(in, &prefix_len, &suffix_len, &extra); |
367 | 0 | if (n < 0 || prefix_len) { |
368 | 0 | args->error = 1; |
369 | 0 | return -1; |
370 | 0 | } |
371 | | |
372 | 0 | string_view_consume(&in, n); |
373 | 0 | if (suffix_len > in.len) { |
374 | 0 | args->error = 1; |
375 | 0 | return -1; |
376 | 0 | } |
377 | | |
378 | 0 | n = memcmp(args->needle.buf, in.buf, |
379 | 0 | args->needle.len < suffix_len ? args->needle.len : suffix_len); |
380 | 0 | if (n) |
381 | 0 | return n < 0; |
382 | 0 | return args->needle.len < suffix_len; |
383 | 0 | } |
384 | | |
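As a concrete illustration of the property relied on above (example keys invented for the purpose): if the previous key is "refs/heads/main" and the current key is "refs/heads/master", an ordinary entry stores prefix_len = 13 (the shared "refs/heads/ma") followed by the suffix "ster", whereas an entry sitting at a restart point always stores prefix_len = 0 and its full key. That is why restart_needle_less() can compare the needle against the decoded suffix alone, without allocating or reassembling a key.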
385 | | int block_iter_next(struct block_iter *it, struct reftable_record *rec) |
386 | 0 | { |
387 | 0 | struct string_view in = { |
388 | 0 | .buf = (unsigned char *) it->block + it->next_off, |
389 | 0 | .len = it->block_len - it->next_off, |
390 | 0 | }; |
391 | 0 | struct string_view start = in; |
392 | 0 | uint8_t extra = 0; |
393 | 0 | int n = 0; |
394 | |
395 | 0 | if (it->next_off >= it->block_len) |
396 | 0 | return 1; |
397 | | |
398 | 0 | n = reftable_decode_key(&it->last_key, &extra, in); |
399 | 0 | if (n < 0) |
400 | 0 | return -1; |
401 | 0 | if (!it->last_key.len) |
402 | 0 | return REFTABLE_FORMAT_ERROR; |
403 | | |
404 | 0 | string_view_consume(&in, n); |
405 | 0 | n = reftable_record_decode(rec, it->last_key, extra, in, it->hash_size, |
406 | 0 | &it->scratch); |
407 | 0 | if (n < 0) |
408 | 0 | return -1; |
409 | 0 | string_view_consume(&in, n); |
410 | |
411 | 0 | it->next_off += start.len - in.len; |
412 | 0 | return 0; |
413 | 0 | } |
414 | | |
415 | | void block_iter_reset(struct block_iter *it) |
416 | 0 | { |
417 | 0 | strbuf_reset(&it->last_key); |
418 | 0 | it->next_off = 0; |
419 | 0 | it->block = NULL; |
420 | 0 | it->block_len = 0; |
421 | 0 | it->hash_size = 0; |
422 | 0 | } |
423 | | |
424 | | void block_iter_close(struct block_iter *it) |
425 | 0 | { |
426 | 0 | strbuf_release(&it->last_key); |
427 | 0 | strbuf_release(&it->scratch); |
428 | 0 | } |
429 | | |
430 | | int block_iter_seek_key(struct block_iter *it, const struct block_reader *br, |
431 | | struct strbuf *want) |
432 | 0 | { |
433 | 0 | struct restart_needle_less_args args = { |
434 | 0 | .needle = *want, |
435 | 0 | .reader = br, |
436 | 0 | }; |
437 | 0 | struct reftable_record rec; |
438 | 0 | int err = 0; |
439 | 0 | size_t i; |
440 | | |
441 | | /* |
442 | | * Perform a binary search over the block's restart points, which |
443 | | * avoids doing a linear scan over the whole block. Like this, we |
444 | | * identify the section of the block that should contain our key. |
445 | | * |
446 | | * Note that we explicitly search for the first restart point _greater_ |
447 | | * than the sought-after record, not _greater or equal_ to it. In case |
448 | | * the sought-after record is located directly at the restart point we |
449 | | * would otherwise start doing the linear search at the preceding |
450 | | * restart point. While that works alright, we would end up scanning |
451 | | * too many records. |
452 | | */ |
453 | 0 | i = binsearch(br->restart_count, &restart_needle_less, &args); |
454 | 0 | if (args.error) { |
455 | 0 | err = REFTABLE_FORMAT_ERROR; |
456 | 0 | goto done; |
457 | 0 | } |
458 | | |
459 | | /* |
460 | | * Now there are multiple cases: |
461 | | * |
462 | | * - `i == 0`: The wanted record is smaller than the record found at |
463 | | * the first restart point. As the first restart point is the first |
464 | | * record in the block, our wanted record cannot be located in this |
465 | | * block at all. We still need to position the iterator so that the |
466 | | * next call to `block_iter_next()` will yield an end-of-iterator |
467 | | * signal. |
468 | | * |
469 | | * - `i == restart_count`: The wanted record was not found at any of |
470 | | * the restart points. As there is no restart point at the end of |
471 | | * the block, the record may thus be contained in its last section. |
472 | | * |
473 | | * - `i > 0`: The wanted record must be contained in the section |
474 | | * before the found restart point. We thus do a linear search |
475 | | * starting from the preceding restart point. |
476 | | */ |
477 | 0 | if (i > 0) |
478 | 0 | it->next_off = block_reader_restart_offset(br, i - 1); |
479 | 0 | else |
480 | 0 | it->next_off = br->header_off + 4; |
481 | 0 | it->block = br->block.data; |
482 | 0 | it->block_len = br->block_len; |
483 | 0 | it->hash_size = br->hash_size; |
484 | |
485 | 0 | reftable_record_init(&rec, block_reader_type(br)); |
486 | | |
487 | | /* |
488 | | * We're looking for the last entry less than the wanted key so that |
489 | | * the next call to `block_iter_next()` would yield the wanted |
490 | | * record. We thus don't want to position our reader at the sought |
491 | | * after record, but one before. To do so, we have to go one entry too |
492 | | * far and then back up. |
493 | | */ |
494 | 0 | while (1) { |
495 | 0 | size_t prev_off = it->next_off; |
496 | |
497 | 0 | err = block_iter_next(it, &rec); |
498 | 0 | if (err < 0) |
499 | 0 | goto done; |
500 | 0 | if (err > 0) { |
501 | 0 | it->next_off = prev_off; |
502 | 0 | err = 0; |
503 | 0 | goto done; |
504 | 0 | } |
505 | | |
506 | | /* |
507 | | * Check whether the current key is greater or equal to the |
508 | | * sought-after key. In case it is greater we know that the |
509 | | * record does not exist in the block and can thus abort early. |
510 | | * In case it is equal to the sought-after key we have found |
511 | | * the desired record. |
512 | | * |
513 | | * Note that we store the next record's key directly in |
514 | | * `last_key` without restoring the key of the preceding record |
515 | | * in case we need to go one record back. This is safe to do as |
516 | | * `block_iter_next()` would return the ref whose key is equal |
517 | | * to `last_key` now, and naturally all keys share a prefix |
518 | | * with themselves. |
519 | | */ |
520 | 0 | reftable_record_key(&rec, &it->last_key); |
521 | 0 | if (strbuf_cmp(&it->last_key, want) >= 0) { |
522 | 0 | it->next_off = prev_off; |
523 | 0 | goto done; |
524 | 0 | } |
525 | 0 | } |
526 | | |
527 | 0 | done: |
528 | 0 | reftable_record_release(&rec); |
529 | 0 | return err; |
530 | 0 | } |
531 | | |
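A point lookup built on the seek routine above might look like this (a sketch; br is assumed to be an initialized block_reader and rec a record of the matching type):

	struct block_iter it = { .last_key = STRBUF_INIT, .scratch = STRBUF_INIT };
	struct strbuf want = STRBUF_INIT;
	int err;

	strbuf_addstr(&want, "refs/heads/main"); /* hypothetical key */
	err = block_iter_seek_key(&it, &br, &want);
	if (!err)
		err = block_iter_next(&it, &rec); /* first record with key >= want, or > 0 at end */

	strbuf_release(&want);
	block_iter_close(&it);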
532 | | void block_writer_release(struct block_writer *bw) |
533 | 0 | { |
534 | 0 | deflateEnd(bw->zstream); |
535 | 0 | FREE_AND_NULL(bw->zstream); |
536 | 0 | FREE_AND_NULL(bw->restarts); |
537 | 0 | FREE_AND_NULL(bw->compressed); |
538 | 0 | strbuf_release(&bw->last_key); |
539 | | /* the block is not owned. */ |
540 | 0 | } |
541 | | |
542 | | void reftable_block_done(struct reftable_block *blockp) |
543 | 0 | { |
544 | 0 | struct reftable_block_source source = blockp->source; |
545 | 0 | if (blockp && source.ops) |
546 | 0 | source.ops->return_block(source.arg, blockp); |
547 | 0 | blockp->data = NULL; |
548 | 0 | blockp->len = 0; |
549 | 0 | blockp->source.ops = NULL; |
550 | 0 | blockp->source.arg = NULL; |
551 | 0 | } |