/src/git/reftable/writer.c
Line | Count | Source |
1 | | /* |
2 | | Copyright 2020 Google LLC |
3 | | |
4 | | Use of this source code is governed by a BSD-style |
5 | | license that can be found in the LICENSE file or at |
6 | | https://developers.google.com/open-source/licenses/bsd |
7 | | */ |
8 | | |
9 | | #include "writer.h" |
10 | | |
11 | | #include "system.h" |
12 | | |
13 | | #include "block.h" |
14 | | #include "constants.h" |
15 | | #include "record.h" |
16 | | #include "tree.h" |
17 | | #include "reftable-error.h" |
18 | | |
19 | | /* finishes a block, and writes it to storage */ |
20 | | static int writer_flush_block(struct reftable_writer *w); |
21 | | |
22 | | /* deallocates memory related to the index */ |
23 | | static void writer_clear_index(struct reftable_writer *w); |
24 | | |
25 | | /* finishes writing an 'r' (refs) or 'g' (reflogs) section */ |
26 | | static int writer_finish_public_section(struct reftable_writer *w); |
27 | | |
28 | | static struct reftable_block_stats * |
29 | | writer_reftable_block_stats(struct reftable_writer *w, uint8_t typ) |
30 | 0 | { |
31 | 0 | switch (typ) { |
32 | 0 | case 'r': |
33 | 0 | return &w->stats.ref_stats; |
34 | 0 | case 'o': |
35 | 0 | return &w->stats.obj_stats; |
36 | 0 | case 'i': |
37 | 0 | return &w->stats.idx_stats; |
38 | 0 | case 'g': |
39 | 0 | return &w->stats.log_stats; |
40 | 0 | } |
41 | 0 | abort(); |
42 | 0 | return NULL; |
43 | 0 | } |
44 | | |
45 | | /* write data, then queue `padding` zero bytes to be emitted before the |
46 | | * next write. Returns 0 on success, negative on error. */ |
47 | | static int padded_write(struct reftable_writer *w, uint8_t *data, size_t len, |
48 | | int padding) |
49 | 0 | { |
50 | 0 | int n = 0; |
51 | 0 | if (w->pending_padding > 0) { |
52 | 0 | uint8_t *zeroed = reftable_calloc(w->pending_padding, sizeof(*zeroed)); |
53 | 0 | n = w->write(w->write_arg, zeroed, w->pending_padding); |
54 | 0 | if (n < 0) { |
55 | 0 | reftable_free(zeroed); /* don't leak the scratch buffer on error */ |
56 | 0 | return n; |
57 | 0 | } |
58 | 0 | w->pending_padding = 0; |
59 | 0 | reftable_free(zeroed); |
60 | 0 | } |
61 | 0 | w->pending_padding = padding; |
62 | 0 | n = w->write(w->write_arg, data, len); |
63 | 0 | if (n < 0) |
64 | 0 | return n; |
65 | | /* the padding queued above is emitted at the start of the next call */ |
66 | 0 | return 0; |
67 | 0 | } |
68 | | |
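Note that padding is deferred rather than written eagerly: each call first flushes the padding queued by the previous call, then queues its own. This is what lets `reftable_writer_close()` drop the padding after the final block by simply resetting `pending_padding`. The write callback itself is supplied by the caller of `reftable_new_writer()` below; here is a minimal sketch of a compatible callback that appends to an in-memory buffer (the `membuf` type and the `membuf_write()` name are hypothetical, not part of the reftable API):

#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

struct membuf {
	char *data;
	size_t len, cap;
};

/* Matches the ssize_t (*)(void *, const void *, size_t) signature that
 * reftable_new_writer() expects; returns the byte count or -1 on error. */
static ssize_t membuf_write(void *arg, const void *data, size_t len)
{
	struct membuf *b = arg;

	if (b->len + len > b->cap) {
		size_t cap = (b->len + len) * 2;
		char *p = realloc(b->data, cap);
		if (!p)
			return -1;
		b->data = p;
		b->cap = cap;
	}
	memcpy(b->data + b->len, data, len);
	b->len += len;
	return (ssize_t)len;
}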
69 | | static void options_set_defaults(struct reftable_write_options *opts) |
70 | 0 | { |
71 | 0 | if (opts->restart_interval == 0) { |
72 | 0 | opts->restart_interval = 16; |
73 | 0 | } |
74 | |
75 | 0 | if (opts->hash_id == 0) { |
76 | 0 | opts->hash_id = GIT_SHA1_FORMAT_ID; |
77 | 0 | } |
78 | 0 | if (opts->block_size == 0) { |
79 | 0 | opts->block_size = DEFAULT_BLOCK_SIZE; |
80 | 0 | } |
81 | 0 | } |
82 | | |
83 | | static int writer_version(struct reftable_writer *w) |
84 | 0 | { |
85 | 0 | return (w->opts.hash_id == 0 || w->opts.hash_id == GIT_SHA1_FORMAT_ID) ? |
86 | 0 | 1 : |
87 | 0 | 2; |
88 | 0 | } |
89 | | |
90 | | static int writer_write_header(struct reftable_writer *w, uint8_t *dest) |
91 | 0 | { |
92 | 0 | memcpy(dest, "REFT", 4); |
93 | |
94 | 0 | dest[4] = writer_version(w); |
95 | |
96 | 0 | put_be24(dest + 5, w->opts.block_size); |
97 | 0 | put_be64(dest + 8, w->min_update_index); |
98 | 0 | put_be64(dest + 16, w->max_update_index); |
99 | 0 | if (writer_version(w) == 2) { |
100 | 0 | put_be32(dest + 24, w->opts.hash_id); |
101 | 0 | } |
102 | 0 | return header_size(writer_version(w)); |
103 | 0 | } |
104 | | |
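Concretely, a version-1 header is 24 bytes and a version-2 header is 28: the magic "REFT", one version byte, the block size as a 24-bit big-endian integer, the minimum and maximum update index as 64-bit big-endian integers, and, for version 2 only, the 4-byte hash format ID. A decoding sketch of the same layout (`get_be()` and `parse_header()` are hypothetical helpers for illustration, not functions from this file):

#include <stdint.h>
#include <string.h>

/* Hypothetical big-endian reader used only in these examples. */
static uint64_t get_be(const uint8_t *p, int n)
{
	uint64_t v = 0;
	while (n-- > 0)
		v = (v << 8) | *p++;
	return v;
}

struct header_fields {
	uint8_t version;
	uint32_t block_size;
	uint64_t min_update_index;
	uint64_t max_update_index;
	uint32_t hash_id; /* meaningful for version 2 only */
};

static int parse_header(const uint8_t *p, struct header_fields *out)
{
	if (memcmp(p, "REFT", 4))
		return -1;
	out->version = p[4];
	out->block_size = (uint32_t)get_be(p + 5, 3);
	out->min_update_index = get_be(p + 8, 8);
	out->max_update_index = get_be(p + 16, 8);
	out->hash_id = out->version == 2 ? (uint32_t)get_be(p + 24, 4) : 0;
	return 0;
}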
105 | | static void writer_reinit_block_writer(struct reftable_writer *w, uint8_t typ) |
106 | 0 | { |
107 | 0 | int block_start = 0; |
108 | 0 | if (w->next == 0) { |
109 | 0 | block_start = header_size(writer_version(w)); |
110 | 0 | } |
111 | |
112 | 0 | strbuf_reset(&w->last_key); |
113 | 0 | block_writer_init(&w->block_writer_data, typ, w->block, |
114 | 0 | w->opts.block_size, block_start, |
115 | 0 | hash_size(w->opts.hash_id)); |
116 | 0 | w->block_writer = &w->block_writer_data; |
117 | 0 | w->block_writer->restart_interval = w->opts.restart_interval; |
118 | 0 | } |
119 | | |
120 | | struct reftable_writer * |
121 | | reftable_new_writer(ssize_t (*writer_func)(void *, const void *, size_t), |
122 | | int (*flush_func)(void *), |
123 | | void *writer_arg, const struct reftable_write_options *_opts) |
124 | 0 | { |
125 | 0 | struct reftable_writer *wp = reftable_calloc(1, sizeof(*wp)); |
126 | 0 | struct reftable_write_options opts = {0}; |
127 | |
128 | 0 | if (_opts) |
129 | 0 | opts = *_opts; |
130 | 0 | options_set_defaults(&opts); |
131 | 0 | if (opts.block_size >= (1 << 24)) |
132 | 0 | BUG("configured block size exceeds 16MB"); |
133 | | |
134 | 0 | strbuf_init(&wp->block_writer_data.last_key, 0); |
135 | 0 | strbuf_init(&wp->last_key, 0); |
136 | 0 | REFTABLE_CALLOC_ARRAY(wp->block, opts.block_size); |
137 | 0 | wp->write = writer_func; |
138 | 0 | wp->write_arg = writer_arg; |
139 | 0 | wp->opts = opts; |
140 | 0 | wp->flush = flush_func; |
141 | 0 | writer_reinit_block_writer(wp, BLOCK_TYPE_REF); |
142 | |
143 | 0 | return wp; |
144 | 0 | } |
145 | | |
146 | | void reftable_writer_set_limits(struct reftable_writer *w, uint64_t min, |
147 | | uint64_t max) |
148 | 0 | { |
149 | 0 | w->min_update_index = min; |
150 | 0 | w->max_update_index = max; |
151 | 0 | } |
152 | | |
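Putting the constructor and the limits together, a usage sketch that reuses the hypothetical `membuf_write()` callback from above (a real caller would also check for errors and eventually call `reftable_writer_close()` and `reftable_writer_free()`):

static int noop_flush(void *arg)
{
	(void)arg;
	return 0; /* nothing to sync for an in-memory target */
}

static void example_new_writer(void)
{
	struct membuf buf = { 0 };
	struct reftable_write_options opts = { 0 }; /* zeroed fields get defaults */
	struct reftable_writer *w =
		reftable_new_writer(membuf_write, noop_flush, &buf, &opts);

	/* Every record added later must carry an update_index in [1, 1]. */
	reftable_writer_set_limits(w, 1, 1);

	/* ... add refs and/or logs here, then close and free ... */
}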
153 | | static void writer_release(struct reftable_writer *w) |
154 | 0 | { |
155 | 0 | if (w) { |
156 | 0 | reftable_free(w->block); |
157 | 0 | w->block = NULL; |
158 | 0 | block_writer_release(&w->block_writer_data); |
159 | 0 | w->block_writer = NULL; |
160 | 0 | writer_clear_index(w); |
161 | 0 | strbuf_release(&w->last_key); |
162 | 0 | } |
163 | 0 | } |
164 | | |
165 | | void reftable_writer_free(struct reftable_writer *w) |
166 | 0 | { |
167 | 0 | writer_release(w); |
168 | 0 | reftable_free(w); |
169 | 0 | } |
170 | | |
171 | | struct obj_index_tree_node { |
172 | | struct strbuf hash; |
173 | | uint64_t *offsets; |
174 | | size_t offset_len; |
175 | | size_t offset_cap; |
176 | | }; |
177 | | |
178 | | #define OBJ_INDEX_TREE_NODE_INIT \ |
179 | 0 | { \ |
180 | 0 | .hash = STRBUF_INIT \ |
181 | 0 | } |
182 | | |
183 | | static int obj_index_tree_node_compare(const void *a, const void *b) |
184 | 0 | { |
185 | 0 | return strbuf_cmp(&((const struct obj_index_tree_node *)a)->hash, |
186 | 0 | &((const struct obj_index_tree_node *)b)->hash); |
187 | 0 | } |
188 | | |
189 | | static void writer_index_hash(struct reftable_writer *w, struct strbuf *hash) |
190 | 0 | { |
191 | 0 | uint64_t off = w->next; |
192 | |
193 | 0 | struct obj_index_tree_node want = { .hash = *hash }; |
194 | |
195 | 0 | struct tree_node *node = tree_search(&want, &w->obj_index_tree, |
196 | 0 | &obj_index_tree_node_compare, 0); |
197 | 0 | struct obj_index_tree_node *key = NULL; |
198 | 0 | if (!node) { |
199 | 0 | struct obj_index_tree_node empty = OBJ_INDEX_TREE_NODE_INIT; |
200 | 0 | key = reftable_malloc(sizeof(struct obj_index_tree_node)); |
201 | 0 | *key = empty; |
202 | |
203 | 0 | strbuf_reset(&key->hash); |
204 | 0 | strbuf_addbuf(&key->hash, hash); |
205 | 0 | tree_search((void *)key, &w->obj_index_tree, |
206 | 0 | &obj_index_tree_node_compare, 1); |
207 | 0 | } else { |
208 | 0 | key = node->key; |
209 | 0 | } |
210 | |
211 | 0 | if (key->offset_len > 0 && key->offsets[key->offset_len - 1] == off) { |
212 | 0 | return; |
213 | 0 | } |
214 | | |
215 | 0 | REFTABLE_ALLOC_GROW(key->offsets, key->offset_len + 1, key->offset_cap); |
216 | 0 | key->offsets[key->offset_len++] = off; |
217 | 0 | } |
218 | | |
219 | | static int writer_add_record(struct reftable_writer *w, |
220 | | struct reftable_record *rec) |
221 | 0 | { |
222 | 0 | struct strbuf key = STRBUF_INIT; |
223 | 0 | int err; |
224 | |
225 | 0 | reftable_record_key(rec, &key); |
226 | 0 | if (strbuf_cmp(&w->last_key, &key) >= 0) { |
227 | 0 | err = REFTABLE_API_ERROR; |
228 | 0 | goto done; |
229 | 0 | } |
230 | | |
231 | 0 | strbuf_reset(&w->last_key); |
232 | 0 | strbuf_addbuf(&w->last_key, &key); |
233 | 0 | if (!w->block_writer) |
234 | 0 | writer_reinit_block_writer(w, reftable_record_type(rec)); |
235 | |
236 | 0 | if (block_writer_type(w->block_writer) != reftable_record_type(rec)) |
237 | 0 | BUG("record of type %d added to writer of type %d", |
238 | 0 | reftable_record_type(rec), block_writer_type(w->block_writer)); |
239 | | |
240 | | /* |
241 | | * Try to add the record to the writer. If this succeeds then we're |
242 | | * done. Otherwise the block writer may have hit the block size limit |
243 | | * and needs to be flushed. |
244 | | */ |
245 | 0 | if (!block_writer_add(w->block_writer, rec)) { |
246 | 0 | err = 0; |
247 | 0 | goto done; |
248 | 0 | } |
249 | | |
250 | | /* |
251 | | * The current block is full, so we need to flush and reinitialize the |
252 | | * writer to start writing the next block. |
253 | | */ |
254 | 0 | err = writer_flush_block(w); |
255 | 0 | if (err < 0) |
256 | 0 | goto done; |
257 | 0 | writer_reinit_block_writer(w, reftable_record_type(rec)); |
258 | | |
259 | | /* |
260 | | * Try to add the record to the writer again. If this still fails then |
261 | | * the record does not fit into the block size. |
262 | | * |
263 | | * TODO: it would be great to have `block_writer_add()` return proper |
264 | | * error codes so that we don't have to second-guess the failure |
265 | | * mode here. |
266 | | */ |
267 | 0 | err = block_writer_add(w->block_writer, rec); |
268 | 0 | if (err) { |
269 | 0 | err = REFTABLE_ENTRY_TOO_BIG_ERROR; |
270 | 0 | goto done; |
271 | 0 | } |
272 | | |
273 | 0 | done: |
274 | 0 | strbuf_release(&key); |
275 | 0 | return err; |
276 | 0 | } |
277 | | |
278 | | int reftable_writer_add_ref(struct reftable_writer *w, |
279 | | struct reftable_ref_record *ref) |
280 | 0 | { |
281 | 0 | struct reftable_record rec = { |
282 | 0 | .type = BLOCK_TYPE_REF, |
283 | 0 | .u = { |
284 | 0 | .ref = *ref |
285 | 0 | }, |
286 | 0 | }; |
287 | 0 | int err = 0; |
288 | |
289 | 0 | if (!ref->refname) |
290 | 0 | return REFTABLE_API_ERROR; |
291 | 0 | if (ref->update_index < w->min_update_index || |
292 | 0 | ref->update_index > w->max_update_index) |
293 | 0 | return REFTABLE_API_ERROR; |
294 | | |
295 | 0 | rec.u.ref.update_index -= w->min_update_index; |
296 | |
297 | 0 | err = writer_add_record(w, &rec); |
298 | 0 | if (err < 0) |
299 | 0 | return err; |
300 | | |
301 | 0 | if (!w->opts.skip_index_objects && reftable_ref_record_val1(ref)) { |
302 | 0 | struct strbuf h = STRBUF_INIT; |
303 | 0 | strbuf_add(&h, (char *)reftable_ref_record_val1(ref), |
304 | 0 | hash_size(w->opts.hash_id)); |
305 | 0 | writer_index_hash(w, &h); |
306 | 0 | strbuf_release(&h); |
307 | 0 | } |
308 | |
309 | 0 | if (!w->opts.skip_index_objects && reftable_ref_record_val2(ref)) { |
310 | 0 | struct strbuf h = STRBUF_INIT; |
311 | 0 | strbuf_add(&h, reftable_ref_record_val2(ref), |
312 | 0 | hash_size(w->opts.hash_id)); |
313 | 0 | writer_index_hash(w, &h); |
314 | 0 | strbuf_release(&h); |
315 | 0 | } |
316 | 0 | return 0; |
317 | 0 | } |
318 | | |
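Two contracts are worth calling out here: the `update_index` must fall within the limits set via `reftable_writer_set_limits()`, and `writer_add_record()` requires refnames to arrive in strictly increasing order. A sketch of adding a deletion record, which needs no object ID (field and enum names follow the public reftable API; treat the snippet as illustrative):

static int delete_topic_branch(struct reftable_writer *w)
{
	struct reftable_ref_record ref = {
		.refname = (char *)"refs/heads/topic",
		.update_index = 1, /* must lie within the writer's limits */
		.value_type = REFTABLE_REF_DELETION, /* tombstone: no value payload */
	};

	/* Returns REFTABLE_API_ERROR if "refs/heads/topic" does not sort
	 * strictly after the previously added refname. */
	return reftable_writer_add_ref(w, &ref);
}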
319 | | int reftable_writer_add_refs(struct reftable_writer *w, |
320 | | struct reftable_ref_record *refs, int n) |
321 | 0 | { |
322 | 0 | int err = 0; |
323 | 0 | int i = 0; |
324 | 0 | QSORT(refs, n, reftable_ref_record_compare_name); |
325 | 0 | for (i = 0; err == 0 && i < n; i++) { |
326 | 0 | err = reftable_writer_add_ref(w, &refs[i]); |
327 | 0 | } |
328 | 0 | return err; |
329 | 0 | } |
330 | | |
331 | | static int reftable_writer_add_log_verbatim(struct reftable_writer *w, |
332 | | struct reftable_log_record *log) |
333 | 0 | { |
334 | 0 | struct reftable_record rec = { |
335 | 0 | .type = BLOCK_TYPE_LOG, |
336 | 0 | .u = { |
337 | 0 | .log = *log, |
338 | 0 | }, |
339 | 0 | }; |
340 | 0 | if (w->block_writer && |
341 | 0 | block_writer_type(w->block_writer) == BLOCK_TYPE_REF) { |
342 | 0 | int err = writer_finish_public_section(w); |
343 | 0 | if (err < 0) |
344 | 0 | return err; |
345 | 0 | } |
346 | | |
347 | 0 | w->next -= w->pending_padding; |
348 | 0 | w->pending_padding = 0; |
349 | 0 | return writer_add_record(w, &rec); |
350 | 0 | } |
351 | | |
352 | | int reftable_writer_add_log(struct reftable_writer *w, |
353 | | struct reftable_log_record *log) |
354 | 0 | { |
355 | 0 | char *input_log_message = NULL; |
356 | 0 | struct strbuf cleaned_message = STRBUF_INIT; |
357 | 0 | int err = 0; |
358 | |
359 | 0 | if (log->value_type == REFTABLE_LOG_DELETION) |
360 | 0 | return reftable_writer_add_log_verbatim(w, log); |
361 | | |
362 | 0 | if (!log->refname) |
363 | 0 | return REFTABLE_API_ERROR; |
364 | | |
365 | 0 | input_log_message = log->value.update.message; |
366 | 0 | if (!w->opts.exact_log_message && log->value.update.message) { |
367 | 0 | strbuf_addstr(&cleaned_message, log->value.update.message); |
368 | 0 | while (cleaned_message.len && |
369 | 0 | cleaned_message.buf[cleaned_message.len - 1] == '\n') |
370 | 0 | strbuf_setlen(&cleaned_message, |
371 | 0 | cleaned_message.len - 1); |
372 | 0 | if (strchr(cleaned_message.buf, '\n')) { |
373 | | /* multiple lines not allowed. */ |
374 | 0 | err = REFTABLE_API_ERROR; |
375 | 0 | goto done; |
376 | 0 | } |
377 | 0 | strbuf_addstr(&cleaned_message, "\n"); |
378 | 0 | log->value.update.message = cleaned_message.buf; |
379 | 0 | } |
380 | | |
381 | 0 | err = reftable_writer_add_log_verbatim(w, log); |
382 | 0 | log->value.update.message = input_log_message; |
383 | 0 | done: |
384 | 0 | strbuf_release(&cleaned_message); |
385 | 0 | return err; |
386 | 0 | } |
387 | | |
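Unless `opts.exact_log_message` is set, the function above normalizes the message: trailing newlines are stripped, any embedded newline is rejected with `REFTABLE_API_ERROR`, and exactly one trailing newline is put back. The same rule in isolation, as a sketch built on the strbuf API already used in this file (`normalize_log_message()` is a hypothetical name):

/* "fix bug\n\n" becomes "fix bug\n"; "line1\nline2" is rejected. */
static int normalize_log_message(struct strbuf *out, const char *msg)
{
	strbuf_addstr(out, msg);
	while (out->len && out->buf[out->len - 1] == '\n')
		strbuf_setlen(out, out->len - 1);
	if (strchr(out->buf, '\n'))
		return -1; /* multi-line messages are not representable */
	strbuf_addstr(out, "\n");
	return 0;
}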
388 | | int reftable_writer_add_logs(struct reftable_writer *w, |
389 | | struct reftable_log_record *logs, int n) |
390 | 0 | { |
391 | 0 | int err = 0; |
392 | 0 | int i = 0; |
393 | 0 | QSORT(logs, n, reftable_log_record_compare_key); |
394 | |
395 | 0 | for (i = 0; err == 0 && i < n; i++) { |
396 | 0 | err = reftable_writer_add_log(w, &logs[i]); |
397 | 0 | } |
398 | 0 | return err; |
399 | 0 | } |
400 | | |
401 | | static int writer_finish_section(struct reftable_writer *w) |
402 | 0 | { |
403 | 0 | struct reftable_block_stats *bstats = NULL; |
404 | 0 | uint8_t typ = block_writer_type(w->block_writer); |
405 | 0 | uint64_t index_start = 0; |
406 | 0 | int max_level = 0; |
407 | 0 | size_t threshold = w->opts.unpadded ? 1 : 3; |
408 | 0 | int before_blocks = w->stats.idx_stats.blocks; |
409 | 0 | int err; |
410 | |
411 | 0 | err = writer_flush_block(w); |
412 | 0 | if (err < 0) |
413 | 0 | return err; |
414 | | |
415 | | /* |
416 | | * When the section we are about to index has a lot of blocks then the |
417 | | * index itself may span across multiple blocks, as well. This would |
418 | | * require a linear scan over index blocks only to find the desired |
419 | | * indexed block, which is inefficient. Instead, we write a multi-level |
420 | | * index where index records of level N+1 will refer to index blocks of |
421 | | * level N. This isn't constant time, either, but at least logarithmic. |
422 | | * |
423 | | * This loop handles writing this multi-level index. Note that we write |
424 | | * the lowest-level index pointing to the indexed blocks first. We then |
425 | | * continue writing additional index levels until the current level has |
426 | | * no more records than the threshold so that the highest level will be at |
427 | | * the end of the index section. |
428 | | * |
429 | | * Readers are thus required to start reading the index section from |
430 | | * its end, which is why we set `index_start` to the beginning of the |
431 | | * last index section. |
432 | | */ |
433 | 0 | while (w->index_len > threshold) { |
434 | 0 | struct reftable_index_record *idx = NULL; |
435 | 0 | size_t i, idx_len; |
436 | |
437 | 0 | max_level++; |
438 | 0 | index_start = w->next; |
439 | 0 | writer_reinit_block_writer(w, BLOCK_TYPE_INDEX); |
440 | |
441 | 0 | idx = w->index; |
442 | 0 | idx_len = w->index_len; |
443 | |
444 | 0 | w->index = NULL; |
445 | 0 | w->index_len = 0; |
446 | 0 | w->index_cap = 0; |
447 | 0 | for (i = 0; i < idx_len; i++) { |
448 | 0 | struct reftable_record rec = { |
449 | 0 | .type = BLOCK_TYPE_INDEX, |
450 | 0 | .u = { |
451 | 0 | .idx = idx[i], |
452 | 0 | }, |
453 | 0 | }; |
454 | |
455 | 0 | err = writer_add_record(w, &rec); |
456 | 0 | if (err < 0) |
457 | 0 | return err; |
458 | 0 | } |
459 | | |
460 | 0 | err = writer_flush_block(w); |
461 | 0 | if (err < 0) |
462 | 0 | return err; |
463 | | |
464 | 0 | for (i = 0; i < idx_len; i++) |
465 | 0 | strbuf_release(&idx[i].last_key); |
466 | 0 | reftable_free(idx); |
467 | 0 | } |
468 | | |
469 | | /* |
470 | | * The index may still contain up to `threshold` index records. Clear it |
471 | | * so that these entries don't leak into the next index section. |
472 | | * index section. |
473 | | */ |
474 | 0 | writer_clear_index(w); |
475 | |
476 | 0 | bstats = writer_reftable_block_stats(w, typ); |
477 | 0 | bstats->index_blocks = w->stats.idx_stats.blocks - before_blocks; |
478 | 0 | bstats->index_offset = index_start; |
479 | 0 | bstats->max_index_level = max_level; |
480 | | |
481 | | /* Reinit last_key, as the next section can start with any key. */ |
482 | 0 | strbuf_reset(&w->last_key); |
483 | |
484 | 0 | return 0; |
485 | 0 | } |
486 | | |
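The number of index levels therefore grows logarithmically with the number of blocks in the section. A back-of-the-envelope sketch, assuming each index block holds roughly `fanout` index records (in reality the fanout depends on key lengths, prefix compression, and restart points):

static int index_levels(uint64_t records, uint64_t fanout, uint64_t threshold)
{
	int levels = 0;

	/* Mirrors the `while (w->index_len > threshold)` loop above: each
	 * level collapses `records` entries into ceil(records / fanout)
	 * index blocks, which become the records of the next level. */
	while (records > threshold) {
		records = (records + fanout - 1) / fanout;
		levels++;
	}
	return levels;
}
/* e.g. index_levels(100000, 500, 3) == 2: 100000 -> 200 -> 1 */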
487 | | struct common_prefix_arg { |
488 | | struct strbuf *last; |
489 | | int max; |
490 | | }; |
491 | | |
492 | | static void update_common(void *void_arg, void *key) |
493 | 0 | { |
494 | 0 | struct common_prefix_arg *arg = void_arg; |
495 | 0 | struct obj_index_tree_node *entry = key; |
496 | 0 | if (arg->last) { |
497 | 0 | int n = common_prefix_size(&entry->hash, arg->last); |
498 | 0 | if (n > arg->max) { |
499 | 0 | arg->max = n; |
500 | 0 | } |
501 | 0 | } |
502 | 0 | arg->last = &entry->hash; |
503 | 0 | } |
504 | | |
505 | | struct write_record_arg { |
506 | | struct reftable_writer *w; |
507 | | int err; |
508 | | }; |
509 | | |
510 | | static void write_object_record(void *void_arg, void *key) |
511 | 0 | { |
512 | 0 | struct write_record_arg *arg = void_arg; |
513 | 0 | struct obj_index_tree_node *entry = key; |
514 | 0 | struct reftable_record |
515 | 0 | rec = { .type = BLOCK_TYPE_OBJ, |
516 | 0 | .u.obj = { |
517 | 0 | .hash_prefix = (uint8_t *)entry->hash.buf, |
518 | 0 | .hash_prefix_len = arg->w->stats.object_id_len, |
519 | 0 | .offsets = entry->offsets, |
520 | 0 | .offset_len = entry->offset_len, |
521 | 0 | } }; |
522 | 0 | if (arg->err < 0) |
523 | 0 | goto done; |
524 | | |
525 | 0 | arg->err = block_writer_add(arg->w->block_writer, &rec); |
526 | 0 | if (arg->err == 0) |
527 | 0 | goto done; |
528 | | |
529 | 0 | arg->err = writer_flush_block(arg->w); |
530 | 0 | if (arg->err < 0) |
531 | 0 | goto done; |
532 | | |
533 | 0 | writer_reinit_block_writer(arg->w, BLOCK_TYPE_OBJ); |
534 | 0 | arg->err = block_writer_add(arg->w->block_writer, &rec); |
535 | 0 | if (arg->err == 0) |
536 | 0 | goto done; |
537 | | |
538 | 0 | rec.u.obj.offset_len = 0; |
539 | 0 | arg->err = block_writer_add(arg->w->block_writer, &rec); |
540 | | |
541 | | /* Should be able to write into a fresh block. */ |
542 | 0 | assert(arg->err == 0); |
543 | | |
544 | 0 | done:; |
545 | 0 | } |
546 | | |
547 | | static void object_record_free(void *void_arg UNUSED, void *key) |
548 | 0 | { |
549 | 0 | struct obj_index_tree_node *entry = key; |
550 | |
551 | 0 | FREE_AND_NULL(entry->offsets); |
552 | 0 | strbuf_release(&entry->hash); |
553 | 0 | reftable_free(entry); |
554 | 0 | } |
555 | | |
556 | | static int writer_dump_object_index(struct reftable_writer *w) |
557 | 0 | { |
558 | 0 | struct write_record_arg closure = { .w = w }; |
559 | 0 | struct common_prefix_arg common = { |
560 | 0 | .max = 1, /* obj_id_len should be >= 2. */ |
561 | 0 | }; |
562 | 0 | if (w->obj_index_tree) { |
563 | 0 | infix_walk(w->obj_index_tree, &update_common, &common); |
564 | 0 | } |
565 | 0 | w->stats.object_id_len = common.max + 1; |
566 | |
567 | 0 | writer_reinit_block_writer(w, BLOCK_TYPE_OBJ); |
568 | |
569 | 0 | if (w->obj_index_tree) { |
570 | 0 | infix_walk(w->obj_index_tree, &write_object_record, &closure); |
571 | 0 | } |
572 | |
573 | 0 | if (closure.err < 0) |
574 | 0 | return closure.err; |
575 | 0 | return writer_finish_section(w); |
576 | 0 | } |
577 | | |
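`object_id_len` ends up one byte longer than the longest prefix shared by any two neighboring hashes in sorted order, which is the shortest abbreviation that keeps every stored prefix unique (with a floor of two bytes, since `common.max` starts at 1). A byte-level sketch of the pairwise computation that `update_common()` performs during the in-order walk (the file's own `common_prefix_size()` does the same on strbufs):

static size_t common_prefix_len(const uint8_t *a, const uint8_t *b, size_t n)
{
	size_t i = 0;

	while (i < n && a[i] == b[i])
		i++;
	return i;
}
/* object_id_len = (max common prefix over adjacent sorted hashes) + 1 */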
578 | | static int writer_finish_public_section(struct reftable_writer *w) |
579 | 0 | { |
580 | 0 | uint8_t typ = 0; |
581 | 0 | int err = 0; |
582 | |
583 | 0 | if (!w->block_writer) |
584 | 0 | return 0; |
585 | | |
586 | 0 | typ = block_writer_type(w->block_writer); |
587 | 0 | err = writer_finish_section(w); |
588 | 0 | if (err < 0) |
589 | 0 | return err; |
590 | 0 | if (typ == BLOCK_TYPE_REF && !w->opts.skip_index_objects && |
591 | 0 | w->stats.ref_stats.index_blocks > 0) { |
592 | 0 | err = writer_dump_object_index(w); |
593 | 0 | if (err < 0) |
594 | 0 | return err; |
595 | 0 | } |
596 | | |
597 | 0 | if (w->obj_index_tree) { |
598 | 0 | infix_walk(w->obj_index_tree, &object_record_free, NULL); |
599 | 0 | tree_free(w->obj_index_tree); |
600 | 0 | w->obj_index_tree = NULL; |
601 | 0 | } |
602 | |
603 | 0 | w->block_writer = NULL; |
604 | 0 | return 0; |
605 | 0 | } |
606 | | |
607 | | int reftable_writer_close(struct reftable_writer *w) |
608 | 0 | { |
609 | 0 | uint8_t footer[72]; |
610 | 0 | uint8_t *p = footer; |
611 | 0 | int err = writer_finish_public_section(w); |
612 | 0 | int empty_table = w->next == 0; |
613 | 0 | if (err != 0) |
614 | 0 | goto done; |
615 | 0 | w->pending_padding = 0; |
616 | 0 | if (empty_table) { |
617 | | /* Empty tables need a header anyway. */ |
618 | 0 | uint8_t header[28]; |
619 | 0 | int n = writer_write_header(w, header); |
620 | 0 | err = padded_write(w, header, n, 0); |
621 | 0 | if (err < 0) |
622 | 0 | goto done; |
623 | 0 | } |
624 | | |
625 | 0 | p += writer_write_header(w, footer); |
626 | 0 | put_be64(p, w->stats.ref_stats.index_offset); |
627 | 0 | p += 8; |
628 | 0 | put_be64(p, (w->stats.obj_stats.offset) << 5 | w->stats.object_id_len); |
629 | 0 | p += 8; |
630 | 0 | put_be64(p, w->stats.obj_stats.index_offset); |
631 | 0 | p += 8; |
632 | |
633 | 0 | put_be64(p, w->stats.log_stats.offset); |
634 | 0 | p += 8; |
635 | 0 | put_be64(p, w->stats.log_stats.index_offset); |
636 | 0 | p += 8; |
637 | |
638 | 0 | put_be32(p, crc32(0, footer, p - footer)); |
639 | 0 | p += 4; |
640 | |
641 | 0 | err = w->flush(w->write_arg); |
642 | 0 | if (err < 0) { |
643 | 0 | err = REFTABLE_IO_ERROR; |
644 | 0 | goto done; |
645 | 0 | } |
646 | | |
647 | 0 | err = padded_write(w, footer, footer_size(writer_version(w)), 0); |
648 | 0 | if (err < 0) |
649 | 0 | goto done; |
650 | | |
651 | 0 | if (empty_table) { |
652 | 0 | err = REFTABLE_EMPTY_TABLE_ERROR; |
653 | 0 | goto done; |
654 | 0 | } |
655 | | |
656 | 0 | done: |
657 | 0 | writer_release(w); |
658 | 0 | return err; |
659 | 0 | } |
660 | | |
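The footer thus repeats the header and appends five big-endian 64-bit fields: the ref index offset, the object section offset shifted left by five bits and OR-ed with `object_id_len`, the object index offset, the log section offset, and the log index offset, followed by a CRC-32 over everything before the checksum. That makes 68 bytes for version 1 and 72 for version 2. A validation sketch reusing the hypothetical `get_be()` helper from earlier:

#include <zlib.h> /* crc32(), as used by the writer above */

static int footer_crc_ok(const uint8_t *footer, size_t footer_size)
{
	/* footer_size is 68 for version 1, 72 for version 2 */
	uint32_t want = (uint32_t)get_be(footer + footer_size - 4, 4);

	return crc32(0, footer, footer_size - 4) == want;
}

/* The packed object field decodes as:
 *   obj_offset    = field >> 5;
 *   object_id_len = field & 0x1f;
 */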
661 | | static void writer_clear_index(struct reftable_writer *w) |
662 | 0 | { |
663 | 0 | for (size_t i = 0; w->index && i < w->index_len; i++) |
664 | 0 | strbuf_release(&w->index[i].last_key); |
665 | 0 | FREE_AND_NULL(w->index); |
666 | 0 | w->index_len = 0; |
667 | 0 | w->index_cap = 0; |
668 | 0 | } |
669 | | |
670 | | static int writer_flush_nonempty_block(struct reftable_writer *w) |
671 | 0 | { |
672 | 0 | struct reftable_index_record index_record = { |
673 | 0 | .last_key = STRBUF_INIT, |
674 | 0 | }; |
675 | 0 | uint8_t typ = block_writer_type(w->block_writer); |
676 | 0 | struct reftable_block_stats *bstats; |
677 | 0 | int raw_bytes, padding = 0, err; |
678 | 0 | uint64_t block_typ_off; |
679 | | |
680 | | /* |
681 | | * Finish the current block. This will cause the block writer to emit |
682 | | * restart points and potentially compress records in case we are |
683 | | * writing a log block. |
684 | | * |
685 | | * Note that this is still happening in memory. |
686 | | */ |
687 | 0 | raw_bytes = block_writer_finish(w->block_writer); |
688 | 0 | if (raw_bytes < 0) |
689 | 0 | return raw_bytes; |
690 | | |
691 | | /* |
692 | | * By default, all records except for log records are padded to the |
693 | | * block size. |
694 | | */ |
695 | 0 | if (!w->opts.unpadded && typ != BLOCK_TYPE_LOG) |
696 | 0 | padding = w->opts.block_size - raw_bytes; |
697 | |
698 | 0 | bstats = writer_reftable_block_stats(w, typ); |
699 | 0 | block_typ_off = (bstats->blocks == 0) ? w->next : 0; |
700 | 0 | if (block_typ_off > 0) |
701 | 0 | bstats->offset = block_typ_off; |
702 | 0 | bstats->entries += w->block_writer->entries; |
703 | 0 | bstats->restarts += w->block_writer->restart_len; |
704 | 0 | bstats->blocks++; |
705 | 0 | w->stats.blocks++; |
706 | | |
707 | | /* |
708 | | * If this is the first block we're writing to the table then we need |
709 | | * to also write the reftable header. |
710 | | */ |
711 | 0 | if (!w->next) |
712 | 0 | writer_write_header(w, w->block); |
713 | |
714 | 0 | err = padded_write(w, w->block, raw_bytes, padding); |
715 | 0 | if (err < 0) |
716 | 0 | return err; |
717 | | |
718 | | /* |
719 | | * Add an index record for every block that we're writing. If we end up |
720 | | * having more than a threshold of index records we will end up writing |
721 | | * an index section in `writer_finish_section()`. Each index record |
722 | | * contains the last record key of the block it is indexing as well as |
723 | | * the offset of that block. |
724 | | * |
725 | | * Note that this also applies when flushing index blocks, in which |
726 | | * case we will end up with a multi-level index. |
727 | | */ |
728 | 0 | REFTABLE_ALLOC_GROW(w->index, w->index_len + 1, w->index_cap); |
729 | 0 | index_record.offset = w->next; |
730 | 0 | strbuf_reset(&index_record.last_key); |
731 | 0 | strbuf_addbuf(&index_record.last_key, &w->block_writer->last_key); |
732 | 0 | w->index[w->index_len] = index_record; |
733 | 0 | w->index_len++; |
734 | |
735 | 0 | w->next += padding + raw_bytes; |
736 | 0 | w->block_writer = NULL; |
737 | |
738 | 0 | return 0; |
739 | 0 | } |
740 | | |
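One consequence of this padding rule: in a padded table, every block other than a log block starts on a `block_size` boundary, so a reader can map any byte offset to the start of its containing block with plain arithmetic. A trivial sketch (not applicable to log blocks or to tables written with `opts.unpadded`):

static uint64_t containing_block_start(uint64_t offset, uint32_t block_size)
{
	return offset - (offset % block_size);
}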
741 | | static int writer_flush_block(struct reftable_writer *w) |
742 | 0 | { |
743 | 0 | if (!w->block_writer) |
744 | 0 | return 0; |
745 | 0 | if (w->block_writer->entries == 0) |
746 | 0 | return 0; |
747 | 0 | return writer_flush_nonempty_block(w); |
748 | 0 | } |
749 | | |
750 | | const struct reftable_stats *reftable_writer_stats(struct reftable_writer *w) |
751 | 0 | { |
752 | 0 | return &w->stats; |
753 | 0 | } |