/src/leveldb/table/block_builder.cc
Line | Count | Source |
1 | | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
2 | | // Use of this source code is governed by a BSD-style license that can be |
3 | | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
4 | | // |
5 | | // BlockBuilder generates blocks where keys are prefix-compressed: |
6 | | // |
7 | | // When we store a key, we drop the prefix shared with the previous |
8 | | // string. This helps reduce the space requirement significantly. |
9 | | // Furthermore, once every K keys, we do not apply the prefix |
10 | | // compression and store the entire key. We call this a "restart |
11 | | // point". The tail end of the block stores the offsets of all of the |
12 | | // restart points, and can be used to do a binary search when looking |
13 | | // for a particular key. Values are stored as-is (without compression) |
14 | | // immediately following the corresponding key. |
15 | | // |
16 | | // An entry for a particular key-value pair has the form: |
17 | | // shared_bytes: varint32 |
18 | | // unshared_bytes: varint32 |
19 | | // value_length: varint32 |
20 | | // key_delta: char[unshared_bytes] |
21 | | // value: char[value_length] |
22 | | // shared_bytes == 0 for restart points. |
23 | | // |
24 | | // The trailer of the block has the form: |
25 | | // restarts: uint32[num_restarts] |
26 | | // num_restarts: uint32 |
27 | | // restarts[i] contains the offset within the block of the ith restart point. |
28 | | |
29 | | #include "table/block_builder.h" |
30 | | |
31 | | #include <algorithm> |
32 | | #include <cassert> |
33 | | |
34 | | #include "leveldb/comparator.h" |
35 | | #include "leveldb/options.h" |
36 | | #include "util/coding.h" |
37 | | |
38 | | namespace leveldb { |
39 | | |
40 | | BlockBuilder::BlockBuilder(const Options* options) |
41 | 228k | : options_(options), restarts_(), counter_(0), finished_(false) { |
42 | 228k | assert(options->block_restart_interval >= 1); |
43 | 228k | restarts_.push_back(0); // First restart point is at offset 0 |
44 | 228k | } |
45 | | |
46 | 240k | void BlockBuilder::Reset() { |
47 | 240k | buffer_.clear(); |
48 | 240k | restarts_.clear(); |
49 | 240k | restarts_.push_back(0); // First restart point is at offset 0 |
50 | 240k | counter_ = 0; |
51 | 240k | finished_ = false; |
52 | 240k | last_key_.clear(); |
53 | 240k | } |
54 | | |
55 | 1.79M | size_t BlockBuilder::CurrentSizeEstimate() const { |
56 | 1.79M | return (buffer_.size() + // Raw data buffer |
57 | 1.79M | restarts_.size() * sizeof(uint32_t) + // Restart array |
58 | 1.79M | sizeof(uint32_t)); // Restart array length |
59 | 1.79M | } |
60 | | |
61 | 240k | Slice BlockBuilder::Finish() { |
62 | | // Append restart array |
63 | 593k | for (size_t i = 0; i < restarts_.size(); i++) { |
64 | 352k | PutFixed32(&buffer_, restarts_[i]); |
65 | 352k | } |
66 | 240k | PutFixed32(&buffer_, restarts_.size()); |
67 | 240k | finished_ = true; |
68 | 240k | return Slice(buffer_); |
69 | 240k | } |
70 | | |
71 | 1.88M | void BlockBuilder::Add(const Slice& key, const Slice& value) { |
72 | 1.88M | Slice last_key_piece(last_key_); |
73 | 1.88M | assert(!finished_); |
74 | 1.88M | assert(counter_ <= options_->block_restart_interval); |
75 | 1.88M | assert(buffer_.empty() // No values yet? |
76 | 1.88M | || options_->comparator->Compare(key, last_key_piece) > 0); |
77 | 1.88M | size_t shared = 0; |
78 | 1.88M | if (counter_ < options_->block_restart_interval) { |
79 | | // See how much sharing to do with previous string |
80 | 1.77M | const size_t min_length = std::min(last_key_piece.size(), key.size()); |
81 | 7.00M | while ((shared < min_length) && (last_key_piece[shared] == key[shared])) { |
82 | 5.23M | shared++; |
83 | 5.23M | } |
84 | 1.77M | } else { |
85 | | // Restart compression |
86 | 112k | restarts_.push_back(buffer_.size()); |
87 | 112k | counter_ = 0; |
88 | 112k | } |
89 | 1.88M | const size_t non_shared = key.size() - shared; |
90 | | |
91 | | // Add "<shared><non_shared><value_size>" to buffer_ |
92 | 1.88M | PutVarint32(&buffer_, shared); |
93 | 1.88M | PutVarint32(&buffer_, non_shared); |
94 | 1.88M | PutVarint32(&buffer_, value.size()); |
95 | | |
96 | | // Add string delta to buffer_ followed by value |
97 | 1.88M | buffer_.append(key.data() + shared, non_shared); |
98 | 1.88M | buffer_.append(value.data(), value.size()); |
99 | | |
100 | | // Update state |
101 | 1.88M | last_key_.resize(shared); |
102 | 1.88M | last_key_.append(key.data() + shared, non_shared); |
103 | | assert(Slice(last_key_) == key); |
104 | 1.88M | counter_++; |
105 | 1.88M | } |
106 | | |
107 | | } // namespace leveldb |