/src/leveldb/table/block.cc
Line | Count | Source |
1 | | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2 | | // Use of this source code is governed by a BSD-style license that can be
3 | | // found in the LICENSE file. See the AUTHORS file for names of contributors.
4 | | //
5 | | // Decodes the blocks generated by block_builder.cc.
6 | |
7 | | #include "table/block.h"
8 | |
9 | | #include <algorithm>
10 | | #include <cstdint>
11 | | #include <vector>
12 | |
13 | | #include "leveldb/comparator.h"
14 | | #include "table/format.h"
15 | | #include "util/coding.h"
16 | | #include "util/logging.h"
17 | |
18 | | namespace leveldb {
19 | | |
20 | 4.01M | inline uint32_t Block::NumRestarts() const { |
21 | 4.01M | assert(size_ >= sizeof(uint32_t)); |
22 | 4.01M | return DecodeFixed32(data_ + size_ - sizeof(uint32_t)); |
23 | 4.01M | } |
24 | | |
25 | | Block::Block(const BlockContents& contents) |
26 | 1.24M | : data_(contents.data.data()), |
27 | 1.24M | size_(contents.data.size()), |
28 | 1.24M | owned_(contents.heap_allocated) { |
29 | 1.24M | if (size_ < sizeof(uint32_t)) { |
30 | 0 | size_ = 0; // Error marker |
31 | 1.24M | } else { |
32 | 1.24M | size_t max_restarts_allowed = (size_ - sizeof(uint32_t)) / sizeof(uint32_t); |
33 | 1.24M | if (NumRestarts() > max_restarts_allowed) { |
34 | | // The size is too small for NumRestarts() |
35 | 0 | size_ = 0; |
36 | 1.24M | } else { |
37 | 1.24M | restart_offset_ = size_ - (1 + NumRestarts()) * sizeof(uint32_t); |
38 | 1.24M | } |
39 | 1.24M | } |
40 | 1.24M | } |
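A quick sketch (not part of the measured source) restating the constructor's trailer arithmetic: a block is laid out as entry data, then NumRestarts() fixed32 restart offsets, then one final fixed32 holding the restart count, so the restart array must begin at size_ - (1 + NumRestarts()) * sizeof(uint32_t). The hypothetical helper below only mirrors the validation above, using DecodeFixed32 from util/coding.h (already included).

// Hypothetical mirror of Block's constructor checks: given a raw block,
// compute where the restart array starts, or return false if the trailer
// cannot be trusted.
static bool ComputeRestartOffset(const char* data, size_t size,
                                 uint32_t* restart_offset) {
  if (size < sizeof(uint32_t)) return false;  // no room for the restart count
  const uint32_t num_restarts = DecodeFixed32(data + size - sizeof(uint32_t));
  const size_t max_allowed = (size - sizeof(uint32_t)) / sizeof(uint32_t);
  if (num_restarts > max_allowed) return false;  // restart array cannot fit
  *restart_offset =
      static_cast<uint32_t>(size - (1 + num_restarts) * sizeof(uint32_t));
  return true;
}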
41 | | |
42 | 1.24M | Block::~Block() { |
43 | 1.24M | if (owned_) { |
44 | 0 | delete[] data_; |
45 | 0 | } |
46 | 1.24M | } |
47 | | |
48 | | // Helper routine: decode the next block entry starting at "p", |
49 | | // storing the number of shared key bytes, non_shared key bytes, |
50 | | // and the length of the value in "*shared", "*non_shared", and |
51 | | // "*value_length", respectively. Will not dereference past "limit". |
52 | | // |
53 | | // If any errors are detected, returns nullptr. Otherwise, returns a |
54 | | // pointer to the key delta (just past the three decoded values). |
55 | | static inline const char* DecodeEntry(const char* p, const char* limit, |
56 | | uint32_t* shared, uint32_t* non_shared, |
57 | 7.13M | uint32_t* value_length) { |
58 | 7.13M | if (limit - p < 3) return nullptr; |
59 | 7.13M | *shared = reinterpret_cast<const uint8_t*>(p)[0]; |
60 | 7.13M | *non_shared = reinterpret_cast<const uint8_t*>(p)[1]; |
61 | 7.13M | *value_length = reinterpret_cast<const uint8_t*>(p)[2]; |
62 | 7.13M | if ((*shared | *non_shared | *value_length) < 128) { |
63 | | // Fast path: all three values are encoded in one byte each |
64 | 6.70M | p += 3; |
65 | 6.70M | } else { |
66 | 426k | if ((p = GetVarint32Ptr(p, limit, shared)) == nullptr) return nullptr; |
67 | 426k | if ((p = GetVarint32Ptr(p, limit, non_shared)) == nullptr) return nullptr; |
68 | 426k | if ((p = GetVarint32Ptr(p, limit, value_length)) == nullptr) return nullptr; |
69 | 426k | } |
70 | | |
71 | 7.13M | if (static_cast<uint32_t>(limit - p) < (*non_shared + *value_length)) { |
72 | 0 | return nullptr; |
73 | 0 | } |
74 | 7.13M | return p; |
75 | 7.13M | } |
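For context, the layout DecodeEntry parses is written by BlockBuilder::Add in block_builder.cc: varint32 shared, varint32 non_shared, varint32 value_length, then the non_shared key-delta bytes, then the value bytes. The helper below is a simplified, hypothetical writer-side mirror of that format (AppendEntry is not a real LevelDB function); PutVarint32 comes from util/coding.h.

// Hypothetical writer-side sketch of the entry format decoded above.
static void AppendEntry(std::string* dst, const Slice& last_key,
                        const Slice& key, const Slice& value) {
  // Length of the prefix shared with the previous key in this restart run.
  size_t shared = 0;
  const size_t min_len = std::min(last_key.size(), key.size());
  while (shared < min_len && last_key[shared] == key[shared]) {
    shared++;
  }
  PutVarint32(dst, static_cast<uint32_t>(shared));
  PutVarint32(dst, static_cast<uint32_t>(key.size() - shared));
  PutVarint32(dst, static_cast<uint32_t>(value.size()));
  dst->append(key.data() + shared, key.size() - shared);  // key delta
  dst->append(value.data(), value.size());                // value bytes
}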
76 | | |
77 | | class Block::Iter : public Iterator { |
78 | | private: |
79 | | const Comparator* const comparator_; |
80 | | const char* const data_; // underlying block contents |
81 | | uint32_t const restarts_; // Offset of restart array (list of fixed32) |
82 | | uint32_t const num_restarts_; // Number of uint32_t entries in restart array |
83 | | |
84 | | // current_ is offset in data_ of current entry. >= restarts_ if !Valid |
85 | | uint32_t current_; |
86 | | uint32_t restart_index_; // Index of restart block in which current_ falls |
87 | | std::string key_; |
88 | | Slice value_; |
89 | | Status status_; |
90 | | |
91 | 339k | inline int Compare(const Slice& a, const Slice& b) const { |
92 | 339k | return comparator_->Compare(a, b); |
93 | 339k | } |
94 | | |
95 | | // Return the offset in data_ just past the end of the current entry. |
96 | 8.17M | inline uint32_t NextEntryOffset() const { |
97 | 8.17M | return (value_.data() + value_.size()) - data_; |
98 | 8.17M | } |
99 | | |
100 | 6.90M | uint32_t GetRestartPoint(uint32_t index) { |
101 | 6.90M | assert(index < num_restarts_); |
102 | 6.90M | return DecodeFixed32(data_ + restarts_ + index * sizeof(uint32_t)); |
103 | 6.90M | } |
104 | | |
105 | 1.28M | void SeekToRestartPoint(uint32_t index) { |
106 | 1.28M | key_.clear(); |
107 | 1.28M | restart_index_ = index; |
108 | | // current_ will be fixed by ParseNextKey(); |
109 | | |
110 | | // ParseNextKey() starts at the end of value_, so set value_ accordingly |
111 | 1.28M | uint32_t offset = GetRestartPoint(index); |
112 | 1.28M | value_ = Slice(data_ + offset, 0); |
113 | 1.28M | } |
114 | | |
115 | | public: |
116 | | Iter(const Comparator* comparator, const char* data, uint32_t restarts, |
117 | | uint32_t num_restarts) |
118 | 1.53M | : comparator_(comparator), |
119 | 1.53M | data_(data), |
120 | 1.53M | restarts_(restarts), |
121 | 1.53M | num_restarts_(num_restarts), |
122 | 1.53M | current_(restarts_), |
123 | 1.53M | restart_index_(num_restarts_) { |
124 | 1.53M | assert(num_restarts_ > 0); |
125 | 1.53M | } |
126 | | |
127 | 9.60M | bool Valid() const override { return current_ < restarts_; } |
128 | 1.16M | Status status() const override { return status_; } |
129 | 6.92M | Slice key() const override { |
130 | 6.92M | assert(Valid()); |
131 | 6.92M | return key_; |
132 | 6.92M | } |
133 | 6.00M | Slice value() const override { |
134 | 6.00M | assert(Valid()); |
135 | 6.00M | return value_; |
136 | 6.00M | } |
137 | | |
138 | 6.71M | void Next() override { |
139 | 6.71M | assert(Valid()); |
140 | 6.71M | ParseNextKey(); |
141 | 6.71M | } |
142 | | |
143 | 0 | void Prev() override { |
144 | 0 | assert(Valid()); |
145 | | |
146 | | // Scan backwards to a restart point before current_ |
147 | 0 | const uint32_t original = current_; |
148 | 0 | while (GetRestartPoint(restart_index_) >= original) { |
149 | 0 | if (restart_index_ == 0) { |
150 | | // No more entries |
151 | 0 | current_ = restarts_; |
152 | 0 | restart_index_ = num_restarts_; |
153 | 0 | return; |
154 | 0 | } |
155 | 0 | restart_index_--; |
156 | 0 | } |
157 | | |
158 | 0 | SeekToRestartPoint(restart_index_); |
159 | 0 | do { |
160 | | // Loop until end of current entry hits the start of original entry |
161 | 0 | } while (ParseNextKey() && NextEntryOffset() < original); |
162 | 0 | } |
163 | | |
164 | 115k | void Seek(const Slice& target) override { |
165 | | // Binary search in restart array to find the last restart point |
166 | | // with a key < target |
167 | 115k | uint32_t left = 0; |
168 | 115k | uint32_t right = num_restarts_ - 1; |
169 | 115k | int current_key_compare = 0; |
170 | | |
171 | 115k | if (Valid()) { |
172 | | // If we're already scanning, use the current position as a starting |
173 | | // point. This is beneficial if the key we're seeking to is ahead of the |
174 | | // current position. |
175 | 0 | current_key_compare = Compare(key_, target); |
176 | 0 | if (current_key_compare < 0) { |
177 | | // key_ is smaller than target |
178 | 0 | left = restart_index_; |
179 | 0 | } else if (current_key_compare > 0) { |
180 | 0 | right = restart_index_; |
181 | 0 | } else { |
182 | | // We're seeking to the key we're already at. |
183 | 0 | return; |
184 | 0 | } |
185 | 0 | } |
186 | | |
187 | 158k | while (left < right) { |
188 | 42.9k | uint32_t mid = (left + right + 1) / 2; |
189 | 42.9k | uint32_t region_offset = GetRestartPoint(mid); |
190 | 42.9k | uint32_t shared, non_shared, value_length; |
191 | 42.9k | const char* key_ptr = |
192 | 42.9k | DecodeEntry(data_ + region_offset, data_ + restarts_, &shared, |
193 | 42.9k | &non_shared, &value_length); |
194 | 42.9k | if (key_ptr == nullptr || (shared != 0)) { |
195 | 0 | CorruptionError(); |
196 | 0 | return; |
197 | 0 | } |
198 | 42.9k | Slice mid_key(key_ptr, non_shared); |
199 | 42.9k | if (Compare(mid_key, target) < 0) { |
200 | | // Key at "mid" is smaller than "target". Therefore all |
201 | | // blocks before "mid" are uninteresting. |
202 | 13.3k | left = mid; |
203 | 29.5k | } else { |
204 | | // Key at "mid" is >= "target". Therefore all blocks at or |
205 | | // after "mid" are uninteresting. |
206 | 29.5k | right = mid - 1; |
207 | 29.5k | } |
208 | 42.9k | } |
209 | | |
210 | | // We might be able to use our current position within the restart block. |
211 | | // This is true if we determined the key we desire is in the current block |
212 | | // and comes after the current key.
213 | 115k | assert(current_key_compare == 0 || Valid()); |
214 | 115k | bool skip_seek = left == restart_index_ && current_key_compare < 0; |
215 | 115k | if (!skip_seek) { |
216 | 115k | SeekToRestartPoint(left); |
217 | 115k | } |
218 | | // Linear search (within restart block) for first key >= target |
219 | 296k | while (true) { |
220 | 296k | if (!ParseNextKey()) { |
221 | 465 | return; |
222 | 465 | } |
223 | 296k | if (Compare(key_, target) >= 0) { |
224 | 114k | return; |
225 | 114k | } |
226 | 296k | } |
227 | 115k | } |
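  // A worked example (hypothetical keys, added for illustration) of the
  // search in Seek() above: with restart-point keys ["b", "f", "m", "t"] and
  // target "k", the binary search converges on left == 1 (key "f"), the last
  // restart point whose key is < "k"; SeekToRestartPoint(1) then lets the
  // linear scan walk forward to the first key >= "k".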
228 | | |
229 | 1.17M | void SeekToFirst() override { |
230 | 1.17M | SeekToRestartPoint(0); |
231 | 1.17M | ParseNextKey(); |
232 | 1.17M | } |
233 | | |
234 | 0 | void SeekToLast() override { |
235 | 0 | SeekToRestartPoint(num_restarts_ - 1); |
236 | 0 | while (ParseNextKey() && NextEntryOffset() < restarts_) { |
237 | | // Keep skipping |
238 | 0 | } |
239 | 0 | } |
240 | | |
241 | | private: |
242 | 0 | void CorruptionError() { |
243 | 0 | current_ = restarts_; |
244 | 0 | restart_index_ = num_restarts_; |
245 | 0 | status_ = Status::Corruption("bad entry in block"); |
246 | 0 | key_.clear(); |
247 | 0 | value_.clear(); |
248 | 0 | } |
249 | | |
250 | 8.17M | bool ParseNextKey() { |
251 | 8.17M | current_ = NextEntryOffset(); |
252 | 8.17M | const char* p = data_ + current_; |
253 | 8.17M | const char* limit = data_ + restarts_; // Restarts come right after data |
254 | 8.17M | if (p >= limit) { |
255 | | // No more entries to return. Mark as invalid. |
256 | 1.08M | current_ = restarts_; |
257 | 1.08M | restart_index_ = num_restarts_; |
258 | 1.08M | return false; |
259 | 1.08M | } |
260 | | |
261 | | // Decode next entry |
262 | 7.09M | uint32_t shared, non_shared, value_length; |
263 | 7.09M | p = DecodeEntry(p, limit, &shared, &non_shared, &value_length); |
264 | 7.09M | if (p == nullptr || key_.size() < shared) { |
265 | 0 | CorruptionError(); |
266 | 0 | return false; |
267 | 7.09M | } else { |
268 | 7.09M | key_.resize(shared); |
269 | 7.09M | key_.append(p, non_shared); |
270 | 7.09M | value_ = Slice(p + non_shared, value_length); |
271 | 7.45M | while (restart_index_ + 1 < num_restarts_ && |
272 | 5.58M | GetRestartPoint(restart_index_ + 1) < current_) { |
273 | 361k | ++restart_index_; |
274 | 361k | } |
275 | 7.09M | return true; |
276 | 7.09M | } |
277 | 7.09M | } |
278 | | }; |
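// A worked example (hypothetical keys, added for illustration) of the prefix
// reconstruction in ParseNextKey() above: if the previous key in the restart
// run was "apple" and the next entry was encoded with shared=4, non_shared=1
// and key delta "y", then
//   key_.resize(4);       // key_ == "appl"
//   key_.append("y", 1);  // key_ == "apply"
// rebuilds the full key, so only the one-byte suffix is stored in the block.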
279 | | |
280 | 1.53M | Iterator* Block::NewIterator(const Comparator* comparator) { |
281 | 1.53M | if (size_ < sizeof(uint32_t)) { |
282 | 0 | return NewErrorIterator(Status::Corruption("bad block contents")); |
283 | 0 | } |
284 | 1.53M | const uint32_t num_restarts = NumRestarts(); |
285 | 1.53M | if (num_restarts == 0) { |
286 | 0 | return NewEmptyIterator(); |
287 | 1.53M | } else { |
288 | 1.53M | return new Iter(comparator, data_, restart_offset_, num_restarts); |
289 | 1.53M | } |
290 | 1.53M | } |
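A hypothetical end-to-end sketch (not part of this file) of how a decoded block is typically consumed: BlockContents normally comes from ReadBlock() in table/format.cc, the comparator must be the one the block was built with, and an exact-match lookup is built on Seek()'s postcondition that the iterator lands on the first key >= target.

// Hypothetical usage sketch: point lookup inside one decoded block.
static bool BlockGet(const BlockContents& contents, const Comparator* cmp,
                     const Slice& target, std::string* value) {
  Block block(contents);                // takes ownership if heap_allocated
  Iterator* iter = block.NewIterator(cmp);
  iter->Seek(target);                   // first entry with key >= target
  const bool found =
      iter->Valid() && cmp->Compare(iter->key(), target) == 0;
  if (found) {
    // Copy the value out while the block (and the slices into it) is alive.
    value->assign(iter->value().data(), iter->value().size());
  }
  const bool ok = iter->status().ok();
  delete iter;                          // must precede the block's destruction
  return found && ok;
}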
291 | | |
292 | | } // namespace leveldb |