/src/leveldb/table/block.cc
Line | Count | Source |
1 | | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
2 | | // Use of this source code is governed by a BSD-style license that can be |
3 | | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
4 | | // |
5 | | // Decodes the blocks generated by block_builder.cc. |
6 | | |
7 | | #include "table/block.h" |
8 | | |
9 | | #include <algorithm> |
10 | | #include <cstdint> |
11 | | #include <vector> |
12 | | |
13 | | #include "leveldb/comparator.h" |
14 | | #include "table/format.h" |
15 | | #include "util/coding.h" |
16 | | #include "util/logging.h" |
17 | | |
18 | | namespace leveldb { |
19 | | |
20 | 4.11M | inline uint32_t Block::NumRestarts() const { |
21 | 4.11M | assert(size_ >= sizeof(uint32_t)); |
22 | 4.11M | return DecodeFixed32(data_ + size_ - sizeof(uint32_t)); |
23 | 4.11M | } |
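 | | // Block layout note (matching what BlockBuilder produces): a sequence of |
 | | // prefix-compressed entries, followed by the restart array (one fixed32 |
 | | // offset per restart point) and a final fixed32 holding the number of |
 | | // restarts, which NumRestarts() reads back from the block's tail here. |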
24 | | |
25 | | Block::Block(const BlockContents& contents) |
26 | 1.27M | : data_(contents.data.data()), |
27 | 1.27M | size_(contents.data.size()), |
28 | 1.27M | owned_(contents.heap_allocated) { |
29 | 1.27M | if (size_ < sizeof(uint32_t)) { |
30 | 0 | size_ = 0; // Error marker |
31 | 1.27M | } else { |
32 | 1.27M | size_t max_restarts_allowed = (size_ - sizeof(uint32_t)) / sizeof(uint32_t); |
33 | 1.27M | if (NumRestarts() > max_restarts_allowed) { |
34 | | // The size is too small for NumRestarts() |
35 | 0 | size_ = 0; |
36 | 1.27M | } else { |
37 | 1.27M | restart_offset_ = size_ - (1 + NumRestarts()) * sizeof(uint32_t); |
38 | 1.27M | } |
39 | 1.27M | } |
40 | 1.27M | } |
41 | | |
42 | 1.27M | Block::~Block() { |
43 | 1.27M | if (owned_) { |
44 | 0 | delete[] data_; |
45 | 0 | } |
46 | 1.27M | } |
47 | | |
48 | | // Helper routine: decode the next block entry starting at "p", |
49 | | // storing the number of shared key bytes, non_shared key bytes, |
50 | | // and the length of the value in "*shared", "*non_shared", and |
51 | | // "*value_length", respectively. Will not dereference past "limit". |
52 | | // |
53 | | // If any errors are detected, returns nullptr. Otherwise, returns a |
54 | | // pointer to the key delta (just past the three decoded values). |
55 | | static inline const char* DecodeEntry(const char* p, const char* limit, |
56 | | uint32_t* shared, uint32_t* non_shared, |
57 | 7.60M | uint32_t* value_length) { |
58 | 7.60M | if (limit - p < 3) return nullptr; |
59 | 7.60M | *shared = reinterpret_cast<const uint8_t*>(p)[0]; |
60 | 7.60M | *non_shared = reinterpret_cast<const uint8_t*>(p)[1]; |
61 | 7.60M | *value_length = reinterpret_cast<const uint8_t*>(p)[2]; |
62 | 7.60M | if ((*shared | *non_shared | *value_length) < 128) { |
63 | | // Fast path: all three values are encoded in one byte each |
64 | 7.14M | p += 3; |
65 | 7.14M | } else { |
66 | 455k | if ((p = GetVarint32Ptr(p, limit, shared)) == nullptr) return nullptr; |
67 | 455k | if ((p = GetVarint32Ptr(p, limit, non_shared)) == nullptr) return nullptr; |
68 | 455k | if ((p = GetVarint32Ptr(p, limit, value_length)) == nullptr) return nullptr; |
69 | 455k | } |
70 | | |
71 | 7.60M | if (static_cast<uint32_t>(limit - p) < (*non_shared + *value_length)) { |
72 | 0 | return nullptr; |
73 | 0 | } |
74 | 7.60M | return p; |
75 | 7.60M | } |
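 | | // Illustrative example (not part of the on-disk data): if the previous |
 | | // key was "apple" and the next entry stores key "apply" with value "v", |
 | | // the entry begins with the varint32 triple shared=4, non_shared=1, |
 | | // value_length=1 (bytes 04 01 01), followed by the key delta "y" and |
 | | // then the value "v". |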
76 | | |
77 | | class Block::Iter : public Iterator { |
78 | | private: |
79 | | const Comparator* const comparator_; |
80 | | const char* const data_; // underlying block contents |
81 | | uint32_t const restarts_; // Offset of restart array (list of fixed32) |
82 | | uint32_t const num_restarts_; // Number of uint32_t entries in restart array |
83 | | |
84 | | // current_ is offset in data_ of current entry. >= restarts_ if !Valid |
85 | | uint32_t current_; |
86 | | uint32_t restart_index_; // Index of restart block in which current_ falls |
87 | | std::string key_; |
88 | | Slice value_; |
89 | | Status status_; |
90 | | |
91 | 337k | inline int Compare(const Slice& a, const Slice& b) const { |
92 | 337k | return comparator_->Compare(a, b); |
93 | 337k | } |
94 | | |
95 | | // Return the offset in data_ just past the end of the current entry. |
96 | 8.67M | inline uint32_t NextEntryOffset() const { |
97 | 8.67M | return (value_.data() + value_.size()) - data_; |
98 | 8.67M | } |
99 | | |
100 | 7.28M | uint32_t GetRestartPoint(uint32_t index) { |
101 | 7.28M | assert(index < num_restarts_); |
102 | 7.28M | return DecodeFixed32(data_ + restarts_ + index * sizeof(uint32_t)); |
103 | 7.28M | } |
104 | | |
105 | 1.31M | void SeekToRestartPoint(uint32_t index) { |
106 | 1.31M | key_.clear(); |
107 | 1.31M | restart_index_ = index; |
108 | | // current_ will be fixed by ParseNextKey(); |
109 | | |
110 | | // ParseNextKey() starts at the end of value_, so set value_ accordingly |
111 | 1.31M | uint32_t offset = GetRestartPoint(index); |
112 | 1.31M | value_ = Slice(data_ + offset, 0); |
113 | 1.31M | } |
114 | | |
115 | | public: |
116 | | Iter(const Comparator* comparator, const char* data, uint32_t restarts, |
117 | | uint32_t num_restarts) |
118 | 1.57M | : comparator_(comparator), |
119 | 1.57M | data_(data), |
120 | 1.57M | restarts_(restarts), |
121 | 1.57M | num_restarts_(num_restarts), |
122 | 1.57M | current_(restarts_), |
123 | 1.57M | restart_index_(num_restarts_) { |
124 | 1.57M | assert(num_restarts_ > 0); |
125 | 1.57M | } |
126 | | |
127 | 10.0M | bool Valid() const override { return current_ < restarts_; } |
128 | 1.19M | Status status() const override { return status_; } |
129 | 7.32M | Slice key() const override { |
130 | 7.32M | assert(Valid()); |
131 | 7.32M | return key_; |
132 | 7.32M | } |
133 | 6.29M | Slice value() const override { |
134 | 6.29M | assert(Valid()); |
135 | 6.29M | return value_; |
136 | 6.29M | } |
137 | | |
138 | 7.18M | void Next() override { |
139 | 7.18M | assert(Valid()); |
140 | 7.18M | ParseNextKey(); |
141 | 7.18M | } |
142 | | |
143 | 0 | void Prev() override { |
144 | 0 | assert(Valid()); |
145 | | |
146 | | // Scan backwards to a restart point before current_ |
147 | 0 | const uint32_t original = current_; |
148 | 0 | while (GetRestartPoint(restart_index_) >= original) { |
149 | 0 | if (restart_index_ == 0) { |
150 | | // No more entries |
151 | 0 | current_ = restarts_; |
152 | 0 | restart_index_ = num_restarts_; |
153 | 0 | return; |
154 | 0 | } |
155 | 0 | restart_index_--; |
156 | 0 | } |
157 | | |
158 | 0 | SeekToRestartPoint(restart_index_); |
159 | 0 | do { |
160 | | // Loop until end of current entry hits the start of original entry |
161 | 0 | } while (ParseNextKey() && NextEntryOffset() < original); |
162 | 0 | } |
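 | | // Note: because entries are prefix-compressed, Prev() cannot step |
 | | // backwards directly; it rewinds to the nearest restart point before |
 | | // the current entry and re-parses forward until it reaches the entry |
 | | // just before the original position. |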
163 | | |
164 | 112k | void Seek(const Slice& target) override { |
165 | | // Binary search in restart array to find the last restart point |
166 | | // with a key < target |
167 | 112k | uint32_t left = 0; |
168 | 112k | uint32_t right = num_restarts_ - 1; |
169 | 112k | int current_key_compare = 0; |
170 | | |
171 | 112k | if (Valid()) { |
172 | | // If we're already scanning, use the current position as a starting |
173 | | // point. This is beneficial if the key we're seeking to is ahead of the |
174 | | // current position. |
175 | 0 | current_key_compare = Compare(key_, target); |
176 | 0 | if (current_key_compare < 0) { |
177 | | // key_ is smaller than target |
178 | 0 | left = restart_index_; |
179 | 0 | } else if (current_key_compare > 0) { |
180 | 0 | right = restart_index_; |
181 | 0 | } else { |
182 | | // We're seeking to the key we're already at. |
183 | 0 | return; |
184 | 0 | } |
185 | 0 | } |
186 | | |
187 | 155k | while (left < right) { |
188 | 42.8k | uint32_t mid = (left + right + 1) / 2; |
189 | 42.8k | uint32_t region_offset = GetRestartPoint(mid); |
190 | 42.8k | uint32_t shared, non_shared, value_length; |
191 | 42.8k | const char* key_ptr = |
192 | 42.8k | DecodeEntry(data_ + region_offset, data_ + restarts_, &shared, |
193 | 42.8k | &non_shared, &value_length); |
194 | 42.8k | if (key_ptr == nullptr || (shared != 0)) { |
195 | 0 | CorruptionError(); |
196 | 0 | return; |
197 | 0 | } |
198 | 42.8k | Slice mid_key(key_ptr, non_shared); |
199 | 42.8k | if (Compare(mid_key, target) < 0) { |
200 | | // Key at "mid" is smaller than "target". Therefore all |
201 | | // blocks before "mid" are uninteresting. |
202 | 13.9k | left = mid; |
203 | 28.8k | } else { |
204 | | // Key at "mid" is >= "target". Therefore all blocks at or |
205 | | // after "mid" are uninteresting. |
206 | 28.8k | right = mid - 1; |
207 | 28.8k | } |
208 | 42.8k | } |
209 | | |
210 | | // We might be able to use our current position within the restart block. |
211 | | // This is true if we determined the key we desire is in the current block |
212 | | // and is after the current key. |
213 | 112k | assert(current_key_compare == 0 || Valid()); |
214 | 112k | bool skip_seek = left == restart_index_ && current_key_compare < 0; |
215 | 112k | if (!skip_seek) { |
216 | 112k | SeekToRestartPoint(left); |
217 | 112k | } |
218 | | // Linear search (within restart block) for first key >= target |
219 | 295k | while (true) { |
220 | 295k | if (!ParseNextKey()) { |
221 | 473 | return; |
222 | 473 | } |
223 | 295k | if (Compare(key_, target) >= 0) { |
224 | 111k | return; |
225 | 111k | } |
226 | 295k | } |
227 | 112k | } |
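 | | // Note on the corruption check in Seek() above: keys written at restart |
 | | // points carry shared == 0 (the full key is stored), so a nonzero shared |
 | | // count decoded at a restart offset indicates a malformed block. |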
228 | | |
229 | 1.19M | void SeekToFirst() override { |
230 | 1.19M | SeekToRestartPoint(0); |
231 | 1.19M | ParseNextKey(); |
232 | 1.19M | } |
233 | | |
234 | 0 | void SeekToLast() override { |
235 | 0 | SeekToRestartPoint(num_restarts_ - 1); |
236 | 0 | while (ParseNextKey() && NextEntryOffset() < restarts_) { |
237 | | // Keep skipping |
238 | 0 | } |
239 | 0 | } |
240 | | |
241 | | private: |
242 | 0 | void CorruptionError() { |
243 | 0 | current_ = restarts_; |
244 | 0 | restart_index_ = num_restarts_; |
245 | 0 | status_ = Status::Corruption("bad entry in block"); |
246 | 0 | key_.clear(); |
247 | 0 | value_.clear(); |
248 | 0 | } |
249 | | |
250 | 8.67M | bool ParseNextKey() { |
251 | 8.67M | current_ = NextEntryOffset(); |
252 | 8.67M | const char* p = data_ + current_; |
253 | 8.67M | const char* limit = data_ + restarts_; // Restarts come right after data |
254 | 8.67M | if (p >= limit) { |
255 | | // No more entries to return. Mark as invalid. |
256 | 1.11M | current_ = restarts_; |
257 | 1.11M | restart_index_ = num_restarts_; |
258 | 1.11M | return false; |
259 | 1.11M | } |
260 | | |
261 | | // Decode next entry |
262 | 7.55M | uint32_t shared, non_shared, value_length; |
263 | 7.55M | p = DecodeEntry(p, limit, &shared, &non_shared, &value_length); |
264 | 7.56M | if (p == nullptr || key_.size() < shared) { |
265 | 0 | CorruptionError(); |
266 | 0 | return false; |
267 | 7.55M | } else { |
268 | 7.55M | key_.resize(shared); |
269 | 7.55M | key_.append(p, non_shared); |
270 | 7.55M | value_ = Slice(p + non_shared, value_length); |
271 | 7.94M | while (restart_index_ + 1 < num_restarts_ && |
272 | 5.94M | GetRestartPoint(restart_index_ + 1) < current_) { |
273 | 387k | ++restart_index_; |
274 | 387k | } |
275 | 7.55M | return true; |
276 | 7.55M | } |
277 | 7.55M | } |
278 | | }; |
279 | | |
280 | 1.57M | Iterator* Block::NewIterator(const Comparator* comparator) { |
281 | 1.57M | if (size_ < sizeof(uint32_t)) { |
282 | 0 | return NewErrorIterator(Status::Corruption("bad block contents")); |
283 | 0 | } |
284 | 1.57M | const uint32_t num_restarts = NumRestarts(); |
285 | 1.57M | if (num_restarts == 0) { |
286 | 0 | return NewEmptyIterator(); |
287 | 1.57M | } else { |
288 | 1.57M | return new Iter(comparator, data_, restart_offset_, num_restarts); |
289 | 1.57M | } |
290 | 1.57M | } |
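 | | // Minimal usage sketch (illustration only; "contents" and "cmp" are |
 | | // assumed to be supplied by the caller, e.g. from ReadBlock() and the |
 | | // table's comparator): |
 | | // |
 | | //   Block block(contents); |
 | | //   Iterator* iter = block.NewIterator(cmp); |
 | | //   for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { |
 | | //     Slice k = iter->key(); |
 | | //     Slice v = iter->value(); |
 | | //     // ... use k and v ... |
 | | //   } |
 | | //   Status s = iter->status();  // check for corruption before trusting results |
 | | //   delete iter; |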
291 | | |
292 | | } // namespace leveldb |