/src/rocksdb/db/dbformat.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | | // This source code is licensed under both the GPLv2 (found in the |
3 | | // COPYING file in the root directory) and Apache 2.0 License |
4 | | // (found in the LICENSE.Apache file in the root directory). |
5 | | // |
6 | | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
7 | | // Use of this source code is governed by a BSD-style license that can be |
8 | | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
9 | | #include "db/dbformat.h" |
10 | | |
11 | | #include <cinttypes> |
12 | | #include <cstdio> |
13 | | |
14 | | #include "db/lookup_key.h" |
15 | | #include "monitoring/perf_context_imp.h" |
16 | | #include "port/port.h" |
17 | | #include "util/coding.h" |
18 | | #include "util/string_util.h" |
19 | | |
20 | | namespace ROCKSDB_NAMESPACE { |
21 | | |
22 | | // kValueTypeForSeek defines the ValueType that should be passed when |
23 | | // constructing a ParsedInternalKey object for seeking to a particular |
24 | | // sequence number (since we sort sequence numbers in decreasing order |
25 | | // and the value type is embedded as the low 8 bits in the sequence |
26 | | // number in internal keys, we need to use the highest-numbered |
27 | | // ValueType, not the lowest). |
28 | | const ValueType kValueTypeForSeek = kTypeValuePreferredSeqno; |
29 | | const ValueType kValueTypeForSeekForPrev = kTypeDeletion; |
30 | | const std::string kDisableUserTimestamp; |
31 | | |
32 | 0 | EntryType GetEntryType(ValueType value_type) { |
33 | 0 | switch (value_type) { |
34 | 0 | case kTypeValue: |
35 | 0 | return kEntryPut; |
36 | 0 | case kTypeDeletion: |
37 | 0 | return kEntryDelete; |
38 | 0 | case kTypeDeletionWithTimestamp: |
39 | 0 | return kEntryDeleteWithTimestamp; |
40 | 0 | case kTypeSingleDeletion: |
41 | 0 | return kEntrySingleDelete; |
42 | 0 | case kTypeMerge: |
43 | 0 | return kEntryMerge; |
44 | 0 | case kTypeRangeDeletion: |
45 | 0 | return kEntryRangeDeletion; |
46 | 0 | case kTypeBlobIndex: |
47 | 0 | return kEntryBlobIndex; |
48 | 0 | case kTypeWideColumnEntity: |
49 | 0 | return kEntryWideColumnEntity; |
50 | 0 | case kTypeValuePreferredSeqno: |
51 | 0 | return kEntryTimedPut; |
52 | 0 | default: |
53 | 0 | return kEntryOther; |
54 | 0 | } |
55 | 0 | } |
56 | | |
57 | 149M | void AppendInternalKey(std::string* result, const ParsedInternalKey& key) { |
58 | 149M | result->append(key.user_key.data(), key.user_key.size()); |
59 | 149M | PutFixed64(result, PackSequenceAndType(key.sequence, key.type)); |
60 | 149M | } |
61 | | |
62 | | void AppendInternalKeyWithDifferentTimestamp(std::string* result, |
63 | | const ParsedInternalKey& key, |
64 | 0 | const Slice& ts) { |
65 | 0 | assert(key.user_key.size() >= ts.size()); |
66 | 0 | result->append(key.user_key.data(), key.user_key.size() - ts.size()); |
67 | 0 | result->append(ts.data(), ts.size()); |
68 | 0 | PutFixed64(result, PackSequenceAndType(key.sequence, key.type)); |
69 | 0 | } |
70 | | |
71 | | void AppendUserKeyWithDifferentTimestamp(std::string* result, const Slice& key, |
72 | 0 | const Slice& ts) { |
73 | 0 | assert(key.size() >= ts.size()); |
74 | 0 | result->append(key.data(), key.size() - ts.size()); |
75 | 0 | result->append(ts.data(), ts.size()); |
76 | 0 | } |
77 | | |
78 | | void AppendInternalKeyFooter(std::string* result, SequenceNumber s, |
79 | 0 | ValueType t) { |
80 | 0 | PutFixed64(result, PackSequenceAndType(s, t)); |
81 | 0 | } |
82 | | |
83 | | void AppendKeyWithMinTimestamp(std::string* result, const Slice& key, |
84 | 0 | size_t ts_sz) { |
85 | 0 | assert(ts_sz > 0); |
86 | 0 | const std::string kTsMin(ts_sz, static_cast<unsigned char>(0)); |
87 | 0 | result->append(key.data(), key.size()); |
88 | 0 | result->append(kTsMin.data(), ts_sz); |
89 | 0 | } |
90 | | |
91 | | void AppendKeyWithMaxTimestamp(std::string* result, const Slice& key, |
92 | 0 | size_t ts_sz) { |
93 | 0 | assert(ts_sz > 0); |
94 | 0 | const std::string kTsMax(ts_sz, static_cast<unsigned char>(0xff)); |
95 | 0 | result->append(key.data(), key.size()); |
96 | 0 | result->append(kTsMax.data(), ts_sz); |
97 | 0 | } |
98 | | |
99 | | void AppendUserKeyWithMinTimestamp(std::string* result, const Slice& key, |
100 | 0 | size_t ts_sz) { |
101 | 0 | assert(ts_sz > 0); |
102 | 0 | result->append(key.data(), key.size() - ts_sz); |
103 | 0 | result->append(ts_sz, static_cast<unsigned char>(0)); |
104 | 0 | } |
105 | | |
106 | | void AppendUserKeyWithMaxTimestamp(std::string* result, const Slice& key, |
107 | 0 | size_t ts_sz) { |
108 | 0 | assert(ts_sz > 0); |
109 | 0 | result->append(key.data(), key.size() - ts_sz); |
110 | |
|
111 | 0 | static constexpr char kTsMax[] = "\xff\xff\xff\xff\xff\xff\xff\xff\xff"; |
112 | 0 | if (ts_sz < strlen(kTsMax)) { |
113 | 0 | result->append(kTsMax, ts_sz); |
114 | 0 | } else { |
115 | 0 | result->append(std::string(ts_sz, '\xff')); |
116 | 0 | } |
117 | 0 | } |
118 | | |
119 | | void PadInternalKeyWithMinTimestamp(std::string* result, const Slice& key, |
120 | 0 | size_t ts_sz) { |
121 | 0 | assert(ts_sz > 0); |
122 | 0 | assert(key.size() >= kNumInternalBytes); |
123 | 0 | size_t user_key_size = key.size() - kNumInternalBytes; |
124 | 0 | result->reserve(key.size() + ts_sz); |
125 | 0 | result->append(key.data(), user_key_size); |
126 | 0 | result->append(ts_sz, static_cast<unsigned char>(0)); |
127 | 0 | result->append(key.data() + user_key_size, kNumInternalBytes); |
128 | 0 | } |
129 | | |
130 | | void PadInternalKeyWithMaxTimestamp(std::string* result, const Slice& key, |
131 | 0 | size_t ts_sz) { |
132 | 0 | assert(ts_sz > 0); |
133 | 0 | assert(key.size() >= kNumInternalBytes); |
134 | 0 | size_t user_key_size = key.size() - kNumInternalBytes; |
135 | 0 | result->reserve(key.size() + ts_sz); |
136 | 0 | result->append(key.data(), user_key_size); |
137 | 0 | result->append(std::string(ts_sz, '\xff')); |
138 | 0 | result->append(key.data() + user_key_size, kNumInternalBytes); |
139 | 0 | } |
140 | | |
141 | | void StripTimestampFromInternalKey(std::string* result, const Slice& key, |
142 | 0 | size_t ts_sz) { |
143 | 0 | assert(key.size() >= ts_sz + kNumInternalBytes); |
144 | 0 | result->reserve(key.size() - ts_sz); |
145 | 0 | result->append(key.data(), key.size() - kNumInternalBytes - ts_sz); |
146 | 0 | result->append(key.data() + key.size() - kNumInternalBytes, |
147 | 0 | kNumInternalBytes); |
148 | 0 | } |
149 | | |
150 | | void ReplaceInternalKeyWithMinTimestamp(std::string* result, const Slice& key, |
151 | 0 | size_t ts_sz) { |
152 | 0 | const size_t key_sz = key.size(); |
153 | 0 | assert(key_sz >= ts_sz + kNumInternalBytes); |
154 | 0 | result->reserve(key_sz); |
155 | 0 | result->append(key.data(), key_sz - kNumInternalBytes - ts_sz); |
156 | 0 | result->append(ts_sz, static_cast<unsigned char>(0)); |
157 | 0 | result->append(key.data() + key_sz - kNumInternalBytes, kNumInternalBytes); |
158 | 0 | } |
159 | | |
160 | | std::string ParsedInternalKey::DebugString(bool log_err_key, bool hex, |
161 | 3.27k | const Comparator* ucmp) const { |
162 | 3.27k | std::string result = "'"; |
163 | 3.27k | size_t ts_sz_for_debug = ucmp == nullptr ? 0 : ucmp->timestamp_size(); |
164 | 3.27k | if (log_err_key) { |
165 | 3.27k | if (ts_sz_for_debug == 0) { |
166 | 3.27k | result += user_key.ToString(hex); |
167 | 3.27k | } else { |
168 | 0 | assert(user_key.size() >= ts_sz_for_debug); |
169 | 0 | Slice user_key_without_ts = user_key; |
170 | 0 | user_key_without_ts.remove_suffix(ts_sz_for_debug); |
171 | 0 | result += user_key_without_ts.ToString(hex); |
172 | 0 | Slice ts = Slice(user_key.data() + user_key.size() - ts_sz_for_debug, |
173 | 0 | ts_sz_for_debug); |
174 | 0 | result += "|timestamp:"; |
175 | 0 | result += ucmp->TimestampToString(ts); |
176 | 0 | } |
177 | 3.27k | } else { |
178 | 0 | result += "<redacted>"; |
179 | 0 | } |
180 | | |
181 | 3.27k | char buf[50]; |
182 | 3.27k | snprintf(buf, sizeof(buf), "' seq:%" PRIu64 ", type:%d", sequence, |
183 | 3.27k | static_cast<int>(type)); |
184 | | |
185 | 3.27k | result += buf; |
186 | 3.27k | return result; |
187 | 3.27k | } |
188 | | |
189 | 3.27k | std::string InternalKey::DebugString(bool hex, const Comparator* ucmp) const { |
190 | 3.27k | std::string result; |
191 | 3.27k | ParsedInternalKey parsed; |
192 | 3.27k | if (ParseInternalKey(rep_, &parsed, false /* log_err_key */).ok()) { |
193 | 3.27k | result = parsed.DebugString(true /* log_err_key */, hex, ucmp); // TODO |
194 | 3.27k | } else { |
195 | 0 | result = "(bad)"; |
196 | 0 | result.append(EscapeString(rep_)); |
197 | 0 | } |
198 | 3.27k | return result; |
199 | 3.27k | } |
200 | | |
201 | | int InternalKeyComparator::Compare(const ParsedInternalKey& a, |
202 | 1.21G | const ParsedInternalKey& b) const { |
203 | | // Order by: |
204 | | // increasing user key (according to user-supplied comparator) |
205 | | // decreasing sequence number |
206 | | // decreasing type (though sequence# should be enough to disambiguate) |
207 | 1.21G | int r = user_comparator_.Compare(a.user_key, b.user_key); |
208 | 1.21G | if (r == 0) { |
209 | 971M | if (a.sequence > b.sequence) { |
210 | 485M | r = -1; |
211 | 485M | } else if (a.sequence < b.sequence) { |
212 | 485M | r = +1; |
213 | 485M | } else if (a.type > b.type) { |
214 | 1.30k | r = -1; |
215 | 1.87k | } else if (a.type < b.type) { |
216 | 0 | r = +1; |
217 | 0 | } |
218 | 971M | } |
219 | 1.21G | return r; |
220 | 1.21G | } |
221 | | |
222 | | int InternalKeyComparator::Compare(const Slice& a, |
223 | 177k | const ParsedInternalKey& b) const { |
224 | | // Order by: |
225 | | // increasing user key (according to user-supplied comparator) |
226 | | // decreasing sequence number |
227 | | // decreasing type (though sequence# should be enough to disambiguate) |
228 | 177k | int r = user_comparator_.Compare(ExtractUserKey(a), b.user_key); |
229 | 177k | if (r == 0) { |
230 | 5.41k | const uint64_t anum = |
231 | 5.41k | DecodeFixed64(a.data() + a.size() - kNumInternalBytes); |
232 | 5.41k | const uint64_t bnum = (b.sequence << 8) | b.type; |
233 | 5.41k | if (anum > bnum) { |
234 | 3.21k | r = -1; |
235 | 3.21k | } else if (anum < bnum) { |
236 | 2.19k | r = +1; |
237 | 2.19k | } |
238 | 5.41k | } |
239 | 177k | return r; |
240 | 177k | } |
241 | | |
242 | | int InternalKeyComparator::Compare(const ParsedInternalKey& a, |
243 | 150k | const Slice& b) const { |
244 | 150k | return -Compare(b, a); |
245 | 150k | } |
246 | | |
247 | | LookupKey::LookupKey(const Slice& _user_key, SequenceNumber s, |
248 | 9.03k | const Slice* ts) { |
249 | 9.03k | size_t usize = _user_key.size(); |
250 | 9.03k | size_t ts_sz = (nullptr == ts) ? 0 : ts->size(); |
251 | 9.03k | size_t needed = usize + ts_sz + 13; // A conservative estimate |
252 | 9.03k | char* dst; |
253 | 9.03k | if (needed <= sizeof(space_)) { |
254 | 9.00k | dst = space_; |
255 | 9.00k | } else { |
256 | 24 | dst = new char[needed]; |
257 | 24 | } |
258 | 9.03k | start_ = dst; |
259 | | // NOTE: We don't support users keys of more than 2GB :) |
260 | 9.03k | dst = EncodeVarint32(dst, static_cast<uint32_t>(usize + ts_sz + 8)); |
261 | 9.03k | kstart_ = dst; |
262 | 9.03k | memcpy(dst, _user_key.data(), usize); |
263 | 9.03k | dst += usize; |
264 | 9.03k | if (nullptr != ts) { |
265 | 0 | memcpy(dst, ts->data(), ts_sz); |
266 | 0 | dst += ts_sz; |
267 | 0 | } |
268 | 9.03k | EncodeFixed64(dst, PackSequenceAndType(s, kValueTypeForSeek)); |
269 | 9.03k | dst += 8; |
270 | 9.03k | end_ = dst; |
271 | 9.03k | } |
272 | | |
273 | 44.6k | void IterKey::EnlargeBuffer(size_t key_size) { |
274 | | // If size is smaller than buffer size, continue using current buffer, |
275 | | // or the static allocated one, as default |
276 | 44.6k | assert(key_size > buf_size_); |
277 | | // Need to enlarge the buffer. |
278 | 44.6k | ResetBuffer(); |
279 | 44.6k | buf_ = new char[key_size]; |
280 | 44.6k | buf_size_ = key_size; |
281 | 44.6k | } |
282 | | } // namespace ROCKSDB_NAMESPACE |