/src/rocksdb/db/dbformat.cc
Line | Count | Source |
1 | | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | | // This source code is licensed under both the GPLv2 (found in the |
3 | | // COPYING file in the root directory) and Apache 2.0 License |
4 | | // (found in the LICENSE.Apache file in the root directory). |
5 | | // |
6 | | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
7 | | // Use of this source code is governed by a BSD-style license that can be |
8 | | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
9 | | #include "db/dbformat.h" |
10 | | |
11 | | #include <cinttypes> |
12 | | #include <cstdio> |
13 | | |
14 | | #include "db/lookup_key.h" |
15 | | #include "monitoring/perf_context_imp.h" |
16 | | #include "port/port.h" |
17 | | #include "util/coding.h" |
18 | | #include "util/string_util.h" |
19 | | |
20 | | namespace ROCKSDB_NAMESPACE { |
21 | | |
22 | | // kValueTypeForSeek defines the ValueType that should be passed when |
23 | | // constructing a ParsedInternalKey object for seeking to a particular |
24 | | // sequence number (since we sort sequence numbers in decreasing order |
25 | | // and the value type is embedded as the low 8 bits in the sequence |
26 | | // number in internal keys, we need to use the highest-numbered |
27 | | // ValueType, not the lowest). |
28 | | const ValueType kValueTypeForSeek = kTypeValuePreferredSeqno; |
29 | | const ValueType kValueTypeForSeekForPrev = kTypeDeletion; |
30 | | const std::string kDisableUserTimestamp; |
31 | | |
32 | 0 | EntryType GetEntryType(ValueType value_type) { |
33 | 0 | switch (value_type) { |
34 | 0 | case kTypeValue: |
35 | 0 | return kEntryPut; |
36 | 0 | case kTypeDeletion: |
37 | 0 | return kEntryDelete; |
38 | 0 | case kTypeDeletionWithTimestamp: |
39 | 0 | return kEntryDeleteWithTimestamp; |
40 | 0 | case kTypeSingleDeletion: |
41 | 0 | return kEntrySingleDelete; |
42 | 0 | case kTypeMerge: |
43 | 0 | return kEntryMerge; |
44 | 0 | case kTypeRangeDeletion: |
45 | 0 | return kEntryRangeDeletion; |
46 | 0 | case kTypeBlobIndex: |
47 | 0 | return kEntryBlobIndex; |
48 | 0 | case kTypeWideColumnEntity: |
49 | 0 | return kEntryWideColumnEntity; |
50 | 0 | case kTypeValuePreferredSeqno: |
51 | 0 | return kEntryTimedPut; |
52 | 0 | default: |
53 | 0 | return kEntryOther; |
54 | 0 | } |
55 | 0 | } |
56 | | |
57 | 133M | void AppendInternalKey(std::string* result, const ParsedInternalKey& key) { |
58 | 133M | result->append(key.user_key.data(), key.user_key.size()); |
59 | 133M | PutFixed64(result, PackSequenceAndType(key.sequence, key.type)); |
60 | 133M | } |
61 | | |
62 | | void AppendInternalKeyWithDifferentTimestamp(std::string* result, |
63 | | const ParsedInternalKey& key, |
64 | 0 | const Slice& ts) { |
65 | 0 | assert(key.user_key.size() >= ts.size()); |
66 | 0 | result->append(key.user_key.data(), key.user_key.size() - ts.size()); |
67 | 0 | result->append(ts.data(), ts.size()); |
68 | 0 | PutFixed64(result, PackSequenceAndType(key.sequence, key.type)); |
69 | 0 | } |
70 | | |
71 | | void AppendUserKeyWithDifferentTimestamp(std::string* result, const Slice& key, |
72 | 0 | const Slice& ts) { |
73 | 0 | assert(key.size() >= ts.size()); |
74 | 0 | result->append(key.data(), key.size() - ts.size()); |
75 | 0 | result->append(ts.data(), ts.size()); |
76 | 0 | } |
77 | | |
78 | | void AppendInternalKeyFooter(std::string* result, SequenceNumber s, |
79 | 0 | ValueType t) { |
80 | 0 | PutFixed64(result, PackSequenceAndType(s, t)); |
81 | 0 | } |
82 | | |
83 | | void AppendKeyWithMinTimestamp(std::string* result, const Slice& key, |
84 | 0 | size_t ts_sz) { |
85 | 0 | assert(ts_sz > 0); |
86 | 0 | const std::string kTsMin(ts_sz, static_cast<unsigned char>(0)); |
87 | 0 | result->append(key.data(), key.size()); |
88 | 0 | result->append(kTsMin.data(), ts_sz); |
89 | 0 | } |
90 | | |
91 | | void AppendKeyWithMaxTimestamp(std::string* result, const Slice& key, |
92 | 0 | size_t ts_sz) { |
93 | 0 | assert(ts_sz > 0); |
94 | 0 | const std::string kTsMax(ts_sz, static_cast<unsigned char>(0xff)); |
95 | 0 | result->append(key.data(), key.size()); |
96 | 0 | result->append(kTsMax.data(), ts_sz); |
97 | 0 | } |
98 | | |
99 | | void AppendUserKeyWithMinTimestamp(std::string* result, const Slice& key, |
100 | 0 | size_t ts_sz) { |
101 | 0 | assert(ts_sz > 0); |
102 | 0 | result->append(key.data(), key.size() - ts_sz); |
103 | 0 | result->append(ts_sz, static_cast<unsigned char>(0)); |
104 | 0 | } |
105 | | |
106 | | void AppendUserKeyWithMaxTimestamp(std::string* result, const Slice& key, |
107 | 0 | size_t ts_sz) { |
108 | 0 | assert(ts_sz > 0); |
109 | 0 | result->append(key.data(), key.size() - ts_sz); |
110 | |
|
111 | 0 | static constexpr char kTsMax[] = "\xff\xff\xff\xff\xff\xff\xff\xff\xff"; |
112 | 0 | if (ts_sz < strlen(kTsMax)) { |
113 | 0 | result->append(kTsMax, ts_sz); |
114 | 0 | } else { |
115 | 0 | result->append(std::string(ts_sz, '\xff')); |
116 | 0 | } |
117 | 0 | } |
118 | | |
119 | | void PadInternalKeyWithMinTimestamp(std::string* result, const Slice& key, |
120 | 0 | size_t ts_sz) { |
121 | 0 | assert(ts_sz > 0); |
122 | 0 | assert(key.size() >= kNumInternalBytes); |
123 | 0 | size_t user_key_size = key.size() - kNumInternalBytes; |
124 | 0 | result->reserve(key.size() + ts_sz); |
125 | 0 | result->append(key.data(), user_key_size); |
126 | 0 | result->append(ts_sz, static_cast<unsigned char>(0)); |
127 | 0 | result->append(key.data() + user_key_size, kNumInternalBytes); |
128 | 0 | } |
129 | | |
130 | | void PadInternalKeyWithMaxTimestamp(std::string* result, const Slice& key, |
131 | 0 | size_t ts_sz) { |
132 | 0 | assert(ts_sz > 0); |
133 | 0 | assert(key.size() >= kNumInternalBytes); |
134 | 0 | size_t user_key_size = key.size() - kNumInternalBytes; |
135 | 0 | result->reserve(key.size() + ts_sz); |
136 | 0 | result->append(key.data(), user_key_size); |
137 | 0 | result->append(std::string(ts_sz, '\xff')); |
138 | 0 | result->append(key.data() + user_key_size, kNumInternalBytes); |
139 | 0 | } |
140 | | |
141 | | void StripTimestampFromInternalKey(std::string* result, const Slice& key, |
142 | 0 | size_t ts_sz) { |
143 | 0 | assert(key.size() >= ts_sz + kNumInternalBytes); |
144 | 0 | result->reserve(key.size() - ts_sz); |
145 | 0 | result->append(key.data(), key.size() - kNumInternalBytes - ts_sz); |
146 | 0 | result->append(key.data() + key.size() - kNumInternalBytes, |
147 | 0 | kNumInternalBytes); |
148 | 0 | } |
149 | | |
150 | | void ReplaceInternalKeyWithMinTimestamp(std::string* result, const Slice& key, |
151 | 0 | size_t ts_sz) { |
152 | 0 | const size_t key_sz = key.size(); |
153 | 0 | assert(key_sz >= ts_sz + kNumInternalBytes); |
154 | 0 | result->reserve(key_sz); |
155 | 0 | result->append(key.data(), key_sz - kNumInternalBytes - ts_sz); |
156 | 0 | result->append(ts_sz, static_cast<unsigned char>(0)); |
157 | 0 | result->append(key.data() + key_sz - kNumInternalBytes, kNumInternalBytes); |
158 | 0 | } |
159 | | |
160 | | std::string ParsedInternalKey::DebugString(bool log_err_key, bool hex, |
161 | 4.36k | const Comparator* ucmp) const { |
162 | 4.36k | std::string result = "'"; |
163 | 4.36k | size_t ts_sz_for_debug = ucmp == nullptr ? 0 : ucmp->timestamp_size(); |
164 | 4.36k | if (log_err_key) { |
165 | 4.36k | if (ts_sz_for_debug == 0) { |
166 | 4.36k | result += user_key.ToString(hex); |
167 | 4.36k | } else { |
168 | 0 | assert(user_key.size() >= ts_sz_for_debug); |
169 | 0 | Slice user_key_without_ts = user_key; |
170 | 0 | user_key_without_ts.remove_suffix(ts_sz_for_debug); |
171 | 0 | result += user_key_without_ts.ToString(hex); |
172 | 0 | Slice ts = Slice(user_key.data() + user_key.size() - ts_sz_for_debug, |
173 | 0 | ts_sz_for_debug); |
174 | 0 | result += "|timestamp:"; |
175 | 0 | result += ucmp->TimestampToString(ts); |
176 | 0 | } |
177 | 4.36k | } else { |
178 | 0 | result += "<redacted>"; |
179 | 0 | } |
180 | | |
181 | 4.36k | result += "' seq:" + std::to_string(sequence); |
182 | 4.36k | result += ", type:" + std::to_string(type); |
183 | | |
184 | 4.36k | return result; |
185 | 4.36k | } |
186 | | |
187 | 4.36k | std::string InternalKey::DebugString(bool hex, const Comparator* ucmp) const { |
188 | 4.36k | std::string result; |
189 | 4.36k | ParsedInternalKey parsed; |
190 | 4.36k | if (ParseInternalKey(rep_, &parsed, false /* log_err_key */).ok()) { |
191 | 4.36k | result = parsed.DebugString(true /* log_err_key */, hex, ucmp); // TODO |
192 | 4.36k | } else { |
193 | 0 | result = "(bad)"; |
194 | 0 | result.append(EscapeString(rep_)); |
195 | 0 | } |
196 | 4.36k | return result; |
197 | 4.36k | } |
198 | | |
199 | | int InternalKeyComparator::Compare(const ParsedInternalKey& a, |
200 | 1.07G | const ParsedInternalKey& b) const { |
201 | | // Order by: |
202 | | // increasing user key (according to user-supplied comparator) |
203 | | // decreasing sequence number |
204 | | // decreasing type (though sequence# should be enough to disambiguate) |
205 | 1.07G | int r = user_comparator_.Compare(a.user_key, b.user_key); |
206 | 1.07G | if (r == 0) { |
207 | 853M | if (a.sequence > b.sequence) { |
208 | 426M | r = -1; |
209 | 426M | } else if (a.sequence < b.sequence) { |
210 | 426M | r = +1; |
211 | 426M | } else if (a.type > b.type) { |
212 | 2.10k | r = -1; |
213 | 2.52k | } else if (a.type < b.type) { |
214 | 0 | r = +1; |
215 | 0 | } |
216 | 853M | } |
217 | 1.07G | return r; |
218 | 1.07G | } |
219 | | |
220 | | int InternalKeyComparator::Compare(const Slice& a, |
221 | 166k | const ParsedInternalKey& b) const { |
222 | | // Order by: |
223 | | // increasing user key (according to user-supplied comparator) |
224 | | // decreasing sequence number |
225 | | // decreasing type (though sequence# should be enough to disambiguate) |
226 | 166k | int r = user_comparator_.Compare(ExtractUserKey(a), b.user_key); |
227 | 166k | if (r == 0) { |
228 | 5.45k | const uint64_t anum = |
229 | 5.45k | DecodeFixed64(a.data() + a.size() - kNumInternalBytes); |
230 | 5.45k | const uint64_t bnum = (b.sequence << 8) | b.type; |
231 | 5.45k | if (anum > bnum) { |
232 | 3.14k | r = -1; |
233 | 3.14k | } else if (anum < bnum) { |
234 | 2.31k | r = +1; |
235 | 2.31k | } |
236 | 5.45k | } |
237 | 166k | return r; |
238 | 166k | } |
239 | | |
240 | | int InternalKeyComparator::Compare(const ParsedInternalKey& a, |
241 | 141k | const Slice& b) const { |
242 | 141k | return -Compare(b, a); |
243 | 141k | } |
244 | | |
245 | | LookupKey::LookupKey(const Slice& _user_key, SequenceNumber s, |
246 | 19.2k | const Slice* ts) { |
247 | 19.2k | size_t usize = _user_key.size(); |
248 | 19.2k | size_t ts_sz = (nullptr == ts) ? 0 : ts->size(); |
249 | 19.2k | size_t needed = usize + ts_sz + 13; // A conservative estimate |
250 | 19.2k | char* dst; |
251 | 19.2k | if (needed <= sizeof(space_)) { |
252 | 19.2k | dst = space_; |
253 | 19.2k | } else { |
254 | 3 | dst = new char[needed]; |
255 | 3 | } |
256 | 19.2k | start_ = dst; |
257 | | // NOTE: We don't support users keys of more than 2GB :) |
258 | 19.2k | dst = EncodeVarint32(dst, static_cast<uint32_t>(usize + ts_sz + 8)); |
259 | 19.2k | kstart_ = dst; |
260 | 19.2k | memcpy(dst, _user_key.data(), usize); |
261 | 19.2k | dst += usize; |
262 | 19.2k | if (nullptr != ts) { |
263 | 0 | memcpy(dst, ts->data(), ts_sz); |
264 | 0 | dst += ts_sz; |
265 | 0 | } |
266 | 19.2k | EncodeFixed64(dst, PackSequenceAndType(s, kValueTypeForSeek)); |
267 | 19.2k | dst += 8; |
268 | 19.2k | end_ = dst; |
269 | 19.2k | } |
270 | | |
271 | 13.3k | void IterKey::EnlargeBuffer(size_t key_size) { |
272 | | // If size is smaller than buffer size, continue using current buffer, |
273 | | // or the inline one, as default |
274 | 13.3k | assert(key_size > buf_size_); |
275 | | // Need to enlarge the buffer. |
276 | 13.3k | ResetBuffer(); |
277 | 13.3k | buf_ = new char[key_size]; |
278 | 13.3k | buf_size_ = key_size; |
279 | 13.3k | } |
280 | | |
281 | 0 | void IterKey::EnlargeSecondaryBufferIfNeeded(size_t key_size) { |
282 | | // If size is smaller than buffer size, continue using current buffer, |
283 | | // or the inline one, as default |
284 | 0 | if (key_size <= secondary_buf_size_) { |
285 | 0 | return; |
286 | 0 | } |
287 | | // Need to enlarge the secondary buffer. |
288 | 0 | ResetSecondaryBuffer(); |
289 | 0 | secondary_buf_ = new char[key_size]; |
290 | 0 | secondary_buf_size_ = key_size; |
291 | 0 | } |
292 | | } // namespace ROCKSDB_NAMESPACE |