/src/rocksdb/table/unique_id.cc
Line | Count | Source |
1 | | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. |
2 | | // This source code is licensed under both the GPLv2 (found in the |
3 | | // COPYING file in the root directory) and Apache 2.0 License |
4 | | // (found in the LICENSE.Apache file in the root directory). |
5 | | |
6 | | #include <cstdint> |
7 | | |
8 | | #include "table/unique_id_impl.h" |
9 | | #include "util/coding_lean.h" |
10 | | #include "util/hash.h" |
11 | | #include "util/string_util.h" |
12 | | |
13 | | namespace ROCKSDB_NAMESPACE { |
14 | | |
15 | 40.2k | std::string EncodeSessionId(uint64_t upper, uint64_t lower) { |
16 | 40.2k | std::string db_session_id(20U, '\0'); |
17 | 40.2k | char* buf = db_session_id.data(); |
18 | | // Preserving `lower` is slightly tricky. 36^12 is slightly more than |
19 | | // 62 bits, so we use 12 chars plus the bottom two bits of one more. |
20 | | // (A tiny fraction of 20 digit strings go unused.) |
21 | 40.2k | uint64_t a = (upper << 2) | (lower >> 62); |
22 | 40.2k | uint64_t b = lower & (UINT64_MAX >> 2); |
23 | 40.2k | PutBaseChars<36>(&buf, 8, a, /*uppercase*/ true); |
24 | 40.2k | PutBaseChars<36>(&buf, 12, b, /*uppercase*/ true); |
25 | 40.2k | assert(buf == &db_session_id.back() + 1); |
26 | 40.2k | return db_session_id; |
27 | 40.2k | } |
28 | | |
29 | | Status DecodeSessionId(const std::string& db_session_id, uint64_t* upper, |
30 | 190k | uint64_t* lower) { |
31 | 190k | const size_t len = db_session_id.size(); |
32 | 190k | if (len == 0) { |
33 | 0 | return Status::NotSupported("Missing db_session_id"); |
34 | 0 | } |
35 | | // Anything from 13 to 24 chars is reasonable. We don't have to limit to |
36 | | // exactly 20. |
37 | 190k | if (len < 13) { |
38 | 0 | return Status::NotSupported("Too short db_session_id"); |
39 | 0 | } |
40 | 190k | if (len > 24) { |
41 | 0 | return Status::NotSupported("Too long db_session_id"); |
42 | 0 | } |
43 | 190k | uint64_t a = 0, b = 0; |
44 | 190k | const char* buf = &db_session_id.front(); |
45 | 190k | bool success = ParseBaseChars<36>(&buf, len - 12U, &a); |
46 | 190k | if (!success) { |
47 | 0 | return Status::NotSupported("Bad digit in db_session_id"); |
48 | 0 | } |
49 | 190k | success = ParseBaseChars<36>(&buf, 12U, &b); |
50 | 190k | if (!success) { |
51 | 0 | return Status::NotSupported("Bad digit in db_session_id"); |
52 | 0 | } |
53 | 190k | assert(buf == &db_session_id.back() + 1); |
54 | 190k | *upper = a >> 2; |
55 | 190k | *lower = (b & (UINT64_MAX >> 2)) | (a << 62); |
56 | 190k | return Status::OK(); |
57 | 190k | } |
58 | | |
59 | | Status GetSstInternalUniqueId(const std::string& db_id, |
60 | | const std::string& db_session_id, |
61 | | uint64_t file_number, UniqueIdPtr out, |
62 | 190k | bool force) { |
63 | 190k | if (!force) { |
64 | 17.4k | if (db_id.empty()) { |
65 | 0 | return Status::NotSupported("Missing db_id"); |
66 | 0 | } |
67 | 17.4k | if (file_number == 0) { |
68 | 0 | return Status::NotSupported("Missing or bad file number"); |
69 | 0 | } |
70 | 17.4k | if (db_session_id.empty()) { |
71 | 0 | return Status::NotSupported("Missing db_session_id"); |
72 | 0 | } |
73 | 17.4k | } |
74 | 190k | uint64_t session_upper = 0; // Assignment to appease clang-analyze |
75 | 190k | uint64_t session_lower = 0; // Assignment to appease clang-analyze |
76 | 190k | { |
77 | 190k | Status s = DecodeSessionId(db_session_id, &session_upper, &session_lower); |
78 | 190k | if (!s.ok()) { |
79 | 0 | if (!force) { |
80 | 0 | return s; |
81 | 0 | } else { |
82 | | // A reasonable fallback in case malformed |
83 | 0 | Hash2x64(db_session_id.data(), db_session_id.size(), &session_upper, |
84 | 0 | &session_lower); |
85 | 0 | if (session_lower == 0) { |
86 | 0 | session_lower = session_upper | 1; |
87 | 0 | } |
88 | 0 | } |
89 | 0 | } |
90 | 190k | } |
91 | | |
92 | | // Exactly preserve session lower to ensure that session ids generated |
93 | | // during the same process lifetime are guaranteed unique. |
94 | | // DBImpl also guarantees (in recent versions) that this is not zero, |
95 | | // so that we can guarantee unique ID is never all zeros. (Can't assert |
96 | | // that here because of testing and old versions.) |
97 | | // We put this first in anticipation of matching a small-ish set of cache |
98 | | // key prefixes to cover entries relevant to any DB. |
99 | 190k | out.ptr[0] = session_lower; |
100 | | |
101 | | // Hash the session upper (~39 bits entropy) and DB id (120+ bits entropy) |
102 | | // for very high global uniqueness entropy. |
103 | | // (It is possible that many DBs descended from one common DB id are copied |
104 | | // around and proliferate, in which case session id is critical, but it is |
105 | | // more common for different DBs to have different DB ids.) |
106 | 190k | uint64_t db_a, db_b; |
107 | 190k | Hash2x64(db_id.data(), db_id.size(), session_upper, &db_a, &db_b); |
108 | | |
109 | | // Xor in file number for guaranteed uniqueness by file number for a given |
110 | | // session and DB id. (Xor slightly better than + here. See |
111 | | // https://github.com/pdillinger/unique_id ) |
112 | 190k | out.ptr[1] = db_a ^ file_number; |
113 | | |
114 | | // Extra (optional) global uniqueness |
115 | 190k | if (out.extended) { |
116 | 0 | out.ptr[2] = db_b; |
117 | 0 | } |
118 | | |
119 | 190k | return Status::OK(); |
120 | 190k | } |
121 | | |
namespace {
// For InternalUniqueIdToExternal / ExternalUniqueIdToInternal we want all
// zeros in first 128 bits to map to itself, so that excluding zero in
// internal IDs (session_lower != 0 above) does the same for external IDs.
// These values are meaningless except for making that work.
//
// Usage in this file: InternalUniqueIdToExternal adds kHiOffsetForZero to
// ptr[1] and kLoOffsetForZero to ptr[0] before calling BijectiveHash2x64;
// ExternalUniqueIdToInternal subtracts the same offsets from the outputs of
// BijectiveUnhash2x64. The two constants must therefore stay in sync with
// that hash pair.
constexpr uint64_t kHiOffsetForZero = 17391078804906429400U;
constexpr uint64_t kLoOffsetForZero = 6417269962128484497U;
}  // namespace
130 | | |
131 | 0 | void InternalUniqueIdToExternal(UniqueIdPtr in_out) { |
132 | 0 | uint64_t hi, lo; |
133 | 0 | BijectiveHash2x64(in_out.ptr[1] + kHiOffsetForZero, |
134 | 0 | in_out.ptr[0] + kLoOffsetForZero, &hi, &lo); |
135 | 0 | in_out.ptr[0] = lo; |
136 | 0 | in_out.ptr[1] = hi; |
137 | 0 | if (in_out.extended) { |
138 | 0 | in_out.ptr[2] += lo + hi; |
139 | 0 | } |
140 | 0 | } |
141 | | |
142 | 0 | void ExternalUniqueIdToInternal(UniqueIdPtr in_out) { |
143 | 0 | uint64_t lo = in_out.ptr[0]; |
144 | 0 | uint64_t hi = in_out.ptr[1]; |
145 | 0 | if (in_out.extended) { |
146 | 0 | in_out.ptr[2] -= lo + hi; |
147 | 0 | } |
148 | 0 | BijectiveUnhash2x64(hi, lo, &hi, &lo); |
149 | 0 | in_out.ptr[0] = lo - kLoOffsetForZero; |
150 | 0 | in_out.ptr[1] = hi - kHiOffsetForZero; |
151 | 0 | } |
152 | | |
153 | 83.3k | std::string EncodeUniqueIdBytes(UniqueIdPtr in) { |
154 | 83.3k | std::string ret(in.extended ? 24U : 16U, '\0'); |
155 | 83.3k | EncodeFixed64(ret.data(), in.ptr[0]); |
156 | 83.3k | EncodeFixed64(&ret[8], in.ptr[1]); |
157 | 83.3k | if (in.extended) { |
158 | 0 | EncodeFixed64(&ret[16], in.ptr[2]); |
159 | 0 | } |
160 | 83.3k | return ret; |
161 | 83.3k | } |
162 | | |
163 | 68.7k | Status DecodeUniqueIdBytes(const std::string& unique_id, UniqueIdPtr out) { |
164 | 68.7k | if (unique_id.size() != (out.extended ? 24 : 16)) { |
165 | 0 | return Status::NotSupported("Not a valid unique_id"); |
166 | 0 | } |
167 | 68.7k | const char* buf = &unique_id.front(); |
168 | 68.7k | out.ptr[0] = DecodeFixed64(&buf[0]); |
169 | 68.7k | out.ptr[1] = DecodeFixed64(&buf[8]); |
170 | 68.7k | if (out.extended) { |
171 | 0 | out.ptr[2] = DecodeFixed64(&buf[16]); |
172 | 0 | } |
173 | 68.7k | return Status::OK(); |
174 | 68.7k | } |
175 | | |
176 | | template <typename ID> |
177 | | Status GetUniqueIdFromTablePropertiesHelper(const TableProperties& props, |
178 | 0 | std::string* out_id) { |
179 | 0 | ID tmp{}; |
180 | 0 | Status s = GetSstInternalUniqueId(props.db_id, props.db_session_id, |
181 | 0 | props.orig_file_number, &tmp); |
182 | 0 | if (s.ok()) { |
183 | 0 | InternalUniqueIdToExternal(&tmp); |
184 | 0 | *out_id = EncodeUniqueIdBytes(&tmp); |
185 | 0 | } else { |
186 | 0 | out_id->clear(); |
187 | 0 | } |
188 | 0 | return s; |
189 | 0 | } Unexecuted instantiation: rocksdb::Status rocksdb::GetUniqueIdFromTablePropertiesHelper<std::__1::array<unsigned long, 3ul> >(rocksdb::TableProperties const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >*) Unexecuted instantiation: rocksdb::Status rocksdb::GetUniqueIdFromTablePropertiesHelper<std::__1::array<unsigned long, 2ul> >(rocksdb::TableProperties const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >*) |
190 | | |
// Computes the extended (UniqueId64x3) external unique ID for the table
// described by `props` and stores its byte-string encoding in `*out_id`.
// Delegates to GetUniqueIdFromTablePropertiesHelper, which clears `*out_id`
// and returns the failing status if the ID cannot be computed.
Status GetExtendedUniqueIdFromTableProperties(const TableProperties& props,
                                              std::string* out_id) {
  return GetUniqueIdFromTablePropertiesHelper<UniqueId64x3>(props, out_id);
}
195 | | |
// Computes the standard (UniqueId64x2) external unique ID for the table
// described by `props` and stores its byte-string encoding in `*out_id`.
// Delegates to GetUniqueIdFromTablePropertiesHelper, which clears `*out_id`
// and returns the failing status if the ID cannot be computed.
Status GetUniqueIdFromTableProperties(const TableProperties& props,
                                      std::string* out_id) {
  return GetUniqueIdFromTablePropertiesHelper<UniqueId64x2>(props, out_id);
}
200 | | |
201 | 0 | std::string UniqueIdToHumanString(const std::string& id) { |
202 | 0 | std::string hex = Slice(id).ToString(/*hex*/ true); |
203 | 0 | std::string result; |
204 | 0 | result.reserve(hex.size() + hex.size() / 16); |
205 | 0 | for (size_t i = 0; i < hex.size(); i++) { |
206 | 0 | if (i > 0 && i % 16 == 0) { |
207 | 0 | result.push_back('-'); |
208 | 0 | } |
209 | 0 | result.push_back(hex[i]); |
210 | 0 | } |
211 | 0 | return result; |
212 | 0 | } |
213 | | |
214 | 0 | std::string InternalUniqueIdToHumanString(UniqueIdPtr in) { |
215 | 0 | std::string str = "{"; |
216 | 0 | str += std::to_string(in.ptr[0]); |
217 | 0 | str += ","; |
218 | 0 | str += std::to_string(in.ptr[1]); |
219 | 0 | if (in.extended) { |
220 | 0 | str += ","; |
221 | 0 | str += std::to_string(in.ptr[2]); |
222 | 0 | } |
223 | 0 | str += "}"; |
224 | 0 | return str; |
225 | 0 | } |
226 | | |
227 | | } // namespace ROCKSDB_NAMESPACE |