/src/rocksdb/util/ribbon_impl.h
Line | Count | Source |
1 | | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. |
2 | | // This source code is licensed under both the GPLv2 (found in the |
3 | | // COPYING file in the root directory) and Apache 2.0 License |
4 | | // (found in the LICENSE.Apache file in the root directory). |
5 | | |
6 | | #pragma once |
7 | | |
8 | | #include <cmath> |
9 | | |
10 | | #include "port/port.h" // for PREFETCH |
11 | | #include "util/fastrange.h" |
12 | | #include "util/ribbon_alg.h" |
13 | | |
14 | | namespace ROCKSDB_NAMESPACE { |
15 | | |
16 | | namespace ribbon { |
17 | | |
18 | | // RIBBON PHSF & RIBBON Filter (Rapid Incremental Boolean Banding ON-the-fly) |
19 | | // |
20 | | // ribbon_impl.h: templated (parameterized) standard implementations |
21 | | // |
22 | | // Ribbon is a Perfect Hash Static Function construction useful as a compact |
23 | | // static Bloom filter alternative. See ribbon_alg.h for core algorithms |
24 | | // and core design details. |
25 | | // |
26 | | // TODO: more details on trade-offs and practical issues. |
27 | | // |
28 | | // APIs for configuring Ribbon are in ribbon_config.h |
29 | | |
30 | | // Ribbon implementations in this file take these parameters, which must be |
31 | | // provided in a class/struct type with members expressed in this concept: |
32 | | |
33 | | // concept TypesAndSettings { |
34 | | // // See RibbonTypes and *Hasher in ribbon_alg.h, except here we have |
35 | | // // the added constraint that Hash be equivalent to either uint32_t or |
36 | | // // uint64_t. |
37 | | // typename Hash; |
38 | | // typename CoeffRow; |
39 | | // typename ResultRow; |
40 | | // typename Index; |
41 | | // typename Key; |
42 | | // static constexpr bool kFirstCoeffAlwaysOne; |
43 | | // |
44 | | // // An unsigned integer type for identifying a hash seed, typically |
45 | | // // uint32_t or uint64_t. Importantly, this is the amount of data |
46 | | // // stored in memory for identifying a raw seed. See StandardHasher. |
47 | | // typename Seed; |
48 | | // |
49 | | // // When true, the PHSF implements a static filter, expecting just |
50 | | // // keys as inputs for construction. When false, implements a general |
51 | | // // PHSF and expects std::pair<Key, ResultRow> as inputs for |
52 | | // // construction. |
53 | | // static constexpr bool kIsFilter; |
54 | | // |
55 | | // // When true, enables a special "homogeneous" filter implementation that |
56 | | // is slightly faster to construct, and never fails to construct, though |
57 | | // the FP rate can quickly explode in cases where the corresponding |
58 | | // non-homogeneous filter would fail (or nearly fail) to construct. |
59 | | // // For smaller filters, you can configure with ConstructionFailureChance |
60 | | // // smaller than desired FP rate to largely counteract this effect. |
61 | | // // TODO: configuring Homogeneous Ribbon for arbitrarily large filters |
62 | | // // based on data from OptimizeHomogAtScale |
63 | | // static constexpr bool kHomogeneous; |
64 | | // |
65 | | // // When true, adds a tiny bit more hashing logic on queries and |
66 | | // // construction to improve utilization at the beginning and end of |
67 | | // // the structure. Recommended when CoeffRow is only 64 bits (or |
68 | | // // less), so typical num_starts < 10k. Although this is compatible |
69 | | // // with kHomogeneous, the competing space vs. time priorities might |
70 | | // // not be useful. |
71 | | // static constexpr bool kUseSmash; |
72 | | // |
73 | | // // When true, allows number of "starts" to be zero, for best support |
74 | | // // of the "no keys to add" case by always returning false for filter |
75 | | // // queries. (This is distinct from the "keys added but no space for |
76 | | // // any data" case, in which a filter always returns true.) The cost |
77 | | // of supporting this is a conditional branch (probably predictable) in |
78 | | // // queries. |
79 | | // static constexpr bool kAllowZeroStarts; |
80 | | // |
81 | | // // A seedable stock hash function on Keys. All bits of Hash must |
82 | | // // be reasonably high quality. XXH functions recommended, but |
83 | | // // Murmur, City, Farm, etc. also work. |
84 | | // static Hash HashFn(const Key &, Seed raw_seed); |
85 | | // }; |
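// For illustration only (editor's sketch, not part of this header), a
// minimal hypothetical configuration satisfying the concept. A real
// configuration should plug a high-quality seedable hash (e.g. XXH3)
// into HashFn; SomeSeedableHash64 below is a placeholder name.
//
// struct ExampleTypesAndSettings {
//   using CoeffRow = uint64_t;   // 64 coefficients per row
//   using ResultRow = uint8_t;   // up to 8 solution columns
//   using Index = uint32_t;
//   using Hash = uint64_t;
//   using Key = Slice;
//   using Seed = uint32_t;
//   static constexpr bool kIsFilter = true;
//   static constexpr bool kHomogeneous = false;
//   static constexpr bool kFirstCoeffAlwaysOne = true;
//   static constexpr bool kUseSmash = false;
//   static constexpr bool kAllowZeroStarts = true;
//   static Hash HashFn(const Key& key, Seed raw_seed) {
//     return SomeSeedableHash64(key.data(), key.size(), raw_seed);
//   }
// };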
86 | | |
87 | | // A bit of a hack to automatically construct the type for |
88 | | // AddInput based on a constexpr bool. |
89 | | template <typename Key, typename ResultRow, bool IsFilter> |
90 | | struct AddInputSelector { |
91 | | // For general PHSF, not filter |
92 | | using T = std::pair<Key, ResultRow>; |
93 | | }; |
94 | | |
95 | | template <typename Key, typename ResultRow> |
96 | | struct AddInputSelector<Key, ResultRow, true /*IsFilter*/> { |
97 | | // For Filter |
98 | | using T = Key; |
99 | | }; |
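// For example (editor's note), given the selectors above:
//   AddInputSelector<Key, ResultRow, true>::T  is  Key
//   AddInputSelector<Key, ResultRow, false>::T is  std::pair<Key, ResultRow>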
100 | | |
101 | | // To avoid writing 'typename' everywhere that we use types like 'Index' |
102 | | #define IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings) \ |
103 | | using CoeffRow = typename TypesAndSettings::CoeffRow; \ |
104 | | using ResultRow = typename TypesAndSettings::ResultRow; \ |
105 | | using Index = typename TypesAndSettings::Index; \ |
106 | | using Hash = typename TypesAndSettings::Hash; \ |
107 | | using Key = typename TypesAndSettings::Key; \ |
108 | | using Seed = typename TypesAndSettings::Seed; \ |
109 | | \ |
110 | | /* Some more additions */ \ |
111 | | using QueryInput = Key; \ |
112 | | using AddInput = typename ROCKSDB_NAMESPACE::ribbon::AddInputSelector< \ |
113 | | Key, ResultRow, TypesAndSettings::kIsFilter>::T; \ |
114 | | static constexpr auto kCoeffBits = \ |
115 | | static_cast<Index>(sizeof(CoeffRow) * 8U); \ |
116 | | \ |
117 | | /* Export to algorithm */ \ |
118 | | static constexpr bool kFirstCoeffAlwaysOne = \ |
119 | | TypesAndSettings::kFirstCoeffAlwaysOne; \ |
120 | | \ |
121 | | static_assert(sizeof(CoeffRow) + sizeof(ResultRow) + sizeof(Index) + \ |
122 | | sizeof(Hash) + sizeof(Key) + sizeof(Seed) + \ |
123 | | sizeof(QueryInput) + sizeof(AddInput) + kCoeffBits + \ |
124 | | kFirstCoeffAlwaysOne > \ |
125 | | 0, \ |
126 | | "avoid unused warnings, semicolon expected after macro call") |
127 | | |
128 | | #ifdef _MSC_VER |
129 | | #pragma warning(push) |
130 | | #pragma warning(disable : 4309) // cast truncating constant |
131 | | #pragma warning(disable : 4307) // arithmetic constant overflow |
132 | | #endif |
133 | | |
134 | | // StandardHasher: A standard implementation of concepts RibbonTypes, |
135 | | // PhsfQueryHasher, FilterQueryHasher, and BandingHasher from ribbon_alg.h. |
136 | | // |
137 | | // This implementation should be suitable for almost all practical purposes |
138 | | // as it "behaves" across a wide range of settings, with little room left |
139 | | // for improvement. The key functionality in this hasher is generating |
140 | | // CoeffRows, starts, and (for filters) ResultRows, which could be ~150 |
141 | | // bits of data or more, from a modest hash of 64 or even just 32 bits, with |
142 | | // enough uniformity and bitwise independence to be close to "the best you |
143 | | // can do" with available hash information in terms of FP rate and |
144 | | // compactness. (64 bits recommended and sufficient for PHSF practical |
145 | | // purposes.) |
146 | | // |
147 | | // Another feature of this hasher is a minimal "premixing" of seeds before |
148 | | // they are provided to TypesAndSettings::HashFn in case that function does |
149 | | // not provide sufficiently independent hashes when iterating merely |
150 | | // sequentially on seeds. (This for example works around a problem with the |
151 | | // preview version 0.7.2 of XXH3 used in RocksDB, a.k.a. XXPH3 or Hash64, and |
152 | | // MurmurHash1 used in RocksDB, a.k.a. Hash.) We say this pre-mixing step |
153 | | // translates "ordinal seeds," which we iterate sequentially to find a |
154 | | // solution, into "raw seeds," with many more bits changing for each |
155 | | // iteration. The translation is an easily reversible lightweight mixing, |
156 | | // not suitable for hashing on its own. An advantage of this approach is that |
157 | | // StandardHasher can store just the raw seed (e.g. 64 bits) for fast query |
158 | | // times, while from the application perspective, we can limit to a small |
159 | | // number of ordinal seeds (e.g. 64 in 6 bits) for saving in metadata. |
160 | | // |
161 | | // The default constructor initializes the seed to ordinal seed zero, which |
162 | | // is equal to raw seed zero. |
163 | | // |
164 | | template <class TypesAndSettings> |
165 | | class StandardHasher { |
166 | | public: |
167 | | IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings); |
168 | | |
169 | 0 | inline Hash GetHash(const Key& key) const { |
170 | 0 | return TypesAndSettings::HashFn(key, raw_seed_); |
171 | 0 | } |
172 | | // For when AddInput == pair<Key, ResultRow> (kIsFilter == false) |
173 | | inline Hash GetHash(const std::pair<Key, ResultRow>& bi) const { |
174 | | return GetHash(bi.first); |
175 | | } |
176 | 0 | inline Index GetStart(Hash h, Index num_starts) const { |
177 | | // This is "critical path" code because it's required before memory |
178 | | // lookup. |
179 | | // |
180 | | // FastRange gives us a fast and effective mapping from h to the |
181 | | // appropriate range. This depends most, sometimes exclusively, on |
182 | | // upper bits of h. |
183 | | // |
184 | 0 | if (TypesAndSettings::kUseSmash) { |
185 | | // Extra logic to "smash" entries at beginning and end, for |
186 | | // better utilization. For example, without smash and with |
187 | | // kFirstCoeffAlwaysOne, there's about a 30% chance that the |
188 | | // first slot in the banding will be unused, and worse without |
189 | | // kFirstCoeffAlwaysOne. The ending slots are even less utilized |
190 | | // without smash. |
191 | | // |
192 | | // But since this only affects roughly kCoeffBits of the slots, |
193 | | // it's usually small enough to be ignorable (less computation in |
194 | | // this function) when number of slots is roughly 10k or larger. |
195 | | // |
196 | | // The best values for these smash weights might depend on how |
197 | | // densely you're packing entries, and also kCoeffBits, but this |
198 | | // seems to work well for roughly 95% success probability. |
199 | | // |
200 | 0 | constexpr Index kFrontSmash = kCoeffBits / 4; |
201 | 0 | constexpr Index kBackSmash = kCoeffBits / 4; |
202 | 0 | Index start = FastRangeGeneric(h, num_starts + kFrontSmash + kBackSmash); |
203 | 0 | start = std::max(start, kFrontSmash); |
204 | 0 | start -= kFrontSmash; |
205 | 0 | start = std::min(start, num_starts - 1); |
206 | 0 | return start; |
207 | 0 | } else { |
208 | | // For query speed, we allow small number of initial and final |
209 | | // entries to be under-utilized. |
210 | | // NOTE: This call statically enforces that Hash is equivalent to |
211 | | // either uint32_t or uint64_t. |
212 | 0 | return FastRangeGeneric(h, num_starts); |
213 | 0 | } |
214 | 0 | } |
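  // Worked example (editor's note), assuming kCoeffBits == 64: then
  // kFrontSmash == kBackSmash == 16, so h first maps into
  // [0, num_starts + 32). Mapped values 0..16 all clamp to start 0, and
  // the topmost mapped values clamp to num_starts - 1, "smashing" extra
  // entries onto the edge starts to utilize the first and last slots.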
215 | 0 | inline CoeffRow GetCoeffRow(Hash h) const { |
216 | | // This is not so much "critical path" code because it can be done in |
217 | | // parallel (instruction level) with memory lookup. |
218 | | // |
219 | | // When we might have many entries squeezed into a single start, |
220 | | // we need reasonably good remixing for CoeffRow. |
221 | 0 | if (TypesAndSettings::kUseSmash) { |
222 | | // Reasonably good, reasonably fast, reasonably general. |
223 | | // Probably not 1:1 but probably close enough. |
224 | 0 | Unsigned128 a = Multiply64to128(h, kAltCoeffFactor1); |
225 | 0 | Unsigned128 b = Multiply64to128(h, kAltCoeffFactor2); |
226 | 0 | auto cr = static_cast<CoeffRow>(b ^ (a << 64) ^ (a >> 64)); |
227 | | |
228 | | // Now ensure the value is non-zero |
229 | 0 | if (kFirstCoeffAlwaysOne) { |
230 | 0 | cr |= 1; |
231 | 0 | } else { |
232 | | // Still have to ensure some bit is non-zero |
233 | 0 | cr |= (cr == 0) ? 1 : 0; |
234 | 0 | } |
235 | 0 | return cr; |
236 | 0 | } |
237 | | // If not kUseSmash, we ensure we're not squeezing many entries into a |
238 | | // single start, in part by ensuring num_starts > num_slots / 2. Thus, |
239 | | // here we do not need good remixing for CoeffRow, but just enough that |
240 | | // (a) every bit is reasonably independent from Start. |
241 | | // (b) every Hash-length bit subsequence of the CoeffRow has full or |
242 | | // nearly full entropy from h. |
243 | | // (c) if nontrivial bit subsequences within are correlated, it needs to |
244 | | // be more complicated than exact copy or bitwise not (at least without |
245 | | // kFirstCoeffAlwaysOne), or else there seems to be a kind of |
246 | | // correlated clustering effect. |
247 | | // (d) the CoeffRow is not zero, so that no one input on its own can |
248 | | // doom construction success. (Preferably a mix of 1's and 0's if |
249 | | // satisfying above.) |
250 | | |
251 | | // First, establish sufficient bitwise independence from Start, with |
252 | | // multiplication by a large random prime. |
253 | | // Note that we cast to Hash because if we use product bits beyond |
254 | | // original input size, that's going to correlate with Start (FastRange) |
255 | | // even with a (likely) different multiplier here. |
256 | 0 | Hash a = h * kCoeffAndResultFactor; |
257 | |
258 | 0 | static_assert( |
259 | 0 | sizeof(Hash) == sizeof(uint64_t) || sizeof(Hash) == sizeof(uint32_t), |
260 | 0 | "Supported sizes"); |
261 | | // If that's big enough, we're done. If not, we have to expand it, |
262 | | // maybe up to 4x size. |
263 | 0 | uint64_t b; |
264 | 0 | if (sizeof(Hash) < sizeof(uint64_t)) { |
265 | | // Almost-trivial hash expansion (OK - see above), favoring roughly |
266 | | // equal number of 1's and 0's in result |
267 | 0 | b = (uint64_t{a} << 32) ^ (a ^ kCoeffXor32); |
268 | 0 | } else { |
269 | 0 | b = a; |
270 | 0 | } |
271 | 0 | static_assert(sizeof(CoeffRow) <= sizeof(Unsigned128), "Supported sizes"); |
272 | 0 | Unsigned128 c; |
273 | 0 | if (sizeof(uint64_t) < sizeof(CoeffRow)) { |
274 | | // Almost-trivial hash expansion (OK - see above), favoring roughly |
275 | | // equal number of 1's and 0's in result |
276 | 0 | c = (Unsigned128{b} << 64) ^ (b ^ kCoeffXor64); |
277 | 0 | } else { |
278 | 0 | c = b; |
279 | 0 | } |
280 | 0 | auto cr = static_cast<CoeffRow>(c); |
281 | | |
282 | | // Now ensure the value is non-zero |
283 | 0 | if (kFirstCoeffAlwaysOne) { |
284 | 0 | cr |= 1; |
285 | 0 | } else if (sizeof(CoeffRow) == sizeof(Hash)) { |
286 | | // Still have to ensure some bit is non-zero |
287 | 0 | cr |= (cr == 0) ? 1 : 0; |
288 | 0 | } else { |
289 | | // (We did trivial expansion with constant xor, which ensures some |
290 | | // bits are non-zero.) |
291 | 0 | } |
292 | 0 | return cr; |
293 | 0 | } |
294 | 0 | inline ResultRow GetResultRowMask() const { |
295 | | // TODO: will be used with InterleavedSolutionStorage? |
296 | | // For now, all bits set (note: might be a small type so might need to |
297 | | // narrow after promotion) |
298 | 0 | return static_cast<ResultRow>(~ResultRow{0}); |
299 | 0 | } |
300 | 0 | inline ResultRow GetResultRowFromHash(Hash h) const { |
301 | 0 | if (TypesAndSettings::kIsFilter && !TypesAndSettings::kHomogeneous) { |
302 | | // This is not so much "critical path" code because it can be done in |
303 | | // parallel (instruction level) with memory lookup. |
304 | | // |
305 | | // ResultRow bits only need to be independent from CoeffRow bits if |
306 | | // many entries might have the same start location, where "many" is |
307 | | // comparable to number of hash bits or kCoeffBits. If !kUseSmash |
308 | | // and num_starts > kCoeffBits, it is safe and efficient to draw from |
309 | | // the same bits computed for CoeffRow, which are reasonably |
310 | | // independent from Start. (Inlining and common subexpression |
311 | | // elimination with GetCoeffRow should make this |
312 | | // a single shared multiplication in generated code when !kUseSmash.) |
313 | 0 | Hash a = h * kCoeffAndResultFactor; |
314 | | |
315 | | // The bits here that are *most* independent of Start are the highest |
316 | | // order bits (as in Knuth multiplicative hash). To make those the |
317 | | // most preferred for use in the result row, we do a bswap here. |
318 | 0 | auto rr = static_cast<ResultRow>(EndianSwapValue(a)); |
319 | 0 | return rr & GetResultRowMask(); |
320 | 0 | } else { |
321 | | // Must be zero |
322 | 0 | return 0; |
323 | 0 | } |
324 | 0 | } |
325 | | // For when AddInput == Key (kIsFilter == true) |
326 | 0 | inline ResultRow GetResultRowFromInput(const Key&) const { |
327 | | // Must be zero |
328 | 0 | return 0; |
329 | 0 | } |
330 | | // For when AddInput == pair<Key, ResultRow> (kIsFilter == false) |
331 | | inline ResultRow GetResultRowFromInput( |
332 | | const std::pair<Key, ResultRow>& bi) const { |
333 | | // Simple extraction |
334 | | return bi.second; |
335 | | } |
336 | | |
337 | | // Seed tracking APIs - see class comment |
338 | | void SetRawSeed(Seed seed) { raw_seed_ = seed; } |
339 | | Seed GetRawSeed() { return raw_seed_; } |
340 | 0 | void SetOrdinalSeed(Seed count) { |
341 | | // A simple, reversible mixing of any size (whole bytes) up to 64 bits. |
342 | | // This allows casting the raw seed to any smaller size we use for |
343 | | // ordinal seeds without risk of duplicate raw seeds for unique ordinal |
344 | | // seeds. |
345 | | |
346 | | // Seed type might be smaller than numerical promotion size, but Hash |
347 | | // should be at least that size, so we use Hash as intermediate type. |
348 | 0 | static_assert(sizeof(Seed) <= sizeof(Hash), |
349 | 0 | "Hash must be at least size of Seed"); |
350 | | |
351 | | // Multiply by a large random prime (one-to-one for any prefix of bits) |
352 | 0 | Hash tmp = count * kToRawSeedFactor; |
353 | | // Within-byte one-to-one mixing |
354 | 0 | static_assert((kSeedMixMask & (kSeedMixMask >> kSeedMixShift)) == 0, |
355 | 0 | "Illegal mask+shift"); |
356 | 0 | tmp ^= (tmp & kSeedMixMask) >> kSeedMixShift; |
357 | 0 | raw_seed_ = static_cast<Seed>(tmp); |
358 | | // dynamic verification |
359 | 0 | assert(GetOrdinalSeed() == count); |
360 | 0 | } |
361 | 0 | Seed GetOrdinalSeed() { |
362 | 0 | Hash tmp = raw_seed_; |
363 | | // Within-byte one-to-one mixing (its own inverse) |
364 | 0 | tmp ^= (tmp & kSeedMixMask) >> kSeedMixShift; |
365 | | // Multiply by the multiplicative inverse (modulo the Hash width) |
366 | 0 | static_assert(kToRawSeedFactor * kFromRawSeedFactor == Hash{1}, |
367 | 0 | "Must be inverses"); |
368 | 0 | return static_cast<Seed>(tmp * kFromRawSeedFactor); |
369 | 0 | } |
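  // Illustrative round trip (editor's sketch, hypothetical hasher h):
  //   h.SetOrdinalSeed(5);              // raw_seed_ becomes well mixed
  //   assert(h.GetOrdinalSeed() == 5);  // the mixing is reversible
  //   Seed raw = h.GetRawSeed();        // keep raw for fast queries;
  //                                     // persist only the ordinal seed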
370 | | |
371 | | protected: |
372 | | // For expanding hash: |
373 | | // large random prime |
374 | | static constexpr Hash kCoeffAndResultFactor = |
375 | | static_cast<Hash>(0xc28f82822b650bedULL); |
376 | | static constexpr uint64_t kAltCoeffFactor1 = 0x876f170be4f1fcb9U; |
377 | | static constexpr uint64_t kAltCoeffFactor2 = 0xf0433a4aecda4c5fU; |
378 | | // random-ish data |
379 | | static constexpr uint32_t kCoeffXor32 = 0xa6293635U; |
380 | | static constexpr uint64_t kCoeffXor64 = 0xc367844a6e52731dU; |
381 | | |
382 | | // For pre-mixing seeds |
383 | | static constexpr Hash kSeedMixMask = static_cast<Hash>(0xf0f0f0f0f0f0f0f0ULL); |
384 | | static constexpr unsigned kSeedMixShift = 4U; |
385 | | static constexpr Hash kToRawSeedFactor = |
386 | | static_cast<Hash>(0xc78219a23eeadd03ULL); |
387 | | static constexpr Hash kFromRawSeedFactor = |
388 | | static_cast<Hash>(0xfe1a137d14b475abULL); |
389 | | |
390 | | // See class description |
391 | | Seed raw_seed_ = 0; |
392 | | }; |
393 | | |
394 | | // StandardRehasher (and StandardRehasherAdapter): A variant of |
395 | | // StandardHasher that uses the same type for keys as for hashes. |
396 | | // This is primarily intended for building a Ribbon filter |
397 | | // from existing hashes without going back to original inputs in |
398 | | // order to apply a different seed. This hasher seeds a 1-to-1 mixing |
399 | | // order to apply a different seed. This hasher applies a seeded |
400 | | // 1-to-1 mixing transformation to an existing hash. (Untested for |
401 | | // transformation builds on the seed pre-mixing done in StandardHasher. |
402 | | // |
403 | | // Testing suggests essentially no degradation of solution success rate |
404 | | // vs. going back to original inputs when changing hash seeds. For example: |
405 | | // Average re-seeds for solution with r=128, 1.02x overhead, and ~100k keys |
406 | | // is about 1.10 for both StandardHasher and StandardRehasher. |
407 | | // |
408 | | // StandardRehasher is not really recommended for general PHSFs (not |
409 | | // filters) because a collision in the original hash could prevent |
410 | | // construction despite re-seeding the Rehasher. (Such collisions |
411 | | // do not interfere with filter construction.) |
412 | | // |
413 | | // concept RehasherTypesAndSettings: like TypesAndSettings but |
414 | | // does not require Key or HashFn. |
415 | | template <class RehasherTypesAndSettings> |
416 | | class StandardRehasherAdapter : public RehasherTypesAndSettings { |
417 | | public: |
418 | | using Hash = typename RehasherTypesAndSettings::Hash; |
419 | | using Key = Hash; |
420 | | using Seed = typename RehasherTypesAndSettings::Seed; |
421 | | |
422 | 0 | static Hash HashFn(const Hash& input, Seed raw_seed) { |
423 | | // Note: raw_seed is already lightly pre-mixed, and this multiplication |
424 | | // by a large prime is sufficient mixing (low-to-high bits) on top of |
425 | | // that for good FastRange results, which depends primarily on highest |
426 | | // bits. (The hashed CoeffRow and ResultRow are less sensitive to |
427 | | // mixing than Start.) |
428 | | // Also note: did consider adding ^ (input >> some) before the |
429 | | // multiplication, but doesn't appear to be necessary. |
430 | 0 | return (input ^ raw_seed) * kRehashFactor; |
431 | 0 | } |
432 | | |
433 | | private: |
434 | | static constexpr Hash kRehashFactor = |
435 | | static_cast<Hash>(0x6193d459236a3a0dULL); |
436 | | }; |
437 | | |
438 | | // See comment on StandardRehasherAdapter |
439 | | template <class RehasherTypesAndSettings> |
440 | | using StandardRehasher = |
441 | | StandardHasher<StandardRehasherAdapter<RehasherTypesAndSettings>>; |
442 | | |
443 | | #ifdef _MSC_VER |
444 | | #pragma warning(pop) |
445 | | #endif |
446 | | |
447 | | // Especially with smaller hashes (e.g. 32 bit), there can be noticeable |
448 | | // false positives due to collisions in the Hash returned by GetHash. |
449 | | // This function returns the expected FP rate due to those collisions, |
450 | | // which can be added to the expected FP rate from the underlying data |
451 | | // structure. (Note: technically, a + b is only a good approximation of |
452 | | // 1-(1-a)(1-b) == a + b - a*b, if a and b are much closer to 0 than to 1.) |
453 | | // The number of entries added can be a double here in case it's an |
454 | | // average. |
455 | | template <class Hasher, typename Numerical> |
456 | | double ExpectedCollisionFpRate(const Hasher& hasher, Numerical added) { |
457 | | // Standardize on the 'double' specialization |
458 | | return ExpectedCollisionFpRate(hasher, 1.0 * added); |
459 | | } |
460 | | template <class Hasher> |
461 | | double ExpectedCollisionFpRate(const Hasher& /*hasher*/, double added) { |
462 | | // Technically, there could be overlap among the added, but ignoring that |
463 | | // is typically close enough. |
464 | | return added / std::pow(256.0, sizeof(typename Hasher::Hash)); |
465 | | } |
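// Worked example (editor's note): with a 32-bit Hash and 100,000 added
// keys, the added FP rate from whole-hash collisions is roughly
// 100000 / 256^4 = 100000 / 2^32 ~= 2.3e-5. A caller might estimate
//   double fp = soln.ExpectedFpRate() + ExpectedCollisionFpRate(h, 100000);
// where soln and h are hypothetical solution and hasher objects.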
466 | | |
467 | | // StandardBanding: a canonical implementation of BandingStorage and |
468 | | // BacktrackStorage, with convenience API for banding (solving with on-the-fly |
469 | | // Gaussian elimination) with and without backtracking. |
470 | | template <class TypesAndSettings> |
471 | | class StandardBanding : public StandardHasher<TypesAndSettings> { |
472 | | public: |
473 | | IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings); |
474 | | |
475 | 0 | StandardBanding(Index num_slots = 0, Index backtrack_size = 0) { |
476 | 0 | Reset(num_slots, backtrack_size); |
477 | 0 | } |
478 | | |
479 | 0 | void Reset(Index num_slots, Index backtrack_size = 0) { |
480 | 0 | if (num_slots == 0) { |
481 | | // Unusual (TypesAndSettings::kAllowZeroStarts) or "uninitialized" |
482 | 0 | num_starts_ = 0; |
483 | 0 | } else { |
484 | | // Normal |
485 | 0 | assert(num_slots >= kCoeffBits); |
486 | 0 | if (num_slots > num_slots_allocated_) { |
487 | 0 | coeff_rows_.reset(new CoeffRow[num_slots]()); |
488 | 0 | if (!TypesAndSettings::kHomogeneous) { |
489 | | // Note: don't strictly have to zero-init result_rows, |
490 | | // except possible information leakage, etc ;) |
491 | 0 | result_rows_.reset(new ResultRow[num_slots]()); |
492 | 0 | } |
493 | 0 | num_slots_allocated_ = num_slots; |
494 | 0 | } else { |
495 | 0 | for (Index i = 0; i < num_slots; ++i) { |
496 | 0 | coeff_rows_[i] = 0; |
497 | 0 | if (!TypesAndSettings::kHomogeneous) { |
498 | | // Note: don't strictly have to zero-init result_rows, |
499 | | // except possible information leakage, etc ;) |
500 | 0 | result_rows_[i] = 0; |
501 | 0 | } |
502 | 0 | } |
503 | 0 | } |
504 | 0 | num_starts_ = num_slots - kCoeffBits + 1; |
505 | 0 | } |
506 | 0 | EnsureBacktrackSize(backtrack_size); |
507 | 0 | } |
508 | | |
509 | 0 | void EnsureBacktrackSize(Index backtrack_size) { |
510 | 0 | if (backtrack_size > backtrack_size_) { |
511 | 0 | backtrack_.reset(new Index[backtrack_size]); |
512 | 0 | backtrack_size_ = backtrack_size; |
513 | 0 | } |
514 | 0 | } |
515 | | |
516 | | // ******************************************************************** |
517 | | // From concept BandingStorage |
518 | | |
519 | 0 | inline bool UsePrefetch() const { |
520 | | // A rough guesstimate of when prefetching during construction pays off. |
521 | | // TODO: verify/validate |
522 | 0 | return num_starts_ > 1500; |
523 | 0 | } |
524 | 0 | inline void Prefetch(Index i) const { |
525 | 0 | PREFETCH(&coeff_rows_[i], 1 /* rw */, 1 /* locality */); |
526 | 0 | if (!TypesAndSettings::kHomogeneous) { |
527 | 0 | PREFETCH(&result_rows_[i], 1 /* rw */, 1 /* locality */); |
528 | 0 | } |
529 | 0 | } |
530 | | inline void LoadRow(Index i, CoeffRow* cr, ResultRow* rr, |
531 | 0 | bool for_back_subst) const { |
532 | 0 | *cr = coeff_rows_[i]; |
533 | 0 | if (TypesAndSettings::kHomogeneous) { |
534 | 0 | if (for_back_subst && *cr == 0) { |
535 | | // Cheap pseudorandom data to fill unconstrained solution rows |
536 | 0 | *rr = static_cast<ResultRow>(i * 0x9E3779B185EBCA87ULL); |
537 | 0 | } else { |
538 | 0 | *rr = 0; |
539 | 0 | } |
540 | 0 | } else { |
541 | 0 | *rr = result_rows_[i]; |
542 | 0 | } |
543 | 0 | } |
544 | 0 | inline void StoreRow(Index i, CoeffRow cr, ResultRow rr) { |
545 | 0 | coeff_rows_[i] = cr; |
546 | 0 | if (TypesAndSettings::kHomogeneous) { |
547 | 0 | assert(rr == 0); |
548 | 0 | } else { |
549 | 0 | result_rows_[i] = rr; |
550 | 0 | } |
551 | 0 | } |
552 | 0 | inline Index GetNumStarts() const { return num_starts_; } |
553 | | |
554 | | // from concept BacktrackStorage, for when backtracking is used |
555 | | inline bool UseBacktrack() const { return true; } |
556 | | inline void BacktrackPut(Index i, Index to_save) { backtrack_[i] = to_save; } |
557 | | inline Index BacktrackGet(Index i) const { return backtrack_[i]; } |
558 | | |
559 | | // ******************************************************************** |
560 | | // Some useful API, still somewhat low level. Here an input is |
561 | | // a Key for filters, or std::pair<Key, ResultRow> for general PHSF. |
562 | | |
563 | | // Adds a range of inputs to the banding, returning true if successful. |
564 | | // False means none or some may have been successfully added, so it's |
565 | | // best to Reset this banding before any further use. |
566 | | // |
567 | | // Adding can fail even before all the "slots" are completely "full". |
568 | | // |
569 | | template <typename InputIterator> |
570 | 0 | bool AddRange(InputIterator begin, InputIterator end) { |
571 | 0 | assert(num_starts_ > 0 || TypesAndSettings::kAllowZeroStarts); |
572 | 0 | if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { |
573 | | // Unusual. Can't add any in this case. |
574 | 0 | return begin == end; |
575 | 0 | } |
576 | | // Normal |
577 | 0 | return BandingAddRange(this, *this, begin, end); |
578 | 0 | } |
579 | | |
580 | | // Adds a range of inputs to the banding, returning true if successful, |
581 | | // or if unsuccessful, rolls back to state before this call and returns |
582 | | // false. Caller guarantees that the number of inputs in this batch |
583 | | // does not exceed `backtrack_size` provided to Reset. |
584 | | // |
585 | | // Adding can fail even before all the "slots" are completely "full". |
586 | | // |
587 | | template <typename InputIterator> |
588 | | bool AddRangeOrRollBack(InputIterator begin, InputIterator end) { |
589 | | assert(num_starts_ > 0 || TypesAndSettings::kAllowZeroStarts); |
590 | | if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { |
591 | | // Unusual. Can't add any in this case. |
592 | | return begin == end; |
593 | | } |
594 | | // else Normal |
595 | | return BandingAddRange(this, this, *this, begin, end); |
596 | | } |
597 | | |
598 | | // Adds a single input to the banding, returning true if successful. |
599 | | // If unsuccessful, returns false and banding state is unchanged. |
600 | | // |
601 | | // Adding can fail even before all the "slots" are completely "full". |
602 | | // |
603 | | bool Add(const AddInput& input) { |
604 | | // Pointer can act as iterator |
605 | | return AddRange(&input, &input + 1); |
606 | | } |
607 | | |
608 | | // Return the number of "occupied" rows (with non-zero coefficients stored). |
609 | | Index GetOccupiedCount() const { |
610 | | Index count = 0; |
611 | | if (num_starts_ > 0) { |
612 | | const Index num_slots = num_starts_ + kCoeffBits - 1; |
613 | | for (Index i = 0; i < num_slots; ++i) { |
614 | | if (coeff_rows_[i] != 0) { |
615 | | ++count; |
616 | | } |
617 | | } |
618 | | } |
619 | | return count; |
620 | | } |
621 | | |
622 | | // Returns whether a row is "occupied" in the banding (non-zero |
623 | | // coefficients stored). (Only recommended for debug/test) |
624 | | bool IsOccupied(Index i) { return coeff_rows_[i] != 0; } |
625 | | |
626 | | // ******************************************************************** |
627 | | // High-level API |
628 | | |
629 | | // Iteratively (a) resets the structure for `num_slots`, (b) attempts |
630 | | // to add the range of inputs, and (c) if unsuccessful, chooses next |
631 | | // hash seed, until either successful or unsuccessful with all the |
632 | | // allowed seeds. Returns true if successful. In that case, use |
633 | | // GetOrdinalSeed() or GetRawSeed() to get the successful seed. |
634 | | // |
635 | | // The allowed sequence of hash seeds is determined by |
636 | | // `starting_ordinal_seed`, the first ordinal seed to be attempted |
637 | | // (see StandardHasher), and `ordinal_seed_mask`, a bit mask (power of |
638 | | // two minus one) for the range of ordinal seeds to consider. The |
639 | | // max number of seeds considered will be ordinal_seed_mask + 1. |
640 | | // For filters we suggest `starting_ordinal_seed` be chosen randomly |
641 | | // or round-robin, to minimize false positive correlations between keys. |
642 | | // |
643 | | // If unsuccessful, how best to continue is going to be application |
644 | | // specific. It should be possible to choose parameters such that |
645 | | // failure is extremely unlikely, allowing around 32 to 64 seeds. |
646 | | // (TODO: APIs to help choose parameters) One option for fallback in |
647 | | // constructing a filter is to construct a Bloom filter instead. |
648 | | // Increasing num_slots is an option, but should not be used often |
649 | | // unless maximum construction latency is a concern (rather than |
650 | | // average running time of construction). Instead, choose parameters |
651 | | // appropriately and trust that seeds are independent. (Also, |
652 | | // increasing num_slots without changing hash seed would have a |
653 | | // significant correlation in success, rather than independence.) |
654 | | template <typename InputIterator> |
655 | | bool ResetAndFindSeedToSolve(Index num_slots, InputIterator begin, |
656 | | InputIterator end, |
657 | | Seed starting_ordinal_seed = 0U, |
658 | 0 | Seed ordinal_seed_mask = 63U) { |
659 | | // power of 2 minus 1 |
660 | 0 | assert((ordinal_seed_mask & (ordinal_seed_mask + 1)) == 0); |
661 | | // starting seed is within mask |
662 | 0 | assert((starting_ordinal_seed & ordinal_seed_mask) == |
663 | 0 | starting_ordinal_seed); |
664 | 0 | starting_ordinal_seed &= ordinal_seed_mask; // if not debug |
665 | |
666 | 0 | Seed cur_ordinal_seed = starting_ordinal_seed; |
667 | 0 | do { |
668 | 0 | StandardHasher<TypesAndSettings>::SetOrdinalSeed(cur_ordinal_seed); |
669 | 0 | Reset(num_slots); |
670 | 0 | bool success = AddRange(begin, end); |
671 | 0 | if (success) { |
672 | 0 | return true; |
673 | 0 | } |
674 | 0 | cur_ordinal_seed = (cur_ordinal_seed + 1) & ordinal_seed_mask; |
675 | 0 | } while (cur_ordinal_seed != starting_ordinal_seed); |
676 | | // Reached limit by circling around |
677 | 0 | return false; |
678 | 0 | } |
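  // Illustrative use (editor's sketch; keys and num_slots hypothetical):
  //   StandardBanding<TypesAndSettings> banding;
  //   if (banding.ResetAndFindSeedToSolve(num_slots, keys.begin(),
  //                                       keys.end(), /*starting*/ 0,
  //                                       /*mask*/ 63)) {
  //     Seed ordinal = banding.GetOrdinalSeed();  // fits in 6 bits
  //     // proceed to back-substitution into a solution storage
  //   } else {
  //     // fall back, e.g. to a Bloom filter, as suggested above
  //   }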
679 | | |
680 | 0 | static std::size_t EstimateMemoryUsage(uint32_t num_slots) { |
681 | 0 | std::size_t bytes_coeff_rows = num_slots * sizeof(CoeffRow); |
682 | 0 | std::size_t bytes_result_rows = num_slots * sizeof(ResultRow); |
683 | 0 | std::size_t bytes_backtrack = 0; |
684 | 0 | std::size_t bytes_banding = |
685 | 0 | bytes_coeff_rows + bytes_result_rows + bytes_backtrack; |
686 | |
687 | 0 | return bytes_banding; |
688 | 0 | } |
689 | | |
690 | | protected: |
691 | | // TODO: explore combining in a struct |
692 | | std::unique_ptr<CoeffRow[]> coeff_rows_; |
693 | | std::unique_ptr<ResultRow[]> result_rows_; |
694 | | // We generally store "starts" instead of slots for speed of GetStart(), |
695 | | // as in StandardHasher. |
696 | | Index num_starts_ = 0; |
697 | | Index num_slots_allocated_ = 0; |
698 | | std::unique_ptr<Index[]> backtrack_; |
699 | | Index backtrack_size_ = 0; |
700 | | }; |
701 | | |
702 | | // Implements concept SimpleSolutionStorage, mostly for demonstration |
703 | | // purposes. This is "in memory" only because it does not handle byte |
704 | | // ordering issues for serialization. |
705 | | template <class TypesAndSettings> |
706 | | class InMemSimpleSolution { |
707 | | public: |
708 | | IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings); |
709 | | |
710 | | void PrepareForNumStarts(Index num_starts) { |
711 | | if (TypesAndSettings::kAllowZeroStarts && num_starts == 0) { |
712 | | // Unusual |
713 | | num_starts_ = 0; |
714 | | } else { |
715 | | // Normal |
716 | | const Index num_slots = num_starts + kCoeffBits - 1; |
717 | | assert(num_slots >= kCoeffBits); |
718 | | if (num_slots > num_slots_allocated_) { |
719 | | // Do not need to init the memory |
720 | | solution_rows_.reset(new ResultRow[num_slots]); |
721 | | num_slots_allocated_ = num_slots; |
722 | | } |
723 | | num_starts_ = num_starts; |
724 | | } |
725 | | } |
726 | | |
727 | | Index GetNumStarts() const { return num_starts_; } |
728 | | |
729 | | ResultRow Load(Index slot_num) const { return solution_rows_[slot_num]; } |
730 | | |
731 | | void Store(Index slot_num, ResultRow solution_row) { |
732 | | solution_rows_[slot_num] = solution_row; |
733 | | } |
734 | | |
735 | | // ******************************************************************** |
736 | | // High-level API |
737 | | |
738 | | template <typename BandingStorage> |
739 | | void BackSubstFrom(const BandingStorage& bs) { |
740 | | if (TypesAndSettings::kAllowZeroStarts && bs.GetNumStarts() == 0) { |
741 | | // Unusual |
742 | | PrepareForNumStarts(0); |
743 | | } else { |
744 | | // Normal |
745 | | SimpleBackSubst(this, bs); |
746 | | } |
747 | | } |
748 | | |
749 | | template <typename PhsfQueryHasher> |
750 | | ResultRow PhsfQuery(const Key& input, const PhsfQueryHasher& hasher) const { |
751 | | // assert(!TypesAndSettings::kIsFilter); Can be useful in testing |
752 | | if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { |
753 | | // Unusual |
754 | | return 0; |
755 | | } else { |
756 | | // Normal |
757 | | return SimplePhsfQuery(input, hasher, *this); |
758 | | } |
759 | | } |
760 | | |
761 | | template <typename FilterQueryHasher> |
762 | | bool FilterQuery(const Key& input, const FilterQueryHasher& hasher) const { |
763 | | assert(TypesAndSettings::kIsFilter); |
764 | | if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { |
765 | | // Unusual. Zero starts presumes no keys added -> always false |
766 | | return false; |
767 | | } else { |
768 | | // Normal (this in-memory solution always has space for result |
769 | | // data, unlike the interleaved solution) |
770 | | return SimpleFilterQuery(input, hasher, *this); |
771 | | } |
772 | | } |
773 | | |
774 | | double ExpectedFpRate() const { |
775 | | assert(TypesAndSettings::kIsFilter); |
776 | | if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { |
777 | | // Unusual, but we don't have FPs if we always return false. |
778 | | return 0.0; |
779 | | } |
780 | | // else Normal |
781 | | |
782 | | // Each result (solution) bit (column) cuts FP rate in half |
783 | | return std::pow(0.5, 8U * sizeof(ResultRow)); |
784 | | } |
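  // E.g. (editor's note) with ResultRow == uint8_t this is
  // 0.5^8 ~= 0.39%, plus any whole-hash collision effect
  // (see ExpectedCollisionFpRate above).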
785 | | |
786 | | // ******************************************************************** |
787 | | // Static high-level API |
788 | | |
789 | | // Round up to a number of slots supported by this structure. Note that |
790 | | // this must be taken into account for the banding if this |
791 | | // solution layout/storage is to be used. |
792 | | static Index RoundUpNumSlots(Index num_slots) { |
793 | | // Must be at least kCoeffBits for at least one start |
794 | | // Or, if not using smash, even more, because the hashing is not |
795 | | // equipped for stacking up so many entries on a single start location. |
796 | | auto min_slots = kCoeffBits * (TypesAndSettings::kUseSmash ? 1 : 2); |
797 | | return std::max(num_slots, static_cast<Index>(min_slots)); |
798 | | } |
799 | | |
800 | | protected: |
801 | | // We generally store "starts" instead of slots for speed of GetStart(), |
802 | | // as in StandardHasher. |
803 | | Index num_starts_ = 0; |
804 | | Index num_slots_allocated_ = 0; |
805 | | std::unique_ptr<ResultRow[]> solution_rows_; |
806 | | }; |
807 | | |
808 | | // Implements concept InterleavedSolutionStorage always using little-endian |
809 | | // byte order, so easy for serialization/deserialization. This implementation |
810 | | // fully supports fractional bits per key, where any number of segments |
811 | | // (number of bytes multiple of sizeof(CoeffRow)) can be used with any number |
812 | | // of slots that is a multiple of kCoeffBits. |
813 | | // |
814 | | // The structure is passed an externally allocated/de-allocated byte buffer |
815 | | // that is optionally pre-populated (from storage) for answering queries, |
816 | | // or can be populated by BackSubstFrom. |
817 | | // |
818 | | template <class TypesAndSettings> |
819 | | class SerializableInterleavedSolution { |
820 | | public: |
821 | | IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings); |
822 | | |
823 | | // Does not take ownership of `data` but uses it (up to `data_len` bytes) |
824 | | // throughout lifetime |
825 | | SerializableInterleavedSolution(char* data, size_t data_len) |
826 | 0 | : data_(data), data_len_(data_len) {} |
827 | | |
828 | 0 | void PrepareForNumStarts(Index num_starts) { |
829 | 0 | assert(num_starts == 0 || (num_starts % kCoeffBits == 1)); |
830 | 0 | num_starts_ = num_starts; |
831 | |
832 | 0 | InternalConfigure(); |
833 | 0 | } |
834 | | |
835 | 0 | Index GetNumStarts() const { return num_starts_; } |
836 | | |
837 | 0 | Index GetNumBlocks() const { |
838 | 0 | const Index num_slots = num_starts_ + kCoeffBits - 1; |
839 | 0 | return num_slots / kCoeffBits; |
840 | 0 | } |
841 | | |
842 | 0 | Index GetUpperNumColumns() const { return upper_num_columns_; } |
843 | | |
844 | 0 | Index GetUpperStartBlock() const { return upper_start_block_; } |
845 | | |
846 | 0 | Index GetNumSegments() const { |
847 | 0 | return static_cast<Index>(data_len_ / sizeof(CoeffRow)); |
848 | 0 | } |
849 | | |
850 | 0 | CoeffRow LoadSegment(Index segment_num) const { |
851 | 0 | assert(data_ != nullptr); // suppress clang analyzer report |
852 | 0 | return DecodeFixedGeneric<CoeffRow>(data_ + segment_num * sizeof(CoeffRow)); |
853 | 0 | } |
854 | 0 | void StoreSegment(Index segment_num, CoeffRow val) { |
855 | 0 | assert(data_ != nullptr); // suppress clang analyzer report |
856 | 0 | EncodeFixedGeneric(data_ + segment_num * sizeof(CoeffRow), val); |
857 | 0 | } |
858 | | void PrefetchSegmentRange(Index begin_segment_num, |
859 | 0 | Index end_segment_num) const { |
860 | 0 | if (end_segment_num == begin_segment_num) { |
861 | | // Nothing to do |
862 | 0 | return; |
863 | 0 | } |
864 | 0 | char* cur = data_ + begin_segment_num * sizeof(CoeffRow); |
865 | 0 | char* last = data_ + (end_segment_num - 1) * sizeof(CoeffRow); |
866 | 0 | while (cur < last) { |
867 | 0 | PREFETCH(cur, 0 /* rw */, 1 /* locality */); |
868 | 0 | cur += CACHE_LINE_SIZE; |
869 | 0 | } |
870 | 0 | PREFETCH(last, 0 /* rw */, 1 /* locality */); |
871 | 0 | } |
872 | | |
873 | | // ******************************************************************** |
874 | | // High-level API |
875 | | |
876 | 0 | void ConfigureForNumBlocks(Index num_blocks) { |
877 | 0 | if (num_blocks == 0) { |
878 | 0 | PrepareForNumStarts(0); |
879 | 0 | } else { |
880 | 0 | PrepareForNumStarts(num_blocks * kCoeffBits - kCoeffBits + 1); |
881 | 0 | } |
882 | 0 | } |
883 | | |
884 | 0 | void ConfigureForNumSlots(Index num_slots) { |
885 | 0 | assert(num_slots % kCoeffBits == 0); |
886 | 0 | ConfigureForNumBlocks(num_slots / kCoeffBits); |
887 | 0 | } |
888 | | |
889 | | template <typename BandingStorage> |
890 | 0 | void BackSubstFrom(const BandingStorage& bs) { |
891 | 0 | if (TypesAndSettings::kAllowZeroStarts && bs.GetNumStarts() == 0) { |
892 | | // Unusual |
893 | 0 | PrepareForNumStarts(0); |
894 | 0 | } else { |
895 | | // Normal |
896 | 0 | InterleavedBackSubst(this, bs); |
897 | 0 | } |
898 | 0 | } |
899 | | |
900 | | template <typename PhsfQueryHasher> |
901 | | ResultRow PhsfQuery(const Key& input, const PhsfQueryHasher& hasher) const { |
902 | | // assert(!TypesAndSettings::kIsFilter); Can be useful in testing |
903 | | if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { |
904 | | // Unusual |
905 | | return 0; |
906 | | } else { |
907 | | // Normal |
908 | | // NOTE: not using a struct to encourage compiler optimization |
909 | | Hash hash; |
910 | | Index segment_num; |
911 | | Index num_columns; |
912 | | Index start_bit; |
913 | | InterleavedPrepareQuery(input, hasher, *this, &hash, &segment_num, |
914 | | &num_columns, &start_bit); |
915 | | return InterleavedPhsfQuery(hash, segment_num, num_columns, start_bit, |
916 | | hasher, *this); |
917 | | } |
918 | | } |
919 | | |
920 | | template <typename FilterQueryHasher> |
921 | 0 | bool FilterQuery(const Key& input, const FilterQueryHasher& hasher) const { |
922 | 0 | assert(TypesAndSettings::kIsFilter); |
923 | 0 | if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { |
924 | | // Unusual. Zero starts presumes no keys added -> always false |
925 | 0 | return false; |
926 | 0 | } else { |
927 | | // Normal, or upper_num_columns_ == 0 means "no space for data" and |
928 | | // thus will always return true. |
929 | | // NOTE: not using a struct to encourage compiler optimization |
930 | 0 | Hash hash; |
931 | 0 | Index segment_num; |
932 | 0 | Index num_columns; |
933 | 0 | Index start_bit; |
934 | 0 | InterleavedPrepareQuery(input, hasher, *this, &hash, &segment_num, |
935 | 0 | &num_columns, &start_bit); |
936 | 0 | return InterleavedFilterQuery(hash, segment_num, num_columns, start_bit, |
937 | 0 | hasher, *this); |
938 | 0 | } |
939 | 0 | } |
940 | | |
941 | 0 | double ExpectedFpRate() const { |
942 | 0 | assert(TypesAndSettings::kIsFilter); |
943 | 0 | if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { |
944 | | // Unusual. Zero starts presumes no keys added -> always false |
945 | 0 | return 0.0; |
946 | 0 | } |
947 | | // else Normal |
948 | | |
949 | | // Note: Ignoring smash setting; still close enough in that case |
950 | 0 | double lower_portion = |
951 | 0 | (upper_start_block_ * 1.0 * kCoeffBits) / num_starts_; |
952 | | |
953 | | // Each result (solution) bit (column) cuts FP rate in half. Weight that |
954 | | // for upper and lower number of bits (columns). |
955 | 0 | return lower_portion * std::pow(0.5, upper_num_columns_ - 1) + |
956 | 0 | (1.0 - lower_portion) * std::pow(0.5, upper_num_columns_); |
957 | 0 | } |
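  // Worked example (editor's note): if upper_num_columns_ == 7 and a
  // quarter of the starts fall in lower (6-column) blocks, the rate is
  // 0.25 * 2^-6 + 0.75 * 2^-7 ~= 0.0098, just under 1%.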
958 | | |
959 | | // ******************************************************************** |
960 | | // Static high-level API |
961 | | |
962 | | // Round up to a number of slots supported by this structure. Note that |
963 | | // this must be taken into account for the banding if this |
964 | | // solution layout/storage is to be used. |
965 | 0 | static Index RoundUpNumSlots(Index num_slots) { |
966 | | // Must be multiple of kCoeffBits |
967 | 0 | Index corrected = (num_slots + kCoeffBits - 1) / kCoeffBits * kCoeffBits; |
968 | | |
969 | | // Do not use num_starts==1 unless kUseSmash, because the hashing |
970 | | // might not be equipped for stacking up so many entries on a |
971 | | // single start location. |
972 | 0 | if (!TypesAndSettings::kUseSmash && corrected == kCoeffBits) { |
973 | 0 | corrected += kCoeffBits; |
974 | 0 | } |
975 | 0 | return corrected; |
976 | 0 | } |
977 | | |
978 | | // Round down to a number of slots supported by this structure. Note that |
979 | | // this must be taken into account for the banding if this |
980 | | // solution layout/storage is to be used. |
981 | 0 | static Index RoundDownNumSlots(Index num_slots) { |
982 | | // Must be multiple of kCoeffBits |
983 | 0 | Index corrected = num_slots / kCoeffBits * kCoeffBits; |
984 | | |
985 | | // Do not use num_starts==1 unless kUseSmash, because the hashing |
986 | | // might not be equipped for stacking up so many entries on a |
987 | | // single start location. |
988 | 0 | if (!TypesAndSettings::kUseSmash && corrected == kCoeffBits) { |
989 | 0 | corrected = 0; |
990 | 0 | } |
991 | 0 | return corrected; |
992 | 0 | } |
993 | | |
994 | | // Compute the number of bytes for a given number of slots and desired |
995 | | // FP rate. Since desired FP rate might not be exactly achievable, |
996 | | // rounding_bias32==0 means to always round toward lower FP rate |
997 | | // than desired (more bytes); rounding_bias32==max uint32_t means always |
998 | | // round toward higher FP rate than desired (fewer bytes); other values |
999 | | // act as a proportional threshold or bias between the two. |
1000 | | static size_t GetBytesForFpRate(Index num_slots, double desired_fp_rate, |
1001 | | uint32_t rounding_bias32) { |
1002 | | return InternalGetBytesForFpRate(num_slots, desired_fp_rate, |
1003 | | 1.0 / desired_fp_rate, rounding_bias32); |
1004 | | } |
1005 | | |
1006 | | // The same, but specifying desired accuracy as 1.0 / FP rate, or |
1007 | | // one_in_fp_rate. E.g. desired_one_in_fp_rate=100 means 1% FP rate. |
1008 | | static size_t GetBytesForOneInFpRate(Index num_slots, |
1009 | | double desired_one_in_fp_rate, |
1010 | 0 | uint32_t rounding_bias32) { |
1011 | 0 | return InternalGetBytesForFpRate(num_slots, 1.0 / desired_one_in_fp_rate, |
1012 | 0 | desired_one_in_fp_rate, rounding_bias32); |
1013 | 0 | } |
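  // Illustrative use (editor's sketch): size a ~1% FP filter, rounding
  // toward the lower (better) FP rate when between supported sizes:
  //   size_t bytes = GetBytesForOneInFpRate(
  //       num_slots, /*one_in_fp_rate=*/100.0, /*rounding_bias32=*/0);
  // num_slots must already be rounded with RoundUpNumSlots.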
1014 | | |
1015 | | protected: |
1016 | | static size_t InternalGetBytesForFpRate(Index num_slots, |
1017 | | double desired_fp_rate, |
1018 | | double desired_one_in_fp_rate, |
1019 | 0 | uint32_t rounding_bias32) { |
1020 | 0 | assert(TypesAndSettings::kIsFilter); |
1021 | 0 | if (TypesAndSettings::kAllowZeroStarts) { |
1022 | 0 | if (num_slots == 0) { |
1023 | | // Unusual. Zero starts presumes no keys added -> always false (no FPs) |
1024 | 0 | return 0U; |
1025 | 0 | } |
1026 | 0 | } else { |
1027 | 0 | assert(num_slots > 0); |
1028 | 0 | } |
1029 | | // Must be rounded up already. |
1030 | 0 | assert(RoundUpNumSlots(num_slots) == num_slots); |
1031 | |
1032 | 0 | if (desired_one_in_fp_rate > 1.0 && desired_fp_rate < 1.0) { |
1033 | | // Typical: less than 100% FP rate |
1034 | 0 | if (desired_one_in_fp_rate <= static_cast<ResultRow>(-1)) { |
1035 | | // Typical: Less than maximum result row entropy |
1036 | 0 | ResultRow rounded = static_cast<ResultRow>(desired_one_in_fp_rate); |
1037 | 0 | int lower_columns = FloorLog2(rounded); |
1038 | 0 | double lower_columns_fp_rate = std::pow(2.0, -lower_columns); |
1039 | 0 | double upper_columns_fp_rate = std::pow(2.0, -(lower_columns + 1)); |
1040 | | // Floating point don't let me down! |
1041 | 0 | assert(lower_columns_fp_rate >= desired_fp_rate); |
1042 | 0 | assert(upper_columns_fp_rate <= desired_fp_rate); |
1043 | |
1044 | 0 | double lower_portion = (desired_fp_rate - upper_columns_fp_rate) / |
1045 | 0 | (lower_columns_fp_rate - upper_columns_fp_rate); |
1046 | | // Floating point don't let me down! |
1047 | 0 | assert(lower_portion >= 0.0); |
1048 | 0 | assert(lower_portion <= 1.0); |
1049 | |
1050 | 0 | double rounding_bias = (rounding_bias32 + 0.5) / double{0x100000000}; |
1051 | 0 | assert(rounding_bias > 0.0); |
1052 | 0 | assert(rounding_bias < 1.0); |
1053 | | |
1054 | | // Note: Ignoring smash setting; still close enough in that case |
1055 | 0 | Index num_starts = num_slots - kCoeffBits + 1; |
1056 | | // Lower upper_start_block means lower FP rate (higher accuracy) |
1057 | 0 | Index upper_start_block = static_cast<Index>( |
1058 | 0 | (lower_portion * num_starts + rounding_bias) / kCoeffBits); |
1059 | 0 | Index num_blocks = num_slots / kCoeffBits; |
1060 | 0 | assert(upper_start_block < num_blocks); |
1061 | | |
1062 | | // Start by assuming all blocks use lower number of columns |
1063 | 0 | Index num_segments = num_blocks * static_cast<Index>(lower_columns); |
1064 | | // Correct by 1 each for blocks using upper number of columns |
1065 | 0 | num_segments += (num_blocks - upper_start_block); |
1066 | | // Total bytes |
1067 | 0 | return num_segments * sizeof(CoeffRow); |
1068 | 0 | } else { |
1069 | | // one_in_fp_rate too big, thus requested FP rate is smaller than |
1070 | | // supported. Use max number of columns for minimum supported FP rate. |
1071 | 0 | return num_slots * sizeof(ResultRow); |
1072 | 0 | } |
1073 | 0 | } else { |
1074 | | // Effectively asking for 100% FP rate, or NaN etc. |
1075 | 0 | if (TypesAndSettings::kAllowZeroStarts) { |
1076 | | // Zero segments |
1077 | 0 | return 0U; |
1078 | 0 | } else { |
1079 | | // One segment (minimum size, maximizing FP rate) |
1080 | 0 | return sizeof(CoeffRow); |
1081 | 0 | } |
1082 | 0 | } |
1083 | 0 | } |
1084 | | |
1085 | 0 | void InternalConfigure() { |
1086 | 0 | const Index num_blocks = GetNumBlocks(); |
1087 | 0 | Index num_segments = GetNumSegments(); |
1088 | |
1089 | 0 | if (num_blocks == 0) { |
1090 | | // Exceptional |
1091 | 0 | upper_num_columns_ = 0; |
1092 | 0 | upper_start_block_ = 0; |
1093 | 0 | } else { |
1094 | | // Normal |
1095 | 0 | upper_num_columns_ = |
1096 | 0 | (num_segments + /*round up*/ num_blocks - 1) / num_blocks; |
1097 | 0 | upper_start_block_ = upper_num_columns_ * num_blocks - num_segments; |
1098 | | // Unless that's more columns than supported by ResultRow data type |
1099 | 0 | if (upper_num_columns_ > 8U * sizeof(ResultRow)) { |
1100 | | // Use maximum columns (there will be space unused) |
1101 | 0 | upper_num_columns_ = static_cast<Index>(8U * sizeof(ResultRow)); |
1102 | 0 | upper_start_block_ = 0; |
1103 | 0 | num_segments = num_blocks * upper_num_columns_; |
1104 | 0 | } |
1105 | 0 | } |
1106 | | // Update data_len_ for correct rounding and/or unused space |
1107 | | // NOTE: unused space stays gone if we PrepareForNumStarts again. |
1108 | | // We are prioritizing minimizing the number of fields over making |
1109 | | // the "unused space" feature work well. |
1110 | 0 | data_len_ = num_segments * sizeof(CoeffRow); |
1111 | 0 | } |
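  // Worked example (editor's note): with num_blocks == 4 and
  // num_segments == 23, upper_num_columns_ = ceil(23 / 4) = 6 and
  // upper_start_block_ = 6 * 4 - 23 = 1, i.e. block 0 uses 5 columns
  // and blocks 1..3 use 6 columns: 5 + 3 * 6 = 23 segments.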
1112 | | |
1113 | | char* const data_; |
1114 | | size_t data_len_; |
1115 | | Index num_starts_ = 0; |
1116 | | Index upper_num_columns_ = 0; |
1117 | | Index upper_start_block_ = 0; |
1118 | | }; |
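// Illustrative end-to-end flow (editor's sketch; banding, key, and sizes
// are hypothetical; InterleavedSoln as in IMPORT_RIBBON_IMPL_TYPES at the
// end of this file). The same bytes can be persisted and later re-wrapped
// for queries, since the layout is fixed little-endian:
//   size_t bytes = InterleavedSoln::GetBytesForOneInFpRate(
//       num_slots, /*one_in_fp_rate=*/100.0, /*rounding_bias32=*/0);
//   std::unique_ptr<char[]> buf(new char[bytes]());
//   InterleavedSoln soln(buf.get(), bytes);
//   soln.BackSubstFrom(banding);
//   bool may_match = soln.FilterQuery(key, banding);  // banding is-a hasher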
1119 | | |
1120 | | } // namespace ribbon |
1121 | | |
1122 | | } // namespace ROCKSDB_NAMESPACE |
1123 | | |
1124 | | // For convenience working with templates |
1125 | | #define IMPORT_RIBBON_IMPL_TYPES(TypesAndSettings) \ |
1126 | | using Hasher = ROCKSDB_NAMESPACE::ribbon::StandardHasher<TypesAndSettings>; \ |
1127 | | using Banding = \ |
1128 | | ROCKSDB_NAMESPACE::ribbon::StandardBanding<TypesAndSettings>; \ |
1129 | | using SimpleSoln = \ |
1130 | | ROCKSDB_NAMESPACE::ribbon::InMemSimpleSolution<TypesAndSettings>; \ |
1131 | | using InterleavedSoln = \ |
1132 | | ROCKSDB_NAMESPACE::ribbon::SerializableInterleavedSolution< \ |
1133 | | TypesAndSettings>; \ |
1134 | | static_assert(sizeof(Hasher) + sizeof(Banding) + sizeof(SimpleSoln) + \ |
1135 | | sizeof(InterleavedSoln) > \ |
1136 | | 0, \ |
1137 | | "avoid unused warnings, semicolon expected after macro call") |