Line data Source code
1 : // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : // Package testkeys provides facilities for generating and comparing
6 : // human-readable test keys for use in tests and benchmarks. This package
7 : // provides a single Comparer implementation that compares all keys generated
8 : // by this package.
9 : //
10 : // Keys generated by this package may optionally have a 'suffix' encoding an
11 : // MVCC timestamp. This suffix is of the form "@<integer>". Comparisons on the
12 : // suffix are performed using integer value, not the byte representation.
13 : package testkeys
14 :
15 : import (
16 : "bytes"
17 : "cmp"
18 : "fmt"
19 : "math"
20 : "strconv"
21 : "strings"
22 :
23 : "github.com/cockroachdb/pebble/internal/base"
24 : "golang.org/x/exp/rand"
25 : )
26 :
27 : const alpha = "abcdefghijklmnopqrstuvwxyz"
28 :
29 : const suffixDelim = '@'
30 :
31 : var inverseAlphabet = make(map[byte]int64, len(alpha))
32 :
33 1 : func init() {
34 1 : for i := range alpha {
35 1 : inverseAlphabet[alpha[i]] = int64(i)
36 1 : }
37 : }
38 :
39 : // MaxSuffixLen is the maximum length of a suffix generated by this package.
40 : var MaxSuffixLen = 1 + len(fmt.Sprintf("%d", int64(math.MaxInt64)))
41 :
42 : // Comparer is the comparer for test keys generated by this package.
43 : var Comparer = &base.Comparer{
44 : Compare: compare,
45 1 : Equal: func(a, b []byte) bool { return compare(a, b) == 0 },
46 1 : AbbreviatedKey: func(k []byte) uint64 {
47 1 : return base.DefaultComparer.AbbreviatedKey(k[:split(k)])
48 1 : },
49 : FormatKey: base.DefaultFormatter,
50 1 : Separator: func(dst, a, b []byte) []byte {
51 1 : ai := split(a)
52 1 : if ai == len(a) {
53 1 : return append(dst, a...)
54 1 : }
55 1 : bi := split(b)
56 1 : if bi == len(b) {
57 1 : return append(dst, a...)
58 1 : }
59 :
60 : // If the keys are the same just return a.
61 1 : if bytes.Equal(a[:ai], b[:bi]) {
62 1 : return append(dst, a...)
63 1 : }
64 1 : n := len(dst)
65 1 : dst = base.DefaultComparer.Separator(dst, a[:ai], b[:bi])
66 1 : // Did it pick a separator different than a[:ai] -- if not we can't do better than a.
67 1 : buf := dst[n:]
68 1 : if bytes.Equal(a[:ai], buf) {
69 1 : return append(dst[:n], a...)
70 1 : }
71 : // The separator is > a[:ai], so return it
72 1 : return dst
73 : },
74 1 : Successor: func(dst, a []byte) []byte {
75 1 : ai := split(a)
76 1 : if ai == len(a) {
77 1 : return append(dst, a...)
78 1 : }
79 1 : n := len(dst)
80 1 : dst = base.DefaultComparer.Successor(dst, a[:ai])
81 1 : // Did it pick a successor different than a[:ai] -- if not we can't do better than a.
82 1 : buf := dst[n:]
83 1 : if bytes.Equal(a[:ai], buf) {
84 0 : return append(dst[:n], a...)
85 0 : }
86 : // The successor is > a[:ai], so return it.
87 1 : return dst
88 : },
89 1 : ImmediateSuccessor: func(dst, a []byte) []byte {
90 1 : // TODO(jackson): Consider changing this Comparer to only support
91 1 : // representable prefix keys containing characters a-z.
92 1 : ai := split(a)
93 1 : if ai != len(a) {
94 0 : panic("pebble: ImmediateSuccessor invoked with a non-prefix key")
95 : }
96 1 : return append(append(dst, a...), 0x00)
97 : },
98 : Split: split,
99 : Name: "pebble.internal.testkeys",
100 : }
101 :
102 1 : func compare(a, b []byte) int {
103 1 : ai, bi := split(a), split(b)
104 1 : if v := bytes.Compare(a[:ai], b[:bi]); v != 0 {
105 1 : return v
106 1 : }
107 :
108 1 : if len(a[ai:]) == 0 {
109 1 : if len(b[bi:]) == 0 {
110 1 : return 0
111 1 : }
112 1 : return -1
113 1 : } else if len(b[bi:]) == 0 {
114 1 : return +1
115 1 : }
116 1 : return compareTimestamps(a[ai:], b[bi:])
117 : }
118 :
119 1 : func split(a []byte) int {
120 1 : i := bytes.LastIndexByte(a, suffixDelim)
121 1 : if i >= 0 {
122 1 : return i
123 1 : }
124 1 : return len(a)
125 : }
126 :
127 1 : func compareTimestamps(a, b []byte) int {
128 1 : ai, err := parseUintBytes(bytes.TrimPrefix(a, []byte{suffixDelim}), 10, 64)
129 1 : if err != nil {
130 0 : panic(fmt.Sprintf("invalid test mvcc timestamp %q", a))
131 : }
132 1 : bi, err := parseUintBytes(bytes.TrimPrefix(b, []byte{suffixDelim}), 10, 64)
133 1 : if err != nil {
134 0 : panic(fmt.Sprintf("invalid test mvcc timestamp %q", b))
135 : }
136 1 : return cmp.Compare(bi, ai)
137 : }
138 :
139 : // Keyspace describes a finite keyspace of unsuffixed test keys.
140 : type Keyspace interface {
141 : // Count returns the number of keys that exist within this keyspace.
142 : Count() int64
143 :
144 : // MaxLen returns the maximum length, in bytes, of a key within this
145 : // keyspace. This is only guaranteed to return an upper bound.
146 : MaxLen() int
147 :
148 : // Slice returns the sub-keyspace from index i, inclusive, to index j,
149 : // exclusive. The receiver is unmodified.
150 : Slice(i, j int64) Keyspace
151 :
152 : // EveryN returns a key space that includes 1 key for every N keys in the
153 : // original keyspace. The receiver is unmodified.
154 : EveryN(n int64) Keyspace
155 :
156 : // key writes the i-th key to the buffer and returns the length.
157 : key(buf []byte, i int64) int
158 : }
159 :
160 : // Divvy divides the provided keyspace into N equal portions, containing
161 : // disjoint keys evenly distributed across the keyspace.
162 1 : func Divvy(ks Keyspace, n int64) []Keyspace {
163 1 : ret := make([]Keyspace, n)
164 1 : for i := int64(0); i < n; i++ {
165 1 : ret[i] = ks.Slice(i, ks.Count()).EveryN(n)
166 1 : }
167 1 : return ret
168 : }
169 :
170 : // Alpha constructs a keyspace consisting of all keys containing characters a-z,
171 : // with at most `maxLength` characters.
172 1 : func Alpha(maxLength int) Keyspace {
173 1 : return alphabet{
174 1 : alphabet: []byte(alpha),
175 1 : maxLength: maxLength,
176 1 : increment: 1,
177 1 : }
178 1 : }
179 :
180 : // KeyAt returns the i-th key within the keyspace with a suffix encoding the
181 : // timestamp t.
182 1 : func KeyAt(k Keyspace, i int64, t int64) []byte {
183 1 : b := make([]byte, k.MaxLen()+MaxSuffixLen)
184 1 : return b[:WriteKeyAt(b, k, i, t)]
185 1 : }
186 :
187 : // WriteKeyAt writes the i-th key within the keyspace to the buffer dst, with a
188 : // suffix encoding the timestamp t suffix. It returns the number of bytes
189 : // written.
190 1 : func WriteKeyAt(dst []byte, k Keyspace, i int64, t int64) int {
191 1 : n := WriteKey(dst, k, i)
192 1 : n += WriteSuffix(dst[n:], t)
193 1 : return n
194 1 : }
195 :
196 : // Suffix returns the test keys suffix representation of timestamp t.
197 1 : func Suffix(t int64) []byte {
198 1 : b := make([]byte, MaxSuffixLen)
199 1 : return b[:WriteSuffix(b, t)]
200 1 : }
201 :
202 : // SuffixLen returns the exact length of the given suffix when encoded.
203 1 : func SuffixLen(t int64) int {
204 1 : // Begin at 1 for the '@' delimiter, 1 for a single digit.
205 1 : n := 2
206 1 : t /= 10
207 1 : for t > 0 {
208 1 : t /= 10
209 1 : n++
210 1 : }
211 1 : return n
212 : }
213 :
214 : // ParseSuffix returns the integer representation of the encoded suffix.
215 1 : func ParseSuffix(s []byte) (int64, error) {
216 1 : return strconv.ParseInt(strings.TrimPrefix(string(s), string(suffixDelim)), 10, 64)
217 1 : }
218 :
219 : // WriteSuffix writes the test keys suffix representation of timestamp t to dst,
220 : // returning the number of bytes written.
221 1 : func WriteSuffix(dst []byte, t int64) int {
222 1 : dst[0] = suffixDelim
223 1 : n := 1
224 1 : n += len(strconv.AppendInt(dst[n:n], t, 10))
225 1 : return n
226 1 : }
227 :
228 : // Key returns the i-th unsuffixed key within the keyspace.
229 1 : func Key(k Keyspace, i int64) []byte {
230 1 : b := make([]byte, k.MaxLen())
231 1 : return b[:k.key(b, i)]
232 1 : }
233 :
234 : // WriteKey writes the i-th unsuffixed key within the keyspace to the buffer dst. It
235 : // returns the number of bytes written.
236 1 : func WriteKey(dst []byte, k Keyspace, i int64) int {
237 1 : return k.key(dst, i)
238 1 : }
239 :
240 : type alphabet struct {
241 : alphabet []byte
242 : maxLength int
243 : headSkip int64
244 : tailSkip int64
245 : increment int64
246 : }
247 :
248 1 : func (a alphabet) Count() int64 {
249 1 : // Calculate the total number of keys, ignoring the increment.
250 1 : total := keyCount(len(a.alphabet), a.maxLength) - a.headSkip - a.tailSkip
251 1 :
252 1 : // The increment dictates that we take every N keys, where N = a.increment.
253 1 : // Consider a total containing the 5 keys:
254 1 : // a b c d e
255 1 : // ^ ^ ^
256 1 : // If the increment is 2, this keyspace includes 'a', 'c' and 'e'. After
257 1 : // dividing by the increment, there may be remainder. If there is, there's
258 1 : // one additional key in the alphabet.
259 1 : count := total / a.increment
260 1 : if total%a.increment > 0 {
261 1 : count++
262 1 : }
263 1 : return count
264 : }
265 :
266 1 : func (a alphabet) MaxLen() int {
267 1 : return a.maxLength
268 1 : }
269 :
270 1 : func (a alphabet) Slice(i, j int64) Keyspace {
271 1 : s := a
272 1 : s.headSkip += i
273 1 : s.tailSkip += a.Count() - j
274 1 : return s
275 1 : }
276 :
277 1 : func (a alphabet) EveryN(n int64) Keyspace {
278 1 : s := a
279 1 : s.increment *= n
280 1 : return s
281 1 : }
282 :
283 1 : func keyCount(n, l int) int64 {
284 1 : if n == 0 {
285 0 : return 0
286 1 : } else if n == 1 {
287 0 : return int64(l)
288 0 : }
289 : // The number of representable keys in the keyspace is a function of the
290 : // length of the alphabet n and the max key length l. Consider how the
291 : // number of representable keys grows as l increases:
292 : //
293 : // l = 1: n
294 : // l = 2: n + n^2
295 : // l = 3: n + n^2 + n^3
296 : // ...
297 : // Σ i=(1...l) n^i = n*(n^l - 1)/(n-1)
298 1 : return (int64(n) * (int64(math.Pow(float64(n), float64(l))) - 1)) / int64(n-1)
299 : }
300 :
301 1 : func (a alphabet) key(buf []byte, idx int64) int {
302 1 : // This function generates keys of length 1..maxKeyLength, pulling
303 1 : // characters from the alphabet. The idx determines which key to generate,
304 1 : // generating the i-th lexicographically next key.
305 1 : //
306 1 : // The index to use is advanced by `headSkip`, allowing a keyspace to encode
307 1 : // a subregion of the keyspace.
308 1 : //
309 1 : // Eg, alphabet = `ab`, maxKeyLength = 3:
310 1 : //
311 1 : // aaa aab aba abb baa bab bba bbb
312 1 : // aa ab ba bb
313 1 : // a b
314 1 : // 0 1 2 3 4 5 6 7 8 9 10 11 12 13
315 1 : //
316 1 : return generateAlphabetKey(buf, a.alphabet, (idx*a.increment)+a.headSkip,
317 1 : keyCount(len(a.alphabet), a.maxLength))
318 1 : }
319 :
320 1 : func generateAlphabetKey(buf, alphabet []byte, i, keyCount int64) int {
321 1 : if keyCount == 0 || i > keyCount || i < 0 {
322 1 : return 0
323 1 : }
324 :
325 : // Of the keyCount keys in the generative keyspace, how many are there
326 : // starting with a particular character?
327 1 : keysPerCharacter := keyCount / int64(len(alphabet))
328 1 :
329 1 : // Find the character that the key at index i starts with and set it.
330 1 : characterIdx := i / keysPerCharacter
331 1 : buf[0] = alphabet[characterIdx]
332 1 :
333 1 : // Consider characterIdx = 0, pointing to 'a'.
334 1 : //
335 1 : // aaa aab aba abb baa bab bba bbb
336 1 : // aa ab ba bb
337 1 : // a b
338 1 : // 0 1 2 3 4 5 6 7 8 9 10 11 12 13
339 1 : // \_________________________/
340 1 : // |keysPerCharacter| keys
341 1 : //
342 1 : // In our recursive call, we reduce the problem to:
343 1 : //
344 1 : // aaa aab aba abb
345 1 : // aa ab
346 1 : // 0 1 2 3 4 5
347 1 : // \________________________/
348 1 : // |keysPerCharacter-1| keys
349 1 : //
350 1 : // In the subproblem, there are keysPerCharacter-1 keys (eliminating the
351 1 : // just 'a' key, plus any keys beginning with any other character).
352 1 : //
353 1 : // The index i is also offset, reduced by the count of keys beginning with
354 1 : // characters earlier in the alphabet (keysPerCharacter*characterIdx) and
355 1 : // the key consisting of just the 'a' (-1).
356 1 : i = i - keysPerCharacter*characterIdx - 1
357 1 : return 1 + generateAlphabetKey(buf[1:], alphabet, i, keysPerCharacter-1)
358 : }
359 :
360 : // computeAlphabetKeyIndex computes the inverse of generateAlphabetKey,
361 : // returning the index of a particular key, given the provided alphabet and max
362 : // length of a key.
363 : //
364 : // len(key) must be ≥ 1.
365 1 : func computeAlphabetKeyIndex(key []byte, alphabet map[byte]int64, n int) int64 {
366 1 : i, ok := alphabet[key[0]]
367 1 : if !ok {
368 0 : panic(fmt.Sprintf("unrecognized alphabet character %v", key[0]))
369 : }
370 : // How many keys exist that start with the preceding i characters? Each of
371 : // the i characters themselves are a key, plus the count of all the keys
372 : // with one less character for each.
373 1 : ret := i + i*keyCount(len(alphabet), n-1)
374 1 : if len(key) > 1 {
375 1 : ret += 1 + computeAlphabetKeyIndex(key[1:], alphabet, n-1)
376 1 : }
377 1 : return ret
378 : }
379 :
380 1 : func abs(a int64) int64 {
381 1 : if a < 0 {
382 1 : return -a
383 1 : }
384 0 : return a
385 : }
386 :
387 : // RandomSeparator returns a random alphabetic key k such that a < k < b,
388 : // pulling randomness from the provided random number generator. If dst is
389 : // provided and the generated key fits within dst's capacity, the returned slice
390 : // will use dst's memory.
391 : //
392 : // If a prefix P exists such that Prefix(a) < P < Prefix(b), the generated key
393 : // will consist of the prefix P appended with the provided suffix. A zero suffix
394 : // generates an unsuffixed key. If no such prefix P exists, RandomSeparator will
395 : // try to find a key k with either Prefix(a) or Prefix(b) such that a < k < b,
396 : // but the generated key will not use the provided suffix. Note that it's
397 : // possible that no separator key exists (eg, a='a@2', b='a@1'), in which case
398 : // RandomSeparator returns nil.
399 : //
400 : // If RandomSeparator generates a new prefix, the generated prefix will have
401 : // length at most MAX(maxLength, len(Prefix(a)), len(Prefix(b))).
402 : //
403 : // RandomSeparator panics if a or b fails to decode.
404 1 : func RandomSeparator(dst, a, b []byte, suffix int64, maxLength int, rng *rand.Rand) []byte {
405 1 : if Comparer.Compare(a, b) >= 0 {
406 0 : return nil
407 0 : }
408 :
409 : // Determine both keys' logical prefixes and suffixes.
410 1 : ai := Comparer.Split(a)
411 1 : bi := Comparer.Split(b)
412 1 : ap := a[:ai]
413 1 : bp := b[:bi]
414 1 : maxLength = max(maxLength, len(ap), len(bp))
415 1 : var as, bs int64
416 1 : var err error
417 1 : if ai != len(a) {
418 1 : as, err = ParseSuffix(a[ai:])
419 1 : if err != nil {
420 0 : panic(fmt.Sprintf("failed to parse suffix of %q", a))
421 : }
422 : }
423 1 : if bi != len(b) {
424 1 : bs, err = ParseSuffix(b[bi:])
425 1 : if err != nil {
426 0 : panic(fmt.Sprintf("failed to parse suffix of %q", b))
427 : }
428 : }
429 :
430 1 : apIdx := computeAlphabetKeyIndex(ap, inverseAlphabet, maxLength)
431 1 : bpIdx := computeAlphabetKeyIndex(bp, inverseAlphabet, maxLength)
432 1 : diff := bpIdx - apIdx
433 1 : generatedIdx := bpIdx
434 1 : if diff > 0 {
435 1 : var add int64 = diff + 1
436 1 : var start int64 = apIdx
437 1 : if as == 1 {
438 1 : // There's no expressible key with prefix a greater than a@1. So,
439 1 : // exclude ap.
440 1 : start = apIdx + 1
441 1 : add = diff
442 1 : }
443 1 : if bs == 0 {
444 1 : // No key with prefix b can sort before b@0. We don't want to pick b.
445 1 : add--
446 1 : }
447 : // We're allowing generated id to be in the range [start, start + add - 1].
448 1 : if start > start+add-1 {
449 0 : return nil
450 0 : }
451 : // If we can generate a key which is actually in the middle of apIdx
452 : // and bpIdx use it so that we don't have to bother about timestamps.
453 1 : generatedIdx = rng.Int63n(add) + start
454 1 : for diff > 1 && generatedIdx == apIdx || generatedIdx == bpIdx {
455 1 : generatedIdx = rng.Int63n(add) + start
456 1 : }
457 : }
458 :
459 1 : switch {
460 1 : case generatedIdx == apIdx && generatedIdx == bpIdx:
461 1 : if abs(bs-as) <= 1 {
462 1 : // There's no expressible suffix between the two, and there's no
463 1 : // possible separator key.
464 1 : return nil
465 1 : }
466 : // The key b is >= key a, but has the same prefix, so b must have the
467 : // smaller timestamp, unless a has timestamp of 0.
468 : //
469 : // NB: The zero suffix (suffix-less) sorts before all other suffixes, so
470 : // any suffix we generate will be greater than it.
471 0 : if as == 0 {
472 0 : // bs > as
473 0 : suffix = bs + rng.Int63n(10) + 1
474 0 : } else {
475 0 : // bs < as.
476 0 : // Generate suffix in range [bs + 1, as - 1]
477 0 : suffix = bs + 1 + rng.Int63n(as-bs-1)
478 0 : }
479 1 : case generatedIdx == apIdx:
480 1 : // NB: The zero suffix (suffix-less) sorts before all other suffixes, so
481 1 : // any suffix we generate will be greater than it.
482 1 : if as == 0 && suffix == 0 {
483 0 : suffix++
484 1 : } else if as != 0 && suffix >= as {
485 0 : suffix = rng.Int63n(as)
486 0 : }
487 0 : case generatedIdx == bpIdx:
488 0 : if suffix <= bs {
489 0 : suffix = bs + rng.Int63n(10) + 1
490 0 : }
491 : }
492 1 : if sz := maxLength + SuffixLen(suffix); cap(dst) < sz {
493 1 : dst = make([]byte, sz)
494 1 : } else {
495 0 : dst = dst[:cap(dst)]
496 0 : }
497 1 : var w int
498 1 : if suffix == 0 {
499 1 : w = WriteKey(dst, Alpha(maxLength), generatedIdx)
500 1 : } else {
501 1 : w = WriteKeyAt(dst, Alpha(maxLength), generatedIdx, suffix)
502 1 : }
503 1 : return dst[:w]
504 : }
|