Line data Source code
1 : // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : // Package testkeys provides facilities for generating and comparing
6 : // human-readable test keys for use in tests and benchmarks. This package
7 : // provides a single Comparer implementation that compares all keys generated
8 : // by this package.
9 : //
10 : // Keys generated by this package may optionally have a 'suffix' encoding an
11 : // MVCC timestamp. This suffix is of the form "@<integer>". Comparisons on the
12 : // suffix are performed using integer value, not the byte representation.
13 : package testkeys
14 :
15 : import (
16 : "bytes"
17 : "cmp"
18 : "fmt"
19 : "math"
20 : "math/rand/v2"
21 : "regexp"
22 : "strconv"
23 : "strings"
24 :
25 : "github.com/cockroachdb/pebble/internal/base"
26 : )
27 :
28 : const alpha = "abcdefghijklmnopqrstuvwxyz"
29 :
30 : const suffixDelim = '@'
31 :
32 : var inverseAlphabet = make(map[byte]int64, len(alpha))
33 :
34 : var prefixRE = regexp.MustCompile("[" + alpha + "]+")
35 :
36 : // ignoreTimestampSuffix is a suffix that is ignored when comparing keys, but
37 : // not when comparing suffixes. It simulates the CRDB synthetic bit situation
38 : // (see https://github.com/cockroachdb/cockroach/issues/130533).
39 : var ignoreTimestampSuffix = []byte("_synthetic")
40 :
41 1 : func init() {
42 1 : for i := range alpha {
43 1 : inverseAlphabet[alpha[i]] = int64(i)
44 1 : }
45 : }
46 :
47 : // MaxSuffixLen is the maximum length of a suffix generated by this package.
48 : var MaxSuffixLen = 1 + len(fmt.Sprintf("%d", int64(math.MaxInt64)))
49 :
50 : // Comparer is the comparer for test keys generated by this package.
51 : var Comparer = &base.Comparer{
52 : CompareSuffixes: compareSuffixes,
53 : Compare: compare,
54 1 : Equal: func(a, b []byte) bool { return compare(a, b) == 0 },
55 1 : AbbreviatedKey: func(k []byte) uint64 {
56 1 : return base.DefaultComparer.AbbreviatedKey(k[:split(k)])
57 1 : },
58 : FormatKey: base.DefaultFormatter,
59 1 : Separator: func(dst, a, b []byte) []byte {
60 1 : ai := split(a)
61 1 : if ai == len(a) {
62 1 : return append(dst, a...)
63 1 : }
64 1 : bi := split(b)
65 1 : if bi == len(b) {
66 1 : return append(dst, a...)
67 1 : }
68 :
69 : // If the keys are the same just return a.
70 1 : if bytes.Equal(a[:ai], b[:bi]) {
71 1 : return append(dst, a...)
72 1 : }
73 1 : n := len(dst)
74 1 : dst = base.DefaultComparer.Separator(dst, a[:ai], b[:bi])
75 1 : // Did it pick a separator different than a[:ai] -- if not we can't do better than a.
76 1 : buf := dst[n:]
77 1 : if bytes.Equal(a[:ai], buf) {
78 1 : return append(dst[:n], a...)
79 1 : }
80 : // The separator is > a[:ai], so return it
81 1 : return dst
82 : },
83 1 : Successor: func(dst, a []byte) []byte {
84 1 : ai := split(a)
85 1 : if ai == len(a) {
86 1 : return append(dst, a...)
87 1 : }
88 1 : n := len(dst)
89 1 : dst = base.DefaultComparer.Successor(dst, a[:ai])
90 1 : // Did it pick a successor different than a[:ai] -- if not we can't do better than a.
91 1 : buf := dst[n:]
92 1 : if bytes.Equal(a[:ai], buf) {
93 0 : return append(dst[:n], a...)
94 0 : }
95 : // The successor is > a[:ai], so return it.
96 1 : return dst
97 : },
98 1 : ImmediateSuccessor: func(dst, a []byte) []byte {
99 1 : // TODO(jackson): Consider changing this Comparer to only support
100 1 : // representable prefix keys containing characters a-z.
101 1 : ai := split(a)
102 1 : if ai != len(a) {
103 0 : panic("pebble: ImmediateSuccessor invoked with a non-prefix key")
104 : }
105 1 : return append(append(dst, a...), 0x00)
106 : },
107 : Split: split,
108 : Name: "pebble.internal.testkeys",
109 : }
110 :
111 : // The comparator is similar to the one in Cockroach; when the prefixes are
112 : // equal:
113 : // - a key without a suffix is smaller than one with a suffix;
114 : // - when both keys have a suffix, the key with the larger (decoded) suffix
115 : // value is smaller.
116 1 : func compare(a, b []byte) int {
117 1 : ai, bi := split(a), split(b)
118 1 : if v := bytes.Compare(a[:ai], b[:bi]); v != 0 {
119 1 : return v
120 1 : }
121 1 : return compareTimestamps(a[ai:], b[bi:])
122 : }
123 :
124 1 : func split(a []byte) int {
125 1 : i := bytes.LastIndexByte(a, suffixDelim)
126 1 : if i >= 0 {
127 1 : return i
128 1 : }
129 1 : return len(a)
130 : }
131 :
132 1 : func compareTimestamps(a, b []byte) int {
133 1 : a = bytes.TrimSuffix(a, ignoreTimestampSuffix)
134 1 : b = bytes.TrimSuffix(b, ignoreTimestampSuffix)
135 1 : if len(a) == 0 || len(b) == 0 {
136 1 : // The empty suffix sorts first.
137 1 : return cmp.Compare(len(a), len(b))
138 1 : }
139 1 : if a[0] != suffixDelim || b[0] != suffixDelim {
140 0 : panic(fmt.Sprintf("invalid suffixes %q %q", a, b))
141 : }
142 1 : ai, err := parseUintBytes(a[1:], 10, 64)
143 1 : if err != nil {
144 0 : panic(fmt.Sprintf("invalid test mvcc timestamp %q", a))
145 : }
146 1 : bi, err := parseUintBytes(b[1:], 10, 64)
147 1 : if err != nil {
148 0 : panic(fmt.Sprintf("invalid test mvcc timestamp %q", b))
149 : }
150 1 : return cmp.Compare(bi, ai)
151 : }
152 :
153 1 : func compareSuffixes(a, b []byte) int {
154 1 : cmp := compareTimestamps(a, b)
155 1 : if cmp == 0 {
156 1 : aHasIgnorableSuffix := bytes.HasSuffix(a, ignoreTimestampSuffix)
157 1 : bHasIgnorableSuffix := bytes.HasSuffix(b, ignoreTimestampSuffix)
158 1 : if aHasIgnorableSuffix && !bHasIgnorableSuffix {
159 1 : return 1
160 1 : }
161 1 : if !aHasIgnorableSuffix && bHasIgnorableSuffix {
162 1 : return -1
163 1 : }
164 : }
165 1 : return cmp
166 : }
167 :
168 : // Keyspace describes a finite keyspace of unsuffixed test keys.
169 : type Keyspace interface {
170 : // Count returns the number of keys that exist within this keyspace.
171 : Count() int64
172 :
173 : // MaxLen returns the maximum length, in bytes, of a key within this
174 : // keyspace. This is only guaranteed to return an upper bound.
175 : MaxLen() int
176 :
177 : // Slice returns the sub-keyspace from index i, inclusive, to index j,
178 : // exclusive. The receiver is unmodified.
179 : Slice(i, j int64) Keyspace
180 :
181 : // EveryN returns a key space that includes 1 key for every N keys in the
182 : // original keyspace. The receiver is unmodified.
183 : EveryN(n int64) Keyspace
184 :
185 : // key writes the i-th key to the buffer and returns the length.
186 : key(buf []byte, i int64) int
187 : }
188 :
189 : // Divvy divides the provided keyspace into N equal portions, containing
190 : // disjoint keys evenly distributed across the keyspace.
191 1 : func Divvy(ks Keyspace, n int64) []Keyspace {
192 1 : ret := make([]Keyspace, n)
193 1 : for i := int64(0); i < n; i++ {
194 1 : ret[i] = ks.Slice(i, ks.Count()).EveryN(n)
195 1 : }
196 1 : return ret
197 : }
198 :
199 : // Alpha constructs a keyspace consisting of all keys containing characters a-z,
200 : // with at most `maxLength` characters.
201 1 : func Alpha(maxLength int) Keyspace {
202 1 : return alphabet{
203 1 : alphabet: []byte(alpha),
204 1 : maxLength: maxLength,
205 1 : increment: 1,
206 1 : }
207 1 : }
208 :
209 : // KeyAt returns the i-th key within the keyspace with a suffix encoding the
210 : // timestamp t.
211 1 : func KeyAt(k Keyspace, i int64, t int64) []byte {
212 1 : b := make([]byte, k.MaxLen()+MaxSuffixLen)
213 1 : return b[:WriteKeyAt(b, k, i, t)]
214 1 : }
215 :
216 : // WriteKeyAt writes the i-th key within the keyspace to the buffer dst, with a
217 : // suffix encoding the timestamp t suffix. It returns the number of bytes
218 : // written.
219 1 : func WriteKeyAt(dst []byte, k Keyspace, i int64, t int64) int {
220 1 : n := WriteKey(dst, k, i)
221 1 : n += WriteSuffix(dst[n:], t)
222 1 : return n
223 1 : }
224 :
225 : // Suffix returns the test keys suffix representation of timestamp t.
226 1 : func Suffix(t int64) []byte {
227 1 : b := make([]byte, MaxSuffixLen)
228 1 : return b[:WriteSuffix(b, t)]
229 1 : }
230 :
231 : // SuffixLen returns the exact length of the given suffix when encoded.
232 1 : func SuffixLen(t int64) int {
233 1 : // Begin at 1 for the '@' delimiter, 1 for a single digit.
234 1 : n := 2
235 1 : t /= 10
236 1 : for t > 0 {
237 1 : t /= 10
238 1 : n++
239 1 : }
240 1 : return n
241 : }
242 :
243 : // ParseSuffix returns the integer representation of the encoded suffix.
244 1 : func ParseSuffix(s []byte) (int64, error) {
245 1 : s = bytes.TrimSuffix(s, ignoreTimestampSuffix)
246 1 : return strconv.ParseInt(strings.TrimPrefix(string(s), string(suffixDelim)), 10, 64)
247 1 : }
248 :
249 : // WriteSuffix writes the test keys suffix representation of timestamp t to dst,
250 : // returning the number of bytes written.
251 1 : func WriteSuffix(dst []byte, t int64) int {
252 1 : dst[0] = suffixDelim
253 1 : n := 1
254 1 : n += len(strconv.AppendInt(dst[n:n], t, 10))
255 1 : return n
256 1 : }
257 :
258 : // Key returns the i-th unsuffixed key within the keyspace.
259 1 : func Key(k Keyspace, i int64) []byte {
260 1 : b := make([]byte, k.MaxLen())
261 1 : return b[:k.key(b, i)]
262 1 : }
263 :
264 : // WriteKey writes the i-th unsuffixed key within the keyspace to the buffer dst. It
265 : // returns the number of bytes written.
266 1 : func WriteKey(dst []byte, k Keyspace, i int64) int {
267 1 : return k.key(dst, i)
268 1 : }
269 :
270 : type alphabet struct {
271 : alphabet []byte
272 : maxLength int
273 : headSkip int64
274 : tailSkip int64
275 : increment int64
276 : }
277 :
278 1 : func (a alphabet) Count() int64 {
279 1 : // Calculate the total number of keys, ignoring the increment.
280 1 : total := keyCount(len(a.alphabet), a.maxLength) - a.headSkip - a.tailSkip
281 1 :
282 1 : // The increment dictates that we take every N keys, where N = a.increment.
283 1 : // Consider a total containing the 5 keys:
284 1 : // a b c d e
285 1 : // ^ ^ ^
286 1 : // If the increment is 2, this keyspace includes 'a', 'c' and 'e'. After
287 1 : // dividing by the increment, there may be remainder. If there is, there's
288 1 : // one additional key in the alphabet.
289 1 : count := total / a.increment
290 1 : if total%a.increment > 0 {
291 1 : count++
292 1 : }
293 1 : return count
294 : }
295 :
296 1 : func (a alphabet) MaxLen() int {
297 1 : return a.maxLength
298 1 : }
299 :
300 1 : func (a alphabet) Slice(i, j int64) Keyspace {
301 1 : s := a
302 1 : s.headSkip += i
303 1 : s.tailSkip += a.Count() - j
304 1 : return s
305 1 : }
306 :
307 1 : func (a alphabet) EveryN(n int64) Keyspace {
308 1 : s := a
309 1 : s.increment *= n
310 1 : return s
311 1 : }
312 :
313 1 : func keyCount(n, l int) int64 {
314 1 : // The number of representable keys in the keyspace is a function of the
315 1 : // length of the alphabet n and the max key length l:
316 1 : // n + n^2 + ... + n^l
317 1 : x := int64(1)
318 1 : res := int64(0)
319 1 : for i := 1; i <= l; i++ {
320 1 : if x >= math.MaxInt64/int64(n) {
321 1 : panic("overflow")
322 : }
323 1 : x *= int64(n)
324 1 : res += x
325 1 : if res < 0 {
326 0 : panic("overflow")
327 : }
328 : }
329 1 : return res
330 : }
331 :
332 1 : func (a alphabet) key(buf []byte, idx int64) int {
333 1 : // This function generates keys of length 1..maxKeyLength, pulling
334 1 : // characters from the alphabet. The idx determines which key to generate,
335 1 : // generating the i-th lexicographically next key.
336 1 : //
337 1 : // The index to use is advanced by `headSkip`, allowing a keyspace to encode
338 1 : // a subregion of the keyspace.
339 1 : //
340 1 : // Eg, alphabet = `ab`, maxKeyLength = 3:
341 1 : //
342 1 : // aaa aab aba abb baa bab bba bbb
343 1 : // aa ab ba bb
344 1 : // a b
345 1 : // 0 1 2 3 4 5 6 7 8 9 10 11 12 13
346 1 : //
347 1 : return generateAlphabetKey(buf, a.alphabet, (idx*a.increment)+a.headSkip,
348 1 : keyCount(len(a.alphabet), a.maxLength))
349 1 : }
350 :
351 1 : func generateAlphabetKey(buf, alphabet []byte, i, keyCount int64) int {
352 1 : if keyCount == 0 || i > keyCount || i < 0 {
353 1 : return 0
354 1 : }
355 :
356 : // Of the keyCount keys in the generative keyspace, how many are there
357 : // starting with a particular character?
358 1 : keysPerCharacter := keyCount / int64(len(alphabet))
359 1 :
360 1 : // Find the character that the key at index i starts with and set it.
361 1 : characterIdx := i / keysPerCharacter
362 1 : buf[0] = alphabet[characterIdx]
363 1 :
364 1 : // Consider characterIdx = 0, pointing to 'a'.
365 1 : //
366 1 : // aaa aab aba abb baa bab bba bbb
367 1 : // aa ab ba bb
368 1 : // a b
369 1 : // 0 1 2 3 4 5 6 7 8 9 10 11 12 13
370 1 : // \_________________________/
371 1 : // |keysPerCharacter| keys
372 1 : //
373 1 : // In our recursive call, we reduce the problem to:
374 1 : //
375 1 : // aaa aab aba abb
376 1 : // aa ab
377 1 : // 0 1 2 3 4 5
378 1 : // \________________________/
379 1 : // |keysPerCharacter-1| keys
380 1 : //
381 1 : // In the subproblem, there are keysPerCharacter-1 keys (eliminating the
382 1 : // just 'a' key, plus any keys beginning with any other character).
383 1 : //
384 1 : // The index i is also offset, reduced by the count of keys beginning with
385 1 : // characters earlier in the alphabet (keysPerCharacter*characterIdx) and
386 1 : // the key consisting of just the 'a' (-1).
387 1 : i = i - keysPerCharacter*characterIdx - 1
388 1 : return 1 + generateAlphabetKey(buf[1:], alphabet, i, keysPerCharacter-1)
389 : }
390 :
391 : // computeAlphabetKeyIndex computes the inverse of generateAlphabetKey,
392 : // returning the index of a particular key, given the provided alphabet and max
393 : // length of a key.
394 : //
395 : // len(key) must be ≥ 1.
396 1 : func computeAlphabetKeyIndex(key []byte, alphabet map[byte]int64, n int) int64 {
397 1 : i, ok := alphabet[key[0]]
398 1 : if !ok {
399 0 : panic(fmt.Sprintf("unrecognized alphabet character %v", key[0]))
400 : }
401 : // How many keys exist that start with the preceding i characters? Each of
402 : // the i characters themselves are a key, plus the count of all the keys
403 : // with one less character for each.
404 1 : ret := i + i*keyCount(len(alphabet), n-1)
405 1 : if len(key) > 1 {
406 1 : ret += 1 + computeAlphabetKeyIndex(key[1:], alphabet, n-1)
407 1 : }
408 1 : return ret
409 : }
410 :
411 : // RandomPrefixInRange returns a random prefix in the range [a, b), where a and
412 : // b are prefixes.
413 1 : func RandomPrefixInRange(a, b []byte, rng *rand.Rand) []byte {
414 1 : assertValidPrefix(a)
415 1 : assertValidPrefix(b)
416 1 : assertLess(a, b)
417 1 : commonPrefix := 0
418 1 : for commonPrefix < len(a)-1 && commonPrefix < len(b)-1 && a[commonPrefix] == b[commonPrefix] {
419 1 : commonPrefix++
420 1 : }
421 :
422 : // We will generate a piece of a key from the Alpha(maxLength) keyspace. Note
423 : // that maxLength cannot be higher than ~13 or we will encounter overflows.
424 1 : maxLength := 4 + rng.IntN(8)
425 1 :
426 1 : // Skip any common prefix (but leave at least one character in each key).
427 1 : skipPrefix := 0
428 1 : for skipPrefix+1 < min(len(a), len(b)) && a[skipPrefix] == b[skipPrefix] {
429 1 : skipPrefix++
430 1 : }
431 1 : aPiece := a[skipPrefix:]
432 1 : bPiece := b[skipPrefix:]
433 1 : if len(aPiece) > maxLength {
434 1 : // The trimmed prefix is smaller than a; we must be careful below to not
435 1 : // return a key smaller than a.
436 1 : aPiece = aPiece[:maxLength]
437 1 : }
438 1 : if len(bPiece) > maxLength {
439 1 : // The trimmed prefix is smaller than b, so we will still respect the bound.
440 1 : bPiece = bPiece[:maxLength]
441 1 : }
442 1 : assertLess(aPiece, bPiece)
443 1 : apIdx := computeAlphabetKeyIndex(aPiece, inverseAlphabet, maxLength)
444 1 : bpIdx := computeAlphabetKeyIndex(bPiece, inverseAlphabet, maxLength)
445 1 : if bpIdx <= apIdx {
446 0 : panic("unreachable")
447 : }
448 1 : generatedIdx := apIdx + rng.Int64N(bpIdx-apIdx)
449 1 : if generatedIdx == apIdx {
450 1 : // Return key a. We handle this separately in case we trimmed aPiece above.
451 1 : return append([]byte(nil), a...)
452 1 : }
453 1 : dst := make([]byte, skipPrefix+maxLength)
454 1 : copy(dst, a[:skipPrefix])
455 1 : pieceLen := WriteKey(dst[skipPrefix:], Alpha(maxLength), generatedIdx)
456 1 : dst = dst[:skipPrefix+pieceLen]
457 1 : assertLE(a, dst)
458 1 : assertLess(dst, b)
459 1 : return dst
460 : }
461 :
462 1 : func assertValidPrefix(p []byte) {
463 1 : if !prefixRE.Match(p) {
464 0 : panic(fmt.Sprintf("invalid prefix %q", p))
465 : }
466 : }
467 :
468 1 : func assertLess(a, b []byte) {
469 1 : if Comparer.Compare(a, b) >= 0 {
470 0 : panic(fmt.Sprintf("invalid key ordering: %q >= %q", a, b))
471 : }
472 : }
473 :
474 1 : func assertLE(a, b []byte) {
475 1 : if Comparer.Compare(a, b) > 0 {
476 0 : panic(fmt.Sprintf("invalid key ordering: %q > %q", a, b))
477 : }
478 : }
|