Line data Source code
1 : // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : // Package testkeys provides facilities for generating and comparing
6 : // human-readable test keys for use in tests and benchmarks. This package
7 : // provides a single Comparer implementation that compares all keys generated
8 : // by this package.
9 : //
10 : // Keys generated by this package may optionally have a 'suffix' encoding an
11 : // MVCC timestamp. This suffix is of the form "@<integer>". Comparisons on the
12 : // suffix are performed using integer value, not the byte representation.
13 : package testkeys
14 :
15 : import (
16 : "bytes"
17 : "cmp"
18 : "fmt"
19 : "math"
20 : "math/rand/v2"
21 : "regexp"
22 : "strconv"
23 : "strings"
24 :
25 : "github.com/cockroachdb/pebble/internal/base"
26 : )
27 :
28 : const alpha = "abcdefghijklmnopqrstuvwxyz"
29 :
30 : const suffixDelim = '@'
31 :
32 : var inverseAlphabet = make(map[byte]int64, len(alpha))
33 :
34 : var prefixRE = regexp.MustCompile("[" + alpha + "]+")
35 :
36 : // ignoreTimestampSuffix is a suffix that is ignored when comparing keys, but
37 : // not when comparing suffixes. It simulates the CRDB synthetic bit situation
38 : // (see https://github.com/cockroachdb/cockroach/issues/130533).
39 : var ignoreTimestampSuffix = []byte("_synthetic")
40 :
41 2 : func init() {
42 2 : for i := range alpha {
43 2 : inverseAlphabet[alpha[i]] = int64(i)
44 2 : }
45 : }
46 :
47 : // MaxSuffixLen is the maximum length of a suffix generated by this package.
48 : var MaxSuffixLen = 1 + len(fmt.Sprintf("%d", int64(math.MaxInt64)))
49 :
50 : // Comparer is the comparer for test keys generated by this package.
51 : var Comparer = &base.Comparer{
52 : ComparePointSuffixes: compareSuffixes,
53 : CompareRangeSuffixes: compareSuffixes,
54 : Compare: compare,
55 2 : Equal: func(a, b []byte) bool { return compare(a, b) == 0 },
56 2 : AbbreviatedKey: func(k []byte) uint64 {
57 2 : return base.DefaultComparer.AbbreviatedKey(k[:split(k)])
58 2 : },
59 : FormatKey: base.DefaultFormatter,
60 2 : Separator: func(dst, a, b []byte) []byte {
61 2 : ai := split(a)
62 2 : if ai == len(a) {
63 2 : return append(dst, a...)
64 2 : }
65 2 : bi := split(b)
66 2 : if bi == len(b) {
67 2 : return append(dst, a...)
68 2 : }
69 :
70 : // If the keys are the same just return a.
71 2 : if bytes.Equal(a[:ai], b[:bi]) {
72 2 : return append(dst, a...)
73 2 : }
74 2 : n := len(dst)
75 2 : dst = base.DefaultComparer.Separator(dst, a[:ai], b[:bi])
76 2 : // Did it pick a separator different than a[:ai] -- if not we can't do better than a.
77 2 : buf := dst[n:]
78 2 : if bytes.Equal(a[:ai], buf) {
79 2 : return append(dst[:n], a...)
80 2 : }
81 : // The separator is > a[:ai], so return it
82 2 : return dst
83 : },
84 2 : Successor: func(dst, a []byte) []byte {
85 2 : ai := split(a)
86 2 : if ai == len(a) {
87 2 : return append(dst, a...)
88 2 : }
89 2 : n := len(dst)
90 2 : dst = base.DefaultComparer.Successor(dst, a[:ai])
91 2 : // Did it pick a successor different than a[:ai] -- if not we can't do better than a.
92 2 : buf := dst[n:]
93 2 : if bytes.Equal(a[:ai], buf) {
94 0 : return append(dst[:n], a...)
95 0 : }
96 : // The successor is > a[:ai], so return it.
97 2 : return dst
98 : },
99 2 : ImmediateSuccessor: func(dst, a []byte) []byte {
100 2 : // TODO(jackson): Consider changing this Comparer to only support
101 2 : // representable prefix keys containing characters a-z.
102 2 : ai := split(a)
103 2 : if ai != len(a) {
104 0 : panic("pebble: ImmediateSuccessor invoked with a non-prefix key")
105 : }
106 2 : return append(append(dst, a...), 0x00)
107 : },
108 : Split: split,
109 : Name: "pebble.internal.testkeys",
110 : }
111 :
112 : // The comparator is similar to the one in Cockroach; when the prefixes are
113 : // equal:
114 : // - a key without a suffix is smaller than one with a suffix;
115 : // - when both keys have a suffix, the key with the larger (decoded) suffix
116 : // value is smaller.
117 2 : func compare(a, b []byte) int {
118 2 : ai, bi := split(a), split(b)
119 2 : if v := bytes.Compare(a[:ai], b[:bi]); v != 0 {
120 2 : return v
121 2 : }
122 2 : return compareTimestamps(a[ai:], b[bi:])
123 : }
124 :
125 2 : func split(a []byte) int {
126 2 : i := bytes.LastIndexByte(a, suffixDelim)
127 2 : if i >= 0 {
128 2 : return i
129 2 : }
130 2 : return len(a)
131 : }
132 :
133 2 : func compareTimestamps(a, b []byte) int {
134 2 : a = bytes.TrimSuffix(a, ignoreTimestampSuffix)
135 2 : b = bytes.TrimSuffix(b, ignoreTimestampSuffix)
136 2 : if len(a) == 0 || len(b) == 0 {
137 2 : // The empty suffix sorts first.
138 2 : return cmp.Compare(len(a), len(b))
139 2 : }
140 2 : if a[0] != suffixDelim || b[0] != suffixDelim {
141 0 : panic(fmt.Sprintf("invalid suffixes %q %q", a, b))
142 : }
143 2 : ai, err := parseUintBytes(a[1:], 10, 64)
144 2 : if err != nil {
145 0 : panic(fmt.Sprintf("invalid test mvcc timestamp %q", a))
146 : }
147 2 : bi, err := parseUintBytes(b[1:], 10, 64)
148 2 : if err != nil {
149 0 : panic(fmt.Sprintf("invalid test mvcc timestamp %q", b))
150 : }
151 2 : return cmp.Compare(bi, ai)
152 : }
153 :
154 2 : func compareSuffixes(a, b []byte) int {
155 2 : cmp := compareTimestamps(a, b)
156 2 : if cmp == 0 {
157 2 : aHasIgnorableSuffix := bytes.HasSuffix(a, ignoreTimestampSuffix)
158 2 : bHasIgnorableSuffix := bytes.HasSuffix(b, ignoreTimestampSuffix)
159 2 : if aHasIgnorableSuffix && !bHasIgnorableSuffix {
160 1 : return 1
161 1 : }
162 2 : if !aHasIgnorableSuffix && bHasIgnorableSuffix {
163 1 : return -1
164 1 : }
165 : }
166 2 : return cmp
167 : }
168 :
169 : // Keyspace describes a finite keyspace of unsuffixed test keys.
170 : type Keyspace interface {
171 : // Count returns the number of keys that exist within this keyspace.
172 : Count() int64
173 :
174 : // MaxLen returns the maximum length, in bytes, of a key within this
175 : // keyspace. This is only guaranteed to return an upper bound.
176 : MaxLen() int
177 :
178 : // Slice returns the sub-keyspace from index i, inclusive, to index j,
179 : // exclusive. The receiver is unmodified.
180 : Slice(i, j int64) Keyspace
181 :
182 : // EveryN returns a key space that includes 1 key for every N keys in the
183 : // original keyspace. The receiver is unmodified.
184 : EveryN(n int64) Keyspace
185 :
186 : // key writes the i-th key to the buffer and returns the length.
187 : key(buf []byte, i int64) int
188 : }
189 :
190 : // Divvy divides the provided keyspace into N equal portions, containing
191 : // disjoint keys evenly distributed across the keyspace.
192 1 : func Divvy(ks Keyspace, n int64) []Keyspace {
193 1 : ret := make([]Keyspace, n)
194 1 : for i := int64(0); i < n; i++ {
195 1 : ret[i] = ks.Slice(i, ks.Count()).EveryN(n)
196 1 : }
197 1 : return ret
198 : }
199 :
200 : // Alpha constructs a keyspace consisting of all keys containing characters a-z,
201 : // with at most `maxLength` characters.
202 1 : func Alpha(maxLength int) Keyspace {
203 1 : return alphabet{
204 1 : alphabet: []byte(alpha),
205 1 : maxLength: maxLength,
206 1 : increment: 1,
207 1 : }
208 1 : }
209 :
210 : // KeyAt returns the i-th key within the keyspace with a suffix encoding the
211 : // timestamp t.
212 1 : func KeyAt(k Keyspace, i int64, t int64) []byte {
213 1 : b := make([]byte, k.MaxLen()+MaxSuffixLen)
214 1 : return b[:WriteKeyAt(b, k, i, t)]
215 1 : }
216 :
217 : // WriteKeyAt writes the i-th key within the keyspace to the buffer dst, with a
218 : // suffix encoding the timestamp t suffix. It returns the number of bytes
219 : // written.
220 1 : func WriteKeyAt(dst []byte, k Keyspace, i int64, t int64) int {
221 1 : n := WriteKey(dst, k, i)
222 1 : n += WriteSuffix(dst[n:], t)
223 1 : return n
224 1 : }
225 :
226 : // Suffix returns the test keys suffix representation of timestamp t.
227 1 : func Suffix(t int64) []byte {
228 1 : b := make([]byte, MaxSuffixLen)
229 1 : return b[:WriteSuffix(b, t)]
230 1 : }
231 :
232 : // SuffixLen returns the exact length of the given suffix when encoded.
233 1 : func SuffixLen(t int64) int {
234 1 : // Begin at 1 for the '@' delimiter, 1 for a single digit.
235 1 : n := 2
236 1 : t /= 10
237 1 : for t > 0 {
238 1 : t /= 10
239 1 : n++
240 1 : }
241 1 : return n
242 : }
243 :
244 : // ParseSuffix returns the integer representation of the encoded suffix.
245 2 : func ParseSuffix(s []byte) (int64, error) {
246 2 : s = bytes.TrimSuffix(s, ignoreTimestampSuffix)
247 2 : return strconv.ParseInt(strings.TrimPrefix(string(s), string(suffixDelim)), 10, 64)
248 2 : }
249 :
250 : // WriteSuffix writes the test keys suffix representation of timestamp t to dst,
251 : // returning the number of bytes written.
252 1 : func WriteSuffix(dst []byte, t int64) int {
253 1 : dst[0] = suffixDelim
254 1 : n := 1
255 1 : n += len(strconv.AppendInt(dst[n:n], t, 10))
256 1 : return n
257 1 : }
258 :
259 : // Key returns the i-th unsuffixed key within the keyspace.
260 1 : func Key(k Keyspace, i int64) []byte {
261 1 : b := make([]byte, k.MaxLen())
262 1 : return b[:k.key(b, i)]
263 1 : }
264 :
265 : // WriteKey writes the i-th unsuffixed key within the keyspace to the buffer dst. It
266 : // returns the number of bytes written.
267 1 : func WriteKey(dst []byte, k Keyspace, i int64) int {
268 1 : return k.key(dst, i)
269 1 : }
270 :
271 : type alphabet struct {
272 : alphabet []byte
273 : maxLength int
274 : headSkip int64
275 : tailSkip int64
276 : increment int64
277 : }
278 :
279 1 : func (a alphabet) Count() int64 {
280 1 : // Calculate the total number of keys, ignoring the increment.
281 1 : total := keyCount(len(a.alphabet), a.maxLength) - a.headSkip - a.tailSkip
282 1 :
283 1 : // The increment dictates that we take every N keys, where N = a.increment.
284 1 : // Consider a total containing the 5 keys:
285 1 : // a b c d e
286 1 : // ^ ^ ^
287 1 : // If the increment is 2, this keyspace includes 'a', 'c' and 'e'. After
288 1 : // dividing by the increment, there may be remainder. If there is, there's
289 1 : // one additional key in the alphabet.
290 1 : count := total / a.increment
291 1 : if total%a.increment > 0 {
292 1 : count++
293 1 : }
294 1 : return count
295 : }
296 :
297 1 : func (a alphabet) MaxLen() int {
298 1 : return a.maxLength
299 1 : }
300 :
301 1 : func (a alphabet) Slice(i, j int64) Keyspace {
302 1 : s := a
303 1 : s.headSkip += i
304 1 : s.tailSkip += a.Count() - j
305 1 : return s
306 1 : }
307 :
308 1 : func (a alphabet) EveryN(n int64) Keyspace {
309 1 : s := a
310 1 : s.increment *= n
311 1 : return s
312 1 : }
313 :
314 1 : func keyCount(n, l int) int64 {
315 1 : // The number of representable keys in the keyspace is a function of the
316 1 : // length of the alphabet n and the max key length l:
317 1 : // n + n^2 + ... + n^l
318 1 : x := int64(1)
319 1 : res := int64(0)
320 1 : for i := 1; i <= l; i++ {
321 1 : if x >= math.MaxInt64/int64(n) {
322 1 : panic("overflow")
323 : }
324 1 : x *= int64(n)
325 1 : res += x
326 1 : if res < 0 {
327 0 : panic("overflow")
328 : }
329 : }
330 1 : return res
331 : }
332 :
333 1 : func (a alphabet) key(buf []byte, idx int64) int {
334 1 : // This function generates keys of length 1..maxKeyLength, pulling
335 1 : // characters from the alphabet. The idx determines which key to generate,
336 1 : // generating the i-th lexicographically next key.
337 1 : //
338 1 : // The index to use is advanced by `headSkip`, allowing a keyspace to encode
339 1 : // a subregion of the keyspace.
340 1 : //
341 1 : // Eg, alphabet = `ab`, maxKeyLength = 3:
342 1 : //
343 1 : // aaa aab aba abb baa bab bba bbb
344 1 : // aa ab ba bb
345 1 : // a b
346 1 : // 0 1 2 3 4 5 6 7 8 9 10 11 12 13
347 1 : //
348 1 : return generateAlphabetKey(buf, a.alphabet, (idx*a.increment)+a.headSkip,
349 1 : keyCount(len(a.alphabet), a.maxLength))
350 1 : }
351 :
352 1 : func generateAlphabetKey(buf, alphabet []byte, i, keyCount int64) int {
353 1 : if keyCount == 0 || i > keyCount || i < 0 {
354 1 : return 0
355 1 : }
356 :
357 : // Of the keyCount keys in the generative keyspace, how many are there
358 : // starting with a particular character?
359 1 : keysPerCharacter := keyCount / int64(len(alphabet))
360 1 :
361 1 : // Find the character that the key at index i starts with and set it.
362 1 : characterIdx := i / keysPerCharacter
363 1 : buf[0] = alphabet[characterIdx]
364 1 :
365 1 : // Consider characterIdx = 0, pointing to 'a'.
366 1 : //
367 1 : // aaa aab aba abb baa bab bba bbb
368 1 : // aa ab ba bb
369 1 : // a b
370 1 : // 0 1 2 3 4 5 6 7 8 9 10 11 12 13
371 1 : // \_________________________/
372 1 : // |keysPerCharacter| keys
373 1 : //
374 1 : // In our recursive call, we reduce the problem to:
375 1 : //
376 1 : // aaa aab aba abb
377 1 : // aa ab
378 1 : // 0 1 2 3 4 5
379 1 : // \________________________/
380 1 : // |keysPerCharacter-1| keys
381 1 : //
382 1 : // In the subproblem, there are keysPerCharacter-1 keys (eliminating the
383 1 : // just 'a' key, plus any keys beginning with any other character).
384 1 : //
385 1 : // The index i is also offset, reduced by the count of keys beginning with
386 1 : // characters earlier in the alphabet (keysPerCharacter*characterIdx) and
387 1 : // the key consisting of just the 'a' (-1).
388 1 : i = i - keysPerCharacter*characterIdx - 1
389 1 : return 1 + generateAlphabetKey(buf[1:], alphabet, i, keysPerCharacter-1)
390 : }
391 :
392 : // computeAlphabetKeyIndex computes the inverse of generateAlphabetKey,
393 : // returning the index of a particular key, given the provided alphabet and max
394 : // length of a key.
395 : //
396 : // len(key) must be ≥ 1.
397 1 : func computeAlphabetKeyIndex(key []byte, alphabet map[byte]int64, n int) int64 {
398 1 : i, ok := alphabet[key[0]]
399 1 : if !ok {
400 0 : panic(fmt.Sprintf("unrecognized alphabet character %v", key[0]))
401 : }
402 : // How many keys exist that start with the preceding i characters? Each of
403 : // the i characters themselves are a key, plus the count of all the keys
404 : // with one less character for each.
405 1 : ret := i + i*keyCount(len(alphabet), n-1)
406 1 : if len(key) > 1 {
407 1 : ret += 1 + computeAlphabetKeyIndex(key[1:], alphabet, n-1)
408 1 : }
409 1 : return ret
410 : }
411 :
412 : // RandomPrefixInRange returns a random prefix in the range [a, b), where a and
413 : // b are prefixes.
414 1 : func RandomPrefixInRange(a, b []byte, rng *rand.Rand) []byte {
415 1 : assertValidPrefix(a)
416 1 : assertValidPrefix(b)
417 1 : assertLess(a, b)
418 1 : commonPrefix := 0
419 1 : for commonPrefix < len(a)-1 && commonPrefix < len(b)-1 && a[commonPrefix] == b[commonPrefix] {
420 1 : commonPrefix++
421 1 : }
422 :
423 : // We will generate a piece of a key from the Alpha(maxLength) keyspace. Note
424 : // that maxLength cannot be higher than ~13 or we will encounter overflows.
425 1 : maxLength := 4 + rng.IntN(8)
426 1 :
427 1 : // Skip any common prefix (but leave at least one character in each key).
428 1 : skipPrefix := 0
429 1 : for skipPrefix+1 < min(len(a), len(b)) && a[skipPrefix] == b[skipPrefix] {
430 1 : skipPrefix++
431 1 : }
432 1 : aPiece := a[skipPrefix:]
433 1 : bPiece := b[skipPrefix:]
434 1 : if len(aPiece) > maxLength {
435 1 : // The trimmed prefix is smaller than a; we must be careful below to not
436 1 : // return a key smaller than a.
437 1 : aPiece = aPiece[:maxLength]
438 1 : }
439 1 : if len(bPiece) > maxLength {
440 1 : // The trimmed prefix is smaller than b, so we will still respect the bound.
441 1 : bPiece = bPiece[:maxLength]
442 1 : }
443 1 : assertLess(aPiece, bPiece)
444 1 : apIdx := computeAlphabetKeyIndex(aPiece, inverseAlphabet, maxLength)
445 1 : bpIdx := computeAlphabetKeyIndex(bPiece, inverseAlphabet, maxLength)
446 1 : if bpIdx <= apIdx {
447 0 : panic("unreachable")
448 : }
449 1 : generatedIdx := apIdx + rng.Int64N(bpIdx-apIdx)
450 1 : if generatedIdx == apIdx {
451 1 : // Return key a. We handle this separately in case we trimmed aPiece above.
452 1 : return append([]byte(nil), a...)
453 1 : }
454 1 : dst := make([]byte, skipPrefix+maxLength)
455 1 : copy(dst, a[:skipPrefix])
456 1 : pieceLen := WriteKey(dst[skipPrefix:], Alpha(maxLength), generatedIdx)
457 1 : dst = dst[:skipPrefix+pieceLen]
458 1 : assertLE(a, dst)
459 1 : assertLess(dst, b)
460 1 : return dst
461 : }
462 :
463 1 : func assertValidPrefix(p []byte) {
464 1 : if !prefixRE.Match(p) {
465 0 : panic(fmt.Sprintf("invalid prefix %q", p))
466 : }
467 : }
468 :
469 1 : func assertLess(a, b []byte) {
470 1 : if Comparer.Compare(a, b) >= 0 {
471 0 : panic(fmt.Sprintf("invalid key ordering: %q >= %q", a, b))
472 : }
473 : }
474 :
475 1 : func assertLE(a, b []byte) {
476 1 : if Comparer.Compare(a, b) > 0 {
477 0 : panic(fmt.Sprintf("invalid key ordering: %q > %q", a, b))
478 : }
479 : }
|