Line data Source code
1 : // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : // Package testkeys provides facilities for generating and comparing
6 : // human-readable test keys for use in tests and benchmarks. This package
7 : // provides a single Comparer implementation that compares all keys generated
8 : // by this package.
9 : //
10 : // Keys generated by this package may optionally have a 'suffix' encoding an
11 : // MVCC timestamp. This suffix is of the form "@<integer>". Comparisons on the
12 : // suffix are performed using integer value, not the byte representation.
13 : package testkeys
14 :
15 : import (
16 : "bytes"
17 : "cmp"
18 : "fmt"
19 : "math"
20 : "math/rand/v2"
21 : "regexp"
22 : "strconv"
23 : "strings"
24 :
25 : "github.com/cockroachdb/pebble/internal/base"
26 : )
27 :
28 : const alpha = "abcdefghijklmnopqrstuvwxyz"
29 :
30 : const suffixDelim = '@'
31 :
32 : var inverseAlphabet = make(map[byte]int64, len(alpha))
33 :
34 : var prefixRE = regexp.MustCompile("[" + alpha + "]+")
35 :
36 : // ignoreTimestampSuffix is a suffix that is ignored when comparing keys, but
37 : // not when comparing suffixes. It simulates the CRDB synthetic bit situation
38 : // (see https://github.com/cockroachdb/cockroach/issues/130533).
39 : var ignoreTimestampSuffix = []byte("_synthetic")
40 :
41 1 : func init() {
42 1 : for i := range alpha {
43 1 : inverseAlphabet[alpha[i]] = int64(i)
44 1 : }
45 : }
46 :
47 : // MaxSuffixLen is the maximum length of a suffix generated by this package.
48 : var MaxSuffixLen = 1 + len(fmt.Sprintf("%d", int64(math.MaxInt64)))
49 :
50 : // Comparer is the comparer for test keys generated by this package.
51 : var Comparer = &base.Comparer{
52 : ComparePointSuffixes: compareSuffixes,
53 : CompareRangeSuffixes: compareSuffixes,
54 : Compare: compare,
55 1 : Equal: func(a, b []byte) bool { return compare(a, b) == 0 },
56 1 : AbbreviatedKey: func(k []byte) uint64 {
57 1 : return base.DefaultComparer.AbbreviatedKey(k[:split(k)])
58 1 : },
59 : FormatKey: base.DefaultFormatter,
60 1 : Separator: func(dst, a, b []byte) []byte {
61 1 : ai := split(a)
62 1 : if ai == len(a) {
63 1 : return append(dst, a...)
64 1 : }
65 1 : bi := split(b)
66 1 : if bi == len(b) {
67 1 : return append(dst, a...)
68 1 : }
69 :
70 : // If the keys are the same just return a.
71 1 : if bytes.Equal(a[:ai], b[:bi]) {
72 1 : return append(dst, a...)
73 1 : }
74 1 : n := len(dst)
75 1 : dst = base.DefaultComparer.Separator(dst, a[:ai], b[:bi])
76 1 : // Did it pick a separator different than a[:ai] -- if not we can't do better than a.
77 1 : buf := dst[n:]
78 1 : if bytes.Equal(a[:ai], buf) {
79 1 : return append(dst[:n], a...)
80 1 : }
81 : // The separator is > a[:ai], so return it
82 1 : return dst
83 : },
84 1 : Successor: func(dst, a []byte) []byte {
85 1 : ai := split(a)
86 1 : if ai == len(a) {
87 1 : return append(dst, a...)
88 1 : }
89 1 : n := len(dst)
90 1 : dst = base.DefaultComparer.Successor(dst, a[:ai])
91 1 : // Did it pick a successor different than a[:ai] -- if not we can't do better than a.
92 1 : buf := dst[n:]
93 1 : if bytes.Equal(a[:ai], buf) {
94 0 : return append(dst[:n], a...)
95 0 : }
96 : // The successor is > a[:ai], so return it.
97 1 : return dst
98 : },
99 1 : ImmediateSuccessor: func(dst, a []byte) []byte {
100 1 : // TODO(jackson): Consider changing this Comparer to only support
101 1 : // representable prefix keys containing characters a-z.
102 1 : ai := split(a)
103 1 : if ai != len(a) {
104 0 : panic("pebble: ImmediateSuccessor invoked with a non-prefix key")
105 : }
106 1 : return append(append(dst, a...), 0x00)
107 : },
108 : Split: split,
109 : Name: "pebble.internal.testkeys",
110 : }
111 :
112 : // The comparator is similar to the one in Cockroach; when the prefixes are
113 : // equal:
114 : // - a key without a suffix is smaller than one with a suffix;
115 : // - when both keys have a suffix, the key with the larger (decoded) suffix
116 : // value is smaller.
117 1 : func compare(a, b []byte) int {
118 1 : ai, bi := split(a), split(b)
119 1 : if v := bytes.Compare(a[:ai], b[:bi]); v != 0 {
120 1 : return v
121 1 : }
122 1 : return compareTimestamps(a[ai:], b[bi:])
123 : }
124 :
125 1 : func split(a []byte) int {
126 1 : i := bytes.LastIndexByte(a, suffixDelim)
127 1 : if i >= 0 {
128 1 : return i
129 1 : }
130 1 : return len(a)
131 : }
132 :
133 1 : func compareTimestamps(a, b []byte) int {
134 1 : a = bytes.TrimSuffix(a, ignoreTimestampSuffix)
135 1 : b = bytes.TrimSuffix(b, ignoreTimestampSuffix)
136 1 : if len(a) == 0 || len(b) == 0 {
137 1 : // The empty suffix sorts first.
138 1 : return cmp.Compare(len(a), len(b))
139 1 : }
140 1 : if a[0] != suffixDelim || b[0] != suffixDelim {
141 0 : panic(fmt.Sprintf("invalid suffixes %q %q", a, b))
142 : }
143 1 : ai, err := parseUintBytes(a[1:], 10, 64)
144 1 : if err != nil {
145 0 : panic(fmt.Sprintf("invalid test mvcc timestamp %q", a))
146 : }
147 1 : bi, err := parseUintBytes(b[1:], 10, 64)
148 1 : if err != nil {
149 0 : panic(fmt.Sprintf("invalid test mvcc timestamp %q", b))
150 : }
151 1 : return cmp.Compare(bi, ai)
152 : }
153 :
154 1 : func compareSuffixes(a, b []byte) int {
155 1 : cmp := compareTimestamps(a, b)
156 1 : if cmp == 0 {
157 1 : aHasIgnorableSuffix := bytes.HasSuffix(a, ignoreTimestampSuffix)
158 1 : bHasIgnorableSuffix := bytes.HasSuffix(b, ignoreTimestampSuffix)
159 1 : if aHasIgnorableSuffix && !bHasIgnorableSuffix {
160 0 : return 1
161 0 : }
162 1 : if !aHasIgnorableSuffix && bHasIgnorableSuffix {
163 0 : return -1
164 0 : }
165 : }
166 1 : return cmp
167 : }
168 :
169 : // Keyspace describes a finite keyspace of unsuffixed test keys.
170 : type Keyspace interface {
171 : // Count returns the number of keys that exist within this keyspace.
172 : Count() int64
173 :
174 : // MaxLen returns the maximum length, in bytes, of a key within this
175 : // keyspace. This is only guaranteed to return an upper bound.
176 : MaxLen() int
177 :
178 : // Slice returns the sub-keyspace from index i, inclusive, to index j,
179 : // exclusive. The receiver is unmodified.
180 : Slice(i, j int64) Keyspace
181 :
182 : // EveryN returns a key space that includes 1 key for every N keys in the
183 : // original keyspace. The receiver is unmodified.
184 : EveryN(n int64) Keyspace
185 :
186 : // key writes the i-th key to the buffer and returns the length.
187 : key(buf []byte, i int64) int
188 : }
189 :
190 : // Divvy divides the provided keyspace into N equal portions, containing
191 : // disjoint keys evenly distributed across the keyspace.
192 0 : func Divvy(ks Keyspace, n int64) []Keyspace {
193 0 : ret := make([]Keyspace, n)
194 0 : for i := int64(0); i < n; i++ {
195 0 : ret[i] = ks.Slice(i, ks.Count()).EveryN(n)
196 0 : }
197 0 : return ret
198 : }
199 :
200 : // Alpha constructs a keyspace consisting of all keys containing characters a-z,
201 : // with at most `maxLength` characters.
202 0 : func Alpha(maxLength int) Keyspace {
203 0 : return alphabet{
204 0 : alphabet: []byte(alpha),
205 0 : maxLength: maxLength,
206 0 : increment: 1,
207 0 : }
208 0 : }
209 :
210 : // KeyAt returns the i-th key within the keyspace with a suffix encoding the
211 : // timestamp t.
212 0 : func KeyAt(k Keyspace, i int64, t int64) []byte {
213 0 : b := make([]byte, k.MaxLen()+MaxSuffixLen)
214 0 : return b[:WriteKeyAt(b, k, i, t)]
215 0 : }
216 :
217 : // WriteKeyAt writes the i-th key within the keyspace to the buffer dst, with a
218 : // suffix encoding the timestamp t suffix. It returns the number of bytes
219 : // written.
220 0 : func WriteKeyAt(dst []byte, k Keyspace, i int64, t int64) int {
221 0 : n := WriteKey(dst, k, i)
222 0 : n += WriteSuffix(dst[n:], t)
223 0 : return n
224 0 : }
225 :
226 : // Suffix returns the test keys suffix representation of timestamp t.
227 0 : func Suffix(t int64) []byte {
228 0 : b := make([]byte, MaxSuffixLen)
229 0 : return b[:WriteSuffix(b, t)]
230 0 : }
231 :
232 : // SuffixLen returns the exact length of the given suffix when encoded.
233 0 : func SuffixLen(t int64) int {
234 0 : // Begin at 1 for the '@' delimiter, 1 for a single digit.
235 0 : n := 2
236 0 : t /= 10
237 0 : for t > 0 {
238 0 : t /= 10
239 0 : n++
240 0 : }
241 0 : return n
242 : }
243 :
244 : // ParseSuffix returns the integer representation of the encoded suffix.
245 1 : func ParseSuffix(s []byte) (int64, error) {
246 1 : s = bytes.TrimSuffix(s, ignoreTimestampSuffix)
247 1 : return strconv.ParseInt(strings.TrimPrefix(string(s), string(suffixDelim)), 10, 64)
248 1 : }
249 :
250 : // WriteSuffix writes the test keys suffix representation of timestamp t to dst,
251 : // returning the number of bytes written.
252 0 : func WriteSuffix(dst []byte, t int64) int {
253 0 : dst[0] = suffixDelim
254 0 : n := 1
255 0 : n += len(strconv.AppendInt(dst[n:n], t, 10))
256 0 : return n
257 0 : }
258 :
259 : // Key returns the i-th unsuffixed key within the keyspace.
260 0 : func Key(k Keyspace, i int64) []byte {
261 0 : b := make([]byte, k.MaxLen())
262 0 : return b[:k.key(b, i)]
263 0 : }
264 :
265 : // WriteKey writes the i-th unsuffixed key within the keyspace to the buffer dst. It
266 : // returns the number of bytes written.
267 0 : func WriteKey(dst []byte, k Keyspace, i int64) int {
268 0 : return k.key(dst, i)
269 0 : }
270 :
271 : type alphabet struct {
272 : alphabet []byte
273 : maxLength int
274 : headSkip int64
275 : tailSkip int64
276 : increment int64
277 : }
278 :
279 0 : func (a alphabet) Count() int64 {
280 0 : // Calculate the total number of keys, ignoring the increment.
281 0 : total := keyCount(len(a.alphabet), a.maxLength) - a.headSkip - a.tailSkip
282 0 :
283 0 : // The increment dictates that we take every N keys, where N = a.increment.
284 0 : // Consider a total containing the 5 keys:
285 0 : // a b c d e
286 0 : // ^ ^ ^
287 0 : // If the increment is 2, this keyspace includes 'a', 'c' and 'e'. After
288 0 : // dividing by the increment, there may be remainder. If there is, there's
289 0 : // one additional key in the alphabet.
290 0 : count := total / a.increment
291 0 : if total%a.increment > 0 {
292 0 : count++
293 0 : }
294 0 : return count
295 : }
296 :
297 0 : func (a alphabet) MaxLen() int {
298 0 : return a.maxLength
299 0 : }
300 :
301 0 : func (a alphabet) Slice(i, j int64) Keyspace {
302 0 : s := a
303 0 : s.headSkip += i
304 0 : s.tailSkip += a.Count() - j
305 0 : return s
306 0 : }
307 :
308 0 : func (a alphabet) EveryN(n int64) Keyspace {
309 0 : s := a
310 0 : s.increment *= n
311 0 : return s
312 0 : }
313 :
314 0 : func keyCount(n, l int) int64 {
315 0 : // The number of representable keys in the keyspace is a function of the
316 0 : // length of the alphabet n and the max key length l:
317 0 : // n + n^2 + ... + n^l
318 0 : x := int64(1)
319 0 : res := int64(0)
320 0 : for i := 1; i <= l; i++ {
321 0 : if x >= math.MaxInt64/int64(n) {
322 0 : panic("overflow")
323 : }
324 0 : x *= int64(n)
325 0 : res += x
326 0 : if res < 0 {
327 0 : panic("overflow")
328 : }
329 : }
330 0 : return res
331 : }
332 :
333 0 : func (a alphabet) key(buf []byte, idx int64) int {
334 0 : // This function generates keys of length 1..maxKeyLength, pulling
335 0 : // characters from the alphabet. The idx determines which key to generate,
336 0 : // generating the i-th lexicographically next key.
337 0 : //
338 0 : // The index to use is advanced by `headSkip`, allowing a keyspace to encode
339 0 : // a subregion of the keyspace.
340 0 : //
341 0 : // Eg, alphabet = `ab`, maxKeyLength = 3:
342 0 : //
343 0 : // aaa aab aba abb baa bab bba bbb
344 0 : // aa ab ba bb
345 0 : // a b
346 0 : // 0 1 2 3 4 5 6 7 8 9 10 11 12 13
347 0 : //
348 0 : return generateAlphabetKey(buf, a.alphabet, (idx*a.increment)+a.headSkip,
349 0 : keyCount(len(a.alphabet), a.maxLength))
350 0 : }
351 :
352 0 : func generateAlphabetKey(buf, alphabet []byte, i, keyCount int64) int {
353 0 : if keyCount == 0 || i > keyCount || i < 0 {
354 0 : return 0
355 0 : }
356 :
357 : // Of the keyCount keys in the generative keyspace, how many are there
358 : // starting with a particular character?
359 0 : keysPerCharacter := keyCount / int64(len(alphabet))
360 0 :
361 0 : // Find the character that the key at index i starts with and set it.
362 0 : characterIdx := i / keysPerCharacter
363 0 : buf[0] = alphabet[characterIdx]
364 0 :
365 0 : // Consider characterIdx = 0, pointing to 'a'.
366 0 : //
367 0 : // aaa aab aba abb baa bab bba bbb
368 0 : // aa ab ba bb
369 0 : // a b
370 0 : // 0 1 2 3 4 5 6 7 8 9 10 11 12 13
371 0 : // \_________________________/
372 0 : // |keysPerCharacter| keys
373 0 : //
374 0 : // In our recursive call, we reduce the problem to:
375 0 : //
376 0 : // aaa aab aba abb
377 0 : // aa ab
378 0 : // 0 1 2 3 4 5
379 0 : // \________________________/
380 0 : // |keysPerCharacter-1| keys
381 0 : //
382 0 : // In the subproblem, there are keysPerCharacter-1 keys (eliminating the
383 0 : // just 'a' key, plus any keys beginning with any other character).
384 0 : //
385 0 : // The index i is also offset, reduced by the count of keys beginning with
386 0 : // characters earlier in the alphabet (keysPerCharacter*characterIdx) and
387 0 : // the key consisting of just the 'a' (-1).
388 0 : i = i - keysPerCharacter*characterIdx - 1
389 0 : return 1 + generateAlphabetKey(buf[1:], alphabet, i, keysPerCharacter-1)
390 : }
391 :
392 : // computeAlphabetKeyIndex computes the inverse of generateAlphabetKey,
393 : // returning the index of a particular key, given the provided alphabet and max
394 : // length of a key.
395 : //
396 : // len(key) must be ≥ 1.
397 0 : func computeAlphabetKeyIndex(key []byte, alphabet map[byte]int64, n int) int64 {
398 0 : i, ok := alphabet[key[0]]
399 0 : if !ok {
400 0 : panic(fmt.Sprintf("unrecognized alphabet character %v", key[0]))
401 : }
402 : // How many keys exist that start with the preceding i characters? Each of
403 : // the i characters themselves are a key, plus the count of all the keys
404 : // with one less character for each.
405 0 : ret := i + i*keyCount(len(alphabet), n-1)
406 0 : if len(key) > 1 {
407 0 : ret += 1 + computeAlphabetKeyIndex(key[1:], alphabet, n-1)
408 0 : }
409 0 : return ret
410 : }
411 :
412 : // RandomPrefixInRange returns a random prefix in the range [a, b), where a and
413 : // b are prefixes.
414 0 : func RandomPrefixInRange(a, b []byte, rng *rand.Rand) []byte {
415 0 : assertValidPrefix(a)
416 0 : assertValidPrefix(b)
417 0 : assertLess(a, b)
418 0 : commonPrefix := 0
419 0 : for commonPrefix < len(a)-1 && commonPrefix < len(b)-1 && a[commonPrefix] == b[commonPrefix] {
420 0 : commonPrefix++
421 0 : }
422 :
423 : // We will generate a piece of a key from the Alpha(maxLength) keyspace. Note
424 : // that maxLength cannot be higher than ~13 or we will encounter overflows.
425 0 : maxLength := 4 + rng.IntN(8)
426 0 :
427 0 : // Skip any common prefix (but leave at least one character in each key).
428 0 : skipPrefix := 0
429 0 : for skipPrefix+1 < min(len(a), len(b)) && a[skipPrefix] == b[skipPrefix] {
430 0 : skipPrefix++
431 0 : }
432 0 : aPiece := a[skipPrefix:]
433 0 : bPiece := b[skipPrefix:]
434 0 : if len(aPiece) > maxLength {
435 0 : // The trimmed prefix is smaller than a; we must be careful below to not
436 0 : // return a key smaller than a.
437 0 : aPiece = aPiece[:maxLength]
438 0 : }
439 0 : if len(bPiece) > maxLength {
440 0 : // The trimmed prefix is smaller than b, so we will still respect the bound.
441 0 : bPiece = bPiece[:maxLength]
442 0 : }
443 0 : assertLess(aPiece, bPiece)
444 0 : apIdx := computeAlphabetKeyIndex(aPiece, inverseAlphabet, maxLength)
445 0 : bpIdx := computeAlphabetKeyIndex(bPiece, inverseAlphabet, maxLength)
446 0 : if bpIdx <= apIdx {
447 0 : panic("unreachable")
448 : }
449 0 : generatedIdx := apIdx + rng.Int64N(bpIdx-apIdx)
450 0 : if generatedIdx == apIdx {
451 0 : // Return key a. We handle this separately in case we trimmed aPiece above.
452 0 : return append([]byte(nil), a...)
453 0 : }
454 0 : dst := make([]byte, skipPrefix+maxLength)
455 0 : copy(dst, a[:skipPrefix])
456 0 : pieceLen := WriteKey(dst[skipPrefix:], Alpha(maxLength), generatedIdx)
457 0 : dst = dst[:skipPrefix+pieceLen]
458 0 : assertLE(a, dst)
459 0 : assertLess(dst, b)
460 0 : return dst
461 : }
462 :
463 0 : func assertValidPrefix(p []byte) {
464 0 : if !prefixRE.Match(p) {
465 0 : panic(fmt.Sprintf("invalid prefix %q", p))
466 : }
467 : }
468 :
469 0 : func assertLess(a, b []byte) {
470 0 : if Comparer.Compare(a, b) >= 0 {
471 0 : panic(fmt.Sprintf("invalid key ordering: %q >= %q", a, b))
472 : }
473 : }
474 :
475 0 : func assertLE(a, b []byte) {
476 0 : if Comparer.Compare(a, b) > 0 {
477 0 : panic(fmt.Sprintf("invalid key ordering: %q > %q", a, b))
478 : }
479 : }
|