Line data Source code
1 : // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : // Package testkeys provides facilities for generating and comparing
6 : // human-readable test keys for use in tests and benchmarks. This package
7 : // provides a single Comparer implementation that compares all keys generated
8 : // by this package.
9 : //
10 : // Keys generated by this package may optionally have a 'suffix' encoding an
11 : // MVCC timestamp. This suffix is of the form "@<integer>". Comparisons on the
12 : // suffix are performed using integer value, not the byte representation.
13 : package testkeys
14 :
15 : import (
16 : "bytes"
17 : "cmp"
18 : "fmt"
19 : "math"
20 : "regexp"
21 : "strconv"
22 : "strings"
23 :
24 : "github.com/cockroachdb/pebble/internal/base"
25 : "golang.org/x/exp/rand"
26 : )
27 :
28 : const alpha = "abcdefghijklmnopqrstuvwxyz"
29 :
30 : const suffixDelim = '@'
31 :
32 : var inverseAlphabet = make(map[byte]int64, len(alpha))
33 :
34 : var prefixRE = regexp.MustCompile("[" + alpha + "]+")
35 :
36 2 : func init() {
37 2 : for i := range alpha {
38 2 : inverseAlphabet[alpha[i]] = int64(i)
39 2 : }
40 : }
41 :
42 : // MaxSuffixLen is the maximum length of a suffix generated by this package.
43 : var MaxSuffixLen = 1 + len(fmt.Sprintf("%d", int64(math.MaxInt64)))
44 :
45 : // Comparer is the comparer for test keys generated by this package.
46 : var Comparer = &base.Comparer{
47 : CompareSuffixes: compareTimestamps,
48 : Compare: compare,
49 2 : Equal: func(a, b []byte) bool { return compare(a, b) == 0 },
50 2 : AbbreviatedKey: func(k []byte) uint64 {
51 2 : return base.DefaultComparer.AbbreviatedKey(k[:split(k)])
52 2 : },
53 : FormatKey: base.DefaultFormatter,
54 2 : Separator: func(dst, a, b []byte) []byte {
55 2 : ai := split(a)
56 2 : if ai == len(a) {
57 2 : return append(dst, a...)
58 2 : }
59 2 : bi := split(b)
60 2 : if bi == len(b) {
61 2 : return append(dst, a...)
62 2 : }
63 :
64 : // If the keys are the same just return a.
65 2 : if bytes.Equal(a[:ai], b[:bi]) {
66 2 : return append(dst, a...)
67 2 : }
68 2 : n := len(dst)
69 2 : dst = base.DefaultComparer.Separator(dst, a[:ai], b[:bi])
70 2 : // Did it pick a separator different than a[:ai] -- if not we can't do better than a.
71 2 : buf := dst[n:]
72 2 : if bytes.Equal(a[:ai], buf) {
73 2 : return append(dst[:n], a...)
74 2 : }
75 : // The separator is > a[:ai], so return it
76 2 : return dst
77 : },
78 2 : Successor: func(dst, a []byte) []byte {
79 2 : ai := split(a)
80 2 : if ai == len(a) {
81 2 : return append(dst, a...)
82 2 : }
83 2 : n := len(dst)
84 2 : dst = base.DefaultComparer.Successor(dst, a[:ai])
85 2 : // Did it pick a successor different than a[:ai] -- if not we can't do better than a.
86 2 : buf := dst[n:]
87 2 : if bytes.Equal(a[:ai], buf) {
88 0 : return append(dst[:n], a...)
89 0 : }
90 : // The successor is > a[:ai], so return it.
91 2 : return dst
92 : },
93 2 : ImmediateSuccessor: func(dst, a []byte) []byte {
94 2 : // TODO(jackson): Consider changing this Comparer to only support
95 2 : // representable prefix keys containing characters a-z.
96 2 : ai := split(a)
97 2 : if ai != len(a) {
98 0 : panic("pebble: ImmediateSuccessor invoked with a non-prefix key")
99 : }
100 2 : return append(append(dst, a...), 0x00)
101 : },
102 : Split: split,
103 : Name: "pebble.internal.testkeys",
104 : }
105 :
106 : // The comparator is similar to the one in Cockroach; when the prefixes are
107 : // equal:
108 : // - a key without a suffix is smaller than one with a suffix;
109 : // - when both keys have a suffix, the key with the larger (decoded) suffix
110 : // value is smaller.
111 2 : func compare(a, b []byte) int {
112 2 : ai, bi := split(a), split(b)
113 2 : if ai == 0 && len(a) > 0 {
114 0 : panic(fmt.Sprintf("Compare called with bare suffix %s", a))
115 : }
116 2 : if bi == 0 && len(b) > 0 {
117 0 : panic(fmt.Sprintf("Compare called with bare suffix %s", b))
118 : }
119 2 : if v := bytes.Compare(a[:ai], b[:bi]); v != 0 {
120 2 : return v
121 2 : }
122 2 : return compareTimestamps(a[ai:], b[bi:])
123 : }
124 :
125 2 : func split(a []byte) int {
126 2 : i := bytes.LastIndexByte(a, suffixDelim)
127 2 : if i == 0 {
128 0 : panic("split called on bare prefix")
129 : }
130 2 : if i > 0 {
131 2 : return i
132 2 : }
133 2 : return len(a)
134 : }
135 :
136 2 : func compareTimestamps(a, b []byte) int {
137 2 : if len(a) == 0 || len(b) == 0 {
138 2 : // The empty suffix sorts first.
139 2 : return cmp.Compare(len(a), len(b))
140 2 : }
141 2 : if a[0] != suffixDelim || b[0] != suffixDelim {
142 0 : panic(fmt.Sprintf("invalid suffixes %q %q", a, b))
143 : }
144 2 : ai, err := parseUintBytes(a[1:], 10, 64)
145 2 : if err != nil {
146 0 : panic(fmt.Sprintf("invalid test mvcc timestamp %q", a))
147 : }
148 2 : bi, err := parseUintBytes(b[1:], 10, 64)
149 2 : if err != nil {
150 0 : panic(fmt.Sprintf("invalid test mvcc timestamp %q", b))
151 : }
152 2 : return cmp.Compare(bi, ai)
153 : }
154 :
155 : // Keyspace describes a finite keyspace of unsuffixed test keys.
156 : type Keyspace interface {
157 : // Count returns the number of keys that exist within this keyspace.
158 : Count() int64
159 :
160 : // MaxLen returns the maximum length, in bytes, of a key within this
161 : // keyspace. This is only guaranteed to return an upper bound.
162 : MaxLen() int
163 :
164 : // Slice returns the sub-keyspace from index i, inclusive, to index j,
165 : // exclusive. The receiver is unmodified.
166 : Slice(i, j int64) Keyspace
167 :
168 : // EveryN returns a key space that includes 1 key for every N keys in the
169 : // original keyspace. The receiver is unmodified.
170 : EveryN(n int64) Keyspace
171 :
172 : // key writes the i-th key to the buffer and returns the length.
173 : key(buf []byte, i int64) int
174 : }
175 :
176 : // Divvy divides the provided keyspace into N equal portions, containing
177 : // disjoint keys evenly distributed across the keyspace.
178 1 : func Divvy(ks Keyspace, n int64) []Keyspace {
179 1 : ret := make([]Keyspace, n)
180 1 : for i := int64(0); i < n; i++ {
181 1 : ret[i] = ks.Slice(i, ks.Count()).EveryN(n)
182 1 : }
183 1 : return ret
184 : }
185 :
186 : // Alpha constructs a keyspace consisting of all keys containing characters a-z,
187 : // with at most `maxLength` characters.
188 1 : func Alpha(maxLength int) Keyspace {
189 1 : return alphabet{
190 1 : alphabet: []byte(alpha),
191 1 : maxLength: maxLength,
192 1 : increment: 1,
193 1 : }
194 1 : }
195 :
196 : // KeyAt returns the i-th key within the keyspace with a suffix encoding the
197 : // timestamp t.
198 1 : func KeyAt(k Keyspace, i int64, t int64) []byte {
199 1 : b := make([]byte, k.MaxLen()+MaxSuffixLen)
200 1 : return b[:WriteKeyAt(b, k, i, t)]
201 1 : }
202 :
203 : // WriteKeyAt writes the i-th key within the keyspace to the buffer dst, with a
204 : // suffix encoding the timestamp t suffix. It returns the number of bytes
205 : // written.
206 1 : func WriteKeyAt(dst []byte, k Keyspace, i int64, t int64) int {
207 1 : n := WriteKey(dst, k, i)
208 1 : n += WriteSuffix(dst[n:], t)
209 1 : return n
210 1 : }
211 :
212 : // Suffix returns the test keys suffix representation of timestamp t.
213 1 : func Suffix(t int64) []byte {
214 1 : b := make([]byte, MaxSuffixLen)
215 1 : return b[:WriteSuffix(b, t)]
216 1 : }
217 :
218 : // SuffixLen returns the exact length of the given suffix when encoded.
219 1 : func SuffixLen(t int64) int {
220 1 : // Begin at 1 for the '@' delimiter, 1 for a single digit.
221 1 : n := 2
222 1 : t /= 10
223 1 : for t > 0 {
224 1 : t /= 10
225 1 : n++
226 1 : }
227 1 : return n
228 : }
229 :
230 : // ParseSuffix returns the integer representation of the encoded suffix.
231 2 : func ParseSuffix(s []byte) (int64, error) {
232 2 : return strconv.ParseInt(strings.TrimPrefix(string(s), string(suffixDelim)), 10, 64)
233 2 : }
234 :
235 : // WriteSuffix writes the test keys suffix representation of timestamp t to dst,
236 : // returning the number of bytes written.
237 1 : func WriteSuffix(dst []byte, t int64) int {
238 1 : dst[0] = suffixDelim
239 1 : n := 1
240 1 : n += len(strconv.AppendInt(dst[n:n], t, 10))
241 1 : return n
242 1 : }
243 :
244 : // Key returns the i-th unsuffixed key within the keyspace.
245 1 : func Key(k Keyspace, i int64) []byte {
246 1 : b := make([]byte, k.MaxLen())
247 1 : return b[:k.key(b, i)]
248 1 : }
249 :
250 : // WriteKey writes the i-th unsuffixed key within the keyspace to the buffer dst. It
251 : // returns the number of bytes written.
252 1 : func WriteKey(dst []byte, k Keyspace, i int64) int {
253 1 : return k.key(dst, i)
254 1 : }
255 :
256 : type alphabet struct {
257 : alphabet []byte
258 : maxLength int
259 : headSkip int64
260 : tailSkip int64
261 : increment int64
262 : }
263 :
264 1 : func (a alphabet) Count() int64 {
265 1 : // Calculate the total number of keys, ignoring the increment.
266 1 : total := keyCount(len(a.alphabet), a.maxLength) - a.headSkip - a.tailSkip
267 1 :
268 1 : // The increment dictates that we take every N keys, where N = a.increment.
269 1 : // Consider a total containing the 5 keys:
270 1 : // a b c d e
271 1 : // ^ ^ ^
272 1 : // If the increment is 2, this keyspace includes 'a', 'c' and 'e'. After
273 1 : // dividing by the increment, there may be remainder. If there is, there's
274 1 : // one additional key in the alphabet.
275 1 : count := total / a.increment
276 1 : if total%a.increment > 0 {
277 1 : count++
278 1 : }
279 1 : return count
280 : }
281 :
282 1 : func (a alphabet) MaxLen() int {
283 1 : return a.maxLength
284 1 : }
285 :
286 1 : func (a alphabet) Slice(i, j int64) Keyspace {
287 1 : s := a
288 1 : s.headSkip += i
289 1 : s.tailSkip += a.Count() - j
290 1 : return s
291 1 : }
292 :
293 1 : func (a alphabet) EveryN(n int64) Keyspace {
294 1 : s := a
295 1 : s.increment *= n
296 1 : return s
297 1 : }
298 :
299 1 : func keyCount(n, l int) int64 {
300 1 : // The number of representable keys in the keyspace is a function of the
301 1 : // length of the alphabet n and the max key length l:
302 1 : // n + n^2 + ... + n^l
303 1 : x := int64(1)
304 1 : res := int64(0)
305 1 : for i := 1; i <= l; i++ {
306 1 : if x >= math.MaxInt64/int64(n) {
307 1 : panic("overflow")
308 : }
309 1 : x *= int64(n)
310 1 : res += x
311 1 : if res < 0 {
312 0 : panic("overflow")
313 : }
314 : }
315 1 : return res
316 : }
317 :
318 1 : func (a alphabet) key(buf []byte, idx int64) int {
319 1 : // This function generates keys of length 1..maxKeyLength, pulling
320 1 : // characters from the alphabet. The idx determines which key to generate,
321 1 : // generating the i-th lexicographically next key.
322 1 : //
323 1 : // The index to use is advanced by `headSkip`, allowing a keyspace to encode
324 1 : // a subregion of the keyspace.
325 1 : //
326 1 : // Eg, alphabet = `ab`, maxKeyLength = 3:
327 1 : //
328 1 : // aaa aab aba abb baa bab bba bbb
329 1 : // aa ab ba bb
330 1 : // a b
331 1 : // 0 1 2 3 4 5 6 7 8 9 10 11 12 13
332 1 : //
333 1 : return generateAlphabetKey(buf, a.alphabet, (idx*a.increment)+a.headSkip,
334 1 : keyCount(len(a.alphabet), a.maxLength))
335 1 : }
336 :
337 1 : func generateAlphabetKey(buf, alphabet []byte, i, keyCount int64) int {
338 1 : if keyCount == 0 || i > keyCount || i < 0 {
339 1 : return 0
340 1 : }
341 :
342 : // Of the keyCount keys in the generative keyspace, how many are there
343 : // starting with a particular character?
344 1 : keysPerCharacter := keyCount / int64(len(alphabet))
345 1 :
346 1 : // Find the character that the key at index i starts with and set it.
347 1 : characterIdx := i / keysPerCharacter
348 1 : buf[0] = alphabet[characterIdx]
349 1 :
350 1 : // Consider characterIdx = 0, pointing to 'a'.
351 1 : //
352 1 : // aaa aab aba abb baa bab bba bbb
353 1 : // aa ab ba bb
354 1 : // a b
355 1 : // 0 1 2 3 4 5 6 7 8 9 10 11 12 13
356 1 : // \_________________________/
357 1 : // |keysPerCharacter| keys
358 1 : //
359 1 : // In our recursive call, we reduce the problem to:
360 1 : //
361 1 : // aaa aab aba abb
362 1 : // aa ab
363 1 : // 0 1 2 3 4 5
364 1 : // \________________________/
365 1 : // |keysPerCharacter-1| keys
366 1 : //
367 1 : // In the subproblem, there are keysPerCharacter-1 keys (eliminating the
368 1 : // just 'a' key, plus any keys beginning with any other character).
369 1 : //
370 1 : // The index i is also offset, reduced by the count of keys beginning with
371 1 : // characters earlier in the alphabet (keysPerCharacter*characterIdx) and
372 1 : // the key consisting of just the 'a' (-1).
373 1 : i = i - keysPerCharacter*characterIdx - 1
374 1 : return 1 + generateAlphabetKey(buf[1:], alphabet, i, keysPerCharacter-1)
375 : }
376 :
377 : // computeAlphabetKeyIndex computes the inverse of generateAlphabetKey,
378 : // returning the index of a particular key, given the provided alphabet and max
379 : // length of a key.
380 : //
381 : // len(key) must be ≥ 1.
382 1 : func computeAlphabetKeyIndex(key []byte, alphabet map[byte]int64, n int) int64 {
383 1 : i, ok := alphabet[key[0]]
384 1 : if !ok {
385 0 : panic(fmt.Sprintf("unrecognized alphabet character %v", key[0]))
386 : }
387 : // How many keys exist that start with the preceding i characters? Each of
388 : // the i characters themselves are a key, plus the count of all the keys
389 : // with one less character for each.
390 1 : ret := i + i*keyCount(len(alphabet), n-1)
391 1 : if len(key) > 1 {
392 1 : ret += 1 + computeAlphabetKeyIndex(key[1:], alphabet, n-1)
393 1 : }
394 1 : return ret
395 : }
396 :
397 : // RandomPrefixInRange returns a random prefix in the range [a, b), where a and
398 : // b are prefixes.
399 1 : func RandomPrefixInRange(a, b []byte, rng *rand.Rand) []byte {
400 1 : assertValidPrefix(a)
401 1 : assertValidPrefix(b)
402 1 : assertLess(a, b)
403 1 : commonPrefix := 0
404 1 : for commonPrefix < len(a)-1 && commonPrefix < len(b)-1 && a[commonPrefix] == b[commonPrefix] {
405 1 : commonPrefix++
406 1 : }
407 :
408 : // We will generate a piece of a key from the Alpha(maxLength) keyspace. Note
409 : // that maxLength cannot be higher than ~13 or we will encounter overflows.
410 1 : maxLength := 4 + rng.Intn(8)
411 1 :
412 1 : // Skip any common prefix (but leave at least one character in each key).
413 1 : skipPrefix := 0
414 1 : for skipPrefix+1 < min(len(a), len(b)) && a[skipPrefix] == b[skipPrefix] {
415 1 : skipPrefix++
416 1 : }
417 1 : aPiece := a[skipPrefix:]
418 1 : bPiece := b[skipPrefix:]
419 1 : if len(aPiece) > maxLength {
420 1 : // The trimmed prefix is smaller than a; we must be careful below to not
421 1 : // return a key smaller than a.
422 1 : aPiece = aPiece[:maxLength]
423 1 : }
424 1 : if len(bPiece) > maxLength {
425 1 : // The trimmed prefix is smaller than b, so we will still respect the bound.
426 1 : bPiece = bPiece[:maxLength]
427 1 : }
428 1 : assertLess(aPiece, bPiece)
429 1 : apIdx := computeAlphabetKeyIndex(aPiece, inverseAlphabet, maxLength)
430 1 : bpIdx := computeAlphabetKeyIndex(bPiece, inverseAlphabet, maxLength)
431 1 : if bpIdx <= apIdx {
432 0 : panic("unreachable")
433 : }
434 1 : generatedIdx := apIdx + rng.Int63n(bpIdx-apIdx)
435 1 : if generatedIdx == apIdx {
436 1 : // Return key a. We handle this separately in case we trimmed aPiece above.
437 1 : return append([]byte(nil), a...)
438 1 : }
439 1 : dst := make([]byte, skipPrefix+maxLength)
440 1 : copy(dst, a[:skipPrefix])
441 1 : pieceLen := WriteKey(dst[skipPrefix:], Alpha(maxLength), generatedIdx)
442 1 : dst = dst[:skipPrefix+pieceLen]
443 1 : assertLE(a, dst)
444 1 : assertLess(dst, b)
445 1 : return dst
446 : }
447 :
448 1 : func assertValidPrefix(p []byte) {
449 1 : if !prefixRE.Match(p) {
450 0 : panic(fmt.Sprintf("invalid prefix %q", p))
451 : }
452 : }
453 :
454 1 : func assertLess(a, b []byte) {
455 1 : if Comparer.Compare(a, b) >= 0 {
456 0 : panic(fmt.Sprintf("invalid key ordering: %q >= %q", a, b))
457 : }
458 : }
459 :
460 1 : func assertLE(a, b []byte) {
461 1 : if Comparer.Compare(a, b) > 0 {
462 0 : panic(fmt.Sprintf("invalid key ordering: %q > %q", a, b))
463 : }
464 : }
|