Line data Source code
1 : // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : // Package testkeys provides facilities for generating and comparing
6 : // human-readable test keys for use in tests and benchmarks. This package
7 : // provides a single Comparer implementation that compares all keys generated
8 : // by this package.
9 : //
10 : // Keys generated by this package may optionally have a 'suffix' encoding an
11 : // MVCC timestamp. This suffix is of the form "@<integer>". Comparisons on the
12 : // suffix are performed using integer value, not the byte representation.
13 : package testkeys
14 :
15 : import (
16 : "bytes"
17 : "cmp"
18 : "fmt"
19 : "math"
20 : "regexp"
21 : "strconv"
22 : "strings"
23 :
24 : "github.com/cockroachdb/pebble/internal/base"
25 : "golang.org/x/exp/rand"
26 : )
27 :
28 : const alpha = "abcdefghijklmnopqrstuvwxyz"
29 :
30 : const suffixDelim = '@'
31 :
32 : var inverseAlphabet = make(map[byte]int64, len(alpha))
33 :
34 : var prefixRE = regexp.MustCompile("[" + alpha + "]+")
35 :
36 1 : func init() {
37 1 : for i := range alpha {
38 1 : inverseAlphabet[alpha[i]] = int64(i)
39 1 : }
40 : }
41 :
42 : // MaxSuffixLen is the maximum length of a suffix generated by this package.
43 : var MaxSuffixLen = 1 + len(fmt.Sprintf("%d", int64(math.MaxInt64)))
44 :
45 : // Comparer is the comparer for test keys generated by this package.
46 : var Comparer = &base.Comparer{
47 : Compare: compare,
48 1 : Equal: func(a, b []byte) bool { return compare(a, b) == 0 },
49 1 : AbbreviatedKey: func(k []byte) uint64 {
50 1 : return base.DefaultComparer.AbbreviatedKey(k[:split(k)])
51 1 : },
52 : FormatKey: base.DefaultFormatter,
53 1 : Separator: func(dst, a, b []byte) []byte {
54 1 : ai := split(a)
55 1 : if ai == len(a) {
56 1 : return append(dst, a...)
57 1 : }
58 1 : bi := split(b)
59 1 : if bi == len(b) {
60 1 : return append(dst, a...)
61 1 : }
62 :
63 : // If the keys are the same just return a.
64 1 : if bytes.Equal(a[:ai], b[:bi]) {
65 1 : return append(dst, a...)
66 1 : }
67 1 : n := len(dst)
68 1 : dst = base.DefaultComparer.Separator(dst, a[:ai], b[:bi])
69 1 : // Did it pick a separator different than a[:ai] -- if not we can't do better than a.
70 1 : buf := dst[n:]
71 1 : if bytes.Equal(a[:ai], buf) {
72 1 : return append(dst[:n], a...)
73 1 : }
74 : // The separator is > a[:ai], so return it
75 1 : return dst
76 : },
77 1 : Successor: func(dst, a []byte) []byte {
78 1 : ai := split(a)
79 1 : if ai == len(a) {
80 1 : return append(dst, a...)
81 1 : }
82 1 : n := len(dst)
83 1 : dst = base.DefaultComparer.Successor(dst, a[:ai])
84 1 : // Did it pick a successor different than a[:ai] -- if not we can't do better than a.
85 1 : buf := dst[n:]
86 1 : if bytes.Equal(a[:ai], buf) {
87 0 : return append(dst[:n], a...)
88 0 : }
89 : // The successor is > a[:ai], so return it.
90 1 : return dst
91 : },
92 1 : ImmediateSuccessor: func(dst, a []byte) []byte {
93 1 : // TODO(jackson): Consider changing this Comparer to only support
94 1 : // representable prefix keys containing characters a-z.
95 1 : ai := split(a)
96 1 : if ai != len(a) {
97 0 : panic("pebble: ImmediateSuccessor invoked with a non-prefix key")
98 : }
99 1 : return append(append(dst, a...), 0x00)
100 : },
101 : Split: split,
102 : Name: "pebble.internal.testkeys",
103 : }
104 :
105 : // The comparator is similar to the one in Cockroach; when the prefixes are
106 : // equal:
107 : // - a key without a suffix is smaller than one with a suffix;
108 : // - when both keys have a suffix, the key with the larger (decoded) suffix
109 : // value is smaller.
110 1 : func compare(a, b []byte) int {
111 1 : ai, bi := split(a), split(b)
112 1 : if v := bytes.Compare(a[:ai], b[:bi]); v != 0 {
113 1 : return v
114 1 : }
115 :
116 1 : if len(a[ai:]) == 0 {
117 1 : if len(b[bi:]) == 0 {
118 1 : return 0
119 1 : }
120 1 : return -1
121 1 : } else if len(b[bi:]) == 0 {
122 1 : return +1
123 1 : }
124 1 : return compareTimestamps(a[ai:], b[bi:])
125 : }
126 :
127 1 : func split(a []byte) int {
128 1 : i := bytes.LastIndexByte(a, suffixDelim)
129 1 : if i >= 0 {
130 1 : return i
131 1 : }
132 1 : return len(a)
133 : }
134 :
135 1 : func compareTimestamps(a, b []byte) int {
136 1 : ai, err := parseUintBytes(bytes.TrimPrefix(a, []byte{suffixDelim}), 10, 64)
137 1 : if err != nil {
138 0 : panic(fmt.Sprintf("invalid test mvcc timestamp %q", a))
139 : }
140 1 : bi, err := parseUintBytes(bytes.TrimPrefix(b, []byte{suffixDelim}), 10, 64)
141 1 : if err != nil {
142 0 : panic(fmt.Sprintf("invalid test mvcc timestamp %q", b))
143 : }
144 1 : return cmp.Compare(bi, ai)
145 : }
146 :
147 : // Keyspace describes a finite keyspace of unsuffixed test keys.
148 : type Keyspace interface {
149 : // Count returns the number of keys that exist within this keyspace.
150 : Count() int64
151 :
152 : // MaxLen returns the maximum length, in bytes, of a key within this
153 : // keyspace. This is only guaranteed to return an upper bound.
154 : MaxLen() int
155 :
156 : // Slice returns the sub-keyspace from index i, inclusive, to index j,
157 : // exclusive. The receiver is unmodified.
158 : Slice(i, j int64) Keyspace
159 :
160 : // EveryN returns a key space that includes 1 key for every N keys in the
161 : // original keyspace. The receiver is unmodified.
162 : EveryN(n int64) Keyspace
163 :
164 : // key writes the i-th key to the buffer and returns the length.
165 : key(buf []byte, i int64) int
166 : }
167 :
168 : // Divvy divides the provided keyspace into N equal portions, containing
169 : // disjoint keys evenly distributed across the keyspace.
170 0 : func Divvy(ks Keyspace, n int64) []Keyspace {
171 0 : ret := make([]Keyspace, n)
172 0 : for i := int64(0); i < n; i++ {
173 0 : ret[i] = ks.Slice(i, ks.Count()).EveryN(n)
174 0 : }
175 0 : return ret
176 : }
177 :
178 : // Alpha constructs a keyspace consisting of all keys containing characters a-z,
179 : // with at most `maxLength` characters.
180 0 : func Alpha(maxLength int) Keyspace {
181 0 : return alphabet{
182 0 : alphabet: []byte(alpha),
183 0 : maxLength: maxLength,
184 0 : increment: 1,
185 0 : }
186 0 : }
187 :
188 : // KeyAt returns the i-th key within the keyspace with a suffix encoding the
189 : // timestamp t.
190 0 : func KeyAt(k Keyspace, i int64, t int64) []byte {
191 0 : b := make([]byte, k.MaxLen()+MaxSuffixLen)
192 0 : return b[:WriteKeyAt(b, k, i, t)]
193 0 : }
194 :
195 : // WriteKeyAt writes the i-th key within the keyspace to the buffer dst, with a
196 : // suffix encoding the timestamp t suffix. It returns the number of bytes
197 : // written.
198 0 : func WriteKeyAt(dst []byte, k Keyspace, i int64, t int64) int {
199 0 : n := WriteKey(dst, k, i)
200 0 : n += WriteSuffix(dst[n:], t)
201 0 : return n
202 0 : }
203 :
204 : // Suffix returns the test keys suffix representation of timestamp t.
205 0 : func Suffix(t int64) []byte {
206 0 : b := make([]byte, MaxSuffixLen)
207 0 : return b[:WriteSuffix(b, t)]
208 0 : }
209 :
210 : // SuffixLen returns the exact length of the given suffix when encoded.
211 0 : func SuffixLen(t int64) int {
212 0 : // Begin at 1 for the '@' delimiter, 1 for a single digit.
213 0 : n := 2
214 0 : t /= 10
215 0 : for t > 0 {
216 0 : t /= 10
217 0 : n++
218 0 : }
219 0 : return n
220 : }
221 :
222 : // ParseSuffix returns the integer representation of the encoded suffix.
223 1 : func ParseSuffix(s []byte) (int64, error) {
224 1 : return strconv.ParseInt(strings.TrimPrefix(string(s), string(suffixDelim)), 10, 64)
225 1 : }
226 :
227 : // WriteSuffix writes the test keys suffix representation of timestamp t to dst,
228 : // returning the number of bytes written.
229 0 : func WriteSuffix(dst []byte, t int64) int {
230 0 : dst[0] = suffixDelim
231 0 : n := 1
232 0 : n += len(strconv.AppendInt(dst[n:n], t, 10))
233 0 : return n
234 0 : }
235 :
236 : // Key returns the i-th unsuffixed key within the keyspace.
237 0 : func Key(k Keyspace, i int64) []byte {
238 0 : b := make([]byte, k.MaxLen())
239 0 : return b[:k.key(b, i)]
240 0 : }
241 :
242 : // WriteKey writes the i-th unsuffixed key within the keyspace to the buffer dst. It
243 : // returns the number of bytes written.
244 0 : func WriteKey(dst []byte, k Keyspace, i int64) int {
245 0 : return k.key(dst, i)
246 0 : }
247 :
248 : type alphabet struct {
249 : alphabet []byte
250 : maxLength int
251 : headSkip int64
252 : tailSkip int64
253 : increment int64
254 : }
255 :
256 0 : func (a alphabet) Count() int64 {
257 0 : // Calculate the total number of keys, ignoring the increment.
258 0 : total := keyCount(len(a.alphabet), a.maxLength) - a.headSkip - a.tailSkip
259 0 :
260 0 : // The increment dictates that we take every N keys, where N = a.increment.
261 0 : // Consider a total containing the 5 keys:
262 0 : // a b c d e
263 0 : // ^ ^ ^
264 0 : // If the increment is 2, this keyspace includes 'a', 'c' and 'e'. After
265 0 : // dividing by the increment, there may be remainder. If there is, there's
266 0 : // one additional key in the alphabet.
267 0 : count := total / a.increment
268 0 : if total%a.increment > 0 {
269 0 : count++
270 0 : }
271 0 : return count
272 : }
273 :
274 0 : func (a alphabet) MaxLen() int {
275 0 : return a.maxLength
276 0 : }
277 :
278 0 : func (a alphabet) Slice(i, j int64) Keyspace {
279 0 : s := a
280 0 : s.headSkip += i
281 0 : s.tailSkip += a.Count() - j
282 0 : return s
283 0 : }
284 :
285 0 : func (a alphabet) EveryN(n int64) Keyspace {
286 0 : s := a
287 0 : s.increment *= n
288 0 : return s
289 0 : }
290 :
291 0 : func keyCount(n, l int) int64 {
292 0 : // The number of representable keys in the keyspace is a function of the
293 0 : // length of the alphabet n and the max key length l:
294 0 : // n + n^2 + ... + n^l
295 0 : x := int64(1)
296 0 : res := int64(0)
297 0 : for i := 1; i <= l; i++ {
298 0 : if x >= math.MaxInt64/int64(n) {
299 0 : panic("overflow")
300 : }
301 0 : x *= int64(n)
302 0 : res += x
303 0 : if res < 0 {
304 0 : panic("overflow")
305 : }
306 : }
307 0 : return res
308 : }
309 :
310 0 : func (a alphabet) key(buf []byte, idx int64) int {
311 0 : // This function generates keys of length 1..maxKeyLength, pulling
312 0 : // characters from the alphabet. The idx determines which key to generate,
313 0 : // generating the i-th lexicographically next key.
314 0 : //
315 0 : // The index to use is advanced by `headSkip`, allowing a keyspace to encode
316 0 : // a subregion of the keyspace.
317 0 : //
318 0 : // Eg, alphabet = `ab`, maxKeyLength = 3:
319 0 : //
320 0 : // aaa aab aba abb baa bab bba bbb
321 0 : // aa ab ba bb
322 0 : // a b
323 0 : // 0 1 2 3 4 5 6 7 8 9 10 11 12 13
324 0 : //
325 0 : return generateAlphabetKey(buf, a.alphabet, (idx*a.increment)+a.headSkip,
326 0 : keyCount(len(a.alphabet), a.maxLength))
327 0 : }
328 :
329 0 : func generateAlphabetKey(buf, alphabet []byte, i, keyCount int64) int {
330 0 : if keyCount == 0 || i > keyCount || i < 0 {
331 0 : return 0
332 0 : }
333 :
334 : // Of the keyCount keys in the generative keyspace, how many are there
335 : // starting with a particular character?
336 0 : keysPerCharacter := keyCount / int64(len(alphabet))
337 0 :
338 0 : // Find the character that the key at index i starts with and set it.
339 0 : characterIdx := i / keysPerCharacter
340 0 : buf[0] = alphabet[characterIdx]
341 0 :
342 0 : // Consider characterIdx = 0, pointing to 'a'.
343 0 : //
344 0 : // aaa aab aba abb baa bab bba bbb
345 0 : // aa ab ba bb
346 0 : // a b
347 0 : // 0 1 2 3 4 5 6 7 8 9 10 11 12 13
348 0 : // \_________________________/
349 0 : // |keysPerCharacter| keys
350 0 : //
351 0 : // In our recursive call, we reduce the problem to:
352 0 : //
353 0 : // aaa aab aba abb
354 0 : // aa ab
355 0 : // 0 1 2 3 4 5
356 0 : // \________________________/
357 0 : // |keysPerCharacter-1| keys
358 0 : //
359 0 : // In the subproblem, there are keysPerCharacter-1 keys (eliminating the
360 0 : // just 'a' key, plus any keys beginning with any other character).
361 0 : //
362 0 : // The index i is also offset, reduced by the count of keys beginning with
363 0 : // characters earlier in the alphabet (keysPerCharacter*characterIdx) and
364 0 : // the key consisting of just the 'a' (-1).
365 0 : i = i - keysPerCharacter*characterIdx - 1
366 0 : return 1 + generateAlphabetKey(buf[1:], alphabet, i, keysPerCharacter-1)
367 : }
368 :
369 : // computeAlphabetKeyIndex computes the inverse of generateAlphabetKey,
370 : // returning the index of a particular key, given the provided alphabet and max
371 : // length of a key.
372 : //
373 : // len(key) must be ≥ 1.
374 0 : func computeAlphabetKeyIndex(key []byte, alphabet map[byte]int64, n int) int64 {
375 0 : i, ok := alphabet[key[0]]
376 0 : if !ok {
377 0 : panic(fmt.Sprintf("unrecognized alphabet character %v", key[0]))
378 : }
379 : // How many keys exist that start with the preceding i characters? Each of
380 : // the i characters themselves are a key, plus the count of all the keys
381 : // with one less character for each.
382 0 : ret := i + i*keyCount(len(alphabet), n-1)
383 0 : if len(key) > 1 {
384 0 : ret += 1 + computeAlphabetKeyIndex(key[1:], alphabet, n-1)
385 0 : }
386 0 : return ret
387 : }
388 :
389 : // RandomPrefixInRange returns a random prefix in the range [a, b), where a and
390 : // b are prefixes.
391 0 : func RandomPrefixInRange(a, b []byte, rng *rand.Rand) []byte {
392 0 : assertValidPrefix(a)
393 0 : assertValidPrefix(b)
394 0 : assertLess(a, b)
395 0 : commonPrefix := 0
396 0 : for commonPrefix < len(a)-1 && commonPrefix < len(b)-1 && a[commonPrefix] == b[commonPrefix] {
397 0 : commonPrefix++
398 0 : }
399 :
400 : // We will generate a piece of a key from the Alpha(maxLength) keyspace. Note
401 : // that maxLength cannot be higher than ~13 or we will encounter overflows.
402 0 : maxLength := 4 + rng.Intn(8)
403 0 :
404 0 : // Skip any common prefix (but leave at least one character in each key).
405 0 : skipPrefix := 0
406 0 : for skipPrefix+1 < min(len(a), len(b)) && a[skipPrefix] == b[skipPrefix] {
407 0 : skipPrefix++
408 0 : }
409 0 : aPiece := a[skipPrefix:]
410 0 : bPiece := b[skipPrefix:]
411 0 : if len(aPiece) > maxLength {
412 0 : // The trimmed prefix is smaller than a; we must be careful below to not
413 0 : // return a key smaller than a.
414 0 : aPiece = aPiece[:maxLength]
415 0 : }
416 0 : if len(bPiece) > maxLength {
417 0 : // The trimmed prefix is smaller than b, so we will still respect the bound.
418 0 : bPiece = bPiece[:maxLength]
419 0 : }
420 0 : assertLess(aPiece, bPiece)
421 0 : apIdx := computeAlphabetKeyIndex(aPiece, inverseAlphabet, maxLength)
422 0 : bpIdx := computeAlphabetKeyIndex(bPiece, inverseAlphabet, maxLength)
423 0 : if bpIdx <= apIdx {
424 0 : panic("unreachable")
425 : }
426 0 : generatedIdx := apIdx + rng.Int63n(bpIdx-apIdx)
427 0 : if generatedIdx == apIdx {
428 0 : // Return key a. We handle this separately in case we trimmed aPiece above.
429 0 : return append([]byte(nil), a...)
430 0 : }
431 0 : dst := make([]byte, skipPrefix+maxLength)
432 0 : copy(dst, a[:skipPrefix])
433 0 : pieceLen := WriteKey(dst[skipPrefix:], Alpha(maxLength), generatedIdx)
434 0 : dst = dst[:skipPrefix+pieceLen]
435 0 : assertLE(a, dst)
436 0 : assertLess(dst, b)
437 0 : return dst
438 : }
439 :
440 0 : func assertValidPrefix(p []byte) {
441 0 : if !prefixRE.Match(p) {
442 0 : panic(fmt.Sprintf("invalid prefix %q", p))
443 : }
444 : }
445 :
446 0 : func assertLess(a, b []byte) {
447 0 : if Comparer.Compare(a, b) >= 0 {
448 0 : panic(fmt.Sprintf("invalid key ordering: %q >= %q", a, b))
449 : }
450 : }
451 :
452 0 : func assertLE(a, b []byte) {
453 0 : if Comparer.Compare(a, b) > 0 {
454 0 : panic(fmt.Sprintf("invalid key ordering: %q > %q", a, b))
455 : }
456 : }
|