LCOV - code coverage report
Current view: top level - pebble/internal/testkeys - testkeys.go (source / functions) Hit Total Coverage
Test: 2023-12-07 08:16Z b3f359e3 - tests only.lcov Lines: 280 313 89.5 %
Date: 2023-12-07 08:16:26 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : // Package testkeys provides facilities for generating and comparing
       6             : // human-readable test keys for use in tests and benchmarks. This package
       7             : // provides a single Comparer implementation that compares all keys generated
       8             : // by this package.
       9             : //
      10             : // Keys generated by this package may optionally have a 'suffix' encoding an
      11             : // MVCC timestamp. This suffix is of the form "@<integer>". Comparisons on the
      12             : // suffix are performed using integer value, not the byte representation.
      13             : package testkeys
      14             : 
      15             : import (
      16             :         "bytes"
      17             :         "cmp"
      18             :         "fmt"
      19             :         "math"
      20             :         "strconv"
      21             :         "strings"
      22             : 
      23             :         "github.com/cockroachdb/pebble/internal/base"
      24             :         "golang.org/x/exp/rand"
      25             : )
      26             : 
      27             : const alpha = "abcdefghijklmnopqrstuvwxyz"
      28             : 
      29             : const suffixDelim = '@'
      30             : 
      31             : var inverseAlphabet = make(map[byte]int64, len(alpha))
      32             : 
      33           1 : func init() {
      34           1 :         for i := range alpha {
      35           1 :                 inverseAlphabet[alpha[i]] = int64(i)
      36           1 :         }
      37             : }
      38             : 
      39             : // MaxSuffixLen is the maximum length of a suffix generated by this package.
      40             : var MaxSuffixLen = 1 + len(fmt.Sprintf("%d", int64(math.MaxInt64)))
      41             : 
      42             : // Comparer is the comparer for test keys generated by this package.
      43             : var Comparer = &base.Comparer{
      44             :         Compare: compare,
      45           1 :         Equal:   func(a, b []byte) bool { return compare(a, b) == 0 },
      46           1 :         AbbreviatedKey: func(k []byte) uint64 {
      47           1 :                 return base.DefaultComparer.AbbreviatedKey(k[:split(k)])
      48           1 :         },
      49             :         FormatKey: base.DefaultFormatter,
      50           1 :         Separator: func(dst, a, b []byte) []byte {
      51           1 :                 ai := split(a)
      52           1 :                 if ai == len(a) {
      53           1 :                         return append(dst, a...)
      54           1 :                 }
      55           1 :                 bi := split(b)
      56           1 :                 if bi == len(b) {
      57           1 :                         return append(dst, a...)
      58           1 :                 }
      59             : 
      60             :                 // If the keys are the same just return a.
      61           1 :                 if bytes.Equal(a[:ai], b[:bi]) {
      62           1 :                         return append(dst, a...)
      63           1 :                 }
      64           1 :                 n := len(dst)
      65           1 :                 dst = base.DefaultComparer.Separator(dst, a[:ai], b[:bi])
      66           1 :                 // Did it pick a separator different than a[:ai] -- if not we can't do better than a.
      67           1 :                 buf := dst[n:]
      68           1 :                 if bytes.Equal(a[:ai], buf) {
      69           1 :                         return append(dst[:n], a...)
      70           1 :                 }
      71             :                 // The separator is > a[:ai], so return it
      72           1 :                 return dst
      73             :         },
      74           1 :         Successor: func(dst, a []byte) []byte {
      75           1 :                 ai := split(a)
      76           1 :                 if ai == len(a) {
      77           1 :                         return append(dst, a...)
      78           1 :                 }
      79           1 :                 n := len(dst)
      80           1 :                 dst = base.DefaultComparer.Successor(dst, a[:ai])
      81           1 :                 // Did it pick a successor different than a[:ai] -- if not we can't do better than a.
      82           1 :                 buf := dst[n:]
      83           1 :                 if bytes.Equal(a[:ai], buf) {
      84           0 :                         return append(dst[:n], a...)
      85           0 :                 }
      86             :                 // The successor is > a[:ai], so return it.
      87           1 :                 return dst
      88             :         },
      89           1 :         ImmediateSuccessor: func(dst, a []byte) []byte {
      90           1 :                 // TODO(jackson): Consider changing this Comparer to only support
      91           1 :                 // representable prefix keys containing characters a-z.
      92           1 :                 ai := split(a)
      93           1 :                 if ai != len(a) {
      94           0 :                         panic("pebble: ImmediateSuccessor invoked with a non-prefix key")
      95             :                 }
      96           1 :                 return append(append(dst, a...), 0x00)
      97             :         },
      98             :         Split: split,
      99             :         Name:  "pebble.internal.testkeys",
     100             : }
     101             : 
     102           1 : func compare(a, b []byte) int {
     103           1 :         ai, bi := split(a), split(b)
     104           1 :         if v := bytes.Compare(a[:ai], b[:bi]); v != 0 {
     105           1 :                 return v
     106           1 :         }
     107             : 
     108           1 :         if len(a[ai:]) == 0 {
     109           1 :                 if len(b[bi:]) == 0 {
     110           1 :                         return 0
     111           1 :                 }
     112           1 :                 return -1
     113           1 :         } else if len(b[bi:]) == 0 {
     114           1 :                 return +1
     115           1 :         }
     116           1 :         return compareTimestamps(a[ai:], b[bi:])
     117             : }
     118             : 
     119           1 : func split(a []byte) int {
     120           1 :         i := bytes.LastIndexByte(a, suffixDelim)
     121           1 :         if i >= 0 {
     122           1 :                 return i
     123           1 :         }
     124           1 :         return len(a)
     125             : }
     126             : 
     127           1 : func compareTimestamps(a, b []byte) int {
     128           1 :         ai, err := parseUintBytes(bytes.TrimPrefix(a, []byte{suffixDelim}), 10, 64)
     129           1 :         if err != nil {
     130           0 :                 panic(fmt.Sprintf("invalid test mvcc timestamp %q", a))
     131             :         }
     132           1 :         bi, err := parseUintBytes(bytes.TrimPrefix(b, []byte{suffixDelim}), 10, 64)
     133           1 :         if err != nil {
     134           0 :                 panic(fmt.Sprintf("invalid test mvcc timestamp %q", b))
     135             :         }
     136           1 :         return cmp.Compare(bi, ai)
     137             : }
     138             : 
     139             : // Keyspace describes a finite keyspace of unsuffixed test keys.
     140             : type Keyspace interface {
     141             :         // Count returns the number of keys that exist within this keyspace.
     142             :         Count() int64
     143             : 
     144             :         // MaxLen returns the maximum length, in bytes, of a key within this
     145             :         // keyspace. This is only guaranteed to return an upper bound.
     146             :         MaxLen() int
     147             : 
     148             :         // Slice returns the sub-keyspace from index i, inclusive, to index j,
     149             :         // exclusive. The receiver is unmodified.
     150             :         Slice(i, j int64) Keyspace
     151             : 
     152             :         // EveryN returns a key space that includes 1 key for every N keys in the
     153             :         // original keyspace. The receiver is unmodified.
     154             :         EveryN(n int64) Keyspace
     155             : 
     156             :         // key writes the i-th key to the buffer and returns the length.
     157             :         key(buf []byte, i int64) int
     158             : }
     159             : 
     160             : // Divvy divides the provided keyspace into N equal portions, containing
     161             : // disjoint keys evenly distributed across the keyspace.
     162           1 : func Divvy(ks Keyspace, n int64) []Keyspace {
     163           1 :         ret := make([]Keyspace, n)
     164           1 :         for i := int64(0); i < n; i++ {
     165           1 :                 ret[i] = ks.Slice(i, ks.Count()).EveryN(n)
     166           1 :         }
     167           1 :         return ret
     168             : }
     169             : 
     170             : // Alpha constructs a keyspace consisting of all keys containing characters a-z,
     171             : // with at most `maxLength` characters.
     172           1 : func Alpha(maxLength int) Keyspace {
     173           1 :         return alphabet{
     174           1 :                 alphabet:  []byte(alpha),
     175           1 :                 maxLength: maxLength,
     176           1 :                 increment: 1,
     177           1 :         }
     178           1 : }
     179             : 
     180             : // KeyAt returns the i-th key within the keyspace with a suffix encoding the
     181             : // timestamp t.
     182           1 : func KeyAt(k Keyspace, i int64, t int64) []byte {
     183           1 :         b := make([]byte, k.MaxLen()+MaxSuffixLen)
     184           1 :         return b[:WriteKeyAt(b, k, i, t)]
     185           1 : }
     186             : 
     187             : // WriteKeyAt writes the i-th key within the keyspace to the buffer dst, with a
     188             : // suffix encoding the timestamp t suffix. It returns the number of bytes
     189             : // written.
     190           1 : func WriteKeyAt(dst []byte, k Keyspace, i int64, t int64) int {
     191           1 :         n := WriteKey(dst, k, i)
     192           1 :         n += WriteSuffix(dst[n:], t)
     193           1 :         return n
     194           1 : }
     195             : 
     196             : // Suffix returns the test keys suffix representation of timestamp t.
     197           1 : func Suffix(t int64) []byte {
     198           1 :         b := make([]byte, MaxSuffixLen)
     199           1 :         return b[:WriteSuffix(b, t)]
     200           1 : }
     201             : 
     202             : // SuffixLen returns the exact length of the given suffix when encoded.
     203           1 : func SuffixLen(t int64) int {
     204           1 :         // Begin at 1 for the '@' delimiter, 1 for a single digit.
     205           1 :         n := 2
     206           1 :         t /= 10
     207           1 :         for t > 0 {
     208           1 :                 t /= 10
     209           1 :                 n++
     210           1 :         }
     211           1 :         return n
     212             : }
     213             : 
     214             : // ParseSuffix returns the integer representation of the encoded suffix.
     215           1 : func ParseSuffix(s []byte) (int64, error) {
     216           1 :         return strconv.ParseInt(strings.TrimPrefix(string(s), string(suffixDelim)), 10, 64)
     217           1 : }
     218             : 
     219             : // WriteSuffix writes the test keys suffix representation of timestamp t to dst,
     220             : // returning the number of bytes written.
     221           1 : func WriteSuffix(dst []byte, t int64) int {
     222           1 :         dst[0] = suffixDelim
     223           1 :         n := 1
     224           1 :         n += len(strconv.AppendInt(dst[n:n], t, 10))
     225           1 :         return n
     226           1 : }
     227             : 
     228             : // Key returns the i-th unsuffixed key within the keyspace.
     229           1 : func Key(k Keyspace, i int64) []byte {
     230           1 :         b := make([]byte, k.MaxLen())
     231           1 :         return b[:k.key(b, i)]
     232           1 : }
     233             : 
     234             : // WriteKey writes the i-th unsuffixed key within the keyspace to the buffer dst. It
     235             : // returns the number of bytes written.
     236           1 : func WriteKey(dst []byte, k Keyspace, i int64) int {
     237           1 :         return k.key(dst, i)
     238           1 : }
     239             : 
     240             : type alphabet struct {
     241             :         alphabet  []byte
     242             :         maxLength int
     243             :         headSkip  int64
     244             :         tailSkip  int64
     245             :         increment int64
     246             : }
     247             : 
     248           1 : func (a alphabet) Count() int64 {
     249           1 :         // Calculate the total number of keys, ignoring the increment.
     250           1 :         total := keyCount(len(a.alphabet), a.maxLength) - a.headSkip - a.tailSkip
     251           1 : 
     252           1 :         // The increment dictates that we take every N keys, where N = a.increment.
     253           1 :         // Consider a total containing the 5 keys:
     254           1 :         //   a  b  c  d  e
     255           1 :         //   ^     ^     ^
     256           1 :         // If the increment is 2, this keyspace includes 'a', 'c' and 'e'. After
     257           1 :         // dividing by the increment, there may be remainder. If there is, there's
     258           1 :         // one additional key in the alphabet.
     259           1 :         count := total / a.increment
     260           1 :         if total%a.increment > 0 {
     261           1 :                 count++
     262           1 :         }
     263           1 :         return count
     264             : }
     265             : 
     266           1 : func (a alphabet) MaxLen() int {
     267           1 :         return a.maxLength
     268           1 : }
     269             : 
     270           1 : func (a alphabet) Slice(i, j int64) Keyspace {
     271           1 :         s := a
     272           1 :         s.headSkip += i
     273           1 :         s.tailSkip += a.Count() - j
     274           1 :         return s
     275           1 : }
     276             : 
     277           1 : func (a alphabet) EveryN(n int64) Keyspace {
     278           1 :         s := a
     279           1 :         s.increment *= n
     280           1 :         return s
     281           1 : }
     282             : 
     283           1 : func keyCount(n, l int) int64 {
     284           1 :         if n == 0 {
     285           0 :                 return 0
     286           1 :         } else if n == 1 {
     287           0 :                 return int64(l)
     288           0 :         }
     289             :         // The number of representable keys in the keyspace is a function of the
     290             :         // length of the alphabet n and the max key length l. Consider how the
     291             :         // number of representable keys grows as l increases:
     292             :         //
     293             :         // l = 1: n
     294             :         // l = 2: n + n^2
     295             :         // l = 3: n + n^2 + n^3
     296             :         // ...
     297             :         // Σ i=(1...l) n^i = n*(n^l - 1)/(n-1)
     298           1 :         return (int64(n) * (int64(math.Pow(float64(n), float64(l))) - 1)) / int64(n-1)
     299             : }
     300             : 
     301           1 : func (a alphabet) key(buf []byte, idx int64) int {
     302           1 :         // This function generates keys of length 1..maxKeyLength, pulling
     303           1 :         // characters from the alphabet. The idx determines which key to generate,
     304           1 :         // generating the i-th lexicographically next key.
     305           1 :         //
     306           1 :         // The index to use is advanced by `headSkip`, allowing a keyspace to encode
     307           1 :         // a subregion of the keyspace.
     308           1 :         //
     309           1 :         // Eg, alphabet = `ab`, maxKeyLength = 3:
     310           1 :         //
     311           1 :         //           aaa aab     aba abb         baa bab     bba bbb
     312           1 :         //       aa          ab              ba          bb
     313           1 :         //   a                           b
     314           1 :         //   0   1   2   3   4   5   6   7   8   9   10  11  12  13
     315           1 :         //
     316           1 :         return generateAlphabetKey(buf, a.alphabet, (idx*a.increment)+a.headSkip,
     317           1 :                 keyCount(len(a.alphabet), a.maxLength))
     318           1 : }
     319             : 
     320           1 : func generateAlphabetKey(buf, alphabet []byte, i, keyCount int64) int {
     321           1 :         if keyCount == 0 || i > keyCount || i < 0 {
     322           1 :                 return 0
     323           1 :         }
     324             : 
     325             :         // Of the keyCount keys in the generative keyspace, how many are there
     326             :         // starting with a particular character?
     327           1 :         keysPerCharacter := keyCount / int64(len(alphabet))
     328           1 : 
     329           1 :         // Find the character that the key at index i starts with and set it.
     330           1 :         characterIdx := i / keysPerCharacter
     331           1 :         buf[0] = alphabet[characterIdx]
     332           1 : 
     333           1 :         // Consider characterIdx = 0, pointing to 'a'.
     334           1 :         //
     335           1 :         //           aaa aab     aba abb         baa bab     bba bbb
     336           1 :         //       aa          ab              ba          bb
     337           1 :         //   a                           b
     338           1 :         //   0   1   2   3   4   5   6   7   8   9   10  11  12  13
     339           1 :         //  \_________________________/
     340           1 :         //    |keysPerCharacter| keys
     341           1 :         //
     342           1 :         // In our recursive call, we reduce the problem to:
     343           1 :         //
     344           1 :         //           aaa aab     aba abb
     345           1 :         //       aa          ab
     346           1 :         //       0   1   2   3   4   5
     347           1 :         //     \________________________/
     348           1 :         //    |keysPerCharacter-1| keys
     349           1 :         //
     350           1 :         // In the subproblem, there are keysPerCharacter-1 keys (eliminating the
     351           1 :         // just 'a' key, plus any keys beginning with any other character).
     352           1 :         //
     353           1 :         // The index i is also offset, reduced by the count of keys beginning with
     354           1 :         // characters earlier in the alphabet (keysPerCharacter*characterIdx) and
     355           1 :         // the key consisting of just the 'a' (-1).
     356           1 :         i = i - keysPerCharacter*characterIdx - 1
     357           1 :         return 1 + generateAlphabetKey(buf[1:], alphabet, i, keysPerCharacter-1)
     358             : }
     359             : 
     360             : // computeAlphabetKeyIndex computes the inverse of generateAlphabetKey,
     361             : // returning the index of a particular key, given the provided alphabet and max
     362             : // length of a key.
     363             : //
     364             : // len(key) must be ≥ 1.
     365           1 : func computeAlphabetKeyIndex(key []byte, alphabet map[byte]int64, n int) int64 {
     366           1 :         i, ok := alphabet[key[0]]
     367           1 :         if !ok {
     368           0 :                 panic(fmt.Sprintf("unrecognized alphabet character %v", key[0]))
     369             :         }
     370             :         // How many keys exist that start with the preceding i characters? Each of
     371             :         // the i characters themselves are a key, plus the count of all the keys
     372             :         // with one less character for each.
     373           1 :         ret := i + i*keyCount(len(alphabet), n-1)
     374           1 :         if len(key) > 1 {
     375           1 :                 ret += 1 + computeAlphabetKeyIndex(key[1:], alphabet, n-1)
     376           1 :         }
     377           1 :         return ret
     378             : }
     379             : 
     380           1 : func abs(a int64) int64 {
     381           1 :         if a < 0 {
     382           1 :                 return -a
     383           1 :         }
     384           0 :         return a
     385             : }
     386             : 
     387             : // RandomSeparator returns a random alphabetic key k such that a < k < b,
     388             : // pulling randomness from the provided random number generator. If dst is
     389             : // provided and the generated key fits within dst's capacity, the returned slice
     390             : // will use dst's memory.
     391             : //
     392             : // If a prefix P exists such that Prefix(a) < P < Prefix(b), the generated key
     393             : // will consist of the prefix P appended with the provided suffix. A zero suffix
     394             : // generates an unsuffixed key. If no such prefix P exists, RandomSeparator will
     395             : // try to find a key k with either Prefix(a) or Prefix(b) such that a < k < b,
     396             : // but the generated key will not use the provided suffix. Note that it's
     397             : // possible that no separator key exists (eg, a='a@2', b='a@1'), in which case
     398             : // RandomSeparator returns nil.
     399             : //
     400             : // If RandomSeparator generates a new prefix, the generated prefix will have
     401             : // length at most MAX(maxLength, len(Prefix(a)), len(Prefix(b))).
     402             : //
     403             : // RandomSeparator panics if a or b fails to decode.
     404           1 : func RandomSeparator(dst, a, b []byte, suffix int64, maxLength int, rng *rand.Rand) []byte {
     405           1 :         if Comparer.Compare(a, b) >= 0 {
     406           0 :                 return nil
     407           0 :         }
     408             : 
     409             :         // Determine both keys' logical prefixes and suffixes.
     410           1 :         ai := Comparer.Split(a)
     411           1 :         bi := Comparer.Split(b)
     412           1 :         ap := a[:ai]
     413           1 :         bp := b[:bi]
     414           1 :         maxLength = max(maxLength, len(ap), len(bp))
     415           1 :         var as, bs int64
     416           1 :         var err error
     417           1 :         if ai != len(a) {
     418           1 :                 as, err = ParseSuffix(a[ai:])
     419           1 :                 if err != nil {
     420           0 :                         panic(fmt.Sprintf("failed to parse suffix of %q", a))
     421             :                 }
     422             :         }
     423           1 :         if bi != len(b) {
     424           1 :                 bs, err = ParseSuffix(b[bi:])
     425           1 :                 if err != nil {
     426           0 :                         panic(fmt.Sprintf("failed to parse suffix of %q", b))
     427             :                 }
     428             :         }
     429             : 
     430           1 :         apIdx := computeAlphabetKeyIndex(ap, inverseAlphabet, maxLength)
     431           1 :         bpIdx := computeAlphabetKeyIndex(bp, inverseAlphabet, maxLength)
     432           1 :         diff := bpIdx - apIdx
     433           1 :         generatedIdx := bpIdx
     434           1 :         if diff > 0 {
     435           1 :                 var add int64 = diff + 1
     436           1 :                 var start int64 = apIdx
     437           1 :                 if as == 1 {
     438           1 :                         // There's no expressible key with prefix a greater than a@1. So,
     439           1 :                         // exclude ap.
     440           1 :                         start = apIdx + 1
     441           1 :                         add = diff
     442           1 :                 }
     443           1 :                 if bs == 0 {
     444           1 :                         // No key with prefix b can sort before b@0. We don't want to pick b.
     445           1 :                         add--
     446           1 :                 }
     447             :                 // We're allowing generated id to be in the range [start, start + add - 1].
     448           1 :                 if start > start+add-1 {
     449           0 :                         return nil
     450           0 :                 }
     451             :                 // If we can generate a key which is actually in the middle of apIdx
     452             :                 // and bpIdx use it so that we don't have to bother about timestamps.
     453           1 :                 generatedIdx = rng.Int63n(add) + start
     454           1 :                 for diff > 1 && generatedIdx == apIdx || generatedIdx == bpIdx {
     455           1 :                         generatedIdx = rng.Int63n(add) + start
     456           1 :                 }
     457             :         }
     458             : 
     459           1 :         switch {
     460           1 :         case generatedIdx == apIdx && generatedIdx == bpIdx:
     461           1 :                 if abs(bs-as) <= 1 {
     462           1 :                         // There's no expressible suffix between the two, and there's no
     463           1 :                         // possible separator key.
     464           1 :                         return nil
     465           1 :                 }
     466             :                 // The key b is >= key a, but has the same prefix, so b must have the
     467             :                 // smaller timestamp, unless a has timestamp of 0.
     468             :                 //
     469             :                 // NB: The zero suffix (suffix-less) sorts before all other suffixes, so
     470             :                 // any suffix we generate will be greater than it.
     471           0 :                 if as == 0 {
     472           0 :                         // bs > as
     473           0 :                         suffix = bs + rng.Int63n(10) + 1
     474           0 :                 } else {
     475           0 :                         // bs < as.
     476           0 :                         // Generate suffix in range [bs + 1, as - 1]
     477           0 :                         suffix = bs + 1 + rng.Int63n(as-bs-1)
     478           0 :                 }
     479           1 :         case generatedIdx == apIdx:
     480           1 :                 // NB: The zero suffix (suffix-less) sorts before all other suffixes, so
     481           1 :                 // any suffix we generate will be greater than it.
     482           1 :                 if as == 0 && suffix == 0 {
     483           0 :                         suffix++
     484           1 :                 } else if as != 0 && suffix >= as {
     485           0 :                         suffix = rng.Int63n(as)
     486           0 :                 }
     487           0 :         case generatedIdx == bpIdx:
     488           0 :                 if suffix <= bs {
     489           0 :                         suffix = bs + rng.Int63n(10) + 1
     490           0 :                 }
     491             :         }
     492           1 :         if sz := maxLength + SuffixLen(suffix); cap(dst) < sz {
     493           1 :                 dst = make([]byte, sz)
     494           1 :         } else {
     495           0 :                 dst = dst[:cap(dst)]
     496           0 :         }
     497           1 :         var w int
     498           1 :         if suffix == 0 {
     499           1 :                 w = WriteKey(dst, Alpha(maxLength), generatedIdx)
     500           1 :         } else {
     501           1 :                 w = WriteKeyAt(dst, Alpha(maxLength), generatedIdx, suffix)
     502           1 :         }
     503           1 :         return dst[:w]
     504             : }

Generated by: LCOV version 1.14