LCOV - 2024-08-03 08:16Z cda4471a - tests + meta.lcov - pebble/internal/testkeys/testkeys.go

LCOV - code coverage report

Current view:	top level - pebble/internal/testkeys - testkeys.go (source / functions)		Hit	Total	Coverage
Test:	2024-08-03 08:16Z cda4471a - tests + meta.lcov	Lines:	274	289	94.8 %
Date:	2024-08-03 08:17:19	Functions:	0	0	-

          Line data    Source code

       1             : // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : // Package testkeys provides facilities for generating and comparing
       6             : // human-readable test keys for use in tests and benchmarks. This package
       7             : // provides a single Comparer implementation that compares all keys generated
       8             : // by this package.
       9             : //
      10             : // Keys generated by this package may optionally have a 'suffix' encoding an
      11             : // MVCC timestamp. This suffix is of the form "@<integer>". Comparisons on the
      12             : // suffix are performed using integer value, not the byte representation.
      13             : package testkeys
      14             : 
      15             : import (
      16             :         "bytes"
      17             :         "cmp"
      18             :         "fmt"
      19             :         "math"
      20             :         "regexp"
      21             :         "strconv"
      22             :         "strings"
      23             : 
      24             :         "github.com/cockroachdb/pebble/internal/base"
      25             :         "golang.org/x/exp/rand"
      26             : )
      27             : 
      28             : const alpha = "abcdefghijklmnopqrstuvwxyz"
      29             : 
      30             : const suffixDelim = '@'
      31             : 
      32             : var inverseAlphabet = make(map[byte]int64, len(alpha))
      33             : 
      34             : var prefixRE = regexp.MustCompile("[" + alpha + "]+")
      35             : 
      36           2 : func init() {
      37           2 :         for i := range alpha {
      38           2 :                 inverseAlphabet[alpha[i]] = int64(i)
      39           2 :         }
      40             : }
      41             : 
      42             : // MaxSuffixLen is the maximum length of a suffix generated by this package.
      43             : var MaxSuffixLen = 1 + len(fmt.Sprintf("%d", int64(math.MaxInt64)))
      44             : 
      45             : // Comparer is the comparer for test keys generated by this package.
      46             : var Comparer = &base.Comparer{
      47             :         CompareSuffixes: compareTimestamps,
      48             :         Compare:         compare,
      49           2 :         Equal:           func(a, b []byte) bool { return compare(a, b) == 0 },
      50           2 :         AbbreviatedKey: func(k []byte) uint64 {
      51           2 :                 return base.DefaultComparer.AbbreviatedKey(k[:split(k)])
      52           2 :         },
      53             :         FormatKey: base.DefaultFormatter,
      54           2 :         Separator: func(dst, a, b []byte) []byte {
      55           2 :                 ai := split(a)
      56           2 :                 if ai == len(a) {
      57           2 :                         return append(dst, a...)
      58           2 :                 }
      59           2 :                 bi := split(b)
      60           2 :                 if bi == len(b) {
      61           2 :                         return append(dst, a...)
      62           2 :                 }
      63             : 
      64             :                 // If the keys are the same just return a.
      65           2 :                 if bytes.Equal(a[:ai], b[:bi]) {
      66           2 :                         return append(dst, a...)
      67           2 :                 }
      68           2 :                 n := len(dst)
      69           2 :                 dst = base.DefaultComparer.Separator(dst, a[:ai], b[:bi])
      70           2 :                 // Did it pick a separator different than a[:ai] -- if not we can't do better than a.
      71           2 :                 buf := dst[n:]
      72           2 :                 if bytes.Equal(a[:ai], buf) {
      73           2 :                         return append(dst[:n], a...)
      74           2 :                 }
      75             :                 // The separator is > a[:ai], so return it
      76           2 :                 return dst
      77             :         },
      78           2 :         Successor: func(dst, a []byte) []byte {
      79           2 :                 ai := split(a)
      80           2 :                 if ai == len(a) {
      81           2 :                         return append(dst, a...)
      82           2 :                 }
      83           2 :                 n := len(dst)
      84           2 :                 dst = base.DefaultComparer.Successor(dst, a[:ai])
      85           2 :                 // Did it pick a successor different than a[:ai] -- if not we can't do better than a.
      86           2 :                 buf := dst[n:]
      87           2 :                 if bytes.Equal(a[:ai], buf) {
      88           0 :                         return append(dst[:n], a...)
      89           0 :                 }
      90             :                 // The successor is > a[:ai], so return it.
      91           2 :                 return dst
      92             :         },
      93           2 :         ImmediateSuccessor: func(dst, a []byte) []byte {
      94           2 :                 // TODO(jackson): Consider changing this Comparer to only support
      95           2 :                 // representable prefix keys containing characters a-z.
      96           2 :                 ai := split(a)
      97           2 :                 if ai != len(a) {
      98           0 :                         panic("pebble: ImmediateSuccessor invoked with a non-prefix key")
      99             :                 }
     100           2 :                 return append(append(dst, a...), 0x00)
     101             :         },
     102             :         Split: split,
     103             :         Name:  "pebble.internal.testkeys",
     104             : }
     105             : 
     106             : // The comparator is similar to the one in Cockroach; when the prefixes are
     107             : // equal:
     108             : //   - a key without a suffix is smaller than one with a suffix;
     109             : //   - when both keys have a suffix, the key with the larger (decoded) suffix
     110             : //     value is smaller.
     111           2 : func compare(a, b []byte) int {
     112           2 :         ai, bi := split(a), split(b)
     113           2 :         if ai == 0 && len(a) > 0 {
     114           0 :                 panic(fmt.Sprintf("Compare called with bare suffix %s", a))
     115             :         }
     116           2 :         if bi == 0 && len(b) > 0 {
     117           0 :                 panic(fmt.Sprintf("Compare called with bare suffix %s", b))
     118             :         }
     119           2 :         if v := bytes.Compare(a[:ai], b[:bi]); v != 0 {
     120           2 :                 return v
     121           2 :         }
     122           2 :         return compareTimestamps(a[ai:], b[bi:])
     123             : }
     124             : 
     125           2 : func split(a []byte) int {
     126           2 :         i := bytes.LastIndexByte(a, suffixDelim)
     127           2 :         if i == 0 {
     128           0 :                 panic("split called on bare prefix")
     129             :         }
     130           2 :         if i > 0 {
     131           2 :                 return i
     132           2 :         }
     133           2 :         return len(a)
     134             : }
     135             : 
     136           2 : func compareTimestamps(a, b []byte) int {
     137           2 :         if len(a) == 0 || len(b) == 0 {
     138           2 :                 // The empty suffix sorts first.
     139           2 :                 return cmp.Compare(len(a), len(b))
     140           2 :         }
     141           2 :         if a[0] != suffixDelim || b[0] != suffixDelim {
     142           0 :                 panic(fmt.Sprintf("invalid suffixes %q %q", a, b))
     143             :         }
     144           2 :         ai, err := parseUintBytes(a[1:], 10, 64)
     145           2 :         if err != nil {
     146           0 :                 panic(fmt.Sprintf("invalid test mvcc timestamp %q", a))
     147             :         }
     148           2 :         bi, err := parseUintBytes(b[1:], 10, 64)
     149           2 :         if err != nil {
     150           0 :                 panic(fmt.Sprintf("invalid test mvcc timestamp %q", b))
     151             :         }
     152           2 :         return cmp.Compare(bi, ai)
     153             : }
     154             : 
     155             : // Keyspace describes a finite keyspace of unsuffixed test keys.
     156             : type Keyspace interface {
     157             :         // Count returns the number of keys that exist within this keyspace.
     158             :         Count() int64
     159             : 
     160             :         // MaxLen returns the maximum length, in bytes, of a key within this
     161             :         // keyspace. This is only guaranteed to return an upper bound.
     162             :         MaxLen() int
     163             : 
     164             :         // Slice returns the sub-keyspace from index i, inclusive, to index j,
     165             :         // exclusive. The receiver is unmodified.
     166             :         Slice(i, j int64) Keyspace
     167             : 
     168             :         // EveryN returns a key space that includes 1 key for every N keys in the
     169             :         // original keyspace. The receiver is unmodified.
     170             :         EveryN(n int64) Keyspace
     171             : 
     172             :         // key writes the i-th key to the buffer and returns the length.
     173             :         key(buf []byte, i int64) int
     174             : }
     175             : 
     176             : // Divvy divides the provided keyspace into N equal portions, containing
     177             : // disjoint keys evenly distributed across the keyspace.
     178           1 : func Divvy(ks Keyspace, n int64) []Keyspace {
     179           1 :         ret := make([]Keyspace, n)
     180           1 :         for i := int64(0); i < n; i++ {
     181           1 :                 ret[i] = ks.Slice(i, ks.Count()).EveryN(n)
     182           1 :         }
     183           1 :         return ret
     184             : }
     185             : 
     186             : // Alpha constructs a keyspace consisting of all keys containing characters a-z,
     187             : // with at most `maxLength` characters.
     188           1 : func Alpha(maxLength int) Keyspace {
     189           1 :         return alphabet{
     190           1 :                 alphabet:  []byte(alpha),
     191           1 :                 maxLength: maxLength,
     192           1 :                 increment: 1,
     193           1 :         }
     194           1 : }
     195             : 
     196             : // KeyAt returns the i-th key within the keyspace with a suffix encoding the
     197             : // timestamp t.
     198           1 : func KeyAt(k Keyspace, i int64, t int64) []byte {
     199           1 :         b := make([]byte, k.MaxLen()+MaxSuffixLen)
     200           1 :         return b[:WriteKeyAt(b, k, i, t)]
     201           1 : }
     202             : 
     203             : // WriteKeyAt writes the i-th key within the keyspace to the buffer dst, with a
     204             : // suffix encoding the timestamp t suffix. It returns the number of bytes
     205             : // written.
     206           1 : func WriteKeyAt(dst []byte, k Keyspace, i int64, t int64) int {
     207           1 :         n := WriteKey(dst, k, i)
     208           1 :         n += WriteSuffix(dst[n:], t)
     209           1 :         return n
     210           1 : }
     211             : 
     212             : // Suffix returns the test keys suffix representation of timestamp t.
     213           1 : func Suffix(t int64) []byte {
     214           1 :         b := make([]byte, MaxSuffixLen)
     215           1 :         return b[:WriteSuffix(b, t)]
     216           1 : }
     217             : 
     218             : // SuffixLen returns the exact length of the given suffix when encoded.
     219           1 : func SuffixLen(t int64) int {
     220           1 :         // Begin at 1 for the '@' delimiter, 1 for a single digit.
     221           1 :         n := 2
     222           1 :         t /= 10
     223           1 :         for t > 0 {
     224           1 :                 t /= 10
     225           1 :                 n++
     226           1 :         }
     227           1 :         return n
     228             : }
     229             : 
     230             : // ParseSuffix returns the integer representation of the encoded suffix.
     231           2 : func ParseSuffix(s []byte) (int64, error) {
     232           2 :         return strconv.ParseInt(strings.TrimPrefix(string(s), string(suffixDelim)), 10, 64)
     233           2 : }
     234             : 
     235             : // WriteSuffix writes the test keys suffix representation of timestamp t to dst,
     236             : // returning the number of bytes written.
     237           1 : func WriteSuffix(dst []byte, t int64) int {
     238           1 :         dst[0] = suffixDelim
     239           1 :         n := 1
     240           1 :         n += len(strconv.AppendInt(dst[n:n], t, 10))
     241           1 :         return n
     242           1 : }
     243             : 
     244             : // Key returns the i-th unsuffixed key within the keyspace.
     245           1 : func Key(k Keyspace, i int64) []byte {
     246           1 :         b := make([]byte, k.MaxLen())
     247           1 :         return b[:k.key(b, i)]
     248           1 : }
     249             : 
     250             : // WriteKey writes the i-th unsuffixed key within the keyspace to the buffer dst. It
     251             : // returns the number of bytes written.
     252           1 : func WriteKey(dst []byte, k Keyspace, i int64) int {
     253           1 :         return k.key(dst, i)
     254           1 : }
     255             : 
     256             : type alphabet struct {
     257             :         alphabet  []byte
     258             :         maxLength int
     259             :         headSkip  int64
     260             :         tailSkip  int64
     261             :         increment int64
     262             : }
     263             : 
     264           1 : func (a alphabet) Count() int64 {
     265           1 :         // Calculate the total number of keys, ignoring the increment.
     266           1 :         total := keyCount(len(a.alphabet), a.maxLength) - a.headSkip - a.tailSkip
     267           1 : 
     268           1 :         // The increment dictates that we take every N keys, where N = a.increment.
     269           1 :         // Consider a total containing the 5 keys:
     270           1 :         //   a  b  c  d  e
     271           1 :         //   ^     ^     ^
     272           1 :         // If the increment is 2, this keyspace includes 'a', 'c' and 'e'. After
     273           1 :         // dividing by the increment, there may be remainder. If there is, there's
     274           1 :         // one additional key in the alphabet.
     275           1 :         count := total / a.increment
     276           1 :         if total%a.increment > 0 {
     277           1 :                 count++
     278           1 :         }
     279           1 :         return count
     280             : }
     281             : 
     282           1 : func (a alphabet) MaxLen() int {
     283           1 :         return a.maxLength
     284           1 : }
     285             : 
     286           1 : func (a alphabet) Slice(i, j int64) Keyspace {
     287           1 :         s := a
     288           1 :         s.headSkip += i
     289           1 :         s.tailSkip += a.Count() - j
     290           1 :         return s
     291           1 : }
     292             : 
     293           1 : func (a alphabet) EveryN(n int64) Keyspace {
     294           1 :         s := a
     295           1 :         s.increment *= n
     296           1 :         return s
     297           1 : }
     298             : 
     299           1 : func keyCount(n, l int) int64 {
     300           1 :         // The number of representable keys in the keyspace is a function of the
     301           1 :         // length of the alphabet n and the max key length l:
     302           1 :         //   n + n^2 + ... + n^l
     303           1 :         x := int64(1)
     304           1 :         res := int64(0)
     305           1 :         for i := 1; i <= l; i++ {
     306           1 :                 if x >= math.MaxInt64/int64(n) {
     307           1 :                         panic("overflow")
     308             :                 }
     309           1 :                 x *= int64(n)
     310           1 :                 res += x
     311           1 :                 if res < 0 {
     312           0 :                         panic("overflow")
     313             :                 }
     314             :         }
     315           1 :         return res
     316             : }
     317             : 
     318           1 : func (a alphabet) key(buf []byte, idx int64) int {
     319           1 :         // This function generates keys of length 1..maxKeyLength, pulling
     320           1 :         // characters from the alphabet. The idx determines which key to generate,
     321           1 :         // generating the i-th lexicographically next key.
     322           1 :         //
     323           1 :         // The index to use is advanced by `headSkip`, allowing a keyspace to encode
     324           1 :         // a subregion of the keyspace.
     325           1 :         //
     326           1 :         // Eg, alphabet = `ab`, maxKeyLength = 3:
     327           1 :         //
     328           1 :         //           aaa aab     aba abb         baa bab     bba bbb
     329           1 :         //       aa          ab              ba          bb
     330           1 :         //   a                           b
     331           1 :         //   0   1   2   3   4   5   6   7   8   9   10  11  12  13
     332           1 :         //
     333           1 :         return generateAlphabetKey(buf, a.alphabet, (idx*a.increment)+a.headSkip,
     334           1 :                 keyCount(len(a.alphabet), a.maxLength))
     335           1 : }
     336             : 
     337           1 : func generateAlphabetKey(buf, alphabet []byte, i, keyCount int64) int {
     338           1 :         if keyCount == 0 || i > keyCount || i < 0 {
     339           1 :                 return 0
     340           1 :         }
     341             : 
     342             :         // Of the keyCount keys in the generative keyspace, how many are there
     343             :         // starting with a particular character?
     344           1 :         keysPerCharacter := keyCount / int64(len(alphabet))
     345           1 : 
     346           1 :         // Find the character that the key at index i starts with and set it.
     347           1 :         characterIdx := i / keysPerCharacter
     348           1 :         buf[0] = alphabet[characterIdx]
     349           1 : 
     350           1 :         // Consider characterIdx = 0, pointing to 'a'.
     351           1 :         //
     352           1 :         //           aaa aab     aba abb         baa bab     bba bbb
     353           1 :         //       aa          ab              ba          bb
     354           1 :         //   a                           b
     355           1 :         //   0   1   2   3   4   5   6   7   8   9   10  11  12  13
     356           1 :         //  \_________________________/
     357           1 :         //    |keysPerCharacter| keys
     358           1 :         //
     359           1 :         // In our recursive call, we reduce the problem to:
     360           1 :         //
     361           1 :         //           aaa aab     aba abb
     362           1 :         //       aa          ab
     363           1 :         //       0   1   2   3   4   5
     364           1 :         //     \________________________/
     365           1 :         //    |keysPerCharacter-1| keys
     366           1 :         //
     367           1 :         // In the subproblem, there are keysPerCharacter-1 keys (eliminating the
     368           1 :         // just 'a' key, plus any keys beginning with any other character).
     369           1 :         //
     370           1 :         // The index i is also offset, reduced by the count of keys beginning with
     371           1 :         // characters earlier in the alphabet (keysPerCharacter*characterIdx) and
     372           1 :         // the key consisting of just the 'a' (-1).
     373           1 :         i = i - keysPerCharacter*characterIdx - 1
     374           1 :         return 1 + generateAlphabetKey(buf[1:], alphabet, i, keysPerCharacter-1)
     375             : }
     376             : 
     377             : // computeAlphabetKeyIndex computes the inverse of generateAlphabetKey,
     378             : // returning the index of a particular key, given the provided alphabet and max
     379             : // length of a key.
     380             : //
     381             : // len(key) must be ≥ 1.
     382           1 : func computeAlphabetKeyIndex(key []byte, alphabet map[byte]int64, n int) int64 {
     383           1 :         i, ok := alphabet[key[0]]
     384           1 :         if !ok {
     385           0 :                 panic(fmt.Sprintf("unrecognized alphabet character %v", key[0]))
     386             :         }
     387             :         // How many keys exist that start with the preceding i characters? Each of
     388             :         // the i characters themselves are a key, plus the count of all the keys
     389             :         // with one less character for each.
     390           1 :         ret := i + i*keyCount(len(alphabet), n-1)
     391           1 :         if len(key) > 1 {
     392           1 :                 ret += 1 + computeAlphabetKeyIndex(key[1:], alphabet, n-1)
     393           1 :         }
     394           1 :         return ret
     395             : }
     396             : 
     397             : // RandomPrefixInRange returns a random prefix in the range [a, b), where a and
     398             : // b are prefixes.
     399           1 : func RandomPrefixInRange(a, b []byte, rng *rand.Rand) []byte {
     400           1 :         assertValidPrefix(a)
     401           1 :         assertValidPrefix(b)
     402           1 :         assertLess(a, b)
     403           1 :         commonPrefix := 0
     404           1 :         for commonPrefix < len(a)-1 && commonPrefix < len(b)-1 && a[commonPrefix] == b[commonPrefix] {
     405           1 :                 commonPrefix++
     406           1 :         }
     407             : 
     408             :         // We will generate a piece of a key from the Alpha(maxLength) keyspace. Note
     409             :         // that maxLength cannot be higher than ~13 or we will encounter overflows.
     410           1 :         maxLength := 4 + rng.Intn(8)
     411           1 : 
     412           1 :         // Skip any common prefix (but leave at least one character in each key).
     413           1 :         skipPrefix := 0
     414           1 :         for skipPrefix+1 < min(len(a), len(b)) && a[skipPrefix] == b[skipPrefix] {
     415           1 :                 skipPrefix++
     416           1 :         }
     417           1 :         aPiece := a[skipPrefix:]
     418           1 :         bPiece := b[skipPrefix:]
     419           1 :         if len(aPiece) > maxLength {
     420           1 :                 // The trimmed prefix is smaller than a; we must be careful below to not
     421           1 :                 // return a key smaller than a.
     422           1 :                 aPiece = aPiece[:maxLength]
     423           1 :         }
     424           1 :         if len(bPiece) > maxLength {
     425           1 :                 // The trimmed prefix is smaller than b, so we will still respect the bound.
     426           1 :                 bPiece = bPiece[:maxLength]
     427           1 :         }
     428           1 :         assertLess(aPiece, bPiece)
     429           1 :         apIdx := computeAlphabetKeyIndex(aPiece, inverseAlphabet, maxLength)
     430           1 :         bpIdx := computeAlphabetKeyIndex(bPiece, inverseAlphabet, maxLength)
     431           1 :         if bpIdx <= apIdx {
     432           0 :                 panic("unreachable")
     433             :         }
     434           1 :         generatedIdx := apIdx + rng.Int63n(bpIdx-apIdx)
     435           1 :         if generatedIdx == apIdx {
     436           1 :                 // Return key a. We handle this separately in case we trimmed aPiece above.
     437           1 :                 return append([]byte(nil), a...)
     438           1 :         }
     439           1 :         dst := make([]byte, skipPrefix+maxLength)
     440           1 :         copy(dst, a[:skipPrefix])
     441           1 :         pieceLen := WriteKey(dst[skipPrefix:], Alpha(maxLength), generatedIdx)
     442           1 :         dst = dst[:skipPrefix+pieceLen]
     443           1 :         assertLE(a, dst)
     444           1 :         assertLess(dst, b)
     445           1 :         return dst
     446             : }
     447             : 
     448           1 : func assertValidPrefix(p []byte) {
     449           1 :         if !prefixRE.Match(p) {
     450           0 :                 panic(fmt.Sprintf("invalid prefix %q", p))
     451             :         }
     452             : }
     453             : 
     454           1 : func assertLess(a, b []byte) {
     455           1 :         if Comparer.Compare(a, b) >= 0 {
     456           0 :                 panic(fmt.Sprintf("invalid key ordering: %q >= %q", a, b))
     457             :         }
     458             : }
     459             : 
     460           1 : func assertLE(a, b []byte) {
     461           1 :         if Comparer.Compare(a, b) > 0 {
     462           0 :                 panic(fmt.Sprintf("invalid key ordering: %q > %q", a, b))
     463             :         }
     464             : }

Generated by: LCOV version 1.14