LCOV - code coverage report
Current view: top level - pebble/internal/testkeys - testkeys.go (source / functions) Hit Total Coverage
Test: 2024-11-03 08:16Z 71bb6ba2 - meta test only.lcov Lines: 82 298 27.5 %
Date: 2024-11-03 08:17:16 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : // Package testkeys provides facilities for generating and comparing
       6             : // human-readable test keys for use in tests and benchmarks. This package
       7             : // provides a single Comparer implementation that compares all keys generated
       8             : // by this package.
       9             : //
      10             : // Keys generated by this package may optionally have a 'suffix' encoding an
      11             : // MVCC timestamp. This suffix is of the form "@<integer>". Comparisons on the
      12             : // suffix are performed using integer value, not the byte representation.
      13             : package testkeys
      14             : 
      15             : import (
      16             :         "bytes"
      17             :         "cmp"
      18             :         "fmt"
      19             :         "math"
      20             :         "math/rand/v2"
      21             :         "regexp"
      22             :         "strconv"
      23             :         "strings"
      24             : 
      25             :         "github.com/cockroachdb/pebble/internal/base"
      26             : )
      27             : 
      28             : const alpha = "abcdefghijklmnopqrstuvwxyz"
      29             : 
      30             : const suffixDelim = '@'
      31             : 
      32             : var inverseAlphabet = make(map[byte]int64, len(alpha))
      33             : 
      34             : var prefixRE = regexp.MustCompile("[" + alpha + "]+")
      35             : 
      36             : // ignoreTimestampSuffix is a suffix that is ignored when comparing keys, but
      37             : // not when comparing suffixes. It simulates the CRDB synthetic bit situation
      38             : // (see https://github.com/cockroachdb/cockroach/issues/130533).
      39             : var ignoreTimestampSuffix = []byte("_synthetic")
      40             : 
      41           1 : func init() {
      42           1 :         for i := range alpha {
      43           1 :                 inverseAlphabet[alpha[i]] = int64(i)
      44           1 :         }
      45             : }
      46             : 
      47             : // MaxSuffixLen is the maximum length of a suffix generated by this package.
      48             : var MaxSuffixLen = 1 + len(fmt.Sprintf("%d", int64(math.MaxInt64)))
      49             : 
      50             : // Comparer is the comparer for test keys generated by this package.
      51             : var Comparer = &base.Comparer{
      52             :         ComparePointSuffixes: compareSuffixes,
      53             :         CompareRangeSuffixes: compareSuffixes,
      54             :         Compare:              compare,
      55           1 :         Equal:                func(a, b []byte) bool { return compare(a, b) == 0 },
      56           1 :         AbbreviatedKey: func(k []byte) uint64 {
      57           1 :                 return base.DefaultComparer.AbbreviatedKey(k[:split(k)])
      58           1 :         },
      59             :         FormatKey: base.DefaultFormatter,
      60           1 :         Separator: func(dst, a, b []byte) []byte {
      61           1 :                 ai := split(a)
      62           1 :                 if ai == len(a) {
      63           1 :                         return append(dst, a...)
      64           1 :                 }
      65           1 :                 bi := split(b)
      66           1 :                 if bi == len(b) {
      67           1 :                         return append(dst, a...)
      68           1 :                 }
      69             : 
      70             :                 // If the keys are the same just return a.
      71           1 :                 if bytes.Equal(a[:ai], b[:bi]) {
      72           1 :                         return append(dst, a...)
      73           1 :                 }
      74           1 :                 n := len(dst)
      75           1 :                 dst = base.DefaultComparer.Separator(dst, a[:ai], b[:bi])
      76           1 :                 // Did it pick a separator different than a[:ai] -- if not we can't do better than a.
      77           1 :                 buf := dst[n:]
      78           1 :                 if bytes.Equal(a[:ai], buf) {
      79           1 :                         return append(dst[:n], a...)
      80           1 :                 }
      81             :                 // The separator is > a[:ai], so return it
      82           1 :                 return dst
      83             :         },
      84           1 :         Successor: func(dst, a []byte) []byte {
      85           1 :                 ai := split(a)
      86           1 :                 if ai == len(a) {
      87           1 :                         return append(dst, a...)
      88           1 :                 }
      89           1 :                 n := len(dst)
      90           1 :                 dst = base.DefaultComparer.Successor(dst, a[:ai])
      91           1 :                 // Did it pick a successor different than a[:ai] -- if not we can't do better than a.
      92           1 :                 buf := dst[n:]
      93           1 :                 if bytes.Equal(a[:ai], buf) {
      94           0 :                         return append(dst[:n], a...)
      95           0 :                 }
      96             :                 // The successor is > a[:ai], so return it.
      97           1 :                 return dst
      98             :         },
      99           1 :         ImmediateSuccessor: func(dst, a []byte) []byte {
     100           1 :                 // TODO(jackson): Consider changing this Comparer to only support
     101           1 :                 // representable prefix keys containing characters a-z.
     102           1 :                 ai := split(a)
     103           1 :                 if ai != len(a) {
     104           0 :                         panic("pebble: ImmediateSuccessor invoked with a non-prefix key")
     105             :                 }
     106           1 :                 return append(append(dst, a...), 0x00)
     107             :         },
     108             :         Split: split,
     109             :         Name:  "pebble.internal.testkeys",
     110             : }
     111             : 
     112             : // The comparator is similar to the one in Cockroach; when the prefixes are
     113             : // equal:
     114             : //   - a key without a suffix is smaller than one with a suffix;
     115             : //   - when both keys have a suffix, the key with the larger (decoded) suffix
     116             : //     value is smaller.
     117           1 : func compare(a, b []byte) int {
     118           1 :         ai, bi := split(a), split(b)
     119           1 :         if v := bytes.Compare(a[:ai], b[:bi]); v != 0 {
     120           1 :                 return v
     121           1 :         }
     122           1 :         return compareTimestamps(a[ai:], b[bi:])
     123             : }
     124             : 
     125           1 : func split(a []byte) int {
     126           1 :         i := bytes.LastIndexByte(a, suffixDelim)
     127           1 :         if i >= 0 {
     128           1 :                 return i
     129           1 :         }
     130           1 :         return len(a)
     131             : }
     132             : 
     133           1 : func compareTimestamps(a, b []byte) int {
     134           1 :         a = bytes.TrimSuffix(a, ignoreTimestampSuffix)
     135           1 :         b = bytes.TrimSuffix(b, ignoreTimestampSuffix)
     136           1 :         if len(a) == 0 || len(b) == 0 {
     137           1 :                 // The empty suffix sorts first.
     138           1 :                 return cmp.Compare(len(a), len(b))
     139           1 :         }
     140           1 :         if a[0] != suffixDelim || b[0] != suffixDelim {
     141           0 :                 panic(fmt.Sprintf("invalid suffixes %q %q", a, b))
     142             :         }
     143           1 :         ai, err := parseUintBytes(a[1:], 10, 64)
     144           1 :         if err != nil {
     145           0 :                 panic(fmt.Sprintf("invalid test mvcc timestamp %q", a))
     146             :         }
     147           1 :         bi, err := parseUintBytes(b[1:], 10, 64)
     148           1 :         if err != nil {
     149           0 :                 panic(fmt.Sprintf("invalid test mvcc timestamp %q", b))
     150             :         }
     151           1 :         return cmp.Compare(bi, ai)
     152             : }
     153             : 
     154           1 : func compareSuffixes(a, b []byte) int {
     155           1 :         cmp := compareTimestamps(a, b)
     156           1 :         if cmp == 0 {
     157           1 :                 aHasIgnorableSuffix := bytes.HasSuffix(a, ignoreTimestampSuffix)
     158           1 :                 bHasIgnorableSuffix := bytes.HasSuffix(b, ignoreTimestampSuffix)
     159           1 :                 if aHasIgnorableSuffix && !bHasIgnorableSuffix {
     160           0 :                         return 1
     161           0 :                 }
     162           1 :                 if !aHasIgnorableSuffix && bHasIgnorableSuffix {
     163           0 :                         return -1
     164           0 :                 }
     165             :         }
     166           1 :         return cmp
     167             : }
     168             : 
     169             : // Keyspace describes a finite keyspace of unsuffixed test keys.
     170             : type Keyspace interface {
     171             :         // Count returns the number of keys that exist within this keyspace.
     172             :         Count() int64
     173             : 
     174             :         // MaxLen returns the maximum length, in bytes, of a key within this
     175             :         // keyspace. This is only guaranteed to return an upper bound.
     176             :         MaxLen() int
     177             : 
     178             :         // Slice returns the sub-keyspace from index i, inclusive, to index j,
     179             :         // exclusive. The receiver is unmodified.
     180             :         Slice(i, j int64) Keyspace
     181             : 
     182             :         // EveryN returns a key space that includes 1 key for every N keys in the
     183             :         // original keyspace. The receiver is unmodified.
     184             :         EveryN(n int64) Keyspace
     185             : 
     186             :         // key writes the i-th key to the buffer and returns the length.
     187             :         key(buf []byte, i int64) int
     188             : }
     189             : 
     190             : // Divvy divides the provided keyspace into N equal portions, containing
     191             : // disjoint keys evenly distributed across the keyspace.
     192           0 : func Divvy(ks Keyspace, n int64) []Keyspace {
     193           0 :         ret := make([]Keyspace, n)
     194           0 :         for i := int64(0); i < n; i++ {
     195           0 :                 ret[i] = ks.Slice(i, ks.Count()).EveryN(n)
     196           0 :         }
     197           0 :         return ret
     198             : }
     199             : 
     200             : // Alpha constructs a keyspace consisting of all keys containing characters a-z,
     201             : // with at most `maxLength` characters.
     202           0 : func Alpha(maxLength int) Keyspace {
     203           0 :         return alphabet{
     204           0 :                 alphabet:  []byte(alpha),
     205           0 :                 maxLength: maxLength,
     206           0 :                 increment: 1,
     207           0 :         }
     208           0 : }
     209             : 
     210             : // KeyAt returns the i-th key within the keyspace with a suffix encoding the
     211             : // timestamp t.
     212           0 : func KeyAt(k Keyspace, i int64, t int64) []byte {
     213           0 :         b := make([]byte, k.MaxLen()+MaxSuffixLen)
     214           0 :         return b[:WriteKeyAt(b, k, i, t)]
     215           0 : }
     216             : 
     217             : // WriteKeyAt writes the i-th key within the keyspace to the buffer dst, with a
     218             : // suffix encoding the timestamp t suffix. It returns the number of bytes
     219             : // written.
     220           0 : func WriteKeyAt(dst []byte, k Keyspace, i int64, t int64) int {
     221           0 :         n := WriteKey(dst, k, i)
     222           0 :         n += WriteSuffix(dst[n:], t)
     223           0 :         return n
     224           0 : }
     225             : 
     226             : // Suffix returns the test keys suffix representation of timestamp t.
     227           0 : func Suffix(t int64) []byte {
     228           0 :         b := make([]byte, MaxSuffixLen)
     229           0 :         return b[:WriteSuffix(b, t)]
     230           0 : }
     231             : 
     232             : // SuffixLen returns the exact length of the given suffix when encoded.
     233           0 : func SuffixLen(t int64) int {
     234           0 :         // Begin at 1 for the '@' delimiter, 1 for a single digit.
     235           0 :         n := 2
     236           0 :         t /= 10
     237           0 :         for t > 0 {
     238           0 :                 t /= 10
     239           0 :                 n++
     240           0 :         }
     241           0 :         return n
     242             : }
     243             : 
     244             : // ParseSuffix returns the integer representation of the encoded suffix.
     245           1 : func ParseSuffix(s []byte) (int64, error) {
     246           1 :         s = bytes.TrimSuffix(s, ignoreTimestampSuffix)
     247           1 :         return strconv.ParseInt(strings.TrimPrefix(string(s), string(suffixDelim)), 10, 64)
     248           1 : }
     249             : 
     250             : // WriteSuffix writes the test keys suffix representation of timestamp t to dst,
     251             : // returning the number of bytes written.
     252           0 : func WriteSuffix(dst []byte, t int64) int {
     253           0 :         dst[0] = suffixDelim
     254           0 :         n := 1
     255           0 :         n += len(strconv.AppendInt(dst[n:n], t, 10))
     256           0 :         return n
     257           0 : }
     258             : 
     259             : // Key returns the i-th unsuffixed key within the keyspace.
     260           0 : func Key(k Keyspace, i int64) []byte {
     261           0 :         b := make([]byte, k.MaxLen())
     262           0 :         return b[:k.key(b, i)]
     263           0 : }
     264             : 
     265             : // WriteKey writes the i-th unsuffixed key within the keyspace to the buffer dst. It
     266             : // returns the number of bytes written.
     267           0 : func WriteKey(dst []byte, k Keyspace, i int64) int {
     268           0 :         return k.key(dst, i)
     269           0 : }
     270             : 
     271             : type alphabet struct {
     272             :         alphabet  []byte
     273             :         maxLength int
     274             :         headSkip  int64
     275             :         tailSkip  int64
     276             :         increment int64
     277             : }
     278             : 
     279           0 : func (a alphabet) Count() int64 {
     280           0 :         // Calculate the total number of keys, ignoring the increment.
     281           0 :         total := keyCount(len(a.alphabet), a.maxLength) - a.headSkip - a.tailSkip
     282           0 : 
     283           0 :         // The increment dictates that we take every N keys, where N = a.increment.
     284           0 :         // Consider a total containing the 5 keys:
     285           0 :         //   a  b  c  d  e
     286           0 :         //   ^     ^     ^
     287           0 :         // If the increment is 2, this keyspace includes 'a', 'c' and 'e'. After
     288           0 :         // dividing by the increment, there may be remainder. If there is, there's
     289           0 :         // one additional key in the alphabet.
     290           0 :         count := total / a.increment
     291           0 :         if total%a.increment > 0 {
     292           0 :                 count++
     293           0 :         }
     294           0 :         return count
     295             : }
     296             : 
     297           0 : func (a alphabet) MaxLen() int {
     298           0 :         return a.maxLength
     299           0 : }
     300             : 
     301           0 : func (a alphabet) Slice(i, j int64) Keyspace {
     302           0 :         s := a
     303           0 :         s.headSkip += i
     304           0 :         s.tailSkip += a.Count() - j
     305           0 :         return s
     306           0 : }
     307             : 
     308           0 : func (a alphabet) EveryN(n int64) Keyspace {
     309           0 :         s := a
     310           0 :         s.increment *= n
     311           0 :         return s
     312           0 : }
     313             : 
     314           0 : func keyCount(n, l int) int64 {
     315           0 :         // The number of representable keys in the keyspace is a function of the
     316           0 :         // length of the alphabet n and the max key length l:
     317           0 :         //   n + n^2 + ... + n^l
     318           0 :         x := int64(1)
     319           0 :         res := int64(0)
     320           0 :         for i := 1; i <= l; i++ {
     321           0 :                 if x >= math.MaxInt64/int64(n) {
     322           0 :                         panic("overflow")
     323             :                 }
     324           0 :                 x *= int64(n)
     325           0 :                 res += x
     326           0 :                 if res < 0 {
     327           0 :                         panic("overflow")
     328             :                 }
     329             :         }
     330           0 :         return res
     331             : }
     332             : 
     333           0 : func (a alphabet) key(buf []byte, idx int64) int {
     334           0 :         // This function generates keys of length 1..maxKeyLength, pulling
     335           0 :         // characters from the alphabet. The idx determines which key to generate,
     336           0 :         // generating the i-th lexicographically next key.
     337           0 :         //
     338           0 :         // The index to use is advanced by `headSkip`, allowing a keyspace to encode
     339           0 :         // a subregion of the keyspace.
     340           0 :         //
     341           0 :         // Eg, alphabet = `ab`, maxKeyLength = 3:
     342           0 :         //
     343           0 :         //           aaa aab     aba abb         baa bab     bba bbb
     344           0 :         //       aa          ab              ba          bb
     345           0 :         //   a                           b
     346           0 :         //   0   1   2   3   4   5   6   7   8   9   10  11  12  13
     347           0 :         //
     348           0 :         return generateAlphabetKey(buf, a.alphabet, (idx*a.increment)+a.headSkip,
     349           0 :                 keyCount(len(a.alphabet), a.maxLength))
     350           0 : }
     351             : 
     352           0 : func generateAlphabetKey(buf, alphabet []byte, i, keyCount int64) int {
     353           0 :         if keyCount == 0 || i > keyCount || i < 0 {
     354           0 :                 return 0
     355           0 :         }
     356             : 
     357             :         // Of the keyCount keys in the generative keyspace, how many are there
     358             :         // starting with a particular character?
     359           0 :         keysPerCharacter := keyCount / int64(len(alphabet))
     360           0 : 
     361           0 :         // Find the character that the key at index i starts with and set it.
     362           0 :         characterIdx := i / keysPerCharacter
     363           0 :         buf[0] = alphabet[characterIdx]
     364           0 : 
     365           0 :         // Consider characterIdx = 0, pointing to 'a'.
     366           0 :         //
     367           0 :         //           aaa aab     aba abb         baa bab     bba bbb
     368           0 :         //       aa          ab              ba          bb
     369           0 :         //   a                           b
     370           0 :         //   0   1   2   3   4   5   6   7   8   9   10  11  12  13
     371           0 :         //  \_________________________/
     372           0 :         //    |keysPerCharacter| keys
     373           0 :         //
     374           0 :         // In our recursive call, we reduce the problem to:
     375           0 :         //
     376           0 :         //           aaa aab     aba abb
     377           0 :         //       aa          ab
     378           0 :         //       0   1   2   3   4   5
     379           0 :         //     \________________________/
     380           0 :         //    |keysPerCharacter-1| keys
     381           0 :         //
     382           0 :         // In the subproblem, there are keysPerCharacter-1 keys (eliminating the
     383           0 :         // just 'a' key, plus any keys beginning with any other character).
     384           0 :         //
     385           0 :         // The index i is also offset, reduced by the count of keys beginning with
     386           0 :         // characters earlier in the alphabet (keysPerCharacter*characterIdx) and
     387           0 :         // the key consisting of just the 'a' (-1).
     388           0 :         i = i - keysPerCharacter*characterIdx - 1
     389           0 :         return 1 + generateAlphabetKey(buf[1:], alphabet, i, keysPerCharacter-1)
     390             : }
     391             : 
     392             : // computeAlphabetKeyIndex computes the inverse of generateAlphabetKey,
     393             : // returning the index of a particular key, given the provided alphabet and max
     394             : // length of a key.
     395             : //
     396             : // len(key) must be ≥ 1.
     397           0 : func computeAlphabetKeyIndex(key []byte, alphabet map[byte]int64, n int) int64 {
     398           0 :         i, ok := alphabet[key[0]]
     399           0 :         if !ok {
     400           0 :                 panic(fmt.Sprintf("unrecognized alphabet character %v", key[0]))
     401             :         }
     402             :         // How many keys exist that start with the preceding i characters? Each of
     403             :         // the i characters themselves are a key, plus the count of all the keys
     404             :         // with one less character for each.
     405           0 :         ret := i + i*keyCount(len(alphabet), n-1)
     406           0 :         if len(key) > 1 {
     407           0 :                 ret += 1 + computeAlphabetKeyIndex(key[1:], alphabet, n-1)
     408           0 :         }
     409           0 :         return ret
     410             : }
     411             : 
     412             : // RandomPrefixInRange returns a random prefix in the range [a, b), where a and
     413             : // b are prefixes.
     414           0 : func RandomPrefixInRange(a, b []byte, rng *rand.Rand) []byte {
     415           0 :         assertValidPrefix(a)
     416           0 :         assertValidPrefix(b)
     417           0 :         assertLess(a, b)
     418           0 :         commonPrefix := 0
     419           0 :         for commonPrefix < len(a)-1 && commonPrefix < len(b)-1 && a[commonPrefix] == b[commonPrefix] {
     420           0 :                 commonPrefix++
     421           0 :         }
     422             : 
     423             :         // We will generate a piece of a key from the Alpha(maxLength) keyspace. Note
     424             :         // that maxLength cannot be higher than ~13 or we will encounter overflows.
     425           0 :         maxLength := 4 + rng.IntN(8)
     426           0 : 
     427           0 :         // Skip any common prefix (but leave at least one character in each key).
     428           0 :         skipPrefix := 0
     429           0 :         for skipPrefix+1 < min(len(a), len(b)) && a[skipPrefix] == b[skipPrefix] {
     430           0 :                 skipPrefix++
     431           0 :         }
     432           0 :         aPiece := a[skipPrefix:]
     433           0 :         bPiece := b[skipPrefix:]
     434           0 :         if len(aPiece) > maxLength {
     435           0 :                 // The trimmed prefix is smaller than a; we must be careful below to not
     436           0 :                 // return a key smaller than a.
     437           0 :                 aPiece = aPiece[:maxLength]
     438           0 :         }
     439           0 :         if len(bPiece) > maxLength {
     440           0 :                 // The trimmed prefix is smaller than b, so we will still respect the bound.
     441           0 :                 bPiece = bPiece[:maxLength]
     442           0 :         }
     443           0 :         assertLess(aPiece, bPiece)
     444           0 :         apIdx := computeAlphabetKeyIndex(aPiece, inverseAlphabet, maxLength)
     445           0 :         bpIdx := computeAlphabetKeyIndex(bPiece, inverseAlphabet, maxLength)
     446           0 :         if bpIdx <= apIdx {
     447           0 :                 panic("unreachable")
     448             :         }
     449           0 :         generatedIdx := apIdx + rng.Int64N(bpIdx-apIdx)
     450           0 :         if generatedIdx == apIdx {
     451           0 :                 // Return key a. We handle this separately in case we trimmed aPiece above.
     452           0 :                 return append([]byte(nil), a...)
     453           0 :         }
     454           0 :         dst := make([]byte, skipPrefix+maxLength)
     455           0 :         copy(dst, a[:skipPrefix])
     456           0 :         pieceLen := WriteKey(dst[skipPrefix:], Alpha(maxLength), generatedIdx)
     457           0 :         dst = dst[:skipPrefix+pieceLen]
     458           0 :         assertLE(a, dst)
     459           0 :         assertLess(dst, b)
     460           0 :         return dst
     461             : }
     462             : 
     463           0 : func assertValidPrefix(p []byte) {
     464           0 :         if !prefixRE.Match(p) {
     465           0 :                 panic(fmt.Sprintf("invalid prefix %q", p))
     466             :         }
     467             : }
     468             : 
     469           0 : func assertLess(a, b []byte) {
     470           0 :         if Comparer.Compare(a, b) >= 0 {
     471           0 :                 panic(fmt.Sprintf("invalid key ordering: %q >= %q", a, b))
     472             :         }
     473             : }
     474             : 
     475           0 : func assertLE(a, b []byte) {
     476           0 :         if Comparer.Compare(a, b) > 0 {
     477           0 :                 panic(fmt.Sprintf("invalid key ordering: %q > %q", a, b))
     478             :         }
     479             : }

Generated by: LCOV version 1.14