LCOV - code coverage report
Current view: top level - pebble/internal/testkeys - testkeys.go (source / functions) Hit Total Coverage
Test: 2024-11-12 08:17Z 9f68a214 - tests + meta.lcov Lines: 286 298 96.0 %
Date: 2024-11-12 08:18:08 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : // Package testkeys provides facilities for generating and comparing
       6             : // human-readable test keys for use in tests and benchmarks. This package
       7             : // provides a single Comparer implementation that compares all keys generated
       8             : // by this package.
       9             : //
      10             : // Keys generated by this package may optionally have a 'suffix' encoding an
      11             : // MVCC timestamp. This suffix is of the form "@<integer>". Comparisons on the
      12             : // suffix are performed using integer value, not the byte representation.
      13             : package testkeys
      14             : 
      15             : import (
      16             :         "bytes"
      17             :         "cmp"
      18             :         "fmt"
      19             :         "math"
      20             :         "math/rand/v2"
      21             :         "regexp"
      22             :         "strconv"
      23             :         "strings"
      24             : 
      25             :         "github.com/cockroachdb/pebble/internal/base"
      26             : )
      27             : 
      28             : const alpha = "abcdefghijklmnopqrstuvwxyz"
      29             : 
      30             : const suffixDelim = '@'
      31             : 
      32             : var inverseAlphabet = make(map[byte]int64, len(alpha))
      33             : 
      34             : var prefixRE = regexp.MustCompile("[" + alpha + "]+")
      35             : 
      36             : // ignoreTimestampSuffix is a suffix that is ignored when comparing keys, but
      37             : // not when comparing suffixes. It simulates the CRDB synthetic bit situation
      38             : // (see https://github.com/cockroachdb/cockroach/issues/130533).
      39             : var ignoreTimestampSuffix = []byte("_synthetic")
      40             : 
      41           2 : func init() {
      42           2 :         for i := range alpha {
      43           2 :                 inverseAlphabet[alpha[i]] = int64(i)
      44           2 :         }
      45             : }
      46             : 
      47             : // MaxSuffixLen is the maximum length of a suffix generated by this package.
      48             : var MaxSuffixLen = 1 + len(fmt.Sprintf("%d", int64(math.MaxInt64)))
      49             : 
      50             : // Comparer is the comparer for test keys generated by this package.
      51             : var Comparer = &base.Comparer{
      52             :         ComparePointSuffixes: compareSuffixes,
      53             :         CompareRangeSuffixes: compareSuffixes,
      54             :         Compare:              compare,
      55           2 :         Equal:                func(a, b []byte) bool { return compare(a, b) == 0 },
      56           2 :         AbbreviatedKey: func(k []byte) uint64 {
      57           2 :                 return base.DefaultComparer.AbbreviatedKey(k[:split(k)])
      58           2 :         },
      59             :         FormatKey: base.DefaultFormatter,
      60           2 :         Separator: func(dst, a, b []byte) []byte {
      61           2 :                 ai := split(a)
      62           2 :                 if ai == len(a) {
      63           2 :                         return append(dst, a...)
      64           2 :                 }
      65           2 :                 bi := split(b)
      66           2 :                 if bi == len(b) {
      67           2 :                         return append(dst, a...)
      68           2 :                 }
      69             : 
      70             :                 // If the keys are the same just return a.
      71           2 :                 if bytes.Equal(a[:ai], b[:bi]) {
      72           2 :                         return append(dst, a...)
      73           2 :                 }
      74           2 :                 n := len(dst)
      75           2 :                 dst = base.DefaultComparer.Separator(dst, a[:ai], b[:bi])
      76           2 :                 // Did it pick a separator different than a[:ai] -- if not we can't do better than a.
      77           2 :                 buf := dst[n:]
      78           2 :                 if bytes.Equal(a[:ai], buf) {
      79           2 :                         return append(dst[:n], a...)
      80           2 :                 }
      81             :                 // The separator is > a[:ai], so return it
      82           2 :                 return dst
      83             :         },
      84           2 :         Successor: func(dst, a []byte) []byte {
      85           2 :                 ai := split(a)
      86           2 :                 if ai == len(a) {
      87           2 :                         return append(dst, a...)
      88           2 :                 }
      89           2 :                 n := len(dst)
      90           2 :                 dst = base.DefaultComparer.Successor(dst, a[:ai])
      91           2 :                 // Did it pick a successor different than a[:ai] -- if not we can't do better than a.
      92           2 :                 buf := dst[n:]
      93           2 :                 if bytes.Equal(a[:ai], buf) {
      94           0 :                         return append(dst[:n], a...)
      95           0 :                 }
      96             :                 // The successor is > a[:ai], so return it.
      97           2 :                 return dst
      98             :         },
      99           2 :         ImmediateSuccessor: func(dst, a []byte) []byte {
     100           2 :                 // TODO(jackson): Consider changing this Comparer to only support
     101           2 :                 // representable prefix keys containing characters a-z.
     102           2 :                 ai := split(a)
     103           2 :                 if ai != len(a) {
     104           0 :                         panic("pebble: ImmediateSuccessor invoked with a non-prefix key")
     105             :                 }
     106           2 :                 return append(append(dst, a...), 0x00)
     107             :         },
     108             :         Split: split,
     109             :         Name:  "pebble.internal.testkeys",
     110             : }
     111             : 
     112             : // The comparator is similar to the one in Cockroach; when the prefixes are
     113             : // equal:
     114             : //   - a key without a suffix is smaller than one with a suffix;
     115             : //   - when both keys have a suffix, the key with the larger (decoded) suffix
     116             : //     value is smaller.
     117           2 : func compare(a, b []byte) int {
     118           2 :         ai, bi := split(a), split(b)
     119           2 :         if v := bytes.Compare(a[:ai], b[:bi]); v != 0 {
     120           2 :                 return v
     121           2 :         }
     122           2 :         return compareTimestamps(a[ai:], b[bi:])
     123             : }
     124             : 
     125           2 : func split(a []byte) int {
     126           2 :         i := bytes.LastIndexByte(a, suffixDelim)
     127           2 :         if i >= 0 {
     128           2 :                 return i
     129           2 :         }
     130           2 :         return len(a)
     131             : }
     132             : 
     133           2 : func compareTimestamps(a, b []byte) int {
     134           2 :         a = bytes.TrimSuffix(a, ignoreTimestampSuffix)
     135           2 :         b = bytes.TrimSuffix(b, ignoreTimestampSuffix)
     136           2 :         if len(a) == 0 || len(b) == 0 {
     137           2 :                 // The empty suffix sorts first.
     138           2 :                 return cmp.Compare(len(a), len(b))
     139           2 :         }
     140           2 :         if a[0] != suffixDelim || b[0] != suffixDelim {
     141           0 :                 panic(fmt.Sprintf("invalid suffixes %q %q", a, b))
     142             :         }
     143           2 :         ai, err := parseUintBytes(a[1:], 10, 64)
     144           2 :         if err != nil {
     145           0 :                 panic(fmt.Sprintf("invalid test mvcc timestamp %q", a))
     146             :         }
     147           2 :         bi, err := parseUintBytes(b[1:], 10, 64)
     148           2 :         if err != nil {
     149           0 :                 panic(fmt.Sprintf("invalid test mvcc timestamp %q", b))
     150             :         }
     151           2 :         return cmp.Compare(bi, ai)
     152             : }
     153             : 
     154           2 : func compareSuffixes(a, b []byte) int {
     155           2 :         cmp := compareTimestamps(a, b)
     156           2 :         if cmp == 0 {
     157           2 :                 aHasIgnorableSuffix := bytes.HasSuffix(a, ignoreTimestampSuffix)
     158           2 :                 bHasIgnorableSuffix := bytes.HasSuffix(b, ignoreTimestampSuffix)
     159           2 :                 if aHasIgnorableSuffix && !bHasIgnorableSuffix {
     160           1 :                         return 1
     161           1 :                 }
     162           2 :                 if !aHasIgnorableSuffix && bHasIgnorableSuffix {
     163           1 :                         return -1
     164           1 :                 }
     165             :         }
     166           2 :         return cmp
     167             : }
     168             : 
     169             : // Keyspace describes a finite keyspace of unsuffixed test keys.
     170             : type Keyspace interface {
     171             :         // Count returns the number of keys that exist within this keyspace.
     172             :         Count() int64
     173             : 
     174             :         // MaxLen returns the maximum length, in bytes, of a key within this
     175             :         // keyspace. This is only guaranteed to return an upper bound.
     176             :         MaxLen() int
     177             : 
     178             :         // Slice returns the sub-keyspace from index i, inclusive, to index j,
     179             :         // exclusive. The receiver is unmodified.
     180             :         Slice(i, j int64) Keyspace
     181             : 
     182             :         // EveryN returns a key space that includes 1 key for every N keys in the
     183             :         // original keyspace. The receiver is unmodified.
     184             :         EveryN(n int64) Keyspace
     185             : 
     186             :         // key writes the i-th key to the buffer and returns the length.
     187             :         key(buf []byte, i int64) int
     188             : }
     189             : 
     190             : // Divvy divides the provided keyspace into N equal portions, containing
     191             : // disjoint keys evenly distributed across the keyspace.
     192           1 : func Divvy(ks Keyspace, n int64) []Keyspace {
     193           1 :         ret := make([]Keyspace, n)
     194           1 :         for i := int64(0); i < n; i++ {
     195           1 :                 ret[i] = ks.Slice(i, ks.Count()).EveryN(n)
     196           1 :         }
     197           1 :         return ret
     198             : }
     199             : 
     200             : // Alpha constructs a keyspace consisting of all keys containing characters a-z,
     201             : // with at most `maxLength` characters.
     202           1 : func Alpha(maxLength int) Keyspace {
     203           1 :         return alphabet{
     204           1 :                 alphabet:  []byte(alpha),
     205           1 :                 maxLength: maxLength,
     206           1 :                 increment: 1,
     207           1 :         }
     208           1 : }
     209             : 
     210             : // KeyAt returns the i-th key within the keyspace with a suffix encoding the
     211             : // timestamp t.
     212           1 : func KeyAt(k Keyspace, i int64, t int64) []byte {
     213           1 :         b := make([]byte, k.MaxLen()+MaxSuffixLen)
     214           1 :         return b[:WriteKeyAt(b, k, i, t)]
     215           1 : }
     216             : 
     217             : // WriteKeyAt writes the i-th key within the keyspace to the buffer dst, with a
     218             : // suffix encoding the timestamp t suffix. It returns the number of bytes
     219             : // written.
     220           1 : func WriteKeyAt(dst []byte, k Keyspace, i int64, t int64) int {
     221           1 :         n := WriteKey(dst, k, i)
     222           1 :         n += WriteSuffix(dst[n:], t)
     223           1 :         return n
     224           1 : }
     225             : 
     226             : // Suffix returns the test keys suffix representation of timestamp t.
     227           1 : func Suffix(t int64) []byte {
     228           1 :         b := make([]byte, MaxSuffixLen)
     229           1 :         return b[:WriteSuffix(b, t)]
     230           1 : }
     231             : 
     232             : // SuffixLen returns the exact length of the given suffix when encoded.
     233           1 : func SuffixLen(t int64) int {
     234           1 :         // Begin at 1 for the '@' delimiter, 1 for a single digit.
     235           1 :         n := 2
     236           1 :         t /= 10
     237           1 :         for t > 0 {
     238           1 :                 t /= 10
     239           1 :                 n++
     240           1 :         }
     241           1 :         return n
     242             : }
     243             : 
     244             : // ParseSuffix returns the integer representation of the encoded suffix.
     245           2 : func ParseSuffix(s []byte) (int64, error) {
     246           2 :         s = bytes.TrimSuffix(s, ignoreTimestampSuffix)
     247           2 :         return strconv.ParseInt(strings.TrimPrefix(string(s), string(suffixDelim)), 10, 64)
     248           2 : }
     249             : 
     250             : // WriteSuffix writes the test keys suffix representation of timestamp t to dst,
     251             : // returning the number of bytes written.
     252           1 : func WriteSuffix(dst []byte, t int64) int {
     253           1 :         dst[0] = suffixDelim
     254           1 :         n := 1
     255           1 :         n += len(strconv.AppendInt(dst[n:n], t, 10))
     256           1 :         return n
     257           1 : }
     258             : 
     259             : // Key returns the i-th unsuffixed key within the keyspace.
     260           1 : func Key(k Keyspace, i int64) []byte {
     261           1 :         b := make([]byte, k.MaxLen())
     262           1 :         return b[:k.key(b, i)]
     263           1 : }
     264             : 
     265             : // WriteKey writes the i-th unsuffixed key within the keyspace to the buffer dst. It
     266             : // returns the number of bytes written.
     267           1 : func WriteKey(dst []byte, k Keyspace, i int64) int {
     268           1 :         return k.key(dst, i)
     269           1 : }
     270             : 
     271             : type alphabet struct {
     272             :         alphabet  []byte
     273             :         maxLength int
     274             :         headSkip  int64
     275             :         tailSkip  int64
     276             :         increment int64
     277             : }
     278             : 
     279           1 : func (a alphabet) Count() int64 {
     280           1 :         // Calculate the total number of keys, ignoring the increment.
     281           1 :         total := keyCount(len(a.alphabet), a.maxLength) - a.headSkip - a.tailSkip
     282           1 : 
     283           1 :         // The increment dictates that we take every N keys, where N = a.increment.
     284           1 :         // Consider a total containing the 5 keys:
     285           1 :         //   a  b  c  d  e
     286           1 :         //   ^     ^     ^
     287           1 :         // If the increment is 2, this keyspace includes 'a', 'c' and 'e'. After
     288           1 :         // dividing by the increment, there may be remainder. If there is, there's
     289           1 :         // one additional key in the alphabet.
     290           1 :         count := total / a.increment
     291           1 :         if total%a.increment > 0 {
     292           1 :                 count++
     293           1 :         }
     294           1 :         return count
     295             : }
     296             : 
     297           1 : func (a alphabet) MaxLen() int {
     298           1 :         return a.maxLength
     299           1 : }
     300             : 
     301           1 : func (a alphabet) Slice(i, j int64) Keyspace {
     302           1 :         s := a
     303           1 :         s.headSkip += i
     304           1 :         s.tailSkip += a.Count() - j
     305           1 :         return s
     306           1 : }
     307             : 
     308           1 : func (a alphabet) EveryN(n int64) Keyspace {
     309           1 :         s := a
     310           1 :         s.increment *= n
     311           1 :         return s
     312           1 : }
     313             : 
     314           1 : func keyCount(n, l int) int64 {
     315           1 :         // The number of representable keys in the keyspace is a function of the
     316           1 :         // length of the alphabet n and the max key length l:
     317           1 :         //   n + n^2 + ... + n^l
     318           1 :         x := int64(1)
     319           1 :         res := int64(0)
     320           1 :         for i := 1; i <= l; i++ {
     321           1 :                 if x >= math.MaxInt64/int64(n) {
     322           1 :                         panic("overflow")
     323             :                 }
     324           1 :                 x *= int64(n)
     325           1 :                 res += x
     326           1 :                 if res < 0 {
     327           0 :                         panic("overflow")
     328             :                 }
     329             :         }
     330           1 :         return res
     331             : }
     332             : 
     333           1 : func (a alphabet) key(buf []byte, idx int64) int {
     334           1 :         // This function generates keys of length 1..maxKeyLength, pulling
     335           1 :         // characters from the alphabet. The idx determines which key to generate,
     336           1 :         // generating the i-th lexicographically next key.
     337           1 :         //
     338           1 :         // The index to use is advanced by `headSkip`, allowing a keyspace to encode
     339           1 :         // a subregion of the keyspace.
     340           1 :         //
     341           1 :         // Eg, alphabet = `ab`, maxKeyLength = 3:
     342           1 :         //
     343           1 :         //           aaa aab     aba abb         baa bab     bba bbb
     344           1 :         //       aa          ab              ba          bb
     345           1 :         //   a                           b
     346           1 :         //   0   1   2   3   4   5   6   7   8   9   10  11  12  13
     347           1 :         //
     348           1 :         return generateAlphabetKey(buf, a.alphabet, (idx*a.increment)+a.headSkip,
     349           1 :                 keyCount(len(a.alphabet), a.maxLength))
     350           1 : }
     351             : 
     352           1 : func generateAlphabetKey(buf, alphabet []byte, i, keyCount int64) int {
     353           1 :         if keyCount == 0 || i > keyCount || i < 0 {
     354           1 :                 return 0
     355           1 :         }
     356             : 
     357             :         // Of the keyCount keys in the generative keyspace, how many are there
     358             :         // starting with a particular character?
     359           1 :         keysPerCharacter := keyCount / int64(len(alphabet))
     360           1 : 
     361           1 :         // Find the character that the key at index i starts with and set it.
     362           1 :         characterIdx := i / keysPerCharacter
     363           1 :         buf[0] = alphabet[characterIdx]
     364           1 : 
     365           1 :         // Consider characterIdx = 0, pointing to 'a'.
     366           1 :         //
     367           1 :         //           aaa aab     aba abb         baa bab     bba bbb
     368           1 :         //       aa          ab              ba          bb
     369           1 :         //   a                           b
     370           1 :         //   0   1   2   3   4   5   6   7   8   9   10  11  12  13
     371           1 :         //  \_________________________/
     372           1 :         //    |keysPerCharacter| keys
     373           1 :         //
     374           1 :         // In our recursive call, we reduce the problem to:
     375           1 :         //
     376           1 :         //           aaa aab     aba abb
     377           1 :         //       aa          ab
     378           1 :         //       0   1   2   3   4   5
     379           1 :         //     \________________________/
     380           1 :         //    |keysPerCharacter-1| keys
     381           1 :         //
     382           1 :         // In the subproblem, there are keysPerCharacter-1 keys (eliminating the
     383           1 :         // just 'a' key, plus any keys beginning with any other character).
     384           1 :         //
     385           1 :         // The index i is also offset, reduced by the count of keys beginning with
     386           1 :         // characters earlier in the alphabet (keysPerCharacter*characterIdx) and
     387           1 :         // the key consisting of just the 'a' (-1).
     388           1 :         i = i - keysPerCharacter*characterIdx - 1
     389           1 :         return 1 + generateAlphabetKey(buf[1:], alphabet, i, keysPerCharacter-1)
     390             : }
     391             : 
     392             : // computeAlphabetKeyIndex computes the inverse of generateAlphabetKey,
     393             : // returning the index of a particular key, given the provided alphabet and max
     394             : // length of a key.
     395             : //
     396             : // len(key) must be ≥ 1.
     397           1 : func computeAlphabetKeyIndex(key []byte, alphabet map[byte]int64, n int) int64 {
     398           1 :         i, ok := alphabet[key[0]]
     399           1 :         if !ok {
     400           0 :                 panic(fmt.Sprintf("unrecognized alphabet character %v", key[0]))
     401             :         }
     402             :         // How many keys exist that start with the preceding i characters? Each of
     403             :         // the i characters themselves are a key, plus the count of all the keys
     404             :         // with one less character for each.
     405           1 :         ret := i + i*keyCount(len(alphabet), n-1)
     406           1 :         if len(key) > 1 {
     407           1 :                 ret += 1 + computeAlphabetKeyIndex(key[1:], alphabet, n-1)
     408           1 :         }
     409           1 :         return ret
     410             : }
     411             : 
     412             : // RandomPrefixInRange returns a random prefix in the range [a, b), where a and
     413             : // b are prefixes.
     414           1 : func RandomPrefixInRange(a, b []byte, rng *rand.Rand) []byte {
     415           1 :         assertValidPrefix(a)
     416           1 :         assertValidPrefix(b)
     417           1 :         assertLess(a, b)
     418           1 :         commonPrefix := 0
     419           1 :         for commonPrefix < len(a)-1 && commonPrefix < len(b)-1 && a[commonPrefix] == b[commonPrefix] {
     420           1 :                 commonPrefix++
     421           1 :         }
     422             : 
     423             :         // We will generate a piece of a key from the Alpha(maxLength) keyspace. Note
     424             :         // that maxLength cannot be higher than ~13 or we will encounter overflows.
     425           1 :         maxLength := 4 + rng.IntN(8)
     426           1 : 
     427           1 :         // Skip any common prefix (but leave at least one character in each key).
     428           1 :         skipPrefix := 0
     429           1 :         for skipPrefix+1 < min(len(a), len(b)) && a[skipPrefix] == b[skipPrefix] {
     430           1 :                 skipPrefix++
     431           1 :         }
     432           1 :         aPiece := a[skipPrefix:]
     433           1 :         bPiece := b[skipPrefix:]
     434           1 :         if len(aPiece) > maxLength {
     435           1 :                 // The trimmed prefix is smaller than a; we must be careful below to not
     436           1 :                 // return a key smaller than a.
     437           1 :                 aPiece = aPiece[:maxLength]
     438           1 :         }
     439           1 :         if len(bPiece) > maxLength {
     440           1 :                 // The trimmed prefix is smaller than b, so we will still respect the bound.
     441           1 :                 bPiece = bPiece[:maxLength]
     442           1 :         }
     443           1 :         assertLess(aPiece, bPiece)
     444           1 :         apIdx := computeAlphabetKeyIndex(aPiece, inverseAlphabet, maxLength)
     445           1 :         bpIdx := computeAlphabetKeyIndex(bPiece, inverseAlphabet, maxLength)
     446           1 :         if bpIdx <= apIdx {
     447           0 :                 panic("unreachable")
     448             :         }
     449           1 :         generatedIdx := apIdx + rng.Int64N(bpIdx-apIdx)
     450           1 :         if generatedIdx == apIdx {
     451           1 :                 // Return key a. We handle this separately in case we trimmed aPiece above.
     452           1 :                 return append([]byte(nil), a...)
     453           1 :         }
     454           1 :         dst := make([]byte, skipPrefix+maxLength)
     455           1 :         copy(dst, a[:skipPrefix])
     456           1 :         pieceLen := WriteKey(dst[skipPrefix:], Alpha(maxLength), generatedIdx)
     457           1 :         dst = dst[:skipPrefix+pieceLen]
     458           1 :         assertLE(a, dst)
     459           1 :         assertLess(dst, b)
     460           1 :         return dst
     461             : }
     462             : 
     463           1 : func assertValidPrefix(p []byte) {
     464           1 :         if !prefixRE.Match(p) {
     465           0 :                 panic(fmt.Sprintf("invalid prefix %q", p))
     466             :         }
     467             : }
     468             : 
     469           1 : func assertLess(a, b []byte) {
     470           1 :         if Comparer.Compare(a, b) >= 0 {
     471           0 :                 panic(fmt.Sprintf("invalid key ordering: %q >= %q", a, b))
     472             :         }
     473             : }
     474             : 
     475           1 : func assertLE(a, b []byte) {
     476           1 :         if Comparer.Compare(a, b) > 0 {
     477           0 :                 panic(fmt.Sprintf("invalid key ordering: %q > %q", a, b))
     478             :         }
     479             : }

Generated by: LCOV version 1.14