LCOV - 2024-10-17 08:16Z dd26bf0e - tests only.lcov - pebble/internal/testkeys/testkeys.go

LCOV - code coverage report

Current view:	top level - pebble/internal/testkeys - testkeys.go (source / functions)		Hit	Total	Coverage
Test:	2024-10-17 08:16Z dd26bf0e - tests only.lcov	Lines:	286	298	96.0 %
Date:	2024-10-17 08:18:17	Functions:	0	0	-

          Line data    Source code

       1             : // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : // Package testkeys provides facilities for generating and comparing
       6             : // human-readable test keys for use in tests and benchmarks. This package
       7             : // provides a single Comparer implementation that compares all keys generated
       8             : // by this package.
       9             : //
      10             : // Keys generated by this package may optionally have a 'suffix' encoding an
      11             : // MVCC timestamp. This suffix is of the form "@<integer>". Comparisons on the
      12             : // suffix are performed using integer value, not the byte representation.
      13             : package testkeys
      14             : 
      15             : import (
      16             :         "bytes"
      17             :         "cmp"
      18             :         "fmt"
      19             :         "math"
      20             :         "math/rand/v2"
      21             :         "regexp"
      22             :         "strconv"
      23             :         "strings"
      24             : 
      25             :         "github.com/cockroachdb/pebble/internal/base"
      26             : )
      27             : 
      28             : const alpha = "abcdefghijklmnopqrstuvwxyz"
      29             : 
      30             : const suffixDelim = '@'
      31             : 
      32             : var inverseAlphabet = make(map[byte]int64, len(alpha))
      33             : 
      34             : var prefixRE = regexp.MustCompile("[" + alpha + "]+")
      35             : 
      36             : // ignoreTimestampSuffix is a suffix that is ignored when comparing keys, but
      37             : // not when comparing suffixes. It simulates the CRDB synthetic bit situation
      38             : // (see https://github.com/cockroachdb/cockroach/issues/130533).
      39             : var ignoreTimestampSuffix = []byte("_synthetic")
      40             : 
      41           1 : func init() {
      42           1 :         for i := range alpha {
      43           1 :                 inverseAlphabet[alpha[i]] = int64(i)
      44           1 :         }
      45             : }
      46             : 
      47             : // MaxSuffixLen is the maximum length of a suffix generated by this package.
      48             : var MaxSuffixLen = 1 + len(fmt.Sprintf("%d", int64(math.MaxInt64)))
      49             : 
      50             : // Comparer is the comparer for test keys generated by this package.
      51             : var Comparer = &base.Comparer{
      52             :         CompareSuffixes: compareSuffixes,
      53             :         Compare:         compare,
      54           1 :         Equal:           func(a, b []byte) bool { return compare(a, b) == 0 },
      55           1 :         AbbreviatedKey: func(k []byte) uint64 {
      56           1 :                 return base.DefaultComparer.AbbreviatedKey(k[:split(k)])
      57           1 :         },
      58             :         FormatKey: base.DefaultFormatter,
      59           1 :         Separator: func(dst, a, b []byte) []byte {
      60           1 :                 ai := split(a)
      61           1 :                 if ai == len(a) {
      62           1 :                         return append(dst, a...)
      63           1 :                 }
      64           1 :                 bi := split(b)
      65           1 :                 if bi == len(b) {
      66           1 :                         return append(dst, a...)
      67           1 :                 }
      68             : 
      69             :                 // If the keys are the same just return a.
      70           1 :                 if bytes.Equal(a[:ai], b[:bi]) {
      71           1 :                         return append(dst, a...)
      72           1 :                 }
      73           1 :                 n := len(dst)
      74           1 :                 dst = base.DefaultComparer.Separator(dst, a[:ai], b[:bi])
      75           1 :                 // Did it pick a separator different than a[:ai] -- if not we can't do better than a.
      76           1 :                 buf := dst[n:]
      77           1 :                 if bytes.Equal(a[:ai], buf) {
      78           1 :                         return append(dst[:n], a...)
      79           1 :                 }
      80             :                 // The separator is > a[:ai], so return it
      81           1 :                 return dst
      82             :         },
      83           1 :         Successor: func(dst, a []byte) []byte {
      84           1 :                 ai := split(a)
      85           1 :                 if ai == len(a) {
      86           1 :                         return append(dst, a...)
      87           1 :                 }
      88           1 :                 n := len(dst)
      89           1 :                 dst = base.DefaultComparer.Successor(dst, a[:ai])
      90           1 :                 // Did it pick a successor different than a[:ai] -- if not we can't do better than a.
      91           1 :                 buf := dst[n:]
      92           1 :                 if bytes.Equal(a[:ai], buf) {
      93           0 :                         return append(dst[:n], a...)
      94           0 :                 }
      95             :                 // The successor is > a[:ai], so return it.
      96           1 :                 return dst
      97             :         },
      98           1 :         ImmediateSuccessor: func(dst, a []byte) []byte {
      99           1 :                 // TODO(jackson): Consider changing this Comparer to only support
     100           1 :                 // representable prefix keys containing characters a-z.
     101           1 :                 ai := split(a)
     102           1 :                 if ai != len(a) {
     103           0 :                         panic("pebble: ImmediateSuccessor invoked with a non-prefix key")
     104             :                 }
     105           1 :                 return append(append(dst, a...), 0x00)
     106             :         },
     107             :         Split: split,
     108             :         Name:  "pebble.internal.testkeys",
     109             : }
     110             : 
     111             : // The comparator is similar to the one in Cockroach; when the prefixes are
     112             : // equal:
     113             : //   - a key without a suffix is smaller than one with a suffix;
     114             : //   - when both keys have a suffix, the key with the larger (decoded) suffix
     115             : //     value is smaller.
     116           1 : func compare(a, b []byte) int {
     117           1 :         ai, bi := split(a), split(b)
     118           1 :         if v := bytes.Compare(a[:ai], b[:bi]); v != 0 {
     119           1 :                 return v
     120           1 :         }
     121           1 :         return compareTimestamps(a[ai:], b[bi:])
     122             : }
     123             : 
     124           1 : func split(a []byte) int {
     125           1 :         i := bytes.LastIndexByte(a, suffixDelim)
     126           1 :         if i >= 0 {
     127           1 :                 return i
     128           1 :         }
     129           1 :         return len(a)
     130             : }
     131             : 
     132           1 : func compareTimestamps(a, b []byte) int {
     133           1 :         a = bytes.TrimSuffix(a, ignoreTimestampSuffix)
     134           1 :         b = bytes.TrimSuffix(b, ignoreTimestampSuffix)
     135           1 :         if len(a) == 0 || len(b) == 0 {
     136           1 :                 // The empty suffix sorts first.
     137           1 :                 return cmp.Compare(len(a), len(b))
     138           1 :         }
     139           1 :         if a[0] != suffixDelim || b[0] != suffixDelim {
     140           0 :                 panic(fmt.Sprintf("invalid suffixes %q %q", a, b))
     141             :         }
     142           1 :         ai, err := parseUintBytes(a[1:], 10, 64)
     143           1 :         if err != nil {
     144           0 :                 panic(fmt.Sprintf("invalid test mvcc timestamp %q", a))
     145             :         }
     146           1 :         bi, err := parseUintBytes(b[1:], 10, 64)
     147           1 :         if err != nil {
     148           0 :                 panic(fmt.Sprintf("invalid test mvcc timestamp %q", b))
     149             :         }
     150           1 :         return cmp.Compare(bi, ai)
     151             : }
     152             : 
     153           1 : func compareSuffixes(a, b []byte) int {
     154           1 :         cmp := compareTimestamps(a, b)
     155           1 :         if cmp == 0 {
     156           1 :                 aHasIgnorableSuffix := bytes.HasSuffix(a, ignoreTimestampSuffix)
     157           1 :                 bHasIgnorableSuffix := bytes.HasSuffix(b, ignoreTimestampSuffix)
     158           1 :                 if aHasIgnorableSuffix && !bHasIgnorableSuffix {
     159           1 :                         return 1
     160           1 :                 }
     161           1 :                 if !aHasIgnorableSuffix && bHasIgnorableSuffix {
     162           1 :                         return -1
     163           1 :                 }
     164             :         }
     165           1 :         return cmp
     166             : }
     167             : 
     168             : // Keyspace describes a finite keyspace of unsuffixed test keys.
     169             : type Keyspace interface {
     170             :         // Count returns the number of keys that exist within this keyspace.
     171             :         Count() int64
     172             : 
     173             :         // MaxLen returns the maximum length, in bytes, of a key within this
     174             :         // keyspace. This is only guaranteed to return an upper bound.
     175             :         MaxLen() int
     176             : 
     177             :         // Slice returns the sub-keyspace from index i, inclusive, to index j,
     178             :         // exclusive. The receiver is unmodified.
     179             :         Slice(i, j int64) Keyspace
     180             : 
     181             :         // EveryN returns a key space that includes 1 key for every N keys in the
     182             :         // original keyspace. The receiver is unmodified.
     183             :         EveryN(n int64) Keyspace
     184             : 
     185             :         // key writes the i-th key to the buffer and returns the length.
     186             :         key(buf []byte, i int64) int
     187             : }
     188             : 
     189             : // Divvy divides the provided keyspace into N equal portions, containing
     190             : // disjoint keys evenly distributed across the keyspace.
     191           1 : func Divvy(ks Keyspace, n int64) []Keyspace {
     192           1 :         ret := make([]Keyspace, n)
     193           1 :         for i := int64(0); i < n; i++ {
     194           1 :                 ret[i] = ks.Slice(i, ks.Count()).EveryN(n)
     195           1 :         }
     196           1 :         return ret
     197             : }
     198             : 
     199             : // Alpha constructs a keyspace consisting of all keys containing characters a-z,
     200             : // with at most `maxLength` characters.
     201           1 : func Alpha(maxLength int) Keyspace {
     202           1 :         return alphabet{
     203           1 :                 alphabet:  []byte(alpha),
     204           1 :                 maxLength: maxLength,
     205           1 :                 increment: 1,
     206           1 :         }
     207           1 : }
     208             : 
     209             : // KeyAt returns the i-th key within the keyspace with a suffix encoding the
     210             : // timestamp t.
     211           1 : func KeyAt(k Keyspace, i int64, t int64) []byte {
     212           1 :         b := make([]byte, k.MaxLen()+MaxSuffixLen)
     213           1 :         return b[:WriteKeyAt(b, k, i, t)]
     214           1 : }
     215             : 
     216             : // WriteKeyAt writes the i-th key within the keyspace to the buffer dst, with a
     217             : // suffix encoding the timestamp t suffix. It returns the number of bytes
     218             : // written.
     219           1 : func WriteKeyAt(dst []byte, k Keyspace, i int64, t int64) int {
     220           1 :         n := WriteKey(dst, k, i)
     221           1 :         n += WriteSuffix(dst[n:], t)
     222           1 :         return n
     223           1 : }
     224             : 
     225             : // Suffix returns the test keys suffix representation of timestamp t.
     226           1 : func Suffix(t int64) []byte {
     227           1 :         b := make([]byte, MaxSuffixLen)
     228           1 :         return b[:WriteSuffix(b, t)]
     229           1 : }
     230             : 
     231             : // SuffixLen returns the exact length of the given suffix when encoded.
     232           1 : func SuffixLen(t int64) int {
     233           1 :         // Begin at 1 for the '@' delimiter, 1 for a single digit.
     234           1 :         n := 2
     235           1 :         t /= 10
     236           1 :         for t > 0 {
     237           1 :                 t /= 10
     238           1 :                 n++
     239           1 :         }
     240           1 :         return n
     241             : }
     242             : 
     243             : // ParseSuffix returns the integer representation of the encoded suffix.
     244           1 : func ParseSuffix(s []byte) (int64, error) {
     245           1 :         s = bytes.TrimSuffix(s, ignoreTimestampSuffix)
     246           1 :         return strconv.ParseInt(strings.TrimPrefix(string(s), string(suffixDelim)), 10, 64)
     247           1 : }
     248             : 
     249             : // WriteSuffix writes the test keys suffix representation of timestamp t to dst,
     250             : // returning the number of bytes written.
     251           1 : func WriteSuffix(dst []byte, t int64) int {
     252           1 :         dst[0] = suffixDelim
     253           1 :         n := 1
     254           1 :         n += len(strconv.AppendInt(dst[n:n], t, 10))
     255           1 :         return n
     256           1 : }
     257             : 
     258             : // Key returns the i-th unsuffixed key within the keyspace.
     259           1 : func Key(k Keyspace, i int64) []byte {
     260           1 :         b := make([]byte, k.MaxLen())
     261           1 :         return b[:k.key(b, i)]
     262           1 : }
     263             : 
     264             : // WriteKey writes the i-th unsuffixed key within the keyspace to the buffer dst. It
     265             : // returns the number of bytes written.
     266           1 : func WriteKey(dst []byte, k Keyspace, i int64) int {
     267           1 :         return k.key(dst, i)
     268           1 : }
     269             : 
     270             : type alphabet struct {
     271             :         alphabet  []byte
     272             :         maxLength int
     273             :         headSkip  int64
     274             :         tailSkip  int64
     275             :         increment int64
     276             : }
     277             : 
     278           1 : func (a alphabet) Count() int64 {
     279           1 :         // Calculate the total number of keys, ignoring the increment.
     280           1 :         total := keyCount(len(a.alphabet), a.maxLength) - a.headSkip - a.tailSkip
     281           1 : 
     282           1 :         // The increment dictates that we take every N keys, where N = a.increment.
     283           1 :         // Consider a total containing the 5 keys:
     284           1 :         //   a  b  c  d  e
     285           1 :         //   ^     ^     ^
     286           1 :         // If the increment is 2, this keyspace includes 'a', 'c' and 'e'. After
     287           1 :         // dividing by the increment, there may be remainder. If there is, there's
     288           1 :         // one additional key in the alphabet.
     289           1 :         count := total / a.increment
     290           1 :         if total%a.increment > 0 {
     291           1 :                 count++
     292           1 :         }
     293           1 :         return count
     294             : }
     295             : 
     296           1 : func (a alphabet) MaxLen() int {
     297           1 :         return a.maxLength
     298           1 : }
     299             : 
     300           1 : func (a alphabet) Slice(i, j int64) Keyspace {
     301           1 :         s := a
     302           1 :         s.headSkip += i
     303           1 :         s.tailSkip += a.Count() - j
     304           1 :         return s
     305           1 : }
     306             : 
     307           1 : func (a alphabet) EveryN(n int64) Keyspace {
     308           1 :         s := a
     309           1 :         s.increment *= n
     310           1 :         return s
     311           1 : }
     312             : 
     313           1 : func keyCount(n, l int) int64 {
     314           1 :         // The number of representable keys in the keyspace is a function of the
     315           1 :         // length of the alphabet n and the max key length l:
     316           1 :         //   n + n^2 + ... + n^l
     317           1 :         x := int64(1)
     318           1 :         res := int64(0)
     319           1 :         for i := 1; i <= l; i++ {
     320           1 :                 if x >= math.MaxInt64/int64(n) {
     321           1 :                         panic("overflow")
     322             :                 }
     323           1 :                 x *= int64(n)
     324           1 :                 res += x
     325           1 :                 if res < 0 {
     326           0 :                         panic("overflow")
     327             :                 }
     328             :         }
     329           1 :         return res
     330             : }
     331             : 
     332           1 : func (a alphabet) key(buf []byte, idx int64) int {
     333           1 :         // This function generates keys of length 1..maxKeyLength, pulling
     334           1 :         // characters from the alphabet. The idx determines which key to generate,
     335           1 :         // generating the i-th lexicographically next key.
     336           1 :         //
     337           1 :         // The index to use is advanced by `headSkip`, allowing a keyspace to encode
     338           1 :         // a subregion of the keyspace.
     339           1 :         //
     340           1 :         // Eg, alphabet = `ab`, maxKeyLength = 3:
     341           1 :         //
     342           1 :         //           aaa aab     aba abb         baa bab     bba bbb
     343           1 :         //       aa          ab              ba          bb
     344           1 :         //   a                           b
     345           1 :         //   0   1   2   3   4   5   6   7   8   9   10  11  12  13
     346           1 :         //
     347           1 :         return generateAlphabetKey(buf, a.alphabet, (idx*a.increment)+a.headSkip,
     348           1 :                 keyCount(len(a.alphabet), a.maxLength))
     349           1 : }
     350             : 
     351           1 : func generateAlphabetKey(buf, alphabet []byte, i, keyCount int64) int {
     352           1 :         if keyCount == 0 || i > keyCount || i < 0 {
     353           1 :                 return 0
     354           1 :         }
     355             : 
     356             :         // Of the keyCount keys in the generative keyspace, how many are there
     357             :         // starting with a particular character?
     358           1 :         keysPerCharacter := keyCount / int64(len(alphabet))
     359           1 : 
     360           1 :         // Find the character that the key at index i starts with and set it.
     361           1 :         characterIdx := i / keysPerCharacter
     362           1 :         buf[0] = alphabet[characterIdx]
     363           1 : 
     364           1 :         // Consider characterIdx = 0, pointing to 'a'.
     365           1 :         //
     366           1 :         //           aaa aab     aba abb         baa bab     bba bbb
     367           1 :         //       aa          ab              ba          bb
     368           1 :         //   a                           b
     369           1 :         //   0   1   2   3   4   5   6   7   8   9   10  11  12  13
     370           1 :         //  \_________________________/
     371           1 :         //    |keysPerCharacter| keys
     372           1 :         //
     373           1 :         // In our recursive call, we reduce the problem to:
     374           1 :         //
     375           1 :         //           aaa aab     aba abb
     376           1 :         //       aa          ab
     377           1 :         //       0   1   2   3   4   5
     378           1 :         //     \________________________/
     379           1 :         //    |keysPerCharacter-1| keys
     380           1 :         //
     381           1 :         // In the subproblem, there are keysPerCharacter-1 keys (eliminating the
     382           1 :         // just 'a' key, plus any keys beginning with any other character).
     383           1 :         //
     384           1 :         // The index i is also offset, reduced by the count of keys beginning with
     385           1 :         // characters earlier in the alphabet (keysPerCharacter*characterIdx) and
     386           1 :         // the key consisting of just the 'a' (-1).
     387           1 :         i = i - keysPerCharacter*characterIdx - 1
     388           1 :         return 1 + generateAlphabetKey(buf[1:], alphabet, i, keysPerCharacter-1)
     389             : }
     390             : 
     391             : // computeAlphabetKeyIndex computes the inverse of generateAlphabetKey,
     392             : // returning the index of a particular key, given the provided alphabet and max
     393             : // length of a key.
     394             : //
     395             : // len(key) must be ≥ 1.
     396           1 : func computeAlphabetKeyIndex(key []byte, alphabet map[byte]int64, n int) int64 {
     397           1 :         i, ok := alphabet[key[0]]
     398           1 :         if !ok {
     399           0 :                 panic(fmt.Sprintf("unrecognized alphabet character %v", key[0]))
     400             :         }
     401             :         // How many keys exist that start with the preceding i characters? Each of
     402             :         // the i characters themselves are a key, plus the count of all the keys
     403             :         // with one less character for each.
     404           1 :         ret := i + i*keyCount(len(alphabet), n-1)
     405           1 :         if len(key) > 1 {
     406           1 :                 ret += 1 + computeAlphabetKeyIndex(key[1:], alphabet, n-1)
     407           1 :         }
     408           1 :         return ret
     409             : }
     410             : 
     411             : // RandomPrefixInRange returns a random prefix in the range [a, b), where a and
     412             : // b are prefixes.
     413           1 : func RandomPrefixInRange(a, b []byte, rng *rand.Rand) []byte {
     414           1 :         assertValidPrefix(a)
     415           1 :         assertValidPrefix(b)
     416           1 :         assertLess(a, b)
     417           1 :         commonPrefix := 0
     418           1 :         for commonPrefix < len(a)-1 && commonPrefix < len(b)-1 && a[commonPrefix] == b[commonPrefix] {
     419           1 :                 commonPrefix++
     420           1 :         }
     421             : 
     422             :         // We will generate a piece of a key from the Alpha(maxLength) keyspace. Note
     423             :         // that maxLength cannot be higher than ~13 or we will encounter overflows.
     424           1 :         maxLength := 4 + rng.IntN(8)
     425           1 : 
     426           1 :         // Skip any common prefix (but leave at least one character in each key).
     427           1 :         skipPrefix := 0
     428           1 :         for skipPrefix+1 < min(len(a), len(b)) && a[skipPrefix] == b[skipPrefix] {
     429           1 :                 skipPrefix++
     430           1 :         }
     431           1 :         aPiece := a[skipPrefix:]
     432           1 :         bPiece := b[skipPrefix:]
     433           1 :         if len(aPiece) > maxLength {
     434           1 :                 // The trimmed prefix is smaller than a; we must be careful below to not
     435           1 :                 // return a key smaller than a.
     436           1 :                 aPiece = aPiece[:maxLength]
     437           1 :         }
     438           1 :         if len(bPiece) > maxLength {
     439           1 :                 // The trimmed prefix is smaller than b, so we will still respect the bound.
     440           1 :                 bPiece = bPiece[:maxLength]
     441           1 :         }
     442           1 :         assertLess(aPiece, bPiece)
     443           1 :         apIdx := computeAlphabetKeyIndex(aPiece, inverseAlphabet, maxLength)
     444           1 :         bpIdx := computeAlphabetKeyIndex(bPiece, inverseAlphabet, maxLength)
     445           1 :         if bpIdx <= apIdx {
     446           0 :                 panic("unreachable")
     447             :         }
     448           1 :         generatedIdx := apIdx + rng.Int64N(bpIdx-apIdx)
     449           1 :         if generatedIdx == apIdx {
     450           1 :                 // Return key a. We handle this separately in case we trimmed aPiece above.
     451           1 :                 return append([]byte(nil), a...)
     452           1 :         }
     453           1 :         dst := make([]byte, skipPrefix+maxLength)
     454           1 :         copy(dst, a[:skipPrefix])
     455           1 :         pieceLen := WriteKey(dst[skipPrefix:], Alpha(maxLength), generatedIdx)
     456           1 :         dst = dst[:skipPrefix+pieceLen]
     457           1 :         assertLE(a, dst)
     458           1 :         assertLess(dst, b)
     459           1 :         return dst
     460             : }
     461             : 
     462           1 : func assertValidPrefix(p []byte) {
     463           1 :         if !prefixRE.Match(p) {
     464           0 :                 panic(fmt.Sprintf("invalid prefix %q", p))
     465             :         }
     466             : }
     467             : 
     468           1 : func assertLess(a, b []byte) {
     469           1 :         if Comparer.Compare(a, b) >= 0 {
     470           0 :                 panic(fmt.Sprintf("invalid key ordering: %q >= %q", a, b))
     471             :         }
     472             : }
     473             : 
     474           1 : func assertLE(a, b []byte) {
     475           1 :         if Comparer.Compare(a, b) > 0 {
     476           0 :                 panic(fmt.Sprintf("invalid key ordering: %q > %q", a, b))
     477             :         }
     478             : }

Generated by: LCOV version 1.14