LCOV - code coverage report
Current view: top level - pebble/internal/crdbtest - crdbtest.go (source / functions) Hit Total Coverage
Test: 2024-10-16 08:16Z 8989e4e6 - tests only.lcov Lines: 354 562 63.0 %
Date: 2024-10-16 08:17:27 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : // Package crdbtest provides facilities for representing keys, workloads, etc
       6             : // representative of CockroachDB's use of Pebble.
       7             : package crdbtest
       8             : 
       9             : import (
      10             :         "bytes"
      11             :         "cmp"
      12             :         "encoding/binary"
      13             :         "fmt"
      14             :         "io"
      15             :         "math/rand/v2"
      16             :         "slices"
      17             :         "sync"
      18             :         "time"
      19             :         "unsafe"
      20             : 
      21             :         "github.com/cockroachdb/crlib/crbytes"
      22             :         "github.com/cockroachdb/crlib/crstrings"
      23             :         "github.com/cockroachdb/errors"
      24             :         "github.com/cockroachdb/pebble/internal/base"
      25             :         "github.com/cockroachdb/pebble/internal/invariants"
      26             :         "github.com/cockroachdb/pebble/sstable/colblk"
      27             : )
      28             : 
      29             : const withWall = 9
      30             : const withLogical = withWall + 4
      31             : const withSynthetic = withLogical + 1
      32             : const withLockTableLen = 18
      33             : 
      34             : // MaxSuffixLen is the maximum length of the CockroachDB key suffix.
      35             : const MaxSuffixLen = max(withLockTableLen, withSynthetic, withLogical, withWall)
      36             : 
      37             : // Comparer is a base.Comparer for CockroachDB keys.
      38             : var Comparer = base.Comparer{
      39             :         Split:           Split,
      40             :         CompareSuffixes: CompareSuffixes,
      41             :         Compare:         Compare,
      42             :         Equal:           Equal,
      43           0 :         AbbreviatedKey: func(k []byte) uint64 {
      44           0 :                 key, ok := getKeyPartFromEngineKey(k)
      45           0 :                 if !ok {
      46           0 :                         return 0
      47           0 :                 }
      48           0 :                 return base.DefaultComparer.AbbreviatedKey(key)
      49             :         },
      50             :         FormatKey: base.DefaultFormatter,
      51           0 :         Separator: func(dst, a, b []byte) []byte {
      52           0 :                 aKey, ok := getKeyPartFromEngineKey(a)
      53           0 :                 if !ok {
      54           0 :                         return append(dst, a...)
      55           0 :                 }
      56           0 :                 bKey, ok := getKeyPartFromEngineKey(b)
      57           0 :                 if !ok {
      58           0 :                         return append(dst, a...)
      59           0 :                 }
      60             :                 // If the keys are the same just return a.
      61           0 :                 if bytes.Equal(aKey, bKey) {
      62           0 :                         return append(dst, a...)
      63           0 :                 }
      64           0 :                 n := len(dst)
      65           0 :                 dst = base.DefaultComparer.Separator(dst, aKey, bKey)
      66           0 :                 // Did it pick a separator different than aKey -- if it did not we can't do better than a.
      67           0 :                 buf := dst[n:]
      68           0 :                 if bytes.Equal(aKey, buf) {
      69           0 :                         return append(dst[:n], a...)
      70           0 :                 }
      71             :                 // The separator is > aKey, so we only need to add the sentinel.
      72           0 :                 return append(dst, 0)
      73             :         },
      74           0 :         Successor: func(dst, a []byte) []byte {
      75           0 :                 aKey, ok := getKeyPartFromEngineKey(a)
      76           0 :                 if !ok {
      77           0 :                         return append(dst, a...)
      78           0 :                 }
      79           0 :                 n := len(dst)
      80           0 :                 // Engine key comparison uses bytes.Compare on the roachpb.Key, which is the same semantics as
      81           0 :                 // pebble.DefaultComparer, so reuse the latter's SeekSetBitGE implementation.
      82           0 :                 dst = base.DefaultComparer.Successor(dst, aKey)
      83           0 :                 // Did it pick a successor different than aKey -- if it did not we can't do better than a.
      84           0 :                 buf := dst[n:]
      85           0 :                 if bytes.Equal(aKey, buf) {
      86           0 :                         return append(dst[:n], a...)
      87           0 :                 }
      88             :                 // The successor is > aKey, so we only need to add the sentinel.
      89           0 :                 return append(dst, 0)
      90             :         },
      91           0 :         ImmediateSuccessor: func(dst, a []byte) []byte {
      92           0 :                 // The key `a` is guaranteed to be a bare prefix: It's a
      93           0 :                 // `engineKeyNoVersion` key without a version—just a trailing 0-byte to
      94           0 :                 // signify the length of the version. For example the user key "foo" is
      95           0 :                 // encoded as: "foo\0". We need to encode the immediate successor to
      96           0 :                 // "foo", which in the natural byte ordering is "foo\0".  Append a
      97           0 :                 // single additional zero, to encode the user key "foo\0" with a
      98           0 :                 // zero-length version.
      99           0 :                 return append(append(dst, a...), 0)
     100           0 :         },
     101             :         Name: "cockroach_comparator",
     102             : }
     103             : 
     104             : // EncodeMVCCKey encodes a MVCC key into dst, growing dst as necessary.
     105           1 : func EncodeMVCCKey(dst []byte, key []byte, walltime uint64, logical uint32) []byte {
     106           1 :         if cap(dst) < len(key)+withSynthetic {
     107           1 :                 dst = make([]byte, 0, len(key)+withSynthetic)
     108           1 :         }
     109           1 :         dst = append(dst[:0], key...)
     110           1 :         return EncodeTimestamp(dst, walltime, logical)
     111             : }
     112             : 
     113             : // AppendTimestamp appends an encoded MVCC timestamp onto key, returning the new
     114             : // key. The provided key should already have the 0x00 sentinel byte (i.e., key
     115             : // should be a proper prefix from the perspective of Pebble).
     116           1 : func AppendTimestamp(key []byte, walltime uint64, logical uint32) []byte {
     117           1 :         if key[len(key)-1] != 0 {
     118           0 :                 panic(errors.AssertionFailedf("key does not end with 0x00 sentinel byte: %x", key))
     119             :         }
     120           1 :         if logical == 0 {
     121           1 :                 if walltime == 0 {
     122           0 :                         return key
     123           0 :                 }
     124           1 :                 key = append(key, make([]byte, 9)...)
     125           1 :                 binary.BigEndian.PutUint64(key[len(key)-9:], walltime)
     126           1 :                 key[len(key)-1] = 9 // Version length byte
     127           1 :                 return key
     128             :         }
     129           0 :         key = append(key, make([]byte, 13)...)
     130           0 :         binary.BigEndian.PutUint64(key[len(key)-13:], walltime)
     131           0 :         binary.BigEndian.PutUint32(key[len(key)-5:], logical)
     132           0 :         key[len(key)-1] = 13 // Version length byte
     133           0 :         return key
     134             : 
     135             : }
     136             : 
     137             : // EncodeTimestamp encodes a MVCC timestamp into a key, returning the new key.
     138             : // The key's capacity must be sufficiently large to hold the encoded timestamp.
     139           1 : func EncodeTimestamp(key []byte, walltime uint64, logical uint32) []byte {
     140           1 :         pos := len(key)
     141           1 :         if logical == 0 {
     142           1 :                 if walltime == 0 {
     143           1 :                         key = key[:pos+1]
     144           1 :                         key[pos] = 0 // sentinel byte
     145           1 :                         return key
     146           1 :                 }
     147             : 
     148           1 :                 key = key[:pos+1+8+1]
     149           1 :                 key[pos] = 0 // sentinel byte
     150           1 :                 key[pos+1+8] = 9
     151           1 :                 binary.BigEndian.PutUint64(key[pos+1:], walltime)
     152           1 :                 return key
     153             :         }
     154             : 
     155           1 :         key = key[:pos+1+12+1]
     156           1 :         key[pos] = 0 // sentinel byte
     157           1 :         key[pos+1+8+4] = 13
     158           1 :         binary.BigEndian.PutUint64(key[pos+1:], walltime)
     159           1 :         binary.BigEndian.PutUint32(key[pos+1+8:], logical)
     160           1 :         return key
     161             : }
     162             : 
     163             : // DecodeEngineKey decodes an Engine key (the key that gets stored in Pebble).
     164             : // Returns:
     165             : //
     166             : //   - roachKey: key prefix without the separator byte; corresponds to
     167             : //     MVCCKey.Key / EngineKey.Key (roachpb.Key type).
     168             : //
     169             : //   - untypedVersion: when the suffix does not correspond to a timestamp,
     170             : //     untypedVersion is set to the suffix without the length
     171             : //     byte; corresponds to EngineKey.Version.
     172             : //
     173             : //   - wallTime, logicalTime: timestamp for the key, or 0 if the key does not
     174             : //     correspond to a timestamp.
     175             : func DecodeEngineKey(
     176             :         engineKey []byte,
     177           1 : ) (roachKey []byte, untypedVersion []byte, wallTime uint64, logicalTime uint32) {
     178           1 :         tsLen := int(engineKey[len(engineKey)-1])
     179           1 :         tsStart := len(engineKey) - tsLen
     180           1 :         if tsStart < 0 {
     181           0 :                 if invariants.Enabled {
     182           0 :                         panic("invalid length byte")
     183             :                 }
     184           0 :                 return nil, nil, 0, 0
     185             :         }
     186           1 :         roachKeyEnd := tsStart - 1
     187           1 :         if roachKeyEnd < 0 {
     188           0 :                 roachKeyEnd = 0
     189           1 :         } else if invariants.Enabled && engineKey[roachKeyEnd] != 0 {
     190           0 :                 panic("invalid separator byte")
     191             :         }
     192             : 
     193           1 :         roachKey = engineKey[:roachKeyEnd]
     194           1 :         untypedVersion, wallTime, logicalTime = DecodeSuffix(engineKey[tsStart:])
     195           1 :         return roachKey, untypedVersion, wallTime, logicalTime
     196             : }
     197             : 
     198             : // DecodeSuffix decodes the suffix of a key. If the suffix is a timestamp,
     199             : // wallTime and logicalTime are returned; otherwise untypedVersion contains the
     200             : // version (which is the suffix without the terminator length byte).
     201             : //
     202             : //gcassert:inline
     203           1 : func DecodeSuffix(suffix []byte) (untypedVersion []byte, wallTime uint64, logicalTime uint32) {
     204           1 :         if invariants.Enabled && len(suffix) > 0 && len(suffix) != int(suffix[len(suffix)-1]) {
     205           0 :                 panic("invalid length byte")
     206             :         }
     207           1 :         switch len(suffix) {
     208           0 :         case 0:
     209           0 :                 return nil, 0, 0
     210           1 :         case 9:
     211           1 :                 return nil, binary.BigEndian.Uint64(suffix[:8]), 0
     212           0 :         case 13, 14:
     213           0 :                 return nil, binary.BigEndian.Uint64(suffix[:8]), binary.BigEndian.Uint32(suffix[8:12])
     214           0 :         default:
     215           0 :                 return suffix[:len(suffix)-1], 0, 0
     216             :         }
     217             : }
     218             : 
     219             : // Split implements base.Split for CockroachDB keys.
     220           1 : func Split(key []byte) int {
     221           1 :         if len(key) == 0 {
     222           0 :                 return 0
     223           0 :         }
     224             : 
     225             :         // Last byte is the version length + 1 when there is a version, else it is
     226             :         // 0.
     227           1 :         versionLen := int(key[len(key)-1])
     228           1 :         if versionLen > len(key) {
     229           0 :                 panic(errors.AssertionFailedf("invalid version length"))
     230             :         }
     231           1 :         return len(key) - versionLen
     232             : }
     233             : 
     234             : // Compare compares cockroach keys, including the version (which could be MVCC
     235             : // timestamps).
     236           1 : func Compare(a, b []byte) int {
     237           1 :         if len(a) == 0 || len(b) == 0 {
     238           0 :                 return cmp.Compare(len(a), len(b))
     239           0 :         }
     240             : 
     241             :         // NB: For performance, this routine manually splits the key into the
     242             :         // user-key and version components rather than using DecodeEngineKey. In
     243             :         // most situations, use DecodeEngineKey or GetKeyPartFromEngineKey or
     244             :         // SplitMVCCKey instead of doing this.
     245           1 :         aEnd := len(a) - 1
     246           1 :         bEnd := len(b) - 1
     247           1 : 
     248           1 :         // Compute the index of the separator between the key and the version. If the
     249           1 :         // separator is found to be at -1 for both keys, then we are comparing bare
     250           1 :         // suffixes without a user key part. Pebble requires bare suffixes to be
     251           1 :         // comparable with the same ordering as if they had a common user key.
     252           1 :         aSep := aEnd - int(a[aEnd])
     253           1 :         bSep := bEnd - int(b[bEnd])
     254           1 :         if aSep < 0 || bSep < 0 || a[aSep] != 0 || b[bSep] != 0 {
     255           0 :                 panic(errors.AssertionFailedf("malformed key: %x, %x", a, b))
     256             :         }
     257             :         // Compare the "user key" part of the key.
     258           1 :         if c := bytes.Compare(a[:aSep], b[:bSep]); c != 0 {
     259           1 :                 return c
     260           1 :         }
     261             : 
     262             :         // Compare the version part of the key. Note that when the version is a
     263             :         // timestamp, the timestamp encoding causes byte comparison to be equivalent
     264             :         // to timestamp comparison.
     265           1 :         a, b = a[aSep+1:], b[bSep+1:]
     266           1 :         if len(a) == 0 || len(b) == 0 {
     267           1 :                 // Empty suffixes come before non-empty suffixes.
     268           1 :                 return cmp.Compare(len(a), len(b))
     269           1 :         }
     270           1 :         return bytes.Compare(
     271           1 :                 normalizeEngineKeyVersionForCompare(b),
     272           1 :                 normalizeEngineKeyVersionForCompare(a),
     273           1 :         )
     274             : }
     275             : 
     276             : // CompareSuffixes compares suffixes (normally timestamps).
     277           1 : func CompareSuffixes(a, b []byte) int {
     278           1 :         if len(a) == 0 || len(b) == 0 {
     279           1 :                 // Empty suffixes come before non-empty suffixes.
     280           1 :                 return cmp.Compare(len(a), len(b))
     281           1 :         }
     282             :         // Here we are not using normalizeEngineKeyVersionForCompare for historical
     283             :         // reasons, summarized in
     284             :         // https://github.com/cockroachdb/cockroach/issues/130533.
     285           1 :         return bytes.Compare(b[:len(b)-1], a[:len(a)-1])
     286             : }
     287             : 
     288             : // Equal implements base.Equal for Cockroach keys.
     289           1 : func Equal(a, b []byte) bool {
     290           1 :         // TODO(radu): Pebble sometimes passes empty "keys" and we have to tolerate
     291           1 :         // them until we fix that.
     292           1 :         if len(a) == 0 || len(b) == 0 {
     293           0 :                 return len(a) == len(b)
     294           0 :         }
     295           1 :         aEnd := len(a) - 1
     296           1 :         bEnd := len(b) - 1
     297           1 : 
     298           1 :         // Last byte is the version length + 1 when there is a version,
     299           1 :         // else it is 0.
     300           1 :         aVerLen := int(a[aEnd])
     301           1 :         bVerLen := int(b[bEnd])
     302           1 : 
     303           1 :         // Fast-path. If the key version is empty or contains only a walltime
     304           1 :         // component then normalizeEngineKeyVersionForCompare is a no-op, so we don't
     305           1 :         // need to split the "user key" from the version suffix before comparing to
     306           1 :         // compute equality. Instead, we can check for byte equality immediately.
     307           1 :         if (aVerLen <= withWall && bVerLen <= withWall) || (aVerLen == withLockTableLen && bVerLen == withLockTableLen) {
     308           1 :                 return bytes.Equal(a, b)
     309           1 :         }
     310             : 
     311             :         // Compute the index of the separator between the key and the version. If the
     312             :         // separator is found to be at -1 for both keys, then we are comparing bare
     313             :         // suffixes without a user key part. Pebble requires bare suffixes to be
     314             :         // comparable with the same ordering as if they had a common user key.
     315           1 :         aSep := aEnd - aVerLen
     316           1 :         bSep := bEnd - bVerLen
     317           1 :         // Compare the "user key" part of the key.
     318           1 :         if !bytes.Equal(a[:aSep], b[:bSep]) {
     319           1 :                 return false
     320           1 :         }
     321           1 :         if aVerLen == 0 || bVerLen == 0 {
     322           1 :                 return aVerLen == bVerLen
     323           1 :         }
     324             : 
     325             :         // Compare the version part of the key.
     326           1 :         aVer := a[aSep+1:]
     327           1 :         bVer := b[bSep+1:]
     328           1 :         aVer = normalizeEngineKeyVersionForCompare(aVer)
     329           1 :         bVer = normalizeEngineKeyVersionForCompare(bVer)
     330           1 :         return bytes.Equal(aVer, bVer)
     331             : }
     332             : 
     333             : var zeroLogical [4]byte
     334             : 
     335           1 : func normalizeEngineKeyVersionForCompare(a []byte) []byte {
     336           1 :         // Check sentinel byte.
     337           1 :         if len(a) != int(a[len(a)-1]) {
     338           0 :                 panic(errors.AssertionFailedf("malformed suffix: %x", a))
     339             :         }
     340             :         // Strip off sentinel byte.
     341           1 :         a = a[:len(a)-1]
     342           1 :         // In general, the version could also be a non-timestamp version, but we know
     343           1 :         // that engineKeyVersionLockTableLen+mvccEncodedTimeSentinelLen is a different
     344           1 :         // constant than the above, so there is no danger here of stripping parts from
     345           1 :         // a non-timestamp version.
     346           1 :         if len(a) == withSynthetic-1 {
     347           1 :                 // Strip the synthetic bit component from the timestamp version. The
     348           1 :                 // presence of the synthetic bit does not affect key ordering or equality.
     349           1 :                 a = a[:withLogical-1]
     350           1 :         }
     351           1 :         if len(a) == withLogical-1 {
     352           1 :                 // If the timestamp version contains a logical timestamp component that is
     353           1 :                 // zero, strip the component. encodeMVCCTimestampToBuf will typically omit
     354           1 :                 // the entire logical component in these cases as an optimization, but it
     355           1 :                 // does not guarantee to never include a zero logical component.
     356           1 :                 // Additionally, we can fall into this case after stripping off other
     357           1 :                 // components of the key version earlier on in this function.
     358           1 :                 if bytes.Equal(a[withWall-1:], zeroLogical[:]) {
     359           1 :                         a = a[:withWall-1]
     360           1 :                 }
     361             :         }
     362           1 :         return a
     363             : }
     364             : 
     365           0 : func getKeyPartFromEngineKey(engineKey []byte) (key []byte, ok bool) {
     366           0 :         if len(engineKey) == 0 {
     367           0 :                 return nil, false
     368           0 :         }
     369             :         // Last byte is the version length + 1 when there is a version,
     370             :         // else it is 0.
     371           0 :         versionLen := int(engineKey[len(engineKey)-1])
     372           0 :         // keyPartEnd points to the sentinel byte.
     373           0 :         keyPartEnd := len(engineKey) - 1 - versionLen
     374           0 :         if keyPartEnd < 0 || engineKey[keyPartEnd] != 0x00 {
     375           0 :                 return nil, false
     376           0 :         }
     377             :         // Key excludes the sentinel byte.
     378           0 :         return engineKey[:keyPartEnd], true
     379             : }
     380             : 
     381             : // KeyConfig configures the shape of the random keys generated.
     382             : type KeyConfig struct {
     383             :         PrefixAlphabetLen int    // Number of bytes in the alphabet used for the prefix.
     384             :         PrefixLenShared   int    // Number of bytes shared by all key prefixes.
     385             :         PrefixLen         int    // Number of bytes in the prefix.
     386             :         AvgKeysPerPrefix  int    // Average number of keys (with varying suffixes) per prefix.
     387             :         BaseWallTime      uint64 // Smallest MVCC WallTime.
     388             :         PercentLogical    int    // Percent of keys with non-zero MVCC logical time.
     389             : }
     390             : 
     391           0 : func (cfg KeyConfig) String() string {
     392           0 :         return fmt.Sprintf(
     393           0 :                 "AlphaLen=%d,Prefix=%d,Shared=%d,KeysPerPrefix=%d%s",
     394           0 :                 cfg.PrefixAlphabetLen, cfg.PrefixLen, cfg.PrefixLenShared,
     395           0 :                 cfg.AvgKeysPerPrefix,
     396           0 :                 crstrings.If(cfg.PercentLogical != 0, fmt.Sprintf(",Logical=%d", cfg.PercentLogical)),
     397           0 :         )
     398           0 : }
     399             : 
     400             : // RandomKVs constructs count random KVs with the provided parameters.
     401           1 : func RandomKVs(rng *rand.Rand, count int, cfg KeyConfig, valueLen int) (keys, vals [][]byte) {
     402           1 :         g := makeCockroachKeyGen(rng, cfg)
     403           1 :         sharedPrefix := make([]byte, cfg.PrefixLenShared)
     404           1 :         for i := 0; i < len(sharedPrefix); i++ {
     405           1 :                 sharedPrefix[i] = byte(rng.IntN(cfg.PrefixAlphabetLen) + 'a')
     406           1 :         }
     407             : 
     408           1 :         keys = make([][]byte, 0, count)
     409           1 :         vals = make([][]byte, 0, count)
     410           1 :         for len(keys) < count {
     411           1 :                 prefix := g.randPrefix(sharedPrefix)
     412           1 :                 // We use the exponential distribution so that we occasionally have many
     413           1 :                 // suffixes
     414           1 :                 n := int(rng.ExpFloat64() * float64(cfg.AvgKeysPerPrefix))
     415           1 :                 n = max(n, 1)
     416           1 :                 for i := 0; i < n && len(keys) < count; i++ {
     417           1 :                         wallTime, logicalTime := g.randTimestamp()
     418           1 :                         k := makeKey(prefix, wallTime, logicalTime)
     419           1 :                         v := make([]byte, valueLen)
     420           1 :                         for j := range v {
     421           1 :                                 v[j] = byte(rng.Uint32())
     422           1 :                         }
     423           1 :                         keys = append(keys, k)
     424           1 :                         vals = append(vals, v)
     425             :                 }
     426             :         }
     427           1 :         slices.SortFunc(keys, Compare)
     428           1 :         return keys, vals
     429             : }
     430             : 
     431           1 : func makeKey(prefix []byte, wallTime uint64, logicalTime uint32) []byte {
     432           1 :         k := make([]byte, 0, len(prefix)+MaxSuffixLen)
     433           1 :         k = append(k, prefix...)
     434           1 :         return EncodeTimestamp(k, wallTime, logicalTime)
     435           1 : }
     436             : 
     437             : // RandomQueryKeys returns a slice of count random query keys. Each key has a
     438             : // random prefix uniformly chosen from  the distinct prefixes in writtenKeys and
     439             : // a random timestamp.
     440             : //
     441             : // Note that by setting baseWallTime to be large enough, we can simulate a query
     442             : // pattern that always retrieves the latest version of any prefix.
     443             : func RandomQueryKeys(
     444             :         rng *rand.Rand, count int, writtenKeys [][]byte, baseWallTime uint64,
     445           1 : ) [][]byte {
     446           1 :         // Gather prefixes.
     447           1 :         prefixes := make([][]byte, len(writtenKeys))
     448           1 :         for i, k := range writtenKeys {
     449           1 :                 prefixes[i] = k[:Split(k)-1]
     450           1 :         }
     451           1 :         slices.SortFunc(prefixes, bytes.Compare)
     452           1 :         prefixes = slices.CompactFunc(prefixes, bytes.Equal)
     453           1 :         result := make([][]byte, count)
     454           1 :         for i := range result {
     455           1 :                 prefix := prefixes[rng.IntN(len(prefixes))]
     456           1 :                 wallTime := baseWallTime + rng.Uint64N(uint64(time.Hour))
     457           1 :                 var logicalTime uint32
     458           1 :                 if rng.IntN(10) == 0 {
     459           1 :                         logicalTime = rng.Uint32()
     460           1 :                 }
     461           1 :                 result[i] = makeKey(prefix, wallTime, logicalTime)
     462             :         }
     463           1 :         return result
     464             : }
     465             : 
     466             : type cockroachKeyGen struct {
     467             :         rng *rand.Rand
     468             :         cfg KeyConfig
     469             : }
     470             : 
     471           1 : func makeCockroachKeyGen(rng *rand.Rand, cfg KeyConfig) cockroachKeyGen {
     472           1 :         return cockroachKeyGen{
     473           1 :                 rng: rng,
     474           1 :                 cfg: cfg,
     475           1 :         }
     476           1 : }
     477             : 
     478           1 : func (g *cockroachKeyGen) randPrefix(blockPrefix []byte) []byte {
     479           1 :         prefix := make([]byte, 0, g.cfg.PrefixLen+MaxSuffixLen)
     480           1 :         prefix = append(prefix, blockPrefix...)
     481           1 :         for len(prefix) < g.cfg.PrefixLen {
     482           1 :                 prefix = append(prefix, byte(g.rng.IntN(g.cfg.PrefixAlphabetLen)+'a'))
     483           1 :         }
     484           1 :         return prefix
     485             : }
     486             : 
     487           1 : func (g *cockroachKeyGen) randTimestamp() (wallTime uint64, logicalTime uint32) {
     488           1 :         wallTime = g.cfg.BaseWallTime + g.rng.Uint64N(uint64(time.Hour))
     489           1 :         if g.cfg.PercentLogical > 0 && g.rng.IntN(100) < g.cfg.PercentLogical {
     490           1 :                 logicalTime = g.rng.Uint32()
     491           1 :         }
     492           1 :         return wallTime, logicalTime
     493             : }
     494             : 
     495             : const (
     496             :         cockroachColRoachKey int = iota
     497             :         cockroachColMVCCWallTime
     498             :         cockroachColMVCCLogical
     499             :         cockroachColUntypedVersion
     500             :         cockroachColCount
     501             : )
     502             : 
     503             : var KeySchema = colblk.KeySchema{
     504             :         ColumnTypes: []colblk.DataType{
     505             :                 cockroachColRoachKey:       colblk.DataTypePrefixBytes,
     506             :                 cockroachColMVCCWallTime:   colblk.DataTypeUint,
     507             :                 cockroachColMVCCLogical:    colblk.DataTypeUint,
     508             :                 cockroachColUntypedVersion: colblk.DataTypeBytes,
     509             :         },
     510           1 :         NewKeyWriter: func() colblk.KeyWriter {
     511           1 :                 kw := &cockroachKeyWriter{}
     512           1 :                 kw.roachKeys.Init(16)
     513           1 :                 kw.wallTimes.Init()
     514           1 :                 kw.logicalTimes.InitWithDefault()
     515           1 :                 kw.untypedVersions.Init()
     516           1 :                 return kw
     517           1 :         },
     518           1 :         NewKeySeeker: func() colblk.KeySeeker {
     519           1 :                 return cockroachKeySeekerPool.Get().(*cockroachKeySeeker)
     520           1 :         },
     521             : }
     522             : 
     523             : type cockroachKeyWriter struct {
     524             :         roachKeys       colblk.PrefixBytesBuilder
     525             :         wallTimes       colblk.UintBuilder
     526             :         logicalTimes    colblk.UintBuilder
     527             :         untypedVersions colblk.RawBytesBuilder
     528             :         prevSuffix      []byte
     529             : }
     530             : 
     531           1 : func (kw *cockroachKeyWriter) ComparePrev(key []byte) colblk.KeyComparison {
     532           1 :         var cmpv colblk.KeyComparison
     533           1 :         cmpv.PrefixLen = int32(Split(key)) // TODO(jackson): Inline
     534           1 :         if kw.roachKeys.Rows() == 0 {
     535           1 :                 cmpv.UserKeyComparison = 1
     536           1 :                 return cmpv
     537           1 :         }
     538           1 :         lp := kw.roachKeys.UnsafeGet(kw.roachKeys.Rows() - 1)
     539           1 :         cmpv.CommonPrefixLen = int32(crbytes.CommonPrefix(lp, key[:cmpv.PrefixLen-1]))
     540           1 :         if cmpv.CommonPrefixLen == cmpv.PrefixLen-1 {
     541           1 :                 // Adjust CommonPrefixLen to include the sentinel byte,
     542           1 :                 cmpv.CommonPrefixLen = cmpv.PrefixLen
     543           1 :                 cmpv.UserKeyComparison = int32(CompareSuffixes(key[cmpv.PrefixLen:], kw.prevSuffix))
     544           1 :                 return cmpv
     545           1 :         }
     546             :         // The keys have different MVCC prefixes. We haven't determined which is
     547             :         // greater, but we know the index at which they diverge. The base.Comparer
     548             :         // contract dictates that prefixes must be lexicographically ordered.
     549           1 :         if len(lp) == int(cmpv.CommonPrefixLen) {
     550           0 :                 // cmpv.PrefixLen > cmpv.PrefixLenShared; key is greater.
     551           0 :                 cmpv.UserKeyComparison = +1
     552           1 :         } else {
     553           1 :                 // Both keys have at least 1 additional byte at which they diverge.
     554           1 :                 // Compare the diverging byte.
     555           1 :                 cmpv.UserKeyComparison = int32(cmp.Compare(key[cmpv.CommonPrefixLen], lp[cmpv.CommonPrefixLen]))
     556           1 :         }
     557           1 :         return cmpv
     558             : }
     559             : 
     560             : func (kw *cockroachKeyWriter) WriteKey(
     561             :         row int, key []byte, keyPrefixLen, keyPrefixLenSharedWithPrev int32,
     562           1 : ) {
     563           1 :         // TODO(jackson): Avoid copying the previous suffix.
     564           1 :         // TODO(jackson): Use keyPrefixLen to speed up decoding.
     565           1 :         roachKey, untypedVersion, wallTime, logicalTime := DecodeEngineKey(key)
     566           1 :         kw.prevSuffix = append(kw.prevSuffix[:0], key[keyPrefixLen:]...)
     567           1 :         // When the roach key is the same, keyPrefixLenSharedWithPrev includes the
     568           1 :         // separator byte.
     569           1 :         kw.roachKeys.Put(roachKey, min(int(keyPrefixLenSharedWithPrev), len(roachKey)))
     570           1 :         kw.wallTimes.Set(row, wallTime)
     571           1 :         // The w.logicalTimes builder was initialized with InitWithDefault, so if we
     572           1 :         // don't set a value, the column value is implicitly zero. We only need to
     573           1 :         // Set anything for non-zero values.
     574           1 :         if logicalTime > 0 {
     575           0 :                 kw.logicalTimes.Set(row, uint64(logicalTime))
     576           0 :         }
     577           1 :         kw.untypedVersions.Put(untypedVersion)
     578             : }
     579             : 
     580           1 : func (kw *cockroachKeyWriter) MaterializeKey(dst []byte, i int) []byte {
     581           1 :         dst = append(dst, kw.roachKeys.UnsafeGet(i)...)
     582           1 :         // Append separator byte.
     583           1 :         dst = append(dst, 0)
     584           1 :         if untypedVersion := kw.untypedVersions.UnsafeGet(i); len(untypedVersion) > 0 {
     585           0 :                 dst = append(dst, untypedVersion...)
     586           0 :                 dst = append(dst, byte(len(untypedVersion)))
     587           0 :                 return dst
     588           0 :         }
     589           1 :         return AppendTimestamp(dst, kw.wallTimes.Get(i), uint32(kw.logicalTimes.Get(i)))
     590             : }
     591             : 
     592           0 : func (kw *cockroachKeyWriter) Reset() {
     593           0 :         kw.roachKeys.Reset()
     594           0 :         kw.wallTimes.Reset()
     595           0 :         kw.logicalTimes.Reset()
     596           0 :         kw.untypedVersions.Reset()
     597           0 : }
     598             : 
     599           0 : func (kw *cockroachKeyWriter) WriteDebug(dst io.Writer, rows int) {
     600           0 :         fmt.Fprint(dst, "prefixes: ")
     601           0 :         kw.roachKeys.WriteDebug(dst, rows)
     602           0 :         fmt.Fprintln(dst)
     603           0 :         fmt.Fprint(dst, "wall times: ")
     604           0 :         kw.wallTimes.WriteDebug(dst, rows)
     605           0 :         fmt.Fprintln(dst)
     606           0 :         fmt.Fprint(dst, "logical times: ")
     607           0 :         kw.logicalTimes.WriteDebug(dst, rows)
     608           0 :         fmt.Fprintln(dst)
     609           0 :         fmt.Fprint(dst, "untyped suffixes: ")
     610           0 :         kw.untypedVersions.WriteDebug(dst, rows)
     611           0 :         fmt.Fprintln(dst)
     612           0 : }
     613             : 
     614           1 : func (kw *cockroachKeyWriter) NumColumns() int {
     615           1 :         return cockroachColCount
     616           1 : }
     617             : 
     618           1 : func (kw *cockroachKeyWriter) DataType(col int) colblk.DataType {
     619           1 :         return KeySchema.ColumnTypes[col]
     620           1 : }
     621             : 
     622           1 : func (kw *cockroachKeyWriter) Size(rows int, offset uint32) uint32 {
     623           1 :         offset = kw.roachKeys.Size(rows, offset)
     624           1 :         offset = kw.wallTimes.Size(rows, offset)
     625           1 :         offset = kw.logicalTimes.Size(rows, offset)
     626           1 :         offset = kw.untypedVersions.Size(rows, offset)
     627           1 :         return offset
     628           1 : }
     629             : 
     630             : func (kw *cockroachKeyWriter) Finish(
     631             :         col int, rows int, offset uint32, buf []byte,
     632           1 : ) (endOffset uint32) {
     633           1 :         switch col {
     634           1 :         case cockroachColRoachKey:
     635           1 :                 return kw.roachKeys.Finish(0, rows, offset, buf)
     636           1 :         case cockroachColMVCCWallTime:
     637           1 :                 return kw.wallTimes.Finish(0, rows, offset, buf)
     638           1 :         case cockroachColMVCCLogical:
     639           1 :                 return kw.logicalTimes.Finish(0, rows, offset, buf)
     640           1 :         case cockroachColUntypedVersion:
     641           1 :                 return kw.untypedVersions.Finish(0, rows, offset, buf)
     642           0 :         default:
     643           0 :                 panic(fmt.Sprintf("unknown default key column: %d", col))
     644             :         }
     645             : }
     646             : 
     647             : var cockroachKeySeekerPool = sync.Pool{
     648           1 :         New: func() interface{} { return &cockroachKeySeeker{} },
     649             : }
     650             : 
     651             : type cockroachKeySeeker struct {
     652             :         roachKeys       colblk.PrefixBytes
     653             :         roachKeyChanged colblk.Bitmap
     654             :         mvccWallTimes   colblk.UnsafeUints
     655             :         mvccLogical     colblk.UnsafeUints
     656             :         untypedVersions colblk.RawBytes
     657             : }
     658             : 
     659             : var _ colblk.KeySeeker = (*cockroachKeySeeker)(nil)
     660             : 
     661             : // Init is part of the KeySeeker interface.
     662           1 : func (ks *cockroachKeySeeker) Init(d *colblk.DataBlockDecoder) error {
     663           1 :         bd := d.BlockDecoder()
     664           1 :         ks.roachKeys = bd.PrefixBytes(cockroachColRoachKey)
     665           1 :         ks.roachKeyChanged = d.PrefixChanged()
     666           1 :         ks.mvccWallTimes = bd.Uints(cockroachColMVCCWallTime)
     667           1 :         ks.mvccLogical = bd.Uints(cockroachColMVCCLogical)
     668           1 :         ks.untypedVersions = bd.RawBytes(cockroachColUntypedVersion)
     669           1 :         return nil
     670           1 : }
     671             : 
     672             : // CompareFirstUserKey compares the provided key to the first user key
     673             : // contained within the data block. It's equivalent to performing
     674             : //
     675             : //      Compare(firstUserKey, k)
     676           0 : func (ks *cockroachKeySeeker) IsLowerBound(k []byte, syntheticSuffix []byte) bool {
     677           0 :         roachKey, untypedVersion, wallTime, logicalTime := DecodeEngineKey(k)
     678           0 :         if v := Compare(ks.roachKeys.UnsafeFirstSlice(), roachKey); v != 0 {
     679           0 :                 return v > 0
     680           0 :         }
     681           0 :         if len(syntheticSuffix) > 0 {
     682           0 :                 return Compare(syntheticSuffix, k[len(roachKey)+1:]) >= 0
     683           0 :         }
     684           0 :         if len(untypedVersion) > 0 {
     685           0 :                 if invariants.Enabled && ks.mvccWallTimes.At(0) != 0 {
     686           0 :                         panic("comparing timestamp with untyped suffix")
     687             :                 }
     688           0 :                 return Compare(ks.untypedVersions.At(0), untypedVersion) >= 0
     689             :         }
     690           0 :         if v := cmp.Compare(ks.mvccWallTimes.At(0), wallTime); v != 0 {
     691           0 :                 return v > 0
     692           0 :         }
     693           0 :         return cmp.Compare(uint32(ks.mvccLogical.At(0)), logicalTime) >= 0
     694             : }
     695             : 
     696             : // SeekGE is part of the KeySeeker interface.
     697             : func (ks *cockroachKeySeeker) SeekGE(
     698             :         key []byte, boundRow int, searchDir int8,
     699           1 : ) (row int, equalPrefix bool) {
     700           1 :         // TODO(jackson): Inline Split.
     701           1 :         si := Split(key)
     702           1 :         row, eq := ks.roachKeys.Search(key[:si-1])
     703           1 :         if eq {
     704           1 :                 return ks.seekGEOnSuffix(row, key[si:]), true
     705           1 :         }
     706           0 :         return row, false
     707             : }
     708             : 
     709             : // seekGEOnSuffix is a helper function for SeekGE when a seek key's prefix
     710             : // exactly matches a row. seekGEOnSuffix finds the first row at index or later
     711             : // with the same prefix as index and a suffix greater than or equal to [suffix],
     712             : // or if no such row exists, the next row with a different prefix.
     713           1 : func (ks *cockroachKeySeeker) seekGEOnSuffix(index int, seekSuffix []byte) (row int) {
     714           1 :         // The search key's prefix exactly matches the prefix of the row at index.
     715           1 :         const withWall = 9
     716           1 :         const withLogical = withWall + 4
     717           1 :         const withSynthetic = withLogical + 1
     718           1 :         var seekWallTime uint64
     719           1 :         var seekLogicalTime uint32
     720           1 :         switch len(seekSuffix) {
     721           0 :         case 0:
     722           0 :                 // The search key has no suffix, so it's the smallest possible key with
     723           0 :                 // its prefix. Return the row. This is a common case where the user is
     724           0 :                 // seeking to the most-recent row and just wants the smallest key with
     725           0 :                 // the prefix.
     726           0 :                 return index
     727           0 :         case withLogical, withSynthetic:
     728           0 :                 seekWallTime = binary.BigEndian.Uint64(seekSuffix)
     729           0 :                 seekLogicalTime = binary.BigEndian.Uint32(seekSuffix[8:])
     730           1 :         case withWall:
     731           1 :                 seekWallTime = binary.BigEndian.Uint64(seekSuffix)
     732           0 :         default:
     733           0 :                 // The suffix is untyped. Compare the untyped suffixes.
     734           0 :                 // Binary search between [index, prefixChanged.SeekSetBitGE(index+1)].
     735           0 :                 //
     736           0 :                 // Define f(i) = true iff key at i is >= seek key.
     737           0 :                 // Invariant: f(l-1) == false, f(u) == true.
     738           0 :                 l := index
     739           0 :                 u := ks.roachKeyChanged.SeekSetBitGE(index + 1)
     740           0 :                 for l < u {
     741           0 :                         h := int(uint(l+u) >> 1) // avoid overflow when computing h
     742           0 :                         // l ≤ h < u
     743           0 :                         if bytes.Compare(ks.untypedVersions.At(h), seekSuffix) >= 0 {
     744           0 :                                 u = h // preserves f(u) == true
     745           0 :                         } else {
     746           0 :                                 l = h + 1 // preserves f(l-1) == false
     747           0 :                         }
     748             :                 }
     749           0 :                 return l
     750             :         }
     751             :         // Seeking among MVCC versions using a MVCC timestamp.
     752             : 
     753             :         // TODO(jackson): What if the row has an untyped suffix?
     754             : 
     755             :         // First check the suffix at index, because querying for the latest value is
     756             :         // the most common case.
     757           1 :         if latestWallTime := ks.mvccWallTimes.At(index); latestWallTime < seekWallTime ||
     758           1 :                 (latestWallTime == seekWallTime && uint32(ks.mvccLogical.At(index)) <= seekLogicalTime) {
     759           1 :                 return index
     760           1 :         }
     761             : 
     762             :         // Binary search between [index+1, prefixChanged.SeekSetBitGE(index+1)].
     763             :         //
     764             :         // Define f(i) = true iff key at i is >= seek key.
     765             :         // Invariant: f(l-1) == false, f(u) == true.
     766           1 :         l := index + 1
     767           1 :         u := ks.roachKeyChanged.SeekSetBitGE(index + 1)
     768           1 :         for l < u {
     769           1 :                 h := int(uint(l+u) >> 1) // avoid overflow when computing h
     770           1 :                 // l ≤ h < u
     771           1 :                 hWallTime := ks.mvccWallTimes.At(h)
     772           1 :                 if hWallTime < seekWallTime ||
     773           1 :                         (hWallTime == seekWallTime && uint32(ks.mvccLogical.At(h)) <= seekLogicalTime) {
     774           1 :                         u = h // preserves f(u) = true
     775           1 :                 } else {
     776           1 :                         l = h + 1 // preserves f(l-1) = false
     777           1 :                 }
     778             :         }
     779           1 :         return l
     780             : }
     781             : 
     782             : // MaterializeUserKey is part of the KeySeeker interface.
     783             : func (ks *cockroachKeySeeker) MaterializeUserKey(
     784             :         ki *colblk.PrefixBytesIter, prevRow, row int,
     785           1 : ) []byte {
     786           1 :         if prevRow+1 == row && prevRow >= 0 {
     787           1 :                 ks.roachKeys.SetNext(ki)
     788           1 :         } else {
     789           1 :                 ks.roachKeys.SetAt(ki, row)
     790           1 :         }
     791             : 
     792           1 :         roachKeyLen := len(ki.Buf)
     793           1 :         ptr := unsafe.Pointer(uintptr(unsafe.Pointer(unsafe.SliceData(ki.Buf))) + uintptr(roachKeyLen))
     794           1 :         mvccWall := ks.mvccWallTimes.At(row)
     795           1 :         mvccLogical := uint32(ks.mvccLogical.At(row))
     796           1 :         if mvccWall == 0 && mvccLogical == 0 {
     797           0 :                 // This is not an MVCC key. Use the untyped suffix.
     798           0 :                 untypedVersion := ks.untypedVersions.At(row)
     799           0 :                 if len(untypedVersion) == 0 {
     800           0 :                         res := ki.Buf[:roachKeyLen+1]
     801           0 :                         res[roachKeyLen] = 0
     802           0 :                         return res
     803           0 :                 }
     804             :                 // Slice first, to check that the capacity is sufficient.
     805           0 :                 res := ki.Buf[:roachKeyLen+1+len(untypedVersion)]
     806           0 :                 *(*byte)(ptr) = 0
     807           0 :                 memmove(
     808           0 :                         unsafe.Pointer(uintptr(ptr)+1),
     809           0 :                         unsafe.Pointer(unsafe.SliceData(untypedVersion)),
     810           0 :                         uintptr(len(untypedVersion)),
     811           0 :                 )
     812           0 :                 return res
     813             :         }
     814             : 
     815             :         // Inline binary.BigEndian.PutUint64. Note that this code is converted into
     816             :         // word-size instructions by the compiler.
     817           1 :         *(*byte)(ptr) = 0
     818           1 :         *(*byte)(unsafe.Pointer(uintptr(ptr) + 1)) = byte(mvccWall >> 56)
     819           1 :         *(*byte)(unsafe.Pointer(uintptr(ptr) + 2)) = byte(mvccWall >> 48)
     820           1 :         *(*byte)(unsafe.Pointer(uintptr(ptr) + 3)) = byte(mvccWall >> 40)
     821           1 :         *(*byte)(unsafe.Pointer(uintptr(ptr) + 4)) = byte(mvccWall >> 32)
     822           1 :         *(*byte)(unsafe.Pointer(uintptr(ptr) + 5)) = byte(mvccWall >> 24)
     823           1 :         *(*byte)(unsafe.Pointer(uintptr(ptr) + 6)) = byte(mvccWall >> 16)
     824           1 :         *(*byte)(unsafe.Pointer(uintptr(ptr) + 7)) = byte(mvccWall >> 8)
     825           1 :         *(*byte)(unsafe.Pointer(uintptr(ptr) + 8)) = byte(mvccWall)
     826           1 : 
     827           1 :         ptr = unsafe.Pointer(uintptr(ptr) + 9)
     828           1 :         // This is an MVCC key.
     829           1 :         if mvccLogical == 0 {
     830           1 :                 *(*byte)(ptr) = 9
     831           1 :                 return ki.Buf[:len(ki.Buf)+10]
     832           1 :         }
     833             : 
     834             :         // Inline binary.BigEndian.PutUint32.
     835           0 :         *(*byte)(ptr) = byte(mvccWall >> 24)
     836           0 :         *(*byte)(unsafe.Pointer(uintptr(ptr) + 1)) = byte(mvccWall >> 16)
     837           0 :         *(*byte)(unsafe.Pointer(uintptr(ptr) + 2)) = byte(mvccWall >> 8)
     838           0 :         *(*byte)(unsafe.Pointer(uintptr(ptr) + 3)) = byte(mvccWall)
     839           0 :         *(*byte)(unsafe.Pointer(uintptr(ptr) + 4)) = 13
     840           0 :         return ki.Buf[:len(ki.Buf)+14]
     841             : }
     842             : 
     843             : // MaterializeUserKeyWithSyntheticSuffix is part of the KeySeeker interface.
     844             : func (ks *cockroachKeySeeker) MaterializeUserKeyWithSyntheticSuffix(
     845             :         ki *colblk.PrefixBytesIter, suffix []byte, prevRow, row int,
     846           0 : ) []byte {
     847           0 :         if prevRow+1 == row && prevRow >= 0 {
     848           0 :                 ks.roachKeys.SetNext(ki)
     849           0 :         } else {
     850           0 :                 ks.roachKeys.SetAt(ki, row)
     851           0 :         }
     852             : 
     853             :         // Slice first, to check that the capacity is sufficient.
     854           0 :         res := ki.Buf[:len(ki.Buf)+1+len(suffix)]
     855           0 :         ptr := unsafe.Pointer(uintptr(unsafe.Pointer(unsafe.SliceData(ki.Buf))) + uintptr(len(ki.Buf)))
     856           0 :         *(*byte)(ptr) = 0
     857           0 :         memmove(unsafe.Pointer(uintptr(ptr)+1), unsafe.Pointer(unsafe.SliceData(suffix)), uintptr(len(suffix)))
     858           0 :         return res
     859             : }
     860             : 
     861             : // Release is part of the KeySeeker interface.
     862           0 : func (ks *cockroachKeySeeker) Release() {
     863           0 :         *ks = cockroachKeySeeker{}
     864           0 :         cockroachKeySeekerPool.Put(ks)
     865           0 : }
     866             : 
     867             : //go:linkname memmove runtime.memmove
     868             : func memmove(to, from unsafe.Pointer, n uintptr)

Generated by: LCOV version 1.14