LCOV - code coverage report
Current view: top level - pebble/internal/rangekey - rangekey.go (source / functions) Hit Total Coverage
Test: 2024-02-18 08:16Z d10a640f - tests + meta.lcov Lines: 197 227 86.8 %
Date: 2024-02-18 08:16:58 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : // Package rangekey provides facilities for encoding, decoding and merging range
       6             : // keys.
       7             : //
       8             : // Range keys map a span of keyspan `[start, end)`, at an optional suffix, to a
       9             : // value.
      10             : //
      11             : // # Encoding
      12             : //
      13             : // Unlike other Pebble keys, range keys encode several fields of information:
      14             : // start key, end key, suffix and value. Internally within Pebble and its
      15             : // sstables, all keys including range keys are represented as a key-value tuple.
      16             : // Range keys map to internal key-value tuples by mapping the start key to the
      17             : // key and encoding the remainder of the fields in the value.
      18             : //
      19             : // ## `RANGEKEYSET`
      20             : //
      21             : // A `RANGEKEYSET` represents one more range keys set over a single region of
      22             : // user key space. Each represented range key must have a unique suffix.  A
      23             : // `RANGEKEYSET` encapsulates a start key, an end key and a set of SuffixValue
      24             : // pairs.
      25             : //
      26             : // A `RANGEKEYSET` key's user key holds the start key. Its value is a varstring
      27             : // end key, followed by a set of SuffixValue pairs. A `RANGEKEYSET` may have
      28             : // multiple SuffixValue pairs if the keyspan was set at multiple unique suffix
      29             : // values.
      30             : //
      31             : // ## `RANGEKEYUNSET`
      32             : //
      33             : // A `RANGEKEYUNSET` represents the removal of range keys at specific suffixes
      34             : // over a single region of user key space. A `RANGEKEYUNSET` encapsulates a
      35             : // start key, an end key and a set of suffixes.
      36             : //
      37             : // A `RANGEKEYUNSET` key's user key holds the start key. Its value is a
      38             : // varstring end key, followed by a set of suffixes. A `RANGEKEYUNSET` may have
      39             : // multiple suffixes if the keyspan was unset at multiple unique suffixes.
      40             : //
      41             : // ## `RANGEKEYDEL`
      42             : //
      43             : // A `RANGEKEYDEL` represents the removal of all range keys over a single region
      44             : // of user key space, regardless of suffix. A `RANGEKEYDEL` encapsulates a
      45             : // start key and an end key. The end key is stored in the value, without any
      46             : // varstring length prefixing.
      47             : package rangekey
      48             : 
      49             : // TODO(jackson): Document the encoding of RANGEKEYSET and RANGEKEYUNSET values
      50             : // once we're confident they're stable.
      51             : 
      52             : import (
      53             :         "encoding/binary"
      54             : 
      55             :         "github.com/cockroachdb/errors"
      56             :         "github.com/cockroachdb/pebble/internal/base"
      57             :         "github.com/cockroachdb/pebble/internal/keyspan"
      58             : )
      59             : 
      60             : // Encode takes a Span containing only range keys. It invokes the provided
      61             : // closure with the encoded internal keys that represent the Span's state. The
      62             : // keys and values passed to emit are only valid until the closure returns.
      63             : // If emit returns an error, Encode stops and returns the error.
      64           2 : func Encode(s *keyspan.Span, emit func(k base.InternalKey, v []byte) error) error {
      65           2 :         enc := Encoder{Emit: emit}
      66           2 :         return enc.Encode(s)
      67           2 : }
      68             : 
      69             : // An Encoder encodes range keys into their on-disk InternalKey format. An
      70             : // Encoder holds internal buffers, reused between Emit calls.
      71             : type Encoder struct {
      72             :         Emit   func(base.InternalKey, []byte) error
      73             :         buf    []byte
      74             :         unsets [][]byte
      75             :         sets   []SuffixValue
      76             : }
      77             : 
      78             : // Encode takes a Span containing only range keys. It invokes the Encoder's Emit
      79             : // closure with the encoded internal keys that represent the Span's state. The
      80             : // keys and values passed to emit are only valid until the closure returns.  If
      81             : // Emit returns an error, Encode stops and returns the error.
      82             : //
      83             : // The encoded key-value pair passed to Emit is only valid until the closure
      84             : // completes.
      85           2 : func (e *Encoder) Encode(s *keyspan.Span) error {
      86           2 :         if s.Empty() {
      87           1 :                 return nil
      88           1 :         }
      89             : 
      90             :         // This for loop iterates through the span's keys, which are sorted by
      91             :         // sequence number descending, grouping them into sequence numbers. All keys
      92             :         // with identical sequence numbers are flushed together.
      93           2 :         var del bool
      94           2 :         var seqNum uint64
      95           2 :         for i := range s.Keys {
      96           2 :                 if i == 0 || s.Keys[i].SeqNum() != seqNum {
      97           2 :                         if i > 0 {
      98           2 :                                 // Flush all the existing internal keys that exist at seqNum.
      99           2 :                                 if err := e.flush(s, seqNum, del); err != nil {
     100           0 :                                         return err
     101           0 :                                 }
     102             :                         }
     103             : 
     104             :                         // Reset sets, unsets, del.
     105           2 :                         seqNum = s.Keys[i].SeqNum()
     106           2 :                         del = false
     107           2 :                         e.sets = e.sets[:0]
     108           2 :                         e.unsets = e.unsets[:0]
     109             :                 }
     110             : 
     111           2 :                 switch s.Keys[i].Kind() {
     112           2 :                 case base.InternalKeyKindRangeKeySet:
     113           2 :                         e.sets = append(e.sets, SuffixValue{
     114           2 :                                 Suffix: s.Keys[i].Suffix,
     115           2 :                                 Value:  s.Keys[i].Value,
     116           2 :                         })
     117           2 :                 case base.InternalKeyKindRangeKeyUnset:
     118           2 :                         e.unsets = append(e.unsets, s.Keys[i].Suffix)
     119           2 :                 case base.InternalKeyKindRangeKeyDelete:
     120           2 :                         del = true
     121           0 :                 default:
     122           0 :                         return base.CorruptionErrorf("pebble: %s key kind is not a range key", s.Keys[i].Kind())
     123             :                 }
     124             :         }
     125           2 :         return e.flush(s, seqNum, del)
     126             : }
     127             : 
     128             : // flush constructs internal keys for accumulated key state, and emits the
     129             : // internal keys.
     130           2 : func (e *Encoder) flush(s *keyspan.Span, seqNum uint64, del bool) error {
     131           2 :         if len(e.sets) > 0 {
     132           2 :                 ik := base.MakeInternalKey(s.Start, seqNum, base.InternalKeyKindRangeKeySet)
     133           2 :                 l := EncodedSetValueLen(s.End, e.sets)
     134           2 :                 if l > cap(e.buf) {
     135           2 :                         e.buf = make([]byte, l)
     136           2 :                 }
     137           2 :                 EncodeSetValue(e.buf[:l], s.End, e.sets)
     138           2 :                 if err := e.Emit(ik, e.buf[:l]); err != nil {
     139           0 :                         return err
     140           0 :                 }
     141             :         }
     142           2 :         if len(e.unsets) > 0 {
     143           2 :                 ik := base.MakeInternalKey(s.Start, seqNum, base.InternalKeyKindRangeKeyUnset)
     144           2 :                 l := EncodedUnsetValueLen(s.End, e.unsets)
     145           2 :                 if l > cap(e.buf) {
     146           2 :                         e.buf = make([]byte, l)
     147           2 :                 }
     148           2 :                 EncodeUnsetValue(e.buf[:l], s.End, e.unsets)
     149           2 :                 if err := e.Emit(ik, e.buf[:l]); err != nil {
     150           0 :                         return err
     151           0 :                 }
     152             :         }
     153           2 :         if del {
     154           2 :                 ik := base.MakeInternalKey(s.Start, seqNum, base.InternalKeyKindRangeKeyDelete)
     155           2 :                 // s.End is stored directly in the value for RangeKeyDeletes.
     156           2 :                 if err := e.Emit(ik, s.End); err != nil {
     157           0 :                         return err
     158           0 :                 }
     159             :         }
     160           2 :         return nil
     161             : }
     162             : 
     163             : // Decode takes an internal key pair encoding range key(s) and returns a decoded
     164             : // keyspan containing the keys. If keysDst is provided, keys will be appended to
     165             : // keysDst.
     166           2 : func Decode(ik base.InternalKey, v []byte, keysDst []keyspan.Key) (keyspan.Span, error) {
     167           2 :         var s keyspan.Span
     168           2 : 
     169           2 :         // Hydrate the user key bounds.
     170           2 :         s.Start = ik.UserKey
     171           2 :         var ok bool
     172           2 :         s.End, v, ok = DecodeEndKey(ik.Kind(), v)
     173           2 :         if !ok {
     174           0 :                 return keyspan.Span{}, base.CorruptionErrorf("pebble: unable to decode range key end from %s", ik.Kind())
     175           0 :         }
     176           2 :         s.Keys = keysDst
     177           2 : 
     178           2 :         // Hydrate the contents of the range key(s).
     179           2 :         switch ik.Kind() {
     180           2 :         case base.InternalKeyKindRangeKeySet:
     181           2 :                 for len(v) > 0 {
     182           2 :                         var sv SuffixValue
     183           2 :                         sv, v, ok = decodeSuffixValue(v)
     184           2 :                         if !ok {
     185           0 :                                 return keyspan.Span{}, base.CorruptionErrorf("pebble: unable to decode range key suffix-value tuple")
     186           0 :                         }
     187           2 :                         s.Keys = append(s.Keys, keyspan.Key{
     188           2 :                                 Trailer: ik.Trailer,
     189           2 :                                 Suffix:  sv.Suffix,
     190           2 :                                 Value:   sv.Value,
     191           2 :                         })
     192             :                 }
     193           2 :         case base.InternalKeyKindRangeKeyUnset:
     194           2 :                 for len(v) > 0 {
     195           2 :                         var suffix []byte
     196           2 :                         suffix, v, ok = decodeSuffix(v)
     197           2 :                         if !ok {
     198           0 :                                 return keyspan.Span{}, base.CorruptionErrorf("pebble: unable to decode range key unset suffix")
     199           0 :                         }
     200           2 :                         s.Keys = append(s.Keys, keyspan.Key{
     201           2 :                                 Trailer: ik.Trailer,
     202           2 :                                 Suffix:  suffix,
     203           2 :                         })
     204             :                 }
     205           2 :         case base.InternalKeyKindRangeKeyDelete:
     206           2 :                 if len(v) > 0 {
     207           0 :                         return keyspan.Span{}, base.CorruptionErrorf("pebble: RANGEKEYDELs must not contain additional data")
     208           0 :                 }
     209           2 :                 s.Keys = append(s.Keys, keyspan.Key{Trailer: ik.Trailer})
     210           0 :         default:
     211           0 :                 return keyspan.Span{}, base.CorruptionErrorf("pebble: %s is not a range key", ik.Kind())
     212             :         }
     213           2 :         return s, nil
     214             : }
     215             : 
     216             : // SuffixValue represents a tuple of a suffix and a corresponding value. A
     217             : // physical RANGEKEYSET key may contain many logical RangeKeySets, each
     218             : // represented with a separate SuffixValue tuple.
     219             : type SuffixValue struct {
     220             :         Suffix []byte
     221             :         Value  []byte
     222             : }
     223             : 
     224             : // encodedSetSuffixValuesLen precomputes the length of the given slice of
     225             : // SuffixValues, when encoded for a RangeKeySet. It may be used to construct a
     226             : // buffer of the appropriate size before encoding.
     227           2 : func encodedSetSuffixValuesLen(suffixValues []SuffixValue) int {
     228           2 :         var n int
     229           2 :         for i := 0; i < len(suffixValues); i++ {
     230           2 :                 n += lenVarint(len(suffixValues[i].Suffix))
     231           2 :                 n += len(suffixValues[i].Suffix)
     232           2 :                 n += lenVarint(len(suffixValues[i].Value))
     233           2 :                 n += len(suffixValues[i].Value)
     234           2 :         }
     235           2 :         return n
     236             : }
     237             : 
     238             : // encodeSetSuffixValues encodes a slice of SuffixValues for a RangeKeySet into
     239             : // dst. The length of dst must be greater than or equal to
     240             : // encodedSetSuffixValuesLen. encodeSetSuffixValues returns the number of bytes
     241             : // written, which should always equal the EncodedSetValueLen with the same
     242             : // arguments.
     243           2 : func encodeSetSuffixValues(dst []byte, suffixValues []SuffixValue) int {
     244           2 :         // Encode the list of (suffix, value-len) tuples.
     245           2 :         var n int
     246           2 :         for i := 0; i < len(suffixValues); i++ {
     247           2 :                 // Encode the length of the suffix.
     248           2 :                 n += binary.PutUvarint(dst[n:], uint64(len(suffixValues[i].Suffix)))
     249           2 : 
     250           2 :                 // Encode the suffix itself.
     251           2 :                 n += copy(dst[n:], suffixValues[i].Suffix)
     252           2 : 
     253           2 :                 // Encode the value length.
     254           2 :                 n += binary.PutUvarint(dst[n:], uint64(len(suffixValues[i].Value)))
     255           2 : 
     256           2 :                 // Encode the value itself.
     257           2 :                 n += copy(dst[n:], suffixValues[i].Value)
     258           2 :         }
     259           2 :         return n
     260             : }
     261             : 
     262             : // EncodedSetValueLen precomputes the length of a RangeKeySet's value when
     263             : // encoded. It may be used to construct a buffer of the appropriate size before
     264             : // encoding.
     265           2 : func EncodedSetValueLen(endKey []byte, suffixValues []SuffixValue) int {
     266           2 :         n := lenVarint(len(endKey))
     267           2 :         n += len(endKey)
     268           2 :         n += encodedSetSuffixValuesLen(suffixValues)
     269           2 :         return n
     270           2 : }
     271             : 
     272             : // EncodeSetValue encodes a RangeKeySet's value into dst. The length of dst must
     273             : // be greater than or equal to EncodedSetValueLen. EncodeSetValue returns the
     274             : // number of bytes written, which should always equal the EncodedSetValueLen
     275             : // with the same arguments.
     276           2 : func EncodeSetValue(dst []byte, endKey []byte, suffixValues []SuffixValue) int {
     277           2 :         // First encode the end key as a varstring.
     278           2 :         n := binary.PutUvarint(dst, uint64(len(endKey)))
     279           2 :         n += copy(dst[n:], endKey)
     280           2 :         n += encodeSetSuffixValues(dst[n:], suffixValues)
     281           2 :         return n
     282           2 : }
     283             : 
     284             : // DecodeEndKey reads the end key from the beginning of a range key (RANGEKEYSET,
     285             : // RANGEKEYUNSET or RANGEKEYDEL)'s physical encoded value. Both sets and unsets
     286             : // encode the range key, plus additional data in the value.
     287           2 : func DecodeEndKey(kind base.InternalKeyKind, data []byte) (endKey, value []byte, ok bool) {
     288           2 :         switch kind {
     289           2 :         case base.InternalKeyKindRangeKeyDelete:
     290           2 :                 // No splitting is necessary for range key deletes. The value is the end
     291           2 :                 // key, and there is no additional associated value.
     292           2 :                 return data, nil, true
     293           2 :         case base.InternalKeyKindRangeKeySet, base.InternalKeyKindRangeKeyUnset:
     294           2 :                 v, n := binary.Uvarint(data)
     295           2 :                 if n <= 0 || uint64(n)+v >= uint64(len(data)) {
     296           0 :                         return nil, nil, false
     297           0 :                 }
     298           2 :                 endKey, value = data[n:n+int(v)], data[n+int(v):]
     299           2 :                 return endKey, value, true
     300           0 :         default:
     301           0 :                 panic(errors.Newf("key kind %s is not a range key kind", kind))
     302             :         }
     303             : }
     304             : 
     305             : // decodeSuffixValue decodes a single encoded SuffixValue from a RangeKeySet's
     306             : // split value. The end key must have already been stripped from the
     307             : // RangeKeySet's value (see DecodeEndKey).
     308           2 : func decodeSuffixValue(data []byte) (sv SuffixValue, rest []byte, ok bool) {
     309           2 :         // Decode the suffix.
     310           2 :         sv.Suffix, data, ok = decodeVarstring(data)
     311           2 :         if !ok {
     312           0 :                 return SuffixValue{}, nil, false
     313           0 :         }
     314             :         // Decode the value.
     315           2 :         sv.Value, data, ok = decodeVarstring(data)
     316           2 :         if !ok {
     317           0 :                 return SuffixValue{}, nil, false
     318           0 :         }
     319           2 :         return sv, data, true
     320             : }
     321             : 
     322             : // encodedUnsetSuffixesLen precomputes the length of the given slice of
     323             : // suffixes, when encoded for a RangeKeyUnset. It may be used to construct a
     324             : // buffer of the appropriate size before encoding.
     325           2 : func encodedUnsetSuffixesLen(suffixes [][]byte) int {
     326           2 :         var n int
     327           2 :         for i := 0; i < len(suffixes); i++ {
     328           2 :                 n += lenVarint(len(suffixes[i]))
     329           2 :                 n += len(suffixes[i])
     330           2 :         }
     331           2 :         return n
     332             : }
     333             : 
     334             : // encodeUnsetSuffixes encodes a slice of suffixes for a RangeKeyUnset into dst.
     335             : // The length of dst must be greater than or equal to EncodedUnsetSuffixesLen.
     336             : // EncodeUnsetSuffixes returns the number of bytes written, which should always
     337             : // equal the EncodedUnsetSuffixesLen with the same arguments.
     338           2 : func encodeUnsetSuffixes(dst []byte, suffixes [][]byte) int {
     339           2 :         // Encode the list of (suffix, value-len) tuples.
     340           2 :         var n int
     341           2 :         for i := 0; i < len(suffixes); i++ {
     342           2 :                 //  Encode the length of the suffix.
     343           2 :                 n += binary.PutUvarint(dst[n:], uint64(len(suffixes[i])))
     344           2 : 
     345           2 :                 // Encode the suffix itself.
     346           2 :                 n += copy(dst[n:], suffixes[i])
     347           2 :         }
     348           2 :         return n
     349             : }
     350             : 
     351             : // EncodedUnsetValueLen precomputes the length of a RangeKeyUnset's value when
     352             : // encoded.  It may be used to construct a buffer of the appropriate size before
     353             : // encoding.
     354           2 : func EncodedUnsetValueLen(endKey []byte, suffixes [][]byte) int {
     355           2 :         n := lenVarint(len(endKey))
     356           2 :         n += len(endKey)
     357           2 :         n += encodedUnsetSuffixesLen(suffixes)
     358           2 :         return n
     359           2 : }
     360             : 
     361             : // EncodeUnsetValue encodes a RangeKeyUnset's value into dst. The length of dst
     362             : // must be greater than or equal to EncodedUnsetValueLen. EncodeUnsetValue
     363             : // returns the number of bytes written, which should always equal the
     364             : // EncodedUnsetValueLen with the same arguments.
     365           2 : func EncodeUnsetValue(dst []byte, endKey []byte, suffixes [][]byte) int {
     366           2 :         // First encode the end key as a varstring.
     367           2 :         n := binary.PutUvarint(dst, uint64(len(endKey)))
     368           2 :         n += copy(dst[n:], endKey)
     369           2 :         n += encodeUnsetSuffixes(dst[n:], suffixes)
     370           2 :         return n
     371           2 : }
     372             : 
     373             : // decodeSuffix decodes a single suffix from the beginning of data. If decoding
     374             : // suffixes from a RangeKeyUnset's value, the end key must have already been
     375             : // stripped from the RangeKeyUnset's value (see DecodeEndKey).
     376           2 : func decodeSuffix(data []byte) (suffix, rest []byte, ok bool) {
     377           2 :         return decodeVarstring(data)
     378           2 : }
     379             : 
     380           2 : func decodeVarstring(data []byte) (v, rest []byte, ok bool) {
     381           2 :         // Decode the length of the string.
     382           2 :         l, n := binary.Uvarint(data)
     383           2 :         if n <= 0 {
     384           0 :                 return nil, nil, ok
     385           0 :         }
     386             : 
     387             :         // Extract the string itself.
     388           2 :         return data[n : n+int(l)], data[n+int(l):], true
     389             : }
     390             : 
     391             : // IsRangeKey returns true if the given key kind is one of the range key kinds.
     392           2 : func IsRangeKey(kind base.InternalKeyKind) bool {
     393           2 :         switch kind {
     394             :         case base.InternalKeyKindRangeKeyDelete,
     395             :                 base.InternalKeyKindRangeKeyUnset,
     396           2 :                 base.InternalKeyKindRangeKeySet:
     397           2 :                 return true
     398           2 :         default:
     399           2 :                 return false
     400             :         }
     401             : }
     402             : 
     403           2 : func lenVarint(v int) (n int) {
     404           2 :         x := uint32(v)
     405           2 :         n++
     406           2 :         for x >= 0x80 {
     407           1 :                 x >>= 7
     408           1 :                 n++
     409           1 :         }
     410           2 :         return n
     411             : }

Generated by: LCOV version 1.14