LCOV - code coverage report
Current view: top level - pebble/internal/rangekey - rangekey.go (source / functions) Hit Total Coverage
Test: 2024-07-20 08:16Z 4f01ec48 - meta test only.lcov Lines: 199 246 80.9 %
Date: 2024-07-20 08:16:48 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : // Package rangekey provides facilities for encoding, decoding and merging range
       6             : // keys.
       7             : //
       8             : // Range keys map a span of keyspan `[start, end)`, at an optional suffix, to a
       9             : // value.
      10             : //
      11             : // # Encoding
      12             : //
      13             : // Unlike other Pebble keys, range keys encode several fields of information:
      14             : // start key, end key, suffix and value. Internally within Pebble and its
      15             : // sstables, all keys including range keys are represented as a key-value tuple.
      16             : // Range keys map to internal key-value tuples by mapping the start key to the
      17             : // key and encoding the remainder of the fields in the value.
      18             : //
      19             : // ## `RANGEKEYSET`
      20             : //
      21             : // A `RANGEKEYSET` represents one more range keys set over a single region of
      22             : // user key space. Each represented range key must have a unique suffix.  A
      23             : // `RANGEKEYSET` encapsulates a start key, an end key and a set of SuffixValue
      24             : // pairs.
      25             : //
      26             : // A `RANGEKEYSET` key's user key holds the start key. Its value is a varstring
      27             : // end key, followed by a set of SuffixValue pairs. A `RANGEKEYSET` may have
      28             : // multiple SuffixValue pairs if the keyspan was set at multiple unique suffix
      29             : // values.
      30             : //
      31             : // ## `RANGEKEYUNSET`
      32             : //
      33             : // A `RANGEKEYUNSET` represents the removal of range keys at specific suffixes
      34             : // over a single region of user key space. A `RANGEKEYUNSET` encapsulates a
      35             : // start key, an end key and a set of suffixes.
      36             : //
      37             : // A `RANGEKEYUNSET` key's user key holds the start key. Its value is a
      38             : // varstring end key, followed by a set of suffixes. A `RANGEKEYUNSET` may have
      39             : // multiple suffixes if the keyspan was unset at multiple unique suffixes.
      40             : //
      41             : // ## `RANGEKEYDEL`
      42             : //
      43             : // A `RANGEKEYDEL` represents the removal of all range keys over a single region
      44             : // of user key space, regardless of suffix. A `RANGEKEYDEL` encapsulates a
      45             : // start key and an end key. The end key is stored in the value, without any
      46             : // varstring length prefixing.
      47             : package rangekey
      48             : 
      49             : // TODO(jackson): Document the encoding of RANGEKEYSET and RANGEKEYUNSET values
      50             : // once we're confident they're stable.
      51             : 
      52             : import (
      53             :         "encoding/binary"
      54             : 
      55             :         "github.com/cockroachdb/pebble/internal/base"
      56             :         "github.com/cockroachdb/pebble/internal/invariants"
      57             :         "github.com/cockroachdb/pebble/internal/keyspan"
      58             : )
      59             : 
      60             : // Encode takes a Span containing only range keys. It invokes the provided
      61             : // closure with the encoded internal keys that represent the Span's state. The
      62             : // keys and values passed to emit are only valid until the closure returns.
      63             : // If emit returns an error, Encode stops and returns the error.
      64           0 : func Encode(s *keyspan.Span, emit func(k base.InternalKey, v []byte) error) error {
      65           0 :         enc := Encoder{Emit: emit}
      66           0 :         return enc.Encode(s)
      67           0 : }
      68             : 
      69             : // An Encoder encodes range keys into their on-disk InternalKey format. An
      70             : // Encoder holds internal buffers, reused between Emit calls.
      71             : type Encoder struct {
      72             :         Emit   func(base.InternalKey, []byte) error
      73             :         buf    []byte
      74             :         unsets [][]byte
      75             :         sets   []SuffixValue
      76             : }
      77             : 
      78             : // Encode takes a Span containing only range keys. It invokes the Encoder's Emit
      79             : // closure with the encoded internal keys that represent the Span's state. The
      80             : // keys and values passed to emit are only valid until the closure returns.  If
      81             : // Emit returns an error, Encode stops and returns the error.
      82             : //
      83             : // The encoded key-value pair passed to Emit is only valid until the closure
      84             : // completes.
      85           1 : func (e *Encoder) Encode(s *keyspan.Span) error {
      86           1 :         if s.Empty() {
      87           0 :                 return nil
      88           0 :         }
      89             : 
      90             :         // This for loop iterates through the span's keys, which are sorted by
      91             :         // sequence number descending, grouping them into sequence numbers. All keys
      92             :         // with identical sequence numbers are flushed together.
      93           1 :         var del bool
      94           1 :         var seqNum base.SeqNum
      95           1 :         for i := range s.Keys {
      96           1 :                 if i == 0 || s.Keys[i].SeqNum() != seqNum {
      97           1 :                         if i > 0 {
      98           1 :                                 // Flush all the existing internal keys that exist at seqNum.
      99           1 :                                 if err := e.flush(s, seqNum, del); err != nil {
     100           0 :                                         return err
     101           0 :                                 }
     102             :                         }
     103             : 
     104             :                         // Reset sets, unsets, del.
     105           1 :                         seqNum = s.Keys[i].SeqNum()
     106           1 :                         del = false
     107           1 :                         e.sets = e.sets[:0]
     108           1 :                         e.unsets = e.unsets[:0]
     109             :                 }
     110             : 
     111           1 :                 switch s.Keys[i].Kind() {
     112           1 :                 case base.InternalKeyKindRangeKeySet:
     113           1 :                         e.sets = append(e.sets, SuffixValue{
     114           1 :                                 Suffix: s.Keys[i].Suffix,
     115           1 :                                 Value:  s.Keys[i].Value,
     116           1 :                         })
     117           1 :                 case base.InternalKeyKindRangeKeyUnset:
     118           1 :                         e.unsets = append(e.unsets, s.Keys[i].Suffix)
     119           1 :                 case base.InternalKeyKindRangeKeyDelete:
     120           1 :                         del = true
     121           0 :                 default:
     122           0 :                         return base.CorruptionErrorf("pebble: %s key kind is not a range key", s.Keys[i].Kind())
     123             :                 }
     124             :         }
     125           1 :         return e.flush(s, seqNum, del)
     126             : }
     127             : 
     128             : // flush constructs internal keys for accumulated key state, and emits the
     129             : // internal keys.
     130           1 : func (e *Encoder) flush(s *keyspan.Span, seqNum base.SeqNum, del bool) error {
     131           1 :         if len(e.sets) > 0 {
     132           1 :                 ik := base.MakeInternalKey(s.Start, seqNum, base.InternalKeyKindRangeKeySet)
     133           1 :                 l := EncodedSetValueLen(s.End, e.sets)
     134           1 :                 if l > cap(e.buf) {
     135           1 :                         e.buf = make([]byte, l)
     136           1 :                 }
     137           1 :                 EncodeSetValue(e.buf[:l], s.End, e.sets)
     138           1 :                 if err := e.Emit(ik, e.buf[:l]); err != nil {
     139           0 :                         return err
     140           0 :                 }
     141             :         }
     142           1 :         if len(e.unsets) > 0 {
     143           1 :                 ik := base.MakeInternalKey(s.Start, seqNum, base.InternalKeyKindRangeKeyUnset)
     144           1 :                 l := EncodedUnsetValueLen(s.End, e.unsets)
     145           1 :                 if l > cap(e.buf) {
     146           1 :                         e.buf = make([]byte, l)
     147           1 :                 }
     148           1 :                 EncodeUnsetValue(e.buf[:l], s.End, e.unsets)
     149           1 :                 if err := e.Emit(ik, e.buf[:l]); err != nil {
     150           0 :                         return err
     151           0 :                 }
     152             :         }
     153           1 :         if del {
     154           1 :                 ik := base.MakeInternalKey(s.Start, seqNum, base.InternalKeyKindRangeKeyDelete)
     155           1 :                 // s.End is stored directly in the value for RangeKeyDeletes.
     156           1 :                 if err := e.Emit(ik, s.End); err != nil {
     157           0 :                         return err
     158           0 :                 }
     159             :         }
     160           1 :         return nil
     161             : }
     162             : 
     163             : // Decode takes an internal key pair encoding range key(s) and returns a decoded
     164             : // keyspan containing the keys. If keysBuf is provided, keys will be appended to
     165             : // it.
     166           1 : func Decode(ik base.InternalKey, v []byte, keysBuf []keyspan.Key) (keyspan.Span, error) {
     167           1 :         var s keyspan.Span
     168           1 :         s.Start = ik.UserKey
     169           1 :         var err error
     170           1 :         s.End, v, err = DecodeEndKey(ik.Kind(), v)
     171           1 :         if err != nil {
     172           0 :                 return keyspan.Span{}, err
     173           0 :         }
     174           1 :         s.Keys, err = appendKeys(keysBuf, ik, v)
     175           1 :         if err != nil {
     176           0 :                 return keyspan.Span{}, err
     177           0 :         }
     178           1 :         return s, nil
     179             : }
     180             : 
     181             : // DecodeIntoSpan decodes an internal key pair encoding range key(s) and appends
     182             : // them to the given span. The start and end keys must match those in the span.
     183           1 : func DecodeIntoSpan(cmp base.Compare, ik base.InternalKey, v []byte, s *keyspan.Span) error {
     184           1 :         // Hydrate the user key bounds.
     185           1 :         startKey := ik.UserKey
     186           1 :         endKey, v, err := DecodeEndKey(ik.Kind(), v)
     187           1 :         if err != nil {
     188           0 :                 return err
     189           0 :         }
     190             :         // This function should only be called when ik.UserKey matches the Start of
     191             :         // the span we already have. If this is not the case, it is a bug in the
     192             :         // calling code.
     193           1 :         if invariants.Enabled && cmp(s.Start, startKey) != 0 {
     194           0 :                 return base.AssertionFailedf("DecodeIntoSpan called with different start key")
     195           0 :         }
     196             :         // The value can come from disk or from the user, so we want to check the end
     197             :         // key in all builds.
     198           1 :         if cmp(s.End, endKey) != 0 {
     199           0 :                 return base.CorruptionErrorf("pebble: corrupt range key fragmentation")
     200           0 :         }
     201           1 :         s.Keys, err = appendKeys(s.Keys, ik, v)
     202           1 :         return err
     203             : }
     204             : 
     205           1 : func appendKeys(buf []keyspan.Key, ik base.InternalKey, v []byte) ([]keyspan.Key, error) {
     206           1 :         // Hydrate the contents of the range key(s).
     207           1 :         switch ik.Kind() {
     208           1 :         case base.InternalKeyKindRangeKeySet:
     209           1 :                 for len(v) > 0 {
     210           1 :                         var sv SuffixValue
     211           1 :                         var ok bool
     212           1 :                         sv, v, ok = decodeSuffixValue(v)
     213           1 :                         if !ok {
     214           0 :                                 return nil, base.CorruptionErrorf("pebble: unable to decode range key suffix-value tuple")
     215           0 :                         }
     216           1 :                         buf = append(buf, keyspan.Key{
     217           1 :                                 Trailer: ik.Trailer,
     218           1 :                                 Suffix:  sv.Suffix,
     219           1 :                                 Value:   sv.Value,
     220           1 :                         })
     221             :                 }
     222           1 :         case base.InternalKeyKindRangeKeyUnset:
     223           1 :                 for len(v) > 0 {
     224           1 :                         var suffix []byte
     225           1 :                         var ok bool
     226           1 :                         suffix, v, ok = decodeSuffix(v)
     227           1 :                         if !ok {
     228           0 :                                 return nil, base.CorruptionErrorf("pebble: unable to decode range key unset suffix")
     229           0 :                         }
     230           1 :                         buf = append(buf, keyspan.Key{
     231           1 :                                 Trailer: ik.Trailer,
     232           1 :                                 Suffix:  suffix,
     233           1 :                         })
     234             :                 }
     235           1 :         case base.InternalKeyKindRangeKeyDelete:
     236           1 :                 if len(v) > 0 {
     237           0 :                         return nil, base.CorruptionErrorf("pebble: RANGEKEYDELs must not contain additional data")
     238           0 :                 }
     239           1 :                 buf = append(buf, keyspan.Key{Trailer: ik.Trailer})
     240           0 :         default:
     241           0 :                 return nil, base.CorruptionErrorf("pebble: %s is not a range key", ik.Kind())
     242             :         }
     243           1 :         return buf, nil
     244             : }
     245             : 
     246             : // SuffixValue represents a tuple of a suffix and a corresponding value. A
     247             : // physical RANGEKEYSET key may contain many logical RangeKeySets, each
     248             : // represented with a separate SuffixValue tuple.
     249             : type SuffixValue struct {
     250             :         Suffix []byte
     251             :         Value  []byte
     252             : }
     253             : 
     254             : // encodedSetSuffixValuesLen precomputes the length of the given slice of
     255             : // SuffixValues, when encoded for a RangeKeySet. It may be used to construct a
     256             : // buffer of the appropriate size before encoding.
     257           1 : func encodedSetSuffixValuesLen(suffixValues []SuffixValue) int {
     258           1 :         var n int
     259           1 :         for i := 0; i < len(suffixValues); i++ {
     260           1 :                 n += lenVarint(len(suffixValues[i].Suffix))
     261           1 :                 n += len(suffixValues[i].Suffix)
     262           1 :                 n += lenVarint(len(suffixValues[i].Value))
     263           1 :                 n += len(suffixValues[i].Value)
     264           1 :         }
     265           1 :         return n
     266             : }
     267             : 
     268             : // encodeSetSuffixValues encodes a slice of SuffixValues for a RangeKeySet into
     269             : // dst. The length of dst must be greater than or equal to
     270             : // encodedSetSuffixValuesLen. encodeSetSuffixValues returns the number of bytes
     271             : // written, which should always equal the EncodedSetValueLen with the same
     272             : // arguments.
     273           1 : func encodeSetSuffixValues(dst []byte, suffixValues []SuffixValue) int {
     274           1 :         // Encode the list of (suffix, value-len) tuples.
     275           1 :         var n int
     276           1 :         for i := 0; i < len(suffixValues); i++ {
     277           1 :                 // Encode the length of the suffix.
     278           1 :                 n += binary.PutUvarint(dst[n:], uint64(len(suffixValues[i].Suffix)))
     279           1 : 
     280           1 :                 // Encode the suffix itself.
     281           1 :                 n += copy(dst[n:], suffixValues[i].Suffix)
     282           1 : 
     283           1 :                 // Encode the value length.
     284           1 :                 n += binary.PutUvarint(dst[n:], uint64(len(suffixValues[i].Value)))
     285           1 : 
     286           1 :                 // Encode the value itself.
     287           1 :                 n += copy(dst[n:], suffixValues[i].Value)
     288           1 :         }
     289           1 :         return n
     290             : }
     291             : 
     292             : // EncodedSetValueLen precomputes the length of a RangeKeySet's value when
     293             : // encoded. It may be used to construct a buffer of the appropriate size before
     294             : // encoding.
     295           1 : func EncodedSetValueLen(endKey []byte, suffixValues []SuffixValue) int {
     296           1 :         n := lenVarint(len(endKey))
     297           1 :         n += len(endKey)
     298           1 :         n += encodedSetSuffixValuesLen(suffixValues)
     299           1 :         return n
     300           1 : }
     301             : 
     302             : // EncodeSetValue encodes a RangeKeySet's value into dst. The length of dst must
     303             : // be greater than or equal to EncodedSetValueLen. EncodeSetValue returns the
     304             : // number of bytes written, which should always equal the EncodedSetValueLen
     305             : // with the same arguments.
     306           1 : func EncodeSetValue(dst []byte, endKey []byte, suffixValues []SuffixValue) int {
     307           1 :         // First encode the end key as a varstring.
     308           1 :         n := binary.PutUvarint(dst, uint64(len(endKey)))
     309           1 :         n += copy(dst[n:], endKey)
     310           1 :         n += encodeSetSuffixValues(dst[n:], suffixValues)
     311           1 :         return n
     312           1 : }
     313             : 
     314             : // DecodeEndKey reads the end key from the beginning of a range key (RANGEKEYSET,
     315             : // RANGEKEYUNSET or RANGEKEYDEL)'s physical encoded value. Both sets and unsets
     316             : // encode the range key, plus additional data in the value.
     317           1 : func DecodeEndKey(kind base.InternalKeyKind, data []byte) (endKey, value []byte, _ error) {
     318           1 :         switch kind {
     319           1 :         case base.InternalKeyKindRangeKeyDelete:
     320           1 :                 // No splitting is necessary for range key deletes. The value is the end
     321           1 :                 // key, and there is no additional associated value.
     322           1 :                 return data, nil, nil
     323             : 
     324           1 :         case base.InternalKeyKindRangeKeySet, base.InternalKeyKindRangeKeyUnset:
     325           1 :                 v, n := binary.Uvarint(data)
     326           1 :                 if n <= 0 || uint64(n)+v >= uint64(len(data)) {
     327           0 :                         return nil, nil, base.CorruptionErrorf("pebble: unable to decode range key end from %s", kind)
     328           0 :                 }
     329           1 :                 endKey, value = data[n:n+int(v)], data[n+int(v):]
     330           1 :                 return endKey, value, nil
     331             : 
     332           0 :         default:
     333           0 :                 return nil, nil, base.AssertionFailedf("key kind %s is not a range key kind", kind)
     334             :         }
     335             : }
     336             : 
     337             : // decodeSuffixValue decodes a single encoded SuffixValue from a RangeKeySet's
     338             : // split value. The end key must have already been stripped from the
     339             : // RangeKeySet's value (see DecodeEndKey).
     340           1 : func decodeSuffixValue(data []byte) (sv SuffixValue, rest []byte, ok bool) {
     341           1 :         // Decode the suffix.
     342           1 :         sv.Suffix, data, ok = decodeVarstring(data)
     343           1 :         if !ok {
     344           0 :                 return SuffixValue{}, nil, false
     345           0 :         }
     346             :         // Decode the value.
     347           1 :         sv.Value, data, ok = decodeVarstring(data)
     348           1 :         if !ok {
     349           0 :                 return SuffixValue{}, nil, false
     350           0 :         }
     351           1 :         return sv, data, true
     352             : }
     353             : 
     354             : // encodedUnsetSuffixesLen precomputes the length of the given slice of
     355             : // suffixes, when encoded for a RangeKeyUnset. It may be used to construct a
     356             : // buffer of the appropriate size before encoding.
     357           1 : func encodedUnsetSuffixesLen(suffixes [][]byte) int {
     358           1 :         var n int
     359           1 :         for i := 0; i < len(suffixes); i++ {
     360           1 :                 n += lenVarint(len(suffixes[i]))
     361           1 :                 n += len(suffixes[i])
     362           1 :         }
     363           1 :         return n
     364             : }
     365             : 
     366             : // encodeUnsetSuffixes encodes a slice of suffixes for a RangeKeyUnset into dst.
     367             : // The length of dst must be greater than or equal to EncodedUnsetSuffixesLen.
     368             : // EncodeUnsetSuffixes returns the number of bytes written, which should always
     369             : // equal the EncodedUnsetSuffixesLen with the same arguments.
     370           1 : func encodeUnsetSuffixes(dst []byte, suffixes [][]byte) int {
     371           1 :         // Encode the list of (suffix, value-len) tuples.
     372           1 :         var n int
     373           1 :         for i := 0; i < len(suffixes); i++ {
     374           1 :                 //  Encode the length of the suffix.
     375           1 :                 n += binary.PutUvarint(dst[n:], uint64(len(suffixes[i])))
     376           1 : 
     377           1 :                 // Encode the suffix itself.
     378           1 :                 n += copy(dst[n:], suffixes[i])
     379           1 :         }
     380           1 :         return n
     381             : }
     382             : 
     383             : // EncodedUnsetValueLen precomputes the length of a RangeKeyUnset's value when
     384             : // encoded.  It may be used to construct a buffer of the appropriate size before
     385             : // encoding.
     386           1 : func EncodedUnsetValueLen(endKey []byte, suffixes [][]byte) int {
     387           1 :         n := lenVarint(len(endKey))
     388           1 :         n += len(endKey)
     389           1 :         n += encodedUnsetSuffixesLen(suffixes)
     390           1 :         return n
     391           1 : }
     392             : 
     393             : // EncodeUnsetValue encodes a RangeKeyUnset's value into dst. The length of dst
     394             : // must be greater than or equal to EncodedUnsetValueLen. EncodeUnsetValue
     395             : // returns the number of bytes written, which should always equal the
     396             : // EncodedUnsetValueLen with the same arguments.
     397           1 : func EncodeUnsetValue(dst []byte, endKey []byte, suffixes [][]byte) int {
     398           1 :         // First encode the end key as a varstring.
     399           1 :         n := binary.PutUvarint(dst, uint64(len(endKey)))
     400           1 :         n += copy(dst[n:], endKey)
     401           1 :         n += encodeUnsetSuffixes(dst[n:], suffixes)
     402           1 :         return n
     403           1 : }
     404             : 
     405             : // decodeSuffix decodes a single suffix from the beginning of data. If decoding
     406             : // suffixes from a RangeKeyUnset's value, the end key must have already been
     407             : // stripped from the RangeKeyUnset's value (see DecodeEndKey).
     408           1 : func decodeSuffix(data []byte) (suffix, rest []byte, ok bool) {
     409           1 :         return decodeVarstring(data)
     410           1 : }
     411             : 
     412           1 : func decodeVarstring(data []byte) (v, rest []byte, ok bool) {
     413           1 :         // Decode the length of the string.
     414           1 :         l, n := binary.Uvarint(data)
     415           1 :         if n <= 0 {
     416           0 :                 return nil, nil, ok
     417           0 :         }
     418             : 
     419             :         // Extract the string itself.
     420           1 :         return data[n : n+int(l)], data[n+int(l):], true
     421             : }
     422             : 
     423             : // IsRangeKey returns true if the given key kind is one of the range key kinds.
     424           1 : func IsRangeKey(kind base.InternalKeyKind) bool {
     425           1 :         switch kind {
     426             :         case base.InternalKeyKindRangeKeyDelete,
     427             :                 base.InternalKeyKindRangeKeyUnset,
     428           1 :                 base.InternalKeyKindRangeKeySet:
     429           1 :                 return true
     430           1 :         default:
     431           1 :                 return false
     432             :         }
     433             : }
     434             : 
     435           1 : func lenVarint(v int) (n int) {
     436           1 :         x := uint32(v)
     437           1 :         n++
     438           1 :         for x >= 0x80 {
     439           0 :                 x >>= 7
     440           0 :                 n++
     441           0 :         }
     442           1 :         return n
     443             : }

Generated by: LCOV version 1.14