LCOV - code coverage report
Current view: top level - pebble/internal/rangekey - rangekey.go (source / functions) Hit Total Coverage
Test: 2024-03-04 08:25Z dd51d85c - tests only.lcov Lines: 197 227 86.8 %
Date: 2024-03-04 08:26:13 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : // Package rangekey provides facilities for encoding, decoding and merging range
       6             : // keys.
       7             : //
       8             : // Range keys map a span of keyspan `[start, end)`, at an optional suffix, to a
       9             : // value.
      10             : //
      11             : // # Encoding
      12             : //
      13             : // Unlike other Pebble keys, range keys encode several fields of information:
      14             : // start key, end key, suffix and value. Internally within Pebble and its
      15             : // sstables, all keys including range keys are represented as a key-value tuple.
      16             : // Range keys map to internal key-value tuples by mapping the start key to the
      17             : // key and encoding the remainder of the fields in the value.
      18             : //
      19             : // ## `RANGEKEYSET`
      20             : //
      21             : // A `RANGEKEYSET` represents one more range keys set over a single region of
      22             : // user key space. Each represented range key must have a unique suffix.  A
      23             : // `RANGEKEYSET` encapsulates a start key, an end key and a set of SuffixValue
      24             : // pairs.
      25             : //
      26             : // A `RANGEKEYSET` key's user key holds the start key. Its value is a varstring
      27             : // end key, followed by a set of SuffixValue pairs. A `RANGEKEYSET` may have
      28             : // multiple SuffixValue pairs if the keyspan was set at multiple unique suffix
      29             : // values.
      30             : //
      31             : // ## `RANGEKEYUNSET`
      32             : //
      33             : // A `RANGEKEYUNSET` represents the removal of range keys at specific suffixes
      34             : // over a single region of user key space. A `RANGEKEYUNSET` encapsulates a
      35             : // start key, an end key and a set of suffixes.
      36             : //
      37             : // A `RANGEKEYUNSET` key's user key holds the start key. Its value is a
      38             : // varstring end key, followed by a set of suffixes. A `RANGEKEYUNSET` may have
      39             : // multiple suffixes if the keyspan was unset at multiple unique suffixes.
      40             : //
      41             : // ## `RANGEKEYDEL`
      42             : //
      43             : // A `RANGEKEYDEL` represents the removal of all range keys over a single region
      44             : // of user key space, regardless of suffix. A `RANGEKEYDEL` encapsulates a
      45             : // start key and an end key. The end key is stored in the value, without any
      46             : // varstring length prefixing.
      47             : package rangekey
      48             : 
      49             : // TODO(jackson): Document the encoding of RANGEKEYSET and RANGEKEYUNSET values
      50             : // once we're confident they're stable.
      51             : 
      52             : import (
      53             :         "encoding/binary"
      54             : 
      55             :         "github.com/cockroachdb/errors"
      56             :         "github.com/cockroachdb/pebble/internal/base"
      57             :         "github.com/cockroachdb/pebble/internal/keyspan"
      58             : )
      59             : 
      60             : // Encode takes a Span containing only range keys. It invokes the provided
      61             : // closure with the encoded internal keys that represent the Span's state. The
      62             : // keys and values passed to emit are only valid until the closure returns.
      63             : // If emit returns an error, Encode stops and returns the error.
      64           1 : func Encode(s *keyspan.Span, emit func(k base.InternalKey, v []byte) error) error {
      65           1 :         enc := Encoder{Emit: emit}
      66           1 :         return enc.Encode(s)
      67           1 : }
      68             : 
      69             : // An Encoder encodes range keys into their on-disk InternalKey format. An
      70             : // Encoder holds internal buffers, reused between Emit calls.
      71             : type Encoder struct {
      72             :         Emit   func(base.InternalKey, []byte) error
      73             :         buf    []byte
      74             :         unsets [][]byte
      75             :         sets   []SuffixValue
      76             : }
      77             : 
      78             : // Encode takes a Span containing only range keys. It invokes the Encoder's Emit
      79             : // closure with the encoded internal keys that represent the Span's state. The
      80             : // keys and values passed to emit are only valid until the closure returns.  If
      81             : // Emit returns an error, Encode stops and returns the error.
      82             : //
      83             : // The encoded key-value pair passed to Emit is only valid until the closure
      84             : // completes.
      85           1 : func (e *Encoder) Encode(s *keyspan.Span) error {
      86           1 :         if s.Empty() {
      87           1 :                 return nil
      88           1 :         }
      89             : 
      90             :         // This for loop iterates through the span's keys, which are sorted by
      91             :         // sequence number descending, grouping them into sequence numbers. All keys
      92             :         // with identical sequence numbers are flushed together.
      93           1 :         var del bool
      94           1 :         var seqNum uint64
      95           1 :         for i := range s.Keys {
      96           1 :                 if i == 0 || s.Keys[i].SeqNum() != seqNum {
      97           1 :                         if i > 0 {
      98           1 :                                 // Flush all the existing internal keys that exist at seqNum.
      99           1 :                                 if err := e.flush(s, seqNum, del); err != nil {
     100           0 :                                         return err
     101           0 :                                 }
     102             :                         }
     103             : 
     104             :                         // Reset sets, unsets, del.
     105           1 :                         seqNum = s.Keys[i].SeqNum()
     106           1 :                         del = false
     107           1 :                         e.sets = e.sets[:0]
     108           1 :                         e.unsets = e.unsets[:0]
     109             :                 }
     110             : 
     111           1 :                 switch s.Keys[i].Kind() {
     112           1 :                 case base.InternalKeyKindRangeKeySet:
     113           1 :                         e.sets = append(e.sets, SuffixValue{
     114           1 :                                 Suffix: s.Keys[i].Suffix,
     115           1 :                                 Value:  s.Keys[i].Value,
     116           1 :                         })
     117           1 :                 case base.InternalKeyKindRangeKeyUnset:
     118           1 :                         e.unsets = append(e.unsets, s.Keys[i].Suffix)
     119           1 :                 case base.InternalKeyKindRangeKeyDelete:
     120           1 :                         del = true
     121           0 :                 default:
     122           0 :                         return base.CorruptionErrorf("pebble: %s key kind is not a range key", s.Keys[i].Kind())
     123             :                 }
     124             :         }
     125           1 :         return e.flush(s, seqNum, del)
     126             : }
     127             : 
     128             : // flush constructs internal keys for accumulated key state, and emits the
     129             : // internal keys.
     130           1 : func (e *Encoder) flush(s *keyspan.Span, seqNum uint64, del bool) error {
     131           1 :         if len(e.sets) > 0 {
     132           1 :                 ik := base.MakeInternalKey(s.Start, seqNum, base.InternalKeyKindRangeKeySet)
     133           1 :                 l := EncodedSetValueLen(s.End, e.sets)
     134           1 :                 if l > cap(e.buf) {
     135           1 :                         e.buf = make([]byte, l)
     136           1 :                 }
     137           1 :                 EncodeSetValue(e.buf[:l], s.End, e.sets)
     138           1 :                 if err := e.Emit(ik, e.buf[:l]); err != nil {
     139           0 :                         return err
     140           0 :                 }
     141             :         }
     142           1 :         if len(e.unsets) > 0 {
     143           1 :                 ik := base.MakeInternalKey(s.Start, seqNum, base.InternalKeyKindRangeKeyUnset)
     144           1 :                 l := EncodedUnsetValueLen(s.End, e.unsets)
     145           1 :                 if l > cap(e.buf) {
     146           1 :                         e.buf = make([]byte, l)
     147           1 :                 }
     148           1 :                 EncodeUnsetValue(e.buf[:l], s.End, e.unsets)
     149           1 :                 if err := e.Emit(ik, e.buf[:l]); err != nil {
     150           0 :                         return err
     151           0 :                 }
     152             :         }
     153           1 :         if del {
     154           1 :                 ik := base.MakeInternalKey(s.Start, seqNum, base.InternalKeyKindRangeKeyDelete)
     155           1 :                 // s.End is stored directly in the value for RangeKeyDeletes.
     156           1 :                 if err := e.Emit(ik, s.End); err != nil {
     157           0 :                         return err
     158           0 :                 }
     159             :         }
     160           1 :         return nil
     161             : }
     162             : 
     163             : // Decode takes an internal key pair encoding range key(s) and returns a decoded
     164             : // keyspan containing the keys. If keysDst is provided, keys will be appended to
     165             : // keysDst.
     166           1 : func Decode(ik base.InternalKey, v []byte, keysDst []keyspan.Key) (keyspan.Span, error) {
     167           1 :         var s keyspan.Span
     168           1 : 
     169           1 :         // Hydrate the user key bounds.
     170           1 :         s.Start = ik.UserKey
     171           1 :         var ok bool
     172           1 :         s.End, v, ok = DecodeEndKey(ik.Kind(), v)
     173           1 :         if !ok {
     174           0 :                 return keyspan.Span{}, base.CorruptionErrorf("pebble: unable to decode range key end from %s", ik.Kind())
     175           0 :         }
     176           1 :         s.Keys = keysDst
     177           1 : 
     178           1 :         // Hydrate the contents of the range key(s).
     179           1 :         switch ik.Kind() {
     180           1 :         case base.InternalKeyKindRangeKeySet:
     181           1 :                 for len(v) > 0 {
     182           1 :                         var sv SuffixValue
     183           1 :                         sv, v, ok = decodeSuffixValue(v)
     184           1 :                         if !ok {
     185           0 :                                 return keyspan.Span{}, base.CorruptionErrorf("pebble: unable to decode range key suffix-value tuple")
     186           0 :                         }
     187           1 :                         s.Keys = append(s.Keys, keyspan.Key{
     188           1 :                                 Trailer: ik.Trailer,
     189           1 :                                 Suffix:  sv.Suffix,
     190           1 :                                 Value:   sv.Value,
     191           1 :                         })
     192             :                 }
     193           1 :         case base.InternalKeyKindRangeKeyUnset:
     194           1 :                 for len(v) > 0 {
     195           1 :                         var suffix []byte
     196           1 :                         suffix, v, ok = decodeSuffix(v)
     197           1 :                         if !ok {
     198           0 :                                 return keyspan.Span{}, base.CorruptionErrorf("pebble: unable to decode range key unset suffix")
     199           0 :                         }
     200           1 :                         s.Keys = append(s.Keys, keyspan.Key{
     201           1 :                                 Trailer: ik.Trailer,
     202           1 :                                 Suffix:  suffix,
     203           1 :                         })
     204             :                 }
     205           1 :         case base.InternalKeyKindRangeKeyDelete:
     206           1 :                 if len(v) > 0 {
     207           0 :                         return keyspan.Span{}, base.CorruptionErrorf("pebble: RANGEKEYDELs must not contain additional data")
     208           0 :                 }
     209           1 :                 s.Keys = append(s.Keys, keyspan.Key{Trailer: ik.Trailer})
     210           0 :         default:
     211           0 :                 return keyspan.Span{}, base.CorruptionErrorf("pebble: %s is not a range key", ik.Kind())
     212             :         }
     213           1 :         return s, nil
     214             : }
     215             : 
     216             : // SuffixValue represents a tuple of a suffix and a corresponding value. A
     217             : // physical RANGEKEYSET key may contain many logical RangeKeySets, each
     218             : // represented with a separate SuffixValue tuple.
     219             : type SuffixValue struct {
     220             :         Suffix []byte
     221             :         Value  []byte
     222             : }
     223             : 
     224             : // encodedSetSuffixValuesLen precomputes the length of the given slice of
     225             : // SuffixValues, when encoded for a RangeKeySet. It may be used to construct a
     226             : // buffer of the appropriate size before encoding.
     227           1 : func encodedSetSuffixValuesLen(suffixValues []SuffixValue) int {
     228           1 :         var n int
     229           1 :         for i := 0; i < len(suffixValues); i++ {
     230           1 :                 n += lenVarint(len(suffixValues[i].Suffix))
     231           1 :                 n += len(suffixValues[i].Suffix)
     232           1 :                 n += lenVarint(len(suffixValues[i].Value))
     233           1 :                 n += len(suffixValues[i].Value)
     234           1 :         }
     235           1 :         return n
     236             : }
     237             : 
     238             : // encodeSetSuffixValues encodes a slice of SuffixValues for a RangeKeySet into
     239             : // dst. The length of dst must be greater than or equal to
     240             : // encodedSetSuffixValuesLen. encodeSetSuffixValues returns the number of bytes
     241             : // written, which should always equal the EncodedSetValueLen with the same
     242             : // arguments.
     243           1 : func encodeSetSuffixValues(dst []byte, suffixValues []SuffixValue) int {
     244           1 :         // Encode the list of (suffix, value-len) tuples.
     245           1 :         var n int
     246           1 :         for i := 0; i < len(suffixValues); i++ {
     247           1 :                 // Encode the length of the suffix.
     248           1 :                 n += binary.PutUvarint(dst[n:], uint64(len(suffixValues[i].Suffix)))
     249           1 : 
     250           1 :                 // Encode the suffix itself.
     251           1 :                 n += copy(dst[n:], suffixValues[i].Suffix)
     252           1 : 
     253           1 :                 // Encode the value length.
     254           1 :                 n += binary.PutUvarint(dst[n:], uint64(len(suffixValues[i].Value)))
     255           1 : 
     256           1 :                 // Encode the value itself.
     257           1 :                 n += copy(dst[n:], suffixValues[i].Value)
     258           1 :         }
     259           1 :         return n
     260             : }
     261             : 
     262             : // EncodedSetValueLen precomputes the length of a RangeKeySet's value when
     263             : // encoded. It may be used to construct a buffer of the appropriate size before
     264             : // encoding.
     265           1 : func EncodedSetValueLen(endKey []byte, suffixValues []SuffixValue) int {
     266           1 :         n := lenVarint(len(endKey))
     267           1 :         n += len(endKey)
     268           1 :         n += encodedSetSuffixValuesLen(suffixValues)
     269           1 :         return n
     270           1 : }
     271             : 
     272             : // EncodeSetValue encodes a RangeKeySet's value into dst. The length of dst must
     273             : // be greater than or equal to EncodedSetValueLen. EncodeSetValue returns the
     274             : // number of bytes written, which should always equal the EncodedSetValueLen
     275             : // with the same arguments.
     276           1 : func EncodeSetValue(dst []byte, endKey []byte, suffixValues []SuffixValue) int {
     277           1 :         // First encode the end key as a varstring.
     278           1 :         n := binary.PutUvarint(dst, uint64(len(endKey)))
     279           1 :         n += copy(dst[n:], endKey)
     280           1 :         n += encodeSetSuffixValues(dst[n:], suffixValues)
     281           1 :         return n
     282           1 : }
     283             : 
     284             : // DecodeEndKey reads the end key from the beginning of a range key (RANGEKEYSET,
     285             : // RANGEKEYUNSET or RANGEKEYDEL)'s physical encoded value. Both sets and unsets
     286             : // encode the range key, plus additional data in the value.
     287           1 : func DecodeEndKey(kind base.InternalKeyKind, data []byte) (endKey, value []byte, ok bool) {
     288           1 :         switch kind {
     289           1 :         case base.InternalKeyKindRangeKeyDelete:
     290           1 :                 // No splitting is necessary for range key deletes. The value is the end
     291           1 :                 // key, and there is no additional associated value.
     292           1 :                 return data, nil, true
     293           1 :         case base.InternalKeyKindRangeKeySet, base.InternalKeyKindRangeKeyUnset:
     294           1 :                 v, n := binary.Uvarint(data)
     295           1 :                 if n <= 0 || uint64(n)+v >= uint64(len(data)) {
     296           0 :                         return nil, nil, false
     297           0 :                 }
     298           1 :                 endKey, value = data[n:n+int(v)], data[n+int(v):]
     299           1 :                 return endKey, value, true
     300           0 :         default:
     301           0 :                 panic(errors.Newf("key kind %s is not a range key kind", kind))
     302             :         }
     303             : }
     304             : 
     305             : // decodeSuffixValue decodes a single encoded SuffixValue from a RangeKeySet's
     306             : // split value. The end key must have already been stripped from the
     307             : // RangeKeySet's value (see DecodeEndKey).
     308           1 : func decodeSuffixValue(data []byte) (sv SuffixValue, rest []byte, ok bool) {
     309           1 :         // Decode the suffix.
     310           1 :         sv.Suffix, data, ok = decodeVarstring(data)
     311           1 :         if !ok {
     312           0 :                 return SuffixValue{}, nil, false
     313           0 :         }
     314             :         // Decode the value.
     315           1 :         sv.Value, data, ok = decodeVarstring(data)
     316           1 :         if !ok {
     317           0 :                 return SuffixValue{}, nil, false
     318           0 :         }
     319           1 :         return sv, data, true
     320             : }
     321             : 
     322             : // encodedUnsetSuffixesLen precomputes the length of the given slice of
     323             : // suffixes, when encoded for a RangeKeyUnset. It may be used to construct a
     324             : // buffer of the appropriate size before encoding.
     325           1 : func encodedUnsetSuffixesLen(suffixes [][]byte) int {
     326           1 :         var n int
     327           1 :         for i := 0; i < len(suffixes); i++ {
     328           1 :                 n += lenVarint(len(suffixes[i]))
     329           1 :                 n += len(suffixes[i])
     330           1 :         }
     331           1 :         return n
     332             : }
     333             : 
     334             : // encodeUnsetSuffixes encodes a slice of suffixes for a RangeKeyUnset into dst.
     335             : // The length of dst must be greater than or equal to EncodedUnsetSuffixesLen.
     336             : // EncodeUnsetSuffixes returns the number of bytes written, which should always
     337             : // equal the EncodedUnsetSuffixesLen with the same arguments.
     338           1 : func encodeUnsetSuffixes(dst []byte, suffixes [][]byte) int {
     339           1 :         // Encode the list of (suffix, value-len) tuples.
     340           1 :         var n int
     341           1 :         for i := 0; i < len(suffixes); i++ {
     342           1 :                 //  Encode the length of the suffix.
     343           1 :                 n += binary.PutUvarint(dst[n:], uint64(len(suffixes[i])))
     344           1 : 
     345           1 :                 // Encode the suffix itself.
     346           1 :                 n += copy(dst[n:], suffixes[i])
     347           1 :         }
     348           1 :         return n
     349             : }
     350             : 
     351             : // EncodedUnsetValueLen precomputes the length of a RangeKeyUnset's value when
     352             : // encoded.  It may be used to construct a buffer of the appropriate size before
     353             : // encoding.
     354           1 : func EncodedUnsetValueLen(endKey []byte, suffixes [][]byte) int {
     355           1 :         n := lenVarint(len(endKey))
     356           1 :         n += len(endKey)
     357           1 :         n += encodedUnsetSuffixesLen(suffixes)
     358           1 :         return n
     359           1 : }
     360             : 
     361             : // EncodeUnsetValue encodes a RangeKeyUnset's value into dst. The length of dst
     362             : // must be greater than or equal to EncodedUnsetValueLen. EncodeUnsetValue
     363             : // returns the number of bytes written, which should always equal the
     364             : // EncodedUnsetValueLen with the same arguments.
     365           1 : func EncodeUnsetValue(dst []byte, endKey []byte, suffixes [][]byte) int {
     366           1 :         // First encode the end key as a varstring.
     367           1 :         n := binary.PutUvarint(dst, uint64(len(endKey)))
     368           1 :         n += copy(dst[n:], endKey)
     369           1 :         n += encodeUnsetSuffixes(dst[n:], suffixes)
     370           1 :         return n
     371           1 : }
     372             : 
     373             : // decodeSuffix decodes a single suffix from the beginning of data. If decoding
     374             : // suffixes from a RangeKeyUnset's value, the end key must have already been
     375             : // stripped from the RangeKeyUnset's value (see DecodeEndKey).
     376           1 : func decodeSuffix(data []byte) (suffix, rest []byte, ok bool) {
     377           1 :         return decodeVarstring(data)
     378           1 : }
     379             : 
     380           1 : func decodeVarstring(data []byte) (v, rest []byte, ok bool) {
     381           1 :         // Decode the length of the string.
     382           1 :         l, n := binary.Uvarint(data)
     383           1 :         if n <= 0 {
     384           0 :                 return nil, nil, ok
     385           0 :         }
     386             : 
     387             :         // Extract the string itself.
     388           1 :         return data[n : n+int(l)], data[n+int(l):], true
     389             : }
     390             : 
     391             : // IsRangeKey returns true if the given key kind is one of the range key kinds.
     392           1 : func IsRangeKey(kind base.InternalKeyKind) bool {
     393           1 :         switch kind {
     394             :         case base.InternalKeyKindRangeKeyDelete,
     395             :                 base.InternalKeyKindRangeKeyUnset,
     396           1 :                 base.InternalKeyKindRangeKeySet:
     397           1 :                 return true
     398           1 :         default:
     399           1 :                 return false
     400             :         }
     401             : }
     402             : 
     403           1 : func lenVarint(v int) (n int) {
     404           1 :         x := uint32(v)
     405           1 :         n++
     406           1 :         for x >= 0x80 {
     407           1 :                 x >>= 7
     408           1 :                 n++
     409           1 :         }
     410           1 :         return n
     411             : }

Generated by: LCOV version 1.14