LCOV - code coverage report
Current view: top level - pebble/sstable/block - block.go (source / functions) Hit Total Coverage
Test: 2024-08-27 08:16Z a70d5b3c - tests + meta.lcov Lines: 62 77 80.5 %
Date: 2024-08-27 08:17:22 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : package block
       6             : 
       7             : import (
       8             :         "bytes"
       9             :         "encoding/binary"
      10             :         "fmt"
      11             : 
      12             :         "github.com/cespare/xxhash/v2"
      13             :         "github.com/cockroachdb/errors"
      14             :         "github.com/cockroachdb/pebble/internal/base"
      15             :         "github.com/cockroachdb/pebble/internal/crc"
      16             : )
      17             : 
      18             : // Handle is the file offset and length of a block.
      19             : type Handle struct {
      20             :         Offset, Length uint64
      21             : }
      22             : 
      23             : // EncodeVarints encodes the block handle into dst using a variable-width
      24             : // encoding and returns the number of bytes written.
      25           2 : func (h Handle) EncodeVarints(dst []byte) int {
      26           2 :         n := binary.PutUvarint(dst, h.Offset)
      27           2 :         m := binary.PutUvarint(dst[n:], h.Length)
      28           2 :         return n + m
      29           2 : }
      30             : 
      31             : // HandleWithProperties is used for data blocks and first/lower level index
      32             : // blocks, since they can be annotated using BlockPropertyCollectors.
      33             : type HandleWithProperties struct {
      34             :         Handle
      35             :         Props []byte
      36             : }
      37             : 
      38             : // EncodeVarints encodes the block handle and properties into dst using a
      39             : // variable-width encoding and returns the number of bytes written.
      40           2 : func (h HandleWithProperties) EncodeVarints(dst []byte) []byte {
      41           2 :         n := h.Handle.EncodeVarints(dst)
      42           2 :         dst = append(dst[:n], h.Props...)
      43           2 :         return dst
      44           2 : }
      45             : 
      46             : // DecodeHandle returns the block handle encoded in a variable-width encoding at
      47             : // the start of src, as well as the number of bytes it occupies. It returns zero
      48             : // if given invalid input. A block handle for a data block or a first/lower
      49             : // level index block should not be decoded using DecodeHandle since the caller
      50             : // may validate that the number of bytes decoded is equal to the length of src,
      51             : // which will be false if the properties are not decoded. In those cases the
      52             : // caller should use DecodeHandleWithProperties.
      53           2 : func DecodeHandle(src []byte) (Handle, int) {
      54           2 :         offset, n := binary.Uvarint(src)
      55           2 :         length, m := binary.Uvarint(src[n:])
      56           2 :         if n == 0 || m == 0 {
      57           0 :                 return Handle{}, 0
      58           0 :         }
      59           2 :         return Handle{Offset: offset, Length: length}, n + m
      60             : }
      61             : 
      62             : // DecodeHandleWithProperties returns the block handle and properties encoded in
      63             : // a variable-width encoding at the start of src. src needs to be exactly the
      64             : // length that was encoded. This method must be used for data block and
      65             : // first/lower level index blocks. The properties in the block handle point to
      66             : // the bytes in src.
      67           2 : func DecodeHandleWithProperties(src []byte) (HandleWithProperties, error) {
      68           2 :         bh, n := DecodeHandle(src)
      69           2 :         if n == 0 {
      70           0 :                 return HandleWithProperties{}, errors.Errorf("invalid block.Handle")
      71           0 :         }
      72           2 :         return HandleWithProperties{
      73           2 :                 Handle: bh,
      74           2 :                 Props:  src[n:],
      75           2 :         }, nil
      76             : }
      77             : 
      78             : // TrailerLen is the length of the trailer at the end of a block.
      79             : const TrailerLen = 5
      80             : 
      81             : // Trailer is the trailer at the end of a block, encoding the block type
      82             : // (compression) and a checksum.
      83             : type Trailer = [TrailerLen]byte
      84             : 
      85             : // MakeTrailer constructs a trailer from a block type and a checksum.
      86           2 : func MakeTrailer(blockType byte, checksum uint32) (t Trailer) {
      87           2 :         t[0] = blockType
      88           2 :         binary.LittleEndian.PutUint32(t[1:5], checksum)
      89           2 :         return t
      90           2 : }
      91             : 
      92             : // ChecksumType specifies the checksum used for blocks.
      93             : type ChecksumType byte
      94             : 
      95             : // The available checksum types. These values are part of the durable format and
      96             : // should not be changed.
      97             : const (
      98             :         ChecksumTypeNone     ChecksumType = 0
      99             :         ChecksumTypeCRC32c   ChecksumType = 1
     100             :         ChecksumTypeXXHash   ChecksumType = 2
     101             :         ChecksumTypeXXHash64 ChecksumType = 3
     102             : )
     103             : 
     104             : // String implements fmt.Stringer.
     105           1 : func (t ChecksumType) String() string {
     106           1 :         switch t {
     107           1 :         case ChecksumTypeCRC32c:
     108           1 :                 return "crc32c"
     109           0 :         case ChecksumTypeNone:
     110           0 :                 return "none"
     111           0 :         case ChecksumTypeXXHash:
     112           0 :                 return "xxhash"
     113           0 :         case ChecksumTypeXXHash64:
     114           0 :                 return "xxhash64"
     115           0 :         default:
     116           0 :                 panic(errors.Newf("sstable: unknown checksum type: %d", t))
     117             :         }
     118             : }
     119             : 
     120             : // A Checksummer calculates checksums for blocks.
     121             : type Checksummer struct {
     122             :         Type     ChecksumType
     123             :         xxHasher *xxhash.Digest
     124             : }
     125             : 
     126             : // Checksum computes a checksum over the provided block and block type.
     127           2 : func (c *Checksummer) Checksum(block []byte, blockType []byte) (checksum uint32) {
     128           2 :         // Calculate the checksum.
     129           2 :         switch c.Type {
     130           2 :         case ChecksumTypeCRC32c:
     131           2 :                 checksum = crc.New(block).Update(blockType).Value()
     132           1 :         case ChecksumTypeXXHash64:
     133           1 :                 if c.xxHasher == nil {
     134           1 :                         c.xxHasher = xxhash.New()
     135           1 :                 } else {
     136           1 :                         c.xxHasher.Reset()
     137           1 :                 }
     138           1 :                 c.xxHasher.Write(block)
     139           1 :                 c.xxHasher.Write(blockType)
     140           1 :                 checksum = uint32(c.xxHasher.Sum64())
     141           0 :         default:
     142           0 :                 panic(errors.Newf("unsupported checksum type: %d", c.Type))
     143             :         }
     144           2 :         return checksum
     145             : }
     146             : 
     147             : // DataBlockIterator is a type constraint for implementations of block iterators
     148             : // over data blocks. It's currently satisifed by the *rowblk.Iter type.
     149             : //
     150             : // DataBlockIterator requires that the type be a pointer to its type parameter,
     151             : // D, to allow sstable iterators embed the block iterator within its struct. See
     152             : // this example from the Go generics proposal:
     153             : // https://go.googlesource.com/proposal/+/refs/heads/master/design/43651-type-parameters.md#pointer-method-example
     154             : type DataBlockIterator[D any] interface {
     155             :         base.InternalIterator
     156             : 
     157             :         // Handle returns the handle to the block.
     158             :         Handle() BufferHandle
     159             :         // InitHandle initializes the block from the provided buffer handle.
     160             :         InitHandle(base.Compare, base.Split, BufferHandle, IterTransforms) error
     161             :         // Valid returns true if the iterator is currently positioned at a valid KV.
     162             :         Valid() bool
     163             :         // KV returns the key-value pair at the current iterator position. The
     164             :         // iterator must be Valid().
     165             :         KV() *base.InternalKV
     166             :         // ResetForReuse resets the iterator so that it may be used for iteration
     167             :         // over a new block. It returns the non-pointer D type to allow resetting
     168             :         // while initializing the containing struct, eg::
     169             :         //   iter = sstableIter{dataBlockIter: iter.dataBlockIter.ResetForReuse()}
     170             :         ResetForReuse() D
     171             :         // FirstUserKey returns the first user key contained within the data block.
     172             :         FirstUserKey() []byte
     173             :         // Invalidate invalidates the block iterator, removing references to the block
     174             :         // it was initialized with.
     175             :         Invalidate()
     176             :         // IsDataInvalidated returns true when the iterator has been invalidated
     177             :         // using an Invalidate call.
     178             :         //
     179             :         // NB: this is different from Valid which indicates whether the current *KV*
     180             :         // is valid.
     181             :         IsDataInvalidated() bool
     182             : 
     183             :         *D // non-interface type constraint element
     184             : }
     185             : 
     186             : // IndexBlockIterator is a type constraint for implementations of block
     187             : // iterators over index blocks. It's currently satisifed by the
     188             : // *rowblk.IndexIter type.
     189             : //
     190             : // IndexBlockIterator requires that the type be a pointer to its type parameter,
     191             : // I, to allow sstable iterators embed the block iterator within its struct. See
     192             : // this example from the Go generics proposal:
     193             : // https://go.googlesource.com/proposal/+/refs/heads/master/design/43651-type-parameters.md#pointer-method-example
     194             : type IndexBlockIterator[I any] interface {
     195             :         // InitHandle initializes an iterator from the provided block handle.
     196             :         InitHandle(base.Compare, base.Split, BufferHandle, IterTransforms) error
     197             :         // ResetForReuse resets the index iterator for reuse, retaining buffers to
     198             :         // avoid future allocations.
     199             :         ResetForReuse() I
     200             :         // Valid returns true if the iterator is currently positioned at a valid
     201             :         // block handle.
     202             :         Valid() bool
     203             :         // IsDataInvalidated returns true when the iterator has been invalidated
     204             :         // using an Invalidate call.
     205             :         //
     206             :         // NB: this is different from Valid which indicates whether the iterator is
     207             :         // currently positioned over a valid block entry.
     208             :         IsDataInvalidated() bool
     209             :         // Invalidate invalidates the block iterator, removing references to the
     210             :         // block it was initialized with.
     211             :         Invalidate()
     212             :         // Handle returns the underlying block buffer handle, if the iterator was
     213             :         // initialized with one.
     214             :         Handle() BufferHandle
     215             :         // Separator returns the separator at the iterator's current position. The
     216             :         // iterator must be positioned at a valid row. A Separator is a user key
     217             :         // guaranteed to be greater than or equal to every key contained within the
     218             :         // referenced block(s).
     219             :         Separator() []byte
     220             :         // BlockHandleWithProperties decodes the block handle with any encoded
     221             :         // properties at the iterator's current position.
     222             :         BlockHandleWithProperties() (HandleWithProperties, error)
     223             :         // SeekGE seeks the index iterator to the first block entry with a separator
     224             :         // key greater or equal to the given key. If it returns true, the iterator
     225             :         // is positioned over the first block that might contain the key [key], and
     226             :         // following blocks have keys ≥ Separator(). It returns false if the seek
     227             :         // key is greater than all index block separators.
     228             :         SeekGE(key []byte) bool
     229             :         // First seeks index iterator to the first block entry. It returns false if
     230             :         // the index block is empty.
     231             :         First() bool
     232             :         // Last seeks index iterator to the last block entry. It returns false if
     233             :         // the index block is empty.
     234             :         Last() bool
     235             :         // Next steps the index iterator to the next block entry. It returns false
     236             :         // if the index block is exhausted.
     237             :         Next() bool
     238             :         // Prev steps the index iterator to the previous block entry. It returns
     239             :         // false if the index block is exhausted.
     240             :         Prev() bool
     241             :         // Close closes the iterator, releasing any resources it holds.
     242             :         Close() error
     243             : 
     244             :         *I // non-interface type constraint element
     245             : }
     246             : 
     247             : // IterTransforms allow on-the-fly transformation of data at iteration time.
     248             : //
     249             : // These transformations could in principle be implemented as block transforms
     250             : // (at least for non-virtual sstables), but applying them during iteration is
     251             : // preferable.
     252             : type IterTransforms struct {
     253             :         SyntheticSeqNum    SyntheticSeqNum
     254             :         HideObsoletePoints bool
     255             :         SyntheticPrefix    SyntheticPrefix
     256             :         SyntheticSuffix    SyntheticSuffix
     257             : }
     258             : 
     259             : // NoTransforms is the default value for IterTransforms.
     260             : var NoTransforms = IterTransforms{}
     261             : 
     262             : // FragmentIterTransforms allow on-the-fly transformation of range deletion or
     263             : // range key data at iteration time.
     264             : type FragmentIterTransforms struct {
     265             :         SyntheticSeqNum SyntheticSeqNum
     266             :         // ElideSameSeqNum, if true, returns only the first-occurring (in forward
     267             :         // order) keyspan.Key for each sequence number.
     268             :         ElideSameSeqNum bool
     269             :         SyntheticPrefix SyntheticPrefix
     270             :         SyntheticSuffix SyntheticSuffix
     271             : }
     272             : 
     273             : // NoFragmentTransforms is the default value for IterTransforms.
     274             : var NoFragmentTransforms = FragmentIterTransforms{}
     275             : 
     276             : // SyntheticSeqNum is used to override all sequence numbers in a table. It is
     277             : // set to a non-zero value when the table was created externally and ingested
     278             : // whole.
     279             : type SyntheticSeqNum base.SeqNum
     280             : 
     281             : // NoSyntheticSeqNum is the default zero value for SyntheticSeqNum, which
     282             : // disables overriding the sequence number.
     283             : const NoSyntheticSeqNum SyntheticSeqNum = 0
     284             : 
     285             : // SyntheticSuffix will replace every suffix of every point key surfaced during
     286             : // block iteration. A synthetic suffix can be used if:
     287             : //  1. no two keys in the sst share the same prefix; and
     288             : //  2. pebble.Compare(prefix + replacementSuffix, prefix + originalSuffix) < 0,
     289             : //     for all keys in the backing sst which have a suffix (i.e. originalSuffix
     290             : //     is not empty).
     291             : //
     292             : // Range dels are not supported when synthetic suffix is used.
     293             : //
     294             : // For range keys, the synthetic suffix applies to the suffix that is part of
     295             : // RangeKeySet - if it is non-empty, it is replaced with the SyntheticSuffix.
     296             : // RangeKeyUnset keys are not supported when a synthetic suffix is used.
     297             : type SyntheticSuffix []byte
     298             : 
     299             : // IsSet returns true if the synthetic suffix is not enpty.
     300           2 : func (ss SyntheticSuffix) IsSet() bool {
     301           2 :         return len(ss) > 0
     302           2 : }
     303             : 
     304             : // SyntheticPrefix represents a byte slice that is implicitly prepended to every
     305             : // key in a file being read or accessed by a reader.  Note that the table is
     306             : // assumed to contain "prefix-less" keys that become full keys when prepended
     307             : // with the synthetic prefix. The table's bloom filters are constructed only on
     308             : // the "prefix-less" keys in the table, but interactions with the file including
     309             : // seeks and reads, will all behave as if the file had been constructed from
     310             : // keys that did include the prefix. Note that all Compare operations may act on
     311             : // a prefix-less key as the synthetic prefix will never modify key metadata
     312             : // stored in the key suffix.
     313             : //
     314             : // NB: Since this transformation currently only applies to point keys, a block
     315             : // with range keys cannot be iterated over with a synthetic prefix.
     316             : type SyntheticPrefix []byte
     317             : 
     318             : // IsSet returns true if the synthetic prefix is not enpty.
     319           2 : func (sp SyntheticPrefix) IsSet() bool {
     320           2 :         return len(sp) > 0
     321           2 : }
     322             : 
     323             : // Apply prepends the synthetic prefix to a key.
     324           2 : func (sp SyntheticPrefix) Apply(key []byte) []byte {
     325           2 :         res := make([]byte, 0, len(sp)+len(key))
     326           2 :         res = append(res, sp...)
     327           2 :         res = append(res, key...)
     328           2 :         return res
     329           2 : }
     330             : 
     331             : // Invert removes the synthetic prefix from a key.
     332           2 : func (sp SyntheticPrefix) Invert(key []byte) []byte {
     333           2 :         res, ok := bytes.CutPrefix(key, sp)
     334           2 :         if !ok {
     335           0 :                 panic(fmt.Sprintf("unexpected prefix: %s", key))
     336             :         }
     337           2 :         return res
     338             : }

Generated by: LCOV version 1.14