LCOV - code coverage report
Current view: top level - pebble/sstable/colblk - index_block.go (source / functions) Hit Total Coverage
Test: 2024-11-30 08:16Z 1724e81e - tests + meta.lcov Lines: 199 204 97.5 %
Date: 2024-11-30 08:17:23 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : package colblk
       6             : 
       7             : import (
       8             :         "slices"
       9             :         "unsafe"
      10             : 
      11             :         "github.com/cockroachdb/errors"
      12             :         "github.com/cockroachdb/pebble/internal/base"
      13             :         "github.com/cockroachdb/pebble/internal/binfmt"
      14             :         "github.com/cockroachdb/pebble/internal/invariants"
      15             :         "github.com/cockroachdb/pebble/internal/treeprinter"
      16             :         "github.com/cockroachdb/pebble/sstable/block"
      17             : )
      18             : 
      19             : const indexBlockCustomHeaderSize = 0
      20             : 
      21             : // IndexBlockWriter writes columnar index blocks. The writer is used for both
      22             : // first-level and second-level index blocks. The index block schema consists of
      23             : // three primary columns:
      24             : //   - Separators: a user key that is ≥ the largest user key in the
      25             : //     corresponding entry, and ≤ the smallest user key in the next entry.
      26             : //     Note that this allows consecutive separators to be equal. This is
      27             : //     possible when snapshots required we preserve duplicate user keys at
      28             : //     different sequence numbers.
      29             : //   - Offsets: the offset of the end of the corresponding block.
      30             : //   - Lengths: the length in bytes of the corresponding block.
      31             : //   - Block properties: a slice encoding arbitrary user-defined block
      32             : //     properties.
      33             : //
      34             : // TODO(jackson): Consider splitting separators into prefixes and suffixes (even
      35             : // without user-defined columns). This would allow us to use prefix compression
      36             : // for the prefix. Separators should typically be suffixless unless two KVs with
      37             : // the same prefix straddle a block boundary. We would need to use a buffer to
      38             : // materialize the separator key when we need to use it outside the context of
      39             : // seeking within the block.
      40             : type IndexBlockWriter struct {
      41             :         separators      RawBytesBuilder
      42             :         offsets         UintBuilder
      43             :         lengths         UintBuilder
      44             :         blockProperties RawBytesBuilder
      45             :         rows            int
      46             :         enc             blockEncoder
      47             : }
      48             : 
      49             : const (
      50             :         indexBlockColumnSeparator = iota
      51             :         indexBlockColumnOffsets
      52             :         indexBlockColumnLengths
      53             :         indexBlockColumnBlockProperties
      54             :         indexBlockColumnCount
      55             : )
      56             : 
      57             : // Init initializes the index block writer.
      58           2 : func (w *IndexBlockWriter) Init() {
      59           2 :         w.separators.Init()
      60           2 :         w.offsets.Init()
      61           2 :         w.lengths.Init()
      62           2 :         w.blockProperties.Init()
      63           2 :         w.rows = 0
      64           2 : }
      65             : 
      66             : // Reset resets the index block writer to its initial state, retaining buffers.
      67           2 : func (w *IndexBlockWriter) Reset() {
      68           2 :         w.separators.Reset()
      69           2 :         w.offsets.Reset()
      70           2 :         w.lengths.Reset()
      71           2 :         w.blockProperties.Reset()
      72           2 :         w.rows = 0
      73           2 :         w.enc.reset()
      74           2 : }
      75             : 
      76             : // Rows returns the number of entries in the index block so far.
      77           2 : func (w *IndexBlockWriter) Rows() int {
      78           2 :         return w.rows
      79           2 : }
      80             : 
      81             : // AddBlockHandle adds a new separator and end offset of a data block to the
      82             : // index block.  Add returns the index of the row.
      83             : //
      84             : // AddBlockHandle should only be used for first-level index blocks.
      85             : func (w *IndexBlockWriter) AddBlockHandle(
      86             :         separator []byte, handle block.Handle, blockProperties []byte,
      87           2 : ) int {
      88           2 :         idx := w.rows
      89           2 :         w.separators.Put(separator)
      90           2 :         w.offsets.Set(w.rows, handle.Offset)
      91           2 :         w.lengths.Set(w.rows, handle.Length)
      92           2 :         w.blockProperties.Put(blockProperties)
      93           2 :         w.rows++
      94           2 :         return idx
      95           2 : }
      96             : 
      97             : // UnsafeSeparator returns the separator of the i'th entry.
      98           2 : func (w *IndexBlockWriter) UnsafeSeparator(i int) []byte {
      99           2 :         return w.separators.UnsafeGet(i)
     100           2 : }
     101             : 
     102             : // Size returns the size of the pending index block.
     103           2 : func (w *IndexBlockWriter) Size() int {
     104           2 :         return w.size(w.rows)
     105           2 : }
     106             : 
     107           2 : func (w *IndexBlockWriter) size(rows int) int {
     108           2 :         off := blockHeaderSize(indexBlockColumnCount, indexBlockCustomHeaderSize)
     109           2 :         off = w.separators.Size(rows, off)
     110           2 :         off = w.offsets.Size(rows, off)
     111           2 :         off = w.lengths.Size(rows, off)
     112           2 :         off = w.blockProperties.Size(rows, off)
     113           2 :         off++
     114           2 :         return int(off)
     115           2 : }
     116             : 
     117             : // Finish serializes the pending index block, including the first [rows] rows.
     118             : // The value of [rows] must be Rows() or Rows()-1.
     119           2 : func (w *IndexBlockWriter) Finish(rows int) []byte {
     120           2 :         if invariants.Enabled && rows != w.rows && rows != w.rows-1 {
     121           0 :                 panic(errors.AssertionFailedf("index block has %d rows; asked to finish %d", w.rows, rows))
     122             :         }
     123             : 
     124           2 :         w.enc.init(w.size(rows), Header{
     125           2 :                 Version: Version1,
     126           2 :                 Columns: indexBlockColumnCount,
     127           2 :                 Rows:    uint32(rows),
     128           2 :         }, indexBlockCustomHeaderSize)
     129           2 :         w.enc.encode(rows, &w.separators)
     130           2 :         w.enc.encode(rows, &w.offsets)
     131           2 :         w.enc.encode(rows, &w.lengths)
     132           2 :         w.enc.encode(rows, &w.blockProperties)
     133           2 :         return w.enc.finish()
     134             : }
     135             : 
     136             : // An IndexBlockDecoder reads columnar index blocks.
     137             : type IndexBlockDecoder struct {
     138             :         separators RawBytes
     139             :         offsets    UnsafeUints
     140             :         lengths    UnsafeUints // only used for second-level index blocks
     141             :         blockProps RawBytes
     142             :         bd         BlockDecoder
     143             : }
     144             : 
     145             : // Init initializes the index block decoder with the given serialized index
     146             : // block.
     147           2 : func (r *IndexBlockDecoder) Init(data []byte) {
     148           2 :         r.bd.Init(data, indexBlockCustomHeaderSize)
     149           2 :         r.separators = r.bd.RawBytes(indexBlockColumnSeparator)
     150           2 :         r.offsets = r.bd.Uints(indexBlockColumnOffsets)
     151           2 :         r.lengths = r.bd.Uints(indexBlockColumnLengths)
     152           2 :         r.blockProps = r.bd.RawBytes(indexBlockColumnBlockProperties)
     153           2 : }
     154             : 
     155             : // DebugString prints a human-readable explanation of the keyspan block's binary
     156             : // representation.
     157           1 : func (r *IndexBlockDecoder) DebugString() string {
     158           1 :         f := binfmt.New(r.bd.data).LineWidth(20)
     159           1 :         tp := treeprinter.New()
     160           1 :         r.Describe(f, tp.Child("index-block-decoder"))
     161           1 :         return tp.String()
     162           1 : }
     163             : 
     164             : // Describe describes the binary format of the index block, assuming f.Offset()
     165             : // is positioned at the beginning of the same index block described by r.
     166           1 : func (r *IndexBlockDecoder) Describe(f *binfmt.Formatter, tp treeprinter.Node) {
     167           1 :         // Set the relative offset. When loaded into memory, the beginning of blocks
     168           1 :         // are aligned. Padding that ensures alignment is done relative to the
     169           1 :         // current offset. Setting the relative offset ensures that if we're
     170           1 :         // describing this block within a larger structure (eg, f.Offset()>0), we
     171           1 :         // compute padding appropriately assuming the current byte f.Offset() is
     172           1 :         // aligned.
     173           1 :         f.SetAnchorOffset()
     174           1 : 
     175           1 :         n := tp.Child("index block header")
     176           1 :         r.bd.headerToBinFormatter(f, n)
     177           1 :         for i := 0; i < indexBlockColumnCount; i++ {
     178           1 :                 r.bd.columnToBinFormatter(f, n, i, int(r.bd.header.Rows))
     179           1 :         }
     180           1 :         f.HexBytesln(1, "block padding byte")
     181           1 :         f.ToTreePrinter(n)
     182             : }
     183             : 
     184             : // IndexIter is an iterator over the block entries in an index block.
     185             : type IndexIter struct {
     186             :         compare base.Compare
     187             :         split   base.Split
     188             :         d       *IndexBlockDecoder
     189             :         n       int
     190             :         row     int
     191             : 
     192             :         syntheticPrefixAndSuffix block.SyntheticPrefixAndSuffix
     193             : 
     194             :         h block.BufferHandle
     195             :         // TODO(radu): remove allocDecoder and require any Init callers to provide the
     196             :         // decoder.
     197             :         allocDecoder IndexBlockDecoder
     198             :         keyBuf       []byte
     199             : }
     200             : 
     201             : // Assert that IndexIter satisfies the block.IndexBlockIterator interface.
     202             : var _ block.IndexBlockIterator = (*IndexIter)(nil)
     203             : 
     204             : // InitWithDecoder initializes an index iterator from the provided decoder.
     205             : func (i *IndexIter) InitWithDecoder(
     206             :         compare base.Compare, split base.Split, d *IndexBlockDecoder, transforms block.IterTransforms,
     207           2 : ) {
     208           2 :         i.compare = compare
     209           2 :         i.split = split
     210           2 :         i.d = d
     211           2 :         i.n = int(d.bd.header.Rows)
     212           2 :         i.row = -1
     213           2 :         i.syntheticPrefixAndSuffix = transforms.SyntheticPrefixAndSuffix
     214           2 :         // Leave h, allocDecoder, keyBuf unchanged.
     215           2 : }
     216             : 
     217             : // Init initializes an iterator from the provided block data slice.
     218             : func (i *IndexIter) Init(
     219             :         cmp base.Compare, split base.Split, blk []byte, transforms block.IterTransforms,
     220           2 : ) error {
     221           2 :         i.h.Release()
     222           2 :         i.h = block.BufferHandle{}
     223           2 :         i.allocDecoder.Init(blk)
     224           2 :         i.InitWithDecoder(cmp, split, &i.allocDecoder, transforms)
     225           2 :         return nil
     226           2 : }
     227             : 
     228             : // InitHandle initializes an iterator from the provided block handle.
     229             : func (i *IndexIter) InitHandle(
     230             :         cmp base.Compare, split base.Split, blk block.BufferHandle, transforms block.IterTransforms,
     231           2 : ) error {
     232           2 :         i.h.Release()
     233           2 :         i.h = blk
     234           2 :         d := (*IndexBlockDecoder)(unsafe.Pointer(blk.BlockMetadata()))
     235           2 :         i.InitWithDecoder(cmp, split, d, transforms)
     236           2 :         return nil
     237           2 : }
     238             : 
     239             : // RowIndex returns the index of the block entry at the iterator's current
     240             : // position.
     241           0 : func (i *IndexIter) RowIndex() int {
     242           0 :         return i.row
     243           0 : }
     244             : 
     245             : // Valid returns true if the iterator is currently positioned at a valid block
     246             : // handle.
     247           2 : func (i *IndexIter) Valid() bool {
     248           2 :         return 0 <= i.row && i.row < i.n
     249           2 : }
     250             : 
     251             : // Invalidate invalidates the block iterator, removing references to the block
     252             : // it was initialized with.
     253           2 : func (i *IndexIter) Invalidate() {
     254           2 :         i.d = nil
     255           2 :         i.n = 0
     256           2 : }
     257             : 
     258             : // IsDataInvalidated returns true when the iterator has been invalidated
     259             : // using an Invalidate call. NB: this is different from Valid.
     260           2 : func (i *IndexIter) IsDataInvalidated() bool {
     261           2 :         return i.d == nil
     262           2 : }
     263             : 
     264             : // Handle returns the underlying block buffer handle, if the iterator was
     265             : // initialized with one.
     266           2 : func (i *IndexIter) Handle() block.BufferHandle {
     267           2 :         return i.h
     268           2 : }
     269             : 
     270             : // Separator returns the separator at the iterator's current position. The
     271             : // iterator must be positioned at a valid row.
     272           2 : func (i *IndexIter) Separator() []byte {
     273           2 :         key := i.d.separators.At(i.row)
     274           2 :         if i.syntheticPrefixAndSuffix.IsUnset() {
     275           2 :                 return key
     276           2 :         }
     277           2 :         return i.applyTransforms(key)
     278             : }
     279             : 
     280             : // SeparatorLT returns true if the separator at the iterator's current
     281             : // position is strictly less than the provided key.
     282           2 : func (i *IndexIter) SeparatorLT(key []byte) bool {
     283           2 :         return i.compare(i.Separator(), key) < 0
     284           2 : }
     285             : 
     286             : // SeparatorGT returns true if the separator at the iterator's current position
     287             : // is strictly greater than (or equal, if orEqual=true) the provided key.
     288           2 : func (i *IndexIter) SeparatorGT(key []byte, inclusively bool) bool {
     289           2 :         cmp := i.compare(i.Separator(), key)
     290           2 :         return cmp > 0 || (cmp == 0 && inclusively)
     291           2 : }
     292             : 
     293           2 : func (i *IndexIter) applyTransforms(key []byte) []byte {
     294           2 :         syntheticPrefix := i.syntheticPrefixAndSuffix.Prefix()
     295           2 :         syntheticSuffix := i.syntheticPrefixAndSuffix.Suffix()
     296           2 :         if syntheticSuffix.IsSet() {
     297           2 :                 key = key[:i.split(key)]
     298           2 :         }
     299           2 :         i.keyBuf = slices.Grow(i.keyBuf[:0], len(syntheticPrefix)+len(key)+len(syntheticSuffix))
     300           2 :         i.keyBuf = append(i.keyBuf, syntheticPrefix...)
     301           2 :         i.keyBuf = append(i.keyBuf, key...)
     302           2 :         i.keyBuf = append(i.keyBuf, syntheticSuffix...)
     303           2 :         return i.keyBuf
     304             : }
     305             : 
     306             : // BlockHandleWithProperties decodes the block handle with any encoded
     307             : // properties at the iterator's current position.
     308           2 : func (i *IndexIter) BlockHandleWithProperties() (block.HandleWithProperties, error) {
     309           2 :         if invariants.Enabled && !i.Valid() {
     310           0 :                 panic(errors.AssertionFailedf("invalid row %d (n=%d)", i.row, i.n))
     311             :         }
     312           2 :         return block.HandleWithProperties{
     313           2 :                 Handle: block.Handle{
     314           2 :                         Offset: i.d.offsets.At(i.row),
     315           2 :                         Length: i.d.lengths.At(i.row),
     316           2 :                 },
     317           2 :                 Props: i.d.blockProps.At(i.row),
     318           2 :         }, nil
     319             : }
     320             : 
     321             : // SeekGE seeks the index iterator to the first block entry with a separator key
     322             : // greater or equal to the given key. It returns false if the seek key is
     323             : // greater than all index block separators.
     324           2 : func (i *IndexIter) SeekGE(key []byte) bool {
     325           2 :         // Define f(-1) == false and f(upper) == true.
     326           2 :         // Invariant: f(index-1) == false, f(upper) == true.
     327           2 :         index, upper := 0, i.n
     328           2 :         for index < upper {
     329           2 :                 h := int(uint(index+upper) >> 1) // avoid overflow when computing h
     330           2 :                 // index ≤ h < upper
     331           2 : 
     332           2 :                 // TODO(jackson): Is Bytes.At or Bytes.Slice(Bytes.Offset(h),
     333           2 :                 // Bytes.Offset(h+1)) faster in this code?
     334           2 :                 separator := i.d.separators.At(h)
     335           2 :                 if !i.syntheticPrefixAndSuffix.IsUnset() {
     336           2 :                         // TODO(radu): compare without materializing the transformed key.
     337           2 :                         separator = i.applyTransforms(separator)
     338           2 :                 }
     339             :                 // TODO(radu): experiment with splitting the separator prefix and suffix in
     340             :                 // separate columns and using bytes.Compare() on the prefix in the hot path.
     341           2 :                 c := i.compare(key, separator)
     342           2 :                 if c > 0 {
     343           2 :                         index = h + 1 // preserves f(index-1) == false
     344           2 :                 } else {
     345           2 :                         upper = h // preserves f(upper) == true
     346           2 :                 }
     347             :         }
     348             :         // index == upper, f(index-1) == false, and f(upper) (= f(index)) == true  =>  answer is index.
     349           2 :         i.row = index
     350           2 :         return index < i.n
     351             : }
     352             : 
     353             : // First seeks index iterator to the first block entry. It returns false if the
     354             : // index block is empty.
     355           2 : func (i *IndexIter) First() bool {
     356           2 :         i.row = 0
     357           2 :         return i.n > 0
     358           2 : }
     359             : 
     360             : // Last seeks index iterator to the last block entry. It returns false if the
     361             : // index block is empty.
     362           2 : func (i *IndexIter) Last() bool {
     363           2 :         i.row = i.n - 1
     364           2 :         return i.n > 0
     365           2 : }
     366             : 
     367             : // Next steps the index iterator to the next block entry. It returns false if
     368             : // the index block is exhausted in the forward direction. A call to Next while
     369             : // already exhausted in the forward direction is a no-op.
     370           2 : func (i *IndexIter) Next() bool {
     371           2 :         i.row = min(i.n, i.row+1)
     372           2 :         return i.row < i.n
     373           2 : }
     374             : 
     375             : // Prev steps the index iterator to the previous block entry. It returns false
     376             : // if the index block is exhausted in the reverse direction. A call to Prev
     377             : // while already exhausted in the reverse direction is a no-op.
     378           2 : func (i *IndexIter) Prev() bool {
     379           2 :         i.row = max(-1, i.row-1)
     380           2 :         return i.row >= 0 && i.row < i.n
     381           2 : }
     382             : 
     383             : // Close closes the iterator, releasing any resources it holds.
     384           2 : func (i *IndexIter) Close() error {
     385           2 :         i.h.Release()
     386           2 :         i.h = block.BufferHandle{}
     387           2 :         i.d = nil
     388           2 :         i.n = 0
     389           2 :         i.syntheticPrefixAndSuffix = block.SyntheticPrefixAndSuffix{}
     390           2 :         return nil
     391           2 : }

Generated by: LCOV version 1.14