LCOV - code coverage report
Current view: top level - pebble/sstable/colblk - index_block.go (source / functions) Hit Total Coverage
Test: 2024-10-03 08:17Z 41145ee9 - meta test only.lcov Lines: 0 184 0.0 %
Date: 2024-10-03 08:17:51 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : package colblk
       6             : 
       7             : import (
       8             :         "github.com/cockroachdb/errors"
       9             :         "github.com/cockroachdb/pebble/internal/base"
      10             :         "github.com/cockroachdb/pebble/internal/binfmt"
      11             :         "github.com/cockroachdb/pebble/internal/invariants"
      12             :         "github.com/cockroachdb/pebble/sstable/block"
      13             : )
      14             : 
      15             : const indexBlockCustomHeaderSize = 0
      16             : 
      17             : // IndexBlockWriter writes columnar index blocks. The writer is used for both
      18             : // first-level and second-level index blocks. The index block schema consists of
      19             : // three primary columns:
      20             : //   - Separators: a user key that is ≥ the largest user key in the
      21             : //     corresponding entry, and ≤ the smallest user key in the next entry.
      22             : //     Note that this allows consecutive separators to be equal. This is
      23             : //     possible when snapshots required we preserve duplicate user keys at
      24             : //     different sequence numbers.
      25             : //   - Offsets: the offset of the end of the corresponding block.
      26             : //   - Lengths: the length in bytes of the corresponding block.
      27             : //   - Block properties: a slice encoding arbitrary user-defined block
      28             : //     properties.
      29             : //
      30             : // TODO(jackson): Consider splitting separators into prefixes and suffixes (even
      31             : // without user-defined columns). This would allow us to use prefix compression
      32             : // for the prefix. Separators should typically be suffixless unless two KVs with
      33             : // the same prefix straddle a block boundary. We would need to use a buffer to
      34             : // materialize the separator key when we need to use it outside the context of
      35             : // seeking within the block.
      36             : type IndexBlockWriter struct {
      37             :         separators      RawBytesBuilder
      38             :         offsets         UintBuilder
      39             :         lengths         UintBuilder
      40             :         blockProperties RawBytesBuilder
      41             :         rows            int
      42             :         enc             blockEncoder
      43             : }
      44             : 
      45             : const (
      46             :         indexBlockColumnSeparator = iota
      47             :         indexBlockColumnOffsets
      48             :         indexBlockColumnLengths
      49             :         indexBlockColumnBlockProperties
      50             :         indexBlockColumnCount
      51             : )
      52             : 
      53             : // Init initializes the index block writer.
      54           0 : func (w *IndexBlockWriter) Init() {
      55           0 :         w.separators.Init()
      56           0 :         w.offsets.Init()
      57           0 :         w.lengths.InitWithDefault()
      58           0 :         w.blockProperties.Init()
      59           0 :         w.rows = 0
      60           0 : }
      61             : 
      62             : // Reset resets the index block writer to its initial state, retaining buffers.
      63           0 : func (w *IndexBlockWriter) Reset() {
      64           0 :         w.separators.Reset()
      65           0 :         w.offsets.Reset()
      66           0 :         w.lengths.Reset()
      67           0 :         w.blockProperties.Reset()
      68           0 :         w.rows = 0
      69           0 :         w.enc.reset()
      70           0 : }
      71             : 
      72             : // Rows returns the number of entries in the index block so far.
      73           0 : func (w *IndexBlockWriter) Rows() int {
      74           0 :         return w.rows
      75           0 : }
      76             : 
      77             : // AddBlockHandle adds a new separator and end offset of a data block to the
      78             : // index block.  Add returns the index of the row.
      79             : //
      80             : // AddBlockHandle should only be used for first-level index blocks.
      81             : func (w *IndexBlockWriter) AddBlockHandle(
      82             :         separator []byte, handle block.Handle, blockProperties []byte,
      83           0 : ) int {
      84           0 :         idx := w.rows
      85           0 :         w.separators.Put(separator)
      86           0 :         w.offsets.Set(w.rows, handle.Offset)
      87           0 :         w.lengths.Set(w.rows, handle.Length)
      88           0 :         w.blockProperties.Put(blockProperties)
      89           0 :         w.rows++
      90           0 :         return idx
      91           0 : }
      92             : 
      93             : // UnsafeSeparator returns the separator of the i'th entry.
      94           0 : func (w *IndexBlockWriter) UnsafeSeparator(i int) []byte {
      95           0 :         return w.separators.UnsafeGet(i)
      96           0 : }
      97             : 
      98             : // Size returns the size of the pending index block.
      99           0 : func (w *IndexBlockWriter) Size() int {
     100           0 :         return w.size(w.rows)
     101           0 : }
     102             : 
     103           0 : func (w *IndexBlockWriter) size(rows int) int {
     104           0 :         off := blockHeaderSize(indexBlockColumnCount, indexBlockCustomHeaderSize)
     105           0 :         off = w.separators.Size(rows, off)
     106           0 :         off = w.offsets.Size(rows, off)
     107           0 :         off = w.lengths.Size(rows, off)
     108           0 :         off = w.blockProperties.Size(rows, off)
     109           0 :         off++
     110           0 :         return int(off)
     111           0 : }
     112             : 
     113             : // Finish serializes the pending index block, including the first [rows] rows.
     114             : // The value of [rows] must be Rows() or Rows()-1.
     115           0 : func (w *IndexBlockWriter) Finish(rows int) []byte {
     116           0 :         if invariants.Enabled && rows != w.rows && rows != w.rows-1 {
     117           0 :                 panic(errors.AssertionFailedf("index block has %d rows; asked to finish %d", w.rows, rows))
     118             :         }
     119             : 
     120           0 :         w.enc.init(w.size(rows), Header{
     121           0 :                 Version: Version1,
     122           0 :                 Columns: indexBlockColumnCount,
     123           0 :                 Rows:    uint32(rows),
     124           0 :         }, indexBlockCustomHeaderSize)
     125           0 :         w.enc.encode(rows, &w.separators)
     126           0 :         w.enc.encode(rows, &w.offsets)
     127           0 :         w.enc.encode(rows, &w.lengths)
     128           0 :         w.enc.encode(rows, &w.blockProperties)
     129           0 :         return w.enc.finish()
     130             : }
     131             : 
     132             : // An IndexReader reads columnar index blocks.
     133             : type IndexReader struct {
     134             :         separators RawBytes
     135             :         offsets    UnsafeUints
     136             :         lengths    UnsafeUints // only used for second-level index blocks
     137             :         blockProps RawBytes
     138             :         br         BlockReader
     139             : }
     140             : 
     141             : // Init initializes the index reader with the given serialized index block.
     142           0 : func (r *IndexReader) Init(data []byte) {
     143           0 :         r.br.Init(data, indexBlockCustomHeaderSize)
     144           0 :         r.separators = r.br.RawBytes(indexBlockColumnSeparator)
     145           0 :         r.offsets = r.br.Uints(indexBlockColumnOffsets)
     146           0 :         r.lengths = r.br.Uints(indexBlockColumnLengths)
     147           0 :         r.blockProps = r.br.RawBytes(indexBlockColumnBlockProperties)
     148           0 : }
     149             : 
     150             : // DebugString prints a human-readable explanation of the keyspan block's binary
     151             : // representation.
     152           0 : func (r *IndexReader) DebugString() string {
     153           0 :         f := binfmt.New(r.br.data).LineWidth(20)
     154           0 :         r.Describe(f)
     155           0 :         return f.String()
     156           0 : }
     157             : 
     158             : // Describe describes the binary format of the index block, assuming f.Offset()
     159             : // is positioned at the beginning of the same index block described by r.
     160           0 : func (r *IndexReader) Describe(f *binfmt.Formatter) {
     161           0 :         // Set the relative offset. When loaded into memory, the beginning of blocks
     162           0 :         // are aligned. Padding that ensures alignment is done relative to the
     163           0 :         // current offset. Setting the relative offset ensures that if we're
     164           0 :         // describing this block within a larger structure (eg, f.Offset()>0), we
     165           0 :         // compute padding appropriately assuming the current byte f.Offset() is
     166           0 :         // aligned.
     167           0 :         f.SetAnchorOffset()
     168           0 : 
     169           0 :         f.CommentLine("index block header")
     170           0 :         r.br.headerToBinFormatter(f)
     171           0 :         for i := 0; i < indexBlockColumnCount; i++ {
     172           0 :                 r.br.columnToBinFormatter(f, i, int(r.br.header.Rows))
     173           0 :         }
     174           0 :         f.HexBytesln(1, "block padding byte")
     175             : }
     176             : 
     177             : // IndexIter is an iterator over the block entries in an index block.
     178             : type IndexIter struct {
     179             :         compare base.Compare
     180             :         r       *IndexReader
     181             :         n       int
     182             :         row     int
     183             : 
     184             :         h           block.BufferHandle
     185             :         allocReader IndexReader
     186             : }
     187             : 
     188             : // Assert that IndexIter satisfies the block.IndexBlockIterator interface.
     189             : var _ block.IndexBlockIterator = (*IndexIter)(nil)
     190             : 
     191             : // InitReader initializes an index iterator from the provided reader.
     192           0 : func (i *IndexIter) InitReader(compare base.Compare, r *IndexReader) {
     193           0 :         *i = IndexIter{
     194           0 :                 compare:     compare,
     195           0 :                 r:           r,
     196           0 :                 n:           int(r.br.header.Rows),
     197           0 :                 h:           i.h,
     198           0 :                 allocReader: i.allocReader,
     199           0 :         }
     200           0 : }
     201             : 
     202             : // Init initializes an iterator from the provided block data slice.
     203             : func (i *IndexIter) Init(
     204             :         cmp base.Compare, split base.Split, blk []byte, transforms block.IterTransforms,
     205           0 : ) error {
     206           0 :         i.h.Release()
     207           0 :         i.h = block.BufferHandle{}
     208           0 :         // TODO(jackson): Handle the transforms.
     209           0 :         i.allocReader.Init(blk)
     210           0 :         i.InitReader(cmp, &i.allocReader)
     211           0 :         return nil
     212           0 : }
     213             : 
     214             : // InitHandle initializes an iterator from the provided block handle.
     215             : func (i *IndexIter) InitHandle(
     216             :         cmp base.Compare, split base.Split, blk block.BufferHandle, transforms block.IterTransforms,
     217           0 : ) error {
     218           0 :         // TODO(jackson): Handle the transforms.
     219           0 : 
     220           0 :         // TODO(jackson): If block.h != nil, use a *IndexReader that's allocated
     221           0 :         // when the block is loaded into the block cache. On cache hits, this will
     222           0 :         // reduce the amount of setup necessary to use an iterator. (It's relatively
     223           0 :         // common to open an iterator and perform just a few seeks, so avoiding the
     224           0 :         // overhead can be material.)
     225           0 :         i.h.Release()
     226           0 :         i.h = blk
     227           0 :         i.allocReader.Init(i.h.Get())
     228           0 :         i.InitReader(cmp, &i.allocReader)
     229           0 :         return nil
     230           0 : }
     231             : 
     232             : // RowIndex returns the index of the block entry at the iterator's current
     233             : // position.
     234           0 : func (i *IndexIter) RowIndex() int {
     235           0 :         return i.row
     236           0 : }
     237             : 
     238             : // ResetForReuse resets the IndexIter for reuse, retaining buffers to avoid
     239             : // future allocations.
     240           0 : func (i *IndexIter) ResetForReuse() IndexIter {
     241           0 :         if invariants.Enabled && i.h != (block.BufferHandle{}) {
     242           0 :                 panic(errors.AssertionFailedf("IndexIter reset for reuse with non-empty handle"))
     243             :         }
     244           0 :         return IndexIter{ /* nothing to retain */ }
     245             : }
     246             : 
     247             : // Valid returns true if the iterator is currently positioned at a valid block
     248             : // handle.
     249           0 : func (i *IndexIter) Valid() bool {
     250           0 :         return 0 <= i.row && i.row < i.n
     251           0 : }
     252             : 
     253             : // Invalidate invalidates the block iterator, removing references to the block
     254             : // it was initialized with.
     255           0 : func (i *IndexIter) Invalidate() {
     256           0 :         i.r = nil
     257           0 :         i.n = 0
     258           0 : }
     259             : 
     260             : // IsDataInvalidated returns true when the iterator has been invalidated
     261             : // using an Invalidate call. NB: this is different from Valid.
     262           0 : func (i *IndexIter) IsDataInvalidated() bool {
     263           0 :         return i.r == nil
     264           0 : }
     265             : 
     266             : // Handle returns the underlying block buffer handle, if the iterator was
     267             : // initialized with one.
     268           0 : func (i *IndexIter) Handle() block.BufferHandle {
     269           0 :         return i.h
     270           0 : }
     271             : 
     272             : // Separator returns the separator at the iterator's current position. The
     273             : // iterator must be positioned at a valid row.
     274           0 : func (i *IndexIter) Separator() []byte {
     275           0 :         return i.r.separators.At(i.row)
     276           0 : }
     277             : 
     278             : // BlockHandleWithProperties decodes the block handle with any encoded
     279             : // properties at the iterator's current position.
     280           0 : func (i *IndexIter) BlockHandleWithProperties() (block.HandleWithProperties, error) {
     281           0 :         return block.HandleWithProperties{
     282           0 :                 Handle: block.Handle{
     283           0 :                         Offset: i.r.offsets.At(i.row),
     284           0 :                         Length: i.r.lengths.At(i.row),
     285           0 :                 },
     286           0 :                 Props: i.r.blockProps.At(i.row),
     287           0 :         }, nil
     288           0 : }
     289             : 
     290             : // SeekGE seeks the index iterator to the first block entry with a separator key
     291             : // greater or equal to the given key. It returns false if the seek key is
     292             : // greater than all index block separators.
     293           0 : func (i *IndexIter) SeekGE(key []byte) bool {
     294           0 :         // Define f(-1) == false and f(upper) == true.
     295           0 :         // Invariant: f(index-1) == false, f(upper) == true.
     296           0 :         index, upper := 0, i.n
     297           0 :         for index < upper {
     298           0 :                 h := int(uint(index+upper) >> 1) // avoid overflow when computing h
     299           0 :                 // index ≤ h < upper
     300           0 : 
     301           0 :                 // TODO(jackson): Is Bytes.At or Bytes.Slice(Bytes.Offset(h),
     302           0 :                 // Bytes.Offset(h+1)) faster in this code?
     303           0 :                 c := i.compare(key, i.r.separators.At(h))
     304           0 :                 if c > 0 {
     305           0 :                         index = h + 1 // preserves f(index-1) == false
     306           0 :                 } else {
     307           0 :                         upper = h // preserves f(upper) == true
     308           0 :                 }
     309             :         }
     310             :         // index == upper, f(index-1) == false, and f(upper) (= f(index)) == true  =>  answer is index.
     311           0 :         i.row = index
     312           0 :         return index < i.n
     313             : }
     314             : 
     315             : // First seeks index iterator to the first block entry. It returns false if the
     316             : // index block is empty.
     317           0 : func (i *IndexIter) First() bool {
     318           0 :         i.row = 0
     319           0 :         return i.n > 0
     320           0 : }
     321             : 
     322             : // Last seeks index iterator to the last block entry. It returns false if the
     323             : // index block is empty.
     324           0 : func (i *IndexIter) Last() bool {
     325           0 :         i.row = i.n - 1
     326           0 :         return i.n > 0
     327           0 : }
     328             : 
     329             : // Next steps the index iterator to the next block entry. It returns false if
     330             : // the index block is exhausted in the forward direction. A call to Next while
     331             : // already exhausted in the forward direction is a no-op.
     332           0 : func (i *IndexIter) Next() bool {
     333           0 :         i.row = min(i.n, i.row+1)
     334           0 :         return i.row < i.n
     335           0 : }
     336             : 
     337             : // Prev steps the index iterator to the previous block entry. It returns false
     338             : // if the index block is exhausted in the reverse direction. A call to Prev
     339             : // while already exhausted in the reverse direction is a no-op.
     340           0 : func (i *IndexIter) Prev() bool {
     341           0 :         i.row = max(-1, i.row-1)
     342           0 :         return i.row >= 0 && i.row < i.n
     343           0 : }
     344             : 
     345             : // Close closes the iterator, releasing any resources it holds.
     346           0 : func (i *IndexIter) Close() error {
     347           0 :         i.h.Release()
     348           0 :         *i = IndexIter{}
     349           0 :         return nil
     350           0 : }

Generated by: LCOV version 1.14