LCOV - code coverage report
Current view: top level - pebble/sstable/colblk - index_block.go (source / functions) Hit Total Coverage
Test: 2024-09-10 08:16Z 2cc212bb - tests only.lcov Lines: 116 169 68.6 %
Date: 2024-09-10 08:17:02 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : package colblk
       6             : 
       7             : import (
       8             :         "bytes"
       9             : 
      10             :         "github.com/cockroachdb/pebble/internal/base"
      11             :         "github.com/cockroachdb/pebble/internal/binfmt"
      12             :         "github.com/cockroachdb/pebble/sstable/block"
      13             : )
      14             : 
      15             : const indexBlockCustomHeaderSize = 0
      16             : 
      17             : // IndexBlockWriter writes columnar index blocks. The writer is used for both
      18             : // first-level and second-level index blocks. The index block schema consists of
      19             : // three primary columns:
      20             : //   - Separators: a user key that is ≥ the largest user key in the
      21             : //     corresponding entry, and ≤ the smallest user key in the next entry.
      22             : //     Note that this allows consecutive separators to be equal. This is
      23             : //     possible when snapshots required we preserve duplicate user keys at
      24             : //     different sequence numbers.
      25             : //   - Offsets: the offset of the end of the corresponding block.
      26             : //   - Lengths: the length in bytes of the corresponding block.
      27             : //   - Block properties: a slice encoding arbitrary user-defined block
      28             : //     properties.
      29             : //
      30             : // TODO(jackson): Consider splitting separators into prefixes and suffixes (even
      31             : // without user-defined columns). This would allow us to use prefix compression
      32             : // for the prefix. Separators should typically be suffixless unless two KVs with
      33             : // the same prefix straddle a block boundary. We would need to use a buffer to
      34             : // materialize the separator key when we need to use it outside the context of
      35             : // seeking within the block.
      36             : type IndexBlockWriter struct {
      37             :         separators      RawBytesBuilder
      38             :         offsets         UintBuilder
      39             :         lengths         UintBuilder
      40             :         blockProperties RawBytesBuilder
      41             :         rows            int
      42             :         enc             blockEncoder
      43             : }
      44             : 
      45             : const (
      46             :         indexBlockColumnSeparator = iota
      47             :         indexBlockColumnOffsets
      48             :         indexBlockColumnLengths
      49             :         indexBlockColumnBlockProperties
      50             :         indexBlockColumnCount
      51             : )
      52             : 
      53             : // Init initializes the index block writer.
      54           1 : func (w *IndexBlockWriter) Init(blockProperties ...ColumnWriter) {
      55           1 :         w.separators.Init()
      56           1 :         w.offsets.Init()
      57           1 :         w.lengths.InitWithDefault()
      58           1 :         w.blockProperties.Init()
      59           1 :         w.rows = 0
      60           1 : }
      61             : 
      62             : // Reset resets the index block writer to its initial state, retaining buffers.
      63           0 : func (w *IndexBlockWriter) Reset() {
      64           0 :         w.separators.Reset()
      65           0 :         w.offsets.Reset()
      66           0 :         w.lengths.Reset()
      67           0 :         w.blockProperties.Reset()
      68           0 :         w.rows = 0
      69           0 :         w.enc.reset()
      70           0 : }
      71             : 
      72             : // Rows returns the number of entries in the index block so far.
      73           1 : func (w *IndexBlockWriter) Rows() int {
      74           1 :         return w.rows
      75           1 : }
      76             : 
      77             : // AddBlockHandle adds a new separator and end offset of a data block to the
      78             : // index block.  Add returns the index of the row.
      79             : //
      80             : // AddBlockHandle should only be used for first-level index blocks.
      81             : func (w *IndexBlockWriter) AddBlockHandle(
      82             :         separator []byte, handle block.Handle, blockProperties []byte,
      83           1 : ) int {
      84           1 :         idx := w.rows
      85           1 :         w.separators.Put(separator)
      86           1 :         w.offsets.Set(w.rows, handle.Offset)
      87           1 :         w.lengths.Set(w.rows, handle.Length)
      88           1 :         w.blockProperties.Put(blockProperties)
      89           1 :         w.rows++
      90           1 :         return idx
      91           1 : }
      92             : 
      93             : // UnsafeSeparator returns the separator of the i'th entry.
      94           1 : func (w *IndexBlockWriter) UnsafeSeparator(i int) []byte {
      95           1 :         return w.separators.UnsafeGet(i)
      96           1 : }
      97             : 
      98             : // Size returns the size of the pending index block.
      99           1 : func (w *IndexBlockWriter) Size() int {
     100           1 :         off := blockHeaderSize(indexBlockColumnCount, indexBlockCustomHeaderSize)
     101           1 :         off = w.separators.Size(w.rows, off)
     102           1 :         off = w.offsets.Size(w.rows, off)
     103           1 :         off = w.lengths.Size(w.rows, off)
     104           1 :         off = w.blockProperties.Size(w.rows, off)
     105           1 :         off++
     106           1 :         return int(off)
     107           1 : }
     108             : 
     109             : // Finish serializes the pending index block.
     110           1 : func (w *IndexBlockWriter) Finish() []byte {
     111           1 :         w.enc.init(w.Size(), Header{
     112           1 :                 Version: Version1,
     113           1 :                 Columns: indexBlockColumnCount,
     114           1 :                 Rows:    uint32(w.rows),
     115           1 :         }, indexBlockCustomHeaderSize)
     116           1 :         w.enc.encode(w.rows, &w.separators)
     117           1 :         w.enc.encode(w.rows, &w.offsets)
     118           1 :         w.enc.encode(w.rows, &w.lengths)
     119           1 :         w.enc.encode(w.rows, &w.blockProperties)
     120           1 :         return w.enc.finish()
     121           1 : }
     122             : 
     123             : // An IndexReader reads columnar index blocks.
     124             : type IndexReader struct {
     125             :         separators RawBytes
     126             :         offsets    UnsafeUints
     127             :         lengths    UnsafeUints // only used for second-level index blocks
     128             :         blockProps RawBytes
     129             :         br         BlockReader
     130             : }
     131             : 
     132             : // Init initializes the index reader with the given serialized index block.
     133           1 : func (r *IndexReader) Init(data []byte) {
     134           1 :         r.br.Init(data, indexBlockCustomHeaderSize)
     135           1 :         r.separators = r.br.RawBytes(indexBlockColumnSeparator)
     136           1 :         r.offsets = r.br.Uints(indexBlockColumnOffsets)
     137           1 :         r.lengths = r.br.Uints(indexBlockColumnLengths)
     138           1 :         r.blockProps = r.br.RawBytes(indexBlockColumnBlockProperties)
     139           1 : }
     140             : 
     141             : // DebugString prints a human-readable explanation of the keyspan block's binary
     142             : // representation.
     143           1 : func (r *IndexReader) DebugString() string {
     144           1 :         f := binfmt.New(r.br.data).LineWidth(20)
     145           1 :         r.Describe(f)
     146           1 :         return f.String()
     147           1 : }
     148             : 
     149             : // Describe describes the binary format of the index block, assuming f.Offset()
     150             : // is positioned at the beginning of the same index block described by r.
     151           1 : func (r *IndexReader) Describe(f *binfmt.Formatter) {
     152           1 :         // Set the relative offset. When loaded into memory, the beginning of blocks
     153           1 :         // are aligned. Padding that ensures alignment is done relative to the
     154           1 :         // current offset. Setting the relative offset ensures that if we're
     155           1 :         // describing this block within a larger structure (eg, f.Offset()>0), we
     156           1 :         // compute padding appropriately assuming the current byte f.Offset() is
     157           1 :         // aligned.
     158           1 :         f.SetAnchorOffset()
     159           1 : 
     160           1 :         f.CommentLine("index block header")
     161           1 :         r.br.headerToBinFormatter(f)
     162           1 :         for i := 0; i < indexBlockColumnCount; i++ {
     163           1 :                 r.br.columnToBinFormatter(f, i, int(r.br.header.Rows))
     164           1 :         }
     165           1 :         f.HexBytesln(1, "block padding byte")
     166             : }
     167             : 
     168             : // IndexIter is an iterator over the block entries in an index block.
     169             : type IndexIter struct {
     170             :         r   *IndexReader
     171             :         n   int
     172             :         row int
     173             : 
     174             :         h           block.BufferHandle
     175             :         allocReader IndexReader
     176             : }
     177             : 
     178             : // Assert that IndexIter satisfies the block.IndexBlockIterator interface.
     179             : var _ block.IndexBlockIterator = (*IndexIter)(nil)
     180             : 
     181             : // InitReader initializes an index iterator from the provided reader.
     182           1 : func (i *IndexIter) InitReader(r *IndexReader) {
     183           1 :         *i = IndexIter{r: r, n: int(r.br.header.Rows), allocReader: i.allocReader}
     184           1 : }
     185             : 
     186             : // Init initializes an iterator from the provided block data slice.
     187             : func (i *IndexIter) Init(
     188             :         cmp base.Compare, split base.Split, blk []byte, transforms block.IterTransforms,
     189           0 : ) error {
     190           0 :         // TODO(jackson): Handle the transforms.
     191           0 :         i.allocReader.Init(blk)
     192           0 :         i.InitReader(&i.allocReader)
     193           0 :         return nil
     194           0 : }
     195             : 
     196             : // InitHandle initializes an iterator from the provided block handle.
     197             : func (i *IndexIter) InitHandle(
     198             :         cmp base.Compare, split base.Split, block block.BufferHandle, transforms block.IterTransforms,
     199           0 : ) error {
     200           0 :         // TODO(jackson): Handle the transforms.
     201           0 : 
     202           0 :         // TODO(jackson): If block.h != nil, use a *IndexReader that's allocated
     203           0 :         // when the block is loaded into the block cache. On cache hits, this will
     204           0 :         // reduce the amount of setup necessary to use an iterator. (It's relatively
     205           0 :         // common to open an iterator and perform just a few seeks, so avoiding the
     206           0 :         // overhead can be material.)
     207           0 :         i.h.Release()
     208           0 :         i.h = block
     209           0 :         return i.Init(cmp, split, i.h.Get(), transforms)
     210           0 : }
     211             : 
     212             : // RowIndex returns the index of the block entry at the iterator's current
     213             : // position.
     214           0 : func (i *IndexIter) RowIndex() int {
     215           0 :         return i.row
     216           0 : }
     217             : 
     218             : // ResetForReuse resets the IndexIter for reuse, retaining buffers to avoid
     219             : // future allocations.
     220           0 : func (i *IndexIter) ResetForReuse() IndexIter {
     221           0 :         return IndexIter{ /* nothing to retain */ }
     222           0 : }
     223             : 
     224             : // Valid returns true if the iterator is currently positioned at a valid block
     225             : // handle.
     226           0 : func (i *IndexIter) Valid() bool {
     227           0 :         return 0 <= i.row && i.row < int(i.r.br.header.Rows)
     228           0 : }
     229             : 
     230             : // Invalidate invalidates the block iterator, removing references to the block
     231             : // it was initialized with.
     232           0 : func (i *IndexIter) Invalidate() {
     233           0 :         i.r = nil
     234           0 :         i.h = block.BufferHandle{}
     235           0 : }
     236             : 
     237             : // IsDataInvalidated returns true when the iterator has been invalidated
     238             : // using an Invalidate call. NB: this is different from Valid.
     239           0 : func (i *IndexIter) IsDataInvalidated() bool {
     240           0 :         return i.r == nil
     241           0 : }
     242             : 
     243             : // Handle returns the underlying block buffer handle, if the iterator was
     244             : // initialized with one.
     245           0 : func (i *IndexIter) Handle() block.BufferHandle {
     246           0 :         return i.h
     247           0 : }
     248             : 
     249             : // Separator returns the separator at the iterator's current position. The
     250             : // iterator must be positioned at a valid row.
     251           0 : func (i *IndexIter) Separator() []byte {
     252           0 :         return i.r.separators.At(i.row)
     253           0 : }
     254             : 
     255             : // BlockHandleWithProperties decodes the block handle with any encoded
     256             : // properties at the iterator's current position.
     257           1 : func (i *IndexIter) BlockHandleWithProperties() (block.HandleWithProperties, error) {
     258           1 :         return block.HandleWithProperties{
     259           1 :                 Handle: block.Handle{
     260           1 :                         Offset: i.r.offsets.At(i.row),
     261           1 :                         Length: i.r.lengths.At(i.row),
     262           1 :                 },
     263           1 :                 Props: i.r.blockProps.At(i.row),
     264           1 :         }, nil
     265           1 : }
     266             : 
     267             : // SeekGE seeks the index iterator to the first block entry with a separator key
     268             : // greater or equal to the given key. It returns false if the seek key is
     269             : // greater than all index block separators.
     270           1 : func (i *IndexIter) SeekGE(key []byte) bool {
     271           1 :         // Define f(-1) == false and f(upper) == true.
     272           1 :         // Invariant: f(index-1) == false, f(upper) == true.
     273           1 :         index, upper := 0, i.n
     274           1 :         for index < upper {
     275           1 :                 h := int(uint(index+upper) >> 1) // avoid overflow when computing h
     276           1 :                 // index ≤ h < upper
     277           1 : 
     278           1 :                 // TODO(jackson): Is Bytes.At or Bytes.Slice(Bytes.Offset(h),
     279           1 :                 // Bytes.Offset(h+1)) faster in this code?
     280           1 :                 c := bytes.Compare(key, i.r.separators.At(h))
     281           1 :                 if c > 0 {
     282           1 :                         index = h + 1 // preserves f(index-1) == false
     283           1 :                 } else {
     284           1 :                         upper = h // preserves f(upper) == true
     285           1 :                 }
     286             :         }
     287             :         // index == upper, f(index-1) == false, and f(upper) (= f(index)) == true  =>  answer is index.
     288           1 :         i.row = index
     289           1 :         return index < i.n
     290             : }
     291             : 
     292             : // First seeks index iterator to the first block entry. It returns false if the
     293             : // index block is empty.
     294           1 : func (i *IndexIter) First() bool {
     295           1 :         i.row = 0
     296           1 :         return i.n > 0
     297           1 : }
     298             : 
     299             : // Last seeks index iterator to the last block entry. It returns false if the
     300             : // index block is empty.
     301           1 : func (i *IndexIter) Last() bool {
     302           1 :         i.row = i.n - 1
     303           1 :         return i.n > 0
     304           1 : }
     305             : 
     306             : // Next steps the index iterator to the next block entry. It returns false if
     307             : // the index block is exhausted.
     308           1 : func (i *IndexIter) Next() bool {
     309           1 :         i.row++
     310           1 :         return i.row < i.n
     311           1 : }
     312             : 
     313             : // Prev steps the index iterator to the previous block entry. It returns false
     314             : // if the index block is exhausted.
     315           1 : func (i *IndexIter) Prev() bool {
     316           1 :         i.row--
     317           1 :         return i.row >= 0
     318           1 : }
     319             : 
     320             : // Close closes the iterator, releasing any resources it holds.
     321           0 : func (i *IndexIter) Close() error {
     322           0 :         i.h.Release()
     323           0 :         *i = IndexIter{}
     324           0 :         return nil
     325           0 : }

Generated by: LCOV version 1.14