LCOV - code coverage report
Current view: top level - pebble/sstable - reader_iter_single_lvl.go (source / functions) Hit Total Coverage
Test: 2024-09-16 08:16Z 6c9ad29b - tests + meta.lcov Lines: 927 997 93.0 %
Date: 2024-09-16 08:17:43 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : package sstable
       6             : 
       7             : import (
       8             :         "bytes"
       9             :         "context"
      10             :         "fmt"
      11             :         "sync"
      12             :         "unsafe"
      13             : 
      14             :         "github.com/cockroachdb/pebble/internal/base"
      15             :         "github.com/cockroachdb/pebble/internal/invariants"
      16             :         "github.com/cockroachdb/pebble/internal/treeprinter"
      17             :         "github.com/cockroachdb/pebble/objstorage"
      18             :         "github.com/cockroachdb/pebble/objstorage/objstorageprovider"
      19             :         "github.com/cockroachdb/pebble/objstorage/objstorageprovider/objiotracing"
      20             :         "github.com/cockroachdb/pebble/sstable/block"
      21             :         "github.com/cockroachdb/pebble/sstable/rowblk"
      22             : )
      23             : 
      24             : // singleLevelIterator iterates over an entire table of data. To seek for a given
      25             : // key, it first looks in the index for the block that contains that key, and then
      26             : // looks inside that block.
      27             : //
      28             : // singleLevelIterator is parameterized by the type of the data block iterator
      29             : // and index block iterator.  The type parameters are designed to allow the
      30             : // singleLevelIterator to embed the data block and index block iterator structs
      31             : // within itself, avoiding an extra allocation and pointer indirection. The
      32             : // complication comes from the fact that we want to implement the interfaces on
      33             : // pointer receivers but embed the non-pointer types within the struct. The D
      34             : // and I type parameters are the non-pointer data and index block iterator
      35             : // types, and the PD and PI type parameters are the *D and *I types that
      36             : // actually implement the DataBlockIterator and IndexBlockIterator constraints.
      37             : //
      38             : // Unfortunately, uses of the [data] and [index] fields must explicitly cast
      39             : // &data/&index to the PD/PI type in order to access its interface methods. This
      40             : // pattern is taken from the Go generics proposal:
      41             : // https://go.googlesource.com/proposal/+/refs/heads/master/design/43651-type-parameters.md#pointer-method-example
      42             : type singleLevelIterator[I any, PI indexBlockIterator[I], D any, PD dataBlockIterator[D]] struct {
      43             :         ctx context.Context
      44             :         cmp Compare
      45             :         // Global lower/upper bound for the iterator.
      46             :         lower []byte
      47             :         upper []byte
      48             :         bpfs  *BlockPropertiesFilterer
      49             :         // Per-block lower/upper bound. Nil if the bound does not apply to the block
      50             :         // because we determined the block lies completely within the bound.
      51             :         blockLower []byte
      52             :         blockUpper []byte
      53             :         reader     *Reader
      54             :         // vState will be set iff the iterator is constructed for virtual sstable
      55             :         // iteration.
      56             :         vState *virtualState
      57             :         // endKeyInclusive is set to force the iterator to treat the upper field as
      58             :         // inclusive while iterating instead of exclusive.
      59             :         endKeyInclusive       bool
      60             :         index                 I
      61             :         indexFilterRH         objstorage.ReadHandle
      62             :         indexFilterRHPrealloc objstorageprovider.PreallocatedReadHandle
      63             :         data                  D
      64             :         dataRH                objstorage.ReadHandle
      65             :         dataRHPrealloc        objstorageprovider.PreallocatedReadHandle
      66             :         // dataBH refers to the last data block that the iterator considered
      67             :         // loading. It may not actually have loaded the block, due to an error or
      68             :         // because it was considered irrelevant.
      69             :         dataBH   block.Handle
      70             :         vbReader *valueBlockReader
      71             :         // vbRH is the read handle for value blocks, which are in a different
      72             :         // part of the sstable than data blocks.
      73             :         vbRH         objstorage.ReadHandle
      74             :         vbRHPrealloc objstorageprovider.PreallocatedReadHandle
      75             :         err          error
      76             :         closeHook    func(i Iterator) error
      77             :         // stats and iterStats are slightly different. stats is a shared struct
      78             :         // supplied from the outside, and represents stats for the whole iterator
      79             :         // tree and can be reset from the outside (e.g. when the pebble.Iterator is
      80             :         // being reused). It is currently only provided when the iterator tree is
      81             :         // rooted at pebble.Iterator. iterStats is this sstable iterator's private
      82             :         // stats that are reported to a CategoryStatsCollector when this iterator is
      83             :         // closed. More paths are instrumented with this as the
      84             :         // CategoryStatsCollector needed for this is provided by the
      85             :         // tableCacheContainer (which is more universally used).
      86             :         stats      *base.InternalIteratorStats
      87             :         iterStats  iterStatsAccumulator
      88             :         bufferPool *block.BufferPool
      89             : 
      90             :         // boundsCmp and positionedUsingLatestBounds are for optimizing iteration
      91             :         // that uses multiple adjacent bounds. The seek after setting a new bound
      92             :         // can use the fact that the iterator is either within the previous bounds
      93             :         // or exactly one key before or after the bounds. If the new bounds is
      94             :         // after/before the previous bounds, and we are already positioned at a
      95             :         // block that is relevant for the new bounds, we can try to first position
      96             :         // using Next/Prev (repeatedly) instead of doing a more expensive seek.
      97             :         //
      98             :         // When there are wide files at higher levels that match the bounds
      99             :         // but don't have any data for the bound, we will already be
     100             :         // positioned at the key beyond the bounds and won't need to do much
     101             :         // work -- given that most data is in L6, such files are likely to
     102             :         // dominate the performance of the mergingIter, and may be the main
     103             :         // benefit of this performance optimization (of course it also helps
     104             :         // when the file that has the data has successive seeks that stay in
     105             :         // the same block).
     106             :         //
     107             :         // Specifically, boundsCmp captures the relationship between the previous
     108             :         // and current bounds, if the iterator had been positioned after setting
     109             :         // the previous bounds. If it was not positioned, i.e., Seek/First/Last
     110             :         // were not called, we don't know where it is positioned and cannot
     111             :         // optimize.
     112             :         //
     113             :         // Example: Bounds moving forward, and iterator exhausted in forward direction.
     114             :         //      bounds = [f, h), ^ shows block iterator position
     115             :         //  file contents [ a  b  c  d  e  f  g  h  i  j  k ]
     116             :         //                                       ^
     117             :         //  new bounds = [j, k). Since positionedUsingLatestBounds=true, boundsCmp is
     118             :         //  set to +1. SeekGE(j) can use next (the optimization also requires that j
     119             :         //  is within the block, but that is not for correctness, but to limit the
     120             :         //  optimization to when it will actually be an optimization).
     121             :         //
     122             :         // Example: Bounds moving forward.
     123             :         //      bounds = [f, h), ^ shows block iterator position
     124             :         //  file contents [ a  b  c  d  e  f  g  h  i  j  k ]
     125             :         //                                 ^
     126             :         //  new bounds = [j, k). Since positionedUsingLatestBounds=true, boundsCmp is
     127             :         //  set to +1. SeekGE(j) can use next.
     128             :         //
     129             :         // Example: Bounds moving forward, but iterator not positioned using previous
     130             :         //  bounds.
     131             :         //      bounds = [f, h), ^ shows block iterator position
     132             :         //  file contents [ a  b  c  d  e  f  g  h  i  j  k ]
     133             :         //                                             ^
     134             :         //  new bounds = [i, j). Iterator is at j since it was never positioned using
     135             :         //  [f, h). So positionedUsingLatestBounds=false, and boundsCmp is set to 0.
     136             :         //  SeekGE(i) will not use next.
     137             :         //
     138             :         // Example: Bounds moving forward and sparse file
     139             :         //      bounds = [f, h), ^ shows block iterator position
     140             :         //  file contents [ a z ]
     141             :         //                    ^
     142             :         //  new bounds = [j, k). Since positionedUsingLatestBounds=true, boundsCmp is
     143             :         //  set to +1. SeekGE(j) notices that the iterator is already past j and does
     144             :         //  not need to do anything.
     145             :         //
     146             :         // Similar examples can be constructed for backward iteration.
     147             :         //
     148             :         // This notion of exactly one key before or after the bounds is not quite
     149             :         // true when block properties are used to ignore blocks. In that case we
     150             :         // can't stop precisely at the first block that is past the bounds since
     151             :         // we are using the index entries to enforce the bounds.
     152             :         //
     153             :         // e.g. 3 blocks with keys [b, c]  [f, g], [i, j, k] with index entries d,
     154             :         // h, l. And let the lower bound be k, and we are reverse iterating. If
     155             :         // the block [i, j, k] is ignored due to the block interval annotations we
     156             :         // do need to move the index to block [f, g] since the index entry for the
     157             :         // [i, j, k] block is l which is not less than the lower bound of k. So we
     158             :         // have passed the entries i, j.
     159             :         //
     160             :         // This behavior is harmless since the block property filters are fixed
     161             :         // for the lifetime of the iterator so i, j are irrelevant. In addition,
     162             :         // the current code will not load the [f, g] block, so the seek
     163             :         // optimization that attempts to use Next/Prev do not apply anyway.
     164             :         boundsCmp                   int
     165             :         positionedUsingLatestBounds bool
     166             : 
     167             :         // exhaustedBounds represents whether the iterator is exhausted for
     168             :         // iteration by reaching the upper or lower bound. +1 when exhausted
     169             :         // the upper bound, -1 when exhausted the lower bound, and 0 when
     170             :         // neither. exhaustedBounds is also used for the TrySeekUsingNext
     171             :         // optimization in twoLevelIterator and singleLevelIterator. Care should be
     172             :         // taken in setting this in twoLevelIterator before calling into
     173             :         // singleLevelIterator, given that these two iterators share this field.
     174             :         exhaustedBounds int8
     175             : 
     176             :         // useFilterBlock controls whether the bloom filter block in this sstable, if
     177             :         // present, should be used for prefix seeks or not. In some cases it is
     178             :         // beneficial to skip a filter block even if it exists (eg. if probability of
     179             :         // a match is high).
     180             :         useFilterBlock         bool
     181             :         lastBloomFilterMatched bool
     182             : 
     183             :         transforms IterTransforms
     184             : 
     185             :         // inPool is set to true before putting the iterator in the reusable pool;
     186             :         // used to detect double-close.
     187             :         inPool bool
     188             :         // pool is the pool from which the iterator was allocated and to which the
     189             :         // iterator should be returned on Close. Because the iterator is
     190             :         // parameterized by the type of the data block iterator, pools must be
     191             :         // specific to the type of the data block iterator.
     192             :         //
     193             :         // If the iterator is embedded within a twoLevelIterator, pool is nil and
     194             :         // the twoLevelIterator.pool field may be non-nil.
     195             :         pool *sync.Pool
     196             : }
     197             : 
     198             : // singleLevelIterator implements the base.InternalIterator interface.
     199             : var _ base.InternalIterator = (*singleLevelIterator[rowblk.IndexIter, *rowblk.IndexIter, rowblk.Iter, *rowblk.Iter])(nil)
     200             : 
     201             : // newRowBlockSingleLevelIterator reads the index block and creates and
     202             : // initializes a singleLevelIterator over an sstable with row-oriented data
     203             : // blocks.
     204             : //
     205             : // Note that lower, upper are iterator bounds and are separate from virtual
     206             : // sstable bounds. If the virtualState passed in is not nil, then virtual
     207             : // sstable bounds will be enforced.
     208             : func newRowBlockSingleLevelIterator(
     209             :         ctx context.Context,
     210             :         r *Reader,
     211             :         v *virtualState,
     212             :         transforms IterTransforms,
     213             :         lower, upper []byte,
     214             :         filterer *BlockPropertiesFilterer,
     215             :         filterBlockSizeLimit FilterBlockSizeLimit,
     216             :         stats *base.InternalIteratorStats,
     217             :         categoryAndQoS CategoryAndQoS,
     218             :         statsCollector *CategoryStatsCollector,
     219             :         rp ReaderProvider,
     220             :         bufferPool *block.BufferPool,
     221           2 : ) (*singleLevelIterator[rowblk.IndexIter, *rowblk.IndexIter, rowblk.Iter, *rowblk.Iter], error) {
     222           2 :         if r.err != nil {
     223           0 :                 return nil, r.err
     224           0 :         }
     225             :         // TODO(jackson): When we have a columnar-block sstable format, assert that
     226             :         // the table format is row-oriented.
     227           2 :         i := singleLevelIterRowBlockPool.Get().(*singleLevelIterator[rowblk.IndexIter, *rowblk.IndexIter, rowblk.Iter, *rowblk.Iter])
     228           2 :         useFilterBlock := shouldUseFilterBlock(r, filterBlockSizeLimit)
     229           2 :         i.init(
     230           2 :                 ctx, r, v, transforms, lower, upper, filterer, useFilterBlock,
     231           2 :                 stats, categoryAndQoS, statsCollector, bufferPool,
     232           2 :         )
     233           2 :         if r.tableFormat >= TableFormatPebblev3 {
     234           2 :                 if r.Properties.NumValueBlocks > 0 {
     235           2 :                         // NB: we cannot avoid this ~248 byte allocation, since valueBlockReader
     236           2 :                         // can outlive the singleLevelIterator due to be being embedded in a
     237           2 :                         // LazyValue. This consumes ~2% in microbenchmark CPU profiles, but we
     238           2 :                         // should only optimize this if it shows up as significant in end-to-end
     239           2 :                         // CockroachDB benchmarks, since it is tricky to do so. One possibility
     240           2 :                         // is that if many sstable iterators only get positioned at latest
     241           2 :                         // versions of keys, and therefore never expose a LazyValue that is
     242           2 :                         // separated to their callers, they can put this valueBlockReader into a
     243           2 :                         // sync.Pool.
     244           2 :                         i.vbReader = &valueBlockReader{
     245           2 :                                 bpOpen: i,
     246           2 :                                 rp:     rp,
     247           2 :                                 vbih:   r.valueBIH,
     248           2 :                                 stats:  stats,
     249           2 :                         }
     250           2 :                         (&i.data).SetGetLazyValuer(i.vbReader)
     251           2 :                         i.vbRH = objstorageprovider.UsePreallocatedReadHandle(r.readable, objstorage.NoReadBefore, &i.vbRHPrealloc)
     252           2 :                 }
     253           2 :                 i.data.SetHasValuePrefix(true)
     254             :         }
     255             : 
     256           2 :         indexH, err := r.readIndex(ctx, i.indexFilterRH, stats, &i.iterStats)
     257           2 :         if err == nil {
     258           2 :                 err = i.index.InitHandle(i.cmp, r.Split, indexH, transforms)
     259           2 :         }
     260           2 :         if err != nil {
     261           1 :                 _ = i.Close()
     262           1 :                 return nil, err
     263           1 :         }
     264           2 :         return i, nil
     265             : }
     266             : 
     267             : // init initializes the singleLevelIterator struct. It does not read the index.
     268             : func (i *singleLevelIterator[I, PI, D, PD]) init(
     269             :         ctx context.Context,
     270             :         r *Reader,
     271             :         v *virtualState,
     272             :         transforms IterTransforms,
     273             :         lower, upper []byte,
     274             :         filterer *BlockPropertiesFilterer,
     275             :         useFilterBlock bool,
     276             :         stats *base.InternalIteratorStats,
     277             :         categoryAndQoS CategoryAndQoS,
     278             :         statsCollector *CategoryStatsCollector,
     279             :         bufferPool *block.BufferPool,
     280           2 : ) {
     281           2 :         i.inPool = false
     282           2 :         i.ctx = ctx
     283           2 :         i.lower = lower
     284           2 :         i.upper = upper
     285           2 :         i.bpfs = filterer
     286           2 :         i.useFilterBlock = useFilterBlock
     287           2 :         i.reader = r
     288           2 :         i.cmp = r.Compare
     289           2 :         i.stats = stats
     290           2 :         i.transforms = transforms
     291           2 :         i.bufferPool = bufferPool
     292           2 :         if v != nil {
     293           2 :                 i.vState = v
     294           2 :                 i.endKeyInclusive, i.lower, i.upper = v.constrainBounds(lower, upper, false /* endInclusive */)
     295           2 :         }
     296             : 
     297           2 :         i.iterStats.init(categoryAndQoS, statsCollector)
     298           2 : 
     299           2 :         i.indexFilterRH = objstorageprovider.UsePreallocatedReadHandle(
     300           2 :                 r.readable, objstorage.ReadBeforeForIndexAndFilter, &i.indexFilterRHPrealloc)
     301           2 :         i.dataRH = objstorageprovider.UsePreallocatedReadHandle(
     302           2 :                 r.readable, objstorage.NoReadBefore, &i.dataRHPrealloc)
     303             : }
     304             : 
     305             : // Helper function to check if keys returned from iterator are within virtual bounds.
     306           2 : func (i *singleLevelIterator[I, PI, D, PD]) maybeVerifyKey(kv *base.InternalKV) *base.InternalKV {
     307           2 :         if invariants.Enabled && kv != nil && i.vState != nil {
     308           2 :                 key := kv.K.UserKey
     309           2 :                 v := i.vState
     310           2 :                 lc := i.cmp(key, v.lower.UserKey)
     311           2 :                 uc := i.cmp(key, v.upper.UserKey)
     312           2 :                 if lc < 0 || uc > 0 || (uc == 0 && v.upper.IsExclusiveSentinel()) {
     313           0 :                         panic(fmt.Sprintf("key %q out of singleLeveliterator virtual bounds %s %s", key, v.lower.UserKey, v.upper.UserKey))
     314             :                 }
     315             :         }
     316           2 :         return kv
     317             : }
     318             : 
     319             : // SetupForCompaction sets up the singleLevelIterator for use with compactionIter.
     320             : // Currently, it skips readahead ramp-up. It should be called after init is called.
     321           2 : func (i *singleLevelIterator[I, PI, D, PD]) SetupForCompaction() {
     322           2 :         i.dataRH.SetupForCompaction()
     323           2 :         if i.vbRH != nil {
     324           2 :                 i.vbRH.SetupForCompaction()
     325           2 :         }
     326             : }
     327             : 
     328           2 : func (i *singleLevelIterator[I, PI, D, PD]) resetForReuse() singleLevelIterator[I, PI, D, PD] {
     329           2 :         return singleLevelIterator[I, PI, D, PD]{
     330           2 :                 index:  PI(&i.index).ResetForReuse(),
     331           2 :                 data:   PD(&i.data).ResetForReuse(),
     332           2 :                 pool:   i.pool,
     333           2 :                 inPool: true,
     334           2 :         }
     335           2 : }
     336             : 
     337           2 : func (i *singleLevelIterator[I, PI, D, PD]) initBounds() {
     338           2 :         // Trim the iteration bounds for the current block. We don't have to check
     339           2 :         // the bounds on each iteration if the block is entirely contained within the
     340           2 :         // iteration bounds.
     341           2 :         i.blockLower = i.lower
     342           2 :         if i.blockLower != nil {
     343           2 :                 kv := PD(&i.data).First()
     344           2 :                 // TODO(radu): this should be <= 0
     345           2 :                 if kv != nil && i.cmp(i.blockLower, kv.K.UserKey) < 0 {
     346           2 :                         // The lower-bound is less than the first key in the block. No need
     347           2 :                         // to check the lower-bound again for this block.
     348           2 :                         i.blockLower = nil
     349           2 :                 }
     350             :         }
     351           2 :         i.blockUpper = i.upper
     352           2 :         // TODO(radu): this should be >= 0 if blockUpper is inclusive.
     353           2 :         if i.blockUpper != nil && i.cmp(i.blockUpper, PI(&i.index).Separator()) > 0 {
     354           2 :                 // The upper-bound is greater than the index key which itself is greater
     355           2 :                 // than or equal to every key in the block. No need to check the
     356           2 :                 // upper-bound again for this block. Even if blockUpper is inclusive
     357           2 :                 // because of upper being inclusive, we can still safely set blockUpper
     358           2 :                 // to nil here.
     359           2 :                 i.blockUpper = nil
     360           2 :         }
     361             : }
     362             : 
     363           2 : func (i *singleLevelIterator[I, PI, D, PD]) initBoundsForAlreadyLoadedBlock() {
     364           2 :         // TODO(radu): determine automatically if we need to call First or not and
     365           2 :         // unify this function with initBounds().
     366           2 :         if PD(&i.data).FirstUserKey() == nil {
     367           0 :                 panic("initBoundsForAlreadyLoadedBlock must not be called on empty or corrupted block")
     368             :         }
     369           2 :         i.blockLower = i.lower
     370           2 :         if i.blockLower != nil {
     371           2 :                 firstUserKey := PD(&i.data).FirstUserKey()
     372           2 :                 // TODO(radu): this should be <= 0
     373           2 :                 if firstUserKey != nil && i.cmp(i.blockLower, firstUserKey) < 0 {
     374           2 :                         // The lower-bound is less than the first key in the block. No need
     375           2 :                         // to check the lower-bound again for this block.
     376           2 :                         i.blockLower = nil
     377           2 :                 }
     378             :         }
     379           2 :         i.blockUpper = i.upper
     380           2 :         // TODO(radu): this should be >= 0 if blockUpper is inclusive.
     381           2 :         if i.blockUpper != nil && i.cmp(i.blockUpper, PI(&i.index).Separator()) > 0 {
     382           2 :                 // The upper-bound is greater than the index key which itself is greater
     383           2 :                 // than or equal to every key in the block. No need to check the
     384           2 :                 // upper-bound again for this block.
     385           2 :                 i.blockUpper = nil
     386           2 :         }
     387             : }
     388             : 
     389             : // Deterministic disabling (in testing mode) of the bounds-based optimization
     390             : // that avoids seeking. Uses the iterator pointer, since we want diversity in
     391             : // iterator behavior for the same SetBounds call. Used for tests.
     392           2 : func testingDisableBoundsOpt(bound []byte, ptr uintptr) bool {
     393           2 :         if !invariants.Enabled || ensureBoundsOptDeterminism {
     394           1 :                 return false
     395           1 :         }
     396             :         // Fibonacci hash https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
     397           2 :         simpleHash := (11400714819323198485 * uint64(ptr)) >> 63
     398           2 :         return bound[len(bound)-1]&byte(1) == 0 && simpleHash == 0
     399             : }
     400             : 
     401             : // ensureBoundsOptDeterminism provides a facility for disabling of the bounds
     402             : // optimizations performed by disableBoundsOpt for tests that require
     403             : // deterministic iterator behavior. Some unit tests examine internal iterator
     404             : // state and require this behavior to be deterministic.
     405             : var ensureBoundsOptDeterminism bool
     406             : 
     407             : // SetBoundsWithSyntheticPrefix indicates whether this iterator requires keys
     408             : // passed to its SetBounds() method by a prefix rewriting wrapper to be *not*
     409             : // rewritten to be in terms of this iterator's content, but instead be passed
     410             : // as-is, i.e. with the synthetic prefix still on them.
     411             : //
     412             : // This allows an optimization when this iterator is passing these bounds on to
     413             : // a vState to additionally constrain them. In said vState, passed bounds are
     414             : // combined with the vState bounds which are in terms of the rewritten prefix.
     415             : // If the caller rewrote bounds to be in terms of content prefix and SetBounds
     416             : // passed those to vState, the vState would need to *un*rewrite them back to the
     417             : // synthetic prefix in order to combine them with the vState bounds. Thus, if
     418             : // this iterator knows bounds will be passed to vState, it can signal that it
     419             : // they should be passed without being rewritten to skip converting to and fro.
     420           0 : func (i singleLevelIterator[I, PI, P, PD]) SetBoundsWithSyntheticPrefix() bool {
     421           0 :         return i.vState != nil
     422           0 : }
     423             : 
     424             : // SetBounds implements internalIterator.SetBounds, as documented in the pebble
     425             : // package. Note that the upper field is exclusive.
     426           2 : func (i *singleLevelIterator[I, PI, P, PD]) SetBounds(lower, upper []byte) {
     427           2 :         i.boundsCmp = 0
     428           2 :         if i.vState != nil {
     429           2 :                 // If the reader is constructed for a virtual sstable, then we must
     430           2 :                 // constrain the bounds of the reader. For physical sstables, the bounds
     431           2 :                 // can be wider than the actual sstable's bounds because we won't
     432           2 :                 // accidentally expose additional keys as there are no additional keys.
     433           2 :                 i.endKeyInclusive, lower, upper = i.vState.constrainBounds(
     434           2 :                         lower, upper, false,
     435           2 :                 )
     436           2 :         } else {
     437           2 :                 // TODO(bananabrick): Figure out the logic here to enable the boundsCmp
     438           2 :                 // optimization for virtual sstables.
     439           2 :                 if i.positionedUsingLatestBounds {
     440           2 :                         if i.upper != nil && lower != nil && i.cmp(i.upper, lower) <= 0 {
     441           2 :                                 i.boundsCmp = +1
     442           2 :                                 if testingDisableBoundsOpt(lower, uintptr(unsafe.Pointer(i))) {
     443           2 :                                         i.boundsCmp = 0
     444           2 :                                 }
     445           2 :                         } else if i.lower != nil && upper != nil && i.cmp(upper, i.lower) <= 0 {
     446           2 :                                 i.boundsCmp = -1
     447           2 :                                 if testingDisableBoundsOpt(upper, uintptr(unsafe.Pointer(i))) {
     448           2 :                                         i.boundsCmp = 0
     449           2 :                                 }
     450             :                         }
     451             :                 }
     452             :         }
     453             : 
     454           2 :         i.positionedUsingLatestBounds = false
     455           2 :         i.lower = lower
     456           2 :         i.upper = upper
     457           2 :         i.blockLower = nil
     458           2 :         i.blockUpper = nil
     459             : }
     460             : 
     461           0 : func (i *singleLevelIterator[I, PI, P, PD]) SetContext(ctx context.Context) {
     462           0 :         i.ctx = ctx
     463           0 : }
     464             : 
     465             : // loadBlock loads the block at the current index position and leaves i.data
     466             : // unpositioned. If unsuccessful, it sets i.err to any error encountered, which
     467             : // may be nil if we have simply exhausted the entire table.
     468           2 : func (i *singleLevelIterator[I, PI, P, PD]) loadBlock(dir int8) loadBlockResult {
     469           2 :         if !PI(&i.index).Valid() {
     470           0 :                 // Ensure the data block iterator is invalidated even if loading of the block
     471           0 :                 // fails.
     472           0 :                 PD(&i.data).Invalidate()
     473           0 :                 return loadBlockFailed
     474           0 :         }
     475             :         // Load the next block.
     476           2 :         bhp, err := PI(&i.index).BlockHandleWithProperties()
     477           2 :         if i.dataBH == bhp.Handle && PD(&i.data).Valid() {
     478           2 :                 // We're already at the data block we want to load. Reset bounds in case
     479           2 :                 // they changed since the last seek, but don't reload the block from cache
     480           2 :                 // or disk.
     481           2 :                 //
     482           2 :                 // It's safe to leave i.data in its original state here, as all callers to
     483           2 :                 // loadBlock make an absolute positioning call (i.e. a seek, first, or last)
     484           2 :                 // to `i.data` right after loadBlock returns loadBlockOK.
     485           2 :                 i.initBounds()
     486           2 :                 return loadBlockOK
     487           2 :         }
     488             :         // Ensure the data block iterator is invalidated even if loading of the block
     489             :         // fails.
     490           2 :         PD(&i.data).Invalidate()
     491           2 :         i.dataBH = bhp.Handle
     492           2 :         if err != nil {
     493           0 :                 i.err = errCorruptIndexEntry(err)
     494           0 :                 return loadBlockFailed
     495           0 :         }
     496           2 :         if i.bpfs != nil {
     497           2 :                 intersects, err := i.bpfs.intersects(bhp.Props)
     498           2 :                 if err != nil {
     499           0 :                         i.err = errCorruptIndexEntry(err)
     500           0 :                         return loadBlockFailed
     501           0 :                 }
     502           2 :                 if intersects == blockMaybeExcluded {
     503           2 :                         intersects = i.resolveMaybeExcluded(dir)
     504           2 :                 }
     505           2 :                 if intersects == blockExcluded {
     506           2 :                         return loadBlockIrrelevant
     507           2 :                 }
     508             :                 // blockIntersects
     509             :         }
     510           2 :         ctx := objiotracing.WithBlockType(i.ctx, objiotracing.DataBlock)
     511           2 :         block, err := i.reader.readBlock(
     512           2 :                 ctx, i.dataBH, nil /* transform */, i.dataRH, i.stats, &i.iterStats, i.bufferPool)
     513           2 :         if err != nil {
     514           1 :                 i.err = err
     515           1 :                 return loadBlockFailed
     516           1 :         }
     517           2 :         i.err = PD(&i.data).InitHandle(i.cmp, i.reader.Split, block, i.transforms)
     518           2 :         if i.err != nil {
     519           0 :                 // The block is partially loaded, and we don't want it to appear valid.
     520           0 :                 PD(&i.data).Invalidate()
     521           0 :                 return loadBlockFailed
     522           0 :         }
     523           2 :         i.initBounds()
     524           2 :         return loadBlockOK
     525             : }
     526             : 
     527             : // readBlockForVBR implements the blockProviderWhenOpen interface for use by
     528             : // the valueBlockReader.
     529             : func (i *singleLevelIterator[I, PI, D, PD]) readBlockForVBR(
     530             :         h block.Handle, stats *base.InternalIteratorStats,
     531           2 : ) (block.BufferHandle, error) {
     532           2 :         ctx := objiotracing.WithBlockType(i.ctx, objiotracing.ValueBlock)
     533           2 :         return i.reader.readBlock(ctx, h, nil, i.vbRH, stats, &i.iterStats, i.bufferPool)
     534           2 : }
     535             : 
     536             : // resolveMaybeExcluded is invoked when the block-property filterer has found
     537             : // that a block is excluded according to its properties but only if its bounds
     538             : // fall within the filter's current bounds.  This function consults the
     539             : // apprioriate bound, depending on the iteration direction, and returns either
     540             : // `blockIntersects` or `blockExcluded`.
     541           2 : func (i *singleLevelIterator[I, PI, D, PD]) resolveMaybeExcluded(dir int8) intersectsResult {
     542           2 :         // TODO(jackson): We could first try comparing to top-level index block's
     543           2 :         // key, and if within bounds avoid per-data block key comparisons.
     544           2 : 
     545           2 :         // This iterator is configured with a bound-limited block property
     546           2 :         // filter. The bpf determined this block could be excluded from
     547           2 :         // iteration based on the property encoded in the block handle.
     548           2 :         // However, we still need to determine if the block is wholly
     549           2 :         // contained within the filter's key bounds.
     550           2 :         //
     551           2 :         // External guarantees ensure all the block's keys are ≥ the
     552           2 :         // filter's lower bound during forward iteration, and that all the
     553           2 :         // block's keys are < the filter's upper bound during backward
     554           2 :         // iteration. We only need to determine if the opposite bound is
     555           2 :         // also met.
     556           2 :         //
     557           2 :         // The index separator in index.Key() provides an inclusive
     558           2 :         // upper-bound for the data block's keys, guaranteeing that all its
     559           2 :         // keys are ≤ index.Key(). For forward iteration, this is all we
     560           2 :         // need.
     561           2 :         if dir > 0 {
     562           2 :                 // Forward iteration.
     563           2 :                 if i.bpfs.boundLimitedFilter.KeyIsWithinUpperBound(PI(&i.index).Separator()) {
     564           2 :                         return blockExcluded
     565           2 :                 }
     566           2 :                 return blockIntersects
     567             :         }
     568             : 
     569             :         // Reverse iteration.
     570             :         //
     571             :         // Because we're iterating in the reverse direction, we don't yet have
     572             :         // enough context available to determine if the block is wholly contained
     573             :         // within its bounds. This case arises only during backward iteration,
     574             :         // because of the way the index is structured.
     575             :         //
     576             :         // Consider a bound-limited bpf limited to the bounds [b,d), loading the
     577             :         // block with separator `c`. During reverse iteration, the guarantee that
     578             :         // all the block's keys are < `d` is externally provided, but no guarantee
     579             :         // is made on the bpf's lower bound. The separator `c` only provides an
     580             :         // inclusive upper bound on the block's keys, indicating that the
     581             :         // corresponding block handle points to a block containing only keys ≤ `c`.
     582             :         //
     583             :         // To establish a lower bound, we step the index backwards to read the
     584             :         // previous block's separator, which provides an inclusive lower bound on
     585             :         // the original block's keys. Afterwards, we step forward to restore our
     586             :         // index position.
     587           2 :         if !PI(&i.index).Prev() {
     588           2 :                 // The original block points to the first block of this index block. If
     589           2 :                 // there's a two-level index, it could potentially provide a lower
     590           2 :                 // bound, but the code refactoring necessary to read it doesn't seem
     591           2 :                 // worth the payoff. We fall through to loading the block.
     592           2 :         } else if i.bpfs.boundLimitedFilter.KeyIsWithinLowerBound(PI(&i.index).Separator()) {
     593           2 :                 // The lower-bound on the original block falls within the filter's
     594           2 :                 // bounds, and we can skip the block (after restoring our current index
     595           2 :                 // position).
     596           2 :                 _ = PI(&i.index).Next()
     597           2 :                 return blockExcluded
     598           2 :         }
     599           2 :         _ = PI(&i.index).Next()
     600           2 :         return blockIntersects
     601             : }
     602             : 
     603             : // The number of times to call Next/Prev in a block before giving up and seeking.
     604             : // The value of 4 is arbitrary.
     605             : // TODO(sumeer): experiment with dynamic adjustment based on the history of
     606             : // seeks for a particular iterator.
     607             : const numStepsBeforeSeek = 4
     608             : 
     609             : func (i *singleLevelIterator[I, PI, D, PD]) trySeekGEUsingNextWithinBlock(
     610             :         key []byte,
     611           2 : ) (kv *base.InternalKV, done bool) {
     612           2 :         kv = PD(&i.data).KV()
     613           2 :         for j := 0; j < numStepsBeforeSeek; j++ {
     614           2 :                 curKeyCmp := i.cmp(kv.K.UserKey, key)
     615           2 :                 if curKeyCmp >= 0 {
     616           2 :                         if i.blockUpper != nil {
     617           2 :                                 cmp := i.cmp(kv.K.UserKey, i.blockUpper)
     618           2 :                                 if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
     619           2 :                                         i.exhaustedBounds = +1
     620           2 :                                         return nil, true
     621           2 :                                 }
     622             :                         }
     623           2 :                         return kv, true
     624             :                 }
     625           2 :                 kv = PD(&i.data).Next()
     626           2 :                 if kv == nil {
     627           1 :                         break
     628             :                 }
     629             :         }
     630           2 :         return kv, false
     631             : }
     632             : 
     633             : func (i *singleLevelIterator[I, PI, D, PD]) trySeekLTUsingPrevWithinBlock(
     634             :         key []byte,
     635           2 : ) (kv *base.InternalKV, done bool) {
     636           2 :         kv = PD(&i.data).KV()
     637           2 :         for j := 0; j < numStepsBeforeSeek; j++ {
     638           2 :                 curKeyCmp := i.cmp(kv.K.UserKey, key)
     639           2 :                 if curKeyCmp < 0 {
     640           2 :                         if i.blockLower != nil && i.cmp(kv.K.UserKey, i.blockLower) < 0 {
     641           2 :                                 i.exhaustedBounds = -1
     642           2 :                                 return nil, true
     643           2 :                         }
     644           2 :                         return kv, true
     645             :                 }
     646           2 :                 kv = PD(&i.data).Prev()
     647           2 :                 if kv == nil {
     648           1 :                         break
     649             :                 }
     650             :         }
     651           2 :         return kv, false
     652             : }
     653             : 
     654             : // SeekGE implements internalIterator.SeekGE, as documented in the pebble
     655             : // package. Note that SeekGE only checks the upper bound. It is up to the
     656             : // caller to ensure that key is greater than or equal to the lower bound.
     657             : func (i *singleLevelIterator[I, PI, D, PD]) SeekGE(
     658             :         key []byte, flags base.SeekGEFlags,
     659           2 : ) *base.InternalKV {
     660           2 :         if i.vState != nil {
     661           2 :                 // Callers of SeekGE don't know about virtual sstable bounds, so we may
     662           2 :                 // have to internally restrict the bounds.
     663           2 :                 //
     664           2 :                 // TODO(bananabrick): We can optimize this check away for the level iter
     665           2 :                 // if necessary.
     666           2 :                 if i.cmp(key, i.lower) < 0 {
     667           2 :                         key = i.lower
     668           2 :                 }
     669             :         }
     670             : 
     671           2 :         if flags.TrySeekUsingNext() {
     672           2 :                 // The i.exhaustedBounds comparison indicates that the upper bound was
     673           2 :                 // reached. The i.data.isDataInvalidated() indicates that the sstable was
     674           2 :                 // exhausted.
     675           2 :                 if (i.exhaustedBounds == +1 || PD(&i.data).IsDataInvalidated()) && i.err == nil {
     676           2 :                         // Already exhausted, so return nil.
     677           2 :                         return nil
     678           2 :                 }
     679           2 :                 if i.err != nil {
     680           0 :                         // The current iterator position cannot be used.
     681           0 :                         flags = flags.DisableTrySeekUsingNext()
     682           0 :                 }
     683             :                 // INVARIANT: flags.TrySeekUsingNext() => i.err == nil &&
     684             :                 // !i.exhaustedBounds==+1 && !i.data.isDataInvalidated(). That is,
     685             :                 // data-exhausted and bounds-exhausted, as defined earlier, are both
     686             :                 // false. Ths makes it safe to clear out i.exhaustedBounds and i.err
     687             :                 // before calling into seekGEHelper.
     688             :         }
     689             : 
     690           2 :         i.exhaustedBounds = 0
     691           2 :         i.err = nil // clear cached iteration error
     692           2 :         boundsCmp := i.boundsCmp
     693           2 :         // Seek optimization only applies until iterator is first positioned after SetBounds.
     694           2 :         i.boundsCmp = 0
     695           2 :         i.positionedUsingLatestBounds = true
     696           2 :         return i.seekGEHelper(key, boundsCmp, flags)
     697             : }
     698             : 
     699             : // seekGEHelper contains the common functionality for SeekGE and SeekPrefixGE.
     700             : func (i *singleLevelIterator[I, PI, D, PD]) seekGEHelper(
     701             :         key []byte, boundsCmp int, flags base.SeekGEFlags,
     702           2 : ) *base.InternalKV {
     703           2 :         // Invariant: trySeekUsingNext => !i.data.isDataInvalidated() && i.exhaustedBounds != +1
     704           2 : 
     705           2 :         // SeekGE performs various step-instead-of-seeking optimizations: eg enabled
     706           2 :         // by trySeekUsingNext, or by monotonically increasing bounds (i.boundsCmp).
     707           2 : 
     708           2 :         var dontSeekWithinBlock bool
     709           2 :         if !PD(&i.data).IsDataInvalidated() && PD(&i.data).Valid() && PI(&i.index).Valid() &&
     710           2 :                 boundsCmp > 0 && i.cmp(key, PI(&i.index).Separator()) <= 0 {
     711           2 :                 // Fast-path: The bounds have moved forward and this SeekGE is
     712           2 :                 // respecting the lower bound (guaranteed by Iterator). We know that
     713           2 :                 // the iterator must already be positioned within or just outside the
     714           2 :                 // previous bounds. Therefore it cannot be positioned at a block (or
     715           2 :                 // the position within that block) that is ahead of the seek position.
     716           2 :                 // However it can be positioned at an earlier block. This fast-path to
     717           2 :                 // use Next() on the block is only applied when we are already at the
     718           2 :                 // block that the slow-path (the else-clause) would load -- this is
     719           2 :                 // the motivation for the i.cmp(key, i.index.Key().UserKey) <= 0
     720           2 :                 // predicate.
     721           2 :                 i.initBoundsForAlreadyLoadedBlock()
     722           2 :                 kv, done := i.trySeekGEUsingNextWithinBlock(key)
     723           2 :                 if done {
     724           2 :                         return kv
     725           2 :                 }
     726           2 :                 if kv == nil {
     727           1 :                         // Done with this block.
     728           1 :                         dontSeekWithinBlock = true
     729           1 :                 }
     730           2 :         } else {
     731           2 :                 // Cannot use bounds monotonicity. But may be able to optimize if
     732           2 :                 // caller claimed externally known invariant represented by
     733           2 :                 // flags.TrySeekUsingNext().
     734           2 :                 if flags.TrySeekUsingNext() {
     735           2 :                         // seekPrefixGE or SeekGE has already ensured
     736           2 :                         // !i.data.isDataInvalidated() && i.exhaustedBounds != +1
     737           2 :                         curr := PD(&i.data).KV()
     738           2 :                         less := i.cmp(curr.K.UserKey, key) < 0
     739           2 :                         // We could be more sophisticated and confirm that the seek
     740           2 :                         // position is within the current block before applying this
     741           2 :                         // optimization. But there may be some benefit even if it is in
     742           2 :                         // the next block, since we can avoid seeking i.index.
     743           2 :                         for j := 0; less && j < numStepsBeforeSeek; j++ {
     744           2 :                                 curr = i.Next()
     745           2 :                                 if curr == nil {
     746           2 :                                         return nil
     747           2 :                                 }
     748           2 :                                 less = i.cmp(curr.K.UserKey, key) < 0
     749             :                         }
     750           2 :                         if !less {
     751           2 :                                 if i.blockUpper != nil {
     752           2 :                                         cmp := i.cmp(curr.K.UserKey, i.blockUpper)
     753           2 :                                         if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
     754           0 :                                                 i.exhaustedBounds = +1
     755           0 :                                                 return nil
     756           0 :                                         }
     757             :                                 }
     758           2 :                                 return curr
     759             :                         }
     760             :                 }
     761             : 
     762             :                 // Slow-path.
     763             : 
     764           2 :                 if !PI(&i.index).SeekGE(key) {
     765           2 :                         // The target key is greater than any key in the index block.
     766           2 :                         // Invalidate the block iterator so that a subsequent call to Prev()
     767           2 :                         // will return the last key in the table.
     768           2 :                         PD(&i.data).Invalidate()
     769           2 :                         return nil
     770           2 :                 }
     771           2 :                 result := i.loadBlock(+1)
     772           2 :                 if result == loadBlockFailed {
     773           1 :                         return nil
     774           1 :                 }
     775           2 :                 if result == loadBlockIrrelevant {
     776           2 :                         // Enforce the upper bound here since don't want to bother moving
     777           2 :                         // to the next block if upper bound is already exceeded. Note that
     778           2 :                         // the next block starts with keys >= ikey.UserKey since even
     779           2 :                         // though this is the block separator, the same user key can span
     780           2 :                         // multiple blocks. If upper is exclusive we use >= below, else
     781           2 :                         // we use >.
     782           2 :                         if i.upper != nil {
     783           2 :                                 cmp := i.cmp(PI(&i.index).Separator(), i.upper)
     784           2 :                                 if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
     785           2 :                                         i.exhaustedBounds = +1
     786           2 :                                         return nil
     787           2 :                                 }
     788             :                         }
     789             :                         // Want to skip to the next block.
     790           2 :                         dontSeekWithinBlock = true
     791             :                 }
     792             :         }
     793           2 :         if !dontSeekWithinBlock {
     794           2 :                 if ikv := PD(&i.data).SeekGE(key, flags.DisableTrySeekUsingNext()); ikv != nil {
     795           2 :                         if i.blockUpper != nil {
     796           2 :                                 cmp := i.cmp(ikv.K.UserKey, i.blockUpper)
     797           2 :                                 if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
     798           2 :                                         i.exhaustedBounds = +1
     799           2 :                                         return nil
     800           2 :                                 }
     801             :                         }
     802           2 :                         return ikv
     803             :                 }
     804             :         }
     805           2 :         return i.skipForward()
     806             : }
     807             : 
     808             : // SeekPrefixGE implements internalIterator.SeekPrefixGE, as documented in the
     809             : // pebble package. Note that SeekPrefixGE only checks the upper bound. It is up
     810             : // to the caller to ensure that key is greater than or equal to the lower bound.
     811             : func (i *singleLevelIterator[I, PI, D, PD]) SeekPrefixGE(
     812             :         prefix, key []byte, flags base.SeekGEFlags,
     813           2 : ) *base.InternalKV {
     814           2 :         if i.vState != nil {
     815           2 :                 // Callers of SeekPrefixGE aren't aware of virtual sstable bounds, so
     816           2 :                 // we may have to internally restrict the bounds.
     817           2 :                 //
     818           2 :                 // TODO(bananabrick): We can optimize away this check for the level iter
     819           2 :                 // if necessary.
     820           2 :                 if i.cmp(key, i.lower) < 0 {
     821           2 :                         key = i.lower
     822           2 :                 }
     823             :         }
     824           2 :         return i.seekPrefixGE(prefix, key, flags)
     825             : }
     826             : 
     827             : func (i *singleLevelIterator[I, PI, D, PD]) seekPrefixGE(
     828             :         prefix, key []byte, flags base.SeekGEFlags,
     829           2 : ) (kv *base.InternalKV) {
     830           2 :         // NOTE: prefix is only used for bloom filter checking and not later work in
     831           2 :         // this method. Hence, we can use the existing iterator position if the last
     832           2 :         // SeekPrefixGE did not fail bloom filter matching.
     833           2 : 
     834           2 :         err := i.err
     835           2 :         i.err = nil // clear cached iteration error
     836           2 :         if i.useFilterBlock {
     837           2 :                 if !i.lastBloomFilterMatched {
     838           2 :                         // Iterator is not positioned based on last seek.
     839           2 :                         flags = flags.DisableTrySeekUsingNext()
     840           2 :                 }
     841           2 :                 i.lastBloomFilterMatched = false
     842           2 :                 // Check prefix bloom filter.
     843           2 :                 var mayContain bool
     844           2 :                 mayContain, i.err = i.bloomFilterMayContain(prefix)
     845           2 :                 if i.err != nil || !mayContain {
     846           2 :                         // In the i.err == nil case, this invalidation may not be necessary for
     847           2 :                         // correctness, and may be a place to optimize later by reusing the
     848           2 :                         // already loaded block. It was necessary in earlier versions of the code
     849           2 :                         // since the caller was allowed to call Next when SeekPrefixGE returned
     850           2 :                         // nil. This is no longer allowed.
     851           2 :                         PD(&i.data).Invalidate()
     852           2 :                         return nil
     853           2 :                 }
     854           2 :                 i.lastBloomFilterMatched = true
     855             :         }
     856           2 :         if flags.TrySeekUsingNext() {
     857           2 :                 // The i.exhaustedBounds comparison indicates that the upper bound was
     858           2 :                 // reached. The i.data.isDataInvalidated() indicates that the sstable was
     859           2 :                 // exhausted.
     860           2 :                 if (i.exhaustedBounds == +1 || PD(&i.data).IsDataInvalidated()) && err == nil {
     861           2 :                         // Already exhausted, so return nil.
     862           2 :                         return nil
     863           2 :                 }
     864           2 :                 if err != nil {
     865           1 :                         // The current iterator position cannot be used.
     866           1 :                         flags = flags.DisableTrySeekUsingNext()
     867           1 :                 }
     868             :                 // INVARIANT: flags.TrySeekUsingNext() => err == nil &&
     869             :                 // !i.exhaustedBounds==+1 && !i.data.isDataInvalidated(). That is,
     870             :                 // data-exhausted and bounds-exhausted, as defined earlier, are both
     871             :                 // false. Ths makes it safe to clear out i.exhaustedBounds and i.err
     872             :                 // before calling into seekGEHelper.
     873             :         }
     874             :         // Bloom filter matches, or skipped, so this method will position the
     875             :         // iterator.
     876           2 :         i.exhaustedBounds = 0
     877           2 :         boundsCmp := i.boundsCmp
     878           2 :         // Seek optimization only applies until iterator is first positioned after SetBounds.
     879           2 :         i.boundsCmp = 0
     880           2 :         i.positionedUsingLatestBounds = true
     881           2 :         return i.maybeVerifyKey(i.seekGEHelper(key, boundsCmp, flags))
     882             : }
     883             : 
     884             : // shouldUseFilterBlock returns whether we should use the filter block, based on
     885             : // its length and the size limit.
     886           2 : func shouldUseFilterBlock(reader *Reader, filterBlockSizeLimit FilterBlockSizeLimit) bool {
     887           2 :         return reader.tableFilter != nil && reader.filterBH.Length <= uint64(filterBlockSizeLimit)
     888           2 : }
     889             : 
     890           2 : func (i *singleLevelIterator[I, PI, D, PD]) bloomFilterMayContain(prefix []byte) (bool, error) {
     891           2 :         // Check prefix bloom filter.
     892           2 :         prefixToCheck := prefix
     893           2 :         if i.transforms.SyntheticPrefix.IsSet() {
     894           1 :                 // We have to remove the synthetic prefix.
     895           1 :                 var ok bool
     896           1 :                 prefixToCheck, ok = bytes.CutPrefix(prefix, i.transforms.SyntheticPrefix)
     897           1 :                 if !ok {
     898           1 :                         // This prefix will not be found inside this table.
     899           1 :                         return false, nil
     900           1 :                 }
     901             :         }
     902             : 
     903           2 :         dataH, err := i.reader.readFilter(i.ctx, i.indexFilterRH, i.stats, &i.iterStats)
     904           2 :         if err != nil {
     905           1 :                 return false, err
     906           1 :         }
     907           2 :         defer dataH.Release()
     908           2 :         return i.reader.tableFilter.mayContain(dataH.Get(), prefixToCheck), nil
     909             : }
     910             : 
     911             : // virtualLast should only be called if i.vReader != nil.
     912           2 : func (i *singleLevelIterator[I, PI, D, PD]) virtualLast() *base.InternalKV {
     913           2 :         if i.vState == nil {
     914           0 :                 panic("pebble: invalid call to virtualLast")
     915             :         }
     916             : 
     917           2 :         if !i.endKeyInclusive {
     918           2 :                 // Trivial case.
     919           2 :                 return i.SeekLT(i.upper, base.SeekLTFlagsNone)
     920           2 :         }
     921           2 :         return i.virtualLastSeekLE()
     922             : }
     923             : 
     924             : // virtualLastSeekLE is called by virtualLast to do a SeekLE as part of a
     925             : // virtualLast. Consider generalizing this into a SeekLE() if there are other
     926             : // uses of this method in the future. Does a SeekLE on the upper bound of the
     927             : // file/iterator.
     928           2 : func (i *singleLevelIterator[I, PI, D, PD]) virtualLastSeekLE() *base.InternalKV {
     929           2 :         // Callers of SeekLE don't know about virtual sstable bounds, so we may
     930           2 :         // have to internally restrict the bounds.
     931           2 :         //
     932           2 :         // TODO(bananabrick): We can optimize this check away for the level iter
     933           2 :         // if necessary.
     934           2 :         if !i.endKeyInclusive {
     935           0 :                 panic("unexpected virtualLastSeekLE with exclusive upper bounds")
     936             :         }
     937           2 :         key := i.upper
     938           2 : 
     939           2 :         i.exhaustedBounds = 0
     940           2 :         i.err = nil // clear cached iteration error
     941           2 :         // Seek optimization only applies until iterator is first positioned with a
     942           2 :         // SeekGE or SeekLT after SetBounds.
     943           2 :         i.boundsCmp = 0
     944           2 :         i.positionedUsingLatestBounds = true
     945           2 : 
     946           2 :         indexOk := PI(&i.index).SeekGE(key)
     947           2 :         // We can have multiple internal keys with the same user key as the seek
     948           2 :         // key. In that case, we want the last (greatest) internal key.
     949           2 :         //
     950           2 :         // INVARIANT: One of two cases:
     951           2 :         // A. !indexOk. There is no data block with index key >= key. So all keys
     952           2 :         //    in the last data block are < key.
     953           2 :         // B. i.index.Separator() >= key. This data block may have some keys > key.
     954           2 :         //
     955           2 :         // Subcases of B:
     956           2 :         //   B1. Separator() == key. This is when loop iteration happens.
     957           2 :         //       Since Separator() >= largest data key in the block, the largest data
     958           2 :         //       key in this block is <= key.
     959           2 :         //   B2. Separator() > key. Loop iteration will not happen.
     960           2 :         //
     961           2 :         // NB: We can avoid this Next()ing if we just implement a blockIter.SeekLE().
     962           2 :         // This might be challenging to do correctly, so impose regular operations
     963           2 :         // for now.
     964           2 :         // TODO(jackson): Consider implementing SeekLE since it's easier to do in
     965           2 :         // colblk.
     966           2 :         for indexOk && bytes.Equal(PI(&i.index).Separator(), key) {
     967           2 :                 indexOk = PI(&i.index).Next()
     968           2 :         }
     969           2 :         if !indexOk {
     970           2 :                 // Cases A or B1 where B1 exhausted all blocks. In both cases the last block
     971           2 :                 // has all keys <= key. skipBackward enforces the lower bound.
     972           2 :                 return i.skipBackward()
     973           2 :         }
     974             :         // Case B. We are here because we were originally in case B2, or we were in B1
     975             :         // and we arrived at a block where ikey.UserKey > key. Either way, ikey.UserKey
     976             :         // > key. So there could be keys in the block > key. But the block preceding
     977             :         // this block cannot have any keys > key, otherwise it would have been the
     978             :         // result of the original index.SeekGE.
     979           2 :         result := i.loadBlock(-1)
     980           2 :         if result == loadBlockFailed {
     981           0 :                 return nil
     982           0 :         }
     983           2 :         if result == loadBlockIrrelevant {
     984           1 :                 // Want to skip to the previous block.
     985           1 :                 return i.skipBackward()
     986           1 :         }
     987           2 :         ikv := PD(&i.data).SeekGE(key, base.SeekGEFlagsNone)
     988           2 :         // Go to the last user key that matches key, and then Prev() on the data
     989           2 :         // block.
     990           2 :         for ikv != nil && bytes.Equal(ikv.K.UserKey, key) {
     991           2 :                 ikv = PD(&i.data).Next()
     992           2 :         }
     993           2 :         ikv = PD(&i.data).Prev()
     994           2 :         if ikv != nil {
     995           2 :                 // Enforce the lower bound here, as we could have gone past it. This happens
     996           2 :                 // if keys between `i.blockLower` and `key` are obsolete, for instance. Even
     997           2 :                 // though i.blockLower (which is either nil or equal to i.lower) is <= key,
     998           2 :                 // all internal keys in the user key interval [i.blockLower, key] could be
     999           2 :                 // obsolete (due to a RANGEDEL which will not be observed here). And
    1000           2 :                 // i.data.Prev will skip all these obsolete keys, and could land on a key
    1001           2 :                 // below the lower bound, requiring the lower bound check.
    1002           2 :                 if i.blockLower != nil && i.cmp(ikv.K.UserKey, i.blockLower) < 0 {
    1003           2 :                         i.exhaustedBounds = -1
    1004           2 :                         return nil
    1005           2 :                 }
    1006           2 :                 return ikv
    1007             :         }
    1008           2 :         return i.skipBackward()
    1009             : }
    1010             : 
    1011             : // SeekLT implements internalIterator.SeekLT, as documented in the pebble
    1012             : // package. Note that SeekLT only checks the lower bound. It is up to the
    1013             : // caller to ensure that key is less than or equal to the upper bound.
    1014             : func (i *singleLevelIterator[I, PI, D, PD]) SeekLT(
    1015             :         key []byte, flags base.SeekLTFlags,
    1016           2 : ) *base.InternalKV {
    1017           2 :         if i.vState != nil {
    1018           2 :                 // Might have to fix upper bound since virtual sstable bounds are not
    1019           2 :                 // known to callers of SeekLT.
    1020           2 :                 //
    1021           2 :                 // TODO(bananabrick): We can optimize away this check for the level iter
    1022           2 :                 // if necessary.
    1023           2 :                 cmp := i.cmp(key, i.upper)
    1024           2 :                 // key == i.upper is fine. We'll do the right thing and return the
    1025           2 :                 // first internal key with user key < key.
    1026           2 :                 if cmp > 0 {
    1027           2 :                         // Return the last key in the virtual sstable.
    1028           2 :                         return i.maybeVerifyKey(i.virtualLast())
    1029           2 :                 }
    1030             :         }
    1031             : 
    1032           2 :         i.exhaustedBounds = 0
    1033           2 :         i.err = nil // clear cached iteration error
    1034           2 :         boundsCmp := i.boundsCmp
    1035           2 :         // Seek optimization only applies until iterator is first positioned after SetBounds.
    1036           2 :         i.boundsCmp = 0
    1037           2 : 
    1038           2 :         // Seeking operations perform various step-instead-of-seeking optimizations:
    1039           2 :         // eg by considering monotonically increasing bounds (i.boundsCmp).
    1040           2 : 
    1041           2 :         i.positionedUsingLatestBounds = true
    1042           2 : 
    1043           2 :         var dontSeekWithinBlock bool
    1044           2 :         if !PD(&i.data).IsDataInvalidated() && PD(&i.data).Valid() && PI(&i.index).Valid() &&
    1045           2 :                 boundsCmp < 0 && i.cmp(PD(&i.data).FirstUserKey(), key) < 0 {
    1046           2 :                 // Fast-path: The bounds have moved backward, and this SeekLT is
    1047           2 :                 // respecting the upper bound (guaranteed by Iterator). We know that
    1048           2 :                 // the iterator must already be positioned within or just outside the
    1049           2 :                 // previous bounds. Therefore it cannot be positioned at a block (or
    1050           2 :                 // the position within that block) that is behind the seek position.
    1051           2 :                 // However it can be positioned at a later block. This fast-path to
    1052           2 :                 // use Prev() on the block is only applied when we are already at the
    1053           2 :                 // block that can satisfy this seek -- this is the motivation for the
    1054           2 :                 // the i.cmp(i.data.firstKey.UserKey, key) < 0 predicate.
    1055           2 :                 i.initBoundsForAlreadyLoadedBlock()
    1056           2 :                 ikv, done := i.trySeekLTUsingPrevWithinBlock(key)
    1057           2 :                 if done {
    1058           2 :                         return ikv
    1059           2 :                 }
    1060           2 :                 if ikv == nil {
    1061           1 :                         // Done with this block.
    1062           1 :                         dontSeekWithinBlock = true
    1063           1 :                 }
    1064           2 :         } else {
    1065           2 :                 // Slow-path.
    1066           2 : 
    1067           2 :                 // NB: If a bound-limited block property filter is configured, it's
    1068           2 :                 // externally ensured that the filter is disabled (through returning
    1069           2 :                 // Intersects=false irrespective of the block props provided) during
    1070           2 :                 // seeks.
    1071           2 :                 if !PI(&i.index).SeekGE(key) {
    1072           2 :                         if !PI(&i.index).Last() {
    1073           0 :                                 return nil
    1074           0 :                         }
    1075             :                 }
    1076             :                 // INVARIANT: ikey != nil.
    1077           2 :                 result := i.loadBlock(-1)
    1078           2 :                 if result == loadBlockFailed {
    1079           1 :                         return nil
    1080           1 :                 }
    1081           2 :                 if result == loadBlockIrrelevant {
    1082           2 :                         // Enforce the lower bound here since don't want to bother moving
    1083           2 :                         // to the previous block if lower bound is already exceeded. Note
    1084           2 :                         // that the previous block starts with keys <= ikey.UserKey since
    1085           2 :                         // even though this is the current block's separator, the same
    1086           2 :                         // user key can span multiple blocks.
    1087           2 :                         if i.lower != nil && i.cmp(PI(&i.index).Separator(), i.lower) < 0 {
    1088           1 :                                 i.exhaustedBounds = -1
    1089           1 :                                 return nil
    1090           1 :                         }
    1091             :                         // Want to skip to the previous block.
    1092           2 :                         dontSeekWithinBlock = true
    1093             :                 }
    1094             :         }
    1095           2 :         if !dontSeekWithinBlock {
    1096           2 :                 if ikv := PD(&i.data).SeekLT(key, flags); ikv != nil {
    1097           2 :                         if i.blockLower != nil && i.cmp(ikv.K.UserKey, i.blockLower) < 0 {
    1098           2 :                                 i.exhaustedBounds = -1
    1099           2 :                                 return nil
    1100           2 :                         }
    1101           2 :                         return ikv
    1102             :                 }
    1103             :         }
    1104             :         // The index contains separator keys which may lie between
    1105             :         // user-keys. Consider the user-keys:
    1106             :         //
    1107             :         //   complete
    1108             :         // ---- new block ---
    1109             :         //   complexion
    1110             :         //
    1111             :         // If these two keys end one block and start the next, the index key may
    1112             :         // be chosen as "compleu". The SeekGE in the index block will then point
    1113             :         // us to the block containing "complexion". If this happens, we want the
    1114             :         // last key from the previous data block.
    1115           2 :         return i.maybeVerifyKey(i.skipBackward())
    1116             : }
    1117             : 
    1118             : // First implements internalIterator.First, as documented in the pebble
    1119             : // package. Note that First only checks the upper bound. It is up to the caller
    1120             : // to ensure that key is greater than or equal to the lower bound (e.g. via a
    1121             : // call to SeekGE(lower)).
    1122           2 : func (i *singleLevelIterator[I, PI, D, PD]) First() *base.InternalKV {
    1123           2 :         // If we have a lower bound, use SeekGE. Note that in general this is not
    1124           2 :         // supported usage, except when the lower bound is there because the table is
    1125           2 :         // virtual.
    1126           2 :         if i.lower != nil {
    1127           2 :                 return i.SeekGE(i.lower, base.SeekGEFlagsNone)
    1128           2 :         }
    1129             : 
    1130           2 :         i.positionedUsingLatestBounds = true
    1131           2 : 
    1132           2 :         return i.firstInternal()
    1133             : }
    1134             : 
    1135             : // firstInternal is a helper used for absolute positioning in a single-level
    1136             : // index file, or for positioning in the second-level index in a two-level
    1137             : // index file. For the latter, one cannot make any claims about absolute
    1138             : // positioning.
    1139           2 : func (i *singleLevelIterator[I, PI, D, PD]) firstInternal() *base.InternalKV {
    1140           2 :         i.exhaustedBounds = 0
    1141           2 :         i.err = nil // clear cached iteration error
    1142           2 :         // Seek optimization only applies until iterator is first positioned after SetBounds.
    1143           2 :         i.boundsCmp = 0
    1144           2 : 
    1145           2 :         if !PI(&i.index).First() {
    1146           0 :                 PD(&i.data).Invalidate()
    1147           0 :                 return nil
    1148           0 :         }
    1149           2 :         result := i.loadBlock(+1)
    1150           2 :         if result == loadBlockFailed {
    1151           1 :                 return nil
    1152           1 :         }
    1153           2 :         if result == loadBlockOK {
    1154           2 :                 if kv := PD(&i.data).First(); kv != nil {
    1155           2 :                         if i.blockUpper != nil {
    1156           2 :                                 cmp := i.cmp(kv.K.UserKey, i.blockUpper)
    1157           2 :                                 if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
    1158           2 :                                         i.exhaustedBounds = +1
    1159           2 :                                         return nil
    1160           2 :                                 }
    1161             :                         }
    1162           2 :                         return kv
    1163             :                 }
    1164             :                 // Else fall through to skipForward.
    1165           2 :         } else {
    1166           2 :                 // result == loadBlockIrrelevant. Enforce the upper bound here since
    1167           2 :                 // don't want to bother moving to the next block if upper bound is
    1168           2 :                 // already exceeded. Note that the next block starts with keys >=
    1169           2 :                 // ikey.UserKey since even though this is the block separator, the
    1170           2 :                 // same user key can span multiple blocks. If upper is exclusive we
    1171           2 :                 // use >= below, else we use >.
    1172           2 :                 if i.upper != nil {
    1173           2 :                         cmp := i.cmp(PI(&i.index).Separator(), i.upper)
    1174           2 :                         if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
    1175           1 :                                 i.exhaustedBounds = +1
    1176           1 :                                 return nil
    1177           1 :                         }
    1178             :                 }
    1179             :                 // Else fall through to skipForward.
    1180             :         }
    1181             : 
    1182           2 :         return i.skipForward()
    1183             : }
    1184             : 
    1185             : // Last implements internalIterator.Last, as documented in the pebble
    1186             : // package. Note that Last only checks the lower bound. It is up to the caller
    1187             : // to ensure that key is less than the upper bound (e.g. via a call to
    1188             : // SeekLT(upper))
    1189           2 : func (i *singleLevelIterator[I, PI, D, PD]) Last() *base.InternalKV {
    1190           2 :         if i.vState != nil {
    1191           2 :                 return i.maybeVerifyKey(i.virtualLast())
    1192           2 :         }
    1193             : 
    1194           2 :         if i.upper != nil {
    1195           0 :                 panic("singleLevelIterator.Last() used despite upper bound")
    1196             :         }
    1197           2 :         i.positionedUsingLatestBounds = true
    1198           2 :         return i.lastInternal()
    1199             : }
    1200             : 
    1201             : // lastInternal is a helper used for absolute positioning in a single-level
    1202             : // index file, or for positioning in the second-level index in a two-level
    1203             : // index file. For the latter, one cannot make any claims about absolute
    1204             : // positioning.
    1205           2 : func (i *singleLevelIterator[I, PI, D, PD]) lastInternal() *base.InternalKV {
    1206           2 :         i.exhaustedBounds = 0
    1207           2 :         i.err = nil // clear cached iteration error
    1208           2 :         // Seek optimization only applies until iterator is first positioned after SetBounds.
    1209           2 :         i.boundsCmp = 0
    1210           2 : 
    1211           2 :         if !PI(&i.index).Last() {
    1212           0 :                 PD(&i.data).Invalidate()
    1213           0 :                 return nil
    1214           0 :         }
    1215           2 :         result := i.loadBlock(-1)
    1216           2 :         if result == loadBlockFailed {
    1217           1 :                 return nil
    1218           1 :         }
    1219           2 :         if result == loadBlockOK {
    1220           2 :                 if ikv := PD(&i.data).Last(); ikv != nil {
    1221           2 :                         if i.blockLower != nil && i.cmp(ikv.K.UserKey, i.blockLower) < 0 {
    1222           2 :                                 i.exhaustedBounds = -1
    1223           2 :                                 return nil
    1224           2 :                         }
    1225           2 :                         return ikv
    1226             :                 }
    1227             :                 // Else fall through to skipBackward.
    1228           2 :         } else {
    1229           2 :                 // result == loadBlockIrrelevant. Enforce the lower bound here since
    1230           2 :                 // don't want to bother moving to the previous block if lower bound is
    1231           2 :                 // already exceeded. Note that the previous block starts with keys <=
    1232           2 :                 // key.UserKey since even though this is the current block's
    1233           2 :                 // separator, the same user key can span multiple blocks.
    1234           2 :                 if i.lower != nil && i.cmp(PI(&i.index).Separator(), i.lower) < 0 {
    1235           2 :                         i.exhaustedBounds = -1
    1236           2 :                         return nil
    1237           2 :                 }
    1238             :         }
    1239             : 
    1240           2 :         return i.skipBackward()
    1241             : }
    1242             : 
    1243             : // Next implements internalIterator.Next, as documented in the pebble
    1244             : // package.
    1245             : // Note: compactionIterator.Next mirrors the implementation of Iterator.Next
    1246             : // due to performance. Keep the two in sync.
    1247           2 : func (i *singleLevelIterator[I, PI, D, PD]) Next() *base.InternalKV {
    1248           2 :         if i.exhaustedBounds == +1 {
    1249           0 :                 panic("Next called even though exhausted upper bound")
    1250             :         }
    1251           2 :         i.exhaustedBounds = 0
    1252           2 :         // Seek optimization only applies until iterator is first positioned after SetBounds.
    1253           2 :         i.boundsCmp = 0
    1254           2 : 
    1255           2 :         if i.err != nil {
    1256           1 :                 // TODO(jackson): Can this case be turned into a panic? Once an error is
    1257           1 :                 // encountered, the iterator must be re-seeked.
    1258           1 :                 return nil
    1259           1 :         }
    1260           2 :         if kv := PD(&i.data).Next(); kv != nil {
    1261           2 :                 if i.blockUpper != nil {
    1262           2 :                         cmp := i.cmp(kv.K.UserKey, i.blockUpper)
    1263           2 :                         if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
    1264           2 :                                 i.exhaustedBounds = +1
    1265           2 :                                 return nil
    1266           2 :                         }
    1267             :                 }
    1268           2 :                 return kv
    1269             :         }
    1270           2 :         return i.skipForward()
    1271             : }
    1272             : 
    1273             : // NextPrefix implements (base.InternalIterator).NextPrefix.
    1274           2 : func (i *singleLevelIterator[I, PI, D, PD]) NextPrefix(succKey []byte) *base.InternalKV {
    1275           2 :         if i.exhaustedBounds == +1 {
    1276           0 :                 panic("NextPrefix called even though exhausted upper bound")
    1277             :         }
    1278           2 :         i.exhaustedBounds = 0
    1279           2 :         // Seek optimization only applies until iterator is first positioned after SetBounds.
    1280           2 :         i.boundsCmp = 0
    1281           2 :         if i.err != nil {
    1282           0 :                 // TODO(jackson): Can this case be turned into a panic? Once an error is
    1283           0 :                 // encountered, the iterator must be re-seeked.
    1284           0 :                 return nil
    1285           0 :         }
    1286           2 :         if kv := PD(&i.data).NextPrefix(succKey); kv != nil {
    1287           2 :                 if i.blockUpper != nil {
    1288           2 :                         cmp := i.cmp(kv.K.UserKey, i.blockUpper)
    1289           2 :                         if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
    1290           2 :                                 i.exhaustedBounds = +1
    1291           2 :                                 return nil
    1292           2 :                         }
    1293             :                 }
    1294           2 :                 return kv
    1295             :         }
    1296             :         // Did not find prefix in the existing data block. This is the slow-path
    1297             :         // where we effectively seek the iterator.
    1298             :         // The key is likely to be in the next data block, so try one step.
    1299           2 :         if !PI(&i.index).Next() {
    1300           2 :                 // The target key is greater than any key in the index block.
    1301           2 :                 // Invalidate the block iterator so that a subsequent call to Prev()
    1302           2 :                 // will return the last key in the table.
    1303           2 :                 PD(&i.data).Invalidate()
    1304           2 :                 return nil
    1305           2 :         }
    1306           2 :         if i.cmp(succKey, PI(&i.index).Separator()) > 0 {
    1307           2 :                 // Not in the next data block, so seek the index.
    1308           2 :                 if !PI(&i.index).SeekGE(succKey) {
    1309           2 :                         // The target key is greater than any key in the index block.
    1310           2 :                         // Invalidate the block iterator so that a subsequent call to Prev()
    1311           2 :                         // will return the last key in the table.
    1312           2 :                         PD(&i.data).Invalidate()
    1313           2 :                         return nil
    1314           2 :                 }
    1315             :         }
    1316           2 :         result := i.loadBlock(+1)
    1317           2 :         if result == loadBlockFailed {
    1318           1 :                 return nil
    1319           1 :         }
    1320           2 :         if result == loadBlockIrrelevant {
    1321           2 :                 // Enforce the upper bound here since don't want to bother moving
    1322           2 :                 // to the next block if upper bound is already exceeded. Note that
    1323           2 :                 // the next block starts with keys >= ikey.UserKey since even
    1324           2 :                 // though this is the block separator, the same user key can span
    1325           2 :                 // multiple blocks. If upper is exclusive we use >= below, else we use
    1326           2 :                 // >.
    1327           2 :                 if i.upper != nil {
    1328           2 :                         cmp := i.cmp(PI(&i.index).Separator(), i.upper)
    1329           2 :                         if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
    1330           1 :                                 i.exhaustedBounds = +1
    1331           1 :                                 return nil
    1332           1 :                         }
    1333             :                 }
    1334           2 :         } else if kv := PD(&i.data).SeekGE(succKey, base.SeekGEFlagsNone); kv != nil {
    1335           2 :                 if i.blockUpper != nil {
    1336           1 :                         cmp := i.cmp(kv.K.UserKey, i.blockUpper)
    1337           1 :                         if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
    1338           1 :                                 i.exhaustedBounds = +1
    1339           1 :                                 return nil
    1340           1 :                         }
    1341             :                 }
    1342           2 :                 return i.maybeVerifyKey(kv)
    1343             :         }
    1344             : 
    1345           2 :         return i.skipForward()
    1346             : }
    1347             : 
    1348             : // Prev implements internalIterator.Prev, as documented in the pebble
    1349             : // package.
    1350           2 : func (i *singleLevelIterator[I, PI, D, PD]) Prev() *base.InternalKV {
    1351           2 :         if i.exhaustedBounds == -1 {
    1352           0 :                 panic("Prev called even though exhausted lower bound")
    1353             :         }
    1354           2 :         i.exhaustedBounds = 0
    1355           2 :         // Seek optimization only applies until iterator is first positioned after SetBounds.
    1356           2 :         i.boundsCmp = 0
    1357           2 : 
    1358           2 :         if i.err != nil {
    1359           1 :                 return nil
    1360           1 :         }
    1361           2 :         if kv := PD(&i.data).Prev(); kv != nil {
    1362           2 :                 if i.blockLower != nil && i.cmp(kv.K.UserKey, i.blockLower) < 0 {
    1363           2 :                         i.exhaustedBounds = -1
    1364           2 :                         return nil
    1365           2 :                 }
    1366           2 :                 return kv
    1367             :         }
    1368           2 :         return i.skipBackward()
    1369             : }
    1370             : 
    1371           2 : func (i *singleLevelIterator[I, PI, D, PD]) skipForward() *base.InternalKV {
    1372           2 :         for {
    1373           2 :                 if !PI(&i.index).Next() {
    1374           2 :                         PD(&i.data).Invalidate()
    1375           2 :                         break
    1376             :                 }
    1377           2 :                 result := i.loadBlock(+1)
    1378           2 :                 if result != loadBlockOK {
    1379           2 :                         if i.err != nil {
    1380           1 :                                 break
    1381             :                         }
    1382           2 :                         if result == loadBlockFailed {
    1383           0 :                                 // We checked that i.index was at a valid entry, so
    1384           0 :                                 // loadBlockFailed could not have happened due to i.index
    1385           0 :                                 // being exhausted, and must be due to an error.
    1386           0 :                                 panic("loadBlock should not have failed with no error")
    1387             :                         }
    1388             :                         // result == loadBlockIrrelevant. Enforce the upper bound here
    1389             :                         // since don't want to bother moving to the next block if upper
    1390             :                         // bound is already exceeded. Note that the next block starts with
    1391             :                         // keys >= key.UserKey since even though this is the block
    1392             :                         // separator, the same user key can span multiple blocks. If upper
    1393             :                         // is exclusive we use >= below, else we use >.
    1394           2 :                         if i.upper != nil {
    1395           2 :                                 cmp := i.cmp(PI(&i.index).Separator(), i.upper)
    1396           2 :                                 if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
    1397           2 :                                         i.exhaustedBounds = +1
    1398           2 :                                         return nil
    1399           2 :                                 }
    1400             :                         }
    1401           2 :                         continue
    1402             :                 }
    1403           2 :                 var kv *base.InternalKV
    1404           2 :                 // It is possible that skipBackward went too far and the virtual table lower
    1405           2 :                 // bound is after the first key in the block we are about to load, in which
    1406           2 :                 // case we must use SeekGE.
    1407           2 :                 //
    1408           2 :                 // An example of how this can happen:
    1409           2 :                 //
    1410           2 :                 //   Data block 1 - contains keys a@1, c@1
    1411           2 :                 //   Data block 2 - contains keys e@1, g@1
    1412           2 :                 //   Data block 3 - contains keys i@2, k@2
    1413           2 :                 //
    1414           2 :                 //   The virtual table lower bound is f. We have a range key masking filter
    1415           2 :                 //   that filters keys with @1 suffix. We are positioned inside block 3 then
    1416           2 :                 //   we Prev(). Block 2 is entirely filtered out, which makes us move to
    1417           2 :                 //   block 1. Now the range key masking filter gets an update (via
    1418           2 :                 //   SpanChanged) and it no longer filters out any keys. At this point if a
    1419           2 :                 //   Next happens, we will load block 2 but it would not be legal to return
    1420           2 :                 //   "e@1" which is outside the virtual bounds.
    1421           2 :                 //
    1422           2 :                 //   The core of the problem is that skipBackward doesn't know it can stop
    1423           2 :                 //   at block 2, because it doesn't know what keys are at the start of that
    1424           2 :                 //   block. This is why we don't have this problem in the opposite
    1425           2 :                 //   direction: skipForward will never go beyond the last relevant block
    1426           2 :                 //   because it looks at the separator key which is an upper bound for the
    1427           2 :                 //   block.
    1428           2 :                 //
    1429           2 :                 // Note that this is only a problem with virtual tables; we make no
    1430           2 :                 // guarantees wrt an iterator lower bound when we iterate forward. But we
    1431           2 :                 // must never return keys that are not inside the virtual table.
    1432           2 :                 if i.vState != nil && i.blockLower != nil {
    1433           2 :                         kv = PD(&i.data).SeekGE(i.lower, base.SeekGEFlagsNone)
    1434           2 :                 } else {
    1435           2 :                         kv = PD(&i.data).First()
    1436           2 :                 }
    1437           2 :                 if kv != nil {
    1438           2 :                         if i.blockUpper != nil {
    1439           2 :                                 cmp := i.cmp(kv.K.UserKey, i.blockUpper)
    1440           2 :                                 if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
    1441           2 :                                         i.exhaustedBounds = +1
    1442           2 :                                         return nil
    1443           2 :                                 }
    1444             :                         }
    1445           2 :                         return i.maybeVerifyKey(kv)
    1446             :                 }
    1447             :         }
    1448           2 :         return nil
    1449             : }
    1450             : 
    1451           2 : func (i *singleLevelIterator[I, PI, D, PD]) skipBackward() *base.InternalKV {
    1452           2 :         for {
    1453           2 :                 if !PI(&i.index).Prev() {
    1454           2 :                         PD(&i.data).Invalidate()
    1455           2 :                         break
    1456             :                 }
    1457           2 :                 result := i.loadBlock(-1)
    1458           2 :                 if result != loadBlockOK {
    1459           2 :                         if i.err != nil {
    1460           1 :                                 break
    1461             :                         }
    1462           2 :                         if result == loadBlockFailed {
    1463           0 :                                 // We checked that i.index was at a valid entry, so
    1464           0 :                                 // loadBlockFailed could not have happened due to to i.index
    1465           0 :                                 // being exhausted, and must be due to an error.
    1466           0 :                                 panic("loadBlock should not have failed with no error")
    1467             :                         }
    1468             :                         // result == loadBlockIrrelevant. Enforce the lower bound here
    1469             :                         // since don't want to bother moving to the previous block if lower
    1470             :                         // bound is already exceeded. Note that the previous block starts with
    1471             :                         // keys <= key.UserKey since even though this is the current block's
    1472             :                         // separator, the same user key can span multiple blocks.
    1473           2 :                         if i.lower != nil && i.cmp(PI(&i.index).Separator(), i.lower) < 0 {
    1474           2 :                                 i.exhaustedBounds = -1
    1475           2 :                                 return nil
    1476           2 :                         }
    1477           2 :                         continue
    1478             :                 }
    1479           2 :                 kv := PD(&i.data).Last()
    1480           2 :                 if kv == nil {
    1481           2 :                         // The block iter could have hid some obsolete points, so it isn't
    1482           2 :                         // safe to assume that there are no keys if we keep skipping backwards.
    1483           2 :                         // Check the previous block, but check the lower bound before doing
    1484           2 :                         // that.
    1485           2 :                         if i.lower != nil && i.cmp(PI(&i.index).Separator(), i.lower) < 0 {
    1486           1 :                                 i.exhaustedBounds = -1
    1487           1 :                                 return nil
    1488           1 :                         }
    1489           2 :                         continue
    1490             :                 }
    1491           2 :                 if i.blockLower != nil && i.cmp(kv.K.UserKey, i.blockLower) < 0 {
    1492           2 :                         i.exhaustedBounds = -1
    1493           2 :                         return nil
    1494           2 :                 }
    1495           2 :                 return i.maybeVerifyKey(kv)
    1496             :         }
    1497           2 :         return nil
    1498             : }
    1499             : 
    1500             : // Error implements internalIterator.Error, as documented in the pebble
    1501             : // package.
    1502           2 : func (i *singleLevelIterator[I, PI, D, PD]) Error() error {
    1503           2 :         if err := PD(&i.data).Error(); err != nil {
    1504           0 :                 return err
    1505           0 :         }
    1506           2 :         return i.err
    1507             : }
    1508             : 
    1509             : // SetCloseHook sets a function that will be called when the iterator is
    1510             : // closed.
    1511           2 : func (i *singleLevelIterator[I, PI, D, PD]) SetCloseHook(fn func(i Iterator) error) {
    1512           2 :         i.closeHook = fn
    1513           2 : }
    1514             : 
    1515           2 : func firstError(err0, err1 error) error {
    1516           2 :         if err0 != nil {
    1517           1 :                 return err0
    1518           1 :         }
    1519           2 :         return err1
    1520             : }
    1521             : 
    1522             : // Close implements internalIterator.Close, as documented in the pebble
    1523             : // package.
    1524           2 : func (i *singleLevelIterator[I, PI, D, PD]) Close() error {
    1525           2 :         err := i.closeInternal()
    1526           2 :         pool := i.pool
    1527           2 :         *i = i.resetForReuse()
    1528           2 :         if pool != nil {
    1529           2 :                 pool.Put(i)
    1530           2 :         }
    1531           2 :         return err
    1532             : }
    1533             : 
    1534           2 : func (i *singleLevelIterator[I, PI, D, PD]) closeInternal() error {
    1535           2 :         if invariants.Enabled && i.inPool {
    1536           0 :                 panic("Close called on interator in pool")
    1537             :         }
    1538           2 :         i.iterStats.close()
    1539           2 :         var err error
    1540           2 :         if i.closeHook != nil {
    1541           2 :                 err = firstError(err, i.closeHook(i))
    1542           2 :         }
    1543           2 :         err = firstError(err, PD(&i.data).Close())
    1544           2 :         err = firstError(err, PI(&i.index).Close())
    1545           2 :         if i.indexFilterRH != nil {
    1546           2 :                 err = firstError(err, i.indexFilterRH.Close())
    1547           2 :                 i.indexFilterRH = nil
    1548           2 :         }
    1549           2 :         if i.dataRH != nil {
    1550           2 :                 err = firstError(err, i.dataRH.Close())
    1551           2 :                 i.dataRH = nil
    1552           2 :         }
    1553           2 :         err = firstError(err, i.err)
    1554           2 :         if i.bpfs != nil {
    1555           2 :                 releaseBlockPropertiesFilterer(i.bpfs)
    1556           2 :         }
    1557           2 :         if i.vbReader != nil {
    1558           2 :                 i.vbReader.close()
    1559           2 :         }
    1560           2 :         if i.vbRH != nil {
    1561           2 :                 err = firstError(err, i.vbRH.Close())
    1562           2 :                 i.vbRH = nil
    1563           2 :         }
    1564           2 :         return err
    1565             : }
    1566             : 
    1567           0 : func (i *singleLevelIterator[I, PI, D, PD]) String() string {
    1568           0 :         if i.vState != nil {
    1569           0 :                 return i.vState.fileNum.String()
    1570           0 :         }
    1571           0 :         return i.reader.cacheOpts.FileNum.String()
    1572             : }
    1573             : 
    1574             : // DebugTree is part of the InternalIterator interface.
    1575           0 : func (i *singleLevelIterator[I, PI, D, PD]) DebugTree(tp treeprinter.Node) {
    1576           0 :         tp.Childf("%T(%p) fileNum=%s", i, i, i.String())
    1577           0 : }

Generated by: LCOV version 1.14