LCOV - code coverage report
Current view: top level - pebble/sstable - reader_iter_two_lvl.go (source / functions) Hit Total Coverage
Test: 2025-01-11 08:16Z ce9b648a - tests + meta.lcov Lines: 665 721 92.2 %
Date: 2025-01-11 08:17:30 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : package sstable
       6             : 
       7             : import (
       8             :         "bytes"
       9             :         "context"
      10             :         "fmt"
      11             :         "sync"
      12             : 
      13             :         "github.com/cockroachdb/errors"
      14             :         "github.com/cockroachdb/pebble/internal/base"
      15             :         "github.com/cockroachdb/pebble/internal/invariants"
      16             :         "github.com/cockroachdb/pebble/internal/treeprinter"
      17             :         "github.com/cockroachdb/pebble/objstorage"
      18             :         "github.com/cockroachdb/pebble/objstorage/objstorageprovider"
      19             :         "github.com/cockroachdb/pebble/sstable/block"
      20             :         "github.com/cockroachdb/pebble/sstable/valblk"
      21             : )
      22             : 
      23             : type twoLevelIterator[I any, PI indexBlockIterator[I], D any, PD dataBlockIterator[D]] struct {
      24             :         secondLevel   singleLevelIterator[I, PI, D, PD]
      25             :         topLevelIndex I
      26             :         // pool is the pool from which the iterator was allocated and to which the
      27             :         // iterator should be returned on Close. Because the iterator is
      28             :         // parameterized by the type of the data block iterator, pools must be
      29             :         // specific to the type of the data block iterator.
      30             :         pool *sync.Pool
      31             : 
      32             :         // useFilterBlock controls whether we consult the bloom filter in the
      33             :         // twoLevelIterator code. Note that secondLevel.useFilterBlock is always
      34             :         // false - any filtering happens at the top level.
      35             :         useFilterBlock         bool
      36             :         lastBloomFilterMatched bool
      37             : }
      38             : 
      39             : var _ Iterator = (*twoLevelIteratorRowBlocks)(nil)
      40             : 
      41             : // loadSecondLevelIndexBlock loads the index block at the current top level
      42             : // index position and leaves i.index unpositioned. If unsuccessful, it gets
      43             : // i.secondLevel.err to any error encountered, which may be nil if we have
      44             : // simply exhausted the entire table. This is used for two level indexes.
      45           2 : func (i *twoLevelIterator[I, PI, D, PD]) loadSecondLevelIndexBlock(dir int8) loadBlockResult {
      46           2 :         // Ensure the index data block iterators are invalidated even if loading of
      47           2 :         // the index fails.
      48           2 :         PD(&i.secondLevel.data).Invalidate()
      49           2 :         PI(&i.secondLevel.index).Invalidate()
      50           2 :         if !PI(&i.topLevelIndex).Valid() {
      51           0 :                 return loadBlockFailed
      52           0 :         }
      53           2 :         bhp, err := PI(&i.topLevelIndex).BlockHandleWithProperties()
      54           2 :         if err != nil {
      55           0 :                 i.secondLevel.err = base.CorruptionErrorf("pebble/table: corrupt top level index entry (%v)", err)
      56           0 :                 return loadBlockFailed
      57           0 :         }
      58           2 :         if i.secondLevel.bpfs != nil {
      59           2 :                 intersects, err := i.secondLevel.bpfs.intersects(bhp.Props)
      60           2 :                 if err != nil {
      61           0 :                         i.secondLevel.err = errCorruptIndexEntry(err)
      62           0 :                         return loadBlockFailed
      63           0 :                 }
      64           2 :                 if intersects == blockMaybeExcluded {
      65           1 :                         intersects = i.resolveMaybeExcluded(dir)
      66           1 :                 }
      67           2 :                 if intersects == blockExcluded {
      68           2 :                         return loadBlockIrrelevant
      69           2 :                 }
      70             :                 // blockIntersects
      71             :         }
      72           2 :         indexBlock, err := i.secondLevel.reader.readIndexBlock(i.secondLevel.ctx, i.secondLevel.readBlockEnv, i.secondLevel.indexFilterRH, bhp.Handle)
      73           2 :         if err != nil {
      74           1 :                 i.secondLevel.err = err
      75           1 :                 return loadBlockFailed
      76           1 :         }
      77           2 :         err = PI(&i.secondLevel.index).InitHandle(i.secondLevel.reader.Comparer, indexBlock, i.secondLevel.transforms)
      78           2 :         if err != nil {
      79           0 :                 PI(&i.secondLevel.index).Invalidate()
      80           0 :                 i.secondLevel.err = err
      81           0 :                 return loadBlockFailed
      82           0 :         }
      83           2 :         return loadBlockOK
      84             : }
      85             : 
      86             : // resolveMaybeExcluded is invoked when the block-property filterer has found
      87             : // that an index block is excluded according to its properties but only if its
      88             : // bounds fall within the filter's current bounds. This function consults the
      89             : // appropriate bound, depending on the iteration direction, and returns either
      90             : // `blockIntersects` or `blockExcluded`.
      91           1 : func (i *twoLevelIterator[I, PI, D, PD]) resolveMaybeExcluded(dir int8) intersectsResult {
      92           1 :         // This iterator is configured with a bound-limited block property filter.
      93           1 :         // The bpf determined this entire index block could be excluded from
      94           1 :         // iteration based on the property encoded in the block handle. However, we
      95           1 :         // still need to determine if the index block is wholly contained within the
      96           1 :         // filter's key bounds.
      97           1 :         //
      98           1 :         // External guarantees ensure all its data blocks' keys are ≥ the filter's
      99           1 :         // lower bound during forward iteration, and that all its data blocks' keys
     100           1 :         // are < the filter's upper bound during backward iteration. We only need to
     101           1 :         // determine if the opposite bound is also met.
     102           1 :         //
     103           1 :         // The index separator in topLevelIndex.Separator() provides an inclusive
     104           1 :         // upper-bound for the index block's keys, guaranteeing that all its keys
     105           1 :         // are ≤ topLevelIndex.Separator(). For forward iteration, this is all we
     106           1 :         // need.
     107           1 :         if dir > 0 {
     108           1 :                 // Forward iteration.
     109           1 :                 if i.secondLevel.bpfs.boundLimitedFilter.KeyIsWithinUpperBound(PI(&i.topLevelIndex).Separator()) {
     110           1 :                         return blockExcluded
     111           1 :                 }
     112           1 :                 return blockIntersects
     113             :         }
     114             : 
     115             :         // Reverse iteration.
     116             :         //
     117             :         // Because we're iterating in the reverse direction, we don't yet have
     118             :         // enough context available to determine if the block is wholly contained
     119             :         // within its bounds. This case arises only during backward iteration,
     120             :         // because of the way the index is structured.
     121             :         //
     122             :         // Consider a bound-limited bpf limited to the bounds [b,d), loading the
     123             :         // block with separator `c`. During reverse iteration, the guarantee that
     124             :         // all the block's keys are < `d` is externally provided, but no guarantee
     125             :         // is made on the bpf's lower bound. The separator `c` only provides an
     126             :         // inclusive upper bound on the block's keys, indicating that the
     127             :         // corresponding block handle points to a block containing only keys ≤ `c`.
     128             :         //
     129             :         // To establish a lower bound, we step the top-level index backwards to read
     130             :         // the previous block's separator, which provides an inclusive lower bound
     131             :         // on the original index block's keys. Afterwards, we step forward to
     132             :         // restore our top-level index position.
     133           1 :         if !PI(&i.topLevelIndex).Prev() {
     134           1 :                 // The original block points to the first index block of this table. If
     135           1 :                 // we knew the lower bound for the entire table, it could provide a
     136           1 :                 // lower bound, but the code refactoring necessary to read it doesn't
     137           1 :                 // seem worth the payoff. We fall through to loading the block.
     138           1 :         } else if i.secondLevel.bpfs.boundLimitedFilter.KeyIsWithinLowerBound(PI(&i.topLevelIndex).Separator()) {
     139           1 :                 // The lower-bound on the original index block falls within the filter's
     140           1 :                 // bounds, and we can skip the block (after restoring our current
     141           1 :                 // top-level index position).
     142           1 :                 _ = PI(&i.topLevelIndex).Next()
     143           1 :                 return blockExcluded
     144           1 :         }
     145           1 :         _ = PI(&i.topLevelIndex).Next()
     146           1 :         return blockIntersects
     147             : }
     148             : 
     149             : // newColumnBlockTwoLevelIterator reads the top-level index block and creates and
     150             : // initializes a two-level iterator over an sstable with column-oriented data
     151             : // blocks.
     152             : //
     153             : // Note that lower, upper are iterator bounds and are separate from virtual
     154             : // sstable bounds. If the virtualState passed in is not nil, then virtual
     155             : // sstable bounds will be enforced.
     156             : func newColumnBlockTwoLevelIterator(
     157             :         ctx context.Context,
     158             :         r *Reader,
     159             :         v *virtualState,
     160             :         transforms IterTransforms,
     161             :         lower, upper []byte,
     162             :         filterer *BlockPropertiesFilterer,
     163             :         filterBlockSizeLimit FilterBlockSizeLimit,
     164             :         stats *base.InternalIteratorStats,
     165             :         statsAccum IterStatsAccumulator,
     166             :         rp valblk.ReaderProvider,
     167             :         bufferPool *block.BufferPool,
     168           2 : ) (*twoLevelIteratorColumnBlocks, error) {
     169           2 :         if r.err != nil {
     170           0 :                 return nil, r.err
     171           0 :         }
     172           2 :         if !r.tableFormat.BlockColumnar() {
     173           0 :                 panic(errors.AssertionFailedf("table format %d should not use columnar block format", r.tableFormat))
     174             :         }
     175           2 :         i := twoLevelIterColumnBlockPool.Get().(*twoLevelIteratorColumnBlocks)
     176           2 :         i.secondLevel.init(ctx, r, v, transforms, lower, upper, filterer,
     177           2 :                 false, // Disable the use of the filter block in the second level.
     178           2 :                 stats, statsAccum, bufferPool)
     179           2 :         var getLazyValuer block.GetLazyValueForPrefixAndValueHandler
     180           2 :         if r.Properties.NumValueBlocks > 0 {
     181           2 :                 // NB: we cannot avoid this ~248 byte allocation, since valueBlockReader
     182           2 :                 // can outlive the singleLevelIterator due to be being embedded in a
     183           2 :                 // LazyValue. This consumes ~2% in microbenchmark CPU profiles, but we
     184           2 :                 // should only optimize this if it shows up as significant in end-to-end
     185           2 :                 // CockroachDB benchmarks, since it is tricky to do so. One possibility
     186           2 :                 // is that if many sstable iterators only get positioned at latest
     187           2 :                 // versions of keys, and therefore never expose a LazyValue that is
     188           2 :                 // separated to their callers, they can put this valueBlockReader into a
     189           2 :                 // sync.Pool.
     190           2 :                 i.secondLevel.vbReader = valblk.MakeReader(&i.secondLevel, rp, r.valueBIH, stats)
     191           2 :                 getLazyValuer = &i.secondLevel.vbReader
     192           2 :                 i.secondLevel.vbRH = objstorageprovider.UsePreallocatedReadHandle(r.readable, objstorage.NoReadBefore, &i.secondLevel.vbRHPrealloc)
     193           2 :         }
     194           2 :         i.secondLevel.data.InitOnce(r.keySchema, r.Comparer, getLazyValuer)
     195           2 :         i.useFilterBlock = shouldUseFilterBlock(r, filterBlockSizeLimit)
     196           2 :         topLevelIndexH, err := r.readTopLevelIndexBlock(ctx, i.secondLevel.readBlockEnv, i.secondLevel.indexFilterRH)
     197           2 :         if err == nil {
     198           2 :                 err = i.topLevelIndex.InitHandle(r.Comparer, topLevelIndexH, transforms)
     199           2 :         }
     200           2 :         if err != nil {
     201           0 :                 _ = i.Close()
     202           0 :                 return nil, err
     203           0 :         }
     204           2 :         return i, nil
     205             : }
     206             : 
     207             : // newRowBlockTwoLevelIterator reads the top-level index block and creates and
     208             : // initializes a two-level iterator over an sstable with row-oriented data
     209             : // blocks.
     210             : //
     211             : // Note that lower, upper are iterator bounds and are separate from virtual
     212             : // sstable bounds. If the virtualState passed in is not nil, then virtual
     213             : // sstable bounds will be enforced.
     214             : func newRowBlockTwoLevelIterator(
     215             :         ctx context.Context,
     216             :         r *Reader,
     217             :         v *virtualState,
     218             :         transforms IterTransforms,
     219             :         lower, upper []byte,
     220             :         filterer *BlockPropertiesFilterer,
     221             :         filterBlockSizeLimit FilterBlockSizeLimit,
     222             :         stats *base.InternalIteratorStats,
     223             :         statsAccum IterStatsAccumulator,
     224             :         rp valblk.ReaderProvider,
     225             :         bufferPool *block.BufferPool,
     226           2 : ) (*twoLevelIteratorRowBlocks, error) {
     227           2 :         if r.err != nil {
     228           0 :                 return nil, r.err
     229           0 :         }
     230           2 :         if r.tableFormat.BlockColumnar() {
     231           0 :                 panic(errors.AssertionFailedf("table format %s uses block columnar format", r.tableFormat))
     232             :         }
     233           2 :         i := twoLevelIterRowBlockPool.Get().(*twoLevelIteratorRowBlocks)
     234           2 :         i.secondLevel.init(ctx, r, v, transforms, lower, upper, filterer,
     235           2 :                 false, // Disable the use of the filter block in the second level.
     236           2 :                 stats, statsAccum, bufferPool)
     237           2 :         if r.tableFormat >= TableFormatPebblev3 {
     238           2 :                 if r.Properties.NumValueBlocks > 0 {
     239           2 :                         // NB: we cannot avoid this ~248 byte allocation, since valueBlockReader
     240           2 :                         // can outlive the singleLevelIterator due to be being embedded in a
     241           2 :                         // LazyValue. This consumes ~2% in microbenchmark CPU profiles, but we
     242           2 :                         // should only optimize this if it shows up as significant in end-to-end
     243           2 :                         // CockroachDB benchmarks, since it is tricky to do so. One possibility
     244           2 :                         // is that if many sstable iterators only get positioned at latest
     245           2 :                         // versions of keys, and therefore never expose a LazyValue that is
     246           2 :                         // separated to their callers, they can put this valueBlockReader into a
     247           2 :                         // sync.Pool.
     248           2 :                         i.secondLevel.vbReader = valblk.MakeReader(&i.secondLevel, rp, r.valueBIH, stats)
     249           2 :                         i.secondLevel.data.SetGetLazyValuer(&i.secondLevel.vbReader)
     250           2 :                         i.secondLevel.vbRH = objstorageprovider.UsePreallocatedReadHandle(r.readable, objstorage.NoReadBefore, &i.secondLevel.vbRHPrealloc)
     251           2 :                 }
     252           2 :                 i.secondLevel.data.SetHasValuePrefix(true)
     253             :         }
     254             : 
     255           2 :         i.useFilterBlock = shouldUseFilterBlock(r, filterBlockSizeLimit)
     256           2 : 
     257           2 :         topLevelIndexH, err := r.readTopLevelIndexBlock(ctx, i.secondLevel.readBlockEnv, i.secondLevel.indexFilterRH)
     258           2 :         if err == nil {
     259           2 :                 err = i.topLevelIndex.InitHandle(r.Comparer, topLevelIndexH, transforms)
     260           2 :         }
     261           2 :         if err != nil {
     262           1 :                 _ = i.Close()
     263           1 :                 return nil, err
     264           1 :         }
     265           2 :         return i, nil
     266             : }
     267             : 
     268           0 : func (i *twoLevelIterator[I, PI, D, PD]) String() string {
     269           0 :         return i.secondLevel.String()
     270           0 : }
     271             : 
     272             : // DebugTree is part of the InternalIterator interface.
     273           0 : func (i *twoLevelIterator[I, PI, D, PD]) DebugTree(tp treeprinter.Node) {
     274           0 :         tp.Childf("%T(%p) fileNum=%s", i, i, i.String())
     275           0 : }
     276             : 
     277             : // SeekGE implements internalIterator.SeekGE, as documented in the pebble
     278             : // package. Note that SeekGE only checks the upper bound. It is up to the
     279             : // caller to ensure that key is greater than or equal to the lower bound.
     280             : func (i *twoLevelIterator[I, PI, D, PD]) SeekGE(
     281             :         key []byte, flags base.SeekGEFlags,
     282           2 : ) *base.InternalKV {
     283           2 :         if i.secondLevel.vState != nil {
     284           2 :                 // Callers of SeekGE don't know about virtual sstable bounds, so we may
     285           2 :                 // have to internally restrict the bounds.
     286           2 :                 //
     287           2 :                 // TODO(bananabrick): We can optimize away this check for the level iter
     288           2 :                 // if necessary.
     289           2 :                 if i.secondLevel.cmp(key, i.secondLevel.lower) < 0 {
     290           2 :                         key = i.secondLevel.lower
     291           2 :                 }
     292             :         }
     293             : 
     294           2 :         err := i.secondLevel.err
     295           2 :         i.secondLevel.err = nil // clear cached iteration error
     296           2 : 
     297           2 :         // The twoLevelIterator could be already exhausted. Utilize that when
     298           2 :         // trySeekUsingNext is true. See the comment about data-exhausted, PGDE, and
     299           2 :         // bounds-exhausted near the top of the file.
     300           2 :         if flags.TrySeekUsingNext() &&
     301           2 :                 (i.secondLevel.exhaustedBounds == +1 || (PD(&i.secondLevel.data).IsDataInvalidated() && PI(&i.secondLevel.index).IsDataInvalidated())) &&
     302           2 :                 err == nil {
     303           2 :                 // Already exhausted, so return nil.
     304           2 :                 return nil
     305           2 :         }
     306             : 
     307             :         // SeekGE performs various step-instead-of-seeking optimizations: eg enabled
     308             :         // by trySeekUsingNext, or by monotonically increasing bounds (i.boundsCmp).
     309             : 
     310             :         // We fall into the slow path if i.index.IsDataInvalidated() even if the
     311             :         // top-level iterator is already positioned correctly and all other
     312             :         // conditions are met. An alternative structure could reuse topLevelIndex's
     313             :         // current position and reload the index block to which it points. Arguably,
     314             :         // an index block load is expensive and the index block may still be earlier
     315             :         // than the index block containing the sought key, resulting in a wasteful
     316             :         // block load.
     317             : 
     318           2 :         var dontSeekWithinSingleLevelIter bool
     319           2 :         if PI(&i.topLevelIndex).IsDataInvalidated() || !PI(&i.topLevelIndex).Valid() || PI(&i.secondLevel.index).IsDataInvalidated() || err != nil ||
     320           2 :                 (i.secondLevel.boundsCmp <= 0 && !flags.TrySeekUsingNext()) || PI(&i.topLevelIndex).SeparatorLT(key) {
     321           2 :                 // Slow-path: need to position the topLevelIndex.
     322           2 : 
     323           2 :                 // The previous exhausted state of singleLevelIterator is no longer
     324           2 :                 // relevant, since we may be moving to a different index block.
     325           2 :                 i.secondLevel.exhaustedBounds = 0
     326           2 :                 flags = flags.DisableTrySeekUsingNext()
     327           2 :                 if !PI(&i.topLevelIndex).SeekGE(key) {
     328           2 :                         PD(&i.secondLevel.data).Invalidate()
     329           2 :                         PI(&i.secondLevel.index).Invalidate()
     330           2 :                         return nil
     331           2 :                 }
     332             : 
     333           2 :                 result := i.loadSecondLevelIndexBlock(+1)
     334           2 :                 if result == loadBlockFailed {
     335           1 :                         i.secondLevel.boundsCmp = 0
     336           1 :                         return nil
     337           1 :                 }
     338           2 :                 if result == loadBlockIrrelevant {
     339           2 :                         // Enforce the upper bound here since don't want to bother moving to
     340           2 :                         // the next entry in the top level index if upper bound is already
     341           2 :                         // exceeded. Note that the next entry starts with keys >=
     342           2 :                         // topLevelIndex.Separator() since even though this is the block
     343           2 :                         // separator, the same user key can span multiple index blocks. If
     344           2 :                         // upper is exclusive we pass orEqual=true below, else we require
     345           2 :                         // the separator to be strictly greater than upper.
     346           2 :                         if i.secondLevel.upper != nil && PI(&i.topLevelIndex).SeparatorGT(
     347           2 :                                 i.secondLevel.upper, !i.secondLevel.endKeyInclusive) {
     348           2 :                                 i.secondLevel.exhaustedBounds = +1
     349           2 :                         }
     350             :                         // Fall through to skipForward.
     351           2 :                         dontSeekWithinSingleLevelIter = true
     352           2 :                         // Clear boundsCmp.
     353           2 :                         //
     354           2 :                         // In the typical cases where dontSeekWithinSingleLevelIter=false,
     355           2 :                         // the singleLevelIterator.SeekGE call will clear boundsCmp.
     356           2 :                         // However, in this case where dontSeekWithinSingleLevelIter=true,
     357           2 :                         // we never seek on the single-level iterator. This call will fall
     358           2 :                         // through to skipForward, which may improperly leave boundsCmp=+1
     359           2 :                         // unless we clear it here.
     360           2 :                         i.secondLevel.boundsCmp = 0
     361             :                 }
     362           2 :         } else {
     363           2 :                 // INVARIANT: err == nil.
     364           2 :                 //
     365           2 :                 // Else fast-path: There are two possible cases, from
     366           2 :                 // (i.boundsCmp > 0 || flags.TrySeekUsingNext()):
     367           2 :                 //
     368           2 :                 // 1) The bounds have moved forward (i.boundsCmp > 0) and this SeekGE is
     369           2 :                 // respecting the lower bound (guaranteed by Iterator). We know that the
     370           2 :                 // iterator must already be positioned within or just outside the previous
     371           2 :                 // bounds. Therefore, the topLevelIndex iter cannot be positioned at an
     372           2 :                 // entry ahead of the seek position (though it can be positioned behind).
     373           2 :                 // The !i.cmp(key, i.topLevelIndex.Separator) > 0 confirms that it is
     374           2 :                 // not behind. Since it is not ahead and not behind it must be at the
     375           2 :                 // right position.
     376           2 :                 //
     377           2 :                 // 2) This SeekGE will land on a key that is greater than the key we are
     378           2 :                 // currently at (guaranteed by trySeekUsingNext), but since i.cmp(key,
     379           2 :                 // i.topLevelIndex.Separator()) <= 0, we are at the correct lower level
     380           2 :                 // index block. No need to reset the state of singleLevelIterator.
     381           2 :                 //
     382           2 :                 // Note that cases 1 and 2 never overlap, and one of them must be true.
     383           2 :                 // This invariant checking is important enough that we do not gate it
     384           2 :                 // behind invariants.Enabled.
     385           2 :                 if i.secondLevel.boundsCmp > 0 == flags.TrySeekUsingNext() {
     386           0 :                         panic(fmt.Sprintf("inconsistency in optimization case 1 %t and case 2 %t",
     387           0 :                                 i.secondLevel.boundsCmp > 0, flags.TrySeekUsingNext()))
     388             :                 }
     389             : 
     390           2 :                 if !flags.TrySeekUsingNext() {
     391           2 :                         // Case 1. Bounds have changed so the previous exhausted bounds state is
     392           2 :                         // irrelevant.
     393           2 :                         // WARNING-data-exhausted: this is safe to do only because the monotonic
     394           2 :                         // bounds optimizations only work when !data-exhausted. If they also
     395           2 :                         // worked with data-exhausted, we have made it unclear whether
     396           2 :                         // data-exhausted is actually true. See the comment at the top of the
     397           2 :                         // file.
     398           2 :                         i.secondLevel.exhaustedBounds = 0
     399           2 :                 }
     400             :                 // Else flags.TrySeekUsingNext(). The i.exhaustedBounds is important to
     401             :                 // preserve for singleLevelIterator, and twoLevelIterator.skipForward. See
     402             :                 // bug https://github.com/cockroachdb/pebble/issues/2036.
     403             :         }
     404             : 
     405           2 :         if !dontSeekWithinSingleLevelIter {
     406           2 :                 // Note that while trySeekUsingNext could be false here, singleLevelIterator
     407           2 :                 // could do its own boundsCmp-based optimization to seek using next.
     408           2 :                 if ikv := i.secondLevel.SeekGE(key, flags); ikv != nil {
     409           2 :                         return ikv
     410           2 :                 }
     411             :         }
     412           2 :         return i.skipForward()
     413             : }
     414             : 
     415             : // SeekPrefixGE implements internalIterator.SeekPrefixGE, as documented in the
     416             : // pebble package. Note that SeekPrefixGE only checks the upper bound. It is up
     417             : // to the caller to ensure that key is greater than or equal to the lower bound.
     418             : func (i *twoLevelIterator[I, PI, D, PD]) SeekPrefixGE(
     419             :         prefix, key []byte, flags base.SeekGEFlags,
     420           2 : ) *base.InternalKV {
     421           2 :         if i.secondLevel.vState != nil {
     422           2 :                 // Callers of SeekGE don't know about virtual sstable bounds, so we may
     423           2 :                 // have to internally restrict the bounds.
     424           2 :                 //
     425           2 :                 // TODO(bananabrick): We can optimize away this check for the level iter
     426           2 :                 // if necessary.
     427           2 :                 if i.secondLevel.cmp(key, i.secondLevel.lower) < 0 {
     428           2 :                         key = i.secondLevel.lower
     429           2 :                 }
     430             :         }
     431             : 
     432             :         // NOTE: prefix is only used for bloom filter checking and not later work in
     433             :         // this method. Hence, we can use the existing iterator position if the last
     434             :         // SeekPrefixGE did not fail bloom filter matching.
     435             : 
     436           2 :         err := i.secondLevel.err
     437           2 :         i.secondLevel.err = nil // clear cached iteration error
     438           2 : 
     439           2 :         // The twoLevelIterator could be already exhausted. Utilize that when
     440           2 :         // trySeekUsingNext is true. See the comment about data-exhausted, PGDE, and
     441           2 :         // bounds-exhausted near the top of the file.
     442           2 :         filterUsedAndDidNotMatch := i.useFilterBlock && !i.lastBloomFilterMatched
     443           2 :         if flags.TrySeekUsingNext() && !filterUsedAndDidNotMatch &&
     444           2 :                 (i.secondLevel.exhaustedBounds == +1 || (PD(&i.secondLevel.data).IsDataInvalidated() && PI(&i.secondLevel.index).IsDataInvalidated())) &&
     445           2 :                 err == nil {
     446           1 :                 // Already exhausted, so return nil.
     447           1 :                 return nil
     448           1 :         }
     449             : 
     450             :         // Check prefix bloom filter.
     451           2 :         if i.useFilterBlock {
     452           2 :                 if !i.lastBloomFilterMatched {
     453           2 :                         // Iterator is not positioned based on last seek.
     454           2 :                         flags = flags.DisableTrySeekUsingNext()
     455           2 :                 }
     456           2 :                 i.lastBloomFilterMatched = false
     457           2 :                 var mayContain bool
     458           2 :                 mayContain, i.secondLevel.err = i.secondLevel.bloomFilterMayContain(prefix)
     459           2 :                 if i.secondLevel.err != nil || !mayContain {
     460           2 :                         // In the i.secondLevel.err == nil case, this invalidation may not be necessary for
     461           2 :                         // correctness, and may be a place to optimize later by reusing the
     462           2 :                         // already loaded block. It was necessary in earlier versions of the code
     463           2 :                         // since the caller was allowed to call Next when SeekPrefixGE returned
     464           2 :                         // nil. This is no longer allowed.
     465           2 :                         PD(&i.secondLevel.data).Invalidate()
     466           2 :                         return nil
     467           2 :                 }
     468           2 :                 i.lastBloomFilterMatched = true
     469             :         }
     470             : 
     471             :         // Bloom filter matches.
     472             : 
     473             :         // SeekPrefixGE performs various step-instead-of-seeking optimizations: eg
     474             :         // enabled by trySeekUsingNext, or by monotonically increasing bounds
     475             :         // (i.boundsCmp).
     476             : 
     477             :         // We fall into the slow path if i.index.IsDataInvalidated() even if the
     478             :         // top-level iterator is already positioned correctly and all other
     479             :         // conditions are met. An alternative structure could reuse topLevelIndex's
     480             :         // current position and reload the index block to which it points. Arguably,
     481             :         // an index block load is expensive and the index block may still be earlier
     482             :         // than the index block containing the sought key, resulting in a wasteful
     483             :         // block load.
     484             : 
     485           2 :         var dontSeekWithinSingleLevelIter bool
     486           2 :         if PI(&i.topLevelIndex).IsDataInvalidated() || !PI(&i.topLevelIndex).Valid() || PI(&i.secondLevel.index).IsDataInvalidated() || err != nil ||
     487           2 :                 (i.secondLevel.boundsCmp <= 0 && !flags.TrySeekUsingNext()) || PI(&i.topLevelIndex).SeparatorLT(key) {
     488           2 :                 // Slow-path: need to position the topLevelIndex.
     489           2 : 
     490           2 :                 // The previous exhausted state of singleLevelIterator is no longer
     491           2 :                 // relevant, since we may be moving to a different index block.
     492           2 :                 i.secondLevel.exhaustedBounds = 0
     493           2 :                 flags = flags.DisableTrySeekUsingNext()
     494           2 :                 if !PI(&i.topLevelIndex).SeekGE(key) {
     495           2 :                         PD(&i.secondLevel.data).Invalidate()
     496           2 :                         PI(&i.secondLevel.index).Invalidate()
     497           2 :                         return nil
     498           2 :                 }
     499             : 
     500           2 :                 result := i.loadSecondLevelIndexBlock(+1)
     501           2 :                 if result == loadBlockFailed {
     502           1 :                         i.secondLevel.boundsCmp = 0
     503           1 :                         return nil
     504           1 :                 }
     505           2 :                 if result == loadBlockIrrelevant {
     506           2 :                         // Enforce the upper bound here since don't want to bother moving to
     507           2 :                         // the next entry in the top level index if upper bound is already
     508           2 :                         // exceeded. Note that the next entry starts with keys >=
     509           2 :                         // topLevelIndex.Separator() since even though this is the block
     510           2 :                         // separator, the same user key can span multiple index blocks. If
     511           2 :                         // upper is exclusive we pass orEqual=true below, else we require
     512           2 :                         // the separator to be strictly greater than upper.
     513           2 :                         if i.secondLevel.upper != nil && PI(&i.topLevelIndex).SeparatorGT(
     514           2 :                                 i.secondLevel.upper, !i.secondLevel.endKeyInclusive) {
     515           2 :                                 i.secondLevel.exhaustedBounds = +1
     516           2 :                         }
     517             :                         // Fall through to skipForward.
     518           2 :                         dontSeekWithinSingleLevelIter = true
     519           2 :                         // Clear boundsCmp.
     520           2 :                         //
     521           2 :                         // In the typical cases where dontSeekWithinSingleLevelIter=false,
     522           2 :                         // the singleLevelIterator.SeekPrefixGE call will clear boundsCmp.
     523           2 :                         // However, in this case where dontSeekWithinSingleLevelIter=true,
     524           2 :                         // we never seek on the single-level iterator. This call will fall
     525           2 :                         // through to skipForward, which may improperly leave boundsCmp=+1
     526           2 :                         // unless we clear it here.
     527           2 :                         i.secondLevel.boundsCmp = 0
     528             :                 }
     529           2 :         } else {
     530           2 :                 // INVARIANT: err == nil.
     531           2 :                 //
     532           2 :                 // Else fast-path: There are two possible cases, from
     533           2 :                 // (i.boundsCmp > 0 || flags.TrySeekUsingNext()):
     534           2 :                 //
     535           2 :                 // 1) The bounds have moved forward (i.boundsCmp > 0) and this
     536           2 :                 // SeekPrefixGE is respecting the lower bound (guaranteed by Iterator). We
     537           2 :                 // know that the iterator must already be positioned within or just
     538           2 :                 // outside the previous bounds. Therefore, the topLevelIndex iter cannot
     539           2 :                 // be positioned at an entry ahead of the seek position (though it can be
     540           2 :                 // positioned behind). The !i.cmp(key, i.topLevelIndex.Separator()) > 0
     541           2 :                 // confirms that it is not behind. Since it is not ahead and not behind it
     542           2 :                 // must be at the right position.
     543           2 :                 //
     544           2 :                 // 2) This SeekPrefixGE will land on a key that is greater than the key we
     545           2 :                 // are currently at (guaranteed by trySeekUsingNext), but since i.cmp(key,
     546           2 :                 // i.topLevelIndex.Separator()) <= 0, we are at the correct lower level
     547           2 :                 // index block. No need to reset the state of singleLevelIterator.
     548           2 :                 //
     549           2 :                 // Note that cases 1 and 2 never overlap, and one of them must be true.
     550           2 :                 // This invariant checking is important enough that we do not gate it
     551           2 :                 // behind invariants.Enabled.
     552           2 :                 if i.secondLevel.boundsCmp > 0 == flags.TrySeekUsingNext() {
     553           0 :                         panic(fmt.Sprintf("inconsistency in optimization case 1 %t and case 2 %t",
     554           0 :                                 i.secondLevel.boundsCmp > 0, flags.TrySeekUsingNext()))
     555             :                 }
     556             : 
     557           2 :                 if !flags.TrySeekUsingNext() {
     558           1 :                         // Case 1. Bounds have changed so the previous exhausted bounds state is
     559           1 :                         // irrelevant.
     560           1 :                         // WARNING-data-exhausted: this is safe to do only because the monotonic
     561           1 :                         // bounds optimizations only work when !data-exhausted. If they also
     562           1 :                         // worked with data-exhausted, we have made it unclear whether
     563           1 :                         // data-exhausted is actually true. See the comment at the top of the
     564           1 :                         // file.
     565           1 :                         i.secondLevel.exhaustedBounds = 0
     566           1 :                 }
     567             :                 // Else flags.TrySeekUsingNext(). The i.exhaustedBounds is important to
     568             :                 // preserve for singleLevelIterator, and twoLevelIterator.skipForward. See
     569             :                 // bug https://github.com/cockroachdb/pebble/issues/2036.
     570             :         }
     571             : 
     572           2 :         if !dontSeekWithinSingleLevelIter {
     573           2 :                 if ikv := i.secondLevel.seekPrefixGE(prefix, key, flags); ikv != nil {
     574           2 :                         return ikv
     575           2 :                 }
     576             :         }
     577             :         // NB: skipForward checks whether exhaustedBounds is already +1.
     578           2 :         return i.skipForward()
     579             : }
     580             : 
     581             : // virtualLast should only be called if i.vReader != nil.
     582           2 : func (i *twoLevelIterator[I, PI, D, PD]) virtualLast() *base.InternalKV {
     583           2 :         if i.secondLevel.vState == nil {
     584           0 :                 panic("pebble: invalid call to virtualLast")
     585             :         }
     586           2 :         if !i.secondLevel.endKeyInclusive {
     587           2 :                 // Trivial case.
     588           2 :                 return i.SeekLT(i.secondLevel.upper, base.SeekLTFlagsNone)
     589           2 :         }
     590           2 :         return i.virtualLastSeekLE()
     591             : }
     592             : 
     593             : // virtualLastSeekLE implements a SeekLE() that can be used as part
     594             : // of reverse-iteration calls such as a Last() on a virtual sstable. Does a
     595             : // SeekLE on the upper bound of the file/iterator.
     596           2 : func (i *twoLevelIterator[I, PI, D, PD]) virtualLastSeekLE() *base.InternalKV {
     597           2 :         // Callers of SeekLE don't know about virtual sstable bounds, so we may
     598           2 :         // have to internally restrict the bounds.
     599           2 :         //
     600           2 :         // TODO(bananabrick): We can optimize this check away for the level iter
     601           2 :         // if necessary.
     602           2 :         if !i.secondLevel.endKeyInclusive {
     603           0 :                 panic("unexpected virtualLastSeekLE with exclusive upper bounds")
     604             :         }
     605           2 :         key := i.secondLevel.upper
     606           2 :         // Need to position the topLevelIndex.
     607           2 :         //
     608           2 :         // The previous exhausted state of singleLevelIterator is no longer
     609           2 :         // relevant, since we may be moving to a different index block.
     610           2 :         i.secondLevel.exhaustedBounds = 0
     611           2 :         // Seek optimization only applies until iterator is first positioned with a
     612           2 :         // SeekGE or SeekLT after SetBounds.
     613           2 :         i.secondLevel.boundsCmp = 0
     614           2 :         topLevelOk := PI(&i.topLevelIndex).SeekGE(key)
     615           2 :         // We can have multiple internal keys with the same user key as the seek
     616           2 :         // key. In that case, we want the last (greatest) internal key.
     617           2 :         for topLevelOk && bytes.Equal(PI(&i.topLevelIndex).Separator(), key) {
     618           2 :                 topLevelOk = PI(&i.topLevelIndex).Next()
     619           2 :         }
     620           2 :         if !topLevelOk {
     621           2 :                 return i.skipBackward()
     622           2 :         }
     623           2 :         result := i.loadSecondLevelIndexBlock(-1)
     624           2 :         if result == loadBlockFailed {
     625           0 :                 i.secondLevel.boundsCmp = 0
     626           0 :                 return nil
     627           0 :         }
     628           2 :         if result == loadBlockIrrelevant {
     629           1 :                 // Load the previous block.
     630           1 :                 return i.skipBackward()
     631           1 :         }
     632           2 :         if ikv := i.secondLevel.virtualLastSeekLE(); ikv != nil {
     633           2 :                 return ikv
     634           2 :         }
     635           2 :         return i.skipBackward()
     636             : }
     637             : 
     638             : // SeekLT implements internalIterator.SeekLT, as documented in the pebble
     639             : // package. Note that SeekLT only checks the lower bound. It is up to the
     640             : // caller to ensure that key is less than the upper bound.
     641             : func (i *twoLevelIterator[I, PI, D, PD]) SeekLT(
     642             :         key []byte, flags base.SeekLTFlags,
     643           2 : ) *base.InternalKV {
     644           2 :         if i.secondLevel.vState != nil {
     645           2 :                 // Might have to fix upper bound since virtual sstable bounds are not
     646           2 :                 // known to callers of SeekLT.
     647           2 :                 //
     648           2 :                 // TODO(bananabrick): We can optimize away this check for the level iter
     649           2 :                 // if necessary.
     650           2 :                 cmp := i.secondLevel.cmp(key, i.secondLevel.upper)
     651           2 :                 // key == i.secondLevel.upper is fine. We'll do the right thing and return the
     652           2 :                 // first internal key with user key < key.
     653           2 :                 if cmp > 0 {
     654           2 :                         return i.virtualLast()
     655           2 :                 }
     656             :         }
     657             : 
     658           2 :         i.secondLevel.exhaustedBounds = 0
     659           2 :         i.secondLevel.err = nil // clear cached iteration error
     660           2 :         // Seek optimization only applies until iterator is first positioned after SetBounds.
     661           2 :         i.secondLevel.boundsCmp = 0
     662           2 : 
     663           2 :         var result loadBlockResult
     664           2 :         // NB: Unlike SeekGE, we don't have a fast-path here since we don't know
     665           2 :         // whether the topLevelIndex is positioned after the position that would
     666           2 :         // be returned by doing i.topLevelIndex.SeekGE(). To know this we would
     667           2 :         // need to know the index key preceding the current one.
     668           2 :         // NB: If a bound-limited block property filter is configured, it's
     669           2 :         // externally ensured that the filter is disabled (through returning
     670           2 :         // Intersects=false irrespective of the block props provided) during seeks.
     671           2 :         if !PI(&i.topLevelIndex).SeekGE(key) {
     672           2 :                 if !PI(&i.topLevelIndex).Last() {
     673           0 :                         PD(&i.secondLevel.data).Invalidate()
     674           0 :                         PI(&i.secondLevel.index).Invalidate()
     675           0 :                         return nil
     676           0 :                 }
     677             : 
     678           2 :                 result = i.loadSecondLevelIndexBlock(-1)
     679           2 :                 if result == loadBlockFailed {
     680           1 :                         return nil
     681           1 :                 }
     682           2 :                 if result == loadBlockOK {
     683           2 :                         if ikv := i.secondLevel.lastInternal(); ikv != nil {
     684           2 :                                 return i.secondLevel.maybeVerifyKey(ikv)
     685           2 :                         }
     686             :                         // Fall through to skipBackward since the singleLevelIterator did
     687             :                         // not have any blocks that satisfy the block interval
     688             :                         // constraints, or the lower bound was reached.
     689             :                 }
     690             :                 // Else loadBlockIrrelevant, so fall through.
     691           2 :         } else {
     692           2 :                 result = i.loadSecondLevelIndexBlock(-1)
     693           2 :                 if result == loadBlockFailed {
     694           1 :                         return nil
     695           1 :                 }
     696           2 :                 if result == loadBlockOK {
     697           2 :                         if ikv := i.secondLevel.SeekLT(key, flags); ikv != nil {
     698           2 :                                 return i.secondLevel.maybeVerifyKey(ikv)
     699           2 :                         }
     700             :                         // Fall through to skipBackward since the singleLevelIterator did
     701             :                         // not have any blocks that satisfy the block interval
     702             :                         // constraint, or the lower bound was reached.
     703             :                 }
     704             :                 // Else loadBlockIrrelevant, so fall through.
     705             :         }
     706           2 :         if result == loadBlockIrrelevant {
     707           2 :                 // Enforce the lower bound here since don't want to bother moving to
     708           2 :                 // the previous entry in the top level index if lower bound is already
     709           2 :                 // exceeded. Note that the previous entry starts with keys <=
     710           2 :                 // ikey.InternalKey.UserKey since even though this is the current block's
     711           2 :                 // separator, the same user key can span multiple index blocks.
     712           2 :                 if i.secondLevel.lower != nil && PI(&i.topLevelIndex).SeparatorLT(i.secondLevel.lower) {
     713           1 :                         i.secondLevel.exhaustedBounds = -1
     714           1 :                 }
     715             :         }
     716             :         // NB: skipBackward checks whether exhaustedBounds is already -1.
     717           2 :         return i.skipBackward()
     718             : }
     719             : 
     720             : // First implements internalIterator.First, as documented in the pebble
     721             : // package. Note that First only checks the upper bound. It is up to the caller
     722             : // to ensure that key is greater than or equal to the lower bound (e.g. via a
     723             : // call to SeekGE(lower)).
     724           2 : func (i *twoLevelIterator[I, PI, D, PD]) First() *base.InternalKV {
     725           2 :         // If we have a lower bound, use SeekGE. Note that in general this is not
     726           2 :         // supported usage, except when the lower bound is there because the table is
     727           2 :         // virtual.
     728           2 :         if i.secondLevel.lower != nil {
     729           2 :                 return i.SeekGE(i.secondLevel.lower, base.SeekGEFlagsNone)
     730           2 :         }
     731           2 :         i.secondLevel.exhaustedBounds = 0
     732           2 :         i.secondLevel.err = nil // clear cached iteration error
     733           2 :         // Seek optimization only applies until iterator is first positioned after SetBounds.
     734           2 :         i.secondLevel.boundsCmp = 0
     735           2 : 
     736           2 :         if !PI(&i.topLevelIndex).First() {
     737           0 :                 return nil
     738           0 :         }
     739           2 :         result := i.loadSecondLevelIndexBlock(+1)
     740           2 :         if result == loadBlockFailed {
     741           1 :                 return nil
     742           1 :         }
     743           2 :         if result == loadBlockOK {
     744           2 :                 if ikv := i.secondLevel.First(); ikv != nil {
     745           2 :                         return ikv
     746           2 :                 }
     747             :                 // Else fall through to skipForward.
     748           2 :         } else {
     749           2 :                 // result == loadBlockIrrelevant. Enforce the upper bound here since
     750           2 :                 // don't want to bother moving to the next entry in the top level index
     751           2 :                 // if upper bound is already exceeded. Note that the next entry starts
     752           2 :                 // with keys >= topLevelIndex.Separator() since even though this is the
     753           2 :                 // block separator, the same user key can span multiple index blocks.
     754           2 :                 // If upper is exclusive we pass orEqual=true below, else we require the
     755           2 :                 // separator to be strictly greater than upper.
     756           2 :                 if i.secondLevel.upper != nil && PI(&i.topLevelIndex).SeparatorGT(
     757           2 :                         i.secondLevel.upper, !i.secondLevel.endKeyInclusive) {
     758           1 :                         i.secondLevel.exhaustedBounds = +1
     759           1 :                 }
     760             :         }
     761             :         // NB: skipForward checks whether exhaustedBounds is already +1.
     762           2 :         return i.skipForward()
     763             : }
     764             : 
     765             : // Last implements internalIterator.Last, as documented in the pebble
     766             : // package. Note that Last only checks the lower bound. It is up to the caller
     767             : // to ensure that key is less than the upper bound (e.g. via a call to
     768             : // SeekLT(upper))
     769           2 : func (i *twoLevelIterator[I, PI, D, PD]) Last() *base.InternalKV {
     770           2 :         if i.secondLevel.vState != nil {
     771           2 :                 if i.secondLevel.endKeyInclusive {
     772           2 :                         return i.virtualLast()
     773           2 :                 }
     774           2 :                 return i.SeekLT(i.secondLevel.upper, base.SeekLTFlagsNone)
     775             :         }
     776             : 
     777           2 :         if i.secondLevel.upper != nil {
     778           0 :                 panic("twoLevelIterator.Last() used despite upper bound")
     779             :         }
     780           2 :         i.secondLevel.exhaustedBounds = 0
     781           2 :         i.secondLevel.err = nil // clear cached iteration error
     782           2 :         // Seek optimization only applies until iterator is first positioned after SetBounds.
     783           2 :         i.secondLevel.boundsCmp = 0
     784           2 : 
     785           2 :         if !PI(&i.topLevelIndex).Last() {
     786           0 :                 return nil
     787           0 :         }
     788           2 :         result := i.loadSecondLevelIndexBlock(-1)
     789           2 :         if result == loadBlockFailed {
     790           1 :                 return nil
     791           1 :         }
     792           2 :         if result == loadBlockOK {
     793           2 :                 if ikv := i.secondLevel.Last(); ikv != nil {
     794           2 :                         return ikv
     795           2 :                 }
     796             :                 // Else fall through to skipBackward.
     797           2 :         } else {
     798           2 :                 // result == loadBlockIrrelevant. Enforce the lower bound here since
     799           2 :                 // don't want to bother moving to the previous entry in the top level
     800           2 :                 // index if lower bound is already exceeded. Note that the previous
     801           2 :                 // entry starts with keys <= ikv.InternalKey.UserKey since even though
     802           2 :                 // this is the current block's separator, the same user key can span
     803           2 :                 // multiple index blocks.
     804           2 :                 if i.secondLevel.lower != nil && PI(&i.topLevelIndex).SeparatorLT(i.secondLevel.lower) {
     805           1 :                         i.secondLevel.exhaustedBounds = -1
     806           1 :                 }
     807             :         }
     808             :         // NB: skipBackward checks whether exhaustedBounds is already -1.
     809           2 :         return i.skipBackward()
     810             : }
     811             : 
     812             : // Next implements internalIterator.Next, as documented in the pebble
     813             : // package.
     814             : // Note: twoLevelCompactionIterator.Next mirrors the implementation of
     815             : // twoLevelIterator.Next due to performance. Keep the two in sync.
     816           2 : func (i *twoLevelIterator[I, PI, D, PD]) Next() *base.InternalKV {
     817           2 :         // Seek optimization only applies until iterator is first positioned after SetBounds.
     818           2 :         i.secondLevel.boundsCmp = 0
     819           2 :         if i.secondLevel.err != nil {
     820           1 :                 // TODO(jackson): Can this case be turned into a panic? Once an error is
     821           1 :                 // encountered, the iterator must be re-seeked.
     822           1 :                 return nil
     823           1 :         }
     824           2 :         if ikv := i.secondLevel.Next(); ikv != nil {
     825           2 :                 return ikv
     826           2 :         }
     827           2 :         return i.skipForward()
     828             : }
     829             : 
     830             : // NextPrefix implements (base.InternalIterator).NextPrefix.
     831           2 : func (i *twoLevelIterator[I, PI, D, PD]) NextPrefix(succKey []byte) *base.InternalKV {
     832           2 :         if i.secondLevel.exhaustedBounds == +1 {
     833           0 :                 panic("Next called even though exhausted upper bound")
     834             :         }
     835             :         // Seek optimization only applies until iterator is first positioned after SetBounds.
     836           2 :         i.secondLevel.boundsCmp = 0
     837           2 :         if i.secondLevel.err != nil {
     838           0 :                 // TODO(jackson): Can this case be turned into a panic? Once an error is
     839           0 :                 // encountered, the iterator must be re-seeked.
     840           0 :                 return nil
     841           0 :         }
     842           2 :         if ikv := i.secondLevel.NextPrefix(succKey); ikv != nil {
     843           2 :                 return ikv
     844           2 :         }
     845             :         // ikv == nil
     846           2 :         if i.secondLevel.err != nil {
     847           1 :                 return nil
     848           1 :         }
     849             : 
     850             :         // Did not find prefix in the existing second-level index block. This is the
     851             :         // slow-path where we seek the iterator.
     852           2 :         if !PI(&i.topLevelIndex).SeekGE(succKey) {
     853           1 :                 PD(&i.secondLevel.data).Invalidate()
     854           1 :                 PI(&i.secondLevel.index).Invalidate()
     855           1 :                 return nil
     856           1 :         }
     857           2 :         result := i.loadSecondLevelIndexBlock(+1)
     858           2 :         if result == loadBlockFailed {
     859           1 :                 return nil
     860           1 :         }
     861           2 :         if result == loadBlockIrrelevant {
     862           2 :                 // Enforce the upper bound here since don't want to bother moving to the
     863           2 :                 // next entry in the top level index if upper bound is already exceeded.
     864           2 :                 // Note that the next entry starts with keys >=
     865           2 :                 // topLevelIndex.Separator() since even though this is the block
     866           2 :                 // separator, the same user key can span multiple index blocks. If upper
     867           2 :                 // is exclusive we pass orEqual=true below, else we require the
     868           2 :                 // separator to be strictly greater than upper.
     869           2 :                 if i.secondLevel.upper != nil && PI(&i.topLevelIndex).SeparatorGT(
     870           2 :                         i.secondLevel.upper, !i.secondLevel.endKeyInclusive) {
     871           0 :                         i.secondLevel.exhaustedBounds = +1
     872           0 :                 }
     873           2 :         } else if kv := i.secondLevel.SeekGE(succKey, base.SeekGEFlagsNone); kv != nil {
     874           2 :                 return i.secondLevel.maybeVerifyKey(kv)
     875           2 :         }
     876           2 :         return i.skipForward()
     877             : }
     878             : 
     879             : // Prev implements internalIterator.Prev, as documented in the pebble
     880             : // package.
     881           2 : func (i *twoLevelIterator[I, PI, D, PD]) Prev() *base.InternalKV {
     882           2 :         // Seek optimization only applies until iterator is first positioned after SetBounds.
     883           2 :         i.secondLevel.boundsCmp = 0
     884           2 :         if i.secondLevel.err != nil {
     885           1 :                 return nil
     886           1 :         }
     887           2 :         if kv := i.secondLevel.Prev(); kv != nil {
     888           2 :                 return kv
     889           2 :         }
     890           2 :         return i.skipBackward()
     891             : }
     892             : 
     893           2 : func (i *twoLevelIterator[I, PI, D, PD]) skipForward() *base.InternalKV {
     894           2 :         for {
     895           2 :                 if i.secondLevel.err != nil || i.secondLevel.exhaustedBounds > 0 {
     896           2 :                         return nil
     897           2 :                 }
     898             : 
     899             :                 // It is possible that skipBackward went too far and the virtual table lower
     900             :                 // bound is after the first key in the block we are about to load, in which
     901             :                 // case we must use SeekGE below. The keys in the block we are about to load
     902             :                 // start right after the topLevelIndex key (before we Next).
     903             :                 //
     904             :                 // An example of how this can happen:
     905             :                 //
     906             :                 //   Second-level index block 1 - contains keys a@1, c@1
     907             :                 //   Second-level index block 2 - contains keys e@1, g@1
     908             :                 //   Second-level index block 3 - contains keys i@2, k@2
     909             :                 //
     910             :                 //   The virtual table lower bound is f. We have a range key masking filter
     911             :                 //   that filters keys with @1 suffix. We are positioned inside block 3 then
     912             :                 //   we Prev(). Block 2 is entirely filtered out, which makes us move to
     913             :                 //   block 1. Now the range key masking filter gets an update (via
     914             :                 //   SpanChanged) and it no longer filters out any keys. At this point if a
     915             :                 //   Next happens, we will load block 2 but it would not be legal to return
     916             :                 //   "e@1" which is outside the virtual bounds.
     917             :                 //
     918             :                 //   The core of the problem is that skipBackward doesn't know it can stop
     919             :                 //   at block 2, because it doesn't know what keys are at the start of that
     920             :                 //   block. This is why we don't have this problem in the opposite
     921             :                 //   direction: skipForward will never go beyond the last relevant block
     922             :                 //   because it looks at the separator key which is an upper bound for the
     923             :                 //   block.
     924             :                 //
     925             :                 // Note that this is only a problem with virtual tables; we make no
     926             :                 // guarantees wrt an iterator lower bound when we iterate forward. But we
     927             :                 // must never return keys that are not inside the virtual table.
     928           2 :                 useSeek := i.secondLevel.vState != nil && (!PI(&i.topLevelIndex).Valid() ||
     929           2 :                         PI(&i.topLevelIndex).SeparatorLT(i.secondLevel.vState.lower.UserKey))
     930           2 : 
     931           2 :                 i.secondLevel.exhaustedBounds = 0
     932           2 :                 if !PI(&i.topLevelIndex).Next() {
     933           2 :                         PD(&i.secondLevel.data).Invalidate()
     934           2 :                         PI(&i.secondLevel.index).Invalidate()
     935           2 :                         return nil
     936           2 :                 }
     937           2 :                 result := i.loadSecondLevelIndexBlock(+1)
     938           2 :                 if result == loadBlockFailed {
     939           1 :                         return nil
     940           1 :                 }
     941           2 :                 if result == loadBlockOK {
     942           2 :                         var ikv *base.InternalKV
     943           2 :                         if useSeek {
     944           2 :                                 ikv = i.secondLevel.SeekGE(i.secondLevel.lower, base.SeekGEFlagsNone)
     945           2 :                         } else {
     946           2 :                                 ikv = i.secondLevel.firstInternal()
     947           2 :                         }
     948           2 :                         if ikv != nil {
     949           2 :                                 return i.secondLevel.maybeVerifyKey(ikv)
     950           2 :                         }
     951             :                         // Next iteration will return if singleLevelIterator set
     952             :                         // exhaustedBounds = +1.
     953           2 :                 } else {
     954           2 :                         // result == loadBlockIrrelevant. Enforce the upper bound here since
     955           2 :                         // don't want to bother moving to the next entry in the top level
     956           2 :                         // index if upper bound is already exceeded. Note that the next
     957           2 :                         // entry starts with keys >= i.topLevelIndex.Separator() since even
     958           2 :                         // though this is the block separator, the same user key can span
     959           2 :                         // multiple index blocks. If upper is exclusive we pass orEqual=true
     960           2 :                         // below, else we require the separator to be strictly greater than
     961           2 :                         // upper.
     962           2 :                         if i.secondLevel.upper != nil && PI(&i.topLevelIndex).SeparatorGT(
     963           2 :                                 i.secondLevel.upper, !i.secondLevel.endKeyInclusive) {
     964           2 :                                 i.secondLevel.exhaustedBounds = +1
     965           2 :                                 // Next iteration will return.
     966           2 :                         }
     967             :                 }
     968             :         }
     969             : }
     970             : 
     971           2 : func (i *twoLevelIterator[I, PI, D, PD]) skipBackward() *base.InternalKV {
     972           2 :         for {
     973           2 :                 if i.secondLevel.err != nil || i.secondLevel.exhaustedBounds < 0 {
     974           2 :                         return nil
     975           2 :                 }
     976           2 :                 i.secondLevel.exhaustedBounds = 0
     977           2 :                 if !PI(&i.topLevelIndex).Prev() {
     978           2 :                         PD(&i.secondLevel.data).Invalidate()
     979           2 :                         PI(&i.secondLevel.index).Invalidate()
     980           2 :                         return nil
     981           2 :                 }
     982           2 :                 result := i.loadSecondLevelIndexBlock(-1)
     983           2 :                 if result == loadBlockFailed {
     984           1 :                         return nil
     985           1 :                 }
     986           2 :                 if result == loadBlockOK {
     987           2 :                         ikv := i.secondLevel.lastInternal()
     988           2 :                         if ikv != nil {
     989           2 :                                 return i.secondLevel.maybeVerifyKey(ikv)
     990           2 :                         }
     991             : 
     992             :                         // Next iteration will return if singleLevelIterator set
     993             :                         // exhaustedBounds = -1.
     994           2 :                 } else {
     995           2 :                         // result == loadBlockIrrelevant. Enforce the lower bound here since
     996           2 :                         // don't want to bother moving to the previous entry in the top
     997           2 :                         // level index if lower bound is already exceeded. Note that the
     998           2 :                         // previous entry starts with keys <= i.topLevelIndex.Separator() since
     999           2 :                         // even though this is the current block's separator, the same user
    1000           2 :                         // key can span multiple index blocks.
    1001           2 :                         if i.secondLevel.lower != nil && PI(&i.topLevelIndex).SeparatorLT(i.secondLevel.lower) {
    1002           2 :                                 i.secondLevel.exhaustedBounds = -1
    1003           2 :                                 // Next iteration will return.
    1004           2 :                         }
    1005             :                 }
    1006             :         }
    1007             : }
    1008             : 
    1009           2 : func (i *twoLevelIterator[I, PI, D, PD]) Error() error {
    1010           2 :         return i.secondLevel.Error()
    1011           2 : }
    1012             : 
    1013           2 : func (i *twoLevelIterator[I, PI, D, PD]) SetBounds(lower, upper []byte) {
    1014           2 :         i.secondLevel.SetBounds(lower, upper)
    1015           2 : }
    1016             : 
    1017           0 : func (i *twoLevelIterator[I, PI, D, PD]) SetContext(ctx context.Context) {
    1018           0 :         i.secondLevel.SetContext(ctx)
    1019           0 : }
    1020             : 
    1021           2 : func (i *twoLevelIterator[I, PI, D, PD]) SetCloseHook(fn func(i Iterator) error) {
    1022           2 :         i.secondLevel.SetCloseHook(fn)
    1023           2 : }
    1024             : 
    1025           2 : func (i *twoLevelIterator[I, PI, D, PD]) SetupForCompaction() {
    1026           2 :         i.secondLevel.SetupForCompaction()
    1027           2 : }
    1028             : 
    1029             : // Close implements internalIterator.Close, as documented in the pebble
    1030             : // package.
    1031           2 : func (i *twoLevelIterator[I, PI, D, PD]) Close() error {
    1032           2 :         if invariants.Enabled && i.secondLevel.pool != nil {
    1033           0 :                 panic("twoLevelIterator's singleLevelIterator has its own non-nil pool")
    1034             :         }
    1035           2 :         pool := i.pool
    1036           2 :         err := i.secondLevel.closeInternal()
    1037           2 :         i.secondLevel.resetForReuse()
    1038           2 :         err = firstError(err, PI(&i.topLevelIndex).Close())
    1039           2 :         i.useFilterBlock = false
    1040           2 :         i.lastBloomFilterMatched = false
    1041           2 :         if pool != nil {
    1042           2 :                 pool.Put(i)
    1043           2 :         }
    1044           2 :         return err
    1045             : }

Generated by: LCOV version 1.14