LCOV - 2024-08-28 08:16Z 34e929e1 - tests only.lcov - pebble/sstable/reader_iter_two

LCOV - code coverage report

Current view:	top level - pebble/sstable - reader_iter_two_lvl.go (source / functions)		Hit	Total	Coverage
Test:	2024-08-28 08:16Z 34e929e1 - tests only.lcov	Lines:	583	689	84.6 %
Date:	2024-08-28 08:16:42	Functions:	0	0	-

          Line data    Source code

       1             : // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : package sstable
       6             : 
       7             : import (
       8             :         "bytes"
       9             :         "context"
      10             :         "fmt"
      11             :         "sync"
      12             : 
      13             :         "github.com/cockroachdb/pebble/internal/base"
      14             :         "github.com/cockroachdb/pebble/internal/invariants"
      15             :         "github.com/cockroachdb/pebble/internal/treeprinter"
      16             :         "github.com/cockroachdb/pebble/objstorage"
      17             :         "github.com/cockroachdb/pebble/objstorage/objstorageprovider"
      18             :         "github.com/cockroachdb/pebble/objstorage/objstorageprovider/objiotracing"
      19             :         "github.com/cockroachdb/pebble/sstable/block"
      20             :         "github.com/cockroachdb/pebble/sstable/rowblk"
      21             : )
      22             : 
      23             : type twoLevelIterator[I any, PI block.IndexBlockIterator[I], D any, PD block.DataBlockIterator[D]] struct {
      24             :         secondLevel   singleLevelIterator[I, PI, D, PD]
      25             :         topLevelIndex rowblk.IndexIter
      26             :         // pool is the pool from which the iterator was allocated and to which the
      27             :         // iterator should be returned on Close. Because the iterator is
      28             :         // parameterized by the type of the data block iterator, pools must be
      29             :         // specific to the type of the data block iterator.
      30             :         pool *sync.Pool
      31             : 
      32             :         // useFilterBlock controls whether we consult the bloom filter in the
      33             :         // twoLevelIterator code. Note that secondLevel.useFilterBlock is always
      34             :         // false - any filtering happens at the top level.
      35             :         useFilterBlock         bool
      36             :         lastBloomFilterMatched bool
      37             : }
      38             : 
      39             : var _ Iterator = (*twoLevelIterator[rowblk.IndexIter, *rowblk.IndexIter, rowblk.Iter, *rowblk.Iter])(nil)
      40             : 
      41             : // loadIndex loads the index block at the current top level index position and
      42             : // leaves i.index unpositioned. If unsuccessful, it gets i.secondLevel.err to any error
      43             : // encountered, which may be nil if we have simply exhausted the entire table.
      44             : // This is used for two level indexes.
      45           1 : func (i *twoLevelIterator[I, PI, D, PD]) loadIndex(dir int8) loadBlockResult {
      46           1 :         // Ensure the index data block iterators are invalidated even if loading of
      47           1 :         // the index fails.
      48           1 :         PD(&i.secondLevel.data).Invalidate()
      49           1 :         PI(&i.secondLevel.index).Invalidate()
      50           1 :         if !i.topLevelIndex.Valid() {
      51           0 :                 return loadBlockFailed
      52           0 :         }
      53           1 :         bhp, err := i.topLevelIndex.BlockHandleWithProperties()
      54           1 :         if err != nil {
      55           0 :                 i.secondLevel.err = base.CorruptionErrorf("pebble/table: corrupt top level index entry (%v)", err)
      56           0 :                 return loadBlockFailed
      57           0 :         }
      58           1 :         if i.secondLevel.bpfs != nil {
      59           1 :                 intersects, err := i.secondLevel.bpfs.intersects(bhp.Props)
      60           1 :                 if err != nil {
      61           0 :                         i.secondLevel.err = errCorruptIndexEntry(err)
      62           0 :                         return loadBlockFailed
      63           0 :                 }
      64           1 :                 if intersects == blockMaybeExcluded {
      65           0 :                         intersects = i.resolveMaybeExcluded(dir)
      66           0 :                 }
      67           1 :                 if intersects == blockExcluded {
      68           1 :                         return loadBlockIrrelevant
      69           1 :                 }
      70             :                 // blockIntersects
      71             :         }
      72           1 :         ctx := objiotracing.WithBlockType(i.secondLevel.ctx, objiotracing.MetadataBlock)
      73           1 :         indexBlock, err := i.secondLevel.reader.readBlock(
      74           1 :                 ctx, bhp.Handle, nil /* transform */, i.secondLevel.indexFilterRH, i.secondLevel.stats, &i.secondLevel.iterStats, i.secondLevel.bufferPool)
      75           1 :         if err == nil {
      76           1 :                 err = PI(&i.secondLevel.index).InitHandle(i.secondLevel.cmp, i.secondLevel.reader.Split, indexBlock, i.secondLevel.transforms)
      77           1 :         }
      78           1 :         if err != nil {
      79           1 :                 i.secondLevel.err = err
      80           1 :                 return loadBlockFailed
      81           1 :         }
      82           1 :         return loadBlockOK
      83             : }
      84             : 
      85             : // resolveMaybeExcluded is invoked when the block-property filterer has found
      86             : // that an index block is excluded according to its properties but only if its
      87             : // bounds fall within the filter's current bounds. This function consults the
      88             : // appropriate bound, depending on the iteration direction, and returns either
      89             : // `blockIntersects` or `blockExcluded`.
      90           0 : func (i *twoLevelIterator[I, PI, D, PD]) resolveMaybeExcluded(dir int8) intersectsResult {
      91           0 :         // This iterator is configured with a bound-limited block property filter.
      92           0 :         // The bpf determined this entire index block could be excluded from
      93           0 :         // iteration based on the property encoded in the block handle. However, we
      94           0 :         // still need to determine if the index block is wholly contained within the
      95           0 :         // filter's key bounds.
      96           0 :         //
      97           0 :         // External guarantees ensure all its data blocks' keys are ≥ the filter's
      98           0 :         // lower bound during forward iteration, and that all its data blocks' keys
      99           0 :         // are < the filter's upper bound during backward iteration. We only need to
     100           0 :         // determine if the opposite bound is also met.
     101           0 :         //
     102           0 :         // The index separator in topLevelIndex.Separator() provides an inclusive
     103           0 :         // upper-bound for the index block's keys, guaranteeing that all its keys
     104           0 :         // are ≤ topLevelIndex.Separator(). For forward iteration, this is all we
     105           0 :         // need.
     106           0 :         if dir > 0 {
     107           0 :                 // Forward iteration.
     108           0 :                 if i.secondLevel.bpfs.boundLimitedFilter.KeyIsWithinUpperBound(i.topLevelIndex.Separator()) {
     109           0 :                         return blockExcluded
     110           0 :                 }
     111           0 :                 return blockIntersects
     112             :         }
     113             : 
     114             :         // Reverse iteration.
     115             :         //
     116             :         // Because we're iterating in the reverse direction, we don't yet have
     117             :         // enough context available to determine if the block is wholly contained
     118             :         // within its bounds. This case arises only during backward iteration,
     119             :         // because of the way the index is structured.
     120             :         //
     121             :         // Consider a bound-limited bpf limited to the bounds [b,d), loading the
     122             :         // block with separator `c`. During reverse iteration, the guarantee that
     123             :         // all the block's keys are < `d` is externally provided, but no guarantee
     124             :         // is made on the bpf's lower bound. The separator `c` only provides an
     125             :         // inclusive upper bound on the block's keys, indicating that the
     126             :         // corresponding block handle points to a block containing only keys ≤ `c`.
     127             :         //
     128             :         // To establish a lower bound, we step the top-level index backwards to read
     129             :         // the previous block's separator, which provides an inclusive lower bound
     130             :         // on the original index block's keys. Afterwards, we step forward to
     131             :         // restore our top-level index position.
     132           0 :         if !i.topLevelIndex.Prev() {
     133           0 :                 // The original block points to the first index block of this table. If
     134           0 :                 // we knew the lower bound for the entire table, it could provide a
     135           0 :                 // lower bound, but the code refactoring necessary to read it doesn't
     136           0 :                 // seem worth the payoff. We fall through to loading the block.
     137           0 :         } else if i.secondLevel.bpfs.boundLimitedFilter.KeyIsWithinLowerBound(i.topLevelIndex.Separator()) {
     138           0 :                 // The lower-bound on the original index block falls within the filter's
     139           0 :                 // bounds, and we can skip the block (after restoring our current
     140           0 :                 // top-level index position).
     141           0 :                 _ = i.topLevelIndex.Next()
     142           0 :                 return blockExcluded
     143           0 :         }
     144           0 :         _ = i.topLevelIndex.Next()
     145           0 :         return blockIntersects
     146             : }
     147             : 
     148             : // newRowBlockTwoLevelIterator reads the top-level index block and creates and
     149             : // initializes a two-level iterator over an sstable with row-oriented data
     150             : // blocks.
     151             : //
     152             : // Note that lower, upper are iterator bounds and are separate from virtual
     153             : // sstable bounds. If the virtualState passed in is not nil, then virtual
     154             : // sstable bounds will be enforced.
     155             : func newRowBlockTwoLevelIterator(
     156             :         ctx context.Context,
     157             :         r *Reader,
     158             :         v *virtualState,
     159             :         transforms IterTransforms,
     160             :         lower, upper []byte,
     161             :         filterer *BlockPropertiesFilterer,
     162             :         filterBlockSizeLimit FilterBlockSizeLimit,
     163             :         stats *base.InternalIteratorStats,
     164             :         categoryAndQoS CategoryAndQoS,
     165             :         statsCollector *CategoryStatsCollector,
     166             :         rp ReaderProvider,
     167             :         bufferPool *block.BufferPool,
     168           1 : ) (*twoLevelIterator[rowblk.IndexIter, *rowblk.IndexIter, rowblk.Iter, *rowblk.Iter], error) {
     169           1 :         if r.err != nil {
     170           0 :                 return nil, r.err
     171           0 :         }
     172             :         // TODO(jackson): When we have a columnar-block sstable format, assert that
     173             :         // the table format is row-oriented.
     174           1 :         i := twoLevelIterRowBlockPool.Get().(*twoLevelIterator[rowblk.IndexIter, *rowblk.IndexIter, rowblk.Iter, *rowblk.Iter])
     175           1 :         i.secondLevel.init(ctx, r, v, transforms, lower, upper, filterer,
     176           1 :                 false, // Disable the use of the filter block in the second level.
     177           1 :                 stats, categoryAndQoS, statsCollector, bufferPool)
     178           1 :         if r.tableFormat >= TableFormatPebblev3 {
     179           1 :                 if r.Properties.NumValueBlocks > 0 {
     180           1 :                         // NB: we cannot avoid this ~248 byte allocation, since valueBlockReader
     181           1 :                         // can outlive the singleLevelIterator due to be being embedded in a
     182           1 :                         // LazyValue. This consumes ~2% in microbenchmark CPU profiles, but we
     183           1 :                         // should only optimize this if it shows up as significant in end-to-end
     184           1 :                         // CockroachDB benchmarks, since it is tricky to do so. One possibility
     185           1 :                         // is that if many sstable iterators only get positioned at latest
     186           1 :                         // versions of keys, and therefore never expose a LazyValue that is
     187           1 :                         // separated to their callers, they can put this valueBlockReader into a
     188           1 :                         // sync.Pool.
     189           1 :                         i.secondLevel.vbReader = &valueBlockReader{
     190           1 :                                 bpOpen: &i.secondLevel,
     191           1 :                                 rp:     rp,
     192           1 :                                 vbih:   r.valueBIH,
     193           1 :                                 stats:  stats,
     194           1 :                         }
     195           1 :                         i.secondLevel.data.SetGetLazyValuer(i.secondLevel.vbReader)
     196           1 :                         i.secondLevel.vbRH = objstorageprovider.UsePreallocatedReadHandle(r.readable, objstorage.NoReadBefore, &i.secondLevel.vbRHPrealloc)
     197           1 :                 }
     198           1 :                 i.secondLevel.data.SetHasValuePrefix(true)
     199             :         }
     200             : 
     201           1 :         i.useFilterBlock = shouldUseFilterBlock(r, filterBlockSizeLimit)
     202           1 : 
     203           1 :         topLevelIndexH, err := r.readIndex(ctx, i.secondLevel.indexFilterRH, stats, &i.secondLevel.iterStats)
     204           1 :         if err == nil {
     205           1 :                 err = i.topLevelIndex.InitHandle(i.secondLevel.cmp, i.secondLevel.reader.Split, topLevelIndexH, transforms)
     206           1 :         }
     207           1 :         if err != nil {
     208           1 :                 _ = i.Close()
     209           1 :                 return nil, err
     210           1 :         }
     211           1 :         return i, nil
     212             : }
     213             : 
     214           0 : func (i *twoLevelIterator[I, PI, D, PD]) String() string {
     215           0 :         return i.secondLevel.String()
     216           0 : }
     217             : 
     218             : // DebugTree is part of the InternalIterator interface.
     219           0 : func (i *twoLevelIterator[I, PI, D, PD]) DebugTree(tp treeprinter.Node) {
     220           0 :         tp.Childf("%T(%p) fileNum=%s", i, i, i.String())
     221           0 : }
     222             : 
     223             : // SeekGE implements internalIterator.SeekGE, as documented in the pebble
     224             : // package. Note that SeekGE only checks the upper bound. It is up to the
     225             : // caller to ensure that key is greater than or equal to the lower bound.
     226             : func (i *twoLevelIterator[I, PI, D, PD]) SeekGE(
     227             :         key []byte, flags base.SeekGEFlags,
     228           1 : ) *base.InternalKV {
     229           1 :         if i.secondLevel.vState != nil {
     230           1 :                 // Callers of SeekGE don't know about virtual sstable bounds, so we may
     231           1 :                 // have to internally restrict the bounds.
     232           1 :                 //
     233           1 :                 // TODO(bananabrick): We can optimize away this check for the level iter
     234           1 :                 // if necessary.
     235           1 :                 if i.secondLevel.cmp(key, i.secondLevel.lower) < 0 {
     236           1 :                         key = i.secondLevel.lower
     237           1 :                 }
     238             :         }
     239             : 
     240           1 :         err := i.secondLevel.err
     241           1 :         i.secondLevel.err = nil // clear cached iteration error
     242           1 : 
     243           1 :         // The twoLevelIterator could be already exhausted. Utilize that when
     244           1 :         // trySeekUsingNext is true. See the comment about data-exhausted, PGDE, and
     245           1 :         // bounds-exhausted near the top of the file.
     246           1 :         if flags.TrySeekUsingNext() &&
     247           1 :                 (i.secondLevel.exhaustedBounds == +1 || (PD(&i.secondLevel.data).IsDataInvalidated() && PI(&i.secondLevel.index).IsDataInvalidated())) &&
     248           1 :                 err == nil {
     249           1 :                 // Already exhausted, so return nil.
     250           1 :                 return nil
     251           1 :         }
     252             : 
     253             :         // SeekGE performs various step-instead-of-seeking optimizations: eg enabled
     254             :         // by trySeekUsingNext, or by monotonically increasing bounds (i.boundsCmp).
     255             : 
     256             :         // We fall into the slow path if i.index.IsDataInvalidated() even if the
     257             :         // top-level iterator is already positioned correctly and all other
     258             :         // conditions are met. An alternative structure could reuse topLevelIndex's
     259             :         // current position and reload the index block to which it points. Arguably,
     260             :         // an index block load is expensive and the index block may still be earlier
     261             :         // than the index block containing the sought key, resulting in a wasteful
     262             :         // block load.
     263             : 
     264           1 :         var dontSeekWithinSingleLevelIter bool
     265           1 :         if i.topLevelIndex.IsDataInvalidated() || !i.topLevelIndex.Valid() || PI(&i.secondLevel.index).IsDataInvalidated() || err != nil ||
     266           1 :                 (i.secondLevel.boundsCmp <= 0 && !flags.TrySeekUsingNext()) || i.secondLevel.cmp(key, i.topLevelIndex.Separator()) > 0 {
     267           1 :                 // Slow-path: need to position the topLevelIndex.
     268           1 : 
     269           1 :                 // The previous exhausted state of singleLevelIterator is no longer
     270           1 :                 // relevant, since we may be moving to a different index block.
     271           1 :                 i.secondLevel.exhaustedBounds = 0
     272           1 :                 flags = flags.DisableTrySeekUsingNext()
     273           1 :                 if !i.topLevelIndex.SeekGE(key) {
     274           1 :                         PD(&i.secondLevel.data).Invalidate()
     275           1 :                         PI(&i.secondLevel.index).Invalidate()
     276           1 :                         return nil
     277           1 :                 }
     278             : 
     279           1 :                 result := i.loadIndex(+1)
     280           1 :                 if result == loadBlockFailed {
     281           1 :                         i.secondLevel.boundsCmp = 0
     282           1 :                         return nil
     283           1 :                 }
     284           1 :                 if result == loadBlockIrrelevant {
     285           1 :                         // Enforce the upper bound here since don't want to bother moving
     286           1 :                         // to the next entry in the top level index if upper bound is
     287           1 :                         // already exceeded. Note that the next entry starts with keys >=
     288           1 :                         // ikey.InternalKey.UserKey since even though this is the block separator, the
     289           1 :                         // same user key can span multiple index blocks. If upper is
     290           1 :                         // exclusive we use >= below, else we use >.
     291           1 :                         if i.secondLevel.upper != nil {
     292           1 :                                 cmp := i.secondLevel.cmp(i.topLevelIndex.Separator(), i.secondLevel.upper)
     293           1 :                                 if (!i.secondLevel.endKeyInclusive && cmp >= 0) || cmp > 0 {
     294           1 :                                         i.secondLevel.exhaustedBounds = +1
     295           1 :                                 }
     296             :                         }
     297             :                         // Fall through to skipForward.
     298           1 :                         dontSeekWithinSingleLevelIter = true
     299           1 :                         // Clear boundsCmp.
     300           1 :                         //
     301           1 :                         // In the typical cases where dontSeekWithinSingleLevelIter=false,
     302           1 :                         // the singleLevelIterator.SeekGE call will clear boundsCmp.
     303           1 :                         // However, in this case where dontSeekWithinSingleLevelIter=true,
     304           1 :                         // we never seek on the single-level iterator. This call will fall
     305           1 :                         // through to skipForward, which may improperly leave boundsCmp=+1
     306           1 :                         // unless we clear it here.
     307           1 :                         i.secondLevel.boundsCmp = 0
     308             :                 }
     309           1 :         } else {
     310           1 :                 // INVARIANT: err == nil.
     311           1 :                 //
     312           1 :                 // Else fast-path: There are two possible cases, from
     313           1 :                 // (i.boundsCmp > 0 || flags.TrySeekUsingNext()):
     314           1 :                 //
     315           1 :                 // 1) The bounds have moved forward (i.boundsCmp > 0) and this SeekGE is
     316           1 :                 // respecting the lower bound (guaranteed by Iterator). We know that the
     317           1 :                 // iterator must already be positioned within or just outside the previous
     318           1 :                 // bounds. Therefore, the topLevelIndex iter cannot be positioned at an
     319           1 :                 // entry ahead of the seek position (though it can be positioned behind).
     320           1 :                 // The !i.cmp(key, i.topLevelIndex.Separator) > 0 confirms that it is
     321           1 :                 // not behind. Since it is not ahead and not behind it must be at the
     322           1 :                 // right position.
     323           1 :                 //
     324           1 :                 // 2) This SeekGE will land on a key that is greater than the key we are
     325           1 :                 // currently at (guaranteed by trySeekUsingNext), but since i.cmp(key,
     326           1 :                 // i.topLevelIndex.Separator()) <= 0, we are at the correct lower level
     327           1 :                 // index block. No need to reset the state of singleLevelIterator.
     328           1 :                 //
     329           1 :                 // Note that cases 1 and 2 never overlap, and one of them must be true.
     330           1 :                 // This invariant checking is important enough that we do not gate it
     331           1 :                 // behind invariants.Enabled.
     332           1 :                 if i.secondLevel.boundsCmp > 0 == flags.TrySeekUsingNext() {
     333           0 :                         panic(fmt.Sprintf("inconsistency in optimization case 1 %t and case 2 %t",
     334           0 :                                 i.secondLevel.boundsCmp > 0, flags.TrySeekUsingNext()))
     335             :                 }
     336             : 
     337           1 :                 if !flags.TrySeekUsingNext() {
     338           1 :                         // Case 1. Bounds have changed so the previous exhausted bounds state is
     339           1 :                         // irrelevant.
     340           1 :                         // WARNING-data-exhausted: this is safe to do only because the monotonic
     341           1 :                         // bounds optimizations only work when !data-exhausted. If they also
     342           1 :                         // worked with data-exhausted, we have made it unclear whether
     343           1 :                         // data-exhausted is actually true. See the comment at the top of the
     344           1 :                         // file.
     345           1 :                         i.secondLevel.exhaustedBounds = 0
     346           1 :                 }
     347             :                 // Else flags.TrySeekUsingNext(). The i.exhaustedBounds is important to
     348             :                 // preserve for singleLevelIterator, and twoLevelIterator.skipForward. See
     349             :                 // bug https://github.com/cockroachdb/pebble/issues/2036.
     350             :         }
     351             : 
     352           1 :         if !dontSeekWithinSingleLevelIter {
     353           1 :                 // Note that while trySeekUsingNext could be false here, singleLevelIterator
     354           1 :                 // could do its own boundsCmp-based optimization to seek using next.
     355           1 :                 if ikv := i.secondLevel.SeekGE(key, flags); ikv != nil {
     356           1 :                         return ikv
     357           1 :                 }
     358             :         }
     359           1 :         return i.skipForward()
     360             : }
     361             : 
     362             : // SeekPrefixGE implements internalIterator.SeekPrefixGE, as documented in the
     363             : // pebble package. Note that SeekPrefixGE only checks the upper bound. It is up
     364             : // to the caller to ensure that key is greater than or equal to the lower bound.
     365             : func (i *twoLevelIterator[I, PI, D, PD]) SeekPrefixGE(
     366             :         prefix, key []byte, flags base.SeekGEFlags,
     367           1 : ) *base.InternalKV {
     368           1 :         if i.secondLevel.vState != nil {
     369           1 :                 // Callers of SeekGE don't know about virtual sstable bounds, so we may
     370           1 :                 // have to internally restrict the bounds.
     371           1 :                 //
     372           1 :                 // TODO(bananabrick): We can optimize away this check for the level iter
     373           1 :                 // if necessary.
     374           1 :                 if i.secondLevel.cmp(key, i.secondLevel.lower) < 0 {
     375           1 :                         key = i.secondLevel.lower
     376           1 :                 }
     377             :         }
     378             : 
     379             :         // NOTE: prefix is only used for bloom filter checking and not later work in
     380             :         // this method. Hence, we can use the existing iterator position if the last
     381             :         // SeekPrefixGE did not fail bloom filter matching.
     382             : 
     383           1 :         err := i.secondLevel.err
     384           1 :         i.secondLevel.err = nil // clear cached iteration error
     385           1 : 
     386           1 :         // The twoLevelIterator could be already exhausted. Utilize that when
     387           1 :         // trySeekUsingNext is true. See the comment about data-exhausted, PGDE, and
     388           1 :         // bounds-exhausted near the top of the file.
     389           1 :         filterUsedAndDidNotMatch := i.useFilterBlock && !i.lastBloomFilterMatched
     390           1 :         if flags.TrySeekUsingNext() && !filterUsedAndDidNotMatch &&
     391           1 :                 (i.secondLevel.exhaustedBounds == +1 || (PD(&i.secondLevel.data).IsDataInvalidated() && PI(&i.secondLevel.index).IsDataInvalidated())) &&
     392           1 :                 err == nil {
     393           0 :                 // Already exhausted, so return nil.
     394           0 :                 return nil
     395           0 :         }
     396             : 
     397             :         // Check prefix bloom filter.
     398           1 :         if i.useFilterBlock {
     399           1 :                 if !i.lastBloomFilterMatched {
     400           1 :                         // Iterator is not positioned based on last seek.
     401           1 :                         flags = flags.DisableTrySeekUsingNext()
     402           1 :                 }
     403           1 :                 i.lastBloomFilterMatched = false
     404           1 :                 var mayContain bool
     405           1 :                 mayContain, i.secondLevel.err = i.secondLevel.bloomFilterMayContain(prefix)
     406           1 :                 if i.secondLevel.err != nil || !mayContain {
     407           1 :                         // In the i.secondLevel.err == nil case, this invalidation may not be necessary for
     408           1 :                         // correctness, and may be a place to optimize later by reusing the
     409           1 :                         // already loaded block. It was necessary in earlier versions of the code
     410           1 :                         // since the caller was allowed to call Next when SeekPrefixGE returned
     411           1 :                         // nil. This is no longer allowed.
     412           1 :                         PD(&i.secondLevel.data).Invalidate()
     413           1 :                         return nil
     414           1 :                 }
     415           1 :                 i.lastBloomFilterMatched = true
     416             :         }
     417             : 
     418             :         // Bloom filter matches.
     419             : 
     420             :         // SeekPrefixGE performs various step-instead-of-seeking optimizations: eg
     421             :         // enabled by trySeekUsingNext, or by monotonically increasing bounds
     422             :         // (i.boundsCmp).
     423             : 
     424             :         // We fall into the slow path if i.index.IsDataInvalidated() even if the
     425             :         // top-level iterator is already positioned correctly and all other
     426             :         // conditions are met. An alternative structure could reuse topLevelIndex's
     427             :         // current position and reload the index block to which it points. Arguably,
     428             :         // an index block load is expensive and the index block may still be earlier
     429             :         // than the index block containing the sought key, resulting in a wasteful
     430             :         // block load.
     431             : 
     432           1 :         var dontSeekWithinSingleLevelIter bool
     433           1 :         if i.topLevelIndex.IsDataInvalidated() || !i.topLevelIndex.Valid() || PI(&i.secondLevel.index).IsDataInvalidated() || err != nil ||
     434           1 :                 (i.secondLevel.boundsCmp <= 0 && !flags.TrySeekUsingNext()) || i.secondLevel.cmp(key, i.topLevelIndex.Separator()) > 0 {
     435           1 :                 // Slow-path: need to position the topLevelIndex.
     436           1 : 
     437           1 :                 // The previous exhausted state of singleLevelIterator is no longer
     438           1 :                 // relevant, since we may be moving to a different index block.
     439           1 :                 i.secondLevel.exhaustedBounds = 0
     440           1 :                 flags = flags.DisableTrySeekUsingNext()
     441           1 :                 if !i.topLevelIndex.SeekGE(key) {
     442           1 :                         PD(&i.secondLevel.data).Invalidate()
     443           1 :                         PI(&i.secondLevel.index).Invalidate()
     444           1 :                         return nil
     445           1 :                 }
     446             : 
     447           1 :                 result := i.loadIndex(+1)
     448           1 :                 if result == loadBlockFailed {
     449           1 :                         i.secondLevel.boundsCmp = 0
     450           1 :                         return nil
     451           1 :                 }
     452           1 :                 if result == loadBlockIrrelevant {
     453           1 :                         // Enforce the upper bound here since don't want to bother moving
     454           1 :                         // to the next entry in the top level index if upper bound is
     455           1 :                         // already exceeded. Note that the next entry starts with keys >=
     456           1 :                         // ikey.InternalKey.UserKey since even though this is the block separator, the
     457           1 :                         // same user key can span multiple index blocks. If upper is
     458           1 :                         // exclusive we use >= below, else we use >.
     459           1 :                         if i.secondLevel.upper != nil {
     460           1 :                                 cmp := i.secondLevel.cmp(i.topLevelIndex.Separator(), i.secondLevel.upper)
     461           1 :                                 if (!i.secondLevel.endKeyInclusive && cmp >= 0) || cmp > 0 {
     462           1 :                                         i.secondLevel.exhaustedBounds = +1
     463           1 :                                 }
     464             :                         }
     465             :                         // Fall through to skipForward.
     466           1 :                         dontSeekWithinSingleLevelIter = true
     467           1 :                         // Clear boundsCmp.
     468           1 :                         //
     469           1 :                         // In the typical cases where dontSeekWithinSingleLevelIter=false,
     470           1 :                         // the singleLevelIterator.SeekPrefixGE call will clear boundsCmp.
     471           1 :                         // However, in this case where dontSeekWithinSingleLevelIter=true,
     472           1 :                         // we never seek on the single-level iterator. This call will fall
     473           1 :                         // through to skipForward, which may improperly leave boundsCmp=+1
     474           1 :                         // unless we clear it here.
     475           1 :                         i.secondLevel.boundsCmp = 0
     476             :                 }
     477           1 :         } else {
     478           1 :                 // INVARIANT: err == nil.
     479           1 :                 //
     480           1 :                 // Else fast-path: There are two possible cases, from
     481           1 :                 // (i.boundsCmp > 0 || flags.TrySeekUsingNext()):
     482           1 :                 //
     483           1 :                 // 1) The bounds have moved forward (i.boundsCmp > 0) and this
     484           1 :                 // SeekPrefixGE is respecting the lower bound (guaranteed by Iterator). We
     485           1 :                 // know that the iterator must already be positioned within or just
     486           1 :                 // outside the previous bounds. Therefore, the topLevelIndex iter cannot
     487           1 :                 // be positioned at an entry ahead of the seek position (though it can be
     488           1 :                 // positioned behind). The !i.cmp(key, i.topLevelIndex.Separator()) > 0
     489           1 :                 // confirms that it is not behind. Since it is not ahead and not behind it
     490           1 :                 // must be at the right position.
     491           1 :                 //
     492           1 :                 // 2) This SeekPrefixGE will land on a key that is greater than the key we
     493           1 :                 // are currently at (guaranteed by trySeekUsingNext), but since i.cmp(key,
     494           1 :                 // i.topLevelIndex.Separator()) <= 0, we are at the correct lower level
     495           1 :                 // index block. No need to reset the state of singleLevelIterator.
     496           1 :                 //
     497           1 :                 // Note that cases 1 and 2 never overlap, and one of them must be true.
     498           1 :                 // This invariant checking is important enough that we do not gate it
     499           1 :                 // behind invariants.Enabled.
     500           1 :                 if i.secondLevel.boundsCmp > 0 == flags.TrySeekUsingNext() {
     501           0 :                         panic(fmt.Sprintf("inconsistency in optimization case 1 %t and case 2 %t",
     502           0 :                                 i.secondLevel.boundsCmp > 0, flags.TrySeekUsingNext()))
     503             :                 }
     504             : 
     505           1 :                 if !flags.TrySeekUsingNext() {
     506           0 :                         // Case 1. Bounds have changed so the previous exhausted bounds state is
     507           0 :                         // irrelevant.
     508           0 :                         // WARNING-data-exhausted: this is safe to do only because the monotonic
     509           0 :                         // bounds optimizations only work when !data-exhausted. If they also
     510           0 :                         // worked with data-exhausted, we have made it unclear whether
     511           0 :                         // data-exhausted is actually true. See the comment at the top of the
     512           0 :                         // file.
     513           0 :                         i.secondLevel.exhaustedBounds = 0
     514           0 :                 }
     515             :                 // Else flags.TrySeekUsingNext(). The i.exhaustedBounds is important to
     516             :                 // preserve for singleLevelIterator, and twoLevelIterator.skipForward. See
     517             :                 // bug https://github.com/cockroachdb/pebble/issues/2036.
     518             :         }
     519             : 
     520           1 :         if !dontSeekWithinSingleLevelIter {
     521           1 :                 if ikv := i.secondLevel.seekPrefixGE(prefix, key, flags); ikv != nil {
     522           1 :                         return ikv
     523           1 :                 }
     524             :         }
     525             :         // NB: skipForward checks whether exhaustedBounds is already +1.
     526           1 :         return i.skipForward()
     527             : }
     528             : 
     529             : // virtualLast should only be called if i.vReader != nil.
     530           1 : func (i *twoLevelIterator[I, PI, D, PD]) virtualLast() *base.InternalKV {
     531           1 :         if i.secondLevel.vState == nil {
     532           0 :                 panic("pebble: invalid call to virtualLast")
     533             :         }
     534           1 :         if !i.secondLevel.endKeyInclusive {
     535           1 :                 // Trivial case.
     536           1 :                 return i.SeekLT(i.secondLevel.upper, base.SeekLTFlagsNone)
     537           1 :         }
     538           1 :         return i.virtualLastSeekLE()
     539             : }
     540             : 
     541             : // virtualLastSeekLE implements a SeekLE() that can be used as part
     542             : // of reverse-iteration calls such as a Last() on a virtual sstable. Does a
     543             : // SeekLE on the upper bound of the file/iterator.
     544           1 : func (i *twoLevelIterator[I, PI, D, PD]) virtualLastSeekLE() *base.InternalKV {
     545           1 :         // Callers of SeekLE don't know about virtual sstable bounds, so we may
     546           1 :         // have to internally restrict the bounds.
     547           1 :         //
     548           1 :         // TODO(bananabrick): We can optimize this check away for the level iter
     549           1 :         // if necessary.
     550           1 :         if !i.secondLevel.endKeyInclusive {
     551           0 :                 panic("unexpected virtualLastSeekLE with exclusive upper bounds")
     552             :         }
     553           1 :         key := i.secondLevel.upper
     554           1 :         // Need to position the topLevelIndex.
     555           1 :         //
     556           1 :         // The previous exhausted state of singleLevelIterator is no longer
     557           1 :         // relevant, since we may be moving to a different index block.
     558           1 :         i.secondLevel.exhaustedBounds = 0
     559           1 :         // Seek optimization only applies until iterator is first positioned with a
     560           1 :         // SeekGE or SeekLT after SetBounds.
     561           1 :         i.secondLevel.boundsCmp = 0
     562           1 :         topLevelOk := i.topLevelIndex.SeekGE(key)
     563           1 :         // We can have multiple internal keys with the same user key as the seek
     564           1 :         // key. In that case, we want the last (greatest) internal key.
     565           1 :         for topLevelOk && bytes.Equal(i.topLevelIndex.Separator(), key) {
     566           1 :                 topLevelOk = i.topLevelIndex.Next()
     567           1 :         }
     568           1 :         if !topLevelOk {
     569           1 :                 return i.skipBackward()
     570           1 :         }
     571           1 :         result := i.loadIndex(-1)
     572           1 :         if result == loadBlockFailed {
     573           0 :                 i.secondLevel.boundsCmp = 0
     574           0 :                 return nil
     575           0 :         }
     576           1 :         if result == loadBlockIrrelevant {
     577           0 :                 // Load the previous block.
     578           0 :                 return i.skipBackward()
     579           0 :         }
     580           1 :         if ikv := i.secondLevel.virtualLastSeekLE(); ikv != nil {
     581           1 :                 return ikv
     582           1 :         }
     583           1 :         return i.skipBackward()
     584             : }
     585             : 
     586             : // SeekLT implements internalIterator.SeekLT, as documented in the pebble
     587             : // package. Note that SeekLT only checks the lower bound. It is up to the
     588             : // caller to ensure that key is less than the upper bound.
     589             : func (i *twoLevelIterator[I, PI, D, PD]) SeekLT(
     590             :         key []byte, flags base.SeekLTFlags,
     591           1 : ) *base.InternalKV {
     592           1 :         if i.secondLevel.vState != nil {
     593           1 :                 // Might have to fix upper bound since virtual sstable bounds are not
     594           1 :                 // known to callers of SeekLT.
     595           1 :                 //
     596           1 :                 // TODO(bananabrick): We can optimize away this check for the level iter
     597           1 :                 // if necessary.
     598           1 :                 cmp := i.secondLevel.cmp(key, i.secondLevel.upper)
     599           1 :                 // key == i.secondLevel.upper is fine. We'll do the right thing and return the
     600           1 :                 // first internal key with user key < key.
     601           1 :                 if cmp > 0 {
     602           1 :                         return i.virtualLast()
     603           1 :                 }
     604             :         }
     605             : 
     606           1 :         i.secondLevel.exhaustedBounds = 0
     607           1 :         i.secondLevel.err = nil // clear cached iteration error
     608           1 :         // Seek optimization only applies until iterator is first positioned after SetBounds.
     609           1 :         i.secondLevel.boundsCmp = 0
     610           1 : 
     611           1 :         var result loadBlockResult
     612           1 :         // NB: Unlike SeekGE, we don't have a fast-path here since we don't know
     613           1 :         // whether the topLevelIndex is positioned after the position that would
     614           1 :         // be returned by doing i.topLevelIndex.SeekGE(). To know this we would
     615           1 :         // need to know the index key preceding the current one.
     616           1 :         // NB: If a bound-limited block property filter is configured, it's
     617           1 :         // externally ensured that the filter is disabled (through returning
     618           1 :         // Intersects=false irrespective of the block props provided) during seeks.
     619           1 :         if !i.topLevelIndex.SeekGE(key) {
     620           1 :                 if !i.topLevelIndex.Last() {
     621           0 :                         PD(&i.secondLevel.data).Invalidate()
     622           0 :                         PI(&i.secondLevel.index).Invalidate()
     623           0 :                         return nil
     624           0 :                 }
     625             : 
     626           1 :                 result = i.loadIndex(-1)
     627           1 :                 if result == loadBlockFailed {
     628           1 :                         return nil
     629           1 :                 }
     630           1 :                 if result == loadBlockOK {
     631           1 :                         if ikv := i.secondLevel.lastInternal(); ikv != nil {
     632           1 :                                 return i.secondLevel.maybeVerifyKey(ikv)
     633           1 :                         }
     634             :                         // Fall through to skipBackward since the singleLevelIterator did
     635             :                         // not have any blocks that satisfy the block interval
     636             :                         // constraints, or the lower bound was reached.
     637             :                 }
     638             :                 // Else loadBlockIrrelevant, so fall through.
     639           1 :         } else {
     640           1 :                 result = i.loadIndex(-1)
     641           1 :                 if result == loadBlockFailed {
     642           1 :                         return nil
     643           1 :                 }
     644           1 :                 if result == loadBlockOK {
     645           1 :                         if ikv := i.secondLevel.SeekLT(key, flags); ikv != nil {
     646           1 :                                 return i.secondLevel.maybeVerifyKey(ikv)
     647           1 :                         }
     648             :                         // Fall through to skipBackward since the singleLevelIterator did
     649             :                         // not have any blocks that satisfy the block interval
     650             :                         // constraint, or the lower bound was reached.
     651             :                 }
     652             :                 // Else loadBlockIrrelevant, so fall through.
     653             :         }
     654           1 :         if result == loadBlockIrrelevant {
     655           1 :                 // Enforce the lower bound here since don't want to bother moving to
     656           1 :                 // the previous entry in the top level index if lower bound is already
     657           1 :                 // exceeded. Note that the previous entry starts with keys <=
     658           1 :                 // ikey.InternalKey.UserKey since even though this is the current block's
     659           1 :                 // separator, the same user key can span multiple index blocks.
     660           1 :                 if i.secondLevel.lower != nil && i.secondLevel.cmp(i.topLevelIndex.Separator(), i.secondLevel.lower) < 0 {
     661           1 :                         i.secondLevel.exhaustedBounds = -1
     662           1 :                 }
     663             :         }
     664             :         // NB: skipBackward checks whether exhaustedBounds is already -1.
     665           1 :         return i.skipBackward()
     666             : }
     667             : 
     668             : // First implements internalIterator.First, as documented in the pebble
     669             : // package. Note that First only checks the upper bound. It is up to the caller
     670             : // to ensure that key is greater than or equal to the lower bound (e.g. via a
     671             : // call to SeekGE(lower)).
     672           1 : func (i *twoLevelIterator[I, PI, D, PD]) First() *base.InternalKV {
     673           1 :         // If we have a lower bound, use SeekGE. Note that in general this is not
     674           1 :         // supported usage, except when the lower bound is there because the table is
     675           1 :         // virtual.
     676           1 :         if i.secondLevel.lower != nil {
     677           1 :                 return i.SeekGE(i.secondLevel.lower, base.SeekGEFlagsNone)
     678           1 :         }
     679           1 :         i.secondLevel.exhaustedBounds = 0
     680           1 :         i.secondLevel.err = nil // clear cached iteration error
     681           1 :         // Seek optimization only applies until iterator is first positioned after SetBounds.
     682           1 :         i.secondLevel.boundsCmp = 0
     683           1 : 
     684           1 :         if !i.topLevelIndex.First() {
     685           0 :                 return nil
     686           0 :         }
     687           1 :         result := i.loadIndex(+1)
     688           1 :         if result == loadBlockFailed {
     689           1 :                 return nil
     690           1 :         }
     691           1 :         if result == loadBlockOK {
     692           1 :                 if ikv := i.secondLevel.First(); ikv != nil {
     693           1 :                         return ikv
     694           1 :                 }
     695             :                 // Else fall through to skipForward.
     696           1 :         } else {
     697           1 :                 // result == loadBlockIrrelevant. Enforce the upper bound here since
     698           1 :                 // don't want to bother moving to the next entry in the top level
     699           1 :                 // index if upper bound is already exceeded. Note that the next entry
     700           1 :                 // starts with keys >= ikv.InternalKey.UserKey since even though this is the
     701           1 :                 // block separator, the same user key can span multiple index blocks.
     702           1 :                 // If upper is exclusive we use >= below, else we use >.
     703           1 :                 if i.secondLevel.upper != nil {
     704           1 :                         cmp := i.secondLevel.cmp(i.topLevelIndex.Separator(), i.secondLevel.upper)
     705           1 :                         if (!i.secondLevel.endKeyInclusive && cmp >= 0) || cmp > 0 {
     706           0 :                                 i.secondLevel.exhaustedBounds = +1
     707           0 :                         }
     708             :                 }
     709             :         }
     710             :         // NB: skipForward checks whether exhaustedBounds is already +1.
     711           1 :         return i.skipForward()
     712             : }
     713             : 
     714             : // Last implements internalIterator.Last, as documented in the pebble
     715             : // package. Note that Last only checks the lower bound. It is up to the caller
     716             : // to ensure that key is less than the upper bound (e.g. via a call to
     717             : // SeekLT(upper))
     718           1 : func (i *twoLevelIterator[I, PI, D, PD]) Last() *base.InternalKV {
     719           1 :         if i.secondLevel.vState != nil {
     720           1 :                 if i.secondLevel.endKeyInclusive {
     721           1 :                         return i.virtualLast()
     722           1 :                 }
     723           1 :                 return i.SeekLT(i.secondLevel.upper, base.SeekLTFlagsNone)
     724             :         }
     725             : 
     726           1 :         if i.secondLevel.upper != nil {
     727           0 :                 panic("twoLevelIterator.Last() used despite upper bound")
     728             :         }
     729           1 :         i.secondLevel.exhaustedBounds = 0
     730           1 :         i.secondLevel.err = nil // clear cached iteration error
     731           1 :         // Seek optimization only applies until iterator is first positioned after SetBounds.
     732           1 :         i.secondLevel.boundsCmp = 0
     733           1 : 
     734           1 :         if !i.topLevelIndex.Last() {
     735           0 :                 return nil
     736           0 :         }
     737           1 :         result := i.loadIndex(-1)
     738           1 :         if result == loadBlockFailed {
     739           1 :                 return nil
     740           1 :         }
     741           1 :         if result == loadBlockOK {
     742           1 :                 if ikv := i.secondLevel.Last(); ikv != nil {
     743           1 :                         return ikv
     744           1 :                 }
     745             :                 // Else fall through to skipBackward.
     746           1 :         } else {
     747           1 :                 // result == loadBlockIrrelevant. Enforce the lower bound here since
     748           1 :                 // don't want to bother moving to the previous entry in the top level
     749           1 :                 // index if lower bound is already exceeded. Note that the previous
     750           1 :                 // entry starts with keys <= ikv.InternalKey.UserKey since even though
     751           1 :                 // this is the current block's separator, the same user key can span
     752           1 :                 // multiple index blocks.
     753           1 :                 if i.secondLevel.lower != nil && i.secondLevel.cmp(i.topLevelIndex.Separator(), i.secondLevel.lower) < 0 {
     754           0 :                         i.secondLevel.exhaustedBounds = -1
     755           0 :                 }
     756             :         }
     757             :         // NB: skipBackward checks whether exhaustedBounds is already -1.
     758           1 :         return i.skipBackward()
     759             : }
     760             : 
     761             : // Next implements internalIterator.Next, as documented in the pebble
     762             : // package.
     763             : // Note: twoLevelCompactionIterator.Next mirrors the implementation of
     764             : // twoLevelIterator.Next due to performance. Keep the two in sync.
     765           1 : func (i *twoLevelIterator[I, PI, D, PD]) Next() *base.InternalKV {
     766           1 :         // Seek optimization only applies until iterator is first positioned after SetBounds.
     767           1 :         i.secondLevel.boundsCmp = 0
     768           1 :         if i.secondLevel.err != nil {
     769           1 :                 // TODO(jackson): Can this case be turned into a panic? Once an error is
     770           1 :                 // encountered, the iterator must be re-seeked.
     771           1 :                 return nil
     772           1 :         }
     773           1 :         if ikv := i.secondLevel.Next(); ikv != nil {
     774           1 :                 return ikv
     775           1 :         }
     776           1 :         return i.skipForward()
     777             : }
     778             : 
     779             : // NextPrefix implements (base.InternalIterator).NextPrefix.
     780           1 : func (i *twoLevelIterator[I, PI, D, PD]) NextPrefix(succKey []byte) *base.InternalKV {
     781           1 :         if i.secondLevel.exhaustedBounds == +1 {
     782           0 :                 panic("Next called even though exhausted upper bound")
     783             :         }
     784             :         // Seek optimization only applies until iterator is first positioned after SetBounds.
     785           1 :         i.secondLevel.boundsCmp = 0
     786           1 :         if i.secondLevel.err != nil {
     787           0 :                 // TODO(jackson): Can this case be turned into a panic? Once an error is
     788           0 :                 // encountered, the iterator must be re-seeked.
     789           0 :                 return nil
     790           0 :         }
     791           1 :         if ikv := i.secondLevel.NextPrefix(succKey); ikv != nil {
     792           1 :                 return ikv
     793           1 :         }
     794             :         // ikv == nil
     795           1 :         if i.secondLevel.err != nil {
     796           1 :                 return nil
     797           1 :         }
     798             : 
     799             :         // Did not find prefix in the existing second-level index block. This is the
     800             :         // slow-path where we seek the iterator.
     801           1 :         if !i.topLevelIndex.SeekGE(succKey) {
     802           0 :                 PD(&i.secondLevel.data).Invalidate()
     803           0 :                 PI(&i.secondLevel.index).Invalidate()
     804           0 :                 return nil
     805           0 :         }
     806           1 :         result := i.loadIndex(+1)
     807           1 :         if result == loadBlockFailed {
     808           1 :                 return nil
     809           1 :         }
     810           1 :         if result == loadBlockIrrelevant {
     811           1 :                 // Enforce the upper bound here since don't want to bother moving to the
     812           1 :                 // next entry in the top level index if upper bound is already exceeded.
     813           1 :                 // Note that the next entry starts with keys >= ikv.InternalKey.UserKey
     814           1 :                 // since even though this is the block separator, the same user key can
     815           1 :                 // span multiple index blocks. If upper is exclusive we use >= below,
     816           1 :                 // else we use >.
     817           1 :                 if i.secondLevel.upper != nil {
     818           1 :                         cmp := i.secondLevel.cmp(i.topLevelIndex.Separator(), i.secondLevel.upper)
     819           1 :                         if (!i.secondLevel.endKeyInclusive && cmp >= 0) || cmp > 0 {
     820           0 :                                 i.secondLevel.exhaustedBounds = +1
     821           0 :                         }
     822             :                 }
     823           1 :         } else if kv := i.secondLevel.SeekGE(succKey, base.SeekGEFlagsNone); kv != nil {
     824           1 :                 return i.secondLevel.maybeVerifyKey(kv)
     825           1 :         }
     826           1 :         return i.skipForward()
     827             : }
     828             : 
     829             : // Prev implements internalIterator.Prev, as documented in the pebble
     830             : // package.
     831           1 : func (i *twoLevelIterator[I, PI, D, PD]) Prev() *base.InternalKV {
     832           1 :         // Seek optimization only applies until iterator is first positioned after SetBounds.
     833           1 :         i.secondLevel.boundsCmp = 0
     834           1 :         if i.secondLevel.err != nil {
     835           1 :                 return nil
     836           1 :         }
     837           1 :         if kv := i.secondLevel.Prev(); kv != nil {
     838           1 :                 return kv
     839           1 :         }
     840           1 :         return i.skipBackward()
     841             : }
     842             : 
     843           1 : func (i *twoLevelIterator[I, PI, D, PD]) skipForward() *base.InternalKV {
     844           1 :         for {
     845           1 :                 if i.secondLevel.err != nil || i.secondLevel.exhaustedBounds > 0 {
     846           1 :                         return nil
     847           1 :                 }
     848             : 
     849             :                 // It is possible that skipBackward went too far and the virtual table lower
     850             :                 // bound is after the first key in the block we are about to load, in which
     851             :                 // case we must use SeekGE below. The keys in the block we are about to load
     852             :                 // start right after the topLevelIndex key (before we Next).
     853             :                 //
     854             :                 // An example of how this can happen:
     855             :                 //
     856             :                 //   Second-level index block 1 - contains keys a@1, c@1
     857             :                 //   Second-level index block 2 - contains keys e@1, g@1
     858             :                 //   Second-level index block 3 - contains keys i@2, k@2
     859             :                 //
     860             :                 //   The virtual table lower bound is f. We have a range key masking filter
     861             :                 //   that filters keys with @1 suffix. We are positioned inside block 3 then
     862             :                 //   we Prev(). Block 2 is entirely filtered out, which makes us move to
     863             :                 //   block 1. Now the range key masking filter gets an update (via
     864             :                 //   SpanChanged) and it no longer filters out any keys. At this point if a
     865             :                 //   Next happens, we will load block 2 but it would not be legal to return
     866             :                 //   "e@1" which is outside the virtual bounds.
     867             :                 //
     868             :                 //   The core of the problem is that skipBackward doesn't know it can stop
     869             :                 //   at block 2, because it doesn't know what keys are at the start of that
     870             :                 //   block. This is why we don't have this problem in the opposite
     871             :                 //   direction: skipForward will never go beyond the last relevant block
     872             :                 //   because it looks at the separator key which is an upper bound for the
     873             :                 //   block.
     874             :                 //
     875             :                 // Note that this is only a problem with virtual tables; we make no
     876             :                 // guarantees wrt an iterator lower bound when we iterate forward. But we
     877             :                 // must never return keys that are not inside the virtual table.
     878           1 :                 useSeek := i.secondLevel.vState != nil &&
     879           1 :                         (!i.topLevelIndex.Valid() || i.secondLevel.cmp(i.topLevelIndex.Separator(), i.secondLevel.vState.lower.UserKey) < 0)
     880           1 : 
     881           1 :                 i.secondLevel.exhaustedBounds = 0
     882           1 :                 if !i.topLevelIndex.Next() {
     883           1 :                         PD(&i.secondLevel.data).Invalidate()
     884           1 :                         PI(&i.secondLevel.index).Invalidate()
     885           1 :                         return nil
     886           1 :                 }
     887           1 :                 result := i.loadIndex(+1)
     888           1 :                 if result == loadBlockFailed {
     889           1 :                         return nil
     890           1 :                 }
     891           1 :                 if result == loadBlockOK {
     892           1 :                         var ikv *base.InternalKV
     893           1 :                         if useSeek {
     894           1 :                                 ikv = i.secondLevel.SeekGE(i.secondLevel.lower, base.SeekGEFlagsNone)
     895           1 :                         } else {
     896           1 :                                 ikv = i.secondLevel.firstInternal()
     897           1 :                         }
     898           1 :                         if ikv != nil {
     899           1 :                                 return i.secondLevel.maybeVerifyKey(ikv)
     900           1 :                         }
     901             :                         // Next iteration will return if singleLevelIterator set
     902             :                         // exhaustedBounds = +1.
     903           1 :                 } else {
     904           1 :                         // result == loadBlockIrrelevant. Enforce the upper bound here since
     905           1 :                         // don't want to bother moving to the next entry in the top level
     906           1 :                         // index if upper bound is already exceeded. Note that the next
     907           1 :                         // entry starts with keys >= i.topLevelIndex.Separator() since even
     908           1 :                         // though this is the block separator, the same user key can span
     909           1 :                         // multiple index blocks. If upper is exclusive we use >= below,
     910           1 :                         // else we use >.
     911           1 :                         if i.secondLevel.upper != nil {
     912           1 :                                 cmp := i.secondLevel.cmp(i.topLevelIndex.Separator(), i.secondLevel.upper)
     913           1 :                                 if (!i.secondLevel.endKeyInclusive && cmp >= 0) || cmp > 0 {
     914           1 :                                         i.secondLevel.exhaustedBounds = +1
     915           1 :                                         // Next iteration will return.
     916           1 :                                 }
     917             :                         }
     918             :                 }
     919             :         }
     920             : }
     921             : 
     922           1 : func (i *twoLevelIterator[I, PI, D, PD]) skipBackward() *base.InternalKV {
     923           1 :         for {
     924           1 :                 if i.secondLevel.err != nil || i.secondLevel.exhaustedBounds < 0 {
     925           1 :                         return nil
     926           1 :                 }
     927           1 :                 i.secondLevel.exhaustedBounds = 0
     928           1 :                 if !i.topLevelIndex.Prev() {
     929           1 :                         PD(&i.secondLevel.data).Invalidate()
     930           1 :                         PI(&i.secondLevel.index).Invalidate()
     931           1 :                         return nil
     932           1 :                 }
     933           1 :                 result := i.loadIndex(-1)
     934           1 :                 if result == loadBlockFailed {
     935           1 :                         return nil
     936           1 :                 }
     937           1 :                 if result == loadBlockOK {
     938           1 :                         ikv := i.secondLevel.lastInternal()
     939           1 :                         if ikv != nil {
     940           1 :                                 return i.secondLevel.maybeVerifyKey(ikv)
     941           1 :                         }
     942             : 
     943             :                         // Next iteration will return if singleLevelIterator set
     944             :                         // exhaustedBounds = -1.
     945           1 :                 } else {
     946           1 :                         // result == loadBlockIrrelevant. Enforce the lower bound here since
     947           1 :                         // don't want to bother moving to the previous entry in the top
     948           1 :                         // level index if lower bound is already exceeded. Note that the
     949           1 :                         // previous entry starts with keys <= i.topLevelIndex.Separator() since
     950           1 :                         // even though this is the current block's separator, the same user
     951           1 :                         // key can span multiple index blocks.
     952           1 :                         if i.secondLevel.lower != nil && i.secondLevel.cmp(i.topLevelIndex.Separator(), i.secondLevel.lower) < 0 {
     953           1 :                                 i.secondLevel.exhaustedBounds = -1
     954           1 :                                 // Next iteration will return.
     955           1 :                         }
     956             :                 }
     957             :         }
     958             : }
     959             : 
     960           1 : func (i *twoLevelIterator[I, PI, D, PD]) Error() error {
     961           1 :         return i.secondLevel.Error()
     962           1 : }
     963             : 
     964           1 : func (i *twoLevelIterator[I, PI, D, PD]) SetBounds(lower, upper []byte) {
     965           1 :         i.secondLevel.SetBounds(lower, upper)
     966           1 : }
     967             : 
     968           0 : func (i *twoLevelIterator[I, PI, D, PD]) SetContext(ctx context.Context) {
     969           0 :         i.secondLevel.SetContext(ctx)
     970           0 : }
     971             : 
     972           1 : func (i *twoLevelIterator[I, PI, D, PD]) SetCloseHook(fn func(i Iterator) error) {
     973           1 :         i.secondLevel.SetCloseHook(fn)
     974           1 : }
     975             : 
     976           1 : func (i *twoLevelIterator[I, PI, D, PD]) SetupForCompaction() {
     977           1 :         i.secondLevel.SetupForCompaction()
     978           1 : }
     979             : 
     980             : // Close implements internalIterator.Close, as documented in the pebble
     981             : // package.
     982           1 : func (i *twoLevelIterator[I, PI, D, PD]) Close() error {
     983           1 :         if invariants.Enabled && i.secondLevel.pool != nil {
     984           0 :                 panic("twoLevelIterator's singleLevelIterator has its own non-nil pool")
     985             :         }
     986           1 :         pool := i.pool
     987           1 :         err := i.secondLevel.closeInternal()
     988           1 :         err = firstError(err, i.topLevelIndex.Close())
     989           1 :         *i = twoLevelIterator[I, PI, D, PD]{
     990           1 :                 secondLevel:   i.secondLevel.resetForReuse(),
     991           1 :                 topLevelIndex: i.topLevelIndex.ResetForReuse(),
     992           1 :                 pool:          pool,
     993           1 :         }
     994           1 :         if pool != nil {
     995           1 :                 pool.Put(i)
     996           1 :         }
     997           1 :         return err
     998             : }

Generated by: LCOV version 1.14