LCOV - code coverage report
Current view: top level - pebble/sstable - reader_iter_two_lvl.go (source / functions) Hit Total Coverage
Test: 2024-09-29 08:17Z 3fb8ee48 - tests + meta.lcov Lines: 641 731 87.7 %
Date: 2024-09-29 08:18:00 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : package sstable
       6             : 
       7             : import (
       8             :         "bytes"
       9             :         "context"
      10             :         "fmt"
      11             :         "sync"
      12             : 
      13             :         "github.com/cockroachdb/errors"
      14             :         "github.com/cockroachdb/pebble/internal/base"
      15             :         "github.com/cockroachdb/pebble/internal/invariants"
      16             :         "github.com/cockroachdb/pebble/internal/treeprinter"
      17             :         "github.com/cockroachdb/pebble/objstorage"
      18             :         "github.com/cockroachdb/pebble/objstorage/objstorageprovider"
      19             :         "github.com/cockroachdb/pebble/objstorage/objstorageprovider/objiotracing"
      20             :         "github.com/cockroachdb/pebble/sstable/block"
      21             : )
      22             : 
      23             : type twoLevelIterator[I any, PI indexBlockIterator[I], D any, PD dataBlockIterator[D]] struct {
      24             :         secondLevel   singleLevelIterator[I, PI, D, PD]
      25             :         topLevelIndex I
      26             :         // pool is the pool from which the iterator was allocated and to which the
      27             :         // iterator should be returned on Close. Because the iterator is
      28             :         // parameterized by the type of the data block iterator, pools must be
      29             :         // specific to the type of the data block iterator.
      30             :         pool *sync.Pool
      31             : 
      32             :         // useFilterBlock controls whether we consult the bloom filter in the
      33             :         // twoLevelIterator code. Note that secondLevel.useFilterBlock is always
      34             :         // false - any filtering happens at the top level.
      35             :         useFilterBlock         bool
      36             :         lastBloomFilterMatched bool
      37             : }
      38             : 
      39             : var _ Iterator = (*twoLevelIteratorRowBlocks)(nil)
      40             : 
      41             : // loadIndex loads the index block at the current top level index position and
      42             : // leaves i.index unpositioned. If unsuccessful, it gets i.secondLevel.err to any error
      43             : // encountered, which may be nil if we have simply exhausted the entire table.
      44             : // This is used for two level indexes.
      45           2 : func (i *twoLevelIterator[I, PI, D, PD]) loadIndex(dir int8) loadBlockResult {
      46           2 :         // Ensure the index data block iterators are invalidated even if loading of
      47           2 :         // the index fails.
      48           2 :         PD(&i.secondLevel.data).Invalidate()
      49           2 :         PI(&i.secondLevel.index).Invalidate()
      50           2 :         if !PI(&i.topLevelIndex).Valid() {
      51           0 :                 return loadBlockFailed
      52           0 :         }
      53           2 :         bhp, err := PI(&i.topLevelIndex).BlockHandleWithProperties()
      54           2 :         if err != nil {
      55           0 :                 i.secondLevel.err = base.CorruptionErrorf("pebble/table: corrupt top level index entry (%v)", err)
      56           0 :                 return loadBlockFailed
      57           0 :         }
      58           2 :         if i.secondLevel.bpfs != nil {
      59           2 :                 intersects, err := i.secondLevel.bpfs.intersects(bhp.Props)
      60           2 :                 if err != nil {
      61           0 :                         i.secondLevel.err = errCorruptIndexEntry(err)
      62           0 :                         return loadBlockFailed
      63           0 :                 }
      64           2 :                 if intersects == blockMaybeExcluded {
      65           1 :                         intersects = i.resolveMaybeExcluded(dir)
      66           1 :                 }
      67           2 :                 if intersects == blockExcluded {
      68           2 :                         return loadBlockIrrelevant
      69           2 :                 }
      70             :                 // blockIntersects
      71             :         }
      72           2 :         ctx := objiotracing.WithBlockType(i.secondLevel.ctx, objiotracing.MetadataBlock)
      73           2 :         indexBlock, err := i.secondLevel.reader.readBlock(
      74           2 :                 ctx, bhp.Handle, nil /* transform */, i.secondLevel.indexFilterRH, i.secondLevel.stats, &i.secondLevel.iterStats, i.secondLevel.bufferPool)
      75           2 :         if err == nil {
      76           2 :                 err = PI(&i.secondLevel.index).InitHandle(i.secondLevel.cmp, i.secondLevel.reader.Split, indexBlock, i.secondLevel.transforms)
      77           2 :         }
      78           2 :         if err != nil {
      79           1 :                 i.secondLevel.err = err
      80           1 :                 return loadBlockFailed
      81           1 :         }
      82           2 :         return loadBlockOK
      83             : }
      84             : 
      85             : // resolveMaybeExcluded is invoked when the block-property filterer has found
      86             : // that an index block is excluded according to its properties but only if its
      87             : // bounds fall within the filter's current bounds. This function consults the
      88             : // appropriate bound, depending on the iteration direction, and returns either
      89             : // `blockIntersects` or `blockExcluded`.
      90           1 : func (i *twoLevelIterator[I, PI, D, PD]) resolveMaybeExcluded(dir int8) intersectsResult {
      91           1 :         // This iterator is configured with a bound-limited block property filter.
      92           1 :         // The bpf determined this entire index block could be excluded from
      93           1 :         // iteration based on the property encoded in the block handle. However, we
      94           1 :         // still need to determine if the index block is wholly contained within the
      95           1 :         // filter's key bounds.
      96           1 :         //
      97           1 :         // External guarantees ensure all its data blocks' keys are ≥ the filter's
      98           1 :         // lower bound during forward iteration, and that all its data blocks' keys
      99           1 :         // are < the filter's upper bound during backward iteration. We only need to
     100           1 :         // determine if the opposite bound is also met.
     101           1 :         //
     102           1 :         // The index separator in topLevelIndex.Separator() provides an inclusive
     103           1 :         // upper-bound for the index block's keys, guaranteeing that all its keys
     104           1 :         // are ≤ topLevelIndex.Separator(). For forward iteration, this is all we
     105           1 :         // need.
     106           1 :         if dir > 0 {
     107           1 :                 // Forward iteration.
     108           1 :                 if i.secondLevel.bpfs.boundLimitedFilter.KeyIsWithinUpperBound(PI(&i.topLevelIndex).Separator()) {
     109           1 :                         return blockExcluded
     110           1 :                 }
     111           1 :                 return blockIntersects
     112             :         }
     113             : 
     114             :         // Reverse iteration.
     115             :         //
     116             :         // Because we're iterating in the reverse direction, we don't yet have
     117             :         // enough context available to determine if the block is wholly contained
     118             :         // within its bounds. This case arises only during backward iteration,
     119             :         // because of the way the index is structured.
     120             :         //
     121             :         // Consider a bound-limited bpf limited to the bounds [b,d), loading the
     122             :         // block with separator `c`. During reverse iteration, the guarantee that
     123             :         // all the block's keys are < `d` is externally provided, but no guarantee
     124             :         // is made on the bpf's lower bound. The separator `c` only provides an
     125             :         // inclusive upper bound on the block's keys, indicating that the
     126             :         // corresponding block handle points to a block containing only keys ≤ `c`.
     127             :         //
     128             :         // To establish a lower bound, we step the top-level index backwards to read
     129             :         // the previous block's separator, which provides an inclusive lower bound
     130             :         // on the original index block's keys. Afterwards, we step forward to
     131             :         // restore our top-level index position.
     132           1 :         if !PI(&i.topLevelIndex).Prev() {
     133           1 :                 // The original block points to the first index block of this table. If
     134           1 :                 // we knew the lower bound for the entire table, it could provide a
     135           1 :                 // lower bound, but the code refactoring necessary to read it doesn't
     136           1 :                 // seem worth the payoff. We fall through to loading the block.
     137           1 :         } else if i.secondLevel.bpfs.boundLimitedFilter.KeyIsWithinLowerBound(PI(&i.topLevelIndex).Separator()) {
     138           1 :                 // The lower-bound on the original index block falls within the filter's
     139           1 :                 // bounds, and we can skip the block (after restoring our current
     140           1 :                 // top-level index position).
     141           1 :                 _ = PI(&i.topLevelIndex).Next()
     142           1 :                 return blockExcluded
     143           1 :         }
     144           1 :         _ = PI(&i.topLevelIndex).Next()
     145           1 :         return blockIntersects
     146             : }
     147             : 
     148             : // newColumnBlockTwoLevelIterator reads the top-level index block and creates and
     149             : // initializes a two-level iterator over an sstable with column-oriented data
     150             : // blocks.
     151             : //
     152             : // Note that lower, upper are iterator bounds and are separate from virtual
     153             : // sstable bounds. If the virtualState passed in is not nil, then virtual
     154             : // sstable bounds will be enforced.
     155             : func newColumnBlockTwoLevelIterator(
     156             :         ctx context.Context,
     157             :         r *Reader,
     158             :         v *virtualState,
     159             :         transforms IterTransforms,
     160             :         lower, upper []byte,
     161             :         filterer *BlockPropertiesFilterer,
     162             :         filterBlockSizeLimit FilterBlockSizeLimit,
     163             :         stats *base.InternalIteratorStats,
     164             :         categoryAndQoS CategoryAndQoS,
     165             :         statsCollector *CategoryStatsCollector,
     166             :         rp ReaderProvider,
     167             :         bufferPool *block.BufferPool,
     168           0 : ) (*twoLevelIteratorColumnBlocks, error) {
     169           0 :         if r.err != nil {
     170           0 :                 return nil, r.err
     171           0 :         }
     172           0 :         if !r.tableFormat.BlockColumnar() {
     173           0 :                 panic(errors.AssertionFailedf("table format %d should not use columnar block format", r.tableFormat))
     174             :         }
     175           0 :         i := twoLevelIterColumnBlockPool.Get().(*twoLevelIteratorColumnBlocks)
     176           0 :         i.secondLevel.init(ctx, r, v, transforms, lower, upper, filterer,
     177           0 :                 false, // Disable the use of the filter block in the second level.
     178           0 :                 stats, categoryAndQoS, statsCollector, bufferPool)
     179           0 :         if r.Properties.NumValueBlocks > 0 {
     180           0 :                 // NB: we cannot avoid this ~248 byte allocation, since valueBlockReader
     181           0 :                 // can outlive the singleLevelIterator due to be being embedded in a
     182           0 :                 // LazyValue. This consumes ~2% in microbenchmark CPU profiles, but we
     183           0 :                 // should only optimize this if it shows up as significant in end-to-end
     184           0 :                 // CockroachDB benchmarks, since it is tricky to do so. One possibility
     185           0 :                 // is that if many sstable iterators only get positioned at latest
     186           0 :                 // versions of keys, and therefore never expose a LazyValue that is
     187           0 :                 // separated to their callers, they can put this valueBlockReader into a
     188           0 :                 // sync.Pool.
     189           0 :                 i.secondLevel.vbReader = &valueBlockReader{
     190           0 :                         bpOpen: &i.secondLevel,
     191           0 :                         rp:     rp,
     192           0 :                         vbih:   r.valueBIH,
     193           0 :                         stats:  stats,
     194           0 :                 }
     195           0 :                 i.secondLevel.data.GetLazyValuer = i.secondLevel.vbReader
     196           0 :                 i.secondLevel.vbRH = objstorageprovider.UsePreallocatedReadHandle(r.readable, objstorage.NoReadBefore, &i.secondLevel.vbRHPrealloc)
     197           0 :         }
     198           0 :         i.secondLevel.data.KeySchema = r.keySchema
     199           0 :         i.useFilterBlock = shouldUseFilterBlock(r, filterBlockSizeLimit)
     200           0 :         topLevelIndexH, err := r.readIndex(ctx, i.secondLevel.indexFilterRH, stats, &i.secondLevel.iterStats)
     201           0 :         if err == nil {
     202           0 :                 err = i.topLevelIndex.InitHandle(i.secondLevel.cmp, i.secondLevel.reader.Split, topLevelIndexH, transforms)
     203           0 :         }
     204           0 :         if err != nil {
     205           0 :                 _ = i.Close()
     206           0 :                 return nil, err
     207           0 :         }
     208           0 :         return i, nil
     209             : }
     210             : 
     211             : // newRowBlockTwoLevelIterator reads the top-level index block and creates and
     212             : // initializes a two-level iterator over an sstable with row-oriented data
     213             : // blocks.
     214             : //
     215             : // Note that lower, upper are iterator bounds and are separate from virtual
     216             : // sstable bounds. If the virtualState passed in is not nil, then virtual
     217             : // sstable bounds will be enforced.
     218             : func newRowBlockTwoLevelIterator(
     219             :         ctx context.Context,
     220             :         r *Reader,
     221             :         v *virtualState,
     222             :         transforms IterTransforms,
     223             :         lower, upper []byte,
     224             :         filterer *BlockPropertiesFilterer,
     225             :         filterBlockSizeLimit FilterBlockSizeLimit,
     226             :         stats *base.InternalIteratorStats,
     227             :         categoryAndQoS CategoryAndQoS,
     228             :         statsCollector *CategoryStatsCollector,
     229             :         rp ReaderProvider,
     230             :         bufferPool *block.BufferPool,
     231           2 : ) (*twoLevelIteratorRowBlocks, error) {
     232           2 :         if r.err != nil {
     233           0 :                 return nil, r.err
     234           0 :         }
     235           2 :         if r.tableFormat.BlockColumnar() {
     236           0 :                 panic(errors.AssertionFailedf("table format %s uses block columnar format", r.tableFormat))
     237             :         }
     238           2 :         i := twoLevelIterRowBlockPool.Get().(*twoLevelIteratorRowBlocks)
     239           2 :         i.secondLevel.init(ctx, r, v, transforms, lower, upper, filterer,
     240           2 :                 false, // Disable the use of the filter block in the second level.
     241           2 :                 stats, categoryAndQoS, statsCollector, bufferPool)
     242           2 :         if r.tableFormat >= TableFormatPebblev3 {
     243           2 :                 if r.Properties.NumValueBlocks > 0 {
     244           2 :                         // NB: we cannot avoid this ~248 byte allocation, since valueBlockReader
     245           2 :                         // can outlive the singleLevelIterator due to be being embedded in a
     246           2 :                         // LazyValue. This consumes ~2% in microbenchmark CPU profiles, but we
     247           2 :                         // should only optimize this if it shows up as significant in end-to-end
     248           2 :                         // CockroachDB benchmarks, since it is tricky to do so. One possibility
     249           2 :                         // is that if many sstable iterators only get positioned at latest
     250           2 :                         // versions of keys, and therefore never expose a LazyValue that is
     251           2 :                         // separated to their callers, they can put this valueBlockReader into a
     252           2 :                         // sync.Pool.
     253           2 :                         i.secondLevel.vbReader = &valueBlockReader{
     254           2 :                                 bpOpen: &i.secondLevel,
     255           2 :                                 rp:     rp,
     256           2 :                                 vbih:   r.valueBIH,
     257           2 :                                 stats:  stats,
     258           2 :                         }
     259           2 :                         i.secondLevel.data.SetGetLazyValuer(i.secondLevel.vbReader)
     260           2 :                         i.secondLevel.vbRH = objstorageprovider.UsePreallocatedReadHandle(r.readable, objstorage.NoReadBefore, &i.secondLevel.vbRHPrealloc)
     261           2 :                 }
     262           2 :                 i.secondLevel.data.SetHasValuePrefix(true)
     263             :         }
     264             : 
     265           2 :         i.useFilterBlock = shouldUseFilterBlock(r, filterBlockSizeLimit)
     266           2 : 
     267           2 :         topLevelIndexH, err := r.readIndex(ctx, i.secondLevel.indexFilterRH, stats, &i.secondLevel.iterStats)
     268           2 :         if err == nil {
     269           2 :                 err = i.topLevelIndex.InitHandle(i.secondLevel.cmp, i.secondLevel.reader.Split, topLevelIndexH, transforms)
     270           2 :         }
     271           2 :         if err != nil {
     272           1 :                 _ = i.Close()
     273           1 :                 return nil, err
     274           1 :         }
     275           2 :         return i, nil
     276             : }
     277             : 
     278           0 : func (i *twoLevelIterator[I, PI, D, PD]) String() string {
     279           0 :         return i.secondLevel.String()
     280           0 : }
     281             : 
     282             : // DebugTree is part of the InternalIterator interface.
     283           0 : func (i *twoLevelIterator[I, PI, D, PD]) DebugTree(tp treeprinter.Node) {
     284           0 :         tp.Childf("%T(%p) fileNum=%s", i, i, i.String())
     285           0 : }
     286             : 
     287             : // SeekGE implements internalIterator.SeekGE, as documented in the pebble
     288             : // package. Note that SeekGE only checks the upper bound. It is up to the
     289             : // caller to ensure that key is greater than or equal to the lower bound.
     290             : func (i *twoLevelIterator[I, PI, D, PD]) SeekGE(
     291             :         key []byte, flags base.SeekGEFlags,
     292           2 : ) *base.InternalKV {
     293           2 :         if i.secondLevel.vState != nil {
     294           2 :                 // Callers of SeekGE don't know about virtual sstable bounds, so we may
     295           2 :                 // have to internally restrict the bounds.
     296           2 :                 //
     297           2 :                 // TODO(bananabrick): We can optimize away this check for the level iter
     298           2 :                 // if necessary.
     299           2 :                 if i.secondLevel.cmp(key, i.secondLevel.lower) < 0 {
     300           2 :                         key = i.secondLevel.lower
     301           2 :                 }
     302             :         }
     303             : 
     304           2 :         err := i.secondLevel.err
     305           2 :         i.secondLevel.err = nil // clear cached iteration error
     306           2 : 
     307           2 :         // The twoLevelIterator could be already exhausted. Utilize that when
     308           2 :         // trySeekUsingNext is true. See the comment about data-exhausted, PGDE, and
     309           2 :         // bounds-exhausted near the top of the file.
     310           2 :         if flags.TrySeekUsingNext() &&
     311           2 :                 (i.secondLevel.exhaustedBounds == +1 || (PD(&i.secondLevel.data).IsDataInvalidated() && PI(&i.secondLevel.index).IsDataInvalidated())) &&
     312           2 :                 err == nil {
     313           2 :                 // Already exhausted, so return nil.
     314           2 :                 return nil
     315           2 :         }
     316             : 
     317             :         // SeekGE performs various step-instead-of-seeking optimizations: eg enabled
     318             :         // by trySeekUsingNext, or by monotonically increasing bounds (i.boundsCmp).
     319             : 
     320             :         // We fall into the slow path if i.index.IsDataInvalidated() even if the
     321             :         // top-level iterator is already positioned correctly and all other
     322             :         // conditions are met. An alternative structure could reuse topLevelIndex's
     323             :         // current position and reload the index block to which it points. Arguably,
     324             :         // an index block load is expensive and the index block may still be earlier
     325             :         // than the index block containing the sought key, resulting in a wasteful
     326             :         // block load.
     327             : 
     328           2 :         var dontSeekWithinSingleLevelIter bool
     329           2 :         if PI(&i.topLevelIndex).IsDataInvalidated() || !PI(&i.topLevelIndex).Valid() || PI(&i.secondLevel.index).IsDataInvalidated() || err != nil ||
     330           2 :                 (i.secondLevel.boundsCmp <= 0 && !flags.TrySeekUsingNext()) || i.secondLevel.cmp(key, PI(&i.topLevelIndex).Separator()) > 0 {
     331           2 :                 // Slow-path: need to position the topLevelIndex.
     332           2 : 
     333           2 :                 // The previous exhausted state of singleLevelIterator is no longer
     334           2 :                 // relevant, since we may be moving to a different index block.
     335           2 :                 i.secondLevel.exhaustedBounds = 0
     336           2 :                 flags = flags.DisableTrySeekUsingNext()
     337           2 :                 if !PI(&i.topLevelIndex).SeekGE(key) {
     338           2 :                         PD(&i.secondLevel.data).Invalidate()
     339           2 :                         PI(&i.secondLevel.index).Invalidate()
     340           2 :                         return nil
     341           2 :                 }
     342             : 
     343           2 :                 result := i.loadIndex(+1)
     344           2 :                 if result == loadBlockFailed {
     345           1 :                         i.secondLevel.boundsCmp = 0
     346           1 :                         return nil
     347           1 :                 }
     348           2 :                 if result == loadBlockIrrelevant {
     349           2 :                         // Enforce the upper bound here since don't want to bother moving
     350           2 :                         // to the next entry in the top level index if upper bound is
     351           2 :                         // already exceeded. Note that the next entry starts with keys >=
     352           2 :                         // ikey.InternalKey.UserKey since even though this is the block separator, the
     353           2 :                         // same user key can span multiple index blocks. If upper is
     354           2 :                         // exclusive we use >= below, else we use >.
     355           2 :                         if i.secondLevel.upper != nil {
     356           2 :                                 cmp := i.secondLevel.cmp(PI(&i.topLevelIndex).Separator(), i.secondLevel.upper)
     357           2 :                                 if (!i.secondLevel.endKeyInclusive && cmp >= 0) || cmp > 0 {
     358           2 :                                         i.secondLevel.exhaustedBounds = +1
     359           2 :                                 }
     360             :                         }
     361             :                         // Fall through to skipForward.
     362           2 :                         dontSeekWithinSingleLevelIter = true
     363           2 :                         // Clear boundsCmp.
     364           2 :                         //
     365           2 :                         // In the typical cases where dontSeekWithinSingleLevelIter=false,
     366           2 :                         // the singleLevelIterator.SeekGE call will clear boundsCmp.
     367           2 :                         // However, in this case where dontSeekWithinSingleLevelIter=true,
     368           2 :                         // we never seek on the single-level iterator. This call will fall
     369           2 :                         // through to skipForward, which may improperly leave boundsCmp=+1
     370           2 :                         // unless we clear it here.
     371           2 :                         i.secondLevel.boundsCmp = 0
     372             :                 }
     373           2 :         } else {
     374           2 :                 // INVARIANT: err == nil.
     375           2 :                 //
     376           2 :                 // Else fast-path: There are two possible cases, from
     377           2 :                 // (i.boundsCmp > 0 || flags.TrySeekUsingNext()):
     378           2 :                 //
     379           2 :                 // 1) The bounds have moved forward (i.boundsCmp > 0) and this SeekGE is
     380           2 :                 // respecting the lower bound (guaranteed by Iterator). We know that the
     381           2 :                 // iterator must already be positioned within or just outside the previous
     382           2 :                 // bounds. Therefore, the topLevelIndex iter cannot be positioned at an
     383           2 :                 // entry ahead of the seek position (though it can be positioned behind).
     384           2 :                 // The !i.cmp(key, i.topLevelIndex.Separator) > 0 confirms that it is
     385           2 :                 // not behind. Since it is not ahead and not behind it must be at the
     386           2 :                 // right position.
     387           2 :                 //
     388           2 :                 // 2) This SeekGE will land on a key that is greater than the key we are
     389           2 :                 // currently at (guaranteed by trySeekUsingNext), but since i.cmp(key,
     390           2 :                 // i.topLevelIndex.Separator()) <= 0, we are at the correct lower level
     391           2 :                 // index block. No need to reset the state of singleLevelIterator.
     392           2 :                 //
     393           2 :                 // Note that cases 1 and 2 never overlap, and one of them must be true.
     394           2 :                 // This invariant checking is important enough that we do not gate it
     395           2 :                 // behind invariants.Enabled.
     396           2 :                 if i.secondLevel.boundsCmp > 0 == flags.TrySeekUsingNext() {
     397           0 :                         panic(fmt.Sprintf("inconsistency in optimization case 1 %t and case 2 %t",
     398           0 :                                 i.secondLevel.boundsCmp > 0, flags.TrySeekUsingNext()))
     399             :                 }
     400             : 
     401           2 :                 if !flags.TrySeekUsingNext() {
     402           2 :                         // Case 1. Bounds have changed so the previous exhausted bounds state is
     403           2 :                         // irrelevant.
     404           2 :                         // WARNING-data-exhausted: this is safe to do only because the monotonic
     405           2 :                         // bounds optimizations only work when !data-exhausted. If they also
     406           2 :                         // worked with data-exhausted, we have made it unclear whether
     407           2 :                         // data-exhausted is actually true. See the comment at the top of the
     408           2 :                         // file.
     409           2 :                         i.secondLevel.exhaustedBounds = 0
     410           2 :                 }
     411             :                 // Else flags.TrySeekUsingNext(). The i.exhaustedBounds is important to
     412             :                 // preserve for singleLevelIterator, and twoLevelIterator.skipForward. See
     413             :                 // bug https://github.com/cockroachdb/pebble/issues/2036.
     414             :         }
     415             : 
     416           2 :         if !dontSeekWithinSingleLevelIter {
     417           2 :                 // Note that while trySeekUsingNext could be false here, singleLevelIterator
     418           2 :                 // could do its own boundsCmp-based optimization to seek using next.
     419           2 :                 if ikv := i.secondLevel.SeekGE(key, flags); ikv != nil {
     420           2 :                         return ikv
     421           2 :                 }
     422             :         }
     423           2 :         return i.skipForward()
     424             : }
     425             : 
     426             : // SeekPrefixGE implements internalIterator.SeekPrefixGE, as documented in the
     427             : // pebble package. Note that SeekPrefixGE only checks the upper bound. It is up
     428             : // to the caller to ensure that key is greater than or equal to the lower bound.
     429             : func (i *twoLevelIterator[I, PI, D, PD]) SeekPrefixGE(
     430             :         prefix, key []byte, flags base.SeekGEFlags,
     431           2 : ) *base.InternalKV {
     432           2 :         if i.secondLevel.vState != nil {
     433           2 :                 // Callers of SeekGE don't know about virtual sstable bounds, so we may
     434           2 :                 // have to internally restrict the bounds.
     435           2 :                 //
     436           2 :                 // TODO(bananabrick): We can optimize away this check for the level iter
     437           2 :                 // if necessary.
     438           2 :                 if i.secondLevel.cmp(key, i.secondLevel.lower) < 0 {
     439           2 :                         key = i.secondLevel.lower
     440           2 :                 }
     441             :         }
     442             : 
     443             :         // NOTE: prefix is only used for bloom filter checking and not later work in
     444             :         // this method. Hence, we can use the existing iterator position if the last
     445             :         // SeekPrefixGE did not fail bloom filter matching.
     446             : 
     447           2 :         err := i.secondLevel.err
     448           2 :         i.secondLevel.err = nil // clear cached iteration error
     449           2 : 
     450           2 :         // The twoLevelIterator could be already exhausted. Utilize that when
     451           2 :         // trySeekUsingNext is true. See the comment about data-exhausted, PGDE, and
     452           2 :         // bounds-exhausted near the top of the file.
     453           2 :         filterUsedAndDidNotMatch := i.useFilterBlock && !i.lastBloomFilterMatched
     454           2 :         if flags.TrySeekUsingNext() && !filterUsedAndDidNotMatch &&
     455           2 :                 (i.secondLevel.exhaustedBounds == +1 || (PD(&i.secondLevel.data).IsDataInvalidated() && PI(&i.secondLevel.index).IsDataInvalidated())) &&
     456           2 :                 err == nil {
     457           1 :                 // Already exhausted, so return nil.
     458           1 :                 return nil
     459           1 :         }
     460             : 
     461             :         // Check prefix bloom filter.
     462           2 :         if i.useFilterBlock {
     463           2 :                 if !i.lastBloomFilterMatched {
     464           2 :                         // Iterator is not positioned based on last seek.
     465           2 :                         flags = flags.DisableTrySeekUsingNext()
     466           2 :                 }
     467           2 :                 i.lastBloomFilterMatched = false
     468           2 :                 var mayContain bool
     469           2 :                 mayContain, i.secondLevel.err = i.secondLevel.bloomFilterMayContain(prefix)
     470           2 :                 if i.secondLevel.err != nil || !mayContain {
     471           2 :                         // In the i.secondLevel.err == nil case, this invalidation may not be necessary for
     472           2 :                         // correctness, and may be a place to optimize later by reusing the
     473           2 :                         // already loaded block. It was necessary in earlier versions of the code
     474           2 :                         // since the caller was allowed to call Next when SeekPrefixGE returned
     475           2 :                         // nil. This is no longer allowed.
     476           2 :                         PD(&i.secondLevel.data).Invalidate()
     477           2 :                         return nil
     478           2 :                 }
     479           2 :                 i.lastBloomFilterMatched = true
     480             :         }
     481             : 
     482             :         // Bloom filter matches.
     483             : 
     484             :         // SeekPrefixGE performs various step-instead-of-seeking optimizations: eg
     485             :         // enabled by trySeekUsingNext, or by monotonically increasing bounds
     486             :         // (i.boundsCmp).
     487             : 
     488             :         // We fall into the slow path if i.index.IsDataInvalidated() even if the
     489             :         // top-level iterator is already positioned correctly and all other
     490             :         // conditions are met. An alternative structure could reuse topLevelIndex's
     491             :         // current position and reload the index block to which it points. Arguably,
     492             :         // an index block load is expensive and the index block may still be earlier
     493             :         // than the index block containing the sought key, resulting in a wasteful
     494             :         // block load.
     495             : 
     496           2 :         var dontSeekWithinSingleLevelIter bool
     497           2 :         if PI(&i.topLevelIndex).IsDataInvalidated() || !PI(&i.topLevelIndex).Valid() || PI(&i.secondLevel.index).IsDataInvalidated() || err != nil ||
     498           2 :                 (i.secondLevel.boundsCmp <= 0 && !flags.TrySeekUsingNext()) || i.secondLevel.cmp(key, PI(&i.topLevelIndex).Separator()) > 0 {
     499           2 :                 // Slow-path: need to position the topLevelIndex.
     500           2 : 
     501           2 :                 // The previous exhausted state of singleLevelIterator is no longer
     502           2 :                 // relevant, since we may be moving to a different index block.
     503           2 :                 i.secondLevel.exhaustedBounds = 0
     504           2 :                 flags = flags.DisableTrySeekUsingNext()
     505           2 :                 if !PI(&i.topLevelIndex).SeekGE(key) {
     506           2 :                         PD(&i.secondLevel.data).Invalidate()
     507           2 :                         PI(&i.secondLevel.index).Invalidate()
     508           2 :                         return nil
     509           2 :                 }
     510             : 
     511           2 :                 result := i.loadIndex(+1)
     512           2 :                 if result == loadBlockFailed {
     513           1 :                         i.secondLevel.boundsCmp = 0
     514           1 :                         return nil
     515           1 :                 }
     516           2 :                 if result == loadBlockIrrelevant {
     517           2 :                         // Enforce the upper bound here since don't want to bother moving
     518           2 :                         // to the next entry in the top level index if upper bound is
     519           2 :                         // already exceeded. Note that the next entry starts with keys >=
     520           2 :                         // ikey.InternalKey.UserKey since even though this is the block separator, the
     521           2 :                         // same user key can span multiple index blocks. If upper is
     522           2 :                         // exclusive we use >= below, else we use >.
     523           2 :                         if i.secondLevel.upper != nil {
     524           2 :                                 cmp := i.secondLevel.cmp(PI(&i.topLevelIndex).Separator(), i.secondLevel.upper)
     525           2 :                                 if (!i.secondLevel.endKeyInclusive && cmp >= 0) || cmp > 0 {
     526           2 :                                         i.secondLevel.exhaustedBounds = +1
     527           2 :                                 }
     528             :                         }
     529             :                         // Fall through to skipForward.
     530           2 :                         dontSeekWithinSingleLevelIter = true
     531           2 :                         // Clear boundsCmp.
     532           2 :                         //
     533           2 :                         // In the typical cases where dontSeekWithinSingleLevelIter=false,
     534           2 :                         // the singleLevelIterator.SeekPrefixGE call will clear boundsCmp.
     535           2 :                         // However, in this case where dontSeekWithinSingleLevelIter=true,
     536           2 :                         // we never seek on the single-level iterator. This call will fall
     537           2 :                         // through to skipForward, which may improperly leave boundsCmp=+1
     538           2 :                         // unless we clear it here.
     539           2 :                         i.secondLevel.boundsCmp = 0
     540             :                 }
     541           2 :         } else {
     542           2 :                 // INVARIANT: err == nil.
     543           2 :                 //
     544           2 :                 // Else fast-path: There are two possible cases, from
     545           2 :                 // (i.boundsCmp > 0 || flags.TrySeekUsingNext()):
     546           2 :                 //
     547           2 :                 // 1) The bounds have moved forward (i.boundsCmp > 0) and this
     548           2 :                 // SeekPrefixGE is respecting the lower bound (guaranteed by Iterator). We
     549           2 :                 // know that the iterator must already be positioned within or just
     550           2 :                 // outside the previous bounds. Therefore, the topLevelIndex iter cannot
     551           2 :                 // be positioned at an entry ahead of the seek position (though it can be
     552           2 :                 // positioned behind). The !i.cmp(key, i.topLevelIndex.Separator()) > 0
     553           2 :                 // confirms that it is not behind. Since it is not ahead and not behind it
     554           2 :                 // must be at the right position.
     555           2 :                 //
     556           2 :                 // 2) This SeekPrefixGE will land on a key that is greater than the key we
     557           2 :                 // are currently at (guaranteed by trySeekUsingNext), but since i.cmp(key,
     558           2 :                 // i.topLevelIndex.Separator()) <= 0, we are at the correct lower level
     559           2 :                 // index block. No need to reset the state of singleLevelIterator.
     560           2 :                 //
     561           2 :                 // Note that cases 1 and 2 never overlap, and one of them must be true.
     562           2 :                 // This invariant checking is important enough that we do not gate it
     563           2 :                 // behind invariants.Enabled.
     564           2 :                 if i.secondLevel.boundsCmp > 0 == flags.TrySeekUsingNext() {
     565           0 :                         panic(fmt.Sprintf("inconsistency in optimization case 1 %t and case 2 %t",
     566           0 :                                 i.secondLevel.boundsCmp > 0, flags.TrySeekUsingNext()))
     567             :                 }
     568             : 
     569           2 :                 if !flags.TrySeekUsingNext() {
     570           1 :                         // Case 1. Bounds have changed so the previous exhausted bounds state is
     571           1 :                         // irrelevant.
     572           1 :                         // WARNING-data-exhausted: this is safe to do only because the monotonic
     573           1 :                         // bounds optimizations only work when !data-exhausted. If they also
     574           1 :                         // worked with data-exhausted, we have made it unclear whether
     575           1 :                         // data-exhausted is actually true. See the comment at the top of the
     576           1 :                         // file.
     577           1 :                         i.secondLevel.exhaustedBounds = 0
     578           1 :                 }
     579             :                 // Else flags.TrySeekUsingNext(). The i.exhaustedBounds is important to
     580             :                 // preserve for singleLevelIterator, and twoLevelIterator.skipForward. See
     581             :                 // bug https://github.com/cockroachdb/pebble/issues/2036.
     582             :         }
     583             : 
     584           2 :         if !dontSeekWithinSingleLevelIter {
     585           2 :                 if ikv := i.secondLevel.seekPrefixGE(prefix, key, flags); ikv != nil {
     586           2 :                         return ikv
     587           2 :                 }
     588             :         }
     589             :         // NB: skipForward checks whether exhaustedBounds is already +1.
     590           2 :         return i.skipForward()
     591             : }
     592             : 
     593             : // virtualLast should only be called if i.vReader != nil.
     594           2 : func (i *twoLevelIterator[I, PI, D, PD]) virtualLast() *base.InternalKV {
     595           2 :         if i.secondLevel.vState == nil {
     596           0 :                 panic("pebble: invalid call to virtualLast")
     597             :         }
     598           2 :         if !i.secondLevel.endKeyInclusive {
     599           2 :                 // Trivial case.
     600           2 :                 return i.SeekLT(i.secondLevel.upper, base.SeekLTFlagsNone)
     601           2 :         }
     602           2 :         return i.virtualLastSeekLE()
     603             : }
     604             : 
     605             : // virtualLastSeekLE implements a SeekLE() that can be used as part
     606             : // of reverse-iteration calls such as a Last() on a virtual sstable. Does a
     607             : // SeekLE on the upper bound of the file/iterator.
     608           2 : func (i *twoLevelIterator[I, PI, D, PD]) virtualLastSeekLE() *base.InternalKV {
     609           2 :         // Callers of SeekLE don't know about virtual sstable bounds, so we may
     610           2 :         // have to internally restrict the bounds.
     611           2 :         //
     612           2 :         // TODO(bananabrick): We can optimize this check away for the level iter
     613           2 :         // if necessary.
     614           2 :         if !i.secondLevel.endKeyInclusive {
     615           0 :                 panic("unexpected virtualLastSeekLE with exclusive upper bounds")
     616             :         }
     617           2 :         key := i.secondLevel.upper
     618           2 :         // Need to position the topLevelIndex.
     619           2 :         //
     620           2 :         // The previous exhausted state of singleLevelIterator is no longer
     621           2 :         // relevant, since we may be moving to a different index block.
     622           2 :         i.secondLevel.exhaustedBounds = 0
     623           2 :         // Seek optimization only applies until iterator is first positioned with a
     624           2 :         // SeekGE or SeekLT after SetBounds.
     625           2 :         i.secondLevel.boundsCmp = 0
     626           2 :         topLevelOk := PI(&i.topLevelIndex).SeekGE(key)
     627           2 :         // We can have multiple internal keys with the same user key as the seek
     628           2 :         // key. In that case, we want the last (greatest) internal key.
     629           2 :         for topLevelOk && bytes.Equal(PI(&i.topLevelIndex).Separator(), key) {
     630           2 :                 topLevelOk = PI(&i.topLevelIndex).Next()
     631           2 :         }
     632           2 :         if !topLevelOk {
     633           2 :                 return i.skipBackward()
     634           2 :         }
     635           2 :         result := i.loadIndex(-1)
     636           2 :         if result == loadBlockFailed {
     637           0 :                 i.secondLevel.boundsCmp = 0
     638           0 :                 return nil
     639           0 :         }
     640           2 :         if result == loadBlockIrrelevant {
     641           1 :                 // Load the previous block.
     642           1 :                 return i.skipBackward()
     643           1 :         }
     644           2 :         if ikv := i.secondLevel.virtualLastSeekLE(); ikv != nil {
     645           2 :                 return ikv
     646           2 :         }
     647           2 :         return i.skipBackward()
     648             : }
     649             : 
     650             : // SeekLT implements internalIterator.SeekLT, as documented in the pebble
     651             : // package. Note that SeekLT only checks the lower bound. It is up to the
     652             : // caller to ensure that key is less than the upper bound.
     653             : func (i *twoLevelIterator[I, PI, D, PD]) SeekLT(
     654             :         key []byte, flags base.SeekLTFlags,
     655           2 : ) *base.InternalKV {
     656           2 :         if i.secondLevel.vState != nil {
     657           2 :                 // Might have to fix upper bound since virtual sstable bounds are not
     658           2 :                 // known to callers of SeekLT.
     659           2 :                 //
     660           2 :                 // TODO(bananabrick): We can optimize away this check for the level iter
     661           2 :                 // if necessary.
     662           2 :                 cmp := i.secondLevel.cmp(key, i.secondLevel.upper)
     663           2 :                 // key == i.secondLevel.upper is fine. We'll do the right thing and return the
     664           2 :                 // first internal key with user key < key.
     665           2 :                 if cmp > 0 {
     666           2 :                         return i.virtualLast()
     667           2 :                 }
     668             :         }
     669             : 
     670           2 :         i.secondLevel.exhaustedBounds = 0
     671           2 :         i.secondLevel.err = nil // clear cached iteration error
     672           2 :         // Seek optimization only applies until iterator is first positioned after SetBounds.
     673           2 :         i.secondLevel.boundsCmp = 0
     674           2 : 
     675           2 :         var result loadBlockResult
     676           2 :         // NB: Unlike SeekGE, we don't have a fast-path here since we don't know
     677           2 :         // whether the topLevelIndex is positioned after the position that would
     678           2 :         // be returned by doing i.topLevelIndex.SeekGE(). To know this we would
     679           2 :         // need to know the index key preceding the current one.
     680           2 :         // NB: If a bound-limited block property filter is configured, it's
     681           2 :         // externally ensured that the filter is disabled (through returning
     682           2 :         // Intersects=false irrespective of the block props provided) during seeks.
     683           2 :         if !PI(&i.topLevelIndex).SeekGE(key) {
     684           2 :                 if !PI(&i.topLevelIndex).Last() {
     685           0 :                         PD(&i.secondLevel.data).Invalidate()
     686           0 :                         PI(&i.secondLevel.index).Invalidate()
     687           0 :                         return nil
     688           0 :                 }
     689             : 
     690           2 :                 result = i.loadIndex(-1)
     691           2 :                 if result == loadBlockFailed {
     692           1 :                         return nil
     693           1 :                 }
     694           2 :                 if result == loadBlockOK {
     695           2 :                         if ikv := i.secondLevel.lastInternal(); ikv != nil {
     696           2 :                                 return i.secondLevel.maybeVerifyKey(ikv)
     697           2 :                         }
     698             :                         // Fall through to skipBackward since the singleLevelIterator did
     699             :                         // not have any blocks that satisfy the block interval
     700             :                         // constraints, or the lower bound was reached.
     701             :                 }
     702             :                 // Else loadBlockIrrelevant, so fall through.
     703           2 :         } else {
     704           2 :                 result = i.loadIndex(-1)
     705           2 :                 if result == loadBlockFailed {
     706           1 :                         return nil
     707           1 :                 }
     708           2 :                 if result == loadBlockOK {
     709           2 :                         if ikv := i.secondLevel.SeekLT(key, flags); ikv != nil {
     710           2 :                                 return i.secondLevel.maybeVerifyKey(ikv)
     711           2 :                         }
     712             :                         // Fall through to skipBackward since the singleLevelIterator did
     713             :                         // not have any blocks that satisfy the block interval
     714             :                         // constraint, or the lower bound was reached.
     715             :                 }
     716             :                 // Else loadBlockIrrelevant, so fall through.
     717             :         }
     718           2 :         if result == loadBlockIrrelevant {
     719           2 :                 // Enforce the lower bound here since don't want to bother moving to
     720           2 :                 // the previous entry in the top level index if lower bound is already
     721           2 :                 // exceeded. Note that the previous entry starts with keys <=
     722           2 :                 // ikey.InternalKey.UserKey since even though this is the current block's
     723           2 :                 // separator, the same user key can span multiple index blocks.
     724           2 :                 if i.secondLevel.lower != nil && i.secondLevel.cmp(PI(&i.topLevelIndex).Separator(), i.secondLevel.lower) < 0 {
     725           1 :                         i.secondLevel.exhaustedBounds = -1
     726           1 :                 }
     727             :         }
     728             :         // NB: skipBackward checks whether exhaustedBounds is already -1.
     729           2 :         return i.skipBackward()
     730             : }
     731             : 
     732             : // First implements internalIterator.First, as documented in the pebble
     733             : // package. Note that First only checks the upper bound. It is up to the caller
     734             : // to ensure that key is greater than or equal to the lower bound (e.g. via a
     735             : // call to SeekGE(lower)).
     736           2 : func (i *twoLevelIterator[I, PI, D, PD]) First() *base.InternalKV {
     737           2 :         // If we have a lower bound, use SeekGE. Note that in general this is not
     738           2 :         // supported usage, except when the lower bound is there because the table is
     739           2 :         // virtual.
     740           2 :         if i.secondLevel.lower != nil {
     741           2 :                 return i.SeekGE(i.secondLevel.lower, base.SeekGEFlagsNone)
     742           2 :         }
     743           2 :         i.secondLevel.exhaustedBounds = 0
     744           2 :         i.secondLevel.err = nil // clear cached iteration error
     745           2 :         // Seek optimization only applies until iterator is first positioned after SetBounds.
     746           2 :         i.secondLevel.boundsCmp = 0
     747           2 : 
     748           2 :         if !PI(&i.topLevelIndex).First() {
     749           0 :                 return nil
     750           0 :         }
     751           2 :         result := i.loadIndex(+1)
     752           2 :         if result == loadBlockFailed {
     753           1 :                 return nil
     754           1 :         }
     755           2 :         if result == loadBlockOK {
     756           2 :                 if ikv := i.secondLevel.First(); ikv != nil {
     757           2 :                         return ikv
     758           2 :                 }
     759             :                 // Else fall through to skipForward.
     760           2 :         } else {
     761           2 :                 // result == loadBlockIrrelevant. Enforce the upper bound here since
     762           2 :                 // don't want to bother moving to the next entry in the top level
     763           2 :                 // index if upper bound is already exceeded. Note that the next entry
     764           2 :                 // starts with keys >= ikv.InternalKey.UserKey since even though this is the
     765           2 :                 // block separator, the same user key can span multiple index blocks.
     766           2 :                 // If upper is exclusive we use >= below, else we use >.
     767           2 :                 if i.secondLevel.upper != nil {
     768           1 :                         cmp := i.secondLevel.cmp(PI(&i.topLevelIndex).Separator(), i.secondLevel.upper)
     769           1 :                         if (!i.secondLevel.endKeyInclusive && cmp >= 0) || cmp > 0 {
     770           1 :                                 i.secondLevel.exhaustedBounds = +1
     771           1 :                         }
     772             :                 }
     773             :         }
     774             :         // NB: skipForward checks whether exhaustedBounds is already +1.
     775           2 :         return i.skipForward()
     776             : }
     777             : 
     778             : // Last implements internalIterator.Last, as documented in the pebble
     779             : // package. Note that Last only checks the lower bound. It is up to the caller
     780             : // to ensure that key is less than the upper bound (e.g. via a call to
     781             : // SeekLT(upper))
     782           2 : func (i *twoLevelIterator[I, PI, D, PD]) Last() *base.InternalKV {
     783           2 :         if i.secondLevel.vState != nil {
     784           2 :                 if i.secondLevel.endKeyInclusive {
     785           2 :                         return i.virtualLast()
     786           2 :                 }
     787           2 :                 return i.SeekLT(i.secondLevel.upper, base.SeekLTFlagsNone)
     788             :         }
     789             : 
     790           2 :         if i.secondLevel.upper != nil {
     791           0 :                 panic("twoLevelIterator.Last() used despite upper bound")
     792             :         }
     793           2 :         i.secondLevel.exhaustedBounds = 0
     794           2 :         i.secondLevel.err = nil // clear cached iteration error
     795           2 :         // Seek optimization only applies until iterator is first positioned after SetBounds.
     796           2 :         i.secondLevel.boundsCmp = 0
     797           2 : 
     798           2 :         if !PI(&i.topLevelIndex).Last() {
     799           0 :                 return nil
     800           0 :         }
     801           2 :         result := i.loadIndex(-1)
     802           2 :         if result == loadBlockFailed {
     803           1 :                 return nil
     804           1 :         }
     805           2 :         if result == loadBlockOK {
     806           2 :                 if ikv := i.secondLevel.Last(); ikv != nil {
     807           2 :                         return ikv
     808           2 :                 }
     809             :                 // Else fall through to skipBackward.
     810           2 :         } else {
     811           2 :                 // result == loadBlockIrrelevant. Enforce the lower bound here since
     812           2 :                 // don't want to bother moving to the previous entry in the top level
     813           2 :                 // index if lower bound is already exceeded. Note that the previous
     814           2 :                 // entry starts with keys <= ikv.InternalKey.UserKey since even though
     815           2 :                 // this is the current block's separator, the same user key can span
     816           2 :                 // multiple index blocks.
     817           2 :                 if i.secondLevel.lower != nil && i.secondLevel.cmp(PI(&i.topLevelIndex).Separator(), i.secondLevel.lower) < 0 {
     818           1 :                         i.secondLevel.exhaustedBounds = -1
     819           1 :                 }
     820             :         }
     821             :         // NB: skipBackward checks whether exhaustedBounds is already -1.
     822           2 :         return i.skipBackward()
     823             : }
     824             : 
     825             : // Next implements internalIterator.Next, as documented in the pebble
     826             : // package.
     827             : // Note: twoLevelCompactionIterator.Next mirrors the implementation of
     828             : // twoLevelIterator.Next due to performance. Keep the two in sync.
     829           2 : func (i *twoLevelIterator[I, PI, D, PD]) Next() *base.InternalKV {
     830           2 :         // Seek optimization only applies until iterator is first positioned after SetBounds.
     831           2 :         i.secondLevel.boundsCmp = 0
     832           2 :         if i.secondLevel.err != nil {
     833           1 :                 // TODO(jackson): Can this case be turned into a panic? Once an error is
     834           1 :                 // encountered, the iterator must be re-seeked.
     835           1 :                 return nil
     836           1 :         }
     837           2 :         if ikv := i.secondLevel.Next(); ikv != nil {
     838           2 :                 return ikv
     839           2 :         }
     840           2 :         return i.skipForward()
     841             : }
     842             : 
     843             : // NextPrefix implements (base.InternalIterator).NextPrefix.
     844           2 : func (i *twoLevelIterator[I, PI, D, PD]) NextPrefix(succKey []byte) *base.InternalKV {
     845           2 :         if i.secondLevel.exhaustedBounds == +1 {
     846           0 :                 panic("Next called even though exhausted upper bound")
     847             :         }
     848             :         // Seek optimization only applies until iterator is first positioned after SetBounds.
     849           2 :         i.secondLevel.boundsCmp = 0
     850           2 :         if i.secondLevel.err != nil {
     851           0 :                 // TODO(jackson): Can this case be turned into a panic? Once an error is
     852           0 :                 // encountered, the iterator must be re-seeked.
     853           0 :                 return nil
     854           0 :         }
     855           2 :         if ikv := i.secondLevel.NextPrefix(succKey); ikv != nil {
     856           2 :                 return ikv
     857           2 :         }
     858             :         // ikv == nil
     859           2 :         if i.secondLevel.err != nil {
     860           1 :                 return nil
     861           1 :         }
     862             : 
     863             :         // Did not find prefix in the existing second-level index block. This is the
     864             :         // slow-path where we seek the iterator.
     865           2 :         if !PI(&i.topLevelIndex).SeekGE(succKey) {
     866           0 :                 PD(&i.secondLevel.data).Invalidate()
     867           0 :                 PI(&i.secondLevel.index).Invalidate()
     868           0 :                 return nil
     869           0 :         }
     870           2 :         result := i.loadIndex(+1)
     871           2 :         if result == loadBlockFailed {
     872           1 :                 return nil
     873           1 :         }
     874           2 :         if result == loadBlockIrrelevant {
     875           2 :                 // Enforce the upper bound here since don't want to bother moving to the
     876           2 :                 // next entry in the top level index if upper bound is already exceeded.
     877           2 :                 // Note that the next entry starts with keys >= ikv.InternalKey.UserKey
     878           2 :                 // since even though this is the block separator, the same user key can
     879           2 :                 // span multiple index blocks. If upper is exclusive we use >= below,
     880           2 :                 // else we use >.
     881           2 :                 if i.secondLevel.upper != nil {
     882           2 :                         cmp := i.secondLevel.cmp(PI(&i.topLevelIndex).Separator(), i.secondLevel.upper)
     883           2 :                         if (!i.secondLevel.endKeyInclusive && cmp >= 0) || cmp > 0 {
     884           0 :                                 i.secondLevel.exhaustedBounds = +1
     885           0 :                         }
     886             :                 }
     887           2 :         } else if kv := i.secondLevel.SeekGE(succKey, base.SeekGEFlagsNone); kv != nil {
     888           2 :                 return i.secondLevel.maybeVerifyKey(kv)
     889           2 :         }
     890           2 :         return i.skipForward()
     891             : }
     892             : 
     893             : // Prev implements internalIterator.Prev, as documented in the pebble
     894             : // package.
     895           2 : func (i *twoLevelIterator[I, PI, D, PD]) Prev() *base.InternalKV {
     896           2 :         // Seek optimization only applies until iterator is first positioned after SetBounds.
     897           2 :         i.secondLevel.boundsCmp = 0
     898           2 :         if i.secondLevel.err != nil {
     899           1 :                 return nil
     900           1 :         }
     901           2 :         if kv := i.secondLevel.Prev(); kv != nil {
     902           2 :                 return kv
     903           2 :         }
     904           2 :         return i.skipBackward()
     905             : }
     906             : 
     907           2 : func (i *twoLevelIterator[I, PI, D, PD]) skipForward() *base.InternalKV {
     908           2 :         for {
     909           2 :                 if i.secondLevel.err != nil || i.secondLevel.exhaustedBounds > 0 {
     910           2 :                         return nil
     911           2 :                 }
     912             : 
     913             :                 // It is possible that skipBackward went too far and the virtual table lower
     914             :                 // bound is after the first key in the block we are about to load, in which
     915             :                 // case we must use SeekGE below. The keys in the block we are about to load
     916             :                 // start right after the topLevelIndex key (before we Next).
     917             :                 //
     918             :                 // An example of how this can happen:
     919             :                 //
     920             :                 //   Second-level index block 1 - contains keys a@1, c@1
     921             :                 //   Second-level index block 2 - contains keys e@1, g@1
     922             :                 //   Second-level index block 3 - contains keys i@2, k@2
     923             :                 //
     924             :                 //   The virtual table lower bound is f. We have a range key masking filter
     925             :                 //   that filters keys with @1 suffix. We are positioned inside block 3 then
     926             :                 //   we Prev(). Block 2 is entirely filtered out, which makes us move to
     927             :                 //   block 1. Now the range key masking filter gets an update (via
     928             :                 //   SpanChanged) and it no longer filters out any keys. At this point if a
     929             :                 //   Next happens, we will load block 2 but it would not be legal to return
     930             :                 //   "e@1" which is outside the virtual bounds.
     931             :                 //
     932             :                 //   The core of the problem is that skipBackward doesn't know it can stop
     933             :                 //   at block 2, because it doesn't know what keys are at the start of that
     934             :                 //   block. This is why we don't have this problem in the opposite
     935             :                 //   direction: skipForward will never go beyond the last relevant block
     936             :                 //   because it looks at the separator key which is an upper bound for the
     937             :                 //   block.
     938             :                 //
     939             :                 // Note that this is only a problem with virtual tables; we make no
     940             :                 // guarantees wrt an iterator lower bound when we iterate forward. But we
     941             :                 // must never return keys that are not inside the virtual table.
     942           2 :                 useSeek := i.secondLevel.vState != nil && (!PI(&i.topLevelIndex).Valid() ||
     943           2 :                         i.secondLevel.cmp(PI(&i.topLevelIndex).Separator(), i.secondLevel.vState.lower.UserKey) < 0)
     944           2 : 
     945           2 :                 i.secondLevel.exhaustedBounds = 0
     946           2 :                 if !PI(&i.topLevelIndex).Next() {
     947           2 :                         PD(&i.secondLevel.data).Invalidate()
     948           2 :                         PI(&i.secondLevel.index).Invalidate()
     949           2 :                         return nil
     950           2 :                 }
     951           2 :                 result := i.loadIndex(+1)
     952           2 :                 if result == loadBlockFailed {
     953           1 :                         return nil
     954           1 :                 }
     955           2 :                 if result == loadBlockOK {
     956           2 :                         var ikv *base.InternalKV
     957           2 :                         if useSeek {
     958           2 :                                 ikv = i.secondLevel.SeekGE(i.secondLevel.lower, base.SeekGEFlagsNone)
     959           2 :                         } else {
     960           2 :                                 ikv = i.secondLevel.firstInternal()
     961           2 :                         }
     962           2 :                         if ikv != nil {
     963           2 :                                 return i.secondLevel.maybeVerifyKey(ikv)
     964           2 :                         }
     965             :                         // Next iteration will return if singleLevelIterator set
     966             :                         // exhaustedBounds = +1.
     967           2 :                 } else {
     968           2 :                         // result == loadBlockIrrelevant. Enforce the upper bound here since
     969           2 :                         // don't want to bother moving to the next entry in the top level
     970           2 :                         // index if upper bound is already exceeded. Note that the next
     971           2 :                         // entry starts with keys >= i.topLevelIndex.Separator() since even
     972           2 :                         // though this is the block separator, the same user key can span
     973           2 :                         // multiple index blocks. If upper is exclusive we use >= below,
     974           2 :                         // else we use >.
     975           2 :                         if i.secondLevel.upper != nil {
     976           2 :                                 cmp := i.secondLevel.cmp(PI(&i.topLevelIndex).Separator(), i.secondLevel.upper)
     977           2 :                                 if (!i.secondLevel.endKeyInclusive && cmp >= 0) || cmp > 0 {
     978           2 :                                         i.secondLevel.exhaustedBounds = +1
     979           2 :                                         // Next iteration will return.
     980           2 :                                 }
     981             :                         }
     982             :                 }
     983             :         }
     984             : }
     985             : 
     986           2 : func (i *twoLevelIterator[I, PI, D, PD]) skipBackward() *base.InternalKV {
     987           2 :         for {
     988           2 :                 if i.secondLevel.err != nil || i.secondLevel.exhaustedBounds < 0 {
     989           2 :                         return nil
     990           2 :                 }
     991           2 :                 i.secondLevel.exhaustedBounds = 0
     992           2 :                 if !PI(&i.topLevelIndex).Prev() {
     993           2 :                         PD(&i.secondLevel.data).Invalidate()
     994           2 :                         PI(&i.secondLevel.index).Invalidate()
     995           2 :                         return nil
     996           2 :                 }
     997           2 :                 result := i.loadIndex(-1)
     998           2 :                 if result == loadBlockFailed {
     999           1 :                         return nil
    1000           1 :                 }
    1001           2 :                 if result == loadBlockOK {
    1002           2 :                         ikv := i.secondLevel.lastInternal()
    1003           2 :                         if ikv != nil {
    1004           2 :                                 return i.secondLevel.maybeVerifyKey(ikv)
    1005           2 :                         }
    1006             : 
    1007             :                         // Next iteration will return if singleLevelIterator set
    1008             :                         // exhaustedBounds = -1.
    1009           2 :                 } else {
    1010           2 :                         // result == loadBlockIrrelevant. Enforce the lower bound here since
    1011           2 :                         // don't want to bother moving to the previous entry in the top
    1012           2 :                         // level index if lower bound is already exceeded. Note that the
    1013           2 :                         // previous entry starts with keys <= i.topLevelIndex.Separator() since
    1014           2 :                         // even though this is the current block's separator, the same user
    1015           2 :                         // key can span multiple index blocks.
    1016           2 :                         if i.secondLevel.lower != nil && i.secondLevel.cmp(PI(&i.topLevelIndex).Separator(), i.secondLevel.lower) < 0 {
    1017           2 :                                 i.secondLevel.exhaustedBounds = -1
    1018           2 :                                 // Next iteration will return.
    1019           2 :                         }
    1020             :                 }
    1021             :         }
    1022             : }
    1023             : 
    1024           2 : func (i *twoLevelIterator[I, PI, D, PD]) Error() error {
    1025           2 :         return i.secondLevel.Error()
    1026           2 : }
    1027             : 
    1028           2 : func (i *twoLevelIterator[I, PI, D, PD]) SetBounds(lower, upper []byte) {
    1029           2 :         i.secondLevel.SetBounds(lower, upper)
    1030           2 : }
    1031             : 
    1032           0 : func (i *twoLevelIterator[I, PI, D, PD]) SetContext(ctx context.Context) {
    1033           0 :         i.secondLevel.SetContext(ctx)
    1034           0 : }
    1035             : 
    1036           2 : func (i *twoLevelIterator[I, PI, D, PD]) SetCloseHook(fn func(i Iterator) error) {
    1037           2 :         i.secondLevel.SetCloseHook(fn)
    1038           2 : }
    1039             : 
    1040           2 : func (i *twoLevelIterator[I, PI, D, PD]) SetupForCompaction() {
    1041           2 :         i.secondLevel.SetupForCompaction()
    1042           2 : }
    1043             : 
    1044             : // Close implements internalIterator.Close, as documented in the pebble
    1045             : // package.
    1046           2 : func (i *twoLevelIterator[I, PI, D, PD]) Close() error {
    1047           2 :         if invariants.Enabled && i.secondLevel.pool != nil {
    1048           0 :                 panic("twoLevelIterator's singleLevelIterator has its own non-nil pool")
    1049             :         }
    1050           2 :         pool := i.pool
    1051           2 :         err := i.secondLevel.closeInternal()
    1052           2 :         err = firstError(err, PI(&i.topLevelIndex).Close())
    1053           2 :         *i = twoLevelIterator[I, PI, D, PD]{
    1054           2 :                 secondLevel:   i.secondLevel.resetForReuse(),
    1055           2 :                 topLevelIndex: PI(&i.topLevelIndex).ResetForReuse(),
    1056           2 :                 pool:          pool,
    1057           2 :         }
    1058           2 :         if pool != nil {
    1059           2 :                 pool.Put(i)
    1060           2 :         }
    1061           2 :         return err
    1062             : }

Generated by: LCOV version 1.14