LCOV - code coverage report
Current view: top level - pebble/sstable - reader_iter_single_lvl.go (source / functions) Hit Total Coverage
Test: 2023-10-21 08:16Z babd592d - tests only.lcov Lines: 796 900 88.4 %
Date: 2023-10-21 08:17:27 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : package sstable
       6             : 
       7             : import (
       8             :         "context"
       9             :         "fmt"
      10             :         "unsafe"
      11             : 
      12             :         "github.com/cockroachdb/pebble/internal/base"
      13             :         "github.com/cockroachdb/pebble/internal/invariants"
      14             :         "github.com/cockroachdb/pebble/objstorage"
      15             :         "github.com/cockroachdb/pebble/objstorage/objstorageprovider"
      16             :         "github.com/cockroachdb/pebble/objstorage/objstorageprovider/objiotracing"
      17             : )
      18             : 
      19             : // singleLevelIterator iterates over an entire table of data. To seek for a given
      20             : // key, it first looks in the index for the block that contains that key, and then
      21             : // looks inside that block.
      22             : type singleLevelIterator struct {
      23             :         ctx context.Context
      24             :         cmp Compare
      25             :         // Global lower/upper bound for the iterator.
      26             :         lower []byte
      27             :         upper []byte
      28             :         bpfs  *BlockPropertiesFilterer
      29             :         // Per-block lower/upper bound. Nil if the bound does not apply to the block
      30             :         // because we determined the block lies completely within the bound.
      31             :         blockLower []byte
      32             :         blockUpper []byte
      33             :         reader     *Reader
      34             :         // vState will be set iff the iterator is constructed for virtual sstable
      35             :         // iteration.
      36             :         vState *virtualState
      37             :         // endKeyInclusive is set to force the iterator to treat the upper field as
      38             :         // inclusive while iterating instead of exclusive.
      39             :         endKeyInclusive bool
      40             :         index           blockIter
      41             :         data            blockIter
      42             :         dataRH          objstorage.ReadHandle
      43             :         dataRHPrealloc  objstorageprovider.PreallocatedReadHandle
      44             :         // dataBH refers to the last data block that the iterator considered
      45             :         // loading. It may not actually have loaded the block, due to an error or
      46             :         // because it was considered irrelevant.
      47             :         dataBH   BlockHandle
      48             :         vbReader *valueBlockReader
      49             :         // vbRH is the read handle for value blocks, which are in a different
      50             :         // part of the sstable than data blocks.
      51             :         vbRH         objstorage.ReadHandle
      52             :         vbRHPrealloc objstorageprovider.PreallocatedReadHandle
      53             :         err          error
      54             :         closeHook    func(i Iterator) error
      55             :         stats        *base.InternalIteratorStats
      56             :         bufferPool   *BufferPool
      57             : 
      58             :         // boundsCmp and positionedUsingLatestBounds are for optimizing iteration
      59             :         // that uses multiple adjacent bounds. The seek after setting a new bound
      60             :         // can use the fact that the iterator is either within the previous bounds
      61             :         // or exactly one key before or after the bounds. If the new bounds is
      62             :         // after/before the previous bounds, and we are already positioned at a
      63             :         // block that is relevant for the new bounds, we can try to first position
      64             :         // using Next/Prev (repeatedly) instead of doing a more expensive seek.
      65             :         //
      66             :         // When there are wide files at higher levels that match the bounds
      67             :         // but don't have any data for the bound, we will already be
      68             :         // positioned at the key beyond the bounds and won't need to do much
      69             :         // work -- given that most data is in L6, such files are likely to
      70             :         // dominate the performance of the mergingIter, and may be the main
      71             :         // benefit of this performance optimization (of course it also helps
      72             :         // when the file that has the data has successive seeks that stay in
      73             :         // the same block).
      74             :         //
      75             :         // Specifically, boundsCmp captures the relationship between the previous
      76             :         // and current bounds, if the iterator had been positioned after setting
      77             :         // the previous bounds. If it was not positioned, i.e., Seek/First/Last
      78             :         // were not called, we don't know where it is positioned and cannot
      79             :         // optimize.
      80             :         //
      81             :         // Example: Bounds moving forward, and iterator exhausted in forward direction.
      82             :         //      bounds = [f, h), ^ shows block iterator position
      83             :         //  file contents [ a  b  c  d  e  f  g  h  i  j  k ]
      84             :         //                                       ^
      85             :         //  new bounds = [j, k). Since positionedUsingLatestBounds=true, boundsCmp is
      86             :         //  set to +1. SeekGE(j) can use next (the optimization also requires that j
      87             :         //  is within the block, but that is not for correctness, but to limit the
      88             :         //  optimization to when it will actually be an optimization).
      89             :         //
      90             :         // Example: Bounds moving forward.
      91             :         //      bounds = [f, h), ^ shows block iterator position
      92             :         //  file contents [ a  b  c  d  e  f  g  h  i  j  k ]
      93             :         //                                 ^
      94             :         //  new bounds = [j, k). Since positionedUsingLatestBounds=true, boundsCmp is
      95             :         //  set to +1. SeekGE(j) can use next.
      96             :         //
      97             :         // Example: Bounds moving forward, but iterator not positioned using previous
      98             :         //  bounds.
      99             :         //      bounds = [f, h), ^ shows block iterator position
     100             :         //  file contents [ a  b  c  d  e  f  g  h  i  j  k ]
     101             :         //                                             ^
     102             :         //  new bounds = [i, j). Iterator is at j since it was never positioned using
     103             :         //  [f, h). So positionedUsingLatestBounds=false, and boundsCmp is set to 0.
     104             :         //  SeekGE(i) will not use next.
     105             :         //
     106             :         // Example: Bounds moving forward and sparse file
     107             :         //      bounds = [f, h), ^ shows block iterator position
     108             :         //  file contents [ a z ]
     109             :         //                    ^
     110             :         //  new bounds = [j, k). Since positionedUsingLatestBounds=true, boundsCmp is
     111             :         //  set to +1. SeekGE(j) notices that the iterator is already past j and does
     112             :         //  not need to do anything.
     113             :         //
     114             :         // Similar examples can be constructed for backward iteration.
     115             :         //
     116             :         // This notion of exactly one key before or after the bounds is not quite
     117             :         // true when block properties are used to ignore blocks. In that case we
     118             :         // can't stop precisely at the first block that is past the bounds since
     119             :         // we are using the index entries to enforce the bounds.
     120             :         //
     121             :         // e.g. 3 blocks with keys [b, c]  [f, g], [i, j, k] with index entries d,
     122             :         // h, l. And let the lower bound be k, and we are reverse iterating. If
     123             :         // the block [i, j, k] is ignored due to the block interval annotations we
     124             :         // do need to move the index to block [f, g] since the index entry for the
     125             :         // [i, j, k] block is l which is not less than the lower bound of k. So we
     126             :         // have passed the entries i, j.
     127             :         //
     128             :         // This behavior is harmless since the block property filters are fixed
     129             :         // for the lifetime of the iterator so i, j are irrelevant. In addition,
     130             :         // the current code will not load the [f, g] block, so the seek
     131             :         // optimization that attempts to use Next/Prev does not apply anyway.
     132             :         boundsCmp                   int
     133             :         positionedUsingLatestBounds bool
     134             : 
     135             :         // exhaustedBounds represents whether the iterator is exhausted for
     136             :         // iteration by reaching the upper or lower bound. +1 when exhausted
     137             :         // the upper bound, -1 when exhausted the lower bound, and 0 when
     138             :         // neither. exhaustedBounds is also used for the TrySeekUsingNext
     139             :         // optimization in twoLevelIterator and singleLevelIterator. Care should be
     140             :         // taken in setting this in twoLevelIterator before calling into
     141             :         // singleLevelIterator, given that these two iterators share this field.
     142             :         exhaustedBounds int8
     143             : 
     144             :         // maybeFilteredKeysSingleLevel indicates whether the last iterator
     145             :         // positioning operation may have skipped any data blocks due to
     146             :         // block-property filters when positioning the index.
     147             :         maybeFilteredKeysSingleLevel bool
     148             : 
     149             :         // useFilter specifies whether the filter block in this sstable, if present,
     150             :         // should be used for prefix seeks or not. In some cases it is beneficial
     151             :         // to skip a filter block even if it exists (eg. if probability of a match
     152             :         // is high).
     153             :         useFilter              bool
     154             :         lastBloomFilterMatched bool
     155             : 
     156             :         hideObsoletePoints bool
     157             : }
     158             : 
     159             : // singleLevelIterator implements the base.InternalIterator interface.
     160             : var _ base.InternalIterator = (*singleLevelIterator)(nil)
     161             : 
     162             : // init initializes a singleLevelIterator for reading from the table. It is
     163             : // synonymous with Reader.NewIter, but allows for reusing of the iterator
     164             : // between different Readers.
     165             : //
     166             : // Note that lower, upper passed into init has nothing to do with virtual sstable
     167             : // bounds. If the virtualState passed in is not nil, then virtual sstable bounds
     168             : // will be enforced.
     169             : func (i *singleLevelIterator) init(
     170             :         ctx context.Context,
     171             :         r *Reader,
     172             :         v *virtualState,
     173             :         lower, upper []byte,
     174             :         filterer *BlockPropertiesFilterer,
     175             :         useFilter, hideObsoletePoints bool,
     176             :         stats *base.InternalIteratorStats,
     177             :         rp ReaderProvider,
     178             :         bufferPool *BufferPool,
     179           1 : ) error {
     180           1 :         if r.err != nil {
     181           0 :                 return r.err
     182           0 :         }
     183           1 :         indexH, err := r.readIndex(ctx, stats)
     184           1 :         if err != nil {
     185           1 :                 return err
     186           1 :         }
     187           1 :         if v != nil {
     188           1 :                 i.vState = v
     189           1 :                 i.endKeyInclusive, lower, upper = v.constrainBounds(lower, upper, false /* endInclusive */)
     190           1 :         }
     191             : 
     192           1 :         i.ctx = ctx
     193           1 :         i.lower = lower
     194           1 :         i.upper = upper
     195           1 :         i.bpfs = filterer
     196           1 :         i.useFilter = useFilter
     197           1 :         i.reader = r
     198           1 :         i.cmp = r.Compare
     199           1 :         i.stats = stats
     200           1 :         i.hideObsoletePoints = hideObsoletePoints
     201           1 :         i.bufferPool = bufferPool
     202           1 :         err = i.index.initHandle(i.cmp, indexH, r.Properties.GlobalSeqNum, false)
     203           1 :         if err != nil {
     204           0 :                 // blockIter.Close releases indexH and always returns a nil error
     205           0 :                 _ = i.index.Close()
     206           0 :                 return err
     207           0 :         }
     208           1 :         i.dataRH = objstorageprovider.UsePreallocatedReadHandle(ctx, r.readable, &i.dataRHPrealloc)
     209           1 :         if r.tableFormat >= TableFormatPebblev3 {
     210           1 :                 if r.Properties.NumValueBlocks > 0 {
     211           1 :                         // NB: we cannot avoid this ~248 byte allocation, since valueBlockReader
     212           1 :                         // can outlive the singleLevelIterator due to being embedded in a
     213           1 :                         // LazyValue. This consumes ~2% in microbenchmark CPU profiles, but we
     214           1 :                         // should only optimize this if it shows up as significant in end-to-end
     215           1 :                         // CockroachDB benchmarks, since it is tricky to do so. One possibility
     216           1 :                         // is that if many sstable iterators only get positioned at latest
     217           1 :                         // versions of keys, and therefore never expose a LazyValue that is
     218           1 :                         // separated to their callers, they can put this valueBlockReader into a
     219           1 :                         // sync.Pool.
     220           1 :                         i.vbReader = &valueBlockReader{
     221           1 :                                 ctx:    ctx,
     222           1 :                                 bpOpen: i,
     223           1 :                                 rp:     rp,
     224           1 :                                 vbih:   r.valueBIH,
     225           1 :                                 stats:  stats,
     226           1 :                         }
     227           1 :                         i.data.lazyValueHandling.vbr = i.vbReader
     228           1 :                         i.vbRH = objstorageprovider.UsePreallocatedReadHandle(ctx, r.readable, &i.vbRHPrealloc)
     229           1 :                 }
     230           1 :                 i.data.lazyValueHandling.hasValuePrefix = true
     231             :         }
     232           1 :         return nil
     233             : }
     234             : 
     235             : // Helper function to check if keys returned from iterator are within global and virtual bounds.
     236             : func (i *singleLevelIterator) maybeVerifyKey(
     237             :         iKey *InternalKey, val base.LazyValue,
     238           1 : ) (*InternalKey, base.LazyValue) {
     239           1 :         // maybeVerifyKey only verifies keys for virtual sstable iterators.
     240           1 :         if invariants.Enabled && i.vState != nil && iKey != nil {
     241           1 :                 key := iKey.UserKey
     242           1 : 
     243           1 :                 uc, vuc := i.cmp(key, i.upper), i.cmp(key, i.vState.upper.UserKey)
     244           1 :                 lc, vlc := i.cmp(key, i.lower), i.cmp(key, i.vState.lower.UserKey)
     245           1 : 
     246           1 :                 if (i.vState.upper.IsExclusiveSentinel() && vuc == 0) || (!i.endKeyInclusive && uc == 0) || uc > 0 || vuc > 0 || lc < 0 || vlc < 0 {
     247           0 :                         panic(fmt.Sprintf("key: %s out of bounds of singleLevelIterator", key))
     248             :                 }
     249             :         }
     250           1 :         return iKey, val
     251             : }
     252             : 
     253             : // setupForCompaction sets up the singleLevelIterator for use with compactionIter.
     254             : // Currently, it skips readahead ramp-up. It should be called after init is called.
     255           1 : func (i *singleLevelIterator) setupForCompaction() {
     256           1 :         i.dataRH.SetupForCompaction()
     257           1 :         if i.vbRH != nil {
     258           1 :                 i.vbRH.SetupForCompaction()
     259           1 :         }
     260             : }
     261             : 
     262           1 : func (i *singleLevelIterator) resetForReuse() singleLevelIterator {
     263           1 :         return singleLevelIterator{
     264           1 :                 index: i.index.resetForReuse(),
     265           1 :                 data:  i.data.resetForReuse(),
     266           1 :         }
     267           1 : }
     268             : 
     269           1 : func (i *singleLevelIterator) initBounds() {
     270           1 :         // Trim the iteration bounds for the current block. We don't have to check
     271           1 :         // the bounds on each iteration if the block is entirely contained within the
     272           1 :         // iteration bounds.
     273           1 :         i.blockLower = i.lower
     274           1 :         if i.blockLower != nil {
     275           1 :                 key, _ := i.data.First()
     276           1 :                 if key != nil && i.cmp(i.blockLower, key.UserKey) < 0 {
     277           1 :                         // The lower-bound is less than the first key in the block. No need
     278           1 :                         // to check the lower-bound again for this block.
     279           1 :                         i.blockLower = nil
     280           1 :                 }
     281             :         }
     282           1 :         i.blockUpper = i.upper
     283           1 :         if i.blockUpper != nil && i.cmp(i.blockUpper, i.index.Key().UserKey) > 0 {
     284           1 :                 // The upper-bound is greater than the index key which itself is greater
     285           1 :                 // than or equal to every key in the block. No need to check the
     286           1 :                 // upper-bound again for this block. Even if blockUpper is inclusive
     287           1 :                 // because of upper being inclusive, we can still safely set blockUpper
     288           1 :                 // to nil here.
     289           1 :                 //
     290           1 :                 // TODO(bananabrick): We could also set blockUpper to nil for the >=
     291           1 :                 // case, if blockUpper is inclusive.
     292           1 :                 i.blockUpper = nil
     293           1 :         }
     294             : }
     295             : 
     296             : // Deterministic disabling of the bounds-based optimization that avoids seeking.
     297             : // Uses the iterator pointer, since we want diversity in iterator behavior for
     298             : // the same SetBounds call. Used for tests.
     299           1 : func disableBoundsOpt(bound []byte, ptr uintptr) bool {
     300           1 :         // Fibonacci hash https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
     301           1 :         simpleHash := (11400714819323198485 * uint64(ptr)) >> 63
     302           1 :         return bound[len(bound)-1]&byte(1) == 0 && simpleHash == 0
     303           1 : }
     304             : 
     305             : // ensureBoundsOptDeterminism provides a facility for disabling of the bounds
     306             : // optimizations performed by disableBoundsOpt for tests that require
     307             : // deterministic iterator behavior. Some unit tests examine internal iterator
     308             : // state and require this behavior to be deterministic.
     309             : var ensureBoundsOptDeterminism bool
     310             : 
     311             : // SetBounds implements internalIterator.SetBounds, as documented in the pebble
     312             : // package. Note that the upper field is exclusive.
     313           1 : func (i *singleLevelIterator) SetBounds(lower, upper []byte) {
     314           1 :         i.boundsCmp = 0
     315           1 :         if i.vState != nil {
     316           1 :                 // If the reader is constructed for a virtual sstable, then we must
     317           1 :                 // constrain the bounds of the reader. For physical sstables, the bounds
     318           1 :                 // can be wider than the actual sstable's bounds because we won't
     319           1 :                 // accidentally expose additional keys as there are no additional keys.
     320           1 :                 i.endKeyInclusive, lower, upper = i.vState.constrainBounds(
     321           1 :                         lower, upper, false,
     322           1 :                 )
     323           1 :         } else {
     324           1 :                 // TODO(bananabrick): Figure out the logic here to enable the boundsCmp
     325           1 :                 // optimization for virtual sstables.
     326           1 :                 if i.positionedUsingLatestBounds {
     327           1 :                         if i.upper != nil && lower != nil && i.cmp(i.upper, lower) <= 0 {
     328           1 :                                 i.boundsCmp = +1
     329           1 :                                 if invariants.Enabled && !ensureBoundsOptDeterminism &&
     330           1 :                                         disableBoundsOpt(lower, uintptr(unsafe.Pointer(i))) {
     331           1 :                                         i.boundsCmp = 0
     332           1 :                                 }
     333           1 :                         } else if i.lower != nil && upper != nil && i.cmp(upper, i.lower) <= 0 {
     334           1 :                                 i.boundsCmp = -1
     335           1 :                                 if invariants.Enabled && !ensureBoundsOptDeterminism &&
     336           1 :                                         disableBoundsOpt(upper, uintptr(unsafe.Pointer(i))) {
     337           1 :                                         i.boundsCmp = 0
     338           1 :                                 }
     339             :                         }
     340             :                 }
     341             :         }
     342             : 
     343           1 :         i.positionedUsingLatestBounds = false
     344           1 :         i.lower = lower
     345           1 :         i.upper = upper
     346           1 :         i.blockLower = nil
     347           1 :         i.blockUpper = nil
     348             : }
     349             : 
     350             : // loadBlock loads the block at the current index position and leaves i.data
     351             : // unpositioned. If unsuccessful, it sets i.err to any error encountered, which
     352             : // may be nil if we have simply exhausted the entire table.
     353           1 : func (i *singleLevelIterator) loadBlock(dir int8) loadBlockResult {
     354           1 :         if !i.index.valid() {
     355           0 :                 // Ensure the data block iterator is invalidated even if loading of the block
     356           0 :                 // fails.
     357           0 :                 i.data.invalidate()
     358           0 :                 return loadBlockFailed
     359           0 :         }
     360             :         // Load the next block.
     361           1 :         v := i.index.value()
     362           1 :         bhp, err := decodeBlockHandleWithProperties(v.InPlaceValue())
     363           1 :         if i.dataBH == bhp.BlockHandle && i.data.valid() {
     364           1 :                 // We're already at the data block we want to load. Reset bounds in case
     365           1 :                 // they changed since the last seek, but don't reload the block from cache
     366           1 :                 // or disk.
     367           1 :                 //
     368           1 :                 // It's safe to leave i.data in its original state here, as all callers to
     369           1 :                 // loadBlock make an absolute positioning call (i.e. a seek, first, or last)
     370           1 :                 // to `i.data` right after loadBlock returns loadBlockOK.
     371           1 :                 i.initBounds()
     372           1 :                 return loadBlockOK
     373           1 :         }
     374             :         // Ensure the data block iterator is invalidated even if loading of the block
     375             :         // fails.
     376           1 :         i.data.invalidate()
     377           1 :         i.dataBH = bhp.BlockHandle
     378           1 :         if err != nil {
     379           0 :                 i.err = errCorruptIndexEntry
     380           0 :                 return loadBlockFailed
     381           0 :         }
     382           1 :         if i.bpfs != nil {
     383           1 :                 intersects, err := i.bpfs.intersects(bhp.Props)
     384           1 :                 if err != nil {
     385           0 :                         i.err = errCorruptIndexEntry
     386           0 :                         return loadBlockFailed
     387           0 :                 }
     388           1 :                 if intersects == blockMaybeExcluded {
     389           1 :                         intersects = i.resolveMaybeExcluded(dir)
     390           1 :                 }
     391           1 :                 if intersects == blockExcluded {
     392           1 :                         i.maybeFilteredKeysSingleLevel = true
     393           1 :                         return loadBlockIrrelevant
     394           1 :                 }
     395             :                 // blockIntersects
     396             :         }
     397           1 :         ctx := objiotracing.WithBlockType(i.ctx, objiotracing.DataBlock)
     398           1 :         block, err := i.reader.readBlock(ctx, i.dataBH, nil /* transform */, i.dataRH, i.stats, i.bufferPool)
     399           1 :         if err != nil {
     400           1 :                 i.err = err
     401           1 :                 return loadBlockFailed
     402           1 :         }
     403           1 :         i.err = i.data.initHandle(i.cmp, block, i.reader.Properties.GlobalSeqNum, i.hideObsoletePoints)
     404           1 :         if i.err != nil {
     405           0 :                 // The block is partially loaded, and we don't want it to appear valid.
     406           0 :                 i.data.invalidate()
     407           0 :                 return loadBlockFailed
     408           0 :         }
     409           1 :         i.initBounds()
     410           1 :         return loadBlockOK
     411             : }
     412             : 
     413             : // readBlockForVBR implements the blockProviderWhenOpen interface for use by
     414             : // the valueBlockReader.
     415             : func (i *singleLevelIterator) readBlockForVBR(
     416             :         ctx context.Context, h BlockHandle, stats *base.InternalIteratorStats,
     417           1 : ) (bufferHandle, error) {
     418           1 :         ctx = objiotracing.WithBlockType(ctx, objiotracing.ValueBlock)
     419           1 :         return i.reader.readBlock(ctx, h, nil, i.vbRH, stats, i.bufferPool)
     420           1 : }
     421             : 
     422             : // resolveMaybeExcluded is invoked when the block-property filterer has found
     423             : // that a block is excluded according to its properties but only if its bounds
     424             : // fall within the filter's current bounds.  This function consults the
     425             : // appropriate bound, depending on the iteration direction, and returns either
     426             : // `blockIntersects` or `blockExcluded`.
     427           1 : func (i *singleLevelIterator) resolveMaybeExcluded(dir int8) intersectsResult {
     428           1 :         // TODO(jackson): We could first try comparing to top-level index block's
     429           1 :         // key, and if within bounds avoid per-data block key comparisons.
     430           1 : 
     431           1 :         // This iterator is configured with a bound-limited block property
     432           1 :         // filter. The bpf determined this block could be excluded from
     433           1 :         // iteration based on the property encoded in the block handle.
     434           1 :         // However, we still need to determine if the block is wholly
     435           1 :         // contained within the filter's key bounds.
     436           1 :         //
     437           1 :         // External guarantees ensure all the block's keys are ≥ the
     438           1 :         // filter's lower bound during forward iteration, and that all the
     439           1 :         // block's keys are < the filter's upper bound during backward
     440           1 :         // iteration. We only need to determine if the opposite bound is
     441           1 :         // also met.
     442           1 :         //
     443           1 :         // The index separator in index.Key() provides an inclusive
     444           1 :         // upper-bound for the data block's keys, guaranteeing that all its
     445           1 :         // keys are ≤ index.Key(). For forward iteration, this is all we
     446           1 :         // need.
     447           1 :         if dir > 0 {
     448           1 :                 // Forward iteration.
     449           1 :                 if i.bpfs.boundLimitedFilter.KeyIsWithinUpperBound(i.index.Key().UserKey) {
     450           1 :                         return blockExcluded
     451           1 :                 }
     452           1 :                 return blockIntersects
     453             :         }
     454             : 
     455             :         // Reverse iteration.
     456             :         //
     457             :         // Because we're iterating in the reverse direction, we don't yet have
     458             :         // enough context available to determine if the block is wholly contained
     459             :         // within its bounds. This case arises only during backward iteration,
     460             :         // because of the way the index is structured.
     461             :         //
     462             :         // Consider a bound-limited bpf limited to the bounds [b,d), loading the
     463             :         // block with separator `c`. During reverse iteration, the guarantee that
     464             :         // all the block's keys are < `d` is externally provided, but no guarantee
     465             :         // is made on the bpf's lower bound. The separator `c` only provides an
     466             :         // inclusive upper bound on the block's keys, indicating that the
     467             :         // corresponding block handle points to a block containing only keys ≤ `c`.
     468             :         //
     469             :         // To establish a lower bound, we step the index backwards to read the
     470             :         // previous block's separator, which provides an inclusive lower bound on
     471             :         // the original block's keys. Afterwards, we step forward to restore our
     472             :         // index position.
     473           1 :         if peekKey, _ := i.index.Prev(); peekKey == nil {
     474           1 :                 // The original block points to the first block of this index block. If
     475           1 :                 // there's a two-level index, it could potentially provide a lower
     476           1 :                 // bound, but the code refactoring necessary to read it doesn't seem
     477           1 :                 // worth the payoff. We fall through to loading the block.
     478           1 :         } else if i.bpfs.boundLimitedFilter.KeyIsWithinLowerBound(peekKey.UserKey) {
     479           1 :                 // The lower-bound on the original block falls within the filter's
     480           1 :                 // bounds, and we can skip the block (after restoring our current index
     481           1 :                 // position).
     482           1 :                 _, _ = i.index.Next()
     483           1 :                 return blockExcluded
     484           1 :         }
     485           1 :         _, _ = i.index.Next()
     486           1 :         return blockIntersects
     487             : }
     488             : 
     489           1 : func (i *singleLevelIterator) initBoundsForAlreadyLoadedBlock() {
     490           1 :         if i.data.getFirstUserKey() == nil {
     491           0 :                 panic("initBoundsForAlreadyLoadedBlock must not be called on empty or corrupted block")
     492             :         }
     493           1 :         i.blockLower = i.lower
     494           1 :         if i.blockLower != nil {
     495           1 :                 firstUserKey := i.data.getFirstUserKey()
     496           1 :                 if firstUserKey != nil && i.cmp(i.blockLower, firstUserKey) < 0 {
     497           1 :                         // The lower-bound is less than the first key in the block. No need
     498           1 :                         // to check the lower-bound again for this block.
     499           1 :                         i.blockLower = nil
     500           1 :                 }
     501             :         }
     502           1 :         i.blockUpper = i.upper
     503           1 :         if i.blockUpper != nil && i.cmp(i.blockUpper, i.index.Key().UserKey) > 0 {
     504           1 :                 // The upper-bound is greater than the index key which itself is greater
     505           1 :                 // than or equal to every key in the block. No need to check the
     506           1 :                 // upper-bound again for this block.
     507           1 :                 i.blockUpper = nil
     508           1 :         }
     509             : }
     510             : 
     511             : // The number of times to call Next/Prev in a block before giving up and seeking.
     512             : // The value of 4 is arbitrary.
     513             : // TODO(sumeer): experiment with dynamic adjustment based on the history of
     514             : // seeks for a particular iterator.
     515             : const numStepsBeforeSeek = 4
     516             : 
     517             : func (i *singleLevelIterator) trySeekGEUsingNextWithinBlock(
     518             :         key []byte,
     519           1 : ) (k *InternalKey, v base.LazyValue, done bool) {
     520           1 :         k, v = i.data.Key(), i.data.value()
     521           1 :         for j := 0; j < numStepsBeforeSeek; j++ {
     522           1 :                 curKeyCmp := i.cmp(k.UserKey, key)
     523           1 :                 if curKeyCmp >= 0 {
     524           1 :                         if i.blockUpper != nil {
     525           1 :                                 cmp := i.cmp(k.UserKey, i.blockUpper)
     526           1 :                                 if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
     527           1 :                                         i.exhaustedBounds = +1
     528           1 :                                         return nil, base.LazyValue{}, true
     529           1 :                                 }
     530             :                         }
     531           1 :                         return k, v, true
     532             :                 }
     533           1 :                 k, v = i.data.Next()
     534           1 :                 if k == nil {
     535           0 :                         break
     536             :                 }
     537             :         }
     538           1 :         return k, v, false
     539             : }
     540             : 
     541             : func (i *singleLevelIterator) trySeekLTUsingPrevWithinBlock(
     542             :         key []byte,
     543           1 : ) (k *InternalKey, v base.LazyValue, done bool) {
     544           1 :         k, v = i.data.Key(), i.data.value()
     545           1 :         for j := 0; j < numStepsBeforeSeek; j++ {
     546           1 :                 curKeyCmp := i.cmp(k.UserKey, key)
     547           1 :                 if curKeyCmp < 0 {
     548           1 :                         if i.blockLower != nil && i.cmp(k.UserKey, i.blockLower) < 0 {
     549           1 :                                 i.exhaustedBounds = -1
     550           1 :                                 return nil, base.LazyValue{}, true
     551           1 :                         }
     552           1 :                         return k, v, true
     553             :                 }
     554           1 :                 k, v = i.data.Prev()
     555           1 :                 if k == nil {
     556           1 :                         break
     557             :                 }
     558             :         }
     559           1 :         return k, v, false
     560             : }
     561             : 
     562           1 : func (i *singleLevelIterator) recordOffset() uint64 {
     563           1 :         offset := i.dataBH.Offset
     564           1 :         if i.data.valid() {
     565           1 :                 // - i.dataBH.Length/len(i.data.data) is the compression ratio. If
     566           1 :                 //   uncompressed, this is 1.
     567           1 :                 // - i.data.nextOffset is the uncompressed position of the current record
     568           1 :                 //   in the block.
     569           1 :                 // - i.dataBH.Offset is the offset of the block in the sstable before
     570           1 :                 //   decompression.
     571           1 :                 offset += (uint64(i.data.nextOffset) * i.dataBH.Length) / uint64(len(i.data.data))
     572           1 :         } else {
     573           1 :                 // Last entry in the block must increment bytes iterated by the size of the block trailer
     574           1 :                 // and restart points.
     575           1 :                 offset += i.dataBH.Length + blockTrailerLen
     576           1 :         }
     577           1 :         return offset
     578             : }
     579             : 
     580             : // SeekGE implements internalIterator.SeekGE, as documented in the pebble
     581             : // package. Note that SeekGE only checks the upper bound. It is up to the
     582             : // caller to ensure that key is greater than or equal to the lower bound.
     583             : func (i *singleLevelIterator) SeekGE(
     584             :         key []byte, flags base.SeekGEFlags,
     585           1 : ) (*InternalKey, base.LazyValue) {
     586           1 :         if i.vState != nil {
     587           1 :                 // Callers of SeekGE don't know about virtual sstable bounds, so we may
     588           1 :                 // have to internally restrict the bounds.
     589           1 :                 //
     590           1 :                 // TODO(bananabrick): We can optimize this check away for the level iter
     591           1 :                 // if necessary.
     592           1 :                 if i.cmp(key, i.lower) < 0 {
     593           1 :                         key = i.lower
     594           1 :                 }
     595             :         }
     596             : 
     597           1 :         if flags.TrySeekUsingNext() {
     598           1 :                 // The i.exhaustedBounds comparison indicates that the upper bound was
     599           1 :                 // reached. The i.data.isDataInvalidated() indicates that the sstable was
     600           1 :                 // exhausted.
     601           1 :                 if (i.exhaustedBounds == +1 || i.data.isDataInvalidated()) && i.err == nil {
     602           1 :                         // Already exhausted, so return nil.
     603           1 :                         return nil, base.LazyValue{}
     604           1 :                 }
     605           1 :                 if i.err != nil {
     606           0 :                         // The current iterator position cannot be used.
     607           0 :                         flags = flags.DisableTrySeekUsingNext()
     608           0 :                 }
     609             :                 // INVARIANT: flags.TrySeekUsingNext() => i.err == nil &&
     610             :                 // !i.exhaustedBounds==+1 && !i.data.isDataInvalidated(). That is,
     611             :                 // data-exhausted and bounds-exhausted, as defined earlier, are both
     612             :                 // false. This makes it safe to clear out i.exhaustedBounds and i.err
     613             :                 // before calling into seekGEHelper.
     614             :         }
     615             : 
     616           1 :         i.exhaustedBounds = 0
     617           1 :         i.err = nil // clear cached iteration error
     618           1 :         boundsCmp := i.boundsCmp
     619           1 :         // Seek optimization only applies until iterator is first positioned after SetBounds.
     620           1 :         i.boundsCmp = 0
     621           1 :         i.positionedUsingLatestBounds = true
     622           1 :         return i.seekGEHelper(key, boundsCmp, flags)
     623             : }
     624             : 
     625             : // seekGEHelper contains the common functionality for SeekGE and SeekPrefixGE.
     626             : func (i *singleLevelIterator) seekGEHelper(
     627             :         key []byte, boundsCmp int, flags base.SeekGEFlags,
     628           1 : ) (*InternalKey, base.LazyValue) {
     629           1 :         // Invariant: trySeekUsingNext => !i.data.isDataInvalidated() && i.exhaustedBounds != +1
     630           1 : 
     631           1 :         // SeekGE performs various step-instead-of-seeking optimizations: eg enabled
     632           1 :         // by trySeekUsingNext, or by monotonically increasing bounds (i.boundsCmp).
     633           1 :         // Care must be taken to ensure that when performing these optimizations and
     634           1 :         // the iterator becomes exhausted, i.maybeFilteredKeysSingleLevel is set appropriately.
     635           1 :         // Consider a previous SeekGE that filtered keys from k until the current
     636           1 :         // iterator position.
     637           1 :         //
     638           1 :         // If the previous SeekGE exhausted the iterator, it's possible keys greater
     639           1 :         // than or equal to the current search key were filtered. We must not reuse
     640           1 :         // the current iterator position without remembering the previous value of
     641           1 :         // maybeFilteredKeysSingleLevel.
     642           1 : 
     643           1 :         var dontSeekWithinBlock bool
     644           1 :         if !i.data.isDataInvalidated() && !i.index.isDataInvalidated() && i.data.valid() && i.index.valid() &&
     645           1 :                 boundsCmp > 0 && i.cmp(key, i.index.Key().UserKey) <= 0 {
     646           1 :                 // Fast-path: The bounds have moved forward and this SeekGE is
     647           1 :                 // respecting the lower bound (guaranteed by Iterator). We know that
     648           1 :                 // the iterator must already be positioned within or just outside the
     649           1 :                 // previous bounds. Therefore it cannot be positioned at a block (or
     650           1 :                 // the position within that block) that is ahead of the seek position.
     651           1 :                 // However it can be positioned at an earlier block. This fast-path to
     652           1 :                 // use Next() on the block is only applied when we are already at the
     653           1 :                 // block that the slow-path (the else-clause) would load -- this is
     654           1 :                 // the motivation for the i.cmp(key, i.index.Key().UserKey) <= 0
     655           1 :                 // predicate.
     656           1 :                 i.initBoundsForAlreadyLoadedBlock()
     657           1 :                 ikey, val, done := i.trySeekGEUsingNextWithinBlock(key)
     658           1 :                 if done {
     659           1 :                         return ikey, val
     660           1 :                 }
     661           1 :                 if ikey == nil {
     662           0 :                         // Done with this block.
     663           0 :                         dontSeekWithinBlock = true
     664           0 :                 }
     665           1 :         } else {
     666           1 :                 // Cannot use bounds monotonicity. But may be able to optimize if
     667           1 :                 // caller claimed externally known invariant represented by
     668           1 :                 // flags.TrySeekUsingNext().
     669           1 :                 if flags.TrySeekUsingNext() {
     670           1 :                         // seekPrefixGE or SeekGE has already ensured
     671           1 :                         // !i.data.isDataInvalidated() && i.exhaustedBounds != +1
     672           1 :                         currKey := i.data.Key()
     673           1 :                         value := i.data.value()
     674           1 :                         less := i.cmp(currKey.UserKey, key) < 0
     675           1 :                         // We could be more sophisticated and confirm that the seek
     676           1 :                         // position is within the current block before applying this
     677           1 :                         // optimization. But there may be some benefit even if it is in
     678           1 :                         // the next block, since we can avoid seeking i.index.
     679           1 :                         for j := 0; less && j < numStepsBeforeSeek; j++ {
     680           1 :                                 currKey, value = i.Next()
     681           1 :                                 if currKey == nil {
     682           1 :                                         return nil, base.LazyValue{}
     683           1 :                                 }
     684           1 :                                 less = i.cmp(currKey.UserKey, key) < 0
     685             :                         }
     686           1 :                         if !less {
     687           1 :                                 if i.blockUpper != nil {
     688           1 :                                         cmp := i.cmp(currKey.UserKey, i.blockUpper)
     689           1 :                                         if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
     690           0 :                                                 i.exhaustedBounds = +1
     691           0 :                                                 return nil, base.LazyValue{}
     692           0 :                                         }
     693             :                                 }
     694           1 :                                 return currKey, value
     695             :                         }
     696             :                 }
     697             : 
     698             :                 // Slow-path.
     699             :                 // Since we're re-seeking the iterator, the previous value of
     700             :                 // maybeFilteredKeysSingleLevel is irrelevant. If we filter out blocks
     701             :                 // during seeking, loadBlock will set it to true.
     702           1 :                 i.maybeFilteredKeysSingleLevel = false
     703           1 : 
     704           1 :                 var ikey *InternalKey
     705           1 :                 if ikey, _ = i.index.SeekGE(key, flags.DisableTrySeekUsingNext()); ikey == nil {
     706           1 :                         // The target key is greater than any key in the index block.
     707           1 :                         // Invalidate the block iterator so that a subsequent call to Prev()
     708           1 :                         // will return the last key in the table.
     709           1 :                         i.data.invalidate()
     710           1 :                         return nil, base.LazyValue{}
     711           1 :                 }
     712           1 :                 result := i.loadBlock(+1)
     713           1 :                 if result == loadBlockFailed {
     714           1 :                         return nil, base.LazyValue{}
     715           1 :                 }
     716           1 :                 if result == loadBlockIrrelevant {
     717           1 :                         // Enforce the upper bound here since don't want to bother moving
     718           1 :                         // to the next block if upper bound is already exceeded. Note that
     719           1 :                         // the next block starts with keys >= ikey.UserKey since even
     720           1 :                         // though this is the block separator, the same user key can span
     721           1 :                         // multiple blocks. If upper is exclusive we use >= below, else
     722           1 :                         // we use >.
     723           1 :                         if i.upper != nil {
     724           1 :                                 cmp := i.cmp(ikey.UserKey, i.upper)
     725           1 :                                 if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
     726           1 :                                         i.exhaustedBounds = +1
     727           1 :                                         return nil, base.LazyValue{}
     728           1 :                                 }
     729             :                         }
     730             :                         // Want to skip to the next block.
     731           1 :                         dontSeekWithinBlock = true
     732             :                 }
     733             :         }
     734           1 :         if !dontSeekWithinBlock {
     735           1 :                 if ikey, val := i.data.SeekGE(key, flags.DisableTrySeekUsingNext()); ikey != nil {
     736           1 :                         if i.blockUpper != nil {
     737           1 :                                 cmp := i.cmp(ikey.UserKey, i.blockUpper)
     738           1 :                                 if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
     739           1 :                                         i.exhaustedBounds = +1
     740           1 :                                         return nil, base.LazyValue{}
     741           1 :                                 }
     742             :                         }
     743           1 :                         return ikey, val
     744             :                 }
     745             :         }
     746           1 :         return i.skipForward()
     747             : }
     748             : 
     749             : // SeekPrefixGE implements internalIterator.SeekPrefixGE, as documented in the
     750             : // pebble package. Note that SeekPrefixGE only checks the upper bound. It is up
     751             : // to the caller to ensure that key is greater than or equal to the lower bound.
     752             : func (i *singleLevelIterator) SeekPrefixGE(
     753             :         prefix, key []byte, flags base.SeekGEFlags,
     754           1 : ) (*base.InternalKey, base.LazyValue) {
     755           1 :         if i.vState != nil {
     756           1 :                 // Callers of SeekPrefixGE aren't aware of virtual sstable bounds, so
     757           1 :                 // we may have to internally restrict the bounds.
     758           1 :                 //
     759           1 :                 // TODO(bananabrick): We can optimize away this check for the level iter
     760           1 :                 // if necessary.
     761           1 :                 if i.cmp(key, i.lower) < 0 {
     762           1 :                         key = i.lower
     763           1 :                 }
     764             :         }
     765           1 :         return i.seekPrefixGE(prefix, key, flags, i.useFilter)
     766             : }
     767             : 
     768             : func (i *singleLevelIterator) seekPrefixGE(
     769             :         prefix, key []byte, flags base.SeekGEFlags, checkFilter bool,
     770           1 : ) (k *InternalKey, value base.LazyValue) {
     771           1 :         // NOTE: prefix is only used for bloom filter checking and not later work in
     772           1 :         // this method. Hence, we can use the existing iterator position if the last
     773           1 :         // SeekPrefixGE did not fail bloom filter matching.
     774           1 : 
     775           1 :         err := i.err
     776           1 :         i.err = nil // clear cached iteration error
     777           1 :         if checkFilter && i.reader.tableFilter != nil {
     778           1 :                 if !i.lastBloomFilterMatched {
     779           1 :                         // Iterator is not positioned based on last seek.
     780           1 :                         flags = flags.DisableTrySeekUsingNext()
     781           1 :                 }
     782           1 :                 i.lastBloomFilterMatched = false
     783           1 :                 // Check prefix bloom filter.
     784           1 :                 var dataH bufferHandle
     785           1 :                 dataH, i.err = i.reader.readFilter(i.ctx, i.stats)
     786           1 :                 if i.err != nil {
     787           0 :                         i.data.invalidate()
     788           0 :                         return nil, base.LazyValue{}
     789           0 :                 }
     790           1 :                 mayContain := i.reader.tableFilter.mayContain(dataH.Get(), prefix)
     791           1 :                 dataH.Release()
     792           1 :                 if !mayContain {
     793           1 :                         // This invalidation may not be necessary for correctness, and may
     794           1 :                         // be a place to optimize later by reusing the already loaded
     795           1 :                         // block. It was necessary in earlier versions of the code since
     796           1 :                         // the caller was allowed to call Next when SeekPrefixGE returned
     797           1 :                         // nil. This is no longer allowed.
     798           1 :                         i.data.invalidate()
     799           1 :                         return nil, base.LazyValue{}
     800           1 :                 }
     801           1 :                 i.lastBloomFilterMatched = true
     802             :         }
     803           1 :         if flags.TrySeekUsingNext() {
     804           1 :                 // The i.exhaustedBounds comparison indicates that the upper bound was
     805           1 :                 // reached. The i.data.isDataInvalidated() indicates that the sstable was
     806           1 :                 // exhausted.
     807           1 :                 if (i.exhaustedBounds == +1 || i.data.isDataInvalidated()) && err == nil {
     808           1 :                         // Already exhausted, so return nil.
     809           1 :                         return nil, base.LazyValue{}
     810           1 :                 }
     811           1 :                 if err != nil {
     812           0 :                         // The current iterator position cannot be used.
     813           0 :                         flags = flags.DisableTrySeekUsingNext()
     814           0 :                 }
     815             :                 // INVARIANT: flags.TrySeekUsingNext() => err == nil &&
     816             :                 // !i.exhaustedBounds==+1 && !i.data.isDataInvalidated(). That is,
     817             :                 // data-exhausted and bounds-exhausted, as defined earlier, are both
     818             :                 // false. This makes it safe to clear out i.exhaustedBounds and i.err
     819             :                 // before calling into seekGEHelper.
     820             :         }
     821             :         // Bloom filter matches, or skipped, so this method will position the
     822             :         // iterator.
     823           1 :         i.exhaustedBounds = 0
     824           1 :         boundsCmp := i.boundsCmp
     825           1 :         // Seek optimization only applies until iterator is first positioned after SetBounds.
     826           1 :         i.boundsCmp = 0
     827           1 :         i.positionedUsingLatestBounds = true
     828           1 :         k, value = i.seekGEHelper(key, boundsCmp, flags)
     829           1 :         return i.maybeVerifyKey(k, value)
     830             : }
     831             : 
     832             : // virtualLast should only be called if i.vState != nil.
     833           1 : func (i *singleLevelIterator) virtualLast() (*InternalKey, base.LazyValue) {
     834           1 :         if i.vState == nil {
     835           0 :                 panic("pebble: invalid call to virtualLast")
     836             :         }
     837             : 
     838             :         // Seek to the first internal key.
     839           1 :         ikey, _ := i.SeekGE(i.upper, base.SeekGEFlagsNone)
     840           1 :         if i.endKeyInclusive {
     841           1 :                 // Let's say the virtual sstable upper bound is c#1, with the keys c#3, c#2,
     842           1 :                 // c#1, d, e, ... in the sstable. So, the last key in the virtual sstable is
     843           1 :                 // c#1. We can perform SeekGE(i.upper) and then keep nexting until we find
     844           1 :                 // the last key with userkey == i.upper.
     845           1 :                 //
     846           1 :                 // TODO(bananabrick): Think about how to improve this. If many internal keys
     847           1 :                 // with the same user key at the upper bound then this could be slow, but
     848           1 :                 // maybe the odds of having many internal keys with the same user key at the
     849           1 :                 // upper bound are low.
     850           1 :                 for ikey != nil && i.cmp(ikey.UserKey, i.upper) == 0 {
     851           1 :                         ikey, _ = i.Next()
     852           1 :                 }
     853           1 :                 return i.Prev()
     854             :         }
     855             : 
     856             :         // We seeked to the first key >= i.upper.
     857           1 :         return i.Prev()
     858             : }
     859             : 
     860             : // SeekLT implements internalIterator.SeekLT, as documented in the pebble
     861             : // package. Note that SeekLT only checks the lower bound. It is up to the
     862             : // caller to ensure that key is less than or equal to the upper bound.
     863             : func (i *singleLevelIterator) SeekLT(
     864             :         key []byte, flags base.SeekLTFlags,
     865           1 : ) (*InternalKey, base.LazyValue) {
     866           1 :         if i.vState != nil {
     867           1 :                 // Might have to fix upper bound since virtual sstable bounds are not
     868           1 :                 // known to callers of SeekLT.
     869           1 :                 //
     870           1 :                 // TODO(bananabrick): We can optimize away this check for the level iter
     871           1 :                 // if necessary.
     872           1 :                 cmp := i.cmp(key, i.upper)
     873           1 :                 // key == i.upper is fine. We'll do the right thing and return the
     874           1 :                 // first internal key with user key < key.
     875           1 :                 if cmp > 0 {
     876           1 :                         // Return the last key in the virtual sstable.
     877           1 :                         return i.virtualLast()
     878           1 :                 }
     879             :         }
     880             : 
     881           1 :         i.exhaustedBounds = 0
     882           1 :         i.err = nil // clear cached iteration error
     883           1 :         boundsCmp := i.boundsCmp
     884           1 :         // Seek optimization only applies until iterator is first positioned after SetBounds.
     885           1 :         i.boundsCmp = 0
     886           1 : 
     887           1 :         // Seeking operations perform various step-instead-of-seeking optimizations:
     888           1 :         // e.g. by considering monotonically increasing bounds (i.boundsCmp). Care
     889           1 :         // must be taken to ensure that when performing these optimizations and the
     890           1 :         // iterator becomes exhausted i.maybeFilteredKeysSingleLevel is set
     891           1 :         // appropriately.  Consider a previous SeekLT that filtered keys from k
     892           1 :         // until the current iterator position.
     893           1 :         //
     894           1 :         // If the previous SeekLT exhausted the iterator, it's possible keys
     895           1 :         // less than the current search key were filtered. We must not reuse the
     896           1 :         // current iterator position without remembering the previous value of
     897           1 :         // maybeFilteredKeysSingleLevel.
     898           1 : 
     899           1 :         i.positionedUsingLatestBounds = true
     900           1 : 
     901           1 :         var dontSeekWithinBlock bool
     902           1 :         if !i.data.isDataInvalidated() && !i.index.isDataInvalidated() && i.data.valid() && i.index.valid() &&
     903           1 :                 boundsCmp < 0 && i.cmp(i.data.getFirstUserKey(), key) < 0 {
     904           1 :                 // Fast-path: The bounds have moved backward, and this SeekLT is
     905           1 :                 // respecting the upper bound (guaranteed by Iterator). We know that
     906           1 :                 // the iterator must already be positioned within or just outside the
     907           1 :                 // previous bounds. Therefore it cannot be positioned at a block (or
     908           1 :                 // the position within that block) that is behind the seek position.
     909           1 :                 // However it can be positioned at a later block. This fast-path to
     910           1 :                 // use Prev() on the block is only applied when we are already at the
     911           1 :                 // block that can satisfy this seek -- this is the motivation for the
     912           1 :                 // i.cmp(i.data.getFirstUserKey(), key) < 0 predicate.
     913           1 :                 i.initBoundsForAlreadyLoadedBlock()
     914           1 :                 ikey, val, done := i.trySeekLTUsingPrevWithinBlock(key)
     915           1 :                 if done {
     916           1 :                         return ikey, val
     917           1 :                 }
     918           1 :                 if ikey == nil {
     919           1 :                         // Done with this block.
     920           1 :                         dontSeekWithinBlock = true
     921           1 :                 }
     922           1 :         } else {
     923           1 :                 // Slow-path.
     924           1 :                 i.maybeFilteredKeysSingleLevel = false
     925           1 :                 var ikey *InternalKey
     926           1 : 
     927           1 :                 // NB: If a bound-limited block property filter is configured, it's
     928           1 :                 // externally ensured that the filter is disabled (through returning
     929           1 :                 // Intersects=false irrespective of the block props provided) during
     930           1 :                 // seeks.
     931           1 :                 if ikey, _ = i.index.SeekGE(key, base.SeekGEFlagsNone); ikey == nil {
     932           1 :                         ikey, _ = i.index.Last()
     933           1 :                         if ikey == nil {
     934           0 :                                 return nil, base.LazyValue{}
     935           0 :                         }
     936             :                 }
     937             :                 // INVARIANT: ikey != nil.
     938           1 :                 result := i.loadBlock(-1)
     939           1 :                 if result == loadBlockFailed {
     940           0 :                         return nil, base.LazyValue{}
     941           0 :                 }
     942           1 :                 if result == loadBlockIrrelevant {
     943           1 :                         // Enforce the lower bound here since we don't want to bother moving
     944           1 :                         // to the previous block if lower bound is already exceeded. Note
     945           1 :                         // that the previous block starts with keys <= ikey.UserKey since
     946           1 :                         // even though this is the current block's separator, the same
     947           1 :                         // user key can span multiple blocks.
     948           1 :                         if i.lower != nil && i.cmp(ikey.UserKey, i.lower) < 0 {
     949           0 :                                 i.exhaustedBounds = -1
     950           0 :                                 return nil, base.LazyValue{}
     951           0 :                         }
     952             :                         // Want to skip to the previous block.
     953           1 :                         dontSeekWithinBlock = true
     954             :                 }
     955             :         }
     956           1 :         if !dontSeekWithinBlock {
     957           1 :                 if ikey, val := i.data.SeekLT(key, flags); ikey != nil {
     958           1 :                         if i.blockLower != nil && i.cmp(ikey.UserKey, i.blockLower) < 0 {
     959           1 :                                 i.exhaustedBounds = -1
     960           1 :                                 return nil, base.LazyValue{}
     961           1 :                         }
     962           1 :                         return ikey, val
     963             :                 }
     964             :         }
     965             :         // The index contains separator keys which may lie between
     966             :         // user-keys. Consider the user-keys:
     967             :         //
     968             :         //   complete
     969             :         // ---- new block ---
     970             :         //   complexion
     971             :         //
     972             :         // If these two keys end one block and start the next, the index key may
     973             :         // be chosen as "compleu". The SeekGE in the index block will then point
     974             :         // us to the block containing "complexion". If this happens, we want the
     975             :         // last key from the previous data block.
     976           1 :         return i.maybeVerifyKey(i.skipBackward())
     977             : }
     978             : 
     979             : // First implements internalIterator.First, as documented in the pebble
     980             : // package. Note that First only checks the upper bound. It is up to the caller
     981             : // to ensure that key is greater than or equal to the lower bound (e.g. via a
     982             : // call to SeekGE(lower)).
     983           1 : func (i *singleLevelIterator) First() (*InternalKey, base.LazyValue) {
     984           1 :         // If the iterator was created on a virtual sstable, we will SeekGE to the
     985           1 :         // lower bound instead of using First, because First does not respect
     986           1 :         // bounds.
     987           1 :         if i.vState != nil {
     988           1 :                 return i.SeekGE(i.lower, base.SeekGEFlagsNone)
     989           1 :         }
     990             : 
     991           1 :         if i.lower != nil {
     992           0 :                 panic("singleLevelIterator.First() used despite lower bound")
     993             :         }
     994           1 :         i.positionedUsingLatestBounds = true
     995           1 :         i.maybeFilteredKeysSingleLevel = false
     996           1 : 
     997           1 :         return i.firstInternal()
     998             : }
     999             : 
    1000             : // firstInternal is a helper used for absolute positioning in a single-level
    1001             : // index file, or for positioning in the second-level index in a two-level
    1002             : // index file. For the latter, one cannot make any claims about absolute
    1003             : // positioning.
    1004           1 : func (i *singleLevelIterator) firstInternal() (*InternalKey, base.LazyValue) {
    1005           1 :         i.exhaustedBounds = 0
    1006           1 :         i.err = nil // clear cached iteration error
    1007           1 :         // Seek optimization only applies until iterator is first positioned after SetBounds.
    1008           1 :         i.boundsCmp = 0
    1009           1 : 
    1010           1 :         var ikey *InternalKey
    1011           1 :         if ikey, _ = i.index.First(); ikey == nil {
    1012           0 :                 i.data.invalidate()
    1013           0 :                 return nil, base.LazyValue{}
    1014           0 :         }
    1015           1 :         result := i.loadBlock(+1)
    1016           1 :         if result == loadBlockFailed {
    1017           1 :                 return nil, base.LazyValue{}
    1018           1 :         }
    1019           1 :         if result == loadBlockOK {
    1020           1 :                 if ikey, val := i.data.First(); ikey != nil {
    1021           1 :                         if i.blockUpper != nil {
    1022           1 :                                 cmp := i.cmp(ikey.UserKey, i.blockUpper)
    1023           1 :                                 if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
    1024           1 :                                         i.exhaustedBounds = +1
    1025           1 :                                         return nil, base.LazyValue{}
    1026           1 :                                 }
    1027             :                         }
    1028           1 :                         return ikey, val
    1029             :                 }
    1030             :                 // Else fall through to skipForward.
    1031           1 :         } else {
    1032           1 :                 // result == loadBlockIrrelevant. Enforce the upper bound here since
    1033           1 :                 // we don't want to bother moving to the next block if upper bound is
    1034           1 :                 // already exceeded. Note that the next block starts with keys >=
    1035           1 :                 // ikey.UserKey since even though this is the block separator, the
    1036           1 :                 // same user key can span multiple blocks. If upper is exclusive we
    1037           1 :                 // use >= below, else we use >.
    1038           1 :                 if i.upper != nil {
    1039           1 :                         cmp := i.cmp(ikey.UserKey, i.upper)
    1040           1 :                         if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
    1041           1 :                                 i.exhaustedBounds = +1
    1042           1 :                                 return nil, base.LazyValue{}
    1043           1 :                         }
    1044             :                 }
    1045             :                 // Else fall through to skipForward.
    1046             :         }
    1047             : 
    1048           1 :         return i.skipForward()
    1049             : }
    1050             : 
    1051             : // Last implements internalIterator.Last, as documented in the pebble
    1052             : // package. Note that Last only checks the lower bound. It is up to the caller
    1053             : // to ensure that key is less than the upper bound (e.g. via a call to
    1054             : // SeekLT(upper)).
    1055           1 : func (i *singleLevelIterator) Last() (*InternalKey, base.LazyValue) {
    1056           1 :         if i.vState != nil {
    1057           1 :                 return i.virtualLast()
    1058           1 :         }
    1059             : 
    1060           1 :         if i.upper != nil {
    1061           0 :                 panic("singleLevelIterator.Last() used despite upper bound")
    1062             :         }
    1063           1 :         i.positionedUsingLatestBounds = true
    1064           1 :         i.maybeFilteredKeysSingleLevel = false
    1065           1 :         return i.lastInternal()
    1066             : }
    1067             : 
    1068             : // lastInternal is a helper used for absolute positioning in a single-level
    1069             : // index file, or for positioning in the second-level index in a two-level
    1070             : // index file. For the latter, one cannot make any claims about absolute
    1071             : // positioning.
    1072           1 : func (i *singleLevelIterator) lastInternal() (*InternalKey, base.LazyValue) {
    1073           1 :         i.exhaustedBounds = 0
    1074           1 :         i.err = nil // clear cached iteration error
    1075           1 :         // Seek optimization only applies until iterator is first positioned after SetBounds.
    1076           1 :         i.boundsCmp = 0
    1077           1 : 
    1078           1 :         var ikey *InternalKey
    1079           1 :         if ikey, _ = i.index.Last(); ikey == nil {
    1080           0 :                 i.data.invalidate()
    1081           0 :                 return nil, base.LazyValue{}
    1082           0 :         }
    1083           1 :         result := i.loadBlock(-1)
    1084           1 :         if result == loadBlockFailed {
    1085           1 :                 return nil, base.LazyValue{}
    1086           1 :         }
    1087           1 :         if result == loadBlockOK {
    1088           1 :                 if ikey, val := i.data.Last(); ikey != nil {
    1089           1 :                         if i.blockLower != nil && i.cmp(ikey.UserKey, i.blockLower) < 0 {
    1090           1 :                                 i.exhaustedBounds = -1
    1091           1 :                                 return nil, base.LazyValue{}
    1092           1 :                         }
    1093           1 :                         return ikey, val
    1094             :                 }
    1095             :                 // Else fall through to skipBackward.
    1096           1 :         } else {
    1097           1 :                 // result == loadBlockIrrelevant. Enforce the lower bound here since
    1098           1 :                 // we don't want to bother moving to the previous block if lower bound is
    1099           1 :                 // already exceeded. Note that the previous block starts with keys <=
    1100           1 :                 // ikey.UserKey since even though this is the current block's
    1101           1 :                 // separator, the same user key can span multiple blocks.
    1102           1 :                 if i.lower != nil && i.cmp(ikey.UserKey, i.lower) < 0 {
    1103           0 :                         i.exhaustedBounds = -1
    1104           0 :                         return nil, base.LazyValue{}
    1105           0 :                 }
    1106             :         }
    1107             : 
    1108           1 :         return i.skipBackward()
    1109             : }
    1110             : 
    1111             : // Next implements internalIterator.Next, as documented in the pebble
    1112             : // package.
    1113             : // Note: compactionIterator.Next mirrors the implementation of Iterator.Next
    1114             : // due to performance. Keep the two in sync.
    1115           1 : func (i *singleLevelIterator) Next() (*InternalKey, base.LazyValue) {
    1116           1 :         if i.exhaustedBounds == +1 {
    1117           0 :                 panic("Next called even though exhausted upper bound")
    1118             :         }
    1119           1 :         i.exhaustedBounds = 0
    1120           1 :         i.maybeFilteredKeysSingleLevel = false
    1121           1 :         // Seek optimization only applies until iterator is first positioned after SetBounds.
    1122           1 :         i.boundsCmp = 0
    1123           1 : 
    1124           1 :         if i.err != nil {
    1125           0 :                 return nil, base.LazyValue{}
    1126           0 :         }
    1127           1 :         if key, val := i.data.Next(); key != nil {
    1128           1 :                 if i.blockUpper != nil {
    1129           1 :                         cmp := i.cmp(key.UserKey, i.blockUpper)
    1130           1 :                         if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
    1131           1 :                                 i.exhaustedBounds = +1
    1132           1 :                                 return nil, base.LazyValue{}
    1133           1 :                         }
    1134             :                 }
    1135           1 :                 return key, val
    1136             :         }
    1137           1 :         return i.skipForward()
    1138             : }
    1139             : 
    1140             : // NextPrefix implements (base.InternalIterator).NextPrefix.
    1141           1 : func (i *singleLevelIterator) NextPrefix(succKey []byte) (*InternalKey, base.LazyValue) {
    1142           1 :         if i.exhaustedBounds == +1 {
    1143           0 :                 panic("NextPrefix called even though exhausted upper bound")
    1144             :         }
    1145           1 :         i.exhaustedBounds = 0
    1146           1 :         i.maybeFilteredKeysSingleLevel = false
    1147           1 :         // Seek optimization only applies until iterator is first positioned after SetBounds.
    1148           1 :         i.boundsCmp = 0
    1149           1 :         if i.err != nil {
    1150           0 :                 return nil, base.LazyValue{}
    1151           0 :         }
    1152           1 :         if key, val := i.data.NextPrefix(succKey); key != nil {
    1153           1 :                 if i.blockUpper != nil {
    1154           0 :                         cmp := i.cmp(key.UserKey, i.blockUpper)
    1155           0 :                         if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
    1156           0 :                                 i.exhaustedBounds = +1
    1157           0 :                                 return nil, base.LazyValue{}
    1158           0 :                         }
    1159             :                 }
    1160           1 :                 return key, val
    1161             :         }
    1162             :         // Did not find prefix in the existing data block. This is the slow-path
    1163             :         // where we effectively seek the iterator.
    1164           1 :         var ikey *InternalKey
    1165           1 :         // The key is likely to be in the next data block, so try one step.
    1166           1 :         if ikey, _ = i.index.Next(); ikey == nil {
    1167           1 :                 // The target key is greater than any key in the index block.
    1168           1 :                 // Invalidate the block iterator so that a subsequent call to Prev()
    1169           1 :                 // will return the last key in the table.
    1170           1 :                 i.data.invalidate()
    1171           1 :                 return nil, base.LazyValue{}
    1172           1 :         }
    1173           1 :         if i.cmp(succKey, ikey.UserKey) > 0 {
    1174           1 :                 // Not in the next data block, so seek the index.
    1175           1 :                 if ikey, _ = i.index.SeekGE(succKey, base.SeekGEFlagsNone); ikey == nil {
    1176           1 :                         // The target key is greater than any key in the index block.
    1177           1 :                         // Invalidate the block iterator so that a subsequent call to Prev()
    1178           1 :                         // will return the last key in the table.
    1179           1 :                         i.data.invalidate()
    1180           1 :                         return nil, base.LazyValue{}
    1181           1 :                 }
    1182             :         }
    1183           1 :         result := i.loadBlock(+1)
    1184           1 :         if result == loadBlockFailed {
    1185           0 :                 return nil, base.LazyValue{}
    1186           0 :         }
    1187           1 :         if result == loadBlockIrrelevant {
    1188           0 :                 // Enforce the upper bound here since we don't want to bother moving
    1189           0 :                 // to the next block if upper bound is already exceeded. Note that
    1190           0 :                 // the next block starts with keys >= ikey.UserKey since even
    1191           0 :                 // though this is the block separator, the same user key can span
    1192           0 :                 // multiple blocks. If upper is exclusive we use >= below, else we use
    1193           0 :                 // >.
    1194           0 :                 if i.upper != nil {
    1195           0 :                         cmp := i.cmp(ikey.UserKey, i.upper)
    1196           0 :                         if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
    1197           0 :                                 i.exhaustedBounds = +1
    1198           0 :                                 return nil, base.LazyValue{}
    1199           0 :                         }
    1200             :                 }
    1201           1 :         } else if key, val := i.data.SeekGE(succKey, base.SeekGEFlagsNone); key != nil {
    1202           1 :                 if i.blockUpper != nil {
    1203           0 :                         cmp := i.cmp(key.UserKey, i.blockUpper)
    1204           0 :                         if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
    1205           0 :                                 i.exhaustedBounds = +1
    1206           0 :                                 return nil, base.LazyValue{}
    1207           0 :                         }
    1208             :                 }
    1209           1 :                 return i.maybeVerifyKey(key, val)
    1210             :         }
    1211             : 
    1212           1 :         return i.skipForward()
    1213             : }
    1214             : 
    1215             : // Prev implements internalIterator.Prev, as documented in the pebble
    1216             : // package.
    1217           1 : func (i *singleLevelIterator) Prev() (*InternalKey, base.LazyValue) {
    1218           1 :         if i.exhaustedBounds == -1 {
    1219           0 :                 panic("Prev called even though exhausted lower bound")
    1220             :         }
    1221           1 :         i.exhaustedBounds = 0
    1222           1 :         i.maybeFilteredKeysSingleLevel = false
    1223           1 :         // Seek optimization only applies until iterator is first positioned after SetBounds.
    1224           1 :         i.boundsCmp = 0
    1225           1 : 
    1226           1 :         if i.err != nil {
    1227           0 :                 return nil, base.LazyValue{}
    1228           0 :         }
    1229           1 :         if key, val := i.data.Prev(); key != nil {
    1230           1 :                 if i.blockLower != nil && i.cmp(key.UserKey, i.blockLower) < 0 {
    1231           1 :                         i.exhaustedBounds = -1
    1232           1 :                         return nil, base.LazyValue{}
    1233           1 :                 }
    1234           1 :                 return key, val
    1235             :         }
    1236           1 :         return i.skipBackward()
    1237             : }
    1238             : 
    1239           1 : func (i *singleLevelIterator) skipForward() (*InternalKey, base.LazyValue) {
    1240           1 :         for {
    1241           1 :                 var key *InternalKey
    1242           1 :                 if key, _ = i.index.Next(); key == nil {
    1243           1 :                         i.data.invalidate()
    1244           1 :                         break
    1245             :                 }
    1246           1 :                 result := i.loadBlock(+1)
    1247           1 :                 if result != loadBlockOK {
    1248           1 :                         if i.err != nil {
    1249           1 :                                 break
    1250             :                         }
    1251           1 :                         if result == loadBlockFailed {
    1252           0 :                                 // We checked that i.index was at a valid entry, so
    1253           0 :                                 // loadBlockFailed could not have happened due to i.index
    1254           0 :                                 // being exhausted, and must be due to an error.
    1255           0 :                                 panic("loadBlock should not have failed with no error")
    1256             :                         }
    1257             :                         // result == loadBlockIrrelevant. Enforce the upper bound here
    1258             :                         // since we don't want to bother moving to the next block if upper
    1259             :                         // bound is already exceeded. Note that the next block starts with
    1260             :                         // keys >= key.UserKey since even though this is the block
    1261             :                         // separator, the same user key can span multiple blocks. If upper
    1262             :                         // is exclusive we use >= below, else we use >.
    1263           1 :                         if i.upper != nil {
    1264           1 :                                 cmp := i.cmp(key.UserKey, i.upper)
    1265           1 :                                 if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
    1266           1 :                                         i.exhaustedBounds = +1
    1267           1 :                                         return nil, base.LazyValue{}
    1268           1 :                                 }
    1269             :                         }
    1270           1 :                         continue
    1271             :                 }
    1272           1 :                 if key, val := i.data.First(); key != nil {
    1273           1 :                         if i.blockUpper != nil {
    1274           1 :                                 cmp := i.cmp(key.UserKey, i.blockUpper)
    1275           1 :                                 if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
    1276           1 :                                         i.exhaustedBounds = +1
    1277           1 :                                         return nil, base.LazyValue{}
    1278           1 :                                 }
    1279             :                         }
    1280           1 :                         return i.maybeVerifyKey(key, val)
    1281             :                 }
    1282             :         }
    1283           1 :         return nil, base.LazyValue{}
    1284             : }
    1285             : 
    1286           1 : func (i *singleLevelIterator) skipBackward() (*InternalKey, base.LazyValue) {
    1287           1 :         for {
    1288           1 :                 var key *InternalKey
    1289           1 :                 if key, _ = i.index.Prev(); key == nil {
    1290           1 :                         i.data.invalidate()
    1291           1 :                         break
    1292             :                 }
    1293           1 :                 result := i.loadBlock(-1)
    1294           1 :                 if result != loadBlockOK {
    1295           1 :                         if i.err != nil {
    1296           1 :                                 break
    1297             :                         }
    1298           1 :                         if result == loadBlockFailed {
    1299           0 :                                 // We checked that i.index was at a valid entry, so
    1300           0 :                                 // loadBlockFailed could not have happened due to i.index
    1301           0 :                                 // being exhausted, and must be due to an error.
    1302           0 :                                 panic("loadBlock should not have failed with no error")
    1303             :                         }
    1304             :                         // result == loadBlockIrrelevant. Enforce the lower bound here
    1305             :                         // since we don't want to bother moving to the previous block if lower
    1306             :                         // bound is already exceeded. Note that the previous block starts with
    1307             :                         // keys <= key.UserKey since even though this is the current block's
    1308             :                         // separator, the same user key can span multiple blocks.
    1309           1 :                         if i.lower != nil && i.cmp(key.UserKey, i.lower) < 0 {
    1310           0 :                                 i.exhaustedBounds = -1
    1311           0 :                                 return nil, base.LazyValue{}
    1312           0 :                         }
    1313           1 :                         continue
    1314             :                 }
    1315           1 :                 key, val := i.data.Last()
    1316           1 :                 if key == nil {
    1317           1 :                         return nil, base.LazyValue{}
    1318           1 :                 }
    1319           1 :                 if i.blockLower != nil && i.cmp(key.UserKey, i.blockLower) < 0 {
    1320           1 :                         i.exhaustedBounds = -1
    1321           1 :                         return nil, base.LazyValue{}
    1322           1 :                 }
    1323           1 :                 return i.maybeVerifyKey(key, val)
    1324             :         }
    1325           1 :         return nil, base.LazyValue{}
    1326             : }
    1327             : 
    1328             : // Error implements internalIterator.Error, as documented in the pebble
    1329             : // package.
    1330           1 : func (i *singleLevelIterator) Error() error {
    1331           1 :         if err := i.data.Error(); err != nil {
    1332           0 :                 return err
    1333           0 :         }
    1334           1 :         return i.err
    1335             : }
    1336             : 
    1337             : // MaybeFilteredKeys may be called when an iterator is exhausted to indicate
    1338             : // whether or not the last positioning method may have skipped any keys due to
    1339             : // block-property filters.
    1340           1 : func (i *singleLevelIterator) MaybeFilteredKeys() bool {
    1341           1 :         return i.maybeFilteredKeysSingleLevel
    1342           1 : }
    1343             : 
    1344             : // SetCloseHook sets a function that will be called when the iterator is
    1345             : // closed. Close invokes it with the iterator before closing i.data and i.index.
    1346           1 : func (i *singleLevelIterator) SetCloseHook(fn func(i Iterator) error) {
    1347           1 :         i.closeHook = fn // plain assignment: replaces any previously set hook
    1348           1 : }
    1349             : 
    1350           1 : func firstError(err0, err1 error) error { // firstError returns err0 if it is non-nil, otherwise err1 (which may be nil).
    1351           1 :         if err0 != nil {
    1352           1 :                 return err0 // the earlier error takes priority; err1 is dropped
    1353           1 :         }
    1354           1 :         return err1
    1355             : }
    1356             : 
    1357             : // Close implements internalIterator.Close, as documented in the pebble
    1358             : // package. It returns the first non-nil error seen, then resets i and puts it into singleLevelIterPool.
    1359           1 : func (i *singleLevelIterator) Close() error {
    1360           1 :         var err error
    1361           1 :         if i.closeHook != nil {
    1362           1 :                 err = firstError(err, i.closeHook(i)) // the hook runs first, before anything is closed
    1363           1 :         }
    1364           1 :         err = firstError(err, i.data.Close()) // later failures are kept only if err is still nil
    1365           1 :         err = firstError(err, i.index.Close())
    1366           1 :         if i.dataRH != nil {
    1367           1 :                 err = firstError(err, i.dataRH.Close())
    1368           1 :                 i.dataRH = nil // cleared so the closed handle is not retained
    1369           1 :         }
    1370           1 :         err = firstError(err, i.err) // the iterator's own error ranks after the close errors above
    1371           1 :         if i.bpfs != nil {
    1372           1 :                 releaseBlockPropertiesFilterer(i.bpfs) // no error is collected from this call
    1373           1 :         }
    1374           1 :         if i.vbReader != nil {
    1375           1 :                 i.vbReader.close() // no error is collected from this call
    1376           1 :         }
    1377           1 :         if i.vbRH != nil {
    1378           1 :                 err = firstError(err, i.vbRH.Close())
    1379           1 :                 i.vbRH = nil
    1380           1 :         }
    1381           1 :         *i = i.resetForReuse() // overwrite every field with the reset state before pooling
    1382           1 :         singleLevelIterPool.Put(i) // NOTE(review): i is pooled here, so callers presumably must not use it after Close
    1383           1 :         return err
    1384             : }
    1385             : 
    1386           1 : func (i *singleLevelIterator) String() string { // String returns the file number rendered as a string.
    1387           1 :         if i.vState != nil {
    1388           1 :                 return i.vState.fileNum.String() // when vState is set, its file number is used
    1389           1 :         }
    1390           1 :         return i.reader.fileNum.String() // otherwise fall back to the reader's file number
    1391             : }

Generated by: LCOV version 1.14