LCOV - code coverage report
Current view: top level - pebble - level_iter.go (source / functions) Hit Total Coverage
Test: 2023-12-10 08:15Z 58bdc725 - tests only.lcov Lines: 671 717 93.6 %
Date: 2023-12-10 08:16:22 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : package pebble
       6             : 
       7             : import (
       8             :         "context"
       9             :         "fmt"
      10             :         "runtime/debug"
      11             : 
      12             :         "github.com/cockroachdb/pebble/internal/base"
      13             :         "github.com/cockroachdb/pebble/internal/invariants"
      14             :         "github.com/cockroachdb/pebble/internal/keyspan"
      15             :         "github.com/cockroachdb/pebble/internal/manifest"
      16             :         "github.com/cockroachdb/pebble/sstable"
      17             : )
      18             : 
      19             : // tableNewIters creates a new point and range-del iterator for the given file
      20             : // number.
      21             : //
      22             : // On success, the internalIterator is not-nil and must be closed; the
      23             : // FragmentIterator can be nil.
      24             : // TODO(radu): always return a non-nil FragmentIterator.
      25             : //
      26             : // On error, the iterators are nil.
      27             : //
      28             : // The only (non-test) implementation of tableNewIters is tableCacheContainer.newIters().
      29             : type tableNewIters func(
      30             :         ctx context.Context,
      31             :         file *manifest.FileMetadata,
      32             :         opts *IterOptions,
      33             :         internalOpts internalIterOpts,
      34             : ) (internalIterator, keyspan.FragmentIterator, error)
      35             : 
      36             : // tableNewRangeDelIter takes a tableNewIters and returns a TableNewSpanIter
      37             : // for the rangedel iterator returned by tableNewIters.
      38           1 : func tableNewRangeDelIter(ctx context.Context, newIters tableNewIters) keyspan.TableNewSpanIter {
      39           1 :         return func(file *manifest.FileMetadata, iterOptions keyspan.SpanIterOptions) (keyspan.FragmentIterator, error) {
      40           1 :                 iter, rangeDelIter, err := newIters(ctx, file, nil, internalIterOpts{})
      41           1 :                 if iter != nil {
      42           1 :                         _ = iter.Close()
      43           1 :                 }
      44           1 :                 if rangeDelIter == nil {
      45           1 :                         rangeDelIter = emptyKeyspanIter
      46           1 :                 }
      47           1 :                 return rangeDelIter, err
      48             :         }
      49             : }
      50             : 
      51             : type internalIterOpts struct {
      52             :         bytesIterated      *uint64
      53             :         bufferPool         *sstable.BufferPool
      54             :         stats              *base.InternalIteratorStats
      55             :         boundLimitedFilter sstable.BoundLimitedBlockPropertyFilter
      56             : }
      57             : 
      58             : // levelIter provides a merged view of the sstables in a level.
      59             : //
      60             : // levelIter is used during compaction and as part of the Iterator
      61             : // implementation. When used as part of the Iterator implementation, level
      62             : // iteration needs to "pause" at sstable boundaries if a range deletion
      63             : // tombstone is the source of that boundary. We know if a range tombstone is
      64             : // the smallest or largest key in a file because the kind will be
      65             : // InternalKeyKindRangeDeletion. If the boundary key is a range deletion
      66             : // tombstone, we materialize a fake entry to return from levelIter. This
      67             : // prevents mergingIter from advancing past the sstable until the sstable
      68             : // contains the smallest (or largest for reverse iteration) key in the merged
      69             : // heap. Note that mergingIter treats a range deletion tombstone returned by
      70             : // the point iterator as a no-op.
      71             : //
      72             : // SeekPrefixGE presents the need for a second type of pausing. If an sstable
      73             : // iterator returns "not found" for a SeekPrefixGE operation, we don't want to
      74             : // advance to the next sstable as the "not found" does not indicate that all of
      75             : // the keys in the sstable are less than the search key. Advancing to the next
      76             : // sstable would cause us to skip over range tombstones, violating
      77             : // correctness. Instead, SeekPrefixGE creates a synthetic boundary key with the
      78             : // kind InternalKeyKindRangeDeletion which will be used to pause the levelIter
      79             : // at the sstable until the mergingIter is ready to advance past it.
      80             : type levelIter struct {
      81             :         // The context is stored here since (a) iterators are expected to be
      82             :         // short-lived (since they pin sstables), (b) plumbing a context into every
      83             :         // method is very painful, (c) they do not (yet) respect context
      84             :         // cancellation and are only used for tracing.
      85             :         ctx      context.Context
      86             :         logger   Logger
      87             :         comparer *Comparer
      88             :         cmp      Compare
      89             :         split    Split
      90             :         // The lower/upper bounds for iteration as specified at creation or the most
      91             :         // recent call to SetBounds.
      92             :         lower []byte
      93             :         upper []byte
      94             :         // The iterator options for the currently open table. If
      95             :         // tableOpts.{Lower,Upper}Bound are nil, the corresponding iteration boundary
      96             :         // does not lie within the table bounds.
      97             :         tableOpts IterOptions
      98             :         // The LSM level this levelIter is initialized for.
      99             :         level manifest.Level
     100             :         // The keys to return when iterating past an sstable boundary and that
     101             :         // boundary is a range deletion tombstone. The boundary could be smallest
     102             :         // (i.e. arrived at with Prev), or largest (arrived at with Next).
     103             :         smallestBoundary *InternalKey
     104             :         largestBoundary  *InternalKey
     105             :         // combinedIterState may be set when a levelIter is used during user
     106             :         // iteration. Although levelIter only iterates over point keys, it's also
     107             :         // responsible for lazily constructing the combined range & point iterator
     108             :         // when it observes a file containing range keys. If the combined iter
     109             :         // state's initialized field is true, the iterator is already using combined
     110             :         // iterator, OR the iterator is not configured to use combined iteration. If
     111             :         // it's false, the levelIter must set the `triggered` and `key` fields when
     112             :         // the levelIter passes over a file containing range keys. See the
     113             :         // lazyCombinedIter for more details.
     114             :         combinedIterState *combinedIterState
     115             :         // A synthetic boundary key to return when SeekPrefixGE finds an sstable
     116             :         // which doesn't contain the search key, but which does contain range
     117             :         // tombstones.
     118             :         syntheticBoundary InternalKey
     119             :         // The iter for the current file. It is nil under any of the following conditions:
     120             :         // - files.Current() == nil
     121             :         // - err != nil
     122             :         // - some other constraint, like the bounds in opts, caused the file at index to not
     123             :         //   be relevant to the iteration.
     124             :         iter internalIterator
     125             :         // iterFile holds the current file. It is always equal to l.files.Current().
     126             :         iterFile *fileMetadata
     127             :         // filteredIter is an optional interface that may be implemented by internal
     128             :         // iterators that perform filtering of keys. When a new file's iterator is
     129             :         // opened, it's tested to see if it implements filteredIter. If it does,
     130             :         // it's stored here to allow the level iterator to recognize when keys were
     131             :         // omitted from iteration results due to filtering. This is important when a
     132             :         // file contains range deletions that may delete keys from other files. The
     133             :         // levelIter must not advance to the next file until the mergingIter has
     134             :         // advanced beyond the file's bounds. See
     135             :         // levelIterBoundaryContext.isIgnorableBoundaryKey.
     136             :         filteredIter filteredIter
     137             :         newIters     tableNewIters
     138             :         // When rangeDelIterPtr != nil, the caller requires that *rangeDelIterPtr must
     139             :         // point to a range del iterator corresponding to the current file. When this
     140             :         // iterator returns nil, *rangeDelIterPtr should also be set to nil. Whenever
     141             :         // a non-nil internalIterator is placed in rangeDelIterPtr, a copy is placed
     142             :         // in rangeDelIterCopy. This is done for the following special case:
     143             :         // when this iterator returns nil because of exceeding the bounds, we don't
     144             :         // close iter and *rangeDelIterPtr since we could reuse it in the next seek. But
     145             :         // we need to set *rangeDelIterPtr to nil because of the aforementioned contract.
     146             :         // This copy is used to revive the *rangeDelIterPtr in the case of reuse.
     147             :         rangeDelIterPtr  *keyspan.FragmentIterator
     148             :         rangeDelIterCopy keyspan.FragmentIterator
     149             :         files            manifest.LevelIterator
     150             :         err              error
     151             : 
     152             :         // Pointer into this level's entry in `mergingIterLevel::levelIterBoundaryContext`.
     153             :         // We populate it with the corresponding bounds for the currently opened file. It is used for
     154             :         // two purposes (described for forward iteration. The explanation for backward iteration is
     155             :         // similar.)
     156             :         // - To limit the optimization that seeks lower-level iterators past keys shadowed by a range
     157             :         //   tombstone. Limiting this seek to the file largestUserKey is necessary since
     158             :         //   range tombstones are stored untruncated, while they only apply to keys within their
     159             :         //   containing file's boundaries. For a detailed example, see comment above `mergingIter`.
     160             :         // - To constrain the tombstone to act-within the bounds of the sstable when checking
     161             :         //   containment. For forward iteration we need the smallestUserKey.
     162             :         //
     163             :         // An example is sstable bounds [c#8, g#12] containing a tombstone [b, i)#7.
     164             :         // - When doing a SeekGE to user key X, the levelIter is at this sstable because X is either within
     165             :         //   the sstable bounds or earlier than the start of the sstable (and there is no sstable in
     166             :         //   between at this level). If X >= smallestUserKey, and the tombstone [b, i) contains X,
     167             :         //   it is correct to SeekGE the sstables at lower levels to min(g, i) (i.e., min of
     168             :         //   largestUserKey, tombstone.End) since any user key preceding min(g, i) must be covered by this
     169             :         //   tombstone (since it cannot have a version younger than this tombstone as it is at a lower
     170             :         //   level). And even if X = smallestUserKey or equal to the start user key of the tombstone,
     171             :         //   if the above conditions are satisfied we know that the internal keys corresponding to X at
     172             :         //   lower levels must have a version smaller than that in this file (again because of the level
     173             :         //   argument). So we don't need to use sequence numbers for this comparison.
     174             :         // - When checking whether this tombstone deletes internal key X we know that the levelIter is at this
     175             :         //   sstable so (repeating the above) X.UserKey is either within the sstable bounds or earlier than the
     176             :         //   start of the sstable (and there is no sstable in between at this level).
     177             :         //   - X is at at a lower level. If X.UserKey >= smallestUserKey, and the tombstone contains
     178             :         //     X.UserKey, we know X is deleted. This argument also works when X is a user key (we use
     179             :         //     it when seeking to test whether a user key is deleted).
     180             :         //   - X is at the same level. X must be within the sstable bounds of the tombstone so the
     181             :         //     X.UserKey >= smallestUserKey comparison is trivially true. In addition to the tombstone containing
     182             :         //     X we need to compare the sequence number of X and the tombstone (we don't need to look
     183             :         //     at how this tombstone is truncated to act-within the file bounds, which are InternalKeys,
     184             :         //     since X and the tombstone are from the same file).
     185             :         //
     186             :         // Iterating backwards has one more complication when checking whether a tombstone deletes
     187             :         // internal key X at a lower level (the construction we do here also works for a user key X).
     188             :         // Consider sstable bounds [c#8, g#InternalRangeDelSentinel] containing a tombstone [b, i)#7.
     189             :         // If we are positioned at key g#10 at a lower sstable, the tombstone we will see is [b, i)#7,
     190             :         // since the higher sstable is positioned at a key <= g#10. We should not use this tombstone
     191             :         // to delete g#10. This requires knowing that the largestUserKey is a range delete sentinel,
     192             :         // which we set in a separate bool below.
     193             :         //
     194             :         // These fields differs from the `*Boundary` fields in a few ways:
     195             :         // - `*Boundary` is only populated when the iterator is positioned exactly on the sentinel key.
     196             :         // - `*Boundary` can hold either the lower- or upper-bound, depending on the iterator direction.
     197             :         // - `*Boundary` is not exposed to the next higher-level iterator, i.e., `mergingIter`.
     198             :         boundaryContext *levelIterBoundaryContext
     199             : 
     200             :         // internalOpts holds the internal iterator options to pass to the table
     201             :         // cache when constructing new table iterators.
     202             :         internalOpts internalIterOpts
     203             : 
     204             :         // Scratch space for the obsolete keys filter, when there are no other block
     205             :         // property filters specified. See the performance note where
     206             :         // IterOptions.PointKeyFilters is declared.
     207             :         filtersBuf [1]BlockPropertyFilter
     208             : 
     209             :         // Disable invariant checks even if they are otherwise enabled. Used by tests
     210             :         // which construct "impossible" situations (e.g. seeking to a key before the
     211             :         // lower bound).
     212             :         disableInvariants bool
     213             : }
     214             : 
     215             : // filteredIter is an additional interface implemented by iterators that may
     216             : // skip over point keys during iteration. The sstable.Iterator implements this
     217             : // interface.
     218             : type filteredIter interface {
     219             :         // MaybeFilteredKeys may be called when an iterator is exhausted, indicating
     220             :         // whether or not the iterator's last positioning method may have skipped
     221             :         // any keys due to low-level filters.
     222             :         //
     223             :         // When an iterator is configured to use block-property filters, the
     224             :         // low-level iterator may skip over blocks or whole sstables of keys.
     225             :         // Implementations that implement skipping must implement this interface.
     226             :         // Higher-level iterators require it to preserve invariants (eg, a levelIter
     227             :         // used in a mergingIter must keep the file's range-del iterator open until
     228             :         // the mergingIter has moved past the file's bounds, even if all of the
     229             :         // file's point keys were filtered).
     230             :         //
     231             :         // MaybeFilteredKeys may always return false positives, that is it may
     232             :         // return true when no keys were filtered. It should only be called when the
     233             :         // iterator is exhausted. It must never return false negatives when the
     234             :         // iterator is exhausted.
     235             :         MaybeFilteredKeys() bool
     236             : }
     237             : 
     238             : // levelIter implements the base.InternalIterator interface.
     239             : var _ base.InternalIterator = (*levelIter)(nil)
     240             : 
     241             : // newLevelIter returns a levelIter. It is permissible to pass a nil split
     242             : // parameter if the caller is never going to call SeekPrefixGE.
     243             : func newLevelIter(
     244             :         ctx context.Context,
     245             :         opts IterOptions,
     246             :         comparer *Comparer,
     247             :         newIters tableNewIters,
     248             :         files manifest.LevelIterator,
     249             :         level manifest.Level,
     250             :         internalOpts internalIterOpts,
     251           1 : ) *levelIter {
     252           1 :         l := &levelIter{}
     253           1 :         l.init(ctx, opts, comparer, newIters, files, level, internalOpts)
     254           1 :         return l
     255           1 : }
     256             : 
     257             : func (l *levelIter) init(
     258             :         ctx context.Context,
     259             :         opts IterOptions,
     260             :         comparer *Comparer,
     261             :         newIters tableNewIters,
     262             :         files manifest.LevelIterator,
     263             :         level manifest.Level,
     264             :         internalOpts internalIterOpts,
     265           1 : ) {
     266           1 :         l.ctx = ctx
     267           1 :         l.err = nil
     268           1 :         l.level = level
     269           1 :         l.logger = opts.getLogger()
     270           1 :         l.lower = opts.LowerBound
     271           1 :         l.upper = opts.UpperBound
     272           1 :         l.tableOpts.TableFilter = opts.TableFilter
     273           1 :         l.tableOpts.PointKeyFilters = opts.PointKeyFilters
     274           1 :         if len(opts.PointKeyFilters) == 0 {
     275           1 :                 l.tableOpts.PointKeyFilters = l.filtersBuf[:0:1]
     276           1 :         }
     277           1 :         l.tableOpts.UseL6Filters = opts.UseL6Filters
     278           1 :         l.tableOpts.CategoryAndQoS = opts.CategoryAndQoS
     279           1 :         l.tableOpts.level = l.level
     280           1 :         l.tableOpts.snapshotForHideObsoletePoints = opts.snapshotForHideObsoletePoints
     281           1 :         l.comparer = comparer
     282           1 :         l.cmp = comparer.Compare
     283           1 :         l.split = comparer.Split
     284           1 :         l.iterFile = nil
     285           1 :         l.newIters = newIters
     286           1 :         l.files = files
     287           1 :         l.internalOpts = internalOpts
     288             : }
     289             : 
     290           1 : func (l *levelIter) initRangeDel(rangeDelIter *keyspan.FragmentIterator) {
     291           1 :         l.rangeDelIterPtr = rangeDelIter
     292           1 : }
     293             : 
     294           1 : func (l *levelIter) initBoundaryContext(context *levelIterBoundaryContext) {
     295           1 :         l.boundaryContext = context
     296           1 : }
     297             : 
     298           1 : func (l *levelIter) initCombinedIterState(state *combinedIterState) {
     299           1 :         l.combinedIterState = state
     300           1 : }
     301             : 
     302           1 : func (l *levelIter) maybeTriggerCombinedIteration(file *fileMetadata, dir int) {
     303           1 :         // If we encounter a file that contains range keys, we may need to
     304           1 :         // trigger a switch to combined range-key and point-key iteration,
     305           1 :         // if the *pebble.Iterator is configured for it. This switch is done
     306           1 :         // lazily because range keys are intended to be rare, and
     307           1 :         // constructing the range-key iterator substantially adds to the
     308           1 :         // cost of iterator construction and seeking.
     309           1 :         //
     310           1 :         // If l.combinedIterState.initialized is already true, either the
     311           1 :         // iterator is already using combined iteration or the iterator is not
     312           1 :         // configured to observe range keys. Either way, there's nothing to do.
     313           1 :         // If false, trigger the switch to combined iteration, using the the
     314           1 :         // file's bounds to seek the range-key iterator appropriately.
     315           1 :         //
     316           1 :         // We only need to trigger combined iteration if the file contains
     317           1 :         // RangeKeySets: if there are only Unsets and Dels, the user will observe no
     318           1 :         // range keys regardless. If this file has table stats available, they'll
     319           1 :         // tell us whether the file has any RangeKeySets. Otherwise, we must
     320           1 :         // fallback to assuming it does if HasRangeKeys=true.
     321           1 :         if file != nil && file.HasRangeKeys && l.combinedIterState != nil && !l.combinedIterState.initialized &&
     322           1 :                 (l.upper == nil || l.cmp(file.SmallestRangeKey.UserKey, l.upper) < 0) &&
     323           1 :                 (l.lower == nil || l.cmp(file.LargestRangeKey.UserKey, l.lower) > 0) &&
     324           1 :                 (!file.StatsValid() || file.Stats.NumRangeKeySets > 0) {
     325           1 :                 // The file contains range keys, and we're not using combined iteration yet.
     326           1 :                 // Trigger a switch to combined iteration. It's possible that a switch has
     327           1 :                 // already been triggered if multiple levels encounter files containing
     328           1 :                 // range keys while executing a single mergingIter operation. In this case,
     329           1 :                 // we need to compare the existing key recorded to l.combinedIterState.key,
     330           1 :                 // adjusting it if our key is smaller (forward iteration) or larger
     331           1 :                 // (backward iteration) than the existing key.
     332           1 :                 //
     333           1 :                 // These key comparisons are only required during a single high-level
     334           1 :                 // iterator operation. When the high-level iter op completes,
     335           1 :                 // iinitialized will be true, and future calls to this function will be
     336           1 :                 // no-ops.
     337           1 :                 switch dir {
     338           1 :                 case +1:
     339           1 :                         if !l.combinedIterState.triggered {
     340           1 :                                 l.combinedIterState.triggered = true
     341           1 :                                 l.combinedIterState.key = file.SmallestRangeKey.UserKey
     342           1 :                         } else if l.cmp(l.combinedIterState.key, file.SmallestRangeKey.UserKey) > 0 {
     343           1 :                                 l.combinedIterState.key = file.SmallestRangeKey.UserKey
     344           1 :                         }
     345           1 :                 case -1:
     346           1 :                         if !l.combinedIterState.triggered {
     347           1 :                                 l.combinedIterState.triggered = true
     348           1 :                                 l.combinedIterState.key = file.LargestRangeKey.UserKey
     349           1 :                         } else if l.cmp(l.combinedIterState.key, file.LargestRangeKey.UserKey) < 0 {
     350           1 :                                 l.combinedIterState.key = file.LargestRangeKey.UserKey
     351           1 :                         }
     352             :                 }
     353             :         }
     354             : }
     355             : 
     356           1 : func (l *levelIter) findFileGE(key []byte, flags base.SeekGEFlags) *fileMetadata {
     357           1 :         // Find the earliest file whose largest key is >= key.
     358           1 : 
     359           1 :         // NB: if flags.TrySeekUsingNext()=true, the levelIter must respect it. If
     360           1 :         // the levelIter is positioned at the key P, it must return a key ≥ P. If
     361           1 :         // used within a merging iterator, the merging iterator will depend on the
     362           1 :         // levelIter only moving forward to maintain heap invariants.
     363           1 : 
     364           1 :         // Ordinarily we seek the LevelIterator using SeekGE. In some instances, we
     365           1 :         // Next instead. In other instances, we try Next-ing first, falling back to
     366           1 :         // seek:
     367           1 :         //   a) flags.TrySeekUsingNext(): The top-level Iterator knows we're seeking
     368           1 :         //      to a key later than the current iterator position. We don't know how
     369           1 :         //      much later the seek key is, so it's possible there are many sstables
     370           1 :         //      between the current position and the seek key. However in most real-
     371           1 :         //      world use cases, the seek key is likely to be nearby. Rather than
     372           1 :         //      performing a log(N) seek through the file metadata, we next a few
     373           1 :         //      times from from our existing location. If we don't find a file whose
     374           1 :         //      largest is >= key within a few nexts, we fall back to seeking.
     375           1 :         //
     376           1 :         //      Note that in this case, the file returned by findFileGE may be
     377           1 :         //      different than the file returned by a raw binary search (eg, when
     378           1 :         //      TrySeekUsingNext=false). This is possible because the most recent
     379           1 :         //      positioning operation may have already determined that previous
     380           1 :         //      files' keys that are ≥ key are all deleted. This information is
     381           1 :         //      encoded within the iterator's current iterator position and is
     382           1 :         //      unavailable to a fresh binary search.
     383           1 :         //
     384           1 :         //   b) flags.RelativeSeek(): The merging iterator decided to re-seek this
     385           1 :         //      level according to a range tombstone. When lazy combined iteration
     386           1 :         //      is enabled, the level iterator is responsible for watching for
     387           1 :         //      files containing range keys and triggering the switch to combined
     388           1 :         //      iteration when such a file is observed. If a range deletion was
     389           1 :         //      observed in a higher level causing the merging iterator to seek the
     390           1 :         //      level to the range deletion's end key, we need to check whether all
     391           1 :         //      of the files between the old position and the new position contain
     392           1 :         //      any range keys.
     393           1 :         //
     394           1 :         //      In this scenario, we don't seek the LevelIterator and instead we
     395           1 :         //      Next it, one file at a time, checking each for range keys. The
     396           1 :         //      merging iterator sets this flag to inform us that we're moving
     397           1 :         //      forward relative to the existing position and that we must examine
     398           1 :         //      each intermediate sstable's metadata for lazy-combined iteration.
     399           1 :         //      In this case, we only Next and never Seek. We set nextsUntilSeek=-1
     400           1 :         //      to signal this intention.
     401           1 :         //
     402           1 :         // NB: At most one of flags.RelativeSeek() and flags.TrySeekUsingNext() may
     403           1 :         // be set, because the merging iterator re-seeks relative seeks with
     404           1 :         // explicitly only the RelativeSeek flag set.
     405           1 :         var nextsUntilSeek int
     406           1 :         var nextInsteadOfSeek bool
     407           1 :         if flags.TrySeekUsingNext() {
     408           1 :                 nextInsteadOfSeek = true
     409           1 :                 nextsUntilSeek = 4 // arbitrary
     410           1 :         }
     411           1 :         if flags.RelativeSeek() && l.combinedIterState != nil && !l.combinedIterState.initialized {
     412           1 :                 nextInsteadOfSeek = true
     413           1 :                 nextsUntilSeek = -1
     414           1 :         }
     415             : 
     416           1 :         var m *fileMetadata
     417           1 :         if nextInsteadOfSeek {
     418           1 :                 m = l.iterFile
     419           1 :         } else {
     420           1 :                 m = l.files.SeekGE(l.cmp, key)
     421           1 :         }
     422             :         // The below loop has a bit of an unusual organization. There are several
     423             :         // conditions under which we need to Next to a later file. If none of those
     424             :         // conditions are met, the file in `m` is okay to return. The loop body is
     425             :         // structured with a series of if statements, each of which may continue the
     426             :         // loop to the next file. If none of the statements are met, the end of the
     427             :         // loop body is a break.
     428           1 :         for m != nil {
     429           1 :                 if m.HasRangeKeys {
     430           1 :                         l.maybeTriggerCombinedIteration(m, +1)
     431           1 : 
     432           1 :                         // Some files may only contain range keys, which we can skip.
     433           1 :                         // NB: HasPointKeys=true if the file contains any points or range
     434           1 :                         // deletions (which delete points).
     435           1 :                         if !m.HasPointKeys {
     436           1 :                                 m = l.files.Next()
     437           1 :                                 continue
     438             :                         }
     439             :                 }
     440             : 
     441             :                 // This file has point keys.
     442             :                 //
     443             :                 // However, there are a couple reasons why `m` may not be positioned ≥
     444             :                 // `key` yet:
     445             :                 //
     446             :                 // 1. If SeekGE(key) landed on a file containing range keys, the file
     447             :                 //    may contain range keys ≥ `key` but no point keys ≥ `key`.
     448             :                 // 2. When nexting instead of seeking, we must check to see whether
     449             :                 //    we've nexted sufficiently far, or we need to next again.
     450             :                 //
     451             :                 // If the file does not contain point keys ≥ `key`, next to continue
     452             :                 // looking for a file that does.
     453           1 :                 if (m.HasRangeKeys || nextInsteadOfSeek) && l.cmp(m.LargestPointKey.UserKey, key) < 0 {
     454           1 :                         // If nextInsteadOfSeek is set and nextsUntilSeek is non-negative,
     455           1 :                         // the iterator has been nexting hoping to discover the relevant
     456           1 :                         // file without seeking. It's exhausted the allotted nextsUntilSeek
     457           1 :                         // and should seek to the sought key.
     458           1 :                         if nextInsteadOfSeek && nextsUntilSeek == 0 {
     459           0 :                                 nextInsteadOfSeek = false
     460           0 :                                 m = l.files.SeekGE(l.cmp, key)
     461           0 :                                 continue
     462           1 :                         } else if nextsUntilSeek > 0 {
     463           1 :                                 nextsUntilSeek--
     464           1 :                         }
     465           1 :                         m = l.files.Next()
     466           1 :                         continue
     467             :                 }
     468             : 
     469             :                 // This file has a point key bound ≥ `key`. But the largest point key
     470             :                 // bound may still be a range deletion sentinel, which is exclusive.  In
     471             :                 // this case, the file doesn't actually contain any point keys equal to
     472             :                 // `key`. We next to keep searching for a file that actually contains
     473             :                 // point keys ≥ key.
     474             :                 //
     475             :                 // Additionally, this prevents loading untruncated range deletions from
     476             :                 // a table which can't possibly contain the target key and is required
     477             :                 // for correctness by mergingIter.SeekGE (see the comment in that
     478             :                 // function).
     479           1 :                 if m.LargestPointKey.IsExclusiveSentinel() && l.cmp(m.LargestPointKey.UserKey, key) == 0 {
     480           1 :                         m = l.files.Next()
     481           1 :                         continue
     482             :                 }
     483             : 
     484             :                 // This file contains point keys ≥ `key`. Break and return it.
     485           1 :                 break
     486             :         }
     487           1 :         return m
     488             : }
     489             : 
     490           1 : func (l *levelIter) findFileLT(key []byte, flags base.SeekLTFlags) *fileMetadata {
     491           1 :         // Find the last file whose smallest key is < ikey.
     492           1 : 
     493           1 :         // Ordinarily we seek the LevelIterator using SeekLT.
     494           1 :         //
     495           1 :         // When lazy combined iteration is enabled, there's a complication. The
     496           1 :         // level iterator is responsible for watching for files containing range
     497           1 :         // keys and triggering the switch to combined iteration when such a file is
     498           1 :         // observed. If a range deletion was observed in a higher level causing the
     499           1 :         // merging iterator to seek the level to the range deletion's start key, we
     500           1 :         // need to check whether all of the files between the old position and the
     501           1 :         // new position contain any range keys.
     502           1 :         //
     503           1 :         // In this scenario, we don't seek the LevelIterator and instead we Prev it,
     504           1 :         // one file at a time, checking each for range keys.
     505           1 :         prevInsteadOfSeek := flags.RelativeSeek() && l.combinedIterState != nil && !l.combinedIterState.initialized
     506           1 : 
     507           1 :         var m *fileMetadata
     508           1 :         if prevInsteadOfSeek {
     509           1 :                 m = l.iterFile
     510           1 :         } else {
     511           1 :                 m = l.files.SeekLT(l.cmp, key)
     512           1 :         }
     513             :         // The below loop has a bit of an unusual organization. There are several
     514             :         // conditions under which we need to Prev to a previous file. If none of
     515             :         // those conditions are met, the file in `m` is okay to return. The loop
     516             :         // body is structured with a series of if statements, each of which may
     517             :         // continue the loop to the previous file. If none of the statements are
     518             :         // met, the end of the loop body is a break.
     519           1 :         for m != nil {
     520           1 :                 if m.HasRangeKeys {
     521           1 :                         l.maybeTriggerCombinedIteration(m, -1)
     522           1 : 
     523           1 :                         // Some files may only contain range keys, which we can skip.
     524           1 :                         // NB: HasPointKeys=true if the file contains any points or range
     525           1 :                         // deletions (which delete points).
     526           1 :                         if !m.HasPointKeys {
     527           1 :                                 m = l.files.Prev()
     528           1 :                                 continue
     529             :                         }
     530             :                 }
     531             : 
     532             :                 // This file has point keys.
     533             :                 //
     534             :                 // However, there are a couple reasons why `m` may not be positioned <
     535             :                 // `key` yet:
     536             :                 //
     537             :                 // 1. If SeekLT(key) landed on a file containing range keys, the file
     538             :                 //    may contain range keys < `key` but no point keys < `key`.
     539             :                 // 2. When preving instead of seeking, we must check to see whether
     540             :                 //    we've preved sufficiently far, or we need to prev again.
     541             :                 //
     542             :                 // If the file does not contain point keys < `key`, prev to continue
     543             :                 // looking for a file that does.
     544           1 :                 if (m.HasRangeKeys || prevInsteadOfSeek) && l.cmp(m.SmallestPointKey.UserKey, key) >= 0 {
     545           1 :                         m = l.files.Prev()
     546           1 :                         continue
     547             :                 }
     548             : 
     549             :                 // This file contains point keys < `key`. Break and return it.
     550           1 :                 break
     551             :         }
     552           1 :         return m
     553             : }
     554             : 
     555             : // Init the iteration bounds for the current table. Returns -1 if the table
     556             : // lies fully before the lower bound, +1 if the table lies fully after the
     557             : // upper bound, and 0 if the table overlaps the iteration bounds.
     558           1 : func (l *levelIter) initTableBounds(f *fileMetadata) int {
     559           1 :         l.tableOpts.LowerBound = l.lower
     560           1 :         if l.tableOpts.LowerBound != nil {
     561           1 :                 if l.cmp(f.LargestPointKey.UserKey, l.tableOpts.LowerBound) < 0 {
     562           1 :                         // The largest key in the sstable is smaller than the lower bound.
     563           1 :                         return -1
     564           1 :                 }
     565           1 :                 if l.cmp(l.tableOpts.LowerBound, f.SmallestPointKey.UserKey) <= 0 {
     566           1 :                         // The lower bound is smaller or equal to the smallest key in the
     567           1 :                         // table. Iteration within the table does not need to check the lower
     568           1 :                         // bound.
     569           1 :                         l.tableOpts.LowerBound = nil
     570           1 :                 }
     571             :         }
     572           1 :         l.tableOpts.UpperBound = l.upper
     573           1 :         if l.tableOpts.UpperBound != nil {
     574           1 :                 if l.cmp(f.SmallestPointKey.UserKey, l.tableOpts.UpperBound) >= 0 {
     575           1 :                         // The smallest key in the sstable is greater than or equal to the upper
     576           1 :                         // bound.
     577           1 :                         return 1
     578           1 :                 }
     579           1 :                 if l.cmp(l.tableOpts.UpperBound, f.LargestPointKey.UserKey) > 0 {
     580           1 :                         // The upper bound is greater than the largest key in the
     581           1 :                         // table. Iteration within the table does not need to check the upper
     582           1 :                         // bound. NB: tableOpts.UpperBound is exclusive and f.LargestPointKey is
     583           1 :                         // inclusive.
     584           1 :                         l.tableOpts.UpperBound = nil
     585           1 :                 }
     586             :         }
     587           1 :         return 0
     588             : }
     589             : 
     590             : type loadFileReturnIndicator int8
     591             : 
     592             : const (
     593             :         noFileLoaded loadFileReturnIndicator = iota
     594             :         fileAlreadyLoaded
     595             :         newFileLoaded
     596             : )
     597             : 
     598           1 : func (l *levelIter) loadFile(file *fileMetadata, dir int) loadFileReturnIndicator {
     599           1 :         l.smallestBoundary = nil
     600           1 :         l.largestBoundary = nil
     601           1 :         if l.boundaryContext != nil {
     602           1 :                 l.boundaryContext.isSyntheticIterBoundsKey = false
     603           1 :                 l.boundaryContext.isIgnorableBoundaryKey = false
     604           1 :         }
     605           1 :         if l.iterFile == file {
     606           1 :                 if l.err != nil {
     607           0 :                         return noFileLoaded
     608           0 :                 }
     609           1 :                 if l.iter != nil {
     610           1 :                         // We don't bother comparing the file bounds with the iteration bounds when we have
     611           1 :                         // an already open iterator. It is possible that the iter may not be relevant given the
     612           1 :                         // current iteration bounds, but it knows those bounds, so it will enforce them.
     613           1 :                         if l.rangeDelIterPtr != nil {
     614           1 :                                 *l.rangeDelIterPtr = l.rangeDelIterCopy
     615           1 :                         }
     616             : 
     617             :                         // There are a few reasons we might not have triggered combined
     618             :                         // iteration yet, even though we already had `file` open.
     619             :                         // 1. If the bounds changed, we might have previously avoided
     620             :                         //    switching to combined iteration because the bounds excluded
     621             :                         //    the range keys contained in this file.
     622             :                         // 2. If an existing iterator was reconfigured to iterate over range
     623             :                         //    keys (eg, using SetOptions), then we wouldn't have triggered
     624             :                         //    the switch to combined iteration yet.
     625           1 :                         l.maybeTriggerCombinedIteration(file, dir)
     626           1 :                         return fileAlreadyLoaded
     627             :                 }
     628             :                 // We were already at file, but don't have an iterator, probably because the file was
     629             :                 // beyond the iteration bounds. It may still be, but it is also possible that the bounds
     630             :                 // have changed. We handle that below.
     631             :         }
     632             : 
     633             :         // Close both iter and rangeDelIterPtr. While mergingIter knows about
     634             :         // rangeDelIterPtr, it can't call Close() on it because it does not know
     635             :         // when the levelIter will switch it. Note that levelIter.Close() can be
     636             :         // called multiple times.
     637           1 :         if err := l.Close(); err != nil {
     638           1 :                 return noFileLoaded
     639           1 :         }
     640             : 
     641           1 :         for {
     642           1 :                 l.iterFile = file
     643           1 :                 if file == nil {
     644           1 :                         return noFileLoaded
     645           1 :                 }
     646             : 
     647           1 :                 l.maybeTriggerCombinedIteration(file, dir)
     648           1 :                 if !file.HasPointKeys {
     649           1 :                         switch dir {
     650           1 :                         case +1:
     651           1 :                                 file = l.files.Next()
     652           1 :                                 continue
     653           1 :                         case -1:
     654           1 :                                 file = l.files.Prev()
     655           1 :                                 continue
     656             :                         }
     657             :                 }
     658             : 
     659           1 :                 switch l.initTableBounds(file) {
     660           1 :                 case -1:
     661           1 :                         // The largest key in the sstable is smaller than the lower bound.
     662           1 :                         if dir < 0 {
     663           1 :                                 return noFileLoaded
     664           1 :                         }
     665           1 :                         file = l.files.Next()
     666           1 :                         continue
     667           1 :                 case +1:
     668           1 :                         // The smallest key in the sstable is greater than or equal to the upper
     669           1 :                         // bound.
     670           1 :                         if dir > 0 {
     671           1 :                                 return noFileLoaded
     672           1 :                         }
     673           1 :                         file = l.files.Prev()
     674           1 :                         continue
     675             :                 }
     676             : 
     677           1 :                 var rangeDelIter keyspan.FragmentIterator
     678           1 :                 var iter internalIterator
     679           1 :                 iter, rangeDelIter, l.err = l.newIters(l.ctx, l.iterFile, &l.tableOpts, l.internalOpts)
     680           1 :                 l.iter = iter
     681           1 :                 if l.err != nil {
     682           1 :                         return noFileLoaded
     683           1 :                 }
     684           1 :                 if rangeDelIter != nil {
     685           1 :                         if fi, ok := iter.(filteredIter); ok {
     686           1 :                                 l.filteredIter = fi
     687           1 :                         } else {
     688           0 :                                 l.filteredIter = nil
     689           0 :                         }
     690           1 :                 } else {
     691           1 :                         l.filteredIter = nil
     692           1 :                 }
     693           1 :                 if l.rangeDelIterPtr != nil {
     694           1 :                         *l.rangeDelIterPtr = rangeDelIter
     695           1 :                         l.rangeDelIterCopy = rangeDelIter
     696           1 :                 } else if rangeDelIter != nil {
     697           1 :                         rangeDelIter.Close()
     698           1 :                 }
     699           1 :                 if l.boundaryContext != nil {
     700           1 :                         l.boundaryContext.smallestUserKey = file.Smallest.UserKey
     701           1 :                         l.boundaryContext.largestUserKey = file.Largest.UserKey
     702           1 :                         l.boundaryContext.isLargestUserKeyExclusive = file.Largest.IsExclusiveSentinel()
     703           1 :                 }
     704           1 :                 return newFileLoaded
     705             :         }
     706             : }
     707             : 
     708             : // In race builds we verify that the keys returned by levelIter lie within
     709             : // [lower,upper).
     710           1 : func (l *levelIter) verify(key *InternalKey, val base.LazyValue) (*InternalKey, base.LazyValue) {
     711           1 :         // Note that invariants.Enabled is a compile time constant, which means the
     712           1 :         // block of code will be compiled out of normal builds making this method
     713           1 :         // eligible for inlining. Do not change this to use a variable.
     714           1 :         if invariants.Enabled && !l.disableInvariants && key != nil {
     715           1 :                 // We allow returning a boundary key that is outside of the lower/upper
     716           1 :                 // bounds as such keys are always range tombstones which will be skipped by
     717           1 :                 // the Iterator.
     718           1 :                 if l.lower != nil && key != l.smallestBoundary && l.cmp(key.UserKey, l.lower) < 0 {
     719           0 :                         l.logger.Fatalf("levelIter %s: lower bound violation: %s < %s\n%s", l.level, key, l.lower, debug.Stack())
     720           0 :                 }
     721           1 :                 if l.upper != nil && key != l.largestBoundary && l.cmp(key.UserKey, l.upper) > 0 {
     722           0 :                         l.logger.Fatalf("levelIter %s: upper bound violation: %s > %s\n%s", l.level, key, l.upper, debug.Stack())
     723           0 :                 }
     724             :         }
     725           1 :         return key, val
     726             : }
     727             : 
     728           1 : func (l *levelIter) SeekGE(key []byte, flags base.SeekGEFlags) (*InternalKey, base.LazyValue) {
     729           1 :         l.err = nil // clear cached iteration error
     730           1 :         if l.boundaryContext != nil {
     731           1 :                 l.boundaryContext.isSyntheticIterBoundsKey = false
     732           1 :                 l.boundaryContext.isIgnorableBoundaryKey = false
     733           1 :         }
     734             :         // NB: the top-level Iterator has already adjusted key based on
     735             :         // IterOptions.LowerBound.
     736           1 :         loadFileIndicator := l.loadFile(l.findFileGE(key, flags), +1)
     737           1 :         if loadFileIndicator == noFileLoaded {
     738           1 :                 return nil, base.LazyValue{}
     739           1 :         }
     740           1 :         if loadFileIndicator == newFileLoaded {
     741           1 :                 // File changed, so l.iter has changed, and that iterator is not
     742           1 :                 // positioned appropriately.
     743           1 :                 flags = flags.DisableTrySeekUsingNext()
     744           1 :         }
     745           1 :         if ikey, val := l.iter.SeekGE(key, flags); ikey != nil {
     746           1 :                 return l.verify(ikey, val)
     747           1 :         }
     748           1 :         return l.verify(l.skipEmptyFileForward())
     749             : }
     750             : 
     751             : func (l *levelIter) SeekPrefixGE(
     752             :         prefix, key []byte, flags base.SeekGEFlags,
     753           1 : ) (*base.InternalKey, base.LazyValue) {
     754           1 :         l.err = nil // clear cached iteration error
     755           1 :         if l.boundaryContext != nil {
     756           1 :                 l.boundaryContext.isSyntheticIterBoundsKey = false
     757           1 :                 l.boundaryContext.isIgnorableBoundaryKey = false
     758           1 :         }
     759             : 
     760             :         // NB: the top-level Iterator has already adjusted key based on
     761             :         // IterOptions.LowerBound.
     762           1 :         loadFileIndicator := l.loadFile(l.findFileGE(key, flags), +1)
     763           1 :         if loadFileIndicator == noFileLoaded {
     764           1 :                 return nil, base.LazyValue{}
     765           1 :         }
     766           1 :         if loadFileIndicator == newFileLoaded {
     767           1 :                 // File changed, so l.iter has changed, and that iterator is not
     768           1 :                 // positioned appropriately.
     769           1 :                 flags = flags.DisableTrySeekUsingNext()
     770           1 :         }
     771           1 :         if key, val := l.iter.SeekPrefixGE(prefix, key, flags); key != nil {
     772           1 :                 return l.verify(key, val)
     773           1 :         }
     774             :         // When SeekPrefixGE returns nil, we have not necessarily reached the end of
     775             :         // the sstable. All we know is that a key with prefix does not exist in the
     776             :         // current sstable. We do know that the key lies within the bounds of the
     777             :         // table as findFileGE found the table where key <= meta.Largest. We return
     778             :         // the table's bound with isIgnorableBoundaryKey set.
     779           1 :         if l.rangeDelIterPtr != nil && *l.rangeDelIterPtr != nil {
     780           1 :                 if l.tableOpts.UpperBound != nil {
     781           1 :                         l.syntheticBoundary.UserKey = l.tableOpts.UpperBound
     782           1 :                         l.syntheticBoundary.Trailer = InternalKeyRangeDeleteSentinel
     783           1 :                         l.largestBoundary = &l.syntheticBoundary
     784           1 :                         if l.boundaryContext != nil {
     785           1 :                                 l.boundaryContext.isSyntheticIterBoundsKey = true
     786           1 :                                 l.boundaryContext.isIgnorableBoundaryKey = false
     787           1 :                         }
     788           1 :                         return l.verify(l.largestBoundary, base.LazyValue{})
     789             :                 }
     790             :                 // Return the file's largest bound, ensuring this file stays open until
     791             :                 // the mergingIter advances beyond the file's bounds. We set
     792             :                 // isIgnorableBoundaryKey to signal that the actual key returned should
     793             :                 // be ignored, and does not represent a real key in the database.
     794           1 :                 l.largestBoundary = &l.iterFile.LargestPointKey
     795           1 :                 if l.boundaryContext != nil {
     796           1 :                         l.boundaryContext.isSyntheticIterBoundsKey = false
     797           1 :                         l.boundaryContext.isIgnorableBoundaryKey = true
     798           1 :                 }
     799           1 :                 return l.verify(l.largestBoundary, base.LazyValue{})
     800             :         }
     801             :         // It is possible that we are here because bloom filter matching failed. In
     802             :         // that case it is likely that all keys matching the prefix are wholly
     803             :         // within the current file and cannot be in the subsequent file. In that
     804             :         // case we don't want to go to the next file, since loading and seeking in
     805             :         // there has some cost. Additionally, for sparse key spaces, loading the
     806             :         // next file will defeat the optimization for the next SeekPrefixGE that is
     807             :         // called with flags.TrySeekUsingNext(), since for sparse key spaces it is
     808             :         // likely that the next key will also be contained in the current file.
     809           1 :         var n int
     810           1 :         if l.split != nil {
     811           1 :                 // If the split function is specified, calculate the prefix length accordingly.
     812           1 :                 n = l.split(l.iterFile.LargestPointKey.UserKey)
     813           1 :         } else {
     814           0 :                 // If the split function is not specified, the entire key is used as the
     815           0 :                 // prefix. This case can occur when getIter uses SeekPrefixGE.
     816           0 :                 n = len(l.iterFile.LargestPointKey.UserKey)
     817           0 :         }
     818           1 :         if l.cmp(prefix, l.iterFile.LargestPointKey.UserKey[:n]) < 0 {
     819           1 :                 return nil, base.LazyValue{}
     820           1 :         }
     821           0 :         return l.verify(l.skipEmptyFileForward())
     822             : }
     823             : 
     824           1 : func (l *levelIter) SeekLT(key []byte, flags base.SeekLTFlags) (*InternalKey, base.LazyValue) {
     825           1 :         l.err = nil // clear cached iteration error
     826           1 :         if l.boundaryContext != nil {
     827           1 :                 l.boundaryContext.isSyntheticIterBoundsKey = false
     828           1 :                 l.boundaryContext.isIgnorableBoundaryKey = false
     829           1 :         }
     830             : 
     831             :         // NB: the top-level Iterator has already adjusted key based on
     832             :         // IterOptions.UpperBound.
     833           1 :         if l.loadFile(l.findFileLT(key, flags), -1) == noFileLoaded {
     834           1 :                 return nil, base.LazyValue{}
     835           1 :         }
     836           1 :         if key, val := l.iter.SeekLT(key, flags); key != nil {
     837           1 :                 return l.verify(key, val)
     838           1 :         }
     839           1 :         return l.verify(l.skipEmptyFileBackward())
     840             : }
     841             : 
     842           1 : func (l *levelIter) First() (*InternalKey, base.LazyValue) {
     843           1 :         l.err = nil // clear cached iteration error
     844           1 :         if l.boundaryContext != nil {
     845           1 :                 l.boundaryContext.isSyntheticIterBoundsKey = false
     846           1 :                 l.boundaryContext.isIgnorableBoundaryKey = false
     847           1 :         }
     848             : 
     849             :         // NB: the top-level Iterator will call SeekGE if IterOptions.LowerBound is
     850             :         // set.
     851           1 :         if l.loadFile(l.files.First(), +1) == noFileLoaded {
     852           1 :                 return nil, base.LazyValue{}
     853           1 :         }
     854           1 :         if key, val := l.iter.First(); key != nil {
     855           1 :                 return l.verify(key, val)
     856           1 :         }
     857           1 :         return l.verify(l.skipEmptyFileForward())
     858             : }
     859             : 
     860           1 : func (l *levelIter) Last() (*InternalKey, base.LazyValue) {
     861           1 :         l.err = nil // clear cached iteration error
     862           1 :         if l.boundaryContext != nil {
     863           1 :                 l.boundaryContext.isSyntheticIterBoundsKey = false
     864           1 :                 l.boundaryContext.isIgnorableBoundaryKey = false
     865           1 :         }
     866             : 
     867             :         // NB: the top-level Iterator will call SeekLT if IterOptions.UpperBound is
     868             :         // set.
     869           1 :         if l.loadFile(l.files.Last(), -1) == noFileLoaded {
     870           1 :                 return nil, base.LazyValue{}
     871           1 :         }
     872           1 :         if key, val := l.iter.Last(); key != nil {
     873           1 :                 return l.verify(key, val)
     874           1 :         }
     875           1 :         return l.verify(l.skipEmptyFileBackward())
     876             : }
     877             : 
     878           1 : func (l *levelIter) Next() (*InternalKey, base.LazyValue) {
     879           1 :         if l.err != nil || l.iter == nil {
     880           1 :                 return nil, base.LazyValue{}
     881           1 :         }
     882           1 :         if l.boundaryContext != nil {
     883           1 :                 l.boundaryContext.isSyntheticIterBoundsKey = false
     884           1 :                 l.boundaryContext.isIgnorableBoundaryKey = false
     885           1 :         }
     886             : 
     887           1 :         switch {
     888           1 :         case l.largestBoundary != nil:
     889           1 :                 if l.tableOpts.UpperBound != nil {
     890           1 :                         // The UpperBound was within this file, so don't load the next
     891           1 :                         // file. We leave the largestBoundary unchanged so that subsequent
     892           1 :                         // calls to Next() stay at this file. If a Seek/First/Last call is
     893           1 :                         // made and this file continues to be relevant, loadFile() will
     894           1 :                         // set the largestBoundary to nil.
     895           1 :                         if l.rangeDelIterPtr != nil {
     896           1 :                                 *l.rangeDelIterPtr = nil
     897           1 :                         }
     898           1 :                         return nil, base.LazyValue{}
     899             :                 }
     900             :                 // We're stepping past the boundary key, so now we can load the next file.
     901           1 :                 if l.loadFile(l.files.Next(), +1) != noFileLoaded {
     902           1 :                         if key, val := l.iter.First(); key != nil {
     903           1 :                                 return l.verify(key, val)
     904           1 :                         }
     905           1 :                         return l.verify(l.skipEmptyFileForward())
     906             :                 }
     907           1 :                 return nil, base.LazyValue{}
     908             : 
     909           1 :         default:
     910           1 :                 // Reset the smallest boundary since we're moving away from it.
     911           1 :                 l.smallestBoundary = nil
     912           1 :                 if key, val := l.iter.Next(); key != nil {
     913           1 :                         return l.verify(key, val)
     914           1 :                 }
     915             :         }
     916           1 :         return l.verify(l.skipEmptyFileForward())
     917             : }
     918             : 
     919           1 : func (l *levelIter) NextPrefix(succKey []byte) (*InternalKey, base.LazyValue) {
     920           1 :         if l.err != nil || l.iter == nil {
     921           0 :                 return nil, base.LazyValue{}
     922           0 :         }
     923           1 :         if l.boundaryContext != nil {
     924           1 :                 l.boundaryContext.isSyntheticIterBoundsKey = false
     925           1 :                 l.boundaryContext.isIgnorableBoundaryKey = false
     926           1 :         }
     927             : 
     928           1 :         switch {
     929           0 :         case l.largestBoundary != nil:
     930           0 :                 if l.tableOpts.UpperBound != nil {
     931           0 :                         // The UpperBound was within this file, so don't load the next
     932           0 :                         // file. We leave the largestBoundary unchanged so that subsequent
     933           0 :                         // calls to Next() stay at this file. If a Seek/First/Last call is
     934           0 :                         // made and this file continues to be relevant, loadFile() will
     935           0 :                         // set the largestBoundary to nil.
     936           0 :                         if l.rangeDelIterPtr != nil {
     937           0 :                                 *l.rangeDelIterPtr = nil
     938           0 :                         }
     939           0 :                         return nil, base.LazyValue{}
     940             :                 }
     941             :                 // We're stepping past the boundary key, so we need to load a later
     942             :                 // file.
     943             : 
     944           1 :         default:
     945           1 :                 // Reset the smallest boundary since we're moving away from it.
     946           1 :                 l.smallestBoundary = nil
     947           1 : 
     948           1 :                 if key, val := l.iter.NextPrefix(succKey); key != nil {
     949           1 :                         return l.verify(key, val)
     950           1 :                 }
     951             :                 // Fall through to seeking.
     952             :         }
     953             : 
     954             :         // Seek the manifest level iterator using TrySeekUsingNext=true and
     955             :         // RelativeSeek=true so that we take advantage of the knowledge that
     956             :         // `succKey` can only be contained in later files.
     957           1 :         metadataSeekFlags := base.SeekGEFlagsNone.EnableTrySeekUsingNext().EnableRelativeSeek()
     958           1 :         if l.loadFile(l.findFileGE(succKey, metadataSeekFlags), +1) != noFileLoaded {
     959           1 :                 // NB: The SeekGE on the file's iterator must not set TrySeekUsingNext,
     960           1 :                 // because l.iter is unpositioned.
     961           1 :                 if key, val := l.iter.SeekGE(succKey, base.SeekGEFlagsNone); key != nil {
     962           1 :                         return l.verify(key, val)
     963           1 :                 }
     964           0 :                 return l.verify(l.skipEmptyFileForward())
     965             :         }
     966           0 :         return nil, base.LazyValue{}
     967             : }
     968             : 
     969           1 : func (l *levelIter) Prev() (*InternalKey, base.LazyValue) {
     970           1 :         if l.err != nil || l.iter == nil {
     971           1 :                 return nil, base.LazyValue{}
     972           1 :         }
     973           1 :         if l.boundaryContext != nil {
     974           1 :                 l.boundaryContext.isSyntheticIterBoundsKey = false
     975           1 :                 l.boundaryContext.isIgnorableBoundaryKey = false
     976           1 :         }
     977             : 
     978           1 :         switch {
     979           1 :         case l.smallestBoundary != nil:
     980           1 :                 if l.tableOpts.LowerBound != nil {
     981           1 :                         // The LowerBound was within this file, so don't load the previous
     982           1 :                         // file. We leave the smallestBoundary unchanged so that
     983           1 :                         // subsequent calls to Prev() stay at this file. If a
     984           1 :                         // Seek/First/Last call is made and this file continues to be
     985           1 :                         // relevant, loadFile() will set the smallestBoundary to nil.
     986           1 :                         if l.rangeDelIterPtr != nil {
     987           1 :                                 *l.rangeDelIterPtr = nil
     988           1 :                         }
     989           1 :                         return nil, base.LazyValue{}
     990             :                 }
     991             :                 // We're stepping past the boundary key, so now we can load the prev file.
     992           1 :                 if l.loadFile(l.files.Prev(), -1) != noFileLoaded {
     993           1 :                         if key, val := l.iter.Last(); key != nil {
     994           1 :                                 return l.verify(key, val)
     995           1 :                         }
     996           1 :                         return l.verify(l.skipEmptyFileBackward())
     997             :                 }
     998           1 :                 return nil, base.LazyValue{}
     999             : 
    1000           1 :         default:
    1001           1 :                 // Reset the largest boundary since we're moving away from it.
    1002           1 :                 l.largestBoundary = nil
    1003           1 :                 if key, val := l.iter.Prev(); key != nil {
    1004           1 :                         return l.verify(key, val)
    1005           1 :                 }
    1006             :         }
    1007           1 :         return l.verify(l.skipEmptyFileBackward())
    1008             : }
    1009             : 
    1010           1 : func (l *levelIter) skipEmptyFileForward() (*InternalKey, base.LazyValue) {
    1011           1 :         var key *InternalKey
    1012           1 :         var val base.LazyValue
    1013           1 :         // The first iteration of this loop starts with an already exhausted
    1014           1 :         // l.iter. The reason for the exhaustion is either that we iterated to the
    1015           1 :         // end of the sstable, or our iteration was terminated early due to the
    1016           1 :         // presence of an upper-bound or the use of SeekPrefixGE. If
    1017           1 :         // l.rangeDelIterPtr is non-nil, we may need to pretend the iterator is
    1018           1 :         // not exhausted to allow for the merging to finish consuming the
    1019           1 :         // l.rangeDelIterPtr before levelIter switches the rangeDelIter from
    1020           1 :         // under it. This pretense is done by either generating a synthetic
    1021           1 :         // boundary key or returning the largest key of the file, depending on the
    1022           1 :         // exhaustion reason.
    1023           1 : 
    1024           1 :         // Subsequent iterations will examine consecutive files such that the first
    1025           1 :         // file that does not have an exhausted iterator causes the code to return
    1026           1 :         // that key, else the behavior described above if there is a corresponding
    1027           1 :         // rangeDelIterPtr.
    1028           1 :         for ; key == nil; key, val = l.iter.First() {
    1029           1 :                 if l.rangeDelIterPtr != nil {
    1030           1 :                         // We're being used as part of a mergingIter and we've exhausted the
    1031           1 :                         // current sstable. If an upper bound is present and the upper bound lies
    1032           1 :                         // within the current sstable, then we will have reached the upper bound
    1033           1 :                         // rather than the end of the sstable. We need to return a synthetic
    1034           1 :                         // boundary key so that mergingIter can use the range tombstone iterator
    1035           1 :                         // until the other levels have reached this boundary.
    1036           1 :                         //
    1037           1 :                         // It is safe to set the boundary key to the UpperBound user key
    1038           1 :                         // with the RANGEDEL sentinel since it is the smallest InternalKey
    1039           1 :                         // that matches the exclusive upper bound, and does not represent
    1040           1 :                         // a real key.
    1041           1 :                         if l.tableOpts.UpperBound != nil {
    1042           1 :                                 if *l.rangeDelIterPtr != nil {
    1043           1 :                                         l.syntheticBoundary.UserKey = l.tableOpts.UpperBound
    1044           1 :                                         l.syntheticBoundary.Trailer = InternalKeyRangeDeleteSentinel
    1045           1 :                                         l.largestBoundary = &l.syntheticBoundary
    1046           1 :                                         if l.boundaryContext != nil {
    1047           1 :                                                 l.boundaryContext.isSyntheticIterBoundsKey = true
    1048           1 :                                         }
    1049           1 :                                         return l.largestBoundary, base.LazyValue{}
    1050             :                                 }
    1051             :                                 // Else there are no range deletions in this sstable. This
    1052             :                                 // helps with performance when many levels are populated with
    1053             :                                 // sstables and most don't have any actual keys within the
    1054             :                                 // bounds.
    1055           1 :                                 return nil, base.LazyValue{}
    1056             :                         }
    1057             :                         // If the boundary is a range deletion tombstone, return that key.
    1058           1 :                         if l.iterFile.LargestPointKey.Kind() == InternalKeyKindRangeDelete {
    1059           1 :                                 l.largestBoundary = &l.iterFile.LargestPointKey
    1060           1 :                                 if l.boundaryContext != nil {
    1061           1 :                                         l.boundaryContext.isIgnorableBoundaryKey = true
    1062           1 :                                 }
    1063           1 :                                 return l.largestBoundary, base.LazyValue{}
    1064             :                         }
    1065             :                         // If the last point iterator positioning op might've skipped keys,
    1066             :                         // it's possible the file's range deletions are still relevant to
    1067             :                         // other levels. Return the largest boundary as a special ignorable
    1068             :                         // marker to avoid advancing to the next file.
    1069             :                         //
    1070             :                         // The sstable iterator cannot guarantee that keys were skipped. A
    1071             :                         // SeekGE that lands on a index separator k only knows that the
    1072             :                         // block at the index entry contains keys ≤ k. We can't know whether
    1073             :                         // there were actually keys between the seek key and the index
    1074             :                         // separator key. If the block is then excluded due to block
    1075             :                         // property filters, the iterator does not know whether keys were
    1076             :                         // actually skipped by the block's exclusion.
    1077             :                         //
    1078             :                         // Since MaybeFilteredKeys cannot guarantee that keys were skipped,
    1079             :                         // it's possible l.iterFile.Largest was already returned. Returning
    1080             :                         // l.iterFile.Largest again is a violation of the strict
    1081             :                         // monotonicity normally provided. The mergingIter's heap can
    1082             :                         // tolerate this repeat key and in this case will keep the level at
    1083             :                         // the top of the heap and immediately skip the entry, advancing to
    1084             :                         // the next file.
    1085           1 :                         if *l.rangeDelIterPtr != nil && l.filteredIter != nil &&
    1086           1 :                                 l.filteredIter.MaybeFilteredKeys() {
    1087           0 :                                 l.largestBoundary = &l.iterFile.Largest
    1088           0 :                                 if l.boundaryContext != nil {
    1089           0 :                                         l.boundaryContext.isIgnorableBoundaryKey = true
    1090           0 :                                 }
    1091           0 :                                 return l.largestBoundary, base.LazyValue{}
    1092             :                         }
    1093             :                 }
    1094             : 
    1095             :                 // Current file was exhausted. Move to the next file.
    1096           1 :                 if l.loadFile(l.files.Next(), +1) == noFileLoaded {
    1097           1 :                         return nil, base.LazyValue{}
    1098           1 :                 }
    1099             :         }
    1100           1 :         return key, val
    1101             : }
    1102             : 
    1103           1 : func (l *levelIter) skipEmptyFileBackward() (*InternalKey, base.LazyValue) {
    1104           1 :         var key *InternalKey
    1105           1 :         var val base.LazyValue
    1106           1 :         // The first iteration of this loop starts with an already exhausted
    1107           1 :         // l.iter. The reason for the exhaustion is either that we iterated to the
    1108           1 :         // end of the sstable, or our iteration was terminated early due to the
    1109           1 :         // presence of a lower-bound. If l.rangeDelIterPtr is non-nil, we may need
    1110           1 :         // to pretend the iterator is not exhausted to allow for the merging to
    1111           1 :         // finish consuming the l.rangeDelIterPtr before levelIter switches the
    1112           1 :         // rangeDelIter from under it. This pretense is done by either generating
    1113           1 :         // a synthetic boundary key or returning the smallest key of the file,
    1114           1 :         // depending on the exhaustion reason.
    1115           1 : 
    1116           1 :         // Subsequent iterations will examine consecutive files such that the first
    1117           1 :         // file that does not have an exhausted iterator causes the code to return
    1118           1 :         // that key, else the behavior described above if there is a corresponding
    1119           1 :         // rangeDelIterPtr.
    1120           1 :         for ; key == nil; key, val = l.iter.Last() {
    1121           1 :                 if l.rangeDelIterPtr != nil {
    1122           1 :                         // We're being used as part of a mergingIter and we've exhausted the
    1123           1 :                         // current sstable. If a lower bound is present and the lower bound lies
    1124           1 :                         // within the current sstable, then we will have reached the lower bound
    1125           1 :                         // rather than the beginning of the sstable. We need to return a
    1126           1 :                         // synthetic boundary key so that mergingIter can use the range tombstone
    1127           1 :                         // iterator until the other levels have reached this boundary.
    1128           1 :                         //
    1129           1 :                         // It is safe to set the boundary key to the LowerBound user key
    1130           1 :                         // with the RANGEDEL sentinel since it is the smallest InternalKey
    1131           1 :                         // that is within the inclusive lower bound, and does not
    1132           1 :                         // represent a real key.
    1133           1 :                         if l.tableOpts.LowerBound != nil {
    1134           1 :                                 if *l.rangeDelIterPtr != nil {
    1135           1 :                                         l.syntheticBoundary.UserKey = l.tableOpts.LowerBound
    1136           1 :                                         l.syntheticBoundary.Trailer = InternalKeyRangeDeleteSentinel
    1137           1 :                                         l.smallestBoundary = &l.syntheticBoundary
    1138           1 :                                         if l.boundaryContext != nil {
    1139           1 :                                                 l.boundaryContext.isSyntheticIterBoundsKey = true
    1140           1 :                                         }
    1141           1 :                                         return l.smallestBoundary, base.LazyValue{}
    1142             :                                 }
    1143             :                                 // Else there are no range deletions in this sstable. This
    1144             :                                 // helps with performance when many levels are populated with
    1145             :                                 // sstables and most don't have any actual keys within the
    1146             :                                 // bounds.
    1147           1 :                                 return nil, base.LazyValue{}
    1148             :                         }
    1149             :                         // If the boundary is a range deletion tombstone, return that key.
    1150           1 :                         if l.iterFile.SmallestPointKey.Kind() == InternalKeyKindRangeDelete {
    1151           1 :                                 l.smallestBoundary = &l.iterFile.SmallestPointKey
    1152           1 :                                 if l.boundaryContext != nil {
    1153           1 :                                         l.boundaryContext.isIgnorableBoundaryKey = true
    1154           1 :                                 }
    1155           1 :                                 return l.smallestBoundary, base.LazyValue{}
    1156             :                         }
    1157             :                         // If the last point iterator positioning op skipped keys, it's
    1158             :                         // possible the file's range deletions are still relevant to other
    1159             :                         // levels. Return the smallest boundary as a special ignorable key
    1160             :                         // to avoid advancing to the next file.
    1161             :                         //
    1162             :                         // The sstable iterator cannot guarantee that keys were skipped.  A
    1163             :                         // SeekGE that lands on a index separator k only knows that the
    1164             :                         // block at the index entry contains keys ≤ k. We can't know whether
    1165             :                         // there were actually keys between the seek key and the index
    1166             :                         // separator key. If the block is then excluded due to block
    1167             :                         // property filters, the iterator does not know whether keys were
    1168             :                         // actually skipped by the block's exclusion.
    1169             :                         //
    1170             :                         // Since MaybeFilteredKeys cannot guarantee that keys were skipped,
    1171             :                         // it's possible l.iterFile.Smallest was already returned. Returning
    1172             :                         // l.iterFile.Smallest again is a violation of the strict
    1173             :                         // monotonicity normally provided. The mergingIter's heap can
    1174             :                         // tolerate this repeat key and in this case will keep the level at
    1175             :                         // the top of the heap and immediately skip the entry, advancing to
    1176             :                         // the next file.
    1177           1 :                         if *l.rangeDelIterPtr != nil && l.filteredIter != nil && l.filteredIter.MaybeFilteredKeys() {
    1178           0 :                                 l.smallestBoundary = &l.iterFile.Smallest
    1179           0 :                                 if l.boundaryContext != nil {
    1180           0 :                                         l.boundaryContext.isIgnorableBoundaryKey = true
    1181           0 :                                 }
    1182           0 :                                 return l.smallestBoundary, base.LazyValue{}
    1183             :                         }
    1184             :                 }
    1185             : 
    1186             :                 // Current file was exhausted. Move to the previous file.
    1187           1 :                 if l.loadFile(l.files.Prev(), -1) == noFileLoaded {
    1188           1 :                         return nil, base.LazyValue{}
    1189           1 :                 }
    1190             :         }
    1191           1 :         return key, val
    1192             : }
    1193             : 
    1194           1 : func (l *levelIter) Error() error {
    1195           1 :         if l.err != nil || l.iter == nil {
    1196           1 :                 return l.err
    1197           1 :         }
    1198           1 :         return l.iter.Error()
    1199             : }
    1200             : 
    1201           1 : func (l *levelIter) Close() error {
    1202           1 :         if l.iter != nil {
    1203           1 :                 l.err = l.iter.Close()
    1204           1 :                 l.iter = nil
    1205           1 :         }
    1206           1 :         if l.rangeDelIterPtr != nil {
    1207           1 :                 if t := l.rangeDelIterCopy; t != nil {
    1208           1 :                         l.err = firstError(l.err, t.Close())
    1209           1 :                 }
    1210           1 :                 *l.rangeDelIterPtr = nil
    1211           1 :                 l.rangeDelIterCopy = nil
    1212             :         }
    1213           1 :         return l.err
    1214             : }
    1215             : 
    1216           1 : func (l *levelIter) SetBounds(lower, upper []byte) {
    1217           1 :         l.lower = lower
    1218           1 :         l.upper = upper
    1219           1 : 
    1220           1 :         if l.iter == nil {
    1221           1 :                 return
    1222           1 :         }
    1223             : 
    1224             :         // Update tableOpts.{Lower,Upper}Bound in case the new boundaries fall within
    1225             :         // the boundaries of the current table.
    1226           1 :         if l.initTableBounds(l.iterFile) != 0 {
    1227           1 :                 // The table does not overlap the bounds. Close() will set levelIter.err if
    1228           1 :                 // an error occurs.
    1229           1 :                 _ = l.Close()
    1230           1 :                 return
    1231           1 :         }
    1232             : 
    1233           1 :         l.iter.SetBounds(l.tableOpts.LowerBound, l.tableOpts.UpperBound)
    1234             : }
    1235             : 
    1236           1 : func (l *levelIter) SetContext(ctx context.Context) {
    1237           1 :         l.ctx = ctx
    1238           1 :         if l.iter != nil {
    1239           0 :                 // TODO(sumeer): this is losing the ctx = objiotracing.WithLevel(ctx,
    1240           0 :                 // manifest.LevelToInt(opts.level)) that happens in table_cache.go.
    1241           0 :                 l.iter.SetContext(ctx)
    1242           0 :         }
    1243             : }
    1244             : 
    1245           1 : func (l *levelIter) String() string {
    1246           1 :         if l.iterFile != nil {
    1247           1 :                 return fmt.Sprintf("%s: fileNum=%s", l.level, l.iter.String())
    1248           1 :         }
    1249           0 :         return fmt.Sprintf("%s: fileNum=<nil>", l.level)
    1250             : }
    1251             : 
    1252             : var _ internalIterator = &levelIter{}

Generated by: LCOV version 1.14