LCOV - code coverage report
Current view: top level - pebble - iterator.go (source / functions) Hit Total Coverage
Test: 2024-10-16 08:16Z 8989e4e6 - tests + meta.lcov Lines: 1718 1915 89.7 %
Date: 2024-10-16 08:18:01 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : package pebble
       6             : 
       7             : import (
       8             :         "bytes"
       9             :         "context"
      10             :         "io"
      11             :         "math/rand/v2"
      12             :         "sync"
      13             :         "unsafe"
      14             : 
      15             :         "github.com/cockroachdb/errors"
      16             :         "github.com/cockroachdb/pebble/internal/base"
      17             :         "github.com/cockroachdb/pebble/internal/bytealloc"
      18             :         "github.com/cockroachdb/pebble/internal/humanize"
      19             :         "github.com/cockroachdb/pebble/internal/invariants"
      20             :         "github.com/cockroachdb/pebble/internal/keyspan"
      21             :         "github.com/cockroachdb/pebble/internal/keyspan/keyspanimpl"
      22             :         "github.com/cockroachdb/pebble/internal/manifest"
      23             :         "github.com/cockroachdb/pebble/internal/rangekeystack"
      24             :         "github.com/cockroachdb/pebble/internal/treeprinter"
      25             :         "github.com/cockroachdb/pebble/sstable"
      26             :         "github.com/cockroachdb/redact"
      27             : )
      28             : 
      29             : // iterPos describes the state of the internal iterator, in terms of whether it
      30             : // is at the position returned to the user (cur), one ahead of the position
      31             : // returned (next for forward iteration and prev for reverse iteration). The cur
      32             : // position is split into two states, for forward and reverse iteration, since
      33             : // we need to differentiate for switching directions.
      34             : //
      35             : // There is subtlety in what is considered the current position of the Iterator.
      36             : // The internal iterator exposes a sequence of internal keys. There is not
      37             : // always a single internalIterator position corresponding to the position
      38             : // returned to the user. Consider the example:
      39             : //
      40             : //      a.MERGE.9 a.MERGE.8 a.MERGE.7 a.SET.6 b.DELETE.9 b.DELETE.5 b.SET.4
      41             : //      \                                   /
      42             : //        \       Iterator.Key() = 'a'    /
      43             : //
      44             : // The Iterator exposes one valid position at user key 'a' and the two exhausted
      45             : // positions at the beginning and end of iteration. The underlying
      46             : // internalIterator contains 7 valid positions and 2 exhausted positions.
      47             : //
      48             : // Iterator positioning methods must set iterPos to iterPosCur{Foward,Backward}
      49             : // iff the user key at the current internalIterator position equals the
      50             : // Iterator.Key returned to the user. This guarantees that a call to nextUserKey
      51             : // or prevUserKey will advance to the next or previous iterator position.
      52             : // iterPosCur{Forward,Backward} does not make any guarantee about the internal
      53             : // iterator position among internal keys with matching user keys, and it will
      54             : // vary subtly depending on the particular key kinds encountered. In the above
      55             : // example, the iterator returning 'a' may set iterPosCurForward if the internal
      56             : // iterator is positioned at any of a.MERGE.9, a.MERGE.8, a.MERGE.7 or a.SET.6.
      57             : //
      58             : // When setting iterPos to iterPosNext or iterPosPrev, the internal iterator
      59             : // must be advanced to the first internalIterator position at a user key greater
      60             : // (iterPosNext) or less (iterPosPrev) than the key returned to the user. An
      61             : // internalIterator position that's !Valid() must also be considered greater or
      62             : // less—depending on the direction of iteration—than the last valid Iterator
      63             : // position.
      64             : type iterPos int8
      65             : 
      66             : const (
      67             :         iterPosCurForward iterPos = 0
      68             :         iterPosNext       iterPos = 1
      69             :         iterPosPrev       iterPos = -1
      70             :         iterPosCurReverse iterPos = -2
      71             : 
      72             :         // For limited iteration. When the iterator is at iterPosCurForwardPaused
      73             :         // - Next*() call should behave as if the internal iterator is already
      74             :         //   at next (akin to iterPosNext).
      75             :         // - Prev*() call should behave as if the internal iterator is at the
      76             :         //   current key (akin to iterPosCurForward).
      77             :         //
      78             :         // Similar semantics apply to CurReversePaused.
      79             :         iterPosCurForwardPaused iterPos = 2
      80             :         iterPosCurReversePaused iterPos = -3
      81             : )
      82             : 
      83             : // Approximate gap in bytes between samples of data read during iteration.
      84             : // This is multiplied with a default ReadSamplingMultiplier of 1 << 4 to yield
      85             : // 1 << 20 (1MB). The 1MB factor comes from:
      86             : // https://github.com/cockroachdb/pebble/issues/29#issuecomment-494477985
      87             : const readBytesPeriod uint64 = 1 << 16
      88             : 
      89             : var errReversePrefixIteration = errors.New("pebble: unsupported reverse prefix iteration")
      90             : 
      91             : // IteratorMetrics holds per-iterator metrics. These do not change over the
      92             : // lifetime of the iterator.
      93             : type IteratorMetrics struct {
      94             :         // The read amplification experienced by this iterator. This is the sum of
      95             :         // the memtables, the L0 sublevels and the non-empty Ln levels. Higher read
      96             :         // amplification generally results in slower reads, though allowing higher
      97             :         // read amplification can also result in faster writes.
      98             :         ReadAmp int
      99             : }
     100             : 
     101             : // IteratorStatsKind describes the two kind of iterator stats.
     102             : type IteratorStatsKind int8
     103             : 
     104             : const (
     105             :         // InterfaceCall represents calls to Iterator.
     106             :         InterfaceCall IteratorStatsKind = iota
     107             :         // InternalIterCall represents calls by Iterator to its internalIterator.
     108             :         InternalIterCall
     109             :         // NumStatsKind is the number of kinds, and is used for array sizing.
     110             :         NumStatsKind
     111             : )
     112             : 
     113             : // IteratorStats contains iteration stats.
     114             : type IteratorStats struct {
     115             :         // ForwardSeekCount includes SeekGE, SeekPrefixGE, First.
     116             :         ForwardSeekCount [NumStatsKind]int
     117             :         // ReverseSeek includes SeekLT, Last.
     118             :         ReverseSeekCount [NumStatsKind]int
     119             :         // ForwardStepCount includes Next.
     120             :         ForwardStepCount [NumStatsKind]int
     121             :         // ReverseStepCount includes Prev.
     122             :         ReverseStepCount [NumStatsKind]int
     123             :         InternalStats    InternalIteratorStats
     124             :         RangeKeyStats    RangeKeyIteratorStats
     125             : }
     126             : 
     127             : var _ redact.SafeFormatter = &IteratorStats{}
     128             : 
     129             : // InternalIteratorStats contains miscellaneous stats produced by internal
     130             : // iterators.
     131             : type InternalIteratorStats = base.InternalIteratorStats
     132             : 
     133             : // RangeKeyIteratorStats contains miscellaneous stats about range keys
     134             : // encountered by the iterator.
     135             : type RangeKeyIteratorStats struct {
     136             :         // Count records the number of range keys encountered during
     137             :         // iteration. Range keys may be counted multiple times if the iterator
     138             :         // leaves a range key's bounds and then returns.
     139             :         Count int
     140             :         // ContainedPoints records the number of point keys encountered within the
     141             :         // bounds of a range key. Note that this includes point keys with suffixes
     142             :         // that sort both above and below the covering range key's suffix.
     143             :         ContainedPoints int
     144             :         // SkippedPoints records the count of the subset of ContainedPoints point
     145             :         // keys that were skipped during iteration due to range-key masking. It does
     146             :         // not include point keys that were never loaded because a
     147             :         // RangeKeyMasking.Filter excluded the entire containing block.
     148             :         SkippedPoints int
     149             : }
     150             : 
     151             : // Merge adds all of the argument's statistics to the receiver. It may be used
     152             : // to accumulate stats across multiple iterators.
     153           1 : func (s *RangeKeyIteratorStats) Merge(o RangeKeyIteratorStats) {
     154           1 :         s.Count += o.Count
     155           1 :         s.ContainedPoints += o.ContainedPoints
     156           1 :         s.SkippedPoints += o.SkippedPoints
     157           1 : }
     158             : 
     159           0 : func (s *RangeKeyIteratorStats) String() string {
     160           0 :         return redact.StringWithoutMarkers(s)
     161           0 : }
     162             : 
     163             : // SafeFormat implements the redact.SafeFormatter interface.
     164           1 : func (s *RangeKeyIteratorStats) SafeFormat(p redact.SafePrinter, verb rune) {
     165           1 :         p.Printf("range keys: %s, contained points: %s (%s skipped)",
     166           1 :                 humanize.Count.Uint64(uint64(s.Count)),
     167           1 :                 humanize.Count.Uint64(uint64(s.ContainedPoints)),
     168           1 :                 humanize.Count.Uint64(uint64(s.SkippedPoints)))
     169           1 : }
     170             : 
     171             : // LazyValue is a lazy value. See the long comment in base.LazyValue.
     172             : type LazyValue = base.LazyValue
     173             : 
     174             : // Iterator iterates over a DB's key/value pairs in key order.
     175             : //
     176             : // An iterator must be closed after use, but it is not necessary to read an
     177             : // iterator until exhaustion.
     178             : //
     179             : // An iterator is not goroutine-safe, but it is safe to use multiple iterators
     180             : // concurrently, with each in a dedicated goroutine.
     181             : //
     182             : // It is also safe to use an iterator concurrently with modifying its
     183             : // underlying DB, if that DB permits modification. However, the resultant
     184             : // key/value pairs are not guaranteed to be a consistent snapshot of that DB
     185             : // at a particular point in time.
     186             : //
     187             : // If an iterator encounters an error during any operation, it is stored by
     188             : // the Iterator and surfaced through the Error method. All absolute
     189             : // positioning methods (eg, SeekLT, SeekGT, First, Last, etc) reset any
     190             : // accumulated error before positioning. All relative positioning methods (eg,
     191             : // Next, Prev) return without advancing if the iterator has an accumulated
     192             : // error.
     193             : type Iterator struct {
     194             :         // The context is stored here since (a) Iterators are expected to be
     195             :         // short-lived (since they pin memtables and sstables), (b) plumbing a
     196             :         // context into every method is very painful, (c) they do not (yet) respect
     197             :         // context cancellation and are only used for tracing.
     198             :         ctx       context.Context
     199             :         opts      IterOptions
     200             :         merge     Merge
     201             :         comparer  base.Comparer
     202             :         iter      internalIterator
     203             :         pointIter topLevelIterator
     204             :         // Either readState or version is set, but not both.
     205             :         readState *readState
     206             :         version   *version
     207             :         // rangeKey holds iteration state specific to iteration over range keys.
     208             :         // The range key field may be nil if the Iterator has never been configured
     209             :         // to iterate over range keys. Its non-nilness cannot be used to determine
     210             :         // if the Iterator is currently iterating over range keys: For that, consult
     211             :         // the IterOptions using opts.rangeKeys(). If non-nil, its rangeKeyIter
     212             :         // field is guaranteed to be non-nil too.
     213             :         rangeKey *iteratorRangeKeyState
     214             :         // rangeKeyMasking holds state for range-key masking of point keys.
     215             :         rangeKeyMasking rangeKeyMasking
     216             :         err             error
     217             :         // When iterValidityState=IterValid, key represents the current key, which
     218             :         // is backed by keyBuf.
     219             :         key    []byte
     220             :         keyBuf []byte
     221             :         value  LazyValue
     222             :         // For use in LazyValue.Clone.
     223             :         valueBuf []byte
     224             :         fetcher  base.LazyFetcher
     225             :         // For use in LazyValue.Value.
     226             :         lazyValueBuf []byte
     227             :         valueCloser  io.Closer
     228             :         // boundsBuf holds two buffers used to store the lower and upper bounds.
     229             :         // Whenever the Iterator's bounds change, the new bounds are copied into
     230             :         // boundsBuf[boundsBufIdx]. The two bounds share a slice to reduce
     231             :         // allocations. opts.LowerBound and opts.UpperBound point into this slice.
     232             :         boundsBuf    [2][]byte
     233             :         boundsBufIdx int
     234             :         // iterKV reflects the latest position of iter, except when SetBounds is
     235             :         // called. In that case, it is explicitly set to nil.
     236             :         iterKV              *base.InternalKV
     237             :         alloc               *iterAlloc
     238             :         getIterAlloc        *getIterAlloc
     239             :         prefixOrFullSeekKey []byte
     240             :         readSampling        readSampling
     241             :         stats               IteratorStats
     242             :         externalReaders     [][]*sstable.Reader
     243             : 
     244             :         // Following fields used when constructing an iterator stack, eg, in Clone
     245             :         // and SetOptions or when re-fragmenting a batch's range keys/range dels.
     246             :         // Non-nil if this Iterator includes a Batch.
     247             :         batch            *Batch
     248             :         newIters         tableNewIters
     249             :         newIterRangeKey  keyspanimpl.TableNewSpanIter
     250             :         lazyCombinedIter lazyCombinedIter
     251             :         seqNum           base.SeqNum
     252             :         // batchSeqNum is used by Iterators over indexed batches to detect when the
     253             :         // underlying batch has been mutated. The batch beneath an indexed batch may
     254             :         // be mutated while the Iterator is open, but new keys are not surfaced
     255             :         // until the next call to SetOptions.
     256             :         batchSeqNum base.SeqNum
     257             :         // batch{PointIter,RangeDelIter,RangeKeyIter} are used when the Iterator is
     258             :         // configured to read through an indexed batch. If a batch is set, these
     259             :         // iterators will be included within the iterator stack regardless of
     260             :         // whether the batch currently contains any keys of their kind. These
     261             :         // pointers are used during a call to SetOptions to refresh the Iterator's
     262             :         // view of its indexed batch.
     263             :         batchPointIter    batchIter
     264             :         batchRangeDelIter keyspan.Iter
     265             :         batchRangeKeyIter keyspan.Iter
     266             :         // merging is a pointer to this iterator's point merging iterator. It
     267             :         // appears here because key visibility is handled by the merging iterator.
     268             :         // During SetOptions on an iterator over an indexed batch, this field is
     269             :         // used to update the merging iterator's batch snapshot.
     270             :         merging *mergingIter
     271             : 
     272             :         // Keeping the bools here after all the 8 byte aligned fields shrinks the
     273             :         // sizeof this struct by 24 bytes.
     274             : 
     275             :         // INVARIANT:
     276             :         // iterValidityState==IterAtLimit <=>
     277             :         //  pos==iterPosCurForwardPaused || pos==iterPosCurReversePaused
     278             :         iterValidityState IterValidityState
     279             :         // Set to true by SetBounds, SetOptions. Causes the Iterator to appear
     280             :         // exhausted externally, while preserving the correct iterValidityState for
     281             :         // the iterator's internal state. Preserving the correct internal validity
     282             :         // is used for SeekPrefixGE(..., trySeekUsingNext), and SeekGE/SeekLT
     283             :         // optimizations after "no-op" calls to SetBounds and SetOptions.
     284             :         requiresReposition bool
     285             :         // The position of iter. When this is iterPos{Prev,Next} the iter has been
     286             :         // moved past the current key-value, which can only happen if
     287             :         // iterValidityState=IterValid, i.e., there is something to return to the
     288             :         // client for the current position.
     289             :         pos iterPos
     290             :         // Relates to the prefixOrFullSeekKey field above.
     291             :         hasPrefix bool
     292             :         // Used for deriving the value of SeekPrefixGE(..., trySeekUsingNext),
     293             :         // and SeekGE/SeekLT optimizations
     294             :         lastPositioningOp lastPositioningOpKind
     295             :         // Used for determining when it's safe to perform SeekGE optimizations that
     296             :         // reuse the iterator state to avoid the cost of a full seek if the iterator
     297             :         // is already positioned in the correct place. If the iterator's view of its
     298             :         // indexed batch was just refreshed, some optimizations cannot be applied on
     299             :         // the first seek after the refresh:
     300             :         // - SeekGE has a no-op optimization that does not seek on the internal
     301             :         //   iterator at all if the iterator is already in the correct place.
     302             :         //   This optimization cannot be performed if the internal iterator was
     303             :         //   last positioned when the iterator had a different view of an
     304             :         //   underlying batch.
     305             :         // - Seek[Prefix]GE set flags.TrySeekUsingNext()=true when the seek key is
     306             :         //   greater than the previous operation's seek key, under the expectation
     307             :         //   that the various internal iterators can use their current position to
     308             :         //   avoid a full expensive re-seek. This applies to the batchIter as well.
     309             :         //   However, if the view of the batch was just refreshed, the batchIter's
     310             :         //   position is not useful because it may already be beyond new keys less
     311             :         //   than the seek key. To prevent the use of this optimization in
     312             :         //   batchIter, Seek[Prefix]GE set flags.BatchJustRefreshed()=true if this
     313             :         //   bit is enabled.
     314             :         batchJustRefreshed bool
     315             :         // batchOnlyIter is set to true for Batch.NewBatchOnlyIter.
     316             :         batchOnlyIter bool
     317             :         // Used in some tests to disable the random disabling of seek optimizations.
     318             :         forceEnableSeekOpt bool
     319             :         // Set to true if NextPrefix is not currently permitted. Defaults to false
     320             :         // in case an iterator never had any bounds.
     321             :         nextPrefixNotPermittedByUpperBound bool
     322             : }
     323             : 
     324             : // cmp is a convenience shorthand for the i.comparer.Compare function.
     325           2 : func (i *Iterator) cmp(a, b []byte) int {
     326           2 :         return i.comparer.Compare(a, b)
     327           2 : }
     328             : 
     329             : // equal is a convenience shorthand for the i.comparer.Equal function.
     330           2 : func (i *Iterator) equal(a, b []byte) bool {
     331           2 :         return i.comparer.Equal(a, b)
     332           2 : }
     333             : 
     334             : // iteratorRangeKeyState holds an iterator's range key iteration state.
     335             : type iteratorRangeKeyState struct {
     336             :         opts  *IterOptions
     337             :         cmp   base.Compare
     338             :         split base.Split
     339             :         // rangeKeyIter holds the range key iterator stack that iterates over the
     340             :         // merged spans across the entirety of the LSM.
     341             :         rangeKeyIter keyspan.FragmentIterator
     342             :         iiter        keyspan.InterleavingIter
     343             :         // stale is set to true when the range key state recorded here (in start,
     344             :         // end and keys) may not be in sync with the current range key at the
     345             :         // interleaving iterator's current position.
     346             :         //
     347             :         // When the interelaving iterator passes over a new span, it invokes the
     348             :         // SpanChanged hook defined on the `rangeKeyMasking` type,  which sets stale
     349             :         // to true if the span is non-nil.
     350             :         //
     351             :         // The parent iterator may not be positioned over the interleaving
     352             :         // iterator's current position (eg, i.iterPos = iterPos{Next,Prev}), so
     353             :         // {keys,start,end} are only updated to the new range key during a call to
     354             :         // Iterator.saveRangeKey.
     355             :         stale bool
     356             :         // updated is used to signal to the Iterator client whether the state of
     357             :         // range keys has changed since the previous iterator position through the
     358             :         // `RangeKeyChanged` method. It's set to true during an Iterator positioning
     359             :         // operation that changes the state of the current range key. Each Iterator
     360             :         // positioning operation sets it back to false before executing.
     361             :         //
     362             :         // TODO(jackson): The lifecycle of {stale,updated,prevPosHadRangeKey} is
     363             :         // intricate and confusing. Try to refactor to reduce complexity.
     364             :         updated bool
     365             :         // prevPosHadRangeKey records whether the previous Iterator position had a
     366             :         // range key (HasPointAndRage() = (_, true)). It's updated at the beginning
     367             :         // of each new Iterator positioning operation. It's required by saveRangeKey to
     368             :         // to set `updated` appropriately: Without this record of the previous iterator
     369             :         // state, it's ambiguous whether an iterator only temporarily stepped onto a
     370             :         // position without a range key.
     371             :         prevPosHadRangeKey bool
     372             :         // rangeKeyOnly is set to true if at the current iterator position there is
     373             :         // no point key, only a range key start boundary.
     374             :         rangeKeyOnly bool
     375             :         // hasRangeKey is true when the current iterator position has a covering
     376             :         // range key (eg, a range key with bounds [<lower>,<upper>) such that
     377             :         // <lower> ≤ Key() < <upper>).
     378             :         hasRangeKey bool
     379             :         // start and end are the [start, end) boundaries of the current range keys.
     380             :         start []byte
     381             :         end   []byte
     382             : 
     383             :         rangeKeyBuffers
     384             : 
     385             :         // iterConfig holds fields that are used for the construction of the
     386             :         // iterator stack, but do not need to be directly accessed during iteration.
     387             :         // This struct is bundled within the iteratorRangeKeyState struct to reduce
     388             :         // allocations.
     389             :         iterConfig rangekeystack.UserIteratorConfig
     390             : }
     391             : 
     392             : type rangeKeyBuffers struct {
     393             :         // keys is sorted by Suffix ascending.
     394             :         keys []RangeKeyData
     395             :         // buf is used to save range-key data before moving the range-key iterator.
     396             :         // Start and end boundaries, suffixes and values are all copied into buf.
     397             :         buf bytealloc.A
     398             :         // internal holds buffers used by the range key internal iterators.
     399             :         internal rangekeystack.Buffers
     400             : }
     401             : 
     402           2 : func (b *rangeKeyBuffers) PrepareForReuse() {
     403           2 :         const maxKeysReuse = 100
     404           2 :         if len(b.keys) > maxKeysReuse {
     405           0 :                 b.keys = nil
     406           0 :         }
     407             :         // Avoid caching the key buf if it is overly large. The constant is
     408             :         // fairly arbitrary.
     409           2 :         if cap(b.buf) >= maxKeyBufCacheSize {
     410           2 :                 b.buf = nil
     411           2 :         } else {
     412           2 :                 b.buf = b.buf[:0]
     413           2 :         }
     414           2 :         b.internal.PrepareForReuse()
     415             : }
     416             : 
     417           2 : func (i *iteratorRangeKeyState) init(cmp base.Compare, split base.Split, opts *IterOptions) {
     418           2 :         i.cmp = cmp
     419           2 :         i.split = split
     420           2 :         i.opts = opts
     421           2 : }
     422             : 
     423             : var iterRangeKeyStateAllocPool = sync.Pool{
     424           2 :         New: func() interface{} {
     425           2 :                 return &iteratorRangeKeyState{}
     426           2 :         },
     427             : }
     428             : 
     429             : // isEphemeralPosition returns true iff the current iterator position is
     430             : // ephemeral, and won't be visited during subsequent relative positioning
     431             : // operations.
     432             : //
     433             : // The iterator position resulting from a SeekGE or SeekPrefixGE that lands on a
     434             : // straddling range key without a coincident point key is such a position.
     435           2 : func (i *Iterator) isEphemeralPosition() bool {
     436           2 :         return i.opts.rangeKeys() && i.rangeKey != nil && i.rangeKey.rangeKeyOnly &&
     437           2 :                 !i.equal(i.rangeKey.start, i.key)
     438           2 : }
     439             : 
     440             : type lastPositioningOpKind int8
     441             : 
     442             : const (
     443             :         unknownLastPositionOp lastPositioningOpKind = iota
     444             :         seekPrefixGELastPositioningOp
     445             :         seekGELastPositioningOp
     446             :         seekLTLastPositioningOp
     447             :         // internalNextOp is a special internal iterator positioning operation used
     448             :         // by CanDeterministicallySingleDelete. It exists for enforcing requirements
     449             :         // around calling CanDeterministicallySingleDelete at most once per external
     450             :         // iterator position.
     451             :         internalNextOp
     452             : )
     453             : 
     454             : // Limited iteration mode. Not for use with prefix iteration.
     455             : //
     456             : // SeekGE, SeekLT, Prev, Next have WithLimit variants, that pause the iterator
     457             : // at the limit in a best-effort manner. The client should behave correctly
     458             : // even if the limits are ignored. These limits are not "deep", in that they
     459             : // are not passed down to the underlying collection of internalIterators. This
     460             : // is because the limits are transient, and apply only until the next
     461             : // iteration call. They serve mainly as a way to bound the amount of work when
     462             : // two (or more) Iterators are being coordinated at a higher level.
     463             : //
     464             : // In limited iteration mode:
     465             : // - Avoid using Iterator.Valid if the last call was to a *WithLimit() method.
     466             : //   The return value from the *WithLimit() method provides a more precise
     467             : //   disposition.
     468             : // - The limit is exclusive for forward and inclusive for reverse.
     469             : //
     470             : //
     471             : // Limited iteration mode & range keys
     472             : //
     473             : // Limited iteration interacts with range-key iteration. When range key
     474             : // iteration is enabled, range keys are interleaved at their start boundaries.
     475             : // Limited iteration must ensure that if a range key exists within the limit,
     476             : // the iterator visits the range key.
     477             : //
     478             : // During forward limited iteration, this is trivial: An overlapping range key
     479             : // must have a start boundary less than the limit, and the range key's start
     480             : // boundary will be interleaved and found to be within the limit.
     481             : //
     482             : // During reverse limited iteration, the tail of the range key may fall within
     483             : // the limit. The range key must be surfaced even if the range key's start
     484             : // boundary is less than the limit, and if there are no point keys between the
     485             : // current iterator position and the limit. To provide this guarantee, reverse
     486             : // limited iteration ignores the limit as long as there is a range key
     487             : // overlapping the iteration position.
     488             : 
     489             : // IterValidityState captures the state of the Iterator.
     490             : type IterValidityState int8
     491             : 
     492             : const (
     493             :         // IterExhausted represents an Iterator that is exhausted.
     494             :         IterExhausted IterValidityState = iota
     495             :         // IterValid represents an Iterator that is valid.
     496             :         IterValid
     497             :         // IterAtLimit represents an Iterator that has a non-exhausted
     498             :         // internalIterator, but has reached a limit without any key for the
     499             :         // caller.
     500             :         IterAtLimit
     501             : )
     502             : 
     503             : // readSampling stores variables used to sample a read to trigger a read
     504             : // compaction
     505             : type readSampling struct {
     506             :         bytesUntilReadSampling uint64
     507             :         initialSamplePassed    bool
     508             :         pendingCompactions     readCompactionQueue
     509             :         // forceReadSampling is used for testing purposes to force a read sample on every
     510             :         // call to Iterator.maybeSampleRead()
     511             :         forceReadSampling bool
     512             : }
     513             : 
     514           2 : func (i *Iterator) findNextEntry(limit []byte) {
     515           2 :         i.iterValidityState = IterExhausted
     516           2 :         i.pos = iterPosCurForward
     517           2 :         if i.opts.rangeKeys() && i.rangeKey != nil {
     518           2 :                 i.rangeKey.rangeKeyOnly = false
     519           2 :         }
     520             : 
     521             :         // Close the closer for the current value if one was open.
     522           2 :         if i.closeValueCloser() != nil {
     523           0 :                 return
     524           0 :         }
     525             : 
     526           2 :         for i.iterKV != nil {
     527           2 :                 key := i.iterKV.K
     528           2 : 
     529           2 :                 // The topLevelIterator.StrictSeekPrefixGE contract requires that in
     530           2 :                 // prefix mode [i.hasPrefix=t], every point key returned by the internal
     531           2 :                 // iterator must have the current iteration prefix.
     532           2 :                 if invariants.Enabled && i.hasPrefix {
     533           2 :                         // Range keys are an exception to the contract and may return a different
     534           2 :                         // prefix. This case is explicitly handled in the switch statement below.
     535           2 :                         if key.Kind() != base.InternalKeyKindRangeKeySet {
     536           2 :                                 if p := i.comparer.Split.Prefix(key.UserKey); !i.equal(i.prefixOrFullSeekKey, p) {
     537           0 :                                         i.opts.logger.Fatalf("pebble: prefix violation: key %q does not have prefix %q\n", key.UserKey, i.prefixOrFullSeekKey)
     538           0 :                                 }
     539             :                         }
     540             :                 }
     541             : 
     542             :                 // Compare with limit every time we start at a different user key.
     543             :                 // Note that given the best-effort contract of limit, we could avoid a
     544             :                 // comparison in the common case by doing this only after
     545             :                 // i.nextUserKey is called for the deletes below. However that makes
     546             :                 // the behavior non-deterministic (since the behavior will vary based
     547             :                 // on what has been compacted), which makes it hard to test with the
     548             :                 // metamorphic test. So we forego that performance optimization.
     549           2 :                 if limit != nil && i.cmp(limit, i.iterKV.K.UserKey) <= 0 {
     550           2 :                         i.iterValidityState = IterAtLimit
     551           2 :                         i.pos = iterPosCurForwardPaused
     552           2 :                         return
     553           2 :                 }
     554             : 
     555             :                 // If the user has configured a SkipPoint function, invoke it to see
     556             :                 // whether we should skip over the current user key.
     557           2 :                 if i.opts.SkipPoint != nil && key.Kind() != InternalKeyKindRangeKeySet && i.opts.SkipPoint(i.iterKV.K.UserKey) {
     558           2 :                         // NB: We could call nextUserKey, but in some cases the SkipPoint
     559           2 :                         // predicate function might be cheaper than nextUserKey's key copy
     560           2 :                         // and key comparison. This should be the case for MVCC suffix
     561           2 :                         // comparisons, for example. In the future, we could expand the
     562           2 :                         // SkipPoint interface to give the implementor more control over
     563           2 :                         // whether we skip over just the internal key, the user key, or even
     564           2 :                         // the key prefix.
     565           2 :                         i.stats.ForwardStepCount[InternalIterCall]++
     566           2 :                         i.iterKV = i.iter.Next()
     567           2 :                         continue
     568             :                 }
     569             : 
     570           2 :                 switch key.Kind() {
     571           2 :                 case InternalKeyKindRangeKeySet:
     572           2 :                         if i.hasPrefix {
     573           2 :                                 if p := i.comparer.Split.Prefix(key.UserKey); !i.equal(i.prefixOrFullSeekKey, p) {
     574           2 :                                         return
     575           2 :                                 }
     576             :                         }
     577             :                         // Save the current key.
     578           2 :                         i.keyBuf = append(i.keyBuf[:0], key.UserKey...)
     579           2 :                         i.key = i.keyBuf
     580           2 :                         i.value = LazyValue{}
     581           2 :                         // There may also be a live point key at this userkey that we have
     582           2 :                         // not yet read. We need to find the next entry with this user key
     583           2 :                         // to find it. Save the range key so we don't lose it when we Next
     584           2 :                         // the underlying iterator.
     585           2 :                         i.saveRangeKey()
     586           2 :                         pointKeyExists := i.nextPointCurrentUserKey()
     587           2 :                         if i.err != nil {
     588           0 :                                 i.iterValidityState = IterExhausted
     589           0 :                                 return
     590           0 :                         }
     591           2 :                         i.rangeKey.rangeKeyOnly = !pointKeyExists
     592           2 :                         i.iterValidityState = IterValid
     593           2 :                         return
     594             : 
     595           2 :                 case InternalKeyKindDelete, InternalKeyKindSingleDelete, InternalKeyKindDeleteSized:
     596           2 :                         // NB: treating InternalKeyKindSingleDelete as equivalent to DEL is not
     597           2 :                         // only simpler, but is also necessary for correctness due to
     598           2 :                         // InternalKeyKindSSTableInternalObsoleteBit.
     599           2 :                         i.nextUserKey()
     600           2 :                         continue
     601             : 
     602           2 :                 case InternalKeyKindSet, InternalKeyKindSetWithDelete:
     603           2 :                         i.keyBuf = append(i.keyBuf[:0], key.UserKey...)
     604           2 :                         i.key = i.keyBuf
     605           2 :                         i.value = i.iterKV.V
     606           2 :                         i.iterValidityState = IterValid
     607           2 :                         i.saveRangeKey()
     608           2 :                         return
     609             : 
     610           2 :                 case InternalKeyKindMerge:
     611           2 :                         // Resolving the merge may advance us to the next point key, which
     612           2 :                         // may be covered by a different set of range keys. Save the range
     613           2 :                         // key state so we don't lose it.
     614           2 :                         i.saveRangeKey()
     615           2 :                         if i.mergeForward(key) {
     616           2 :                                 i.iterValidityState = IterValid
     617           2 :                                 return
     618           2 :                         }
     619             : 
     620             :                         // The merge didn't yield a valid key, either because the value
     621             :                         // merger indicated it should be deleted, or because an error was
     622             :                         // encountered.
     623           1 :                         i.iterValidityState = IterExhausted
     624           1 :                         if i.err != nil {
     625           1 :                                 return
     626           1 :                         }
     627           1 :                         if i.pos != iterPosNext {
     628           0 :                                 i.nextUserKey()
     629           0 :                         }
     630           1 :                         if i.closeValueCloser() != nil {
     631           0 :                                 return
     632           0 :                         }
     633           1 :                         i.pos = iterPosCurForward
     634             : 
     635           0 :                 default:
     636           0 :                         i.err = base.CorruptionErrorf("pebble: invalid internal key kind: %d", errors.Safe(key.Kind()))
     637           0 :                         i.iterValidityState = IterExhausted
     638           0 :                         return
     639             :                 }
     640             :         }
     641             : 
     642             :         // Is iterKey nil due to an error?
     643           2 :         if err := i.iter.Error(); err != nil {
     644           1 :                 i.err = err
     645           1 :                 i.iterValidityState = IterExhausted
     646           1 :         }
     647             : }
     648             : 
     649           2 : func (i *Iterator) nextPointCurrentUserKey() bool {
     650           2 :         // If the user has configured a SkipPoint function and the current user key
     651           2 :         // would be skipped by it, there's no need to step forward looking for a
     652           2 :         // point key. If we were to find one, it should be skipped anyways.
     653           2 :         if i.opts.SkipPoint != nil && i.opts.SkipPoint(i.key) {
     654           2 :                 return false
     655           2 :         }
     656             : 
     657           2 :         i.pos = iterPosCurForward
     658           2 : 
     659           2 :         i.iterKV = i.iter.Next()
     660           2 :         i.stats.ForwardStepCount[InternalIterCall]++
     661           2 :         if i.iterKV == nil {
     662           2 :                 if err := i.iter.Error(); err != nil {
     663           0 :                         i.err = err
     664           2 :                 } else {
     665           2 :                         i.pos = iterPosNext
     666           2 :                 }
     667           2 :                 return false
     668             :         }
     669           2 :         if !i.equal(i.key, i.iterKV.K.UserKey) {
     670           2 :                 i.pos = iterPosNext
     671           2 :                 return false
     672           2 :         }
     673             : 
     674           2 :         key := i.iterKV.K
     675           2 :         switch key.Kind() {
     676           0 :         case InternalKeyKindRangeKeySet:
     677           0 :                 // RangeKeySets must always be interleaved as the first internal key
     678           0 :                 // for a user key.
     679           0 :                 i.err = base.CorruptionErrorf("pebble: unexpected range key set mid-user key")
     680           0 :                 return false
     681             : 
     682           2 :         case InternalKeyKindDelete, InternalKeyKindSingleDelete, InternalKeyKindDeleteSized:
     683           2 :                 // NB: treating InternalKeyKindSingleDelete as equivalent to DEL is not
     684           2 :                 // only simpler, but is also necessary for correctness due to
     685           2 :                 // InternalKeyKindSSTableInternalObsoleteBit.
     686           2 :                 return false
     687             : 
     688           2 :         case InternalKeyKindSet, InternalKeyKindSetWithDelete:
     689           2 :                 i.value = i.iterKV.V
     690           2 :                 return true
     691             : 
     692           2 :         case InternalKeyKindMerge:
     693           2 :                 return i.mergeForward(key)
     694             : 
     695           0 :         default:
     696           0 :                 i.err = base.CorruptionErrorf("pebble: invalid internal key kind: %d", errors.Safe(key.Kind()))
     697           0 :                 return false
     698             :         }
     699             : }
     700             : 
     701             : // mergeForward resolves a MERGE key, advancing the underlying iterator forward
     702             : // to merge with subsequent keys with the same userkey. mergeForward returns a
     703             : // boolean indicating whether or not the merge yielded a valid key. A merge may
     704             : // not yield a valid key if an error occurred, in which case i.err is non-nil,
     705             : // or the user's value merger specified the key to be deleted.
     706             : //
     707             : // mergeForward does not update iterValidityState.
     708           2 : func (i *Iterator) mergeForward(key base.InternalKey) (valid bool) {
     709           2 :         var iterValue []byte
     710           2 :         iterValue, _, i.err = i.iterKV.Value(nil)
     711           2 :         if i.err != nil {
     712           0 :                 return false
     713           0 :         }
     714           2 :         var valueMerger ValueMerger
     715           2 :         valueMerger, i.err = i.merge(key.UserKey, iterValue)
     716           2 :         if i.err != nil {
     717           0 :                 return false
     718           0 :         }
     719             : 
     720           2 :         i.mergeNext(key, valueMerger)
     721           2 :         if i.err != nil {
     722           1 :                 return false
     723           1 :         }
     724             : 
     725           2 :         var needDelete bool
     726           2 :         var value []byte
     727           2 :         value, needDelete, i.valueCloser, i.err = finishValueMerger(
     728           2 :                 valueMerger, true /* includesBase */)
     729           2 :         i.value = base.MakeInPlaceValue(value)
     730           2 :         if i.err != nil {
     731           0 :                 return false
     732           0 :         }
     733           2 :         if needDelete {
     734           1 :                 _ = i.closeValueCloser()
     735           1 :                 return false
     736           1 :         }
     737           2 :         return true
     738             : }
     739             : 
     740           2 : func (i *Iterator) closeValueCloser() error {
     741           2 :         if i.valueCloser != nil {
     742           0 :                 i.err = i.valueCloser.Close()
     743           0 :                 i.valueCloser = nil
     744           0 :         }
     745           2 :         return i.err
     746             : }
     747             : 
     748           2 : func (i *Iterator) nextUserKey() {
     749           2 :         if i.iterKV == nil {
     750           2 :                 return
     751           2 :         }
     752           2 :         trailer := i.iterKV.K.Trailer
     753           2 :         done := i.iterKV.K.Trailer <= base.InternalKeyZeroSeqnumMaxTrailer
     754           2 :         if i.iterValidityState != IterValid {
     755           2 :                 i.keyBuf = append(i.keyBuf[:0], i.iterKV.K.UserKey...)
     756           2 :                 i.key = i.keyBuf
     757           2 :         }
     758           2 :         for {
     759           2 :                 i.stats.ForwardStepCount[InternalIterCall]++
     760           2 :                 i.iterKV = i.iter.Next()
     761           2 :                 if i.iterKV == nil {
     762           2 :                         if err := i.iter.Error(); err != nil {
     763           1 :                                 i.err = err
     764           1 :                                 return
     765           1 :                         }
     766             :                 }
     767             :                 // NB: We're guaranteed to be on the next user key if the previous key
     768             :                 // had a zero sequence number (`done`), or the new key has a trailer
     769             :                 // greater or equal to the previous key's trailer. This is true because
     770             :                 // internal keys with the same user key are sorted by InternalKeyTrailer in
     771             :                 // strictly monotonically descending order. We expect the trailer
     772             :                 // optimization to trigger around 50% of the time with randomly
     773             :                 // distributed writes. We expect it to trigger very frequently when
     774             :                 // iterating through ingested sstables, which contain keys that all have
     775             :                 // the same sequence number.
     776           2 :                 if done || i.iterKV == nil || i.iterKV.K.Trailer >= trailer {
     777           2 :                         break
     778             :                 }
     779           2 :                 if !i.equal(i.key, i.iterKV.K.UserKey) {
     780           2 :                         break
     781             :                 }
     782           2 :                 done = i.iterKV.K.Trailer <= base.InternalKeyZeroSeqnumMaxTrailer
     783           2 :                 trailer = i.iterKV.K.Trailer
     784             :         }
     785             : }
     786             : 
     787           2 : func (i *Iterator) maybeSampleRead() {
     788           2 :         // This method is only called when a public method of Iterator is
     789           2 :         // returning, and below we exclude the case were the iterator is paused at
     790           2 :         // a limit. The effect of these choices is that keys that are deleted, but
     791           2 :         // are encountered during iteration, are not accounted for in the read
     792           2 :         // sampling and will not cause read driven compactions, even though we are
     793           2 :         // incurring cost in iterating over them. And this issue is not limited to
     794           2 :         // Iterator, which does not see the effect of range deletes, which may be
     795           2 :         // causing iteration work in mergingIter. It is not clear at this time
     796           2 :         // whether this is a deficiency worth addressing.
     797           2 :         if i.iterValidityState != IterValid {
     798           2 :                 return
     799           2 :         }
     800           2 :         if i.readState == nil {
     801           2 :                 return
     802           2 :         }
     803           2 :         if i.readSampling.forceReadSampling {
     804           1 :                 i.sampleRead()
     805           1 :                 return
     806           1 :         }
     807           2 :         samplingPeriod := int32(int64(readBytesPeriod) * i.readState.db.opts.Experimental.ReadSamplingMultiplier)
     808           2 :         if samplingPeriod <= 0 {
     809           0 :                 return
     810           0 :         }
     811           2 :         bytesRead := uint64(len(i.key) + i.value.Len())
     812           2 :         for i.readSampling.bytesUntilReadSampling < bytesRead {
     813           2 :                 i.readSampling.bytesUntilReadSampling += uint64(rand.Uint32N(2 * uint32(samplingPeriod)))
     814           2 :                 // The block below tries to adjust for the case where this is the
     815           2 :                 // first read in a newly-opened iterator. As bytesUntilReadSampling
     816           2 :                 // starts off at zero, we don't want to sample the first read of
     817           2 :                 // every newly-opened iterator, but we do want to sample some of them.
     818           2 :                 if !i.readSampling.initialSamplePassed {
     819           2 :                         i.readSampling.initialSamplePassed = true
     820           2 :                         if i.readSampling.bytesUntilReadSampling > bytesRead {
     821           2 :                                 if rand.Uint64N(i.readSampling.bytesUntilReadSampling) > bytesRead {
     822           2 :                                         continue
     823             :                                 }
     824             :                         }
     825             :                 }
     826           2 :                 i.sampleRead()
     827             :         }
     828           2 :         i.readSampling.bytesUntilReadSampling -= bytesRead
     829             : }
     830             : 
     831           2 : func (i *Iterator) sampleRead() {
     832           2 :         var topFile *manifest.FileMetadata
     833           2 :         topLevel, numOverlappingLevels := numLevels, 0
     834           2 :         mi := i.merging
     835           2 :         if mi == nil {
     836           2 :                 return
     837           2 :         }
     838           2 :         if len(mi.levels) > 1 {
     839           2 :                 mi.ForEachLevelIter(func(li *levelIter) (done bool) {
     840           2 :                         if li.layer.IsFlushableIngests() {
     841           0 :                                 return false
     842           0 :                         }
     843           2 :                         l := li.layer.Level()
     844           2 :                         if f := li.iterFile; f != nil {
     845           2 :                                 var containsKey bool
     846           2 :                                 if i.pos == iterPosNext || i.pos == iterPosCurForward ||
     847           2 :                                         i.pos == iterPosCurForwardPaused {
     848           2 :                                         containsKey = i.cmp(f.SmallestPointKey.UserKey, i.key) <= 0
     849           2 :                                 } else if i.pos == iterPosPrev || i.pos == iterPosCurReverse ||
     850           2 :                                         i.pos == iterPosCurReversePaused {
     851           2 :                                         containsKey = i.cmp(f.LargestPointKey.UserKey, i.key) >= 0
     852           2 :                                 }
     853             :                                 // Do nothing if the current key is not contained in f's
     854             :                                 // bounds. We could seek the LevelIterator at this level
     855             :                                 // to find the right file, but the performance impacts of
     856             :                                 // doing that are significant enough to negate the benefits
     857             :                                 // of read sampling in the first place. See the discussion
     858             :                                 // at:
     859             :                                 // https://github.com/cockroachdb/pebble/pull/1041#issuecomment-763226492
     860           2 :                                 if containsKey {
     861           2 :                                         numOverlappingLevels++
     862           2 :                                         if numOverlappingLevels >= 2 {
     863           2 :                                                 // Terminate the loop early if at least 2 overlapping levels are found.
     864           2 :                                                 return true
     865           2 :                                         }
     866           2 :                                         topLevel = l
     867           2 :                                         topFile = f
     868             :                                 }
     869             :                         }
     870           2 :                         return false
     871             :                 })
     872             :         }
     873           2 :         if topFile == nil || topLevel >= numLevels {
     874           2 :                 return
     875           2 :         }
     876           2 :         if numOverlappingLevels >= 2 {
     877           2 :                 allowedSeeks := topFile.AllowedSeeks.Add(-1)
     878           2 :                 if allowedSeeks == 0 {
     879           1 : 
     880           1 :                         // Since the compaction queue can handle duplicates, we can keep
     881           1 :                         // adding to the queue even once allowedSeeks hits 0.
     882           1 :                         // In fact, we NEED to keep adding to the queue, because the queue
     883           1 :                         // is small and evicts older and possibly useful compactions.
     884           1 :                         topFile.AllowedSeeks.Add(topFile.InitAllowedSeeks)
     885           1 : 
     886           1 :                         read := readCompaction{
     887           1 :                                 start:   topFile.SmallestPointKey.UserKey,
     888           1 :                                 end:     topFile.LargestPointKey.UserKey,
     889           1 :                                 level:   topLevel,
     890           1 :                                 fileNum: topFile.FileNum,
     891           1 :                         }
     892           1 :                         i.readSampling.pendingCompactions.add(&read, i.cmp)
     893           1 :                 }
     894             :         }
     895             : }
     896             : 
     897           2 : func (i *Iterator) findPrevEntry(limit []byte) {
     898           2 :         i.iterValidityState = IterExhausted
     899           2 :         i.pos = iterPosCurReverse
     900           2 :         if i.opts.rangeKeys() && i.rangeKey != nil {
     901           2 :                 i.rangeKey.rangeKeyOnly = false
     902           2 :         }
     903             : 
     904             :         // Close the closer for the current value if one was open.
     905           2 :         if i.valueCloser != nil {
     906           0 :                 i.err = i.valueCloser.Close()
     907           0 :                 i.valueCloser = nil
     908           0 :                 if i.err != nil {
     909           0 :                         i.iterValidityState = IterExhausted
     910           0 :                         return
     911           0 :                 }
     912             :         }
     913             : 
     914           2 :         var valueMerger ValueMerger
     915           2 :         firstLoopIter := true
     916           2 :         rangeKeyBoundary := false
     917           2 :         // The code below compares with limit in multiple places. As documented in
     918           2 :         // findNextEntry, this is being done to make the behavior of limit
     919           2 :         // deterministic to allow for metamorphic testing. It is not required by
     920           2 :         // the best-effort contract of limit.
     921           2 :         for i.iterKV != nil {
     922           2 :                 key := i.iterKV.K
     923           2 : 
     924           2 :                 // NB: We cannot pause if the current key is covered by a range key.
     925           2 :                 // Otherwise, the user might not ever learn of a range key that covers
     926           2 :                 // the key space being iterated over in which there are no point keys.
     927           2 :                 // Since limits are best effort, ignoring the limit in this case is
     928           2 :                 // allowed by the contract of limit.
     929           2 :                 if firstLoopIter && limit != nil && i.cmp(limit, i.iterKV.K.UserKey) > 0 && !i.rangeKeyWithinLimit(limit) {
     930           2 :                         i.iterValidityState = IterAtLimit
     931           2 :                         i.pos = iterPosCurReversePaused
     932           2 :                         return
     933           2 :                 }
     934           2 :                 firstLoopIter = false
     935           2 : 
     936           2 :                 if i.iterValidityState == IterValid {
     937           2 :                         if !i.equal(key.UserKey, i.key) {
     938           2 :                                 // We've iterated to the previous user key.
     939           2 :                                 i.pos = iterPosPrev
     940           2 :                                 if valueMerger != nil {
     941           2 :                                         var needDelete bool
     942           2 :                                         var value []byte
     943           2 :                                         value, needDelete, i.valueCloser, i.err = finishValueMerger(valueMerger, true /* includesBase */)
     944           2 :                                         i.value = base.MakeInPlaceValue(value)
     945           2 :                                         if i.err == nil && needDelete {
     946           1 :                                                 // The point key at this key is deleted. If we also have
     947           1 :                                                 // a range key boundary at this key, we still want to
     948           1 :                                                 // return. Otherwise, we need to continue looking for
     949           1 :                                                 // a live key.
     950           1 :                                                 i.value = LazyValue{}
     951           1 :                                                 if rangeKeyBoundary {
     952           0 :                                                         i.rangeKey.rangeKeyOnly = true
     953           1 :                                                 } else {
     954           1 :                                                         i.iterValidityState = IterExhausted
     955           1 :                                                         if i.closeValueCloser() == nil {
     956           1 :                                                                 continue
     957             :                                                         }
     958             :                                                 }
     959             :                                         }
     960             :                                 }
     961           2 :                                 if i.err != nil {
     962           0 :                                         i.iterValidityState = IterExhausted
     963           0 :                                 }
     964           2 :                                 return
     965             :                         }
     966             :                 }
     967             : 
     968             :                 // If the user has configured a SkipPoint function, invoke it to see
     969             :                 // whether we should skip over the current user key.
     970           2 :                 if i.opts.SkipPoint != nil && key.Kind() != InternalKeyKindRangeKeySet && i.opts.SkipPoint(key.UserKey) {
     971           2 :                         // NB: We could call prevUserKey, but in some cases the SkipPoint
     972           2 :                         // predicate function might be cheaper than prevUserKey's key copy
     973           2 :                         // and key comparison. This should be the case for MVCC suffix
     974           2 :                         // comparisons, for example. In the future, we could expand the
     975           2 :                         // SkipPoint interface to give the implementor more control over
     976           2 :                         // whether we skip over just the internal key, the user key, or even
     977           2 :                         // the key prefix.
     978           2 :                         i.stats.ReverseStepCount[InternalIterCall]++
     979           2 :                         i.iterKV = i.iter.Prev()
     980           2 :                         if i.iterKV == nil {
     981           2 :                                 if err := i.iter.Error(); err != nil {
     982           0 :                                         i.err = err
     983           0 :                                         i.iterValidityState = IterExhausted
     984           0 :                                         return
     985           0 :                                 }
     986             :                         }
     987           2 :                         if limit != nil && i.iterKV != nil && i.cmp(limit, i.iterKV.K.UserKey) > 0 && !i.rangeKeyWithinLimit(limit) {
     988           2 :                                 i.iterValidityState = IterAtLimit
     989           2 :                                 i.pos = iterPosCurReversePaused
     990           2 :                                 return
     991           2 :                         }
     992           2 :                         continue
     993             :                 }
     994             : 
     995           2 :                 switch key.Kind() {
     996           2 :                 case InternalKeyKindRangeKeySet:
     997           2 :                         // Range key start boundary markers are interleaved with the maximum
     998           2 :                         // sequence number, so if there's a point key also at this key, we
     999           2 :                         // must've already iterated over it.
    1000           2 :                         // This is the final entry at this user key, so we may return
    1001           2 :                         i.rangeKey.rangeKeyOnly = i.iterValidityState != IterValid
    1002           2 :                         i.keyBuf = append(i.keyBuf[:0], key.UserKey...)
    1003           2 :                         i.key = i.keyBuf
    1004           2 :                         i.iterValidityState = IterValid
    1005           2 :                         i.saveRangeKey()
    1006           2 :                         // In all other cases, previous iteration requires advancing to
    1007           2 :                         // iterPosPrev in order to determine if the key is live and
    1008           2 :                         // unshadowed by another key at the same user key. In this case,
    1009           2 :                         // because range key start boundary markers are always interleaved
    1010           2 :                         // at the maximum sequence number, we know that there aren't any
    1011           2 :                         // additional keys with the same user key in the backward direction.
    1012           2 :                         //
    1013           2 :                         // We Prev the underlying iterator once anyways for consistency, so
    1014           2 :                         // that we can maintain the invariant during backward iteration that
    1015           2 :                         // i.iterPos = iterPosPrev.
    1016           2 :                         i.stats.ReverseStepCount[InternalIterCall]++
    1017           2 :                         i.iterKV = i.iter.Prev()
    1018           2 : 
    1019           2 :                         // Set rangeKeyBoundary so that on the next iteration, we know to
    1020           2 :                         // return the key even if the MERGE point key is deleted.
    1021           2 :                         rangeKeyBoundary = true
    1022             : 
    1023           2 :                 case InternalKeyKindDelete, InternalKeyKindSingleDelete, InternalKeyKindDeleteSized:
    1024           2 :                         i.value = LazyValue{}
    1025           2 :                         i.iterValidityState = IterExhausted
    1026           2 :                         valueMerger = nil
    1027           2 :                         i.stats.ReverseStepCount[InternalIterCall]++
    1028           2 :                         i.iterKV = i.iter.Prev()
    1029           2 :                         // Compare with the limit. We could optimize by only checking when
    1030           2 :                         // we step to the previous user key, but detecting that requires a
    1031           2 :                         // comparison too. Note that this position may already passed a
    1032           2 :                         // number of versions of this user key, but they are all deleted, so
    1033           2 :                         // the fact that a subsequent Prev*() call will not see them is
    1034           2 :                         // harmless. Also note that this is the only place in the loop,
    1035           2 :                         // other than the firstLoopIter and SkipPoint cases above, where we
    1036           2 :                         // could step to a different user key and start processing it for
    1037           2 :                         // returning to the caller.
    1038           2 :                         if limit != nil && i.iterKV != nil && i.cmp(limit, i.iterKV.K.UserKey) > 0 && !i.rangeKeyWithinLimit(limit) {
    1039           2 :                                 i.iterValidityState = IterAtLimit
    1040           2 :                                 i.pos = iterPosCurReversePaused
    1041           2 :                                 return
    1042           2 :                         }
    1043           2 :                         continue
    1044             : 
    1045           2 :                 case InternalKeyKindSet, InternalKeyKindSetWithDelete:
    1046           2 :                         i.keyBuf = append(i.keyBuf[:0], key.UserKey...)
    1047           2 :                         i.key = i.keyBuf
    1048           2 :                         // iterValue is owned by i.iter and could change after the Prev()
    1049           2 :                         // call, so use valueBuf instead. Note that valueBuf is only used
    1050           2 :                         // in this one instance; everywhere else (eg. in findNextEntry),
    1051           2 :                         // we just point i.value to the unsafe i.iter-owned value buffer.
    1052           2 :                         i.value, i.valueBuf = i.iterKV.V.Clone(i.valueBuf[:0], &i.fetcher)
    1053           2 :                         i.saveRangeKey()
    1054           2 :                         i.iterValidityState = IterValid
    1055           2 :                         i.iterKV = i.iter.Prev()
    1056           2 :                         i.stats.ReverseStepCount[InternalIterCall]++
    1057           2 :                         valueMerger = nil
    1058           2 :                         continue
    1059             : 
    1060           2 :                 case InternalKeyKindMerge:
    1061           2 :                         if i.iterValidityState == IterExhausted {
    1062           2 :                                 i.keyBuf = append(i.keyBuf[:0], key.UserKey...)
    1063           2 :                                 i.key = i.keyBuf
    1064           2 :                                 i.saveRangeKey()
    1065           2 :                                 var iterValue []byte
    1066           2 :                                 iterValue, _, i.err = i.iterKV.Value(nil)
    1067           2 :                                 if i.err != nil {
    1068           0 :                                         return
    1069           0 :                                 }
    1070           2 :                                 valueMerger, i.err = i.merge(i.key, iterValue)
    1071           2 :                                 if i.err != nil {
    1072           0 :                                         return
    1073           0 :                                 }
    1074           2 :                                 i.iterValidityState = IterValid
    1075           2 :                         } else if valueMerger == nil {
    1076           2 :                                 // Extract value before iterValue since we use value before iterValue
    1077           2 :                                 // and the underlying iterator is not required to provide backing
    1078           2 :                                 // memory for both simultaneously.
    1079           2 :                                 var value []byte
    1080           2 :                                 var callerOwned bool
    1081           2 :                                 value, callerOwned, i.err = i.value.Value(i.lazyValueBuf)
    1082           2 :                                 if callerOwned {
    1083           0 :                                         i.lazyValueBuf = value[:0]
    1084           0 :                                 }
    1085           2 :                                 if i.err != nil {
    1086           0 :                                         i.iterValidityState = IterExhausted
    1087           0 :                                         return
    1088           0 :                                 }
    1089           2 :                                 valueMerger, i.err = i.merge(i.key, value)
    1090           2 :                                 var iterValue []byte
    1091           2 :                                 iterValue, _, i.err = i.iterKV.Value(nil)
    1092           2 :                                 if i.err != nil {
    1093           0 :                                         i.iterValidityState = IterExhausted
    1094           0 :                                         return
    1095           0 :                                 }
    1096           2 :                                 if i.err == nil {
    1097           2 :                                         i.err = valueMerger.MergeNewer(iterValue)
    1098           2 :                                 }
    1099           2 :                                 if i.err != nil {
    1100           0 :                                         i.iterValidityState = IterExhausted
    1101           0 :                                         return
    1102           0 :                                 }
    1103           2 :                         } else {
    1104           2 :                                 var iterValue []byte
    1105           2 :                                 iterValue, _, i.err = i.iterKV.Value(nil)
    1106           2 :                                 if i.err != nil {
    1107           0 :                                         i.iterValidityState = IterExhausted
    1108           0 :                                         return
    1109           0 :                                 }
    1110           2 :                                 i.err = valueMerger.MergeNewer(iterValue)
    1111           2 :                                 if i.err != nil {
    1112           0 :                                         i.iterValidityState = IterExhausted
    1113           0 :                                         return
    1114           0 :                                 }
    1115             :                         }
    1116           2 :                         i.iterKV = i.iter.Prev()
    1117           2 :                         i.stats.ReverseStepCount[InternalIterCall]++
    1118           2 :                         continue
    1119             : 
    1120           0 :                 default:
    1121           0 :                         i.err = base.CorruptionErrorf("pebble: invalid internal key kind: %d", errors.Safe(key.Kind()))
    1122           0 :                         i.iterValidityState = IterExhausted
    1123           0 :                         return
    1124             :                 }
    1125             :         }
    1126             :         // i.iterKV == nil, so broke out of the preceding loop.
    1127             : 
    1128             :         // Is iterKey nil due to an error?
    1129           2 :         if i.err = i.iter.Error(); i.err != nil {
    1130           1 :                 i.iterValidityState = IterExhausted
    1131           1 :                 return
    1132           1 :         }
    1133             : 
    1134           2 :         if i.iterValidityState == IterValid {
    1135           2 :                 i.pos = iterPosPrev
    1136           2 :                 if valueMerger != nil {
    1137           2 :                         var needDelete bool
    1138           2 :                         var value []byte
    1139           2 :                         value, needDelete, i.valueCloser, i.err = finishValueMerger(valueMerger, true /* includesBase */)
    1140           2 :                         i.value = base.MakeInPlaceValue(value)
    1141           2 :                         if i.err == nil && needDelete {
    1142           1 :                                 i.key = nil
    1143           1 :                                 i.value = LazyValue{}
    1144           1 :                                 i.iterValidityState = IterExhausted
    1145           1 :                         }
    1146             :                 }
    1147           2 :                 if i.err != nil {
    1148           0 :                         i.iterValidityState = IterExhausted
    1149           0 :                 }
    1150             :         }
    1151             : }
    1152             : 
    1153           2 : func (i *Iterator) prevUserKey() {
    1154           2 :         if i.iterKV == nil {
    1155           2 :                 return
    1156           2 :         }
    1157           2 :         if i.iterValidityState != IterValid {
    1158           2 :                 // If we're going to compare against the prev key, we need to save the
    1159           2 :                 // current key.
    1160           2 :                 i.keyBuf = append(i.keyBuf[:0], i.iterKV.K.UserKey...)
    1161           2 :                 i.key = i.keyBuf
    1162           2 :         }
    1163           2 :         for {
    1164           2 :                 i.iterKV = i.iter.Prev()
    1165           2 :                 i.stats.ReverseStepCount[InternalIterCall]++
    1166           2 :                 if i.iterKV == nil {
    1167           2 :                         if err := i.iter.Error(); err != nil {
    1168           0 :                                 i.err = err
    1169           0 :                                 i.iterValidityState = IterExhausted
    1170           0 :                         }
    1171           2 :                         break
    1172             :                 }
    1173           2 :                 if !i.equal(i.key, i.iterKV.K.UserKey) {
    1174           2 :                         break
    1175             :                 }
    1176             :         }
    1177             : }
    1178             : 
    1179           2 : func (i *Iterator) mergeNext(key InternalKey, valueMerger ValueMerger) {
    1180           2 :         // Save the current key.
    1181           2 :         i.keyBuf = append(i.keyBuf[:0], key.UserKey...)
    1182           2 :         i.key = i.keyBuf
    1183           2 : 
    1184           2 :         // Loop looking for older values for this key and merging them.
    1185           2 :         for {
    1186           2 :                 i.iterKV = i.iter.Next()
    1187           2 :                 i.stats.ForwardStepCount[InternalIterCall]++
    1188           2 :                 if i.iterKV == nil {
    1189           2 :                         if i.err = i.iter.Error(); i.err != nil {
    1190           1 :                                 return
    1191           1 :                         }
    1192           2 :                         i.pos = iterPosNext
    1193           2 :                         return
    1194             :                 }
    1195           2 :                 key = i.iterKV.K
    1196           2 :                 if !i.equal(i.key, key.UserKey) {
    1197           2 :                         // We've advanced to the next key.
    1198           2 :                         i.pos = iterPosNext
    1199           2 :                         return
    1200           2 :                 }
    1201           2 :                 switch key.Kind() {
    1202           2 :                 case InternalKeyKindDelete, InternalKeyKindSingleDelete, InternalKeyKindDeleteSized:
    1203           2 :                         // We've hit a deletion tombstone. Return everything up to this
    1204           2 :                         // point.
    1205           2 :                         //
    1206           2 :                         // NB: treating InternalKeyKindSingleDelete as equivalent to DEL is not
    1207           2 :                         // only simpler, but is also necessary for correctness due to
    1208           2 :                         // InternalKeyKindSSTableInternalObsoleteBit.
    1209           2 :                         return
    1210             : 
    1211           2 :                 case InternalKeyKindSet, InternalKeyKindSetWithDelete:
    1212           2 :                         // We've hit a Set value. Merge with the existing value and return.
    1213           2 :                         var iterValue []byte
    1214           2 :                         iterValue, _, i.err = i.iterKV.Value(nil)
    1215           2 :                         if i.err != nil {
    1216           0 :                                 return
    1217           0 :                         }
    1218           2 :                         i.err = valueMerger.MergeOlder(iterValue)
    1219           2 :                         return
    1220             : 
    1221           2 :                 case InternalKeyKindMerge:
    1222           2 :                         // We've hit another Merge value. Merge with the existing value and
    1223           2 :                         // continue looping.
    1224           2 :                         var iterValue []byte
    1225           2 :                         iterValue, _, i.err = i.iterKV.Value(nil)
    1226           2 :                         if i.err != nil {
    1227           0 :                                 return
    1228           0 :                         }
    1229           2 :                         i.err = valueMerger.MergeOlder(iterValue)
    1230           2 :                         if i.err != nil {
    1231           0 :                                 return
    1232           0 :                         }
    1233           2 :                         continue
    1234             : 
    1235           0 :                 case InternalKeyKindRangeKeySet:
    1236           0 :                         // The RANGEKEYSET marker must sort before a MERGE at the same user key.
    1237           0 :                         i.err = base.CorruptionErrorf("pebble: out of order range key marker")
    1238           0 :                         return
    1239             : 
    1240           0 :                 default:
    1241           0 :                         i.err = base.CorruptionErrorf("pebble: invalid internal key kind: %d", errors.Safe(key.Kind()))
    1242           0 :                         return
    1243             :                 }
    1244             :         }
    1245             : }
    1246             : 
    1247             : // SeekGE moves the iterator to the first key/value pair whose key is greater
    1248             : // than or equal to the given key. Returns true if the iterator is pointing at
    1249             : // a valid entry and false otherwise.
    1250           2 : func (i *Iterator) SeekGE(key []byte) bool {
    1251           2 :         return i.SeekGEWithLimit(key, nil) == IterValid
    1252           2 : }
    1253             : 
    1254             : // SeekGEWithLimit moves the iterator to the first key/value pair whose key is
    1255             : // greater than or equal to the given key.
    1256             : //
    1257             : // If limit is provided, it serves as a best-effort exclusive limit. If the
    1258             : // first key greater than or equal to the given search key is also greater than
    1259             : // or equal to limit, the Iterator may pause and return IterAtLimit. Because
    1260             : // limits are best-effort, SeekGEWithLimit may return a key beyond limit.
    1261             : //
    1262             : // If the Iterator is configured to iterate over range keys, SeekGEWithLimit
    1263             : // guarantees it will surface any range keys with bounds overlapping the
    1264             : // keyspace [key, limit).
    1265           2 : func (i *Iterator) SeekGEWithLimit(key []byte, limit []byte) IterValidityState {
    1266           2 :         if i.rangeKey != nil {
    1267           2 :                 // NB: Check Valid() before clearing requiresReposition.
    1268           2 :                 i.rangeKey.prevPosHadRangeKey = i.rangeKey.hasRangeKey && i.Valid()
    1269           2 :                 // If we have a range key but did not expose it at the previous iterator
    1270           2 :                 // position (because the iterator was not at a valid position), updated
    1271           2 :                 // must be true. This ensures that after an iterator op sequence like:
    1272           2 :                 //   - Next()             → (IterValid, RangeBounds() = [a,b))
    1273           2 :                 //   - NextWithLimit(...) → (IterAtLimit, RangeBounds() = -)
    1274           2 :                 //   - SeekGE(...)        → (IterValid, RangeBounds() = [a,b))
    1275           2 :                 // the iterator returns RangeKeyChanged()=true.
    1276           2 :                 //
    1277           2 :                 // The remainder of this function will only update i.rangeKey.updated if
    1278           2 :                 // the iterator moves into a new range key, or out of the current range
    1279           2 :                 // key.
    1280           2 :                 i.rangeKey.updated = i.rangeKey.hasRangeKey && !i.Valid() && i.opts.rangeKeys()
    1281           2 :         }
    1282           2 :         lastPositioningOp := i.lastPositioningOp
    1283           2 :         // Set it to unknown, since this operation may not succeed, in which case
    1284           2 :         // the SeekGE following this should not make any assumption about iterator
    1285           2 :         // position.
    1286           2 :         i.lastPositioningOp = unknownLastPositionOp
    1287           2 :         i.requiresReposition = false
    1288           2 :         i.err = nil // clear cached iteration error
    1289           2 :         i.hasPrefix = false
    1290           2 :         i.stats.ForwardSeekCount[InterfaceCall]++
    1291           2 :         if lowerBound := i.opts.GetLowerBound(); lowerBound != nil && i.cmp(key, lowerBound) < 0 {
    1292           2 :                 key = lowerBound
    1293           2 :         } else if upperBound := i.opts.GetUpperBound(); upperBound != nil && i.cmp(key, upperBound) > 0 {
    1294           2 :                 key = upperBound
    1295           2 :         }
    1296           2 :         seekInternalIter := true
    1297           2 : 
    1298           2 :         var flags base.SeekGEFlags
    1299           2 :         if i.batchJustRefreshed {
    1300           1 :                 i.batchJustRefreshed = false
    1301           1 :                 flags = flags.EnableBatchJustRefreshed()
    1302           1 :         }
    1303           2 :         if lastPositioningOp == seekGELastPositioningOp {
    1304           2 :                 cmp := i.cmp(i.prefixOrFullSeekKey, key)
    1305           2 :                 // If this seek is to the same or later key, and the iterator is
    1306           2 :                 // already positioned there, this is a noop. This can be helpful for
    1307           2 :                 // sparse key spaces that have many deleted keys, where one can avoid
    1308           2 :                 // the overhead of iterating past them again and again.
    1309           2 :                 if cmp <= 0 {
    1310           2 :                         if !flags.BatchJustRefreshed() &&
    1311           2 :                                 (i.iterValidityState == IterExhausted ||
    1312           2 :                                         (i.iterValidityState == IterValid && i.cmp(key, i.key) <= 0 &&
    1313           2 :                                                 (limit == nil || i.cmp(i.key, limit) < 0))) {
    1314           2 :                                 // Noop
    1315           2 :                                 if i.forceEnableSeekOpt || !testingDisableSeekOpt(key, uintptr(unsafe.Pointer(i))) {
    1316           2 :                                         i.lastPositioningOp = seekGELastPositioningOp
    1317           2 :                                         return i.iterValidityState
    1318           2 :                                 }
    1319             :                         }
    1320             :                         // cmp == 0 is not safe to optimize since
    1321             :                         // - i.pos could be at iterPosNext, due to a merge.
    1322             :                         // - Even if i.pos were at iterPosCurForward, we could have a DELETE,
    1323             :                         //   SET pair for a key, and the iterator would have moved past DELETE
    1324             :                         //   but stayed at iterPosCurForward. A similar situation occurs for a
    1325             :                         //   MERGE, SET pair where the MERGE is consumed and the iterator is
    1326             :                         //   at the SET.
    1327             :                         // We also leverage the IterAtLimit <=> i.pos invariant defined in the
    1328             :                         // comment on iterValidityState, to exclude any cases where i.pos
    1329             :                         // is iterPosCur{Forward,Reverse}Paused. This avoids the need to
    1330             :                         // special-case those iterator positions and their interactions with
    1331             :                         // TrySeekUsingNext, as the main uses for TrySeekUsingNext in CockroachDB
    1332             :                         // do not use limited Seeks in the first place.
    1333           2 :                         if cmp < 0 && i.iterValidityState != IterAtLimit && limit == nil {
    1334           2 :                                 flags = flags.EnableTrySeekUsingNext()
    1335           2 :                         }
    1336           2 :                         if testingDisableSeekOpt(key, uintptr(unsafe.Pointer(i))) && !i.forceEnableSeekOpt {
    1337           2 :                                 flags = flags.DisableTrySeekUsingNext()
    1338           2 :                         }
    1339           2 :                         if !flags.BatchJustRefreshed() && i.pos == iterPosCurForwardPaused && i.cmp(key, i.iterKV.K.UserKey) <= 0 {
    1340           2 :                                 // Have some work to do, but don't need to seek, and we can
    1341           2 :                                 // start doing findNextEntry from i.iterKey.
    1342           2 :                                 seekInternalIter = false
    1343           2 :                         }
    1344             :                 }
    1345             :         }
    1346           2 :         if seekInternalIter {
    1347           2 :                 i.iterKV = i.iter.SeekGE(key, flags)
    1348           2 :                 i.stats.ForwardSeekCount[InternalIterCall]++
    1349           2 :                 if err := i.iter.Error(); err != nil {
    1350           1 :                         i.err = err
    1351           1 :                         i.iterValidityState = IterExhausted
    1352           1 :                         return i.iterValidityState
    1353           1 :                 }
    1354             :         }
    1355           2 :         i.findNextEntry(limit)
    1356           2 :         i.maybeSampleRead()
    1357           2 :         if i.Error() == nil {
    1358           2 :                 // Prepare state for a future noop optimization.
    1359           2 :                 i.prefixOrFullSeekKey = append(i.prefixOrFullSeekKey[:0], key...)
    1360           2 :                 i.lastPositioningOp = seekGELastPositioningOp
    1361           2 :         }
    1362           2 :         return i.iterValidityState
    1363             : }
    1364             : 
    1365             : // SeekPrefixGE moves the iterator to the first key/value pair whose key is
    1366             : // greater than or equal to the given key and which has the same "prefix" as
    1367             : // the given key. The prefix for a key is determined by the user-defined
    1368             : // Comparer.Split function. The iterator will not observe keys not matching the
    1369             : // "prefix" of the search key. Calling SeekPrefixGE puts the iterator in prefix
    1370             : // iteration mode. The iterator remains in prefix iteration until a subsequent
    1371             : // call to another absolute positioning method (SeekGE, SeekLT, First,
    1372             : // Last). Reverse iteration (Prev) is not supported when an iterator is in
    1373             : // prefix iteration mode. Returns true if the iterator is pointing at a valid
    1374             : // entry and false otherwise.
    1375             : //
    1376             : // The semantics of SeekPrefixGE are slightly unusual and designed for
    1377             : // iteration to be able to take advantage of bloom filters that have been
    1378             : // created on the "prefix". If you're not using bloom filters, there is no
    1379             : // reason to use SeekPrefixGE.
    1380             : //
    1381             : // An example Split function may separate a timestamp suffix from the prefix of
    1382             : // the key.
    1383             : //
    1384             : //      Split(<key>@<timestamp>) -> <key>
    1385             : //
    1386             : // Consider the keys "a@1", "a@2", "aa@3", "aa@4". The prefixes for these keys
    1387             : // are "a", and "aa". Note that despite "a" and "aa" sharing a prefix by the
    1388             : // usual definition, those prefixes differ by the definition of the Split
    1389             : // function. To see how this works, consider the following set of calls on this
    1390             : // data set:
    1391             : //
    1392             : //      SeekPrefixGE("a@0") -> "a@1"
    1393             : //      Next()              -> "a@2"
    1394             : //      Next()              -> EOF
    1395             : //
    1396             : // If you're just looking to iterate over keys with a shared prefix, as
    1397             : // defined by the configured comparer, set iterator bounds instead:
    1398             : //
    1399             : //      iter := db.NewIter(&pebble.IterOptions{
    1400             : //        LowerBound: []byte("prefix"),
    1401             : //        UpperBound: []byte("prefiy"),
    1402             : //      })
    1403             : //      for iter.First(); iter.Valid(); iter.Next() {
    1404             : //        // Only keys beginning with "prefix" will be visited.
    1405             : //      }
    1406             : //
    1407             : // See ExampleIterator_SeekPrefixGE for a working example.
    1408             : //
    1409             : // When iterating with range keys enabled, all range keys encountered are
    1410             : // truncated to the seek key's prefix's bounds. The truncation of the upper
    1411             : // bound requires that the database's Comparer is configured with a
    1412             : // ImmediateSuccessor method. For example, a SeekPrefixGE("a@9") call with the
    1413             : // prefix "a" will truncate range key bounds to [a,ImmediateSuccessor(a)].
    1414           2 : func (i *Iterator) SeekPrefixGE(key []byte) bool {
    1415           2 :         if i.rangeKey != nil {
    1416           2 :                 // NB: Check Valid() before clearing requiresReposition.
    1417           2 :                 i.rangeKey.prevPosHadRangeKey = i.rangeKey.hasRangeKey && i.Valid()
    1418           2 :                 // If we have a range key but did not expose it at the previous iterator
    1419           2 :                 // position (because the iterator was not at a valid position), updated
    1420           2 :                 // must be true. This ensures that after an iterator op sequence like:
    1421           2 :                 //   - Next()             → (IterValid, RangeBounds() = [a,b))
    1422           2 :                 //   - NextWithLimit(...) → (IterAtLimit, RangeBounds() = -)
    1423           2 :                 //   - SeekPrefixGE(...)  → (IterValid, RangeBounds() = [a,b))
    1424           2 :                 // the iterator returns RangeKeyChanged()=true.
    1425           2 :                 //
    1426           2 :                 // The remainder of this function will only update i.rangeKey.updated if
    1427           2 :                 // the iterator moves into a new range key, or out of the current range
    1428           2 :                 // key.
    1429           2 :                 i.rangeKey.updated = i.rangeKey.hasRangeKey && !i.Valid() && i.opts.rangeKeys()
    1430           2 :         }
    1431           2 :         lastPositioningOp := i.lastPositioningOp
    1432           2 :         // Set it to unknown, since this operation may not succeed, in which case
    1433           2 :         // the SeekPrefixGE following this should not make any assumption about
    1434           2 :         // iterator position.
    1435           2 :         i.lastPositioningOp = unknownLastPositionOp
    1436           2 :         i.requiresReposition = false
    1437           2 :         i.err = nil // clear cached iteration error
    1438           2 :         i.stats.ForwardSeekCount[InterfaceCall]++
    1439           2 :         if i.comparer.ImmediateSuccessor == nil && i.opts.KeyTypes != IterKeyTypePointsOnly {
    1440           0 :                 panic("pebble: ImmediateSuccessor must be provided for SeekPrefixGE with range keys")
    1441             :         }
    1442           2 :         prefixLen := i.comparer.Split(key)
    1443           2 :         keyPrefix := key[:prefixLen]
    1444           2 :         var flags base.SeekGEFlags
    1445           2 :         if i.batchJustRefreshed {
    1446           1 :                 flags = flags.EnableBatchJustRefreshed()
    1447           1 :                 i.batchJustRefreshed = false
    1448           1 :         }
    1449           2 :         if lastPositioningOp == seekPrefixGELastPositioningOp {
    1450           2 :                 if !i.hasPrefix {
    1451           0 :                         panic("lastPositioningOpsIsSeekPrefixGE is true, but hasPrefix is false")
    1452             :                 }
    1453             :                 // The iterator has not been repositioned after the last SeekPrefixGE.
    1454             :                 // See if we are seeking to a larger key, since then we can optimize
    1455             :                 // the seek by using next. Note that we could also optimize if Next
    1456             :                 // has been called, if the iterator is not exhausted and the current
    1457             :                 // position is <= the seek key. We are keeping this limited for now
    1458             :                 // since such optimizations require care for correctness, and to not
    1459             :                 // become de-optimizations (if one usually has to do all the next
    1460             :                 // calls and then the seek). This SeekPrefixGE optimization
    1461             :                 // specifically benefits CockroachDB.
    1462           2 :                 cmp := i.cmp(i.prefixOrFullSeekKey, keyPrefix)
    1463           2 :                 // cmp == 0 is not safe to optimize since
    1464           2 :                 // - i.pos could be at iterPosNext, due to a merge.
    1465           2 :                 // - Even if i.pos were at iterPosCurForward, we could have a DELETE,
    1466           2 :                 //   SET pair for a key, and the iterator would have moved past DELETE
    1467           2 :                 //   but stayed at iterPosCurForward. A similar situation occurs for a
    1468           2 :                 //   MERGE, SET pair where the MERGE is consumed and the iterator is
    1469           2 :                 //   at the SET.
    1470           2 :                 // In general some versions of i.prefix could have been consumed by
    1471           2 :                 // the iterator, so we only optimize for cmp < 0.
    1472           2 :                 if cmp < 0 {
    1473           2 :                         flags = flags.EnableTrySeekUsingNext()
    1474           2 :                 }
    1475           2 :                 if testingDisableSeekOpt(key, uintptr(unsafe.Pointer(i))) && !i.forceEnableSeekOpt {
    1476           2 :                         flags = flags.DisableTrySeekUsingNext()
    1477           2 :                 }
    1478             :         }
    1479             :         // Make a copy of the prefix so that modifications to the key after
    1480             :         // SeekPrefixGE returns does not affect the stored prefix.
    1481           2 :         if cap(i.prefixOrFullSeekKey) < prefixLen {
    1482           2 :                 i.prefixOrFullSeekKey = make([]byte, prefixLen)
    1483           2 :         } else {
    1484           2 :                 i.prefixOrFullSeekKey = i.prefixOrFullSeekKey[:prefixLen]
    1485           2 :         }
    1486           2 :         i.hasPrefix = true
    1487           2 :         copy(i.prefixOrFullSeekKey, keyPrefix)
    1488           2 : 
    1489           2 :         if lowerBound := i.opts.GetLowerBound(); lowerBound != nil && i.cmp(key, lowerBound) < 0 {
    1490           2 :                 if p := i.comparer.Split.Prefix(lowerBound); !bytes.Equal(i.prefixOrFullSeekKey, p) {
    1491           2 :                         i.err = errors.New("pebble: SeekPrefixGE supplied with key outside of lower bound")
    1492           2 :                         i.iterValidityState = IterExhausted
    1493           2 :                         return false
    1494           2 :                 }
    1495           2 :                 key = lowerBound
    1496           2 :         } else if upperBound := i.opts.GetUpperBound(); upperBound != nil && i.cmp(key, upperBound) > 0 {
    1497           2 :                 if p := i.comparer.Split.Prefix(upperBound); !bytes.Equal(i.prefixOrFullSeekKey, p) {
    1498           2 :                         i.err = errors.New("pebble: SeekPrefixGE supplied with key outside of upper bound")
    1499           2 :                         i.iterValidityState = IterExhausted
    1500           2 :                         return false
    1501           2 :                 }
    1502           2 :                 key = upperBound
    1503             :         }
    1504           2 :         i.iterKV = i.iter.SeekPrefixGE(i.prefixOrFullSeekKey, key, flags)
    1505           2 :         i.stats.ForwardSeekCount[InternalIterCall]++
    1506           2 :         i.findNextEntry(nil)
    1507           2 :         i.maybeSampleRead()
    1508           2 :         if i.Error() == nil {
    1509           2 :                 i.lastPositioningOp = seekPrefixGELastPositioningOp
    1510           2 :         }
    1511           2 :         return i.iterValidityState == IterValid
    1512             : }
    1513             : 
    1514             : // Deterministic disabling (in testing mode) of the seek optimizations. It uses
    1515             : // the iterator pointer, since we want diversity in iterator behavior for the
    1516             : // same key.  Used for tests.
    1517           2 : func testingDisableSeekOpt(key []byte, ptr uintptr) bool {
    1518           2 :         if !invariants.Enabled {
    1519           0 :                 return false
    1520           0 :         }
    1521             :         // Fibonacci hash https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
    1522           2 :         simpleHash := (11400714819323198485 * uint64(ptr)) >> 63
    1523           2 :         return key != nil && key[0]&byte(1) == 0 && simpleHash == 0
    1524             : }
    1525             : 
    1526             : // SeekLT moves the iterator to the last key/value pair whose key is less than
    1527             : // the given key. Returns true if the iterator is pointing at a valid entry and
    1528             : // false otherwise.
    1529           2 : func (i *Iterator) SeekLT(key []byte) bool {
    1530           2 :         return i.SeekLTWithLimit(key, nil) == IterValid
    1531           2 : }
    1532             : 
    1533             : // SeekLTWithLimit moves the iterator to the last key/value pair whose key is
    1534             : // less than the given key.
    1535             : //
    1536             : // If limit is provided, it serves as a best-effort inclusive limit. If the last
    1537             : // key less than the given search key is also less than limit, the Iterator may
    1538             : // pause and return IterAtLimit. Because limits are best-effort, SeekLTWithLimit
    1539             : // may return a key beyond limit.
    1540             : //
    1541             : // If the Iterator is configured to iterate over range keys, SeekLTWithLimit
    1542             : // guarantees it will surface any range keys with bounds overlapping the
    1543             : // keyspace up to limit.
    1544           2 : func (i *Iterator) SeekLTWithLimit(key []byte, limit []byte) IterValidityState {
    1545           2 :         if i.rangeKey != nil {
    1546           2 :                 // NB: Check Valid() before clearing requiresReposition.
    1547           2 :                 i.rangeKey.prevPosHadRangeKey = i.rangeKey.hasRangeKey && i.Valid()
    1548           2 :                 // If we have a range key but did not expose it at the previous iterator
    1549           2 :                 // position (because the iterator was not at a valid position), updated
    1550           2 :                 // must be true. This ensures that after an iterator op sequence like:
    1551           2 :                 //   - Next()               → (IterValid, RangeBounds() = [a,b))
    1552           2 :                 //   - NextWithLimit(...)   → (IterAtLimit, RangeBounds() = -)
    1553           2 :                 //   - SeekLTWithLimit(...) → (IterValid, RangeBounds() = [a,b))
    1554           2 :                 // the iterator returns RangeKeyChanged()=true.
    1555           2 :                 //
    1556           2 :                 // The remainder of this function will only update i.rangeKey.updated if
    1557           2 :                 // the iterator moves into a new range key, or out of the current range
    1558           2 :                 // key.
    1559           2 :                 i.rangeKey.updated = i.rangeKey.hasRangeKey && !i.Valid() && i.opts.rangeKeys()
    1560           2 :         }
    1561           2 :         lastPositioningOp := i.lastPositioningOp
    1562           2 :         // Set it to unknown, since this operation may not succeed, in which case
    1563           2 :         // the SeekLT following this should not make any assumption about iterator
    1564           2 :         // position.
    1565           2 :         i.lastPositioningOp = unknownLastPositionOp
    1566           2 :         i.batchJustRefreshed = false
    1567           2 :         i.requiresReposition = false
    1568           2 :         i.err = nil // clear cached iteration error
    1569           2 :         i.stats.ReverseSeekCount[InterfaceCall]++
    1570           2 :         if upperBound := i.opts.GetUpperBound(); upperBound != nil && i.cmp(key, upperBound) > 0 {
    1571           2 :                 key = upperBound
    1572           2 :         } else if lowerBound := i.opts.GetLowerBound(); lowerBound != nil && i.cmp(key, lowerBound) < 0 {
    1573           2 :                 key = lowerBound
    1574           2 :         }
    1575           2 :         i.hasPrefix = false
    1576           2 :         seekInternalIter := true
    1577           2 :         // The following noop optimization only applies when i.batch == nil, since
    1578           2 :         // an iterator over a batch is iterating over mutable data, that may have
    1579           2 :         // changed since the last seek.
    1580           2 :         if lastPositioningOp == seekLTLastPositioningOp && i.batch == nil {
    1581           2 :                 cmp := i.cmp(key, i.prefixOrFullSeekKey)
    1582           2 :                 // If this seek is to the same or earlier key, and the iterator is
    1583           2 :                 // already positioned there, this is a noop. This can be helpful for
    1584           2 :                 // sparse key spaces that have many deleted keys, where one can avoid
    1585           2 :                 // the overhead of iterating past them again and again.
    1586           2 :                 if cmp <= 0 {
    1587           2 :                         // NB: when pos != iterPosCurReversePaused, the invariant
    1588           2 :                         // documented earlier implies that iterValidityState !=
    1589           2 :                         // IterAtLimit.
    1590           2 :                         if i.iterValidityState == IterExhausted ||
    1591           2 :                                 (i.iterValidityState == IterValid && i.cmp(i.key, key) < 0 &&
    1592           2 :                                         (limit == nil || i.cmp(limit, i.key) <= 0)) {
    1593           2 :                                 if !testingDisableSeekOpt(key, uintptr(unsafe.Pointer(i))) {
    1594           2 :                                         i.lastPositioningOp = seekLTLastPositioningOp
    1595           2 :                                         return i.iterValidityState
    1596           2 :                                 }
    1597             :                         }
    1598           2 :                         if i.pos == iterPosCurReversePaused && i.cmp(i.iterKV.K.UserKey, key) < 0 {
    1599           2 :                                 // Have some work to do, but don't need to seek, and we can
    1600           2 :                                 // start doing findPrevEntry from i.iterKey.
    1601           2 :                                 seekInternalIter = false
    1602           2 :                         }
    1603             :                 }
    1604             :         }
    1605           2 :         if seekInternalIter {
    1606           2 :                 i.iterKV = i.iter.SeekLT(key, base.SeekLTFlagsNone)
    1607           2 :                 i.stats.ReverseSeekCount[InternalIterCall]++
    1608           2 :                 if err := i.iter.Error(); err != nil {
    1609           1 :                         i.err = err
    1610           1 :                         i.iterValidityState = IterExhausted
    1611           1 :                         return i.iterValidityState
    1612           1 :                 }
    1613             :         }
    1614           2 :         i.findPrevEntry(limit)
    1615           2 :         i.maybeSampleRead()
    1616           2 :         if i.Error() == nil && i.batch == nil {
    1617           2 :                 // Prepare state for a future noop optimization.
    1618           2 :                 i.prefixOrFullSeekKey = append(i.prefixOrFullSeekKey[:0], key...)
    1619           2 :                 i.lastPositioningOp = seekLTLastPositioningOp
    1620           2 :         }
    1621           2 :         return i.iterValidityState
    1622             : }
    1623             : 
    1624             : // First moves the iterator the first key/value pair. Returns true if the
    1625             : // iterator is pointing at a valid entry and false otherwise.
    1626           2 : func (i *Iterator) First() bool {
    1627           2 :         if i.rangeKey != nil {
    1628           2 :                 // NB: Check Valid() before clearing requiresReposition.
    1629           2 :                 i.rangeKey.prevPosHadRangeKey = i.rangeKey.hasRangeKey && i.Valid()
    1630           2 :                 // If we have a range key but did not expose it at the previous iterator
    1631           2 :                 // position (because the iterator was not at a valid position), updated
    1632           2 :                 // must be true. This ensures that after an iterator op sequence like:
    1633           2 :                 //   - Next()             → (IterValid, RangeBounds() = [a,b))
    1634           2 :                 //   - NextWithLimit(...) → (IterAtLimit, RangeBounds() = -)
    1635           2 :                 //   - First(...)         → (IterValid, RangeBounds() = [a,b))
    1636           2 :                 // the iterator returns RangeKeyChanged()=true.
    1637           2 :                 //
    1638           2 :                 // The remainder of this function will only update i.rangeKey.updated if
    1639           2 :                 // the iterator moves into a new range key, or out of the current range
    1640           2 :                 // key.
    1641           2 :                 i.rangeKey.updated = i.rangeKey.hasRangeKey && !i.Valid() && i.opts.rangeKeys()
    1642           2 :         }
    1643           2 :         i.err = nil // clear cached iteration error
    1644           2 :         i.hasPrefix = false
    1645           2 :         i.batchJustRefreshed = false
    1646           2 :         i.lastPositioningOp = unknownLastPositionOp
    1647           2 :         i.requiresReposition = false
    1648           2 :         i.stats.ForwardSeekCount[InterfaceCall]++
    1649           2 : 
    1650           2 :         i.err = i.iterFirstWithinBounds()
    1651           2 :         if i.err != nil {
    1652           1 :                 i.iterValidityState = IterExhausted
    1653           1 :                 return false
    1654           1 :         }
    1655           2 :         i.findNextEntry(nil)
    1656           2 :         i.maybeSampleRead()
    1657           2 :         return i.iterValidityState == IterValid
    1658             : }
    1659             : 
    1660             : // Last moves the iterator the last key/value pair. Returns true if the
    1661             : // iterator is pointing at a valid entry and false otherwise.
    1662           2 : func (i *Iterator) Last() bool {
    1663           2 :         if i.rangeKey != nil {
    1664           2 :                 // NB: Check Valid() before clearing requiresReposition.
    1665           2 :                 i.rangeKey.prevPosHadRangeKey = i.rangeKey.hasRangeKey && i.Valid()
    1666           2 :                 // If we have a range key but did not expose it at the previous iterator
    1667           2 :                 // position (because the iterator was not at a valid position), updated
    1668           2 :                 // must be true. This ensures that after an iterator op sequence like:
    1669           2 :                 //   - Next()             → (IterValid, RangeBounds() = [a,b))
    1670           2 :                 //   - NextWithLimit(...) → (IterAtLimit, RangeBounds() = -)
    1671           2 :                 //   - Last(...)          → (IterValid, RangeBounds() = [a,b))
    1672           2 :                 // the iterator returns RangeKeyChanged()=true.
    1673           2 :                 //
    1674           2 :                 // The remainder of this function will only update i.rangeKey.updated if
    1675           2 :                 // the iterator moves into a new range key, or out of the current range
    1676           2 :                 // key.
    1677           2 :                 i.rangeKey.updated = i.rangeKey.hasRangeKey && !i.Valid() && i.opts.rangeKeys()
    1678           2 :         }
    1679           2 :         i.err = nil // clear cached iteration error
    1680           2 :         i.hasPrefix = false
    1681           2 :         i.batchJustRefreshed = false
    1682           2 :         i.lastPositioningOp = unknownLastPositionOp
    1683           2 :         i.requiresReposition = false
    1684           2 :         i.stats.ReverseSeekCount[InterfaceCall]++
    1685           2 : 
    1686           2 :         if i.err = i.iterLastWithinBounds(); i.err != nil {
    1687           1 :                 i.iterValidityState = IterExhausted
    1688           1 :                 return false
    1689           1 :         }
    1690           2 :         i.findPrevEntry(nil)
    1691           2 :         i.maybeSampleRead()
    1692           2 :         return i.iterValidityState == IterValid
    1693             : }
    1694             : 
    1695             : // Next moves the iterator to the next key/value pair. Returns true if the
    1696             : // iterator is pointing at a valid entry and false otherwise.
    1697           2 : func (i *Iterator) Next() bool {
    1698           2 :         return i.nextWithLimit(nil) == IterValid
    1699           2 : }
    1700             : 
    1701             : // NextWithLimit moves the iterator to the next key/value pair.
    1702             : //
    1703             : // If limit is provided, it serves as a best-effort exclusive limit. If the next
    1704             : // key  is greater than or equal to limit, the Iterator may pause and return
    1705             : // IterAtLimit. Because limits are best-effort, NextWithLimit may return a key
    1706             : // beyond limit.
    1707             : //
    1708             : // If the Iterator is configured to iterate over range keys, NextWithLimit
    1709             : // guarantees it will surface any range keys with bounds overlapping the
    1710             : // keyspace up to limit.
    1711           2 : func (i *Iterator) NextWithLimit(limit []byte) IterValidityState {
    1712           2 :         return i.nextWithLimit(limit)
    1713           2 : }
    1714             : 
    1715             : // NextPrefix moves the iterator to the next key/value pair with a key
    1716             : // containing a different prefix than the current key. Prefixes are determined
    1717             : // by Comparer.Split. Exhausts the iterator if invoked while in prefix-iteration
    1718             : // mode.
    1719             : //
    1720             : // It is not permitted to invoke NextPrefix while at a IterAtLimit position.
    1721             : // When called in this condition, NextPrefix has non-deterministic behavior.
    1722             : //
    1723             : // It is not permitted to invoke NextPrefix when the Iterator has an
    1724             : // upper-bound that is a versioned MVCC key (see the comment for
    1725             : // Comparer.Split). It returns an error in this case.
    1726           2 : func (i *Iterator) NextPrefix() bool {
    1727           2 :         if i.nextPrefixNotPermittedByUpperBound {
    1728           2 :                 i.lastPositioningOp = unknownLastPositionOp
    1729           2 :                 i.requiresReposition = false
    1730           2 :                 i.err = errors.Errorf("NextPrefix not permitted with upper bound %s",
    1731           2 :                         i.comparer.FormatKey(i.opts.UpperBound))
    1732           2 :                 i.iterValidityState = IterExhausted
    1733           2 :                 return false
    1734           2 :         }
    1735           2 :         if i.hasPrefix {
    1736           2 :                 i.iterValidityState = IterExhausted
    1737           2 :                 return false
    1738           2 :         }
    1739           2 :         if i.Error() != nil {
    1740           2 :                 return false
    1741           2 :         }
    1742           2 :         return i.nextPrefix() == IterValid
    1743             : }
    1744             : 
    1745           2 : func (i *Iterator) nextPrefix() IterValidityState {
    1746           2 :         if i.rangeKey != nil {
    1747           2 :                 // NB: Check Valid() before clearing requiresReposition.
    1748           2 :                 i.rangeKey.prevPosHadRangeKey = i.rangeKey.hasRangeKey && i.Valid()
    1749           2 :                 // If we have a range key but did not expose it at the previous iterator
    1750           2 :                 // position (because the iterator was not at a valid position), updated
    1751           2 :                 // must be true. This ensures that after an iterator op sequence like:
    1752           2 :                 //   - Next()             → (IterValid, RangeBounds() = [a,b))
    1753           2 :                 //   - NextWithLimit(...) → (IterAtLimit, RangeBounds() = -)
    1754           2 :                 //   - NextWithLimit(...) → (IterValid, RangeBounds() = [a,b))
    1755           2 :                 // the iterator returns RangeKeyChanged()=true.
    1756           2 :                 //
    1757           2 :                 // The remainder of this function will only update i.rangeKey.updated if
    1758           2 :                 // the iterator moves into a new range key, or out of the current range
    1759           2 :                 // key.
    1760           2 :                 i.rangeKey.updated = i.rangeKey.hasRangeKey && !i.Valid() && i.opts.rangeKeys()
    1761           2 :         }
    1762             : 
    1763             :         // Although NextPrefix documents that behavior at IterAtLimit is undefined,
    1764             :         // this function handles these cases as a simple prefix-agnostic Next. This
    1765             :         // is done for deterministic behavior in the metamorphic tests.
    1766             :         //
    1767             :         // TODO(jackson): If the metamorphic test operation generator is adjusted to
    1768             :         // make generation of some operations conditional on the previous
    1769             :         // operations, then we can remove this behavior and explicitly error.
    1770             : 
    1771           2 :         i.lastPositioningOp = unknownLastPositionOp
    1772           2 :         i.requiresReposition = false
    1773           2 :         switch i.pos {
    1774           2 :         case iterPosCurForward:
    1775           2 :                 // Positioned on the current key. Advance to the next prefix.
    1776           2 :                 i.internalNextPrefix(i.comparer.Split(i.key))
    1777           2 :         case iterPosCurForwardPaused:
    1778             :                 // Positioned at a limit. Implement as a prefix-agnostic Next. See TODO
    1779             :                 // up above. The iterator is already positioned at the next key.
    1780           2 :         case iterPosCurReverse:
    1781           2 :                 // Switching directions.
    1782           2 :                 // Unless the iterator was exhausted, reverse iteration needs to
    1783           2 :                 // position the iterator at iterPosPrev.
    1784           2 :                 if i.iterKV != nil {
    1785           0 :                         i.err = errors.New("switching from reverse to forward but iter is not at prev")
    1786           0 :                         i.iterValidityState = IterExhausted
    1787           0 :                         return i.iterValidityState
    1788           0 :                 }
    1789             :                 // The Iterator is exhausted and i.iter is positioned before the first
    1790             :                 // key. Reposition to point to the first internal key.
    1791           2 :                 if i.err = i.iterFirstWithinBounds(); i.err != nil {
    1792           0 :                         i.iterValidityState = IterExhausted
    1793           0 :                         return i.iterValidityState
    1794           0 :                 }
    1795           2 :         case iterPosCurReversePaused:
    1796           2 :                 // Positioned at a limit. Implement as a prefix-agnostic Next. See TODO
    1797           2 :                 // up above.
    1798           2 :                 //
    1799           2 :                 // Switching directions; The iterator must not be exhausted since it
    1800           2 :                 // paused.
    1801           2 :                 if i.iterKV == nil {
    1802           0 :                         i.err = errors.New("switching paused from reverse to forward but iter is exhausted")
    1803           0 :                         i.iterValidityState = IterExhausted
    1804           0 :                         return i.iterValidityState
    1805           0 :                 }
    1806           2 :                 i.nextUserKey()
    1807           2 :         case iterPosPrev:
    1808           2 :                 // The underlying iterator is pointed to the previous key (this can
    1809           2 :                 // only happen when switching iteration directions).
    1810           2 :                 if i.iterKV == nil {
    1811           2 :                         // We're positioned before the first key. Need to reposition to point to
    1812           2 :                         // the first key.
    1813           2 :                         i.err = i.iterFirstWithinBounds()
    1814           2 :                         if i.iterKV == nil {
    1815           0 :                                 i.iterValidityState = IterExhausted
    1816           0 :                                 return i.iterValidityState
    1817           0 :                         }
    1818           2 :                         if invariants.Enabled && !i.equal(i.iterKV.K.UserKey, i.key) {
    1819           0 :                                 i.opts.getLogger().Fatalf("pebble: invariant violation: First internal iterator from iterPosPrev landed on %q, not %q",
    1820           0 :                                         i.iterKV.K.UserKey, i.key)
    1821           0 :                         }
    1822           2 :                 } else {
    1823           2 :                         // Move the internal iterator back onto the user key stored in
    1824           2 :                         // i.key. iterPosPrev guarantees that it's positioned at the last
    1825           2 :                         // key with the user key less than i.key, so we're guaranteed to
    1826           2 :                         // land on the correct key with a single Next.
    1827           2 :                         i.iterKV = i.iter.Next()
    1828           2 :                         if i.iterKV == nil {
    1829           1 :                                 // This should only be possible if i.iter.Next() encountered an
    1830           1 :                                 // error.
    1831           1 :                                 if i.iter.Error() == nil {
    1832           0 :                                         i.opts.getLogger().Fatalf("pebble: invariant violation: Nexting internal iterator from iterPosPrev found nothing")
    1833           0 :                                 }
    1834             :                                 // NB: Iterator.Error() will return i.iter.Error().
    1835           1 :                                 i.iterValidityState = IterExhausted
    1836           1 :                                 return i.iterValidityState
    1837             :                         }
    1838           2 :                         if invariants.Enabled && !i.equal(i.iterKV.K.UserKey, i.key) {
    1839           0 :                                 i.opts.getLogger().Fatalf("pebble: invariant violation: Nexting internal iterator from iterPosPrev landed on %q, not %q",
    1840           0 :                                         i.iterKV.K.UserKey, i.key)
    1841           0 :                         }
    1842             :                 }
    1843             :                 // The internal iterator is now positioned at i.key. Advance to the next
    1844             :                 // prefix.
    1845           2 :                 i.internalNextPrefix(i.comparer.Split(i.key))
    1846           2 :         case iterPosNext:
    1847           2 :                 // Already positioned on the next key. Only call nextPrefixKey if the
    1848           2 :                 // next key shares the same prefix.
    1849           2 :                 if i.iterKV != nil {
    1850           2 :                         currKeyPrefixLen := i.comparer.Split(i.key)
    1851           2 :                         if bytes.Equal(i.comparer.Split.Prefix(i.iterKV.K.UserKey), i.key[:currKeyPrefixLen]) {
    1852           2 :                                 i.internalNextPrefix(currKeyPrefixLen)
    1853           2 :                         }
    1854             :                 }
    1855             :         }
    1856             : 
    1857           2 :         i.stats.ForwardStepCount[InterfaceCall]++
    1858           2 :         i.findNextEntry(nil /* limit */)
    1859           2 :         i.maybeSampleRead()
    1860           2 :         return i.iterValidityState
    1861             : }
    1862             : 
    1863           2 : func (i *Iterator) internalNextPrefix(currKeyPrefixLen int) {
    1864           2 :         if i.iterKV == nil {
    1865           2 :                 return
    1866           2 :         }
    1867             :         // The Next "fast-path" is not really a fast-path when there is more than
    1868             :         // one version. However, even with TableFormatPebblev3, there is a small
    1869             :         // slowdown (~10%) for one version if we remove it and only call NextPrefix.
    1870             :         // When there are two versions, only calling NextPrefix is ~30% faster.
    1871           2 :         i.stats.ForwardStepCount[InternalIterCall]++
    1872           2 :         if i.iterKV = i.iter.Next(); i.iterKV == nil {
    1873           2 :                 return
    1874           2 :         }
    1875           2 :         if !bytes.Equal(i.comparer.Split.Prefix(i.iterKV.K.UserKey), i.key[:currKeyPrefixLen]) {
    1876           2 :                 return
    1877           2 :         }
    1878           2 :         i.stats.ForwardStepCount[InternalIterCall]++
    1879           2 :         i.prefixOrFullSeekKey = i.comparer.ImmediateSuccessor(i.prefixOrFullSeekKey[:0], i.key[:currKeyPrefixLen])
    1880           2 :         if i.iterKV.K.IsExclusiveSentinel() {
    1881           0 :                 panic(errors.AssertionFailedf("pebble: unexpected exclusive sentinel key: %q", i.iterKV.K))
    1882             :         }
    1883             : 
    1884           2 :         i.iterKV = i.iter.NextPrefix(i.prefixOrFullSeekKey)
    1885           2 :         if invariants.Enabled && i.iterKV != nil {
    1886           2 :                 if p := i.comparer.Split.Prefix(i.iterKV.K.UserKey); i.cmp(p, i.prefixOrFullSeekKey) < 0 {
    1887           0 :                         panic(errors.AssertionFailedf("pebble: iter.NextPrefix did not advance beyond the current prefix: now at %q; expected to be geq %q",
    1888           0 :                                 i.iterKV.K, i.prefixOrFullSeekKey))
    1889             :                 }
    1890             :         }
    1891             : }
    1892             : 
    1893           2 : func (i *Iterator) nextWithLimit(limit []byte) IterValidityState {
    1894           2 :         i.stats.ForwardStepCount[InterfaceCall]++
    1895           2 :         if i.hasPrefix {
    1896           2 :                 if limit != nil {
    1897           2 :                         i.err = errors.New("cannot use limit with prefix iteration")
    1898           2 :                         i.iterValidityState = IterExhausted
    1899           2 :                         return i.iterValidityState
    1900           2 :                 } else if i.iterValidityState == IterExhausted {
    1901           2 :                         // No-op, already exhausted. We avoid executing the Next because it
    1902           2 :                         // can break invariants: Specifically, a file that fails the bloom
    1903           2 :                         // filter test may result in its level being removed from the
    1904           2 :                         // merging iterator. The level's removal can cause a lazy combined
    1905           2 :                         // iterator to miss range keys and trigger a switch to combined
    1906           2 :                         // iteration at a larger key, breaking keyspan invariants.
    1907           2 :                         return i.iterValidityState
    1908           2 :                 }
    1909             :         }
    1910           2 :         if i.err != nil {
    1911           2 :                 return i.iterValidityState
    1912           2 :         }
    1913           2 :         if i.rangeKey != nil {
    1914           2 :                 // NB: Check Valid() before clearing requiresReposition.
    1915           2 :                 i.rangeKey.prevPosHadRangeKey = i.rangeKey.hasRangeKey && i.Valid()
    1916           2 :                 // If we have a range key but did not expose it at the previous iterator
    1917           2 :                 // position (because the iterator was not at a valid position), updated
    1918           2 :                 // must be true. This ensures that after an iterator op sequence like:
    1919           2 :                 //   - Next()             → (IterValid, RangeBounds() = [a,b))
    1920           2 :                 //   - NextWithLimit(...) → (IterAtLimit, RangeBounds() = -)
    1921           2 :                 //   - NextWithLimit(...) → (IterValid, RangeBounds() = [a,b))
    1922           2 :                 // the iterator returns RangeKeyChanged()=true.
    1923           2 :                 //
    1924           2 :                 // The remainder of this function will only update i.rangeKey.updated if
    1925           2 :                 // the iterator moves into a new range key, or out of the current range
    1926           2 :                 // key.
    1927           2 :                 i.rangeKey.updated = i.rangeKey.hasRangeKey && !i.Valid() && i.opts.rangeKeys()
    1928           2 :         }
    1929           2 :         i.lastPositioningOp = unknownLastPositionOp
    1930           2 :         i.requiresReposition = false
    1931           2 :         switch i.pos {
    1932           2 :         case iterPosCurForward:
    1933           2 :                 i.nextUserKey()
    1934           2 :         case iterPosCurForwardPaused:
    1935             :                 // Already at the right place.
    1936           2 :         case iterPosCurReverse:
    1937           2 :                 // Switching directions.
    1938           2 :                 // Unless the iterator was exhausted, reverse iteration needs to
    1939           2 :                 // position the iterator at iterPosPrev.
    1940           2 :                 if i.iterKV != nil {
    1941           0 :                         i.err = errors.New("switching from reverse to forward but iter is not at prev")
    1942           0 :                         i.iterValidityState = IterExhausted
    1943           0 :                         return i.iterValidityState
    1944           0 :                 }
    1945             :                 // We're positioned before the first key. Need to reposition to point to
    1946             :                 // the first key.
    1947           2 :                 if i.err = i.iterFirstWithinBounds(); i.err != nil {
    1948           1 :                         i.iterValidityState = IterExhausted
    1949           1 :                         return i.iterValidityState
    1950           1 :                 }
    1951           2 :         case iterPosCurReversePaused:
    1952           2 :                 // Switching directions.
    1953           2 :                 // The iterator must not be exhausted since it paused.
    1954           2 :                 if i.iterKV == nil {
    1955           0 :                         i.err = errors.New("switching paused from reverse to forward but iter is exhausted")
    1956           0 :                         i.iterValidityState = IterExhausted
    1957           0 :                         return i.iterValidityState
    1958           0 :                 }
    1959           2 :                 i.nextUserKey()
    1960           2 :         case iterPosPrev:
    1961           2 :                 // The underlying iterator is pointed to the previous key (this can
    1962           2 :                 // only happen when switching iteration directions). We set
    1963           2 :                 // i.iterValidityState to IterExhausted here to force the calls to
    1964           2 :                 // nextUserKey to save the current key i.iter is pointing at in order
    1965           2 :                 // to determine when the next user-key is reached.
    1966           2 :                 i.iterValidityState = IterExhausted
    1967           2 :                 if i.iterKV == nil {
    1968           2 :                         // We're positioned before the first key. Need to reposition to point to
    1969           2 :                         // the first key.
    1970           2 :                         i.err = i.iterFirstWithinBounds()
    1971           2 :                 } else {
    1972           2 :                         i.nextUserKey()
    1973           2 :                 }
    1974           2 :                 if i.err != nil {
    1975           1 :                         i.iterValidityState = IterExhausted
    1976           1 :                         return i.iterValidityState
    1977           1 :                 }
    1978           2 :                 i.nextUserKey()
    1979           2 :         case iterPosNext:
    1980             :                 // Already at the right place.
    1981             :         }
    1982           2 :         i.findNextEntry(limit)
    1983           2 :         i.maybeSampleRead()
    1984           2 :         return i.iterValidityState
    1985             : }
    1986             : 
    1987             : // Prev moves the iterator to the previous key/value pair. Returns true if the
    1988             : // iterator is pointing at a valid entry and false otherwise.
    1989           2 : func (i *Iterator) Prev() bool {
    1990           2 :         return i.PrevWithLimit(nil) == IterValid
    1991           2 : }
    1992             : 
    1993             : // PrevWithLimit moves the iterator to the previous key/value pair.
    1994             : //
    1995             : // If limit is provided, it serves as a best-effort inclusive limit. If the
    1996             : // previous key is less than limit, the Iterator may pause and return
    1997             : // IterAtLimit. Because limits are best-effort, PrevWithLimit may return a key
    1998             : // beyond limit.
    1999             : //
    2000             : // If the Iterator is configured to iterate over range keys, PrevWithLimit
    2001             : // guarantees it will surface any range keys with bounds overlapping the
    2002             : // keyspace up to limit.
    2003           2 : func (i *Iterator) PrevWithLimit(limit []byte) IterValidityState {
    2004           2 :         i.stats.ReverseStepCount[InterfaceCall]++
    2005           2 :         if i.err != nil {
    2006           2 :                 return i.iterValidityState
    2007           2 :         }
    2008           2 :         if i.rangeKey != nil {
    2009           2 :                 // NB: Check Valid() before clearing requiresReposition.
    2010           2 :                 i.rangeKey.prevPosHadRangeKey = i.rangeKey.hasRangeKey && i.Valid()
    2011           2 :                 // If we have a range key but did not expose it at the previous iterator
    2012           2 :                 // position (because the iterator was not at a valid position), updated
    2013           2 :                 // must be true. This ensures that after an iterator op sequence like:
    2014           2 :                 //   - Next()             → (IterValid, RangeBounds() = [a,b))
    2015           2 :                 //   - NextWithLimit(...) → (IterAtLimit, RangeBounds() = -)
    2016           2 :                 //   - PrevWithLimit(...) → (IterValid, RangeBounds() = [a,b))
    2017           2 :                 // the iterator returns RangeKeyChanged()=true.
    2018           2 :                 //
    2019           2 :                 // The remainder of this function will only update i.rangeKey.updated if
    2020           2 :                 // the iterator moves into a new range key, or out of the current range
    2021           2 :                 // key.
    2022           2 :                 i.rangeKey.updated = i.rangeKey.hasRangeKey && !i.Valid() && i.opts.rangeKeys()
    2023           2 :         }
    2024           2 :         i.lastPositioningOp = unknownLastPositionOp
    2025           2 :         i.requiresReposition = false
    2026           2 :         if i.hasPrefix {
    2027           2 :                 i.err = errReversePrefixIteration
    2028           2 :                 i.iterValidityState = IterExhausted
    2029           2 :                 return i.iterValidityState
    2030           2 :         }
    2031           2 :         switch i.pos {
    2032           2 :         case iterPosCurForward:
    2033             :                 // Switching directions, and will handle this below.
    2034           2 :         case iterPosCurForwardPaused:
    2035             :                 // Switching directions, and will handle this below.
    2036           2 :         case iterPosCurReverse:
    2037           2 :                 i.prevUserKey()
    2038           2 :         case iterPosCurReversePaused:
    2039             :                 // Already at the right place.
    2040           2 :         case iterPosNext:
    2041             :                 // The underlying iterator is pointed to the next key (this can only happen
    2042             :                 // when switching iteration directions). We will handle this below.
    2043           2 :         case iterPosPrev:
    2044             :                 // Already at the right place.
    2045             :         }
    2046           2 :         if i.pos == iterPosCurForward || i.pos == iterPosNext || i.pos == iterPosCurForwardPaused {
    2047           2 :                 // Switching direction.
    2048           2 :                 stepAgain := i.pos == iterPosNext
    2049           2 : 
    2050           2 :                 // Synthetic range key markers are a special case. Consider SeekGE(b)
    2051           2 :                 // which finds a range key [a, c). To ensure the user observes the range
    2052           2 :                 // key, the Iterator pauses at Key() = b. The iterator must advance the
    2053           2 :                 // internal iterator to see if there's also a coincident point key at
    2054           2 :                 // 'b', leaving the iterator at iterPosNext if there's not.
    2055           2 :                 //
    2056           2 :                 // This is a problem: Synthetic range key markers are only interleaved
    2057           2 :                 // during the original seek. A subsequent Prev() of i.iter will not move
    2058           2 :                 // back onto the synthetic range key marker. In this case where the
    2059           2 :                 // previous iterator position was a synthetic range key start boundary,
    2060           2 :                 // we must not step a second time.
    2061           2 :                 if i.isEphemeralPosition() {
    2062           2 :                         stepAgain = false
    2063           2 :                 }
    2064             : 
    2065             :                 // We set i.iterValidityState to IterExhausted here to force the calls
    2066             :                 // to prevUserKey to save the current key i.iter is pointing at in
    2067             :                 // order to determine when the prev user-key is reached.
    2068           2 :                 i.iterValidityState = IterExhausted
    2069           2 :                 if i.iterKV == nil {
    2070           2 :                         // We're positioned after the last key. Need to reposition to point to
    2071           2 :                         // the last key.
    2072           2 :                         i.err = i.iterLastWithinBounds()
    2073           2 :                 } else {
    2074           2 :                         i.prevUserKey()
    2075           2 :                 }
    2076           2 :                 if i.err != nil {
    2077           1 :                         return i.iterValidityState
    2078           1 :                 }
    2079           2 :                 if stepAgain {
    2080           2 :                         i.prevUserKey()
    2081           2 :                         if i.err != nil {
    2082           0 :                                 return i.iterValidityState
    2083           0 :                         }
    2084             :                 }
    2085             :         }
    2086           2 :         i.findPrevEntry(limit)
    2087           2 :         i.maybeSampleRead()
    2088           2 :         return i.iterValidityState
    2089             : }
    2090             : 
    2091             : // iterFirstWithinBounds moves the internal iterator to the first key,
    2092             : // respecting bounds.
    2093           2 : func (i *Iterator) iterFirstWithinBounds() error {
    2094           2 :         i.stats.ForwardSeekCount[InternalIterCall]++
    2095           2 :         if lowerBound := i.opts.GetLowerBound(); lowerBound != nil {
    2096           2 :                 i.iterKV = i.iter.SeekGE(lowerBound, base.SeekGEFlagsNone)
    2097           2 :         } else {
    2098           2 :                 i.iterKV = i.iter.First()
    2099           2 :         }
    2100           2 :         if i.iterKV == nil {
    2101           2 :                 return i.iter.Error()
    2102           2 :         }
    2103           2 :         return nil
    2104             : }
    2105             : 
    2106             : // iterLastWithinBounds moves the internal iterator to the last key, respecting
    2107             : // bounds.
    2108           2 : func (i *Iterator) iterLastWithinBounds() error {
    2109           2 :         i.stats.ReverseSeekCount[InternalIterCall]++
    2110           2 :         if upperBound := i.opts.GetUpperBound(); upperBound != nil {
    2111           2 :                 i.iterKV = i.iter.SeekLT(upperBound, base.SeekLTFlagsNone)
    2112           2 :         } else {
    2113           2 :                 i.iterKV = i.iter.Last()
    2114           2 :         }
    2115           2 :         if i.iterKV == nil {
    2116           2 :                 return i.iter.Error()
    2117           2 :         }
    2118           2 :         return nil
    2119             : }
    2120             : 
    2121             : // RangeKeyData describes a range key's data, set through RangeKeySet. The key
    2122             : // boundaries of the range key is provided by Iterator.RangeBounds.
    2123             : type RangeKeyData struct {
    2124             :         Suffix []byte
    2125             :         Value  []byte
    2126             : }
    2127             : 
    2128             : // rangeKeyWithinLimit is called during limited reverse iteration when
    2129             : // positioned over a key beyond the limit. If there exists a range key that lies
    2130             : // within the limit, the iterator must not pause in order to ensure the user has
    2131             : // an opportunity to observe the range key within limit.
    2132             : //
    2133             : // It would be valid to ignore the limit whenever there's a range key covering
    2134             : // the key, but that would introduce nondeterminism. To preserve determinism for
    2135             : // testing, the iterator ignores the limit only if the covering range key does
    2136             : // cover the keyspace within the limit.
    2137             : //
    2138             : // This awkwardness exists because range keys are interleaved at their inclusive
    2139             : // start positions. Note that limit is inclusive.
    2140           2 : func (i *Iterator) rangeKeyWithinLimit(limit []byte) bool {
    2141           2 :         if i.rangeKey == nil || !i.opts.rangeKeys() {
    2142           2 :                 return false
    2143           2 :         }
    2144           2 :         s := i.rangeKey.iiter.Span()
    2145           2 :         // If the range key ends beyond the limit, then the range key does not cover
    2146           2 :         // any portion of the keyspace within the limit and it is safe to pause.
    2147           2 :         return s != nil && i.cmp(s.End, limit) > 0
    2148             : }
    2149             : 
    2150             : // saveRangeKey saves the current range key to the underlying iterator's current
    2151             : // range key state. If the range key has not changed, saveRangeKey is a no-op.
    2152             : // If there is a new range key, saveRangeKey copies all of the key, value and
    2153             : // suffixes into Iterator-managed buffers.
    2154           2 : func (i *Iterator) saveRangeKey() {
    2155           2 :         if i.rangeKey == nil || i.opts.KeyTypes == IterKeyTypePointsOnly {
    2156           2 :                 return
    2157           2 :         }
    2158             : 
    2159           2 :         s := i.rangeKey.iiter.Span()
    2160           2 :         if s == nil {
    2161           2 :                 i.rangeKey.hasRangeKey = false
    2162           2 :                 i.rangeKey.updated = i.rangeKey.prevPosHadRangeKey
    2163           2 :                 return
    2164           2 :         } else if !i.rangeKey.stale {
    2165           2 :                 // The range key `s` is identical to the one currently saved. No-op.
    2166           2 :                 return
    2167           2 :         }
    2168             : 
    2169           2 :         if s.KeysOrder != keyspan.BySuffixAsc {
    2170           0 :                 panic("pebble: range key span's keys unexpectedly not in ascending suffix order")
    2171             :         }
    2172             : 
    2173             :         // Although `i.rangeKey.stale` is true, the span s may still be identical
    2174             :         // to the currently saved span. This is possible when seeking the iterator,
    2175             :         // which may land back on the same range key. If we previously had a range
    2176             :         // key and the new one has an identical start key, then it must be the same
    2177             :         // range key and we can avoid copying and keep `i.rangeKey.updated=false`.
    2178             :         //
    2179             :         // TODO(jackson): These key comparisons could be avoidable during relative
    2180             :         // positioning operations continuing in the same direction, because these
    2181             :         // ops will never encounter the previous position's range key while
    2182             :         // stale=true. However, threading whether the current op is a seek or step
    2183             :         // maybe isn't worth it. This key comparison is only necessary once when we
    2184             :         // step onto a new range key, which should be relatively rare.
    2185           2 :         if i.rangeKey.prevPosHadRangeKey && i.equal(i.rangeKey.start, s.Start) &&
    2186           2 :                 i.equal(i.rangeKey.end, s.End) {
    2187           2 :                 i.rangeKey.updated = false
    2188           2 :                 i.rangeKey.stale = false
    2189           2 :                 i.rangeKey.hasRangeKey = true
    2190           2 :                 return
    2191           2 :         }
    2192           2 :         i.stats.RangeKeyStats.Count += len(s.Keys)
    2193           2 :         i.rangeKey.buf.Reset()
    2194           2 :         i.rangeKey.hasRangeKey = true
    2195           2 :         i.rangeKey.updated = true
    2196           2 :         i.rangeKey.stale = false
    2197           2 :         i.rangeKey.buf, i.rangeKey.start = i.rangeKey.buf.Copy(s.Start)
    2198           2 :         i.rangeKey.buf, i.rangeKey.end = i.rangeKey.buf.Copy(s.End)
    2199           2 :         i.rangeKey.keys = i.rangeKey.keys[:0]
    2200           2 :         for j := 0; j < len(s.Keys); j++ {
    2201           2 :                 if invariants.Enabled {
    2202           2 :                         if s.Keys[j].Kind() != base.InternalKeyKindRangeKeySet {
    2203           0 :                                 panic("pebble: user iteration encountered non-RangeKeySet key kind")
    2204           2 :                         } else if j > 0 && i.comparer.CompareSuffixes(s.Keys[j].Suffix, s.Keys[j-1].Suffix) < 0 {
    2205           0 :                                 panic("pebble: user iteration encountered range keys not in suffix order")
    2206             :                         }
    2207             :                 }
    2208           2 :                 var rkd RangeKeyData
    2209           2 :                 i.rangeKey.buf, rkd.Suffix = i.rangeKey.buf.Copy(s.Keys[j].Suffix)
    2210           2 :                 i.rangeKey.buf, rkd.Value = i.rangeKey.buf.Copy(s.Keys[j].Value)
    2211           2 :                 i.rangeKey.keys = append(i.rangeKey.keys, rkd)
    2212             :         }
    2213             : }
    2214             : 
    2215             : // RangeKeyChanged indicates whether the most recent iterator positioning
    2216             : // operation resulted in the iterator stepping into or out of a new range key.
    2217             : // If true, previously returned range key bounds and data has been invalidated.
    2218             : // If false, previously obtained range key bounds, suffix and value slices are
    2219             : // still valid and may continue to be read.
    2220             : //
    2221             : // Invalid iterator positions are considered to not hold range keys, meaning
    2222             : // that if an iterator steps from an IterExhausted or IterAtLimit position onto
    2223             : // a position with a range key, RangeKeyChanged will yield true.
    2224           2 : func (i *Iterator) RangeKeyChanged() bool {
    2225           2 :         return i.iterValidityState == IterValid && i.rangeKey != nil && i.rangeKey.updated
    2226           2 : }
    2227             : 
    2228             : // HasPointAndRange indicates whether there exists a point key, a range key or
    2229             : // both at the current iterator position.
    2230           2 : func (i *Iterator) HasPointAndRange() (hasPoint, hasRange bool) {
    2231           2 :         if i.iterValidityState != IterValid || i.requiresReposition {
    2232           1 :                 return false, false
    2233           1 :         }
    2234           2 :         if i.opts.KeyTypes == IterKeyTypePointsOnly {
    2235           2 :                 return true, false
    2236           2 :         }
    2237           2 :         return i.rangeKey == nil || !i.rangeKey.rangeKeyOnly, i.rangeKey != nil && i.rangeKey.hasRangeKey
    2238             : }
    2239             : 
    2240             : // RangeBounds returns the start (inclusive) and end (exclusive) bounds of the
    2241             : // range key covering the current iterator position. RangeBounds returns nil
    2242             : // bounds if there is no range key covering the current iterator position, or
    2243             : // the iterator is not configured to surface range keys.
    2244             : //
    2245             : // If valid, the returned start bound is less than or equal to Key() and the
    2246             : // returned end bound is greater than Key().
    2247           2 : func (i *Iterator) RangeBounds() (start, end []byte) {
    2248           2 :         if i.rangeKey == nil || !i.opts.rangeKeys() || !i.rangeKey.hasRangeKey {
    2249           0 :                 return nil, nil
    2250           0 :         }
    2251           2 :         return i.rangeKey.start, i.rangeKey.end
    2252             : }
    2253             : 
    2254             : // Key returns the key of the current key/value pair, or nil if done. The
    2255             : // caller should not modify the contents of the returned slice, and its
    2256             : // contents may change on the next call to Next.
    2257             : //
    2258             : // If positioned at an iterator position that only holds a range key, Key()
    2259             : // always returns the start bound of the range key. Otherwise, it returns the
    2260             : // point key's key.
    2261           2 : func (i *Iterator) Key() []byte {
    2262           2 :         return i.key
    2263           2 : }
    2264             : 
    2265             : // Value returns the value of the current key/value pair, or nil if done. The
    2266             : // caller should not modify the contents of the returned slice, and its
    2267             : // contents may change on the next call to Next.
    2268             : //
    2269             : // Only valid if HasPointAndRange() returns true for hasPoint.
    2270             : // Deprecated: use ValueAndErr instead.
    2271           2 : func (i *Iterator) Value() []byte {
    2272           2 :         val, _ := i.ValueAndErr()
    2273           2 :         return val
    2274           2 : }
    2275             : 
    2276             : // ValueAndErr returns the value, and any error encountered in extracting the value.
    2277             : // REQUIRES: i.Error()==nil and HasPointAndRange() returns true for hasPoint.
    2278             : //
    2279             : // The caller should not modify the contents of the returned slice, and its
    2280             : // contents may change on the next call to Next.
    2281           2 : func (i *Iterator) ValueAndErr() ([]byte, error) {
    2282           2 :         val, callerOwned, err := i.value.Value(i.lazyValueBuf)
    2283           2 :         if err != nil {
    2284           1 :                 i.err = err
    2285           1 :                 i.iterValidityState = IterExhausted
    2286           1 :         }
    2287           2 :         if callerOwned {
    2288           1 :                 i.lazyValueBuf = val[:0]
    2289           1 :         }
    2290           2 :         return val, err
    2291             : }
    2292             : 
    2293             : // LazyValue returns the LazyValue. Only for advanced use cases.
    2294             : // REQUIRES: i.Error()==nil and HasPointAndRange() returns true for hasPoint.
    2295           0 : func (i *Iterator) LazyValue() LazyValue {
    2296           0 :         return i.value
    2297           0 : }
    2298             : 
    2299             : // RangeKeys returns the range key values and their suffixes covering the
    2300             : // current iterator position. The range bounds may be retrieved separately
    2301             : // through Iterator.RangeBounds().
    2302           2 : func (i *Iterator) RangeKeys() []RangeKeyData {
    2303           2 :         if i.rangeKey == nil || !i.opts.rangeKeys() || !i.rangeKey.hasRangeKey {
    2304           0 :                 return nil
    2305           0 :         }
    2306           2 :         return i.rangeKey.keys
    2307             : }
    2308             : 
    2309             : // Valid returns true if the iterator is positioned at a valid key/value pair
    2310             : // and false otherwise.
    2311           2 : func (i *Iterator) Valid() bool {
    2312           2 :         valid := i.iterValidityState == IterValid && !i.requiresReposition
    2313           2 :         if invariants.Enabled {
    2314           2 :                 if err := i.Error(); valid && err != nil {
    2315           0 :                         panic(errors.AssertionFailedf("pebble: iterator is valid with non-nil Error: %+v", err))
    2316             :                 }
    2317             :         }
    2318           2 :         return valid
    2319             : }
    2320             : 
    2321             : // Error returns any accumulated error.
    2322           2 : func (i *Iterator) Error() error {
    2323           2 :         if i.err != nil {
    2324           2 :                 return i.err
    2325           2 :         }
    2326           2 :         if i.iter != nil {
    2327           2 :                 return i.iter.Error()
    2328           2 :         }
    2329           0 :         return nil
    2330             : }
    2331             : 
    2332             : const maxKeyBufCacheSize = 4 << 10 // 4 KB
    2333             : 
    2334             : // Close closes the iterator and returns any accumulated error. Exhausting
    2335             : // all the key/value pairs in a table is not considered to be an error.
    2336             : // It is not valid to call any method, including Close, after the iterator
    2337             : // has been closed.
    2338           2 : func (i *Iterator) Close() error {
    2339           2 :         // Close the child iterator before releasing the readState because when the
    2340           2 :         // readState is released sstables referenced by the readState may be deleted
    2341           2 :         // which will fail on Windows if the sstables are still open by the child
    2342           2 :         // iterator.
    2343           2 :         if i.iter != nil {
    2344           2 :                 i.err = firstError(i.err, i.iter.Close())
    2345           2 : 
    2346           2 :                 // Closing i.iter did not necessarily close the point and range key
    2347           2 :                 // iterators. Calls to SetOptions may have 'disconnected' either one
    2348           2 :                 // from i.iter if iteration key types were changed. Both point and range
    2349           2 :                 // key iterators are preserved in case the iterator needs to switch key
    2350           2 :                 // types again. We explicitly close both of these iterators here.
    2351           2 :                 //
    2352           2 :                 // NB: If the iterators were still connected to i.iter, they may be
    2353           2 :                 // closed, but calling Close on a closed internal iterator or fragment
    2354           2 :                 // iterator is allowed.
    2355           2 :                 if i.pointIter != nil {
    2356           2 :                         i.err = firstError(i.err, i.pointIter.Close())
    2357           2 :                 }
    2358           2 :                 if i.rangeKey != nil && i.rangeKey.rangeKeyIter != nil {
    2359           2 :                         i.rangeKey.rangeKeyIter.Close()
    2360           2 :                 }
    2361             :         }
    2362           2 :         err := i.err
    2363           2 : 
    2364           2 :         if i.readState != nil {
    2365           2 :                 if i.readSampling.pendingCompactions.size > 0 {
    2366           1 :                         // Copy pending read compactions using db.mu.Lock()
    2367           1 :                         i.readState.db.mu.Lock()
    2368           1 :                         i.readState.db.mu.compact.readCompactions.combine(&i.readSampling.pendingCompactions, i.cmp)
    2369           1 :                         reschedule := i.readState.db.mu.compact.rescheduleReadCompaction
    2370           1 :                         i.readState.db.mu.compact.rescheduleReadCompaction = false
    2371           1 :                         concurrentCompactions := i.readState.db.mu.compact.compactingCount
    2372           1 :                         i.readState.db.mu.Unlock()
    2373           1 : 
    2374           1 :                         if reschedule && concurrentCompactions == 0 {
    2375           0 :                                 // In a read heavy workload, flushes may not happen frequently enough to
    2376           0 :                                 // schedule compactions.
    2377           0 :                                 i.readState.db.compactionSchedulers.Add(1)
    2378           0 :                                 go i.readState.db.maybeScheduleCompactionAsync()
    2379           0 :                         }
    2380             :                 }
    2381             : 
    2382           2 :                 i.readState.unref()
    2383           2 :                 i.readState = nil
    2384             :         }
    2385             : 
    2386           2 :         if i.version != nil {
    2387           2 :                 i.version.Unref()
    2388           2 :         }
    2389             : 
    2390           2 :         for _, readers := range i.externalReaders {
    2391           1 :                 for _, r := range readers {
    2392           1 :                         err = firstError(err, r.Close())
    2393           1 :                 }
    2394             :         }
    2395             : 
    2396             :         // Close the closer for the current value if one was open.
    2397           2 :         if i.valueCloser != nil {
    2398           1 :                 err = firstError(err, i.valueCloser.Close())
    2399           1 :                 i.valueCloser = nil
    2400           1 :         }
    2401             : 
    2402           2 :         if i.rangeKey != nil {
    2403           2 : 
    2404           2 :                 i.rangeKey.rangeKeyBuffers.PrepareForReuse()
    2405           2 :                 *i.rangeKey = iteratorRangeKeyState{
    2406           2 :                         rangeKeyBuffers: i.rangeKey.rangeKeyBuffers,
    2407           2 :                 }
    2408           2 :                 iterRangeKeyStateAllocPool.Put(i.rangeKey)
    2409           2 :                 i.rangeKey = nil
    2410           2 :         }
    2411           2 :         if alloc := i.alloc; alloc != nil {
    2412           2 :                 var (
    2413           2 :                         keyBuf               []byte
    2414           2 :                         boundsBuf            [2][]byte
    2415           2 :                         prefixOrFullSeekKey  []byte
    2416           2 :                         mergingIterHeapItems []*mergingIterLevel
    2417           2 :                 )
    2418           2 : 
    2419           2 :                 // Avoid caching the key buf if it is overly large. The constant is fairly
    2420           2 :                 // arbitrary.
    2421           2 :                 if cap(i.keyBuf) < maxKeyBufCacheSize {
    2422           2 :                         keyBuf = i.keyBuf
    2423           2 :                 }
    2424           2 :                 if cap(i.prefixOrFullSeekKey) < maxKeyBufCacheSize {
    2425           2 :                         prefixOrFullSeekKey = i.prefixOrFullSeekKey
    2426           2 :                 }
    2427           2 :                 for j := range i.boundsBuf {
    2428           2 :                         if cap(i.boundsBuf[j]) < maxKeyBufCacheSize {
    2429           2 :                                 boundsBuf[j] = i.boundsBuf[j]
    2430           2 :                         }
    2431             :                 }
    2432           2 :                 mergingIterHeapItems = alloc.merging.heap.items
    2433           2 :                 *alloc = iterAlloc{
    2434           2 :                         keyBuf:              keyBuf,
    2435           2 :                         boundsBuf:           boundsBuf,
    2436           2 :                         prefixOrFullSeekKey: prefixOrFullSeekKey,
    2437           2 :                         merging: mergingIter{
    2438           2 :                                 heap: mergingIterHeap{items: mergingIterHeapItems},
    2439           2 :                         },
    2440           2 :                 }
    2441           2 :                 iterAllocPool.Put(alloc)
    2442           2 :         } else if alloc := i.getIterAlloc; alloc != nil {
    2443           2 :                 if cap(i.keyBuf) >= maxKeyBufCacheSize {
    2444           0 :                         alloc.keyBuf = nil
    2445           2 :                 } else {
    2446           2 :                         alloc.keyBuf = i.keyBuf
    2447           2 :                 }
    2448           2 :                 *alloc = getIterAlloc{
    2449           2 :                         keyBuf: alloc.keyBuf,
    2450           2 :                 }
    2451           2 :                 getIterAllocPool.Put(alloc)
    2452             :         }
    2453           2 :         return err
    2454             : }
    2455             : 
    2456             : // SetBounds sets the lower and upper bounds for the iterator. Once SetBounds
    2457             : // returns, the caller is free to mutate the provided slices.
    2458             : //
    2459             : // The iterator will always be invalidated and must be repositioned with a call
    2460             : // to SeekGE, SeekPrefixGE, SeekLT, First, or Last.
    2461           2 : func (i *Iterator) SetBounds(lower, upper []byte) {
    2462           2 :         // Ensure that the Iterator appears exhausted, regardless of whether we
    2463           2 :         // actually have to invalidate the internal iterator. Optimizations that
    2464           2 :         // avoid exhaustion are an internal implementation detail that shouldn't
    2465           2 :         // leak through the interface. The caller should still call an absolute
    2466           2 :         // positioning method to reposition the iterator.
    2467           2 :         i.requiresReposition = true
    2468           2 : 
    2469           2 :         if ((i.opts.LowerBound == nil) == (lower == nil)) &&
    2470           2 :                 ((i.opts.UpperBound == nil) == (upper == nil)) &&
    2471           2 :                 i.equal(i.opts.LowerBound, lower) &&
    2472           2 :                 i.equal(i.opts.UpperBound, upper) {
    2473           2 :                 // Unchanged, noop.
    2474           2 :                 return
    2475           2 :         }
    2476             : 
    2477             :         // Copy the user-provided bounds into an Iterator-owned buffer, and set them
    2478             :         // on i.opts.{Lower,Upper}Bound.
    2479           2 :         i.processBounds(lower, upper)
    2480           2 : 
    2481           2 :         i.iter.SetBounds(i.opts.LowerBound, i.opts.UpperBound)
    2482           2 :         // If the iterator has an open point iterator that's not currently being
    2483           2 :         // used, propagate the new bounds to it.
    2484           2 :         if i.pointIter != nil && !i.opts.pointKeys() {
    2485           2 :                 i.pointIter.SetBounds(i.opts.LowerBound, i.opts.UpperBound)
    2486           2 :         }
    2487             :         // If the iterator has a range key iterator, propagate bounds to it. The
    2488             :         // top-level SetBounds on the interleaving iterator (i.iter) won't propagate
    2489             :         // bounds to the range key iterator stack, because the FragmentIterator
    2490             :         // interface doesn't define a SetBounds method. We need to directly inform
    2491             :         // the iterConfig stack.
    2492           2 :         if i.rangeKey != nil {
    2493           2 :                 i.rangeKey.iterConfig.SetBounds(i.opts.LowerBound, i.opts.UpperBound)
    2494           2 :         }
    2495             : 
    2496             :         // Even though this is not a positioning operation, the alteration of the
    2497             :         // bounds means we cannot optimize Seeks by using Next.
    2498           2 :         i.invalidate()
    2499             : }
    2500             : 
    2501             : // SetContext replaces the context provided at iterator creation, or the last
    2502             : // one provided by SetContext. Even though iterators are expected to be
    2503             : // short-lived, there are some cases where either (a) iterators are used far
    2504             : // from the code that created them, (b) iterators are reused (while being
    2505             : // short-lived) for processing different requests. For such scenarios, we
    2506             : // allow the caller to replace the context.
    2507           0 : func (i *Iterator) SetContext(ctx context.Context) {
    2508           0 :         i.ctx = ctx
    2509           0 :         i.iter.SetContext(ctx)
    2510           0 :         // If the iterator has an open point iterator that's not currently being
    2511           0 :         // used, propagate the new context to it.
    2512           0 :         if i.pointIter != nil && !i.opts.pointKeys() {
    2513           0 :                 i.pointIter.SetContext(i.ctx)
    2514           0 :         }
    2515             : }
    2516             : 
    2517             : // Initialization and changing of the bounds must call processBounds.
    2518             : // processBounds saves the bounds and computes derived state from those
    2519             : // bounds.
    2520           2 : func (i *Iterator) processBounds(lower, upper []byte) {
    2521           2 :         // Copy the user-provided bounds into an Iterator-owned buffer. We can't
    2522           2 :         // overwrite the current bounds, because some internal iterators compare old
    2523           2 :         // and new bounds for optimizations.
    2524           2 : 
    2525           2 :         buf := i.boundsBuf[i.boundsBufIdx][:0]
    2526           2 :         if lower != nil {
    2527           2 :                 buf = append(buf, lower...)
    2528           2 :                 i.opts.LowerBound = buf
    2529           2 :         } else {
    2530           2 :                 i.opts.LowerBound = nil
    2531           2 :         }
    2532           2 :         i.nextPrefixNotPermittedByUpperBound = false
    2533           2 :         if upper != nil {
    2534           2 :                 buf = append(buf, upper...)
    2535           2 :                 i.opts.UpperBound = buf[len(buf)-len(upper):]
    2536           2 :                 if i.comparer.Split(i.opts.UpperBound) != len(i.opts.UpperBound) {
    2537           2 :                         // Setting an upper bound that is a versioned MVCC key. This means
    2538           2 :                         // that a key can have some MVCC versions before the upper bound and
    2539           2 :                         // some after. This causes significant complications for NextPrefix,
    2540           2 :                         // so we bar the user of NextPrefix.
    2541           2 :                         i.nextPrefixNotPermittedByUpperBound = true
    2542           2 :                 }
    2543           2 :         } else {
    2544           2 :                 i.opts.UpperBound = nil
    2545           2 :         }
    2546           2 :         i.boundsBuf[i.boundsBufIdx] = buf
    2547           2 :         i.boundsBufIdx = 1 - i.boundsBufIdx
    2548             : }
    2549             : 
    2550             : // SetOptions sets new iterator options for the iterator. Note that the lower
    2551             : // and upper bounds applied here will supersede any bounds set by previous calls
    2552             : // to SetBounds.
    2553             : //
    2554             : // Note that the slices provided in this SetOptions must not be changed by the
    2555             : // caller until the iterator is closed, or a subsequent SetBounds or SetOptions
    2556             : // has returned. This is because comparisons between the existing and new bounds
    2557             : // are sometimes used to optimize seeking. See the extended commentary on
    2558             : // SetBounds.
    2559             : //
    2560             : // If the iterator was created over an indexed mutable batch, the iterator's
    2561             : // view of the mutable batch is refreshed.
    2562             : //
    2563             : // The iterator will always be invalidated and must be repositioned with a call
    2564             : // to SeekGE, SeekPrefixGE, SeekLT, First, or Last.
    2565             : //
    2566             : // If only lower and upper bounds need to be modified, prefer SetBounds.
    2567           2 : func (i *Iterator) SetOptions(o *IterOptions) {
    2568           2 :         if i.externalReaders != nil {
    2569           1 :                 if err := validateExternalIterOpts(o); err != nil {
    2570           0 :                         panic(err)
    2571             :                 }
    2572             :         }
    2573             : 
    2574             :         // Ensure that the Iterator appears exhausted, regardless of whether we
    2575             :         // actually have to invalidate the internal iterator. Optimizations that
    2576             :         // avoid exhaustion are an internal implementation detail that shouldn't
    2577             :         // leak through the interface. The caller should still call an absolute
    2578             :         // positioning method to reposition the iterator.
    2579           2 :         i.requiresReposition = true
    2580           2 : 
    2581           2 :         // Check if global state requires we close all internal iterators.
    2582           2 :         //
    2583           2 :         // If the Iterator is in an error state, invalidate the existing iterators
    2584           2 :         // so that we reconstruct an iterator state from scratch.
    2585           2 :         //
    2586           2 :         // If OnlyReadGuaranteedDurable changed, the iterator stacks are incorrect,
    2587           2 :         // improperly including or excluding memtables. Invalidate them so that
    2588           2 :         // finishInitializingIter will reconstruct them.
    2589           2 :         closeBoth := i.err != nil ||
    2590           2 :                 o.OnlyReadGuaranteedDurable != i.opts.OnlyReadGuaranteedDurable
    2591           2 : 
    2592           2 :         // If either options specify block property filters for an iterator stack,
    2593           2 :         // reconstruct it.
    2594           2 :         if i.pointIter != nil && (closeBoth || len(o.PointKeyFilters) > 0 || len(i.opts.PointKeyFilters) > 0 ||
    2595           2 :                 o.RangeKeyMasking.Filter != nil || i.opts.RangeKeyMasking.Filter != nil || o.SkipPoint != nil ||
    2596           2 :                 i.opts.SkipPoint != nil) {
    2597           2 :                 i.err = firstError(i.err, i.pointIter.Close())
    2598           2 :                 i.pointIter = nil
    2599           2 :         }
    2600           2 :         if i.rangeKey != nil {
    2601           2 :                 if closeBoth || len(o.RangeKeyFilters) > 0 || len(i.opts.RangeKeyFilters) > 0 {
    2602           2 :                         i.rangeKey.rangeKeyIter.Close()
    2603           2 :                         i.rangeKey = nil
    2604           2 :                 } else {
    2605           2 :                         // If there's still a range key iterator stack, invalidate the
    2606           2 :                         // iterator. This ensures RangeKeyChanged() returns true if a
    2607           2 :                         // subsequent positioning operation discovers a range key. It also
    2608           2 :                         // prevents seek no-op optimizations.
    2609           2 :                         i.invalidate()
    2610           2 :                 }
    2611             :         }
    2612             : 
    2613             :         // If the iterator is backed by a batch that's been mutated, refresh its
    2614             :         // existing point and range-key iterators, and invalidate the iterator to
    2615             :         // prevent seek-using-next optimizations. If we don't yet have a point-key
    2616             :         // iterator or range-key iterator but we require one, it'll be created in
    2617             :         // the slow path that reconstructs the iterator in finishInitializingIter.
    2618           2 :         if i.batch != nil {
    2619           2 :                 nextBatchSeqNum := (base.SeqNum(len(i.batch.data)) | base.SeqNumBatchBit)
    2620           2 :                 if nextBatchSeqNum != i.batchSeqNum {
    2621           2 :                         i.batchSeqNum = nextBatchSeqNum
    2622           2 :                         if i.merging != nil {
    2623           2 :                                 i.merging.batchSnapshot = nextBatchSeqNum
    2624           2 :                         }
    2625             :                         // Prevent a no-op seek optimization on the next seek. We won't be
    2626             :                         // able to reuse the top-level Iterator state, because it may be
    2627             :                         // incorrect after the inclusion of new batch mutations.
    2628           2 :                         i.batchJustRefreshed = true
    2629           2 :                         if i.pointIter != nil && i.batch.countRangeDels > 0 {
    2630           1 :                                 if i.batchRangeDelIter.Count() == 0 {
    2631           1 :                                         // When we constructed this iterator, there were no
    2632           1 :                                         // rangedels in the batch. Iterator construction will
    2633           1 :                                         // have excluded the batch rangedel iterator from the
    2634           1 :                                         // point iterator stack. We need to reconstruct the
    2635           1 :                                         // point iterator to add i.batchRangeDelIter into the
    2636           1 :                                         // iterator stack.
    2637           1 :                                         i.err = firstError(i.err, i.pointIter.Close())
    2638           1 :                                         i.pointIter = nil
    2639           1 :                                 } else {
    2640           1 :                                         // There are range deletions in the batch and we already
    2641           1 :                                         // have a batch rangedel iterator. We can update the
    2642           1 :                                         // batch rangedel iterator in place.
    2643           1 :                                         //
    2644           1 :                                         // NB: There may or may not be new range deletions. We
    2645           1 :                                         // can't tell based on i.batchRangeDelIter.Count(),
    2646           1 :                                         // which is the count of fragmented range deletions, NOT
    2647           1 :                                         // the number of range deletions written to the batch
    2648           1 :                                         // [i.batch.countRangeDels].
    2649           1 :                                         i.batch.initRangeDelIter(&i.opts, &i.batchRangeDelIter, nextBatchSeqNum)
    2650           1 :                                 }
    2651             :                         }
    2652           2 :                         if i.rangeKey != nil && i.batch.countRangeKeys > 0 {
    2653           1 :                                 if i.batchRangeKeyIter.Count() == 0 {
    2654           1 :                                         // When we constructed this iterator, there were no range
    2655           1 :                                         // keys in the batch. Iterator construction will have
    2656           1 :                                         // excluded the batch rangekey iterator from the range key
    2657           1 :                                         // iterator stack. We need to reconstruct the range key
    2658           1 :                                         // iterator to add i.batchRangeKeyIter into the iterator
    2659           1 :                                         // stack.
    2660           1 :                                         i.rangeKey.rangeKeyIter.Close()
    2661           1 :                                         i.rangeKey = nil
    2662           1 :                                 } else {
    2663           1 :                                         // There are range keys in the batch and we already
    2664           1 :                                         // have a batch rangekey iterator. We can update the batch
    2665           1 :                                         // rangekey iterator in place.
    2666           1 :                                         //
    2667           1 :                                         // NB: There may or may not be new range keys. We can't
    2668           1 :                                         // tell based on i.batchRangeKeyIter.Count(), which is the
    2669           1 :                                         // count of fragmented range keys, NOT the number of
    2670           1 :                                         // range keys written to the batch [i.batch.countRangeKeys].
    2671           1 :                                         i.batch.initRangeKeyIter(&i.opts, &i.batchRangeKeyIter, nextBatchSeqNum)
    2672           1 :                                         i.invalidate()
    2673           1 :                                 }
    2674             :                         }
    2675             :                 }
    2676             :         }
    2677             : 
    2678             :         // Reset combinedIterState.initialized in case the iterator key types
    2679             :         // changed. If there's already a range key iterator stack, the combined
    2680             :         // iterator is already initialized.  Additionally, if the iterator is not
    2681             :         // configured to include range keys, mark it as initialized to signal that
    2682             :         // lower level iterators should not trigger a switch to combined iteration.
    2683           2 :         i.lazyCombinedIter.combinedIterState = combinedIterState{
    2684           2 :                 initialized: i.rangeKey != nil || !i.opts.rangeKeys(),
    2685           2 :         }
    2686           2 : 
    2687           2 :         boundsEqual := ((i.opts.LowerBound == nil) == (o.LowerBound == nil)) &&
    2688           2 :                 ((i.opts.UpperBound == nil) == (o.UpperBound == nil)) &&
    2689           2 :                 i.equal(i.opts.LowerBound, o.LowerBound) &&
    2690           2 :                 i.equal(i.opts.UpperBound, o.UpperBound)
    2691           2 : 
    2692           2 :         if boundsEqual && o.KeyTypes == i.opts.KeyTypes &&
    2693           2 :                 (i.pointIter != nil || !i.opts.pointKeys()) &&
    2694           2 :                 (i.rangeKey != nil || !i.opts.rangeKeys() || i.opts.KeyTypes == IterKeyTypePointsAndRanges) &&
    2695           2 :                 i.comparer.CompareSuffixes(o.RangeKeyMasking.Suffix, i.opts.RangeKeyMasking.Suffix) == 0 &&
    2696           2 :                 o.UseL6Filters == i.opts.UseL6Filters {
    2697           2 :                 // The options are identical, so we can likely use the fast path. In
    2698           2 :                 // addition to all the above constraints, we cannot use the fast path if
    2699           2 :                 // configured to perform lazy combined iteration but an indexed batch
    2700           2 :                 // used by the iterator now contains range keys. Lazy combined iteration
    2701           2 :                 // is not compatible with batch range keys because we always need to
    2702           2 :                 // merge the batch's range keys into iteration.
    2703           2 :                 if i.rangeKey != nil || !i.opts.rangeKeys() || i.batch == nil || i.batch.countRangeKeys == 0 {
    2704           2 :                         // Fast path. This preserves the Seek-using-Next optimizations as
    2705           2 :                         // long as the iterator wasn't already invalidated up above.
    2706           2 :                         return
    2707           2 :                 }
    2708             :         }
    2709             :         // Slow path.
    2710             : 
    2711             :         // The options changed. Save the new ones to i.opts.
    2712           2 :         if boundsEqual {
    2713           2 :                 // Copying the options into i.opts will overwrite LowerBound and
    2714           2 :                 // UpperBound fields with the user-provided slices. We need to hold on
    2715           2 :                 // to the Pebble-owned slices, so save them and re-set them after the
    2716           2 :                 // copy.
    2717           2 :                 lower, upper := i.opts.LowerBound, i.opts.UpperBound
    2718           2 :                 i.opts = *o
    2719           2 :                 i.opts.LowerBound, i.opts.UpperBound = lower, upper
    2720           2 :         } else {
    2721           2 :                 i.opts = *o
    2722           2 :                 i.processBounds(o.LowerBound, o.UpperBound)
    2723           2 :                 // Propagate the changed bounds to the existing point iterator.
    2724           2 :                 // NB: We propagate i.opts.{Lower,Upper}Bound, not o.{Lower,Upper}Bound
    2725           2 :                 // because i.opts now point to buffers owned by Pebble.
    2726           2 :                 if i.pointIter != nil {
    2727           2 :                         i.pointIter.SetBounds(i.opts.LowerBound, i.opts.UpperBound)
    2728           2 :                 }
    2729           2 :                 if i.rangeKey != nil {
    2730           2 :                         i.rangeKey.iterConfig.SetBounds(i.opts.LowerBound, i.opts.UpperBound)
    2731           2 :                 }
    2732             :         }
    2733             : 
    2734             :         // Even though this is not a positioning operation, the invalidation of the
    2735             :         // iterator stack means we cannot optimize Seeks by using Next.
    2736           2 :         i.invalidate()
    2737           2 : 
    2738           2 :         // Iterators created through NewExternalIter have a different iterator
    2739           2 :         // initialization process.
    2740           2 :         if i.externalReaders != nil {
    2741           1 :                 finishInitializingExternal(i.ctx, i)
    2742           1 :                 return
    2743           1 :         }
    2744           2 :         finishInitializingIter(i.ctx, i.alloc)
    2745             : }
    2746             : 
    2747           2 : func (i *Iterator) invalidate() {
    2748           2 :         i.lastPositioningOp = unknownLastPositionOp
    2749           2 :         i.hasPrefix = false
    2750           2 :         i.iterKV = nil
    2751           2 :         i.err = nil
    2752           2 :         // This switch statement isn't necessary for correctness since callers
    2753           2 :         // should call a repositioning method. We could have arbitrarily set i.pos
    2754           2 :         // to one of the values. But it results in more intuitive behavior in
    2755           2 :         // tests, which do not always reposition.
    2756           2 :         switch i.pos {
    2757           2 :         case iterPosCurForward, iterPosNext, iterPosCurForwardPaused:
    2758           2 :                 i.pos = iterPosCurForward
    2759           2 :         case iterPosCurReverse, iterPosPrev, iterPosCurReversePaused:
    2760           2 :                 i.pos = iterPosCurReverse
    2761             :         }
    2762           2 :         i.iterValidityState = IterExhausted
    2763           2 :         if i.rangeKey != nil {
    2764           2 :                 i.rangeKey.iiter.Invalidate()
    2765           2 :                 i.rangeKey.prevPosHadRangeKey = false
    2766           2 :         }
    2767             : }
    2768             : 
    2769             : // Metrics returns per-iterator metrics.
    2770           0 : func (i *Iterator) Metrics() IteratorMetrics {
    2771           0 :         m := IteratorMetrics{
    2772           0 :                 ReadAmp: 1,
    2773           0 :         }
    2774           0 :         if mi, ok := i.iter.(*mergingIter); ok {
    2775           0 :                 m.ReadAmp = len(mi.levels)
    2776           0 :         }
    2777           0 :         return m
    2778             : }
    2779             : 
    2780             : // ResetStats resets the stats to 0.
    2781           0 : func (i *Iterator) ResetStats() {
    2782           0 :         i.stats = IteratorStats{}
    2783           0 : }
    2784             : 
    2785             : // Stats returns the current stats.
    2786           1 : func (i *Iterator) Stats() IteratorStats {
    2787           1 :         return i.stats
    2788           1 : }
    2789             : 
    2790             : // CloneOptions configures an iterator constructed through Iterator.Clone.
    2791             : type CloneOptions struct {
    2792             :         // IterOptions, if non-nil, define the iterator options to configure a
    2793             :         // cloned iterator. If nil, the clone adopts the same IterOptions as the
    2794             :         // iterator being cloned.
    2795             :         IterOptions *IterOptions
    2796             :         // RefreshBatchView may be set to true when cloning an Iterator over an
    2797             :         // indexed batch. When false, the clone adopts the same (possibly stale)
    2798             :         // view of the indexed batch as the cloned Iterator. When true, the clone is
    2799             :         // constructed with a refreshed view of the batch, observing all of the
    2800             :         // batch's mutations at the time of the Clone. If the cloned iterator was
    2801             :         // not constructed to read over an indexed batch, RefreshVatchView has no
    2802             :         // effect.
    2803             :         RefreshBatchView bool
    2804             : }
    2805             : 
    2806             : // Clone creates a new Iterator over the same underlying data, i.e., over the
    2807             : // same {batch, memtables, sstables}). The resulting iterator is not positioned.
    2808             : // It starts with the same IterOptions, unless opts.IterOptions is set.
    2809             : //
    2810             : // When called on an Iterator over an indexed batch, the clone's visibility of
    2811             : // the indexed batch is determined by CloneOptions.RefreshBatchView. If false,
    2812             : // the clone inherits the iterator's current (possibly stale) view of the batch,
    2813             : // and callers may call SetOptions to subsequently refresh the clone's view to
    2814             : // include all batch mutations. If true, the clone is constructed with a
    2815             : // complete view of the indexed batch's mutations at the time of the Clone.
    2816             : //
    2817             : // Callers can use Clone if they need multiple iterators that need to see
    2818             : // exactly the same underlying state of the DB. This should not be used to
    2819             : // extend the lifetime of the data backing the original Iterator since that
    2820             : // will cause an increase in memory and disk usage (use NewSnapshot for that
    2821             : // purpose).
    2822           2 : func (i *Iterator) Clone(opts CloneOptions) (*Iterator, error) {
    2823           2 :         return i.CloneWithContext(context.Background(), opts)
    2824           2 : }
    2825             : 
    2826             : // CloneWithContext is like Clone, and additionally accepts a context for
    2827             : // tracing.
    2828           2 : func (i *Iterator) CloneWithContext(ctx context.Context, opts CloneOptions) (*Iterator, error) {
    2829           2 :         if opts.IterOptions == nil {
    2830           2 :                 opts.IterOptions = &i.opts
    2831           2 :         }
    2832           2 :         if i.batchOnlyIter {
    2833           1 :                 return nil, errors.Errorf("cannot Clone a batch-only Iterator")
    2834           1 :         }
    2835           2 :         readState := i.readState
    2836           2 :         vers := i.version
    2837           2 :         if readState == nil && vers == nil {
    2838           1 :                 return nil, errors.Errorf("cannot Clone a closed Iterator")
    2839           1 :         }
    2840             :         // i is already holding a ref, so there is no race with unref here.
    2841             :         //
    2842             :         // TODO(bilal): If the underlying iterator was created on a snapshot, we could
    2843             :         // grab a reference to the current readState instead of reffing the original
    2844             :         // readState. This allows us to release references to some zombie sstables
    2845             :         // and memtables.
    2846           2 :         if readState != nil {
    2847           2 :                 readState.ref()
    2848           2 :         }
    2849           2 :         if vers != nil {
    2850           2 :                 vers.Ref()
    2851           2 :         }
    2852             :         // Bundle various structures under a single umbrella in order to allocate
    2853             :         // them together.
    2854           2 :         buf := iterAllocPool.Get().(*iterAlloc)
    2855           2 :         dbi := &buf.dbi
    2856           2 :         *dbi = Iterator{
    2857           2 :                 ctx:                 ctx,
    2858           2 :                 opts:                *opts.IterOptions,
    2859           2 :                 alloc:               buf,
    2860           2 :                 merge:               i.merge,
    2861           2 :                 comparer:            i.comparer,
    2862           2 :                 readState:           readState,
    2863           2 :                 version:             vers,
    2864           2 :                 keyBuf:              buf.keyBuf,
    2865           2 :                 prefixOrFullSeekKey: buf.prefixOrFullSeekKey,
    2866           2 :                 boundsBuf:           buf.boundsBuf,
    2867           2 :                 batch:               i.batch,
    2868           2 :                 batchSeqNum:         i.batchSeqNum,
    2869           2 :                 newIters:            i.newIters,
    2870           2 :                 newIterRangeKey:     i.newIterRangeKey,
    2871           2 :                 seqNum:              i.seqNum,
    2872           2 :         }
    2873           2 :         dbi.processBounds(dbi.opts.LowerBound, dbi.opts.UpperBound)
    2874           2 : 
    2875           2 :         // If the caller requested the clone have a current view of the indexed
    2876           2 :         // batch, set the clone's batch sequence number appropriately.
    2877           2 :         if i.batch != nil && opts.RefreshBatchView {
    2878           1 :                 dbi.batchSeqNum = (base.SeqNum(len(i.batch.data)) | base.SeqNumBatchBit)
    2879           1 :         }
    2880             : 
    2881           2 :         return finishInitializingIter(ctx, buf), nil
    2882             : }
    2883             : 
    2884             : // Merge adds all of the argument's statistics to the receiver. It may be used
    2885             : // to accumulate stats across multiple iterators.
    2886           1 : func (stats *IteratorStats) Merge(o IteratorStats) {
    2887           1 :         for i := InterfaceCall; i < NumStatsKind; i++ {
    2888           1 :                 stats.ForwardSeekCount[i] += o.ForwardSeekCount[i]
    2889           1 :                 stats.ReverseSeekCount[i] += o.ReverseSeekCount[i]
    2890           1 :                 stats.ForwardStepCount[i] += o.ForwardStepCount[i]
    2891           1 :                 stats.ReverseStepCount[i] += o.ReverseStepCount[i]
    2892           1 :         }
    2893           1 :         stats.InternalStats.Merge(o.InternalStats)
    2894           1 :         stats.RangeKeyStats.Merge(o.RangeKeyStats)
    2895             : }
    2896             : 
    2897           1 : func (stats *IteratorStats) String() string {
    2898           1 :         return redact.StringWithoutMarkers(stats)
    2899           1 : }
    2900             : 
    2901             : // SafeFormat implements the redact.SafeFormatter interface.
    2902           1 : func (stats *IteratorStats) SafeFormat(s redact.SafePrinter, verb rune) {
    2903           1 :         if stats.ReverseSeekCount[InterfaceCall] == 0 && stats.ReverseSeekCount[InternalIterCall] == 0 {
    2904           1 :                 s.Printf("seeked %s times (%s internal)",
    2905           1 :                         humanize.Count.Uint64(uint64(stats.ForwardSeekCount[InterfaceCall])),
    2906           1 :                         humanize.Count.Uint64(uint64(stats.ForwardSeekCount[InternalIterCall])),
    2907           1 :                 )
    2908           1 :         } else {
    2909           1 :                 s.Printf("seeked %s times (%s fwd/%s rev, internal: %s fwd/%s rev)",
    2910           1 :                         humanize.Count.Uint64(uint64(stats.ForwardSeekCount[InterfaceCall]+stats.ReverseSeekCount[InterfaceCall])),
    2911           1 :                         humanize.Count.Uint64(uint64(stats.ForwardSeekCount[InterfaceCall])),
    2912           1 :                         humanize.Count.Uint64(uint64(stats.ReverseSeekCount[InterfaceCall])),
    2913           1 :                         humanize.Count.Uint64(uint64(stats.ForwardSeekCount[InternalIterCall])),
    2914           1 :                         humanize.Count.Uint64(uint64(stats.ReverseSeekCount[InternalIterCall])),
    2915           1 :                 )
    2916           1 :         }
    2917           1 :         s.SafeString("; ")
    2918           1 : 
    2919           1 :         if stats.ReverseStepCount[InterfaceCall] == 0 && stats.ReverseStepCount[InternalIterCall] == 0 {
    2920           1 :                 s.Printf("stepped %s times (%s internal)",
    2921           1 :                         humanize.Count.Uint64(uint64(stats.ForwardStepCount[InterfaceCall])),
    2922           1 :                         humanize.Count.Uint64(uint64(stats.ForwardStepCount[InternalIterCall])),
    2923           1 :                 )
    2924           1 :         } else {
    2925           1 :                 s.Printf("stepped %s times (%s fwd/%s rev, internal: %s fwd/%s rev)",
    2926           1 :                         humanize.Count.Uint64(uint64(stats.ForwardStepCount[InterfaceCall]+stats.ReverseStepCount[InterfaceCall])),
    2927           1 :                         humanize.Count.Uint64(uint64(stats.ForwardStepCount[InterfaceCall])),
    2928           1 :                         humanize.Count.Uint64(uint64(stats.ReverseStepCount[InterfaceCall])),
    2929           1 :                         humanize.Count.Uint64(uint64(stats.ForwardStepCount[InternalIterCall])),
    2930           1 :                         humanize.Count.Uint64(uint64(stats.ReverseStepCount[InternalIterCall])),
    2931           1 :                 )
    2932           1 :         }
    2933             : 
    2934           1 :         if stats.InternalStats != (InternalIteratorStats{}) {
    2935           1 :                 s.SafeString("; ")
    2936           1 :                 stats.InternalStats.SafeFormat(s, verb)
    2937           1 :         }
    2938           1 :         if stats.RangeKeyStats != (RangeKeyIteratorStats{}) {
    2939           1 :                 s.SafeString(", ")
    2940           1 :                 stats.RangeKeyStats.SafeFormat(s, verb)
    2941           1 :         }
    2942             : }
    2943             : 
    2944             : // CanDeterministicallySingleDelete takes a valid iterator and examines internal
    2945             : // state to determine if a SingleDelete deleting Iterator.Key() would
    2946             : // deterministically delete the key. CanDeterministicallySingleDelete requires
    2947             : // the iterator to be oriented in the forward direction (eg, the last
    2948             : // positioning operation must've been a First, a Seek[Prefix]GE, or a
    2949             : // Next[Prefix][WithLimit]).
    2950             : //
    2951             : // This function does not change the external position of the iterator, and all
    2952             : // positioning methods should behave the same as if it was never called. This
    2953             : // function will only return a meaningful result the first time it's invoked at
    2954             : // an iterator position. This function invalidates the iterator Value's memory,
    2955             : // and the caller must not rely on the memory safety of the previous Iterator
    2956             : // position.
    2957             : //
    2958             : // If CanDeterministicallySingleDelete returns true AND the key at the iterator
    2959             : // position is not modified between the creation of the Iterator and the commit
    2960             : // of a batch containing a SingleDelete over the key, then the caller can be
    2961             : // assured that SingleDelete is equivalent to Delete on the local engine, but it
    2962             : // may not be true on another engine that received the same writes and with
    2963             : // logically equivalent state since this engine may have collapsed multiple SETs
    2964             : // into one.
    2965           2 : func CanDeterministicallySingleDelete(it *Iterator) (bool, error) {
    2966           2 :         // This function may only be called once per external iterator position. We
    2967           2 :         // can validate this by checking the last positioning operation.
    2968           2 :         if it.lastPositioningOp == internalNextOp {
    2969           2 :                 return false, errors.New("pebble: CanDeterministicallySingleDelete called twice")
    2970           2 :         }
    2971           2 :         validity, kind := it.internalNext()
    2972           2 :         var shadowedBySingleDelete bool
    2973           2 :         for validity == internalNextValid {
    2974           2 :                 switch kind {
    2975           2 :                 case InternalKeyKindDelete, InternalKeyKindDeleteSized:
    2976           2 :                         // A DEL or DELSIZED tombstone is okay. An internal key
    2977           2 :                         // sequence like SINGLEDEL; SET; DEL; SET can be handled
    2978           2 :                         // deterministically. If there are SETs further down, we
    2979           2 :                         // don't care about them.
    2980           2 :                         return true, nil
    2981           2 :                 case InternalKeyKindSingleDelete:
    2982           2 :                         // A SingleDelete is okay as long as when that SingleDelete was
    2983           2 :                         // written, it was written deterministically (eg, with its own
    2984           2 :                         // CanDeterministicallySingleDelete check). Validate that it was
    2985           2 :                         // written deterministically. We'll allow one set to appear after
    2986           2 :                         // the SingleDelete.
    2987           2 :                         shadowedBySingleDelete = true
    2988           2 :                         validity, kind = it.internalNext()
    2989           2 :                         continue
    2990           2 :                 case InternalKeyKindSet, InternalKeyKindSetWithDelete, InternalKeyKindMerge:
    2991           2 :                         // If we observed a single delete, it's allowed to delete 1 key.
    2992           2 :                         // We'll keep looping to validate that the internal keys beneath the
    2993           2 :                         // already-written single delete are copacetic.
    2994           2 :                         if shadowedBySingleDelete {
    2995           1 :                                 shadowedBySingleDelete = false
    2996           1 :                                 validity, kind = it.internalNext()
    2997           1 :                                 continue
    2998             :                         }
    2999             :                         // We encountered a shadowed SET, SETWITHDEL, MERGE. A SINGLEDEL
    3000             :                         // that deleted the KV at the original iterator position could
    3001             :                         // result in this key becoming visible.
    3002           2 :                         return false, nil
    3003           0 :                 case InternalKeyKindRangeDelete:
    3004           0 :                         // RangeDeletes are handled by the merging iterator and should never
    3005           0 :                         // be observed by the top-level Iterator.
    3006           0 :                         panic(errors.AssertionFailedf("pebble: unexpected range delete"))
    3007           0 :                 case InternalKeyKindRangeKeySet, InternalKeyKindRangeKeyUnset, InternalKeyKindRangeKeyDelete:
    3008           0 :                         // Range keys are interleaved at the maximal sequence number and
    3009           0 :                         // should never be observed within a user key.
    3010           0 :                         panic(errors.AssertionFailedf("pebble: unexpected range key"))
    3011           0 :                 default:
    3012           0 :                         panic(errors.AssertionFailedf("pebble: unexpected key kind: %s", errors.Safe(kind)))
    3013             :                 }
    3014             :         }
    3015           2 :         if validity == internalNextError {
    3016           2 :                 return false, it.Error()
    3017           2 :         }
    3018           2 :         return true, nil
    3019             : }
    3020             : 
    3021             : // internalNextValidity enumerates the potential outcomes of a call to
    3022             : // internalNext.
    3023             : type internalNextValidity int8
    3024             : 
    3025             : const (
    3026             :         // internalNextError is returned by internalNext when an error occurred and
    3027             :         // the caller is responsible for checking iter.Error().
    3028             :         internalNextError internalNextValidity = iota
    3029             :         // internalNextExhausted is returned by internalNext when the next internal
    3030             :         // key is an internal key with a different user key than Iterator.Key().
    3031             :         internalNextExhausted
    3032             :         // internalNextValid is returned by internalNext when the internal next
    3033             :         // found a shadowed internal key with a user key equal to Iterator.Key().
    3034             :         internalNextValid
    3035             : )
    3036             : 
    3037             : // internalNext advances internal Iterator state forward to expose the
    3038             : // InternalKeyKind of the next internal key with a user key equal to Key().
    3039             : //
    3040             : // internalNext is a highly specialized operation and is unlikely to be
    3041             : // generally useful. See Iterator.Next for how to reposition the iterator to the
    3042             : // next key. internalNext requires the Iterator to be at a valid position in the
    3043             : // forward direction (the last positioning operation must've been a First, a
    3044             : // Seek[Prefix]GE, or a Next[Prefix][WithLimit] and Valid() must return true).
    3045             : //
    3046             : // internalNext, unlike all other Iterator methods, exposes internal LSM state.
    3047             : // internalNext advances the Iterator's internal iterator to the next shadowed
    3048             : // key with a user key equal to Key(). When a key is overwritten or deleted, its
    3049             : // removal from the LSM occurs lazily as a part of compactions. internalNext
    3050             : // allows the caller to see whether an obsolete internal key exists with the
    3051             : // current Key(), and what it's key kind is. Note that the existence of an
    3052             : // internal key is nondeterministic and dependent on internal LSM state. These
    3053             : // semantics are unlikely to be applicable to almost all use cases.
    3054             : //
    3055             : // If internalNext finds a key that shares the same user key as Key(), it
    3056             : // returns internalNextValid and the internal key's kind. If internalNext
    3057             : // encounters an error, it returns internalNextError and the caller is expected
    3058             : // to call Iterator.Error() to retrieve it. In all other circumstances,
    3059             : // internalNext returns internalNextExhausted, indicating that there are no more
    3060             : // additional internal keys with the user key Key().
    3061             : //
    3062             : // internalNext does not change the external position of the iterator, and a
    3063             : // Next operation should behave the same as if internalNext was never called.
    3064             : // internalNext does invalidate the iterator Value's memory, and the caller must
    3065             : // not rely on the memory safety of the previous Iterator position.
    3066           2 : func (i *Iterator) internalNext() (internalNextValidity, base.InternalKeyKind) {
    3067           2 :         i.stats.ForwardStepCount[InterfaceCall]++
    3068           2 :         if i.err != nil {
    3069           2 :                 return internalNextError, base.InternalKeyKindInvalid
    3070           2 :         } else if i.iterValidityState != IterValid {
    3071           2 :                 return internalNextExhausted, base.InternalKeyKindInvalid
    3072           2 :         }
    3073           2 :         i.lastPositioningOp = internalNextOp
    3074           2 : 
    3075           2 :         switch i.pos {
    3076           2 :         case iterPosCurForward:
    3077           2 :                 i.iterKV = i.iter.Next()
    3078           2 :                 if i.iterKV == nil {
    3079           2 :                         // We check i.iter.Error() here and return an internalNextError enum
    3080           2 :                         // variant so that the caller does not need to check i.iter.Error()
    3081           2 :                         // in the common case that the next internal key has a new user key.
    3082           2 :                         if i.err = i.iter.Error(); i.err != nil {
    3083           0 :                                 return internalNextError, base.InternalKeyKindInvalid
    3084           0 :                         }
    3085           2 :                         i.pos = iterPosNext
    3086           2 :                         return internalNextExhausted, base.InternalKeyKindInvalid
    3087           2 :                 } else if i.comparer.Equal(i.iterKV.K.UserKey, i.key) {
    3088           2 :                         return internalNextValid, i.iterKV.Kind()
    3089           2 :                 }
    3090           2 :                 i.pos = iterPosNext
    3091           2 :                 return internalNextExhausted, base.InternalKeyKindInvalid
    3092           2 :         case iterPosCurReverse, iterPosCurReversePaused, iterPosPrev:
    3093           2 :                 i.err = errors.New("switching from reverse to forward via internalNext is prohibited")
    3094           2 :                 i.iterValidityState = IterExhausted
    3095           2 :                 return internalNextError, base.InternalKeyKindInvalid
    3096           2 :         case iterPosNext, iterPosCurForwardPaused:
    3097           2 :                 // The previous method already moved onto the next user key. This is
    3098           2 :                 // only possible if
    3099           2 :                 //   - the last positioning method was a call to internalNext, and we
    3100           2 :                 //     advanced to a new user key.
    3101           2 :                 //   - the previous non-internalNext iterator operation encountered a
    3102           2 :                 //     range key or merge, forcing an internal Next that found a new
    3103           2 :                 //     user key that's not equal to i.Iterator.Key().
    3104           2 :                 return internalNextExhausted, base.InternalKeyKindInvalid
    3105           0 :         default:
    3106           0 :                 panic("unreachable")
    3107             :         }
    3108             : }
    3109             : 
    3110             : var _ base.IteratorDebug = (*Iterator)(nil)
    3111             : 
    3112             : // DebugTree implements the base.IteratorDebug interface.
    3113           0 : func (i *Iterator) DebugTree(tp treeprinter.Node) {
    3114           0 :         n := tp.Childf("%T(%p)", i, i)
    3115           0 :         if i.iter != nil {
    3116           0 :                 i.iter.DebugTree(n)
    3117           0 :         }
    3118           0 :         if i.pointIter != nil {
    3119           0 :                 i.pointIter.DebugTree(n)
    3120           0 :         }
    3121             : }

Generated by: LCOV version 1.14