LCOV - code coverage report
Current view: top level - pebble - iterator.go (source / functions) Coverage Total Hit
Test: 2025-11-14 08:18Z 5a4e0fa7 - tests only.lcov Lines: 89.7 % 1920 1723
Test Date: 2025-11-14 08:19:26 Functions: - 0 0

            Line data    Source code
       1              : // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2              : // of this source code is governed by a BSD-style license that can be found in
       3              : // the LICENSE file.
       4              : 
       5              : package pebble
       6              : 
       7              : import (
       8              :         "bytes"
       9              :         "context"
      10              :         "io"
      11              :         "math/rand/v2"
      12              :         "sync"
      13              :         "unsafe"
      14              : 
      15              :         "github.com/cockroachdb/errors"
      16              :         "github.com/cockroachdb/pebble/internal/base"
      17              :         "github.com/cockroachdb/pebble/internal/bytealloc"
      18              :         "github.com/cockroachdb/pebble/internal/bytesprofile"
      19              :         "github.com/cockroachdb/pebble/internal/humanize"
      20              :         "github.com/cockroachdb/pebble/internal/inflight"
      21              :         "github.com/cockroachdb/pebble/internal/invariants"
      22              :         "github.com/cockroachdb/pebble/internal/keyspan"
      23              :         "github.com/cockroachdb/pebble/internal/keyspan/keyspanimpl"
      24              :         "github.com/cockroachdb/pebble/internal/manifest"
      25              :         "github.com/cockroachdb/pebble/internal/rangekeystack"
      26              :         "github.com/cockroachdb/pebble/internal/treeprinter"
      27              :         "github.com/cockroachdb/pebble/sstable/blob"
      28              :         "github.com/cockroachdb/redact"
      29              : )
      30              : 
      31              : // iterPos describes the state of the internal iterator, in terms of whether it
      32              : // is at the position returned to the user (cur), one ahead of the position
      33              : // returned (next for forward iteration and prev for reverse iteration). The cur
      34              : // position is split into two states, for forward and reverse iteration, since
      35              : // we need to differentiate for switching directions.
      36              : //
      37              : // There is subtlety in what is considered the current position of the Iterator.
      38              : // The internal iterator exposes a sequence of internal keys. There is not
      39              : // always a single internalIterator position corresponding to the position
      40              : // returned to the user. Consider the example:
      41              : //
      42              : //      a.MERGE.9 a.MERGE.8 a.MERGE.7 a.SET.6 b.DELETE.9 b.DELETE.5 b.SET.4
      43              : //      \                                   /
      44              : //        \       Iterator.Key() = 'a'    /
      45              : //
      46              : // The Iterator exposes one valid position at user key 'a' and the two exhausted
      47              : // positions at the beginning and end of iteration. The underlying
      48              : // internalIterator contains 7 valid positions and 2 exhausted positions.
      49              : //
      50              : // Iterator positioning methods must set iterPos to iterPosCur{Foward,Backward}
      51              : // iff the user key at the current internalIterator position equals the
      52              : // Iterator.Key returned to the user. This guarantees that a call to nextUserKey
      53              : // or prevUserKey will advance to the next or previous iterator position.
      54              : // iterPosCur{Forward,Backward} does not make any guarantee about the internal
      55              : // iterator position among internal keys with matching user keys, and it will
      56              : // vary subtly depending on the particular key kinds encountered. In the above
      57              : // example, the iterator returning 'a' may set iterPosCurForward if the internal
      58              : // iterator is positioned at any of a.MERGE.9, a.MERGE.8, a.MERGE.7 or a.SET.6.
      59              : //
      60              : // When setting iterPos to iterPosNext or iterPosPrev, the internal iterator
      61              : // must be advanced to the first internalIterator position at a user key greater
      62              : // (iterPosNext) or less (iterPosPrev) than the key returned to the user. An
      63              : // internalIterator position that's !Valid() must also be considered greater or
      64              : // less—depending on the direction of iteration—than the last valid Iterator
      65              : // position.
      66              : type iterPos int8
      67              : 
      68              : const (
      69              :         iterPosCurForward iterPos = 0
      70              :         iterPosNext       iterPos = 1
      71              :         iterPosPrev       iterPos = -1
      72              :         iterPosCurReverse iterPos = -2
      73              : 
      74              :         // For limited iteration. When the iterator is at iterPosCurForwardPaused
      75              :         // - Next*() call should behave as if the internal iterator is already
      76              :         //   at next (akin to iterPosNext).
      77              :         // - Prev*() call should behave as if the internal iterator is at the
      78              :         //   current key (akin to iterPosCurForward).
      79              :         //
      80              :         // Similar semantics apply to CurReversePaused.
      81              :         iterPosCurForwardPaused iterPos = 2
      82              :         iterPosCurReversePaused iterPos = -3
      83              : )
      84              : 
      85              : // Approximate gap in bytes between samples of data read during iteration.
      86              : // This is multiplied with a default ReadSamplingMultiplier of 1 << 4 to yield
      87              : // 1 << 20 (1MB). The 1MB factor comes from:
      88              : // https://github.com/cockroachdb/pebble/issues/29#issuecomment-494477985
      89              : const readBytesPeriod uint64 = 1 << 16
      90              : 
      91              : var errReversePrefixIteration = errors.New("pebble: unsupported reverse prefix iteration")
      92              : 
      93              : // IteratorMetrics holds per-iterator metrics. These do not change over the
      94              : // lifetime of the iterator.
      95              : type IteratorMetrics struct {
      96              :         // The read amplification experienced by this iterator. This is the sum of
      97              :         // the memtables, the L0 sublevels and the non-empty Ln levels. Higher read
      98              :         // amplification generally results in slower reads, though allowing higher
      99              :         // read amplification can also result in faster writes.
     100              :         ReadAmp int
     101              : }
     102              : 
     103              : // IteratorStatsKind describes the two kind of iterator stats.
     104              : type IteratorStatsKind int8
     105              : 
     106              : const (
     107              :         // InterfaceCall represents calls to Iterator.
     108              :         InterfaceCall IteratorStatsKind = iota
     109              :         // InternalIterCall represents calls by Iterator to its internalIterator.
     110              :         InternalIterCall
     111              :         // NumStatsKind is the number of kinds, and is used for array sizing.
     112              :         NumStatsKind
     113              : )
     114              : 
     115              : // IteratorStats contains iteration stats.
     116              : type IteratorStats struct {
     117              :         // ForwardSeekCount includes SeekGE, SeekPrefixGE, First.
     118              :         ForwardSeekCount [NumStatsKind]int
     119              :         // ReverseSeek includes SeekLT, Last.
     120              :         ReverseSeekCount [NumStatsKind]int
     121              :         // ForwardStepCount includes Next.
     122              :         ForwardStepCount [NumStatsKind]int
     123              :         // ReverseStepCount includes Prev.
     124              :         ReverseStepCount [NumStatsKind]int
     125              :         InternalStats    InternalIteratorStats
     126              :         RangeKeyStats    RangeKeyIteratorStats
     127              : }
     128              : 
     129              : var _ redact.SafeFormatter = &IteratorStats{}
     130              : 
     131              : // InternalIteratorStats contains miscellaneous stats produced by internal
     132              : // iterators.
     133              : type InternalIteratorStats = base.InternalIteratorStats
     134              : 
     135              : // BlockReadStats contains iterators stats about block reads.
     136              : type BlockReadStats = base.BlockReadStats
     137              : 
     138              : // RangeKeyIteratorStats contains miscellaneous stats about range keys
     139              : // encountered by the iterator.
     140              : type RangeKeyIteratorStats struct {
     141              :         // Count records the number of range keys encountered during
     142              :         // iteration. Range keys may be counted multiple times if the iterator
     143              :         // leaves a range key's bounds and then returns.
     144              :         Count int
     145              :         // ContainedPoints records the number of point keys encountered within the
     146              :         // bounds of a range key. Note that this includes point keys with suffixes
     147              :         // that sort both above and below the covering range key's suffix.
     148              :         ContainedPoints int
     149              :         // SkippedPoints records the count of the subset of ContainedPoints point
     150              :         // keys that were skipped during iteration due to range-key masking. It does
     151              :         // not include point keys that were never loaded because a
     152              :         // RangeKeyMasking.Filter excluded the entire containing block.
     153              :         SkippedPoints int
     154              : }
     155              : 
     156              : // Merge adds all of the argument's statistics to the receiver. It may be used
     157              : // to accumulate stats across multiple iterators.
     158            1 : func (s *RangeKeyIteratorStats) Merge(o RangeKeyIteratorStats) {
     159            1 :         s.Count += o.Count
     160            1 :         s.ContainedPoints += o.ContainedPoints
     161            1 :         s.SkippedPoints += o.SkippedPoints
     162            1 : }
     163              : 
     164            0 : func (s *RangeKeyIteratorStats) String() string {
     165            0 :         return redact.StringWithoutMarkers(s)
     166            0 : }
     167              : 
     168              : // SafeFormat implements the redact.SafeFormatter interface.
     169            1 : func (s *RangeKeyIteratorStats) SafeFormat(p redact.SafePrinter, verb rune) {
     170            1 :         p.Printf("range keys: %s, contained points: %s (%s skipped)",
     171            1 :                 humanize.Count.Uint64(uint64(s.Count)),
     172            1 :                 humanize.Count.Uint64(uint64(s.ContainedPoints)),
     173            1 :                 humanize.Count.Uint64(uint64(s.SkippedPoints)))
     174            1 : }
     175              : 
     176              : // LazyValue is a lazy value. See the long comment in base.LazyValue.
     177              : type LazyValue = base.LazyValue
     178              : 
     179              : // Iterator iterates over a DB's key/value pairs in key order.
     180              : //
     181              : // An iterator must be closed after use, but it is not necessary to read an
     182              : // iterator until exhaustion.
     183              : //
     184              : // An iterator is not goroutine-safe, but it is safe to use multiple iterators
     185              : // concurrently, with each in a dedicated goroutine.
     186              : //
     187              : // It is also safe to use an iterator concurrently with modifying its
     188              : // underlying DB, if that DB permits modification. However, the resultant
     189              : // key/value pairs are not guaranteed to be a consistent snapshot of that DB
     190              : // at a particular point in time.
     191              : //
     192              : // If an iterator encounters an error during any operation, it is stored by
     193              : // the Iterator and surfaced through the Error method. All absolute
     194              : // positioning methods (eg, SeekLT, SeekGT, First, Last, etc) reset any
     195              : // accumulated error before positioning. All relative positioning methods (eg,
     196              : // Next, Prev) return without advancing if the iterator has an accumulated
     197              : // error.
     198              : type Iterator struct {
     199              :         // blobValueFetcher is the ValueFetcher to use when retrieving values stored
     200              :         // externally in blob files.
     201              :         blobValueFetcher blob.ValueFetcher
     202              : 
     203              :         // All fields below this field are cleared during Iterator.Close before
     204              :         // returning the Iterator to the pool. Any fields above this field must also
     205              :         // be cleared, but may be cleared as a part of the body of Iterator.Close
     206              :         // (eg, if these types have their own Close method that zeroes their
     207              :         // fields).
     208              :         clearForReuseBoundary struct{}
     209              : 
     210              :         // The context is stored here since (a) Iterators are expected to be
     211              :         // short-lived (since they pin memtables and sstables), (b) plumbing a
     212              :         // context into every method is very painful, (c) they do not (yet) respect
     213              :         // context cancellation and are only used for tracing.
     214              :         ctx context.Context
     215              :         // TODO(radu): group fields that are always set from `db` fields (tracker,
     216              :         // merge, comparer, fc) into a single struct we can point to.
     217              :         tracker       *inflight.Tracker
     218              :         trackerHandle inflight.Handle
     219              :         opts          IterOptions
     220              :         merge         Merge
     221              :         comparer      *base.Comparer
     222              :         iter          internalIterator
     223              :         pointIter     topLevelIterator
     224              :         // Either readState or version is set, but not both.
     225              :         readState *readState
     226              :         version   *manifest.Version
     227              :         // rangeKey holds iteration state specific to iteration over range keys.
     228              :         // The range key field may be nil if the Iterator has never been configured
     229              :         // to iterate over range keys. Its non-nilness cannot be used to determine
     230              :         // if the Iterator is currently iterating over range keys: For that, consult
     231              :         // the IterOptions using opts.rangeKeys(). If non-nil, its rangeKeyIter
     232              :         // field is guaranteed to be non-nil too.
     233              :         rangeKey *iteratorRangeKeyState
     234              :         // rangeKeyMasking holds state for range-key masking of point keys.
     235              :         rangeKeyMasking rangeKeyMasking
     236              :         err             error
     237              :         // When iterValidityState=IterValid, key represents the current key, which
     238              :         // is backed by keyBuf.
     239              :         key    []byte
     240              :         keyBuf []byte
     241              :         value  base.InternalValue
     242              :         // For use in LazyValue.Clone.
     243              :         valueBuf []byte
     244              :         fetcher  base.LazyFetcher
     245              :         // For use in LazyValue.Value.
     246              :         lazyValueBuf []byte
     247              :         valueCloser  io.Closer
     248              :         // boundsBuf holds two buffers used to store the lower and upper bounds.
     249              :         // Whenever the Iterator's bounds change, the new bounds are copied into
     250              :         // boundsBuf[boundsBufIdx]. The two bounds share a slice to reduce
     251              :         // allocations. opts.LowerBound and opts.UpperBound point into this slice.
     252              :         boundsBuf    [2][]byte
     253              :         boundsBufIdx int
     254              :         // iterKV reflects the latest position of iter, except when SetBounds is
     255              :         // called. In that case, it is explicitly set to nil.
     256              :         iterKV              *base.InternalKV
     257              :         alloc               *iterAlloc
     258              :         prefixOrFullSeekKey []byte
     259              :         readSampling        readSampling
     260              :         stats               IteratorStats
     261              :         externalIter        *externalIterState
     262              :         // Following fields used when constructing an iterator stack, eg, in Clone
     263              :         // and SetOptions or when re-fragmenting a batch's range keys/range dels.
     264              :         fc                    *fileCacheHandle
     265              :         newIters              tableNewIters
     266              :         newIterRangeKey       keyspanimpl.TableNewSpanIter
     267              :         valueRetrievalProfile *bytesprofile.Profile
     268              :         lazyCombinedIter      lazyCombinedIter
     269              :         seqNum                base.SeqNum
     270              :         // batch is non-nil if this Iterator includes an indexed batch. Batch
     271              :         // contains all the state pertaining to iterating over the indexed batch.
     272              :         // The iteratorBatchState struct is bundled within the iterAlloc struct to
     273              :         // reduce allocations.
     274              :         batch *iteratorBatchState
     275              :         // merging is a pointer to this iterator's point merging iterator. It
     276              :         // appears here because key visibility is handled by the merging iterator.
     277              :         // During SetOptions on an iterator over an indexed batch, this field is
     278              :         // used to update the merging iterator's batch snapshot.
     279              :         merging *mergingIter
     280              : 
     281              :         // Keeping the bools here after all the 8 byte aligned fields shrinks the
     282              :         // sizeof this struct by 24 bytes.
     283              : 
     284              :         // INVARIANT:
     285              :         // iterValidityState==IterAtLimit <=>
     286              :         //  pos==iterPosCurForwardPaused || pos==iterPosCurReversePaused
     287              :         iterValidityState IterValidityState
     288              :         // Set to true by SetBounds, SetOptions. Causes the Iterator to appear
     289              :         // exhausted externally, while preserving the correct iterValidityState for
     290              :         // the iterator's internal state. Preserving the correct internal validity
     291              :         // is used for SeekPrefixGE(..., trySeekUsingNext), and SeekGE/SeekLT
     292              :         // optimizations after "no-op" calls to SetBounds and SetOptions.
     293              :         requiresReposition bool
     294              :         // The position of iter. When this is iterPos{Prev,Next} the iter has been
     295              :         // moved past the current key-value, which can only happen if
     296              :         // iterValidityState=IterValid, i.e., there is something to return to the
     297              :         // client for the current position.
     298              :         pos iterPos
     299              :         // Relates to the prefixOrFullSeekKey field above.
     300              :         hasPrefix bool
     301              :         // Used for deriving the value of SeekPrefixGE(..., trySeekUsingNext),
     302              :         // and SeekGE/SeekLT optimizations
     303              :         lastPositioningOp lastPositioningOpKind
     304              :         // Used for determining when it's safe to perform SeekGE optimizations that
     305              :         // reuse the iterator state to avoid the cost of a full seek if the iterator
     306              :         // is already positioned in the correct place. If the iterator's view of its
     307              :         // indexed batch was just refreshed, some optimizations cannot be applied on
     308              :         // the first seek after the refresh:
     309              :         // - SeekGE has a no-op optimization that does not seek on the internal
     310              :         //   iterator at all if the iterator is already in the correct place.
     311              :         //   This optimization cannot be performed if the internal iterator was
     312              :         //   last positioned when the iterator had a different view of an
     313              :         //   underlying batch.
     314              :         // - Seek[Prefix]GE set flags.TrySeekUsingNext()=true when the seek key is
     315              :         //   greater than the previous operation's seek key, under the expectation
     316              :         //   that the various internal iterators can use their current position to
     317              :         //   avoid a full expensive re-seek. This applies to the batchIter as well.
     318              :         //   However, if the view of the batch was just refreshed, the batchIter's
     319              :         //   position is not useful because it may already be beyond new keys less
     320              :         //   than the seek key. To prevent the use of this optimization in
     321              :         //   batchIter, Seek[Prefix]GE set flags.BatchJustRefreshed()=true if this
     322              :         //   bit is enabled.
     323              :         batchJustRefreshed bool
     324              :         // batchOnlyIter is set to true for Batch.NewBatchOnlyIter.
     325              :         batchOnlyIter bool
     326              :         // Used in some tests to disable the random disabling of seek optimizations.
     327              :         forceEnableSeekOpt bool
     328              :         // Set to true if NextPrefix is not currently permitted. Defaults to false
     329              :         // in case an iterator never had any bounds.
     330              :         nextPrefixNotPermittedByUpperBound bool
     331              : }
     332              : 
     333              : const clearOff = unsafe.Offsetof(Iterator{}.clearForReuseBoundary)
     334              : const clearLen = unsafe.Sizeof(Iterator{}) - clearOff
     335              : 
     336            1 : func (i *Iterator) clearForReuse() {
     337            1 :         *(*[clearLen]byte)(unsafe.Add(unsafe.Pointer(i), clearOff)) = [clearLen]byte{}
     338            1 : }
     339              : 
     340              : // cmp is a convenience shorthand for the i.comparer.Compare function.
     341            1 : func (i *Iterator) cmp(a, b []byte) int {
     342            1 :         return i.comparer.Compare(a, b)
     343            1 : }
     344              : 
     345              : // equal is a convenience shorthand for the i.comparer.Equal function.
     346            1 : func (i *Iterator) equal(a, b []byte) bool {
     347            1 :         return i.comparer.Equal(a, b)
     348            1 : }
     349              : 
     350              : // iteratorBatchState holds state pertaining to iterating over an indexed batch.
     351              : // When an iterator is configured to read through an indexed batch, the iterator
     352              : // maintains a pointer to this struct. This struct is embedded within the
     353              : // iterAlloc struct to reduce allocations.
     354              : type iteratorBatchState struct {
     355              :         batch *Batch
     356              :         // batchSeqNum is used by Iterators over indexed batches to detect when the
     357              :         // underlying batch has been mutated. The batch beneath an indexed batch may
     358              :         // be mutated while the Iterator is open, but new keys are not surfaced
     359              :         // until the next call to SetOptions.
     360              :         batchSeqNum base.SeqNum
     361              :         // batch{PointIter,RangeDelIter,RangeKeyIter} are used when the Iterator is
     362              :         // configured to read through an indexed batch. If a batch is set, these
     363              :         // iterators will be included within the iterator stack regardless of
     364              :         // whether the batch currently contains any keys of their kind. These
     365              :         // pointers are used during a call to SetOptions to refresh the Iterator's
     366              :         // view of its indexed batch.
     367              :         pointIter    batchIter
     368              :         rangeDelIter keyspan.Iter
     369              :         rangeKeyIter keyspan.Iter
     370              : }
     371              : 
     372              : // iteratorRangeKeyState holds an iterator's range key iteration state.
     373              : type iteratorRangeKeyState struct {
     374              :         // rangeKeyIter holds the range key iterator stack that iterates over the
     375              :         // merged spans across the entirety of the LSM.
     376              :         rangeKeyIter keyspan.FragmentIterator
     377              :         iiter        keyspan.InterleavingIter
     378              :         // stale is set to true when the range key state recorded here (in start,
     379              :         // end and keys) may not be in sync with the current range key at the
     380              :         // interleaving iterator's current position.
     381              :         //
     382              :         // When the interelaving iterator passes over a new span, it invokes the
     383              :         // SpanChanged hook defined on the `rangeKeyMasking` type,  which sets stale
     384              :         // to true if the span is non-nil.
     385              :         //
     386              :         // The parent iterator may not be positioned over the interleaving
     387              :         // iterator's current position (eg, i.iterPos = iterPos{Next,Prev}), so
     388              :         // {keys,start,end} are only updated to the new range key during a call to
     389              :         // Iterator.saveRangeKey.
     390              :         stale bool
     391              :         // updated is used to signal to the Iterator client whether the state of
     392              :         // range keys has changed since the previous iterator position through the
     393              :         // `RangeKeyChanged` method. It's set to true during an Iterator positioning
     394              :         // operation that changes the state of the current range key. Each Iterator
     395              :         // positioning operation sets it back to false before executing.
     396              :         //
     397              :         // TODO(jackson): The lifecycle of {stale,updated,prevPosHadRangeKey} is
     398              :         // intricate and confusing. Try to refactor to reduce complexity.
     399              :         updated bool
     400              :         // prevPosHadRangeKey records whether the previous Iterator position had a
     401              :         // range key (HasPointAndRage() = (_, true)). It's updated at the beginning
     402              :         // of each new Iterator positioning operation. It's required by saveRangeKey to
     403              :         // to set `updated` appropriately: Without this record of the previous iterator
     404              :         // state, it's ambiguous whether an iterator only temporarily stepped onto a
     405              :         // position without a range key.
     406              :         prevPosHadRangeKey bool
     407              :         // rangeKeyOnly is set to true if at the current iterator position there is
     408              :         // no point key, only a range key start boundary.
     409              :         rangeKeyOnly bool
     410              :         // hasRangeKey is true when the current iterator position has a covering
     411              :         // range key (eg, a range key with bounds [<lower>,<upper>) such that
     412              :         // <lower> ≤ Key() < <upper>).
     413              :         hasRangeKey bool
     414              :         // start and end are the [start, end) boundaries of the current range keys.
     415              :         start []byte
     416              :         end   []byte
     417              : 
     418              :         rangeKeyBuffers
     419              : 
     420              :         // iterConfig holds fields that are used for the construction of the
     421              :         // iterator stack, but do not need to be directly accessed during iteration.
     422              :         // This struct is bundled within the iteratorRangeKeyState struct to reduce
     423              :         // allocations.
     424              :         iterConfig rangekeystack.UserIteratorConfig
     425              : }
     426              : 
     427              : type rangeKeyBuffers struct {
     428              :         // keys is sorted by Suffix ascending.
     429              :         keys []RangeKeyData
     430              :         // buf is used to save range-key data before moving the range-key iterator.
     431              :         // Start and end boundaries, suffixes and values are all copied into buf.
     432              :         buf bytealloc.A
     433              :         // internal holds buffers used by the range key internal iterators.
     434              :         internal rangekeystack.Buffers
     435              : }
     436              : 
     437            1 : func (b *rangeKeyBuffers) PrepareForReuse() {
     438            1 :         const maxKeysReuse = 100
     439            1 :         if len(b.keys) > maxKeysReuse {
     440            0 :                 b.keys = nil
     441            0 :         }
     442              :         // Avoid caching the key buf if it is overly large. The constant is
     443              :         // fairly arbitrary.
     444            1 :         if cap(b.buf) >= maxKeyBufCacheSize {
     445            1 :                 b.buf = nil
     446            1 :         } else {
     447            1 :                 b.buf = b.buf[:0]
     448            1 :         }
     449            1 :         b.internal.PrepareForReuse()
     450              : }
     451              : 
     452              : var iterRangeKeyStateAllocPool = sync.Pool{
     453            1 :         New: func() interface{} {
     454            1 :                 return &iteratorRangeKeyState{}
     455            1 :         },
     456              : }
     457              : 
     458              : // isEphemeralPosition returns true iff the current iterator position is
     459              : // ephemeral, and won't be visited during subsequent relative positioning
     460              : // operations.
     461              : //
     462              : // The iterator position resulting from a SeekGE or SeekPrefixGE that lands on a
     463              : // straddling range key without a coincident point key is such a position.
     464            1 : func (i *Iterator) isEphemeralPosition() bool {
     465            1 :         return i.opts.rangeKeys() && i.rangeKey != nil && i.rangeKey.rangeKeyOnly &&
     466            1 :                 !i.equal(i.rangeKey.start, i.key)
     467            1 : }
     468              : 
     469              : type lastPositioningOpKind int8
     470              : 
     471              : const (
     472              :         unknownLastPositionOp lastPositioningOpKind = iota
     473              :         seekPrefixGELastPositioningOp
     474              :         seekGELastPositioningOp
     475              :         seekLTLastPositioningOp
     476              :         // internalNextOp is a special internal iterator positioning operation used
     477              :         // by CanDeterministicallySingleDelete. It exists for enforcing requirements
     478              :         // around calling CanDeterministicallySingleDelete at most once per external
     479              :         // iterator position.
     480              :         internalNextOp
     481              : )
     482              : 
     483              : // Limited iteration mode. Not for use with prefix iteration.
     484              : //
     485              : // SeekGE, SeekLT, Prev, Next have WithLimit variants, that pause the iterator
     486              : // at the limit in a best-effort manner. The client should behave correctly
     487              : // even if the limits are ignored. These limits are not "deep", in that they
     488              : // are not passed down to the underlying collection of internalIterators. This
     489              : // is because the limits are transient, and apply only until the next
     490              : // iteration call. They serve mainly as a way to bound the amount of work when
     491              : // two (or more) Iterators are being coordinated at a higher level.
     492              : //
     493              : // In limited iteration mode:
     494              : // - Avoid using Iterator.Valid if the last call was to a *WithLimit() method.
     495              : //   The return value from the *WithLimit() method provides a more precise
     496              : //   disposition.
     497              : // - The limit is exclusive for forward and inclusive for reverse.
     498              : //
     499              : //
     500              : // Limited iteration mode & range keys
     501              : //
     502              : // Limited iteration interacts with range-key iteration. When range key
     503              : // iteration is enabled, range keys are interleaved at their start boundaries.
     504              : // Limited iteration must ensure that if a range key exists within the limit,
     505              : // the iterator visits the range key.
     506              : //
     507              : // During forward limited iteration, this is trivial: An overlapping range key
     508              : // must have a start boundary less than the limit, and the range key's start
     509              : // boundary will be interleaved and found to be within the limit.
     510              : //
     511              : // During reverse limited iteration, the tail of the range key may fall within
     512              : // the limit. The range key must be surfaced even if the range key's start
     513              : // boundary is less than the limit, and if there are no point keys between the
     514              : // current iterator position and the limit. To provide this guarantee, reverse
     515              : // limited iteration ignores the limit as long as there is a range key
     516              : // overlapping the iteration position.
     517              : 
     518              : // IterValidityState captures the state of the Iterator.
     519              : type IterValidityState int8
     520              : 
     521              : const (
     522              :         // IterExhausted represents an Iterator that is exhausted.
     523              :         IterExhausted IterValidityState = iota
     524              :         // IterValid represents an Iterator that is valid.
     525              :         IterValid
     526              :         // IterAtLimit represents an Iterator that has a non-exhausted
     527              :         // internalIterator, but has reached a limit without any key for the
     528              :         // caller.
     529              :         IterAtLimit
     530              : )
     531              : 
     532              : // readSampling stores variables used to sample a read to trigger a read
     533              : // compaction
     534              : type readSampling struct {
     535              :         bytesUntilReadSampling uint64
     536              :         initialSamplePassed    bool
     537              :         pendingCompactions     readCompactionQueue
     538              :         // forceReadSampling is used for testing purposes to force a read sample on every
     539              :         // call to Iterator.maybeSampleRead()
     540              :         forceReadSampling bool
     541              : }
     542              : 
     543            1 : func (i *Iterator) findNextEntry(limit []byte) {
     544            1 :         i.iterValidityState = IterExhausted
     545            1 :         i.pos = iterPosCurForward
     546            1 :         if i.opts.rangeKeys() && i.rangeKey != nil {
     547            1 :                 i.rangeKey.rangeKeyOnly = false
     548            1 :         }
     549              : 
     550              :         // Close the closer for the current value if one was open.
     551            1 :         if i.closeValueCloser() != nil {
     552            0 :                 return
     553            0 :         }
     554              : 
     555            1 :         for i.iterKV != nil {
     556            1 :                 key := i.iterKV.K
     557            1 : 
     558            1 :                 // The topLevelIterator.StrictSeekPrefixGE contract requires that in
     559            1 :                 // prefix mode [i.hasPrefix=t], every point key returned by the internal
     560            1 :                 // iterator must have the current iteration prefix.
     561            1 :                 if invariants.Enabled && i.hasPrefix {
     562            1 :                         // Range keys are an exception to the contract and may return a different
     563            1 :                         // prefix. This case is explicitly handled in the switch statement below.
     564            1 :                         if key.Kind() != base.InternalKeyKindRangeKeySet {
     565            1 :                                 if p := i.comparer.Split.Prefix(key.UserKey); !i.equal(i.prefixOrFullSeekKey, p) {
     566            0 :                                         i.opts.logger.Fatalf("pebble: prefix violation: key %q does not have prefix %q\n", key.UserKey, i.prefixOrFullSeekKey)
     567            0 :                                 }
     568              :                         }
     569              :                 }
     570              : 
     571              :                 // Compare with limit every time we start at a different user key.
     572              :                 // Note that given the best-effort contract of limit, we could avoid a
     573              :                 // comparison in the common case by doing this only after
     574              :                 // i.nextUserKey is called for the deletes below. However that makes
     575              :                 // the behavior non-deterministic (since the behavior will vary based
     576              :                 // on what has been compacted), which makes it hard to test with the
     577              :                 // metamorphic test. So we forego that performance optimization.
     578            1 :                 if limit != nil && i.cmp(limit, i.iterKV.K.UserKey) <= 0 {
     579            1 :                         i.iterValidityState = IterAtLimit
     580            1 :                         i.pos = iterPosCurForwardPaused
     581            1 :                         return
     582            1 :                 }
     583              : 
     584              :                 // If the user has configured a SkipPoint function, invoke it to see
     585              :                 // whether we should skip over the current user key.
     586            1 :                 if i.opts.SkipPoint != nil && key.Kind() != InternalKeyKindRangeKeySet && i.opts.SkipPoint(i.iterKV.K.UserKey) {
     587            1 :                         // NB: We could call nextUserKey, but in some cases the SkipPoint
     588            1 :                         // predicate function might be cheaper than nextUserKey's key copy
     589            1 :                         // and key comparison. This should be the case for MVCC suffix
     590            1 :                         // comparisons, for example. In the future, we could expand the
     591            1 :                         // SkipPoint interface to give the implementor more control over
     592            1 :                         // whether we skip over just the internal key, the user key, or even
     593            1 :                         // the key prefix.
     594            1 :                         i.stats.ForwardStepCount[InternalIterCall]++
     595            1 :                         i.iterKV = i.iter.Next()
     596            1 :                         continue
     597              :                 }
     598              : 
     599            1 :                 switch key.Kind() {
     600            1 :                 case InternalKeyKindRangeKeySet:
     601            1 :                         if i.hasPrefix {
     602            1 :                                 if p := i.comparer.Split.Prefix(key.UserKey); !i.equal(i.prefixOrFullSeekKey, p) {
     603            1 :                                         return
     604            1 :                                 }
     605              :                         }
     606              :                         // Save the current key.
     607            1 :                         i.keyBuf = append(i.keyBuf[:0], key.UserKey...)
     608            1 :                         i.key = i.keyBuf
     609            1 :                         i.value = base.InternalValue{}
     610            1 :                         // There may also be a live point key at this userkey that we have
     611            1 :                         // not yet read. We need to find the next entry with this user key
     612            1 :                         // to find it. Save the range key so we don't lose it when we Next
     613            1 :                         // the underlying iterator.
     614            1 :                         i.saveRangeKey()
     615            1 :                         pointKeyExists := i.nextPointCurrentUserKey()
     616            1 :                         if i.err != nil {
     617            0 :                                 i.iterValidityState = IterExhausted
     618            0 :                                 return
     619            0 :                         }
     620            1 :                         i.rangeKey.rangeKeyOnly = !pointKeyExists
     621            1 :                         i.iterValidityState = IterValid
     622            1 :                         return
     623              : 
     624            1 :                 case InternalKeyKindDelete, InternalKeyKindSingleDelete, InternalKeyKindDeleteSized:
     625            1 :                         // NB: treating InternalKeyKindSingleDelete as equivalent to DEL is not
     626            1 :                         // only simpler, but is also necessary for correctness due to
     627            1 :                         // InternalKeyKindSSTableInternalObsoleteBit.
     628            1 :                         i.nextUserKey()
     629            1 :                         continue
     630              : 
     631            1 :                 case InternalKeyKindSet, InternalKeyKindSetWithDelete:
     632            1 :                         i.keyBuf = append(i.keyBuf[:0], key.UserKey...)
     633            1 :                         i.key = i.keyBuf
     634            1 :                         i.value = i.iterKV.V
     635            1 :                         i.iterValidityState = IterValid
     636            1 :                         i.saveRangeKey()
     637            1 :                         return
     638              : 
     639            1 :                 case InternalKeyKindMerge:
     640            1 :                         // Resolving the merge may advance us to the next point key, which
     641            1 :                         // may be covered by a different set of range keys. Save the range
     642            1 :                         // key state so we don't lose it.
     643            1 :                         i.saveRangeKey()
     644            1 :                         if i.mergeForward(key) {
     645            1 :                                 i.iterValidityState = IterValid
     646            1 :                                 return
     647            1 :                         }
     648              : 
     649              :                         // The merge didn't yield a valid key, either because the value
     650              :                         // merger indicated it should be deleted, or because an error was
     651              :                         // encountered.
     652            1 :                         i.iterValidityState = IterExhausted
     653            1 :                         if i.err != nil {
     654            1 :                                 return
     655            1 :                         }
     656            1 :                         if i.pos != iterPosNext {
     657            0 :                                 i.nextUserKey()
     658            0 :                         }
     659            1 :                         if i.closeValueCloser() != nil {
     660            0 :                                 return
     661            0 :                         }
     662            1 :                         i.pos = iterPosCurForward
     663              : 
     664            0 :                 default:
     665            0 :                         i.err = base.CorruptionErrorf("pebble: invalid internal key kind: %d", errors.Safe(key.Kind()))
     666            0 :                         i.iterValidityState = IterExhausted
     667            0 :                         return
     668              :                 }
     669              :         }
     670              : 
     671              :         // Is iterKey nil due to an error?
     672            1 :         if err := i.iter.Error(); err != nil {
     673            1 :                 i.err = err
     674            1 :                 i.iterValidityState = IterExhausted
     675            1 :         }
     676              : }
     677              : 
     678            1 : func (i *Iterator) nextPointCurrentUserKey() bool {
     679            1 :         // If the user has configured a SkipPoint function and the current user key
     680            1 :         // would be skipped by it, there's no need to step forward looking for a
     681            1 :         // point key. If we were to find one, it should be skipped anyways.
     682            1 :         if i.opts.SkipPoint != nil && i.opts.SkipPoint(i.key) {
     683            1 :                 return false
     684            1 :         }
     685              : 
     686            1 :         i.pos = iterPosCurForward
     687            1 : 
     688            1 :         i.iterKV = i.iter.Next()
     689            1 :         i.stats.ForwardStepCount[InternalIterCall]++
     690            1 :         if i.iterKV == nil {
     691            1 :                 if err := i.iter.Error(); err != nil {
     692            0 :                         i.err = err
     693            1 :                 } else {
     694            1 :                         i.pos = iterPosNext
     695            1 :                 }
     696            1 :                 return false
     697              :         }
     698            1 :         if !i.equal(i.key, i.iterKV.K.UserKey) {
     699            1 :                 i.pos = iterPosNext
     700            1 :                 return false
     701            1 :         }
     702              : 
     703            1 :         key := i.iterKV.K
     704            1 :         switch key.Kind() {
     705            0 :         case InternalKeyKindRangeKeySet:
     706            0 :                 // RangeKeySets must always be interleaved as the first internal key
     707            0 :                 // for a user key.
     708            0 :                 i.err = base.CorruptionErrorf("pebble: unexpected range key set mid-user key")
     709            0 :                 return false
     710              : 
     711            1 :         case InternalKeyKindDelete, InternalKeyKindSingleDelete, InternalKeyKindDeleteSized:
     712            1 :                 // NB: treating InternalKeyKindSingleDelete as equivalent to DEL is not
     713            1 :                 // only simpler, but is also necessary for correctness due to
     714            1 :                 // InternalKeyKindSSTableInternalObsoleteBit.
     715            1 :                 return false
     716              : 
     717            1 :         case InternalKeyKindSet, InternalKeyKindSetWithDelete:
     718            1 :                 i.value = i.iterKV.V
     719            1 :                 return true
     720              : 
     721            1 :         case InternalKeyKindMerge:
     722            1 :                 return i.mergeForward(key)
     723              : 
     724            0 :         default:
     725            0 :                 i.err = base.CorruptionErrorf("pebble: invalid internal key kind: %d", errors.Safe(key.Kind()))
     726            0 :                 return false
     727              :         }
     728              : }
     729              : 
     730              : // mergeForward resolves a MERGE key, advancing the underlying iterator forward
     731              : // to merge with subsequent keys with the same userkey. mergeForward returns a
     732              : // boolean indicating whether or not the merge yielded a valid key. A merge may
     733              : // not yield a valid key if an error occurred, in which case i.err is non-nil,
     734              : // or the user's value merger specified the key to be deleted.
     735              : //
     736              : // mergeForward does not update iterValidityState.
     737            1 : func (i *Iterator) mergeForward(key base.InternalKey) (valid bool) {
     738            1 :         var iterValue []byte
     739            1 :         iterValue, _, i.err = i.iterKV.Value(nil)
     740            1 :         if i.err != nil {
     741            0 :                 return false
     742            0 :         }
     743            1 :         var valueMerger ValueMerger
     744            1 :         valueMerger, i.err = i.merge(key.UserKey, iterValue)
     745            1 :         if i.err != nil {
     746            0 :                 return false
     747            0 :         }
     748              : 
     749            1 :         i.mergeNext(key, valueMerger)
     750            1 :         if i.err != nil {
     751            1 :                 return false
     752            1 :         }
     753              : 
     754            1 :         var needDelete bool
     755            1 :         var value []byte
     756            1 :         value, needDelete, i.valueCloser, i.err = finishValueMerger(
     757            1 :                 valueMerger, true /* includesBase */)
     758            1 :         i.value = base.MakeInPlaceValue(value)
     759            1 :         if i.err != nil {
     760            0 :                 return false
     761            0 :         }
     762            1 :         if needDelete {
     763            1 :                 _ = i.closeValueCloser()
     764            1 :                 return false
     765            1 :         }
     766            1 :         return true
     767              : }
     768              : 
     769            1 : func (i *Iterator) closeValueCloser() error {
     770            1 :         if i.valueCloser != nil {
     771            0 :                 i.err = i.valueCloser.Close()
     772            0 :                 i.valueCloser = nil
     773            0 :         }
     774            1 :         return i.err
     775              : }
     776              : 
     777            1 : func (i *Iterator) nextUserKey() {
     778            1 :         if i.iterKV == nil {
     779            1 :                 return
     780            1 :         }
     781            1 :         trailer := i.iterKV.K.Trailer
     782            1 :         done := i.iterKV.K.Trailer <= base.InternalKeyZeroSeqnumMaxTrailer
     783            1 :         if i.iterValidityState != IterValid {
     784            1 :                 i.keyBuf = append(i.keyBuf[:0], i.iterKV.K.UserKey...)
     785            1 :                 i.key = i.keyBuf
     786            1 :         }
     787            1 :         for {
     788            1 :                 i.stats.ForwardStepCount[InternalIterCall]++
     789            1 :                 i.iterKV = i.iter.Next()
     790            1 :                 if i.iterKV == nil {
     791            1 :                         if err := i.iter.Error(); err != nil {
     792            1 :                                 i.err = err
     793            1 :                                 return
     794            1 :                         }
     795              :                 }
     796              :                 // NB: We're guaranteed to be on the next user key if the previous key
     797              :                 // had a zero sequence number (`done`), or the new key has a trailer
     798              :                 // greater or equal to the previous key's trailer. This is true because
     799              :                 // internal keys with the same user key are sorted by InternalKeyTrailer in
     800              :                 // strictly monotonically descending order. We expect the trailer
     801              :                 // optimization to trigger around 50% of the time with randomly
     802              :                 // distributed writes. We expect it to trigger very frequently when
     803              :                 // iterating through ingested sstables, which contain keys that all have
     804              :                 // the same sequence number.
     805            1 :                 if done || i.iterKV == nil || i.iterKV.K.Trailer >= trailer {
     806            1 :                         break
     807              :                 }
     808            1 :                 if !i.equal(i.key, i.iterKV.K.UserKey) {
     809            1 :                         break
     810              :                 }
     811            1 :                 done = i.iterKV.K.Trailer <= base.InternalKeyZeroSeqnumMaxTrailer
     812            1 :                 trailer = i.iterKV.K.Trailer
     813              :         }
     814              : }
     815              : 
     816            1 : func (i *Iterator) maybeSampleRead() {
     817            1 :         // This method is only called when a public method of Iterator is
     818            1 :         // returning, and below we exclude the case were the iterator is paused at
     819            1 :         // a limit. The effect of these choices is that keys that are deleted, but
     820            1 :         // are encountered during iteration, are not accounted for in the read
     821            1 :         // sampling and will not cause read driven compactions, even though we are
     822            1 :         // incurring cost in iterating over them. And this issue is not limited to
     823            1 :         // Iterator, which does not see the effect of range deletes, which may be
     824            1 :         // causing iteration work in mergingIter. It is not clear at this time
     825            1 :         // whether this is a deficiency worth addressing.
     826            1 :         if i.iterValidityState != IterValid {
     827            1 :                 return
     828            1 :         }
     829            1 :         if i.readState == nil {
     830            1 :                 return
     831            1 :         }
     832            1 :         if i.readSampling.forceReadSampling {
     833            1 :                 i.sampleRead()
     834            1 :                 return
     835            1 :         }
     836            1 :         samplingPeriod := int32(int64(readBytesPeriod) * i.readState.db.opts.Experimental.ReadSamplingMultiplier)
     837            1 :         if samplingPeriod <= 0 {
     838            0 :                 return
     839            0 :         }
     840            1 :         bytesRead := uint64(len(i.key) + i.value.Len())
     841            1 :         for i.readSampling.bytesUntilReadSampling < bytesRead {
     842            1 :                 i.readSampling.bytesUntilReadSampling += uint64(rand.Uint32N(2 * uint32(samplingPeriod)))
     843            1 :                 // The block below tries to adjust for the case where this is the
     844            1 :                 // first read in a newly-opened iterator. As bytesUntilReadSampling
     845            1 :                 // starts off at zero, we don't want to sample the first read of
     846            1 :                 // every newly-opened iterator, but we do want to sample some of them.
     847            1 :                 if !i.readSampling.initialSamplePassed {
     848            1 :                         i.readSampling.initialSamplePassed = true
     849            1 :                         if i.readSampling.bytesUntilReadSampling > bytesRead {
     850            1 :                                 if rand.Uint64N(i.readSampling.bytesUntilReadSampling) > bytesRead {
     851            1 :                                         continue
     852              :                                 }
     853              :                         }
     854              :                 }
     855            1 :                 i.sampleRead()
     856              :         }
     857            1 :         i.readSampling.bytesUntilReadSampling -= bytesRead
     858              : }
     859              : 
     860            1 : func (i *Iterator) sampleRead() {
     861            1 :         var topFile *manifest.TableMetadata
     862            1 :         topLevel, numOverlappingLevels := numLevels, 0
     863            1 :         mi := i.merging
     864            1 :         if mi == nil {
     865            1 :                 return
     866            1 :         }
     867            1 :         if len(mi.levels) > 1 {
     868            1 :                 mi.ForEachLevelIter(func(li *levelIter) (done bool) {
     869            1 :                         if li.layer.IsFlushableIngests() {
     870            0 :                                 return false
     871            0 :                         }
     872            1 :                         l := li.layer.Level()
     873            1 :                         if f := li.iterFile; f != nil {
     874            1 :                                 var containsKey bool
     875            1 :                                 if i.pos == iterPosNext || i.pos == iterPosCurForward ||
     876            1 :                                         i.pos == iterPosCurForwardPaused {
     877            1 :                                         containsKey = i.cmp(f.PointKeyBounds.SmallestUserKey(), i.key) <= 0
     878            1 :                                 } else if i.pos == iterPosPrev || i.pos == iterPosCurReverse ||
     879            1 :                                         i.pos == iterPosCurReversePaused {
     880            1 :                                         containsKey = i.cmp(f.PointKeyBounds.LargestUserKey(), i.key) >= 0
     881            1 :                                 }
     882              :                                 // Do nothing if the current key is not contained in f's
     883              :                                 // bounds. We could seek the LevelIterator at this level
     884              :                                 // to find the right file, but the performance impacts of
     885              :                                 // doing that are significant enough to negate the benefits
     886              :                                 // of read sampling in the first place. See the discussion
     887              :                                 // at:
     888              :                                 // https://github.com/cockroachdb/pebble/pull/1041#issuecomment-763226492
     889            1 :                                 if containsKey {
     890            1 :                                         numOverlappingLevels++
     891            1 :                                         if numOverlappingLevels >= 2 {
     892            1 :                                                 // Terminate the loop early if at least 2 overlapping levels are found.
     893            1 :                                                 return true
     894            1 :                                         }
     895            1 :                                         topLevel = l
     896            1 :                                         topFile = f
     897              :                                 }
     898              :                         }
     899            1 :                         return false
     900              :                 })
     901              :         }
     902            1 :         if topFile == nil || topLevel >= numLevels {
     903            1 :                 return
     904            1 :         }
     905            1 :         if numOverlappingLevels >= 2 {
     906            1 :                 allowedSeeks := topFile.AllowedSeeks.Add(-1)
     907            1 :                 if allowedSeeks == 0 {
     908            1 : 
     909            1 :                         // Since the compaction queue can handle duplicates, we can keep
     910            1 :                         // adding to the queue even once allowedSeeks hits 0.
     911            1 :                         // In fact, we NEED to keep adding to the queue, because the queue
     912            1 :                         // is small and evicts older and possibly useful compactions.
     913            1 :                         topFile.AllowedSeeks.Add(topFile.InitAllowedSeeks)
     914            1 : 
     915            1 :                         read := readCompaction{
     916            1 :                                 start:    topFile.PointKeyBounds.SmallestUserKey(),
     917            1 :                                 end:      topFile.PointKeyBounds.LargestUserKey(),
     918            1 :                                 level:    topLevel,
     919            1 :                                 tableNum: topFile.TableNum,
     920            1 :                         }
     921            1 :                         i.readSampling.pendingCompactions.add(&read, i.cmp)
     922            1 :                 }
     923              :         }
     924              : }
     925              : 
     926            1 : func (i *Iterator) findPrevEntry(limit []byte) {
     927            1 :         i.iterValidityState = IterExhausted
     928            1 :         i.pos = iterPosCurReverse
     929            1 :         if i.opts.rangeKeys() && i.rangeKey != nil {
     930            1 :                 i.rangeKey.rangeKeyOnly = false
     931            1 :         }
     932              : 
     933              :         // Close the closer for the current value if one was open.
     934            1 :         if i.valueCloser != nil {
     935            0 :                 i.err = i.valueCloser.Close()
     936            0 :                 i.valueCloser = nil
     937            0 :                 if i.err != nil {
     938            0 :                         i.iterValidityState = IterExhausted
     939            0 :                         return
     940            0 :                 }
     941              :         }
     942              : 
     943            1 :         var valueMerger ValueMerger
     944            1 :         firstLoopIter := true
     945            1 :         rangeKeyBoundary := false
     946            1 :         // The code below compares with limit in multiple places. As documented in
     947            1 :         // findNextEntry, this is being done to make the behavior of limit
     948            1 :         // deterministic to allow for metamorphic testing. It is not required by
     949            1 :         // the best-effort contract of limit.
     950            1 :         for i.iterKV != nil {
     951            1 :                 key := i.iterKV.K
     952            1 : 
     953            1 :                 // NB: We cannot pause if the current key is covered by a range key.
     954            1 :                 // Otherwise, the user might not ever learn of a range key that covers
     955            1 :                 // the key space being iterated over in which there are no point keys.
     956            1 :                 // Since limits are best effort, ignoring the limit in this case is
     957            1 :                 // allowed by the contract of limit.
     958            1 :                 if firstLoopIter && limit != nil && i.cmp(limit, i.iterKV.K.UserKey) > 0 && !i.rangeKeyWithinLimit(limit) {
     959            1 :                         i.iterValidityState = IterAtLimit
     960            1 :                         i.pos = iterPosCurReversePaused
     961            1 :                         return
     962            1 :                 }
     963            1 :                 firstLoopIter = false
     964            1 : 
     965            1 :                 if i.iterValidityState == IterValid {
     966            1 :                         if !i.equal(key.UserKey, i.key) {
     967            1 :                                 // We've iterated to the previous user key.
     968            1 :                                 i.pos = iterPosPrev
     969            1 :                                 if valueMerger != nil {
     970            1 :                                         var needDelete bool
     971            1 :                                         var value []byte
     972            1 :                                         value, needDelete, i.valueCloser, i.err = finishValueMerger(valueMerger, true /* includesBase */)
     973            1 :                                         i.value = base.MakeInPlaceValue(value)
     974            1 :                                         if i.err == nil && needDelete {
     975            1 :                                                 // The point key at this key is deleted. If we also have
     976            1 :                                                 // a range key boundary at this key, we still want to
     977            1 :                                                 // return. Otherwise, we need to continue looking for
     978            1 :                                                 // a live key.
     979            1 :                                                 i.value = base.InternalValue{}
     980            1 :                                                 if rangeKeyBoundary {
     981            0 :                                                         i.rangeKey.rangeKeyOnly = true
     982            1 :                                                 } else {
     983            1 :                                                         i.iterValidityState = IterExhausted
     984            1 :                                                         if i.closeValueCloser() == nil {
     985            1 :                                                                 continue
     986              :                                                         }
     987              :                                                 }
     988              :                                         }
     989              :                                 }
     990            1 :                                 if i.err != nil {
     991            0 :                                         i.iterValidityState = IterExhausted
     992            0 :                                 }
     993            1 :                                 return
     994              :                         }
     995              :                 }
     996              : 
     997              :                 // If the user has configured a SkipPoint function, invoke it to see
     998              :                 // whether we should skip over the current user key.
     999            1 :                 if i.opts.SkipPoint != nil && key.Kind() != InternalKeyKindRangeKeySet && i.opts.SkipPoint(key.UserKey) {
    1000            1 :                         // NB: We could call prevUserKey, but in some cases the SkipPoint
    1001            1 :                         // predicate function might be cheaper than prevUserKey's key copy
    1002            1 :                         // and key comparison. This should be the case for MVCC suffix
    1003            1 :                         // comparisons, for example. In the future, we could expand the
    1004            1 :                         // SkipPoint interface to give the implementor more control over
    1005            1 :                         // whether we skip over just the internal key, the user key, or even
    1006            1 :                         // the key prefix.
    1007            1 :                         i.stats.ReverseStepCount[InternalIterCall]++
    1008            1 :                         i.iterKV = i.iter.Prev()
    1009            1 :                         if i.iterKV == nil {
    1010            1 :                                 if err := i.iter.Error(); err != nil {
    1011            0 :                                         i.err = err
    1012            0 :                                         i.iterValidityState = IterExhausted
    1013            0 :                                         return
    1014            0 :                                 }
    1015              :                         }
    1016            1 :                         if limit != nil && i.iterKV != nil && i.cmp(limit, i.iterKV.K.UserKey) > 0 && !i.rangeKeyWithinLimit(limit) {
    1017            1 :                                 i.iterValidityState = IterAtLimit
    1018            1 :                                 i.pos = iterPosCurReversePaused
    1019            1 :                                 return
    1020            1 :                         }
    1021            1 :                         continue
    1022              :                 }
    1023              : 
    1024            1 :                 switch key.Kind() {
    1025            1 :                 case InternalKeyKindRangeKeySet:
    1026            1 :                         // Range key start boundary markers are interleaved with the maximum
    1027            1 :                         // sequence number, so if there's a point key also at this key, we
    1028            1 :                         // must've already iterated over it.
    1029            1 :                         // This is the final entry at this user key, so we may return
    1030            1 :                         i.rangeKey.rangeKeyOnly = i.iterValidityState != IterValid
    1031            1 :                         if i.rangeKey.rangeKeyOnly {
    1032            1 :                                 // The point iterator is now invalid, so clear the point value.
    1033            1 :                                 i.value = base.InternalValue{}
    1034            1 :                         }
    1035            1 :                         i.keyBuf = append(i.keyBuf[:0], key.UserKey...)
    1036            1 :                         i.key = i.keyBuf
    1037            1 :                         i.iterValidityState = IterValid
    1038            1 :                         i.saveRangeKey()
    1039            1 :                         // In all other cases, previous iteration requires advancing to
    1040            1 :                         // iterPosPrev in order to determine if the key is live and
    1041            1 :                         // unshadowed by another key at the same user key. In this case,
    1042            1 :                         // because range key start boundary markers are always interleaved
    1043            1 :                         // at the maximum sequence number, we know that there aren't any
    1044            1 :                         // additional keys with the same user key in the backward direction.
    1045            1 :                         //
    1046            1 :                         // We Prev the underlying iterator once anyways for consistency, so
    1047            1 :                         // that we can maintain the invariant during backward iteration that
    1048            1 :                         // i.iterPos = iterPosPrev.
    1049            1 :                         i.stats.ReverseStepCount[InternalIterCall]++
    1050            1 :                         i.iterKV = i.iter.Prev()
    1051            1 : 
    1052            1 :                         // Set rangeKeyBoundary so that on the next iteration, we know to
    1053            1 :                         // return the key even if the MERGE point key is deleted.
    1054            1 :                         rangeKeyBoundary = true
    1055              : 
    1056            1 :                 case InternalKeyKindDelete, InternalKeyKindSingleDelete, InternalKeyKindDeleteSized:
    1057            1 :                         i.value = base.InternalValue{}
    1058            1 :                         i.iterValidityState = IterExhausted
    1059            1 :                         valueMerger = nil
    1060            1 :                         i.stats.ReverseStepCount[InternalIterCall]++
    1061            1 :                         i.iterKV = i.iter.Prev()
    1062            1 :                         // Compare with the limit. We could optimize by only checking when
    1063            1 :                         // we step to the previous user key, but detecting that requires a
    1064            1 :                         // comparison too. Note that this position may already passed a
    1065            1 :                         // number of versions of this user key, but they are all deleted, so
    1066            1 :                         // the fact that a subsequent Prev*() call will not see them is
    1067            1 :                         // harmless. Also note that this is the only place in the loop,
    1068            1 :                         // other than the firstLoopIter and SkipPoint cases above, where we
    1069            1 :                         // could step to a different user key and start processing it for
    1070            1 :                         // returning to the caller.
    1071            1 :                         if limit != nil && i.iterKV != nil && i.cmp(limit, i.iterKV.K.UserKey) > 0 && !i.rangeKeyWithinLimit(limit) {
    1072            1 :                                 i.iterValidityState = IterAtLimit
    1073            1 :                                 i.pos = iterPosCurReversePaused
    1074            1 :                                 return
    1075            1 :                         }
    1076            1 :                         continue
    1077              : 
    1078            1 :                 case InternalKeyKindSet, InternalKeyKindSetWithDelete:
    1079            1 :                         i.keyBuf = append(i.keyBuf[:0], key.UserKey...)
    1080            1 :                         i.key = i.keyBuf
    1081            1 :                         // iterValue is owned by i.iter and could change after the Prev()
    1082            1 :                         // call, so use valueBuf instead. Note that valueBuf is only used
    1083            1 :                         // in this one instance; everywhere else (eg. in findNextEntry),
    1084            1 :                         // we just point i.value to the unsafe i.iter-owned value buffer.
    1085            1 :                         i.value, i.valueBuf = i.iterKV.V.Clone(i.valueBuf[:0], &i.fetcher)
    1086            1 :                         i.saveRangeKey()
    1087            1 :                         i.iterValidityState = IterValid
    1088            1 :                         i.iterKV = i.iter.Prev()
    1089            1 :                         i.stats.ReverseStepCount[InternalIterCall]++
    1090            1 :                         valueMerger = nil
    1091            1 :                         continue
    1092              : 
    1093            1 :                 case InternalKeyKindMerge:
    1094            1 :                         if i.iterValidityState == IterExhausted {
    1095            1 :                                 i.keyBuf = append(i.keyBuf[:0], key.UserKey...)
    1096            1 :                                 i.key = i.keyBuf
    1097            1 :                                 i.saveRangeKey()
    1098            1 :                                 var iterValue []byte
    1099            1 :                                 iterValue, _, i.err = i.iterKV.Value(nil)
    1100            1 :                                 if i.err != nil {
    1101            0 :                                         return
    1102            0 :                                 }
    1103            1 :                                 valueMerger, i.err = i.merge(i.key, iterValue)
    1104            1 :                                 if i.err != nil {
    1105            0 :                                         return
    1106            0 :                                 }
    1107            1 :                                 i.iterValidityState = IterValid
    1108            1 :                         } else if valueMerger == nil {
    1109            1 :                                 // Extract value before iterValue since we use value before iterValue
    1110            1 :                                 // and the underlying iterator is not required to provide backing
    1111            1 :                                 // memory for both simultaneously.
    1112            1 :                                 var value []byte
    1113            1 :                                 var callerOwned bool
    1114            1 :                                 value, callerOwned, i.err = i.value.Value(i.lazyValueBuf)
    1115            1 :                                 if callerOwned {
    1116            0 :                                         i.lazyValueBuf = value[:0]
    1117            0 :                                 }
    1118            1 :                                 if i.err != nil {
    1119            0 :                                         i.iterValidityState = IterExhausted
    1120            0 :                                         return
    1121            0 :                                 }
    1122            1 :                                 valueMerger, i.err = i.merge(i.key, value)
    1123            1 :                                 var iterValue []byte
    1124            1 :                                 iterValue, _, i.err = i.iterKV.Value(nil)
    1125            1 :                                 if i.err != nil {
    1126            0 :                                         i.iterValidityState = IterExhausted
    1127            0 :                                         return
    1128            0 :                                 }
    1129            1 :                                 if i.err == nil {
    1130            1 :                                         i.err = valueMerger.MergeNewer(iterValue)
    1131            1 :                                 }
    1132            1 :                                 if i.err != nil {
    1133            0 :                                         i.iterValidityState = IterExhausted
    1134            0 :                                         return
    1135            0 :                                 }
    1136            1 :                         } else {
    1137            1 :                                 var iterValue []byte
    1138            1 :                                 iterValue, _, i.err = i.iterKV.Value(nil)
    1139            1 :                                 if i.err != nil {
    1140            0 :                                         i.iterValidityState = IterExhausted
    1141            0 :                                         return
    1142            0 :                                 }
    1143            1 :                                 i.err = valueMerger.MergeNewer(iterValue)
    1144            1 :                                 if i.err != nil {
    1145            0 :                                         i.iterValidityState = IterExhausted
    1146            0 :                                         return
    1147            0 :                                 }
    1148              :                         }
    1149            1 :                         i.iterKV = i.iter.Prev()
    1150            1 :                         i.stats.ReverseStepCount[InternalIterCall]++
    1151            1 :                         continue
    1152              : 
    1153            0 :                 default:
    1154            0 :                         i.err = base.CorruptionErrorf("pebble: invalid internal key kind: %d", errors.Safe(key.Kind()))
    1155            0 :                         i.iterValidityState = IterExhausted
    1156            0 :                         return
    1157              :                 }
    1158              :         }
    1159              :         // i.iterKV == nil, so broke out of the preceding loop.
    1160              : 
    1161              :         // Is iterKey nil due to an error?
    1162            1 :         if i.err = i.iter.Error(); i.err != nil {
    1163            1 :                 i.iterValidityState = IterExhausted
    1164            1 :                 return
    1165            1 :         }
    1166              : 
    1167            1 :         if i.iterValidityState == IterValid {
    1168            1 :                 i.pos = iterPosPrev
    1169            1 :                 if valueMerger != nil {
    1170            1 :                         var needDelete bool
    1171            1 :                         var value []byte
    1172            1 :                         value, needDelete, i.valueCloser, i.err = finishValueMerger(valueMerger, true /* includesBase */)
    1173            1 :                         i.value = base.MakeInPlaceValue(value)
    1174            1 :                         if i.err == nil && needDelete {
    1175            1 :                                 i.key = nil
    1176            1 :                                 i.value = base.InternalValue{}
    1177            1 :                                 i.iterValidityState = IterExhausted
    1178            1 :                         }
    1179              :                 }
    1180            1 :                 if i.err != nil {
    1181            0 :                         i.iterValidityState = IterExhausted
    1182            0 :                 }
    1183              :         }
    1184              : }
    1185              : 
    1186            1 : func (i *Iterator) prevUserKey() {
    1187            1 :         if i.iterKV == nil {
    1188            1 :                 return
    1189            1 :         }
    1190            1 :         if i.iterValidityState != IterValid {
    1191            1 :                 // If we're going to compare against the prev key, we need to save the
    1192            1 :                 // current key.
    1193            1 :                 i.keyBuf = append(i.keyBuf[:0], i.iterKV.K.UserKey...)
    1194            1 :                 i.key = i.keyBuf
    1195            1 :         }
    1196            1 :         for {
    1197            1 :                 i.iterKV = i.iter.Prev()
    1198            1 :                 i.stats.ReverseStepCount[InternalIterCall]++
    1199            1 :                 if i.iterKV == nil {
    1200            1 :                         if err := i.iter.Error(); err != nil {
    1201            0 :                                 i.err = err
    1202            0 :                                 i.iterValidityState = IterExhausted
    1203            0 :                         }
    1204            1 :                         break
    1205              :                 }
    1206            1 :                 if !i.equal(i.key, i.iterKV.K.UserKey) {
    1207            1 :                         break
    1208              :                 }
    1209              :         }
    1210              : }
    1211              : 
    1212            1 : func (i *Iterator) mergeNext(key InternalKey, valueMerger ValueMerger) {
    1213            1 :         // Save the current key.
    1214            1 :         i.keyBuf = append(i.keyBuf[:0], key.UserKey...)
    1215            1 :         i.key = i.keyBuf
    1216            1 : 
    1217            1 :         // Loop looking for older values for this key and merging them.
    1218            1 :         for {
    1219            1 :                 i.iterKV = i.iter.Next()
    1220            1 :                 i.stats.ForwardStepCount[InternalIterCall]++
    1221            1 :                 if i.iterKV == nil {
    1222            1 :                         if i.err = i.iter.Error(); i.err != nil {
    1223            1 :                                 return
    1224            1 :                         }
    1225            1 :                         i.pos = iterPosNext
    1226            1 :                         return
    1227              :                 }
    1228            1 :                 key = i.iterKV.K
    1229            1 :                 if !i.equal(i.key, key.UserKey) {
    1230            1 :                         // We've advanced to the next key.
    1231            1 :                         i.pos = iterPosNext
    1232            1 :                         return
    1233            1 :                 }
    1234            1 :                 switch key.Kind() {
    1235            1 :                 case InternalKeyKindDelete, InternalKeyKindSingleDelete, InternalKeyKindDeleteSized:
    1236            1 :                         // We've hit a deletion tombstone. Return everything up to this
    1237            1 :                         // point.
    1238            1 :                         //
    1239            1 :                         // NB: treating InternalKeyKindSingleDelete as equivalent to DEL is not
    1240            1 :                         // only simpler, but is also necessary for correctness due to
    1241            1 :                         // InternalKeyKindSSTableInternalObsoleteBit.
    1242            1 :                         return
    1243              : 
    1244            1 :                 case InternalKeyKindSet, InternalKeyKindSetWithDelete:
    1245            1 :                         // We've hit a Set value. Merge with the existing value and return.
    1246            1 :                         var iterValue []byte
    1247            1 :                         iterValue, _, i.err = i.iterKV.Value(nil)
    1248            1 :                         if i.err != nil {
    1249            0 :                                 return
    1250            0 :                         }
    1251            1 :                         i.err = valueMerger.MergeOlder(iterValue)
    1252            1 :                         return
    1253              : 
    1254            1 :                 case InternalKeyKindMerge:
    1255            1 :                         // We've hit another Merge value. Merge with the existing value and
    1256            1 :                         // continue looping.
    1257            1 :                         var iterValue []byte
    1258            1 :                         iterValue, _, i.err = i.iterKV.Value(nil)
    1259            1 :                         if i.err != nil {
    1260            0 :                                 return
    1261            0 :                         }
    1262            1 :                         i.err = valueMerger.MergeOlder(iterValue)
    1263            1 :                         if i.err != nil {
    1264            0 :                                 return
    1265            0 :                         }
    1266            1 :                         continue
    1267              : 
    1268            0 :                 case InternalKeyKindRangeKeySet:
    1269            0 :                         // The RANGEKEYSET marker must sort before a MERGE at the same user key.
    1270            0 :                         i.err = base.CorruptionErrorf("pebble: out of order range key marker")
    1271            0 :                         return
    1272              : 
    1273            0 :                 default:
    1274            0 :                         i.err = base.CorruptionErrorf("pebble: invalid internal key kind: %d", errors.Safe(key.Kind()))
    1275            0 :                         return
    1276              :                 }
    1277              :         }
    1278              : }
    1279              : 
    1280              : // SeekGE moves the iterator to the first key/value pair whose key is greater
    1281              : // than or equal to the given key. Returns true if the iterator is pointing at
    1282              : // a valid entry and false otherwise.
    1283            1 : func (i *Iterator) SeekGE(key []byte) bool {
    1284            1 :         return i.SeekGEWithLimit(key, nil) == IterValid
    1285            1 : }
    1286              : 
    1287              : // SeekGEWithLimit moves the iterator to the first key/value pair whose key is
    1288              : // greater than or equal to the given key.
    1289              : //
    1290              : // If limit is provided, it serves as a best-effort exclusive limit. If the
    1291              : // first key greater than or equal to the given search key is also greater than
    1292              : // or equal to limit, the Iterator may pause and return IterAtLimit. Because
    1293              : // limits are best-effort, SeekGEWithLimit may return a key beyond limit.
    1294              : //
    1295              : // If the Iterator is configured to iterate over range keys, SeekGEWithLimit
    1296              : // guarantees it will surface any range keys with bounds overlapping the
    1297              : // keyspace [key, limit).
    1298            1 : func (i *Iterator) SeekGEWithLimit(key []byte, limit []byte) IterValidityState {
    1299            1 :         if i.rangeKey != nil {
    1300            1 :                 // NB: Check Valid() before clearing requiresReposition.
    1301            1 :                 i.rangeKey.prevPosHadRangeKey = i.rangeKey.hasRangeKey && i.Valid()
    1302            1 :                 // If we have a range key but did not expose it at the previous iterator
    1303            1 :                 // position (because the iterator was not at a valid position), updated
    1304            1 :                 // must be true. This ensures that after an iterator op sequence like:
    1305            1 :                 //   - Next()             → (IterValid, RangeBounds() = [a,b))
    1306            1 :                 //   - NextWithLimit(...) → (IterAtLimit, RangeBounds() = -)
    1307            1 :                 //   - SeekGE(...)        → (IterValid, RangeBounds() = [a,b))
    1308            1 :                 // the iterator returns RangeKeyChanged()=true.
    1309            1 :                 //
    1310            1 :                 // The remainder of this function will only update i.rangeKey.updated if
    1311            1 :                 // the iterator moves into a new range key, or out of the current range
    1312            1 :                 // key.
    1313            1 :                 i.rangeKey.updated = i.rangeKey.hasRangeKey && !i.Valid() && i.opts.rangeKeys()
    1314            1 :         }
    1315            1 :         lastPositioningOp := i.lastPositioningOp
    1316            1 :         // Set it to unknown, since this operation may not succeed, in which case
    1317            1 :         // the SeekGE following this should not make any assumption about iterator
    1318            1 :         // position.
    1319            1 :         i.lastPositioningOp = unknownLastPositionOp
    1320            1 :         i.requiresReposition = false
    1321            1 :         i.err = nil // clear cached iteration error
    1322            1 :         i.hasPrefix = false
    1323            1 :         i.stats.ForwardSeekCount[InterfaceCall]++
    1324            1 :         if lowerBound := i.opts.GetLowerBound(); lowerBound != nil && i.cmp(key, lowerBound) < 0 {
    1325            1 :                 key = lowerBound
    1326            1 :         } else if upperBound := i.opts.GetUpperBound(); upperBound != nil && i.cmp(key, upperBound) > 0 {
    1327            1 :                 key = upperBound
    1328            1 :         }
    1329            1 :         seekInternalIter := true
    1330            1 : 
    1331            1 :         var flags base.SeekGEFlags
    1332            1 :         if i.batchJustRefreshed {
    1333            1 :                 i.batchJustRefreshed = false
    1334            1 :                 flags = flags.EnableBatchJustRefreshed()
    1335            1 :         }
    1336            1 :         if lastPositioningOp == seekGELastPositioningOp {
    1337            1 :                 cmp := i.cmp(i.prefixOrFullSeekKey, key)
    1338            1 :                 // If this seek is to the same or later key, and the iterator is
    1339            1 :                 // already positioned there, this is a noop. This can be helpful for
    1340            1 :                 // sparse key spaces that have many deleted keys, where one can avoid
    1341            1 :                 // the overhead of iterating past them again and again.
    1342            1 :                 if cmp <= 0 {
    1343            1 :                         if !flags.BatchJustRefreshed() &&
    1344            1 :                                 (i.iterValidityState == IterExhausted ||
    1345            1 :                                         (i.iterValidityState == IterValid && i.cmp(key, i.key) <= 0 &&
    1346            1 :                                                 (limit == nil || i.cmp(i.key, limit) < 0))) {
    1347            1 :                                 // Noop
    1348            1 :                                 if i.forceEnableSeekOpt || !testingDisableSeekOpt(key, uintptr(unsafe.Pointer(i))) {
    1349            1 :                                         i.lastPositioningOp = seekGELastPositioningOp
    1350            1 :                                         return i.iterValidityState
    1351            1 :                                 }
    1352              :                         }
    1353              :                         // cmp == 0 is not safe to optimize since
    1354              :                         // - i.pos could be at iterPosNext, due to a merge.
    1355              :                         // - Even if i.pos were at iterPosCurForward, we could have a DELETE,
    1356              :                         //   SET pair for a key, and the iterator would have moved past DELETE
    1357              :                         //   but stayed at iterPosCurForward. A similar situation occurs for a
    1358              :                         //   MERGE, SET pair where the MERGE is consumed and the iterator is
    1359              :                         //   at the SET.
    1360              :                         // We also leverage the IterAtLimit <=> i.pos invariant defined in the
    1361              :                         // comment on iterValidityState, to exclude any cases where i.pos
    1362              :                         // is iterPosCur{Forward,Reverse}Paused. This avoids the need to
    1363              :                         // special-case those iterator positions and their interactions with
    1364              :                         // TrySeekUsingNext, as the main uses for TrySeekUsingNext in CockroachDB
    1365              :                         // do not use limited Seeks in the first place.
    1366            1 :                         if cmp < 0 && i.iterValidityState != IterAtLimit && limit == nil {
    1367            1 :                                 flags = flags.EnableTrySeekUsingNext()
    1368            1 :                         }
    1369            1 :                         if testingDisableSeekOpt(key, uintptr(unsafe.Pointer(i))) && !i.forceEnableSeekOpt {
    1370            1 :                                 flags = flags.DisableTrySeekUsingNext()
    1371            1 :                         }
    1372            1 :                         if !flags.BatchJustRefreshed() && i.pos == iterPosCurForwardPaused && i.cmp(key, i.iterKV.K.UserKey) <= 0 {
    1373            1 :                                 // Have some work to do, but don't need to seek, and we can
    1374            1 :                                 // start doing findNextEntry from i.iterKey.
    1375            1 :                                 seekInternalIter = false
    1376            1 :                         }
    1377              :                 }
    1378              :         }
    1379            1 :         if seekInternalIter {
    1380            1 :                 i.iterKV = i.iter.SeekGE(key, flags)
    1381            1 :                 i.stats.ForwardSeekCount[InternalIterCall]++
    1382            1 :                 if err := i.iter.Error(); err != nil {
    1383            1 :                         i.err = err
    1384            1 :                         i.iterValidityState = IterExhausted
    1385            1 :                         return i.iterValidityState
    1386            1 :                 }
    1387              :         }
    1388            1 :         i.findNextEntry(limit)
    1389            1 :         i.maybeSampleRead()
    1390            1 :         if i.Error() == nil {
    1391            1 :                 // Prepare state for a future noop optimization.
    1392            1 :                 i.prefixOrFullSeekKey = append(i.prefixOrFullSeekKey[:0], key...)
    1393            1 :                 i.lastPositioningOp = seekGELastPositioningOp
    1394            1 :         }
    1395            1 :         return i.iterValidityState
    1396              : }
    1397              : 
    1398              : // SeekPrefixGE moves the iterator to the first key/value pair whose key is
    1399              : // greater than or equal to the given key and which has the same "prefix" as
    1400              : // the given key. The prefix for a key is determined by the user-defined
    1401              : // Comparer.Split function. The iterator will not observe keys not matching the
    1402              : // "prefix" of the search key. Calling SeekPrefixGE puts the iterator in prefix
    1403              : // iteration mode. The iterator remains in prefix iteration until a subsequent
    1404              : // call to another absolute positioning method (SeekGE, SeekLT, First,
    1405              : // Last). Reverse iteration (Prev) is not supported when an iterator is in
    1406              : // prefix iteration mode. Returns true if the iterator is pointing at a valid
    1407              : // entry and false otherwise.
    1408              : //
    1409              : // The semantics of SeekPrefixGE are slightly unusual and designed for
    1410              : // iteration to be able to take advantage of bloom filters that have been
    1411              : // created on the "prefix". If you're not using bloom filters, there is no
    1412              : // reason to use SeekPrefixGE.
    1413              : //
    1414              : // An example Split function may separate a timestamp suffix from the prefix of
    1415              : // the key.
    1416              : //
    1417              : //      Split(<key>@<timestamp>) -> <key>
    1418              : //
    1419              : // Consider the keys "a@1", "a@2", "aa@3", "aa@4". The prefixes for these keys
    1420              : // are "a", and "aa". Note that despite "a" and "aa" sharing a prefix by the
    1421              : // usual definition, those prefixes differ by the definition of the Split
    1422              : // function. To see how this works, consider the following set of calls on this
    1423              : // data set:
    1424              : //
    1425              : //      SeekPrefixGE("a@0") -> "a@1"
    1426              : //      Next()              -> "a@2"
    1427              : //      Next()              -> EOF
    1428              : //
    1429              : // If you're just looking to iterate over keys with a shared prefix, as
    1430              : // defined by the configured comparer, set iterator bounds instead:
    1431              : //
    1432              : //      iter := db.NewIter(&pebble.IterOptions{
    1433              : //        LowerBound: []byte("prefix"),
    1434              : //        UpperBound: []byte("prefiy"),
    1435              : //      })
    1436              : //      for iter.First(); iter.Valid(); iter.Next() {
    1437              : //        // Only keys beginning with "prefix" will be visited.
    1438              : //      }
    1439              : //
    1440              : // See ExampleIterator_SeekPrefixGE for a working example.
    1441              : //
    1442              : // When iterating with range keys enabled, all range keys encountered are
    1443              : // truncated to the seek key's prefix's bounds. The truncation of the upper
    1444              : // bound requires that the database's Comparer is configured with a
    1445              : // ImmediateSuccessor method. For example, a SeekPrefixGE("a@9") call with the
    1446              : // prefix "a" will truncate range key bounds to [a,ImmediateSuccessor(a)].
    1447            1 : func (i *Iterator) SeekPrefixGE(key []byte) bool {
    1448            1 :         if i.rangeKey != nil {
    1449            1 :                 // NB: Check Valid() before clearing requiresReposition.
    1450            1 :                 i.rangeKey.prevPosHadRangeKey = i.rangeKey.hasRangeKey && i.Valid()
    1451            1 :                 // If we have a range key but did not expose it at the previous iterator
    1452            1 :                 // position (because the iterator was not at a valid position), updated
    1453            1 :                 // must be true. This ensures that after an iterator op sequence like:
    1454            1 :                 //   - Next()             → (IterValid, RangeBounds() = [a,b))
    1455            1 :                 //   - NextWithLimit(...) → (IterAtLimit, RangeBounds() = -)
    1456            1 :                 //   - SeekPrefixGE(...)  → (IterValid, RangeBounds() = [a,b))
    1457            1 :                 // the iterator returns RangeKeyChanged()=true.
    1458            1 :                 //
    1459            1 :                 // The remainder of this function will only update i.rangeKey.updated if
    1460            1 :                 // the iterator moves into a new range key, or out of the current range
    1461            1 :                 // key.
    1462            1 :                 i.rangeKey.updated = i.rangeKey.hasRangeKey && !i.Valid() && i.opts.rangeKeys()
    1463            1 :         }
    1464            1 :         lastPositioningOp := i.lastPositioningOp
    1465            1 :         // Set it to unknown, since this operation may not succeed, in which case
    1466            1 :         // the SeekPrefixGE following this should not make any assumption about
    1467            1 :         // iterator position.
    1468            1 :         i.lastPositioningOp = unknownLastPositionOp
    1469            1 :         i.requiresReposition = false
    1470            1 :         i.err = nil // clear cached iteration error
    1471            1 :         i.stats.ForwardSeekCount[InterfaceCall]++
    1472            1 :         if i.comparer.ImmediateSuccessor == nil && i.opts.KeyTypes != IterKeyTypePointsOnly {
    1473            0 :                 panic("pebble: ImmediateSuccessor must be provided for SeekPrefixGE with range keys")
    1474              :         }
    1475            1 :         prefixLen := i.comparer.Split(key)
    1476            1 :         keyPrefix := key[:prefixLen]
    1477            1 :         var flags base.SeekGEFlags
    1478            1 :         if i.batchJustRefreshed {
    1479            1 :                 flags = flags.EnableBatchJustRefreshed()
    1480            1 :                 i.batchJustRefreshed = false
    1481            1 :         }
    1482            1 :         if lastPositioningOp == seekPrefixGELastPositioningOp {
    1483            1 :                 if !i.hasPrefix {
    1484            0 :                         panic("lastPositioningOpsIsSeekPrefixGE is true, but hasPrefix is false")
    1485              :                 }
    1486              :                 // The iterator has not been repositioned after the last SeekPrefixGE.
    1487              :                 // See if we are seeking to a larger key, since then we can optimize
    1488              :                 // the seek by using next. Note that we could also optimize if Next
    1489              :                 // has been called, if the iterator is not exhausted and the current
    1490              :                 // position is <= the seek key. We are keeping this limited for now
    1491              :                 // since such optimizations require care for correctness, and to not
    1492              :                 // become de-optimizations (if one usually has to do all the next
    1493              :                 // calls and then the seek). This SeekPrefixGE optimization
    1494              :                 // specifically benefits CockroachDB.
    1495            1 :                 cmp := i.cmp(i.prefixOrFullSeekKey, keyPrefix)
    1496            1 :                 // cmp == 0 is not safe to optimize since
    1497            1 :                 // - i.pos could be at iterPosNext, due to a merge.
    1498            1 :                 // - Even if i.pos were at iterPosCurForward, we could have a DELETE,
    1499            1 :                 //   SET pair for a key, and the iterator would have moved past DELETE
    1500            1 :                 //   but stayed at iterPosCurForward. A similar situation occurs for a
    1501            1 :                 //   MERGE, SET pair where the MERGE is consumed and the iterator is
    1502            1 :                 //   at the SET.
    1503            1 :                 // In general some versions of i.prefix could have been consumed by
    1504            1 :                 // the iterator, so we only optimize for cmp < 0.
    1505            1 :                 if cmp < 0 {
    1506            1 :                         flags = flags.EnableTrySeekUsingNext()
    1507            1 :                 }
    1508            1 :                 if testingDisableSeekOpt(key, uintptr(unsafe.Pointer(i))) && !i.forceEnableSeekOpt {
    1509            1 :                         flags = flags.DisableTrySeekUsingNext()
    1510            1 :                 }
    1511              :         }
    1512              :         // Make a copy of the prefix so that modifications to the key after
    1513              :         // SeekPrefixGE returns does not affect the stored prefix.
    1514            1 :         if cap(i.prefixOrFullSeekKey) < prefixLen {
    1515            1 :                 i.prefixOrFullSeekKey = make([]byte, prefixLen)
    1516            1 :         } else {
    1517            1 :                 i.prefixOrFullSeekKey = i.prefixOrFullSeekKey[:prefixLen]
    1518            1 :         }
    1519            1 :         i.hasPrefix = true
    1520            1 :         copy(i.prefixOrFullSeekKey, keyPrefix)
    1521            1 : 
    1522            1 :         if lowerBound := i.opts.GetLowerBound(); lowerBound != nil && i.cmp(key, lowerBound) < 0 {
    1523            1 :                 if p := i.comparer.Split.Prefix(lowerBound); !bytes.Equal(i.prefixOrFullSeekKey, p) {
    1524            1 :                         i.err = errors.New("pebble: SeekPrefixGE supplied with key outside of lower bound")
    1525            1 :                         i.iterValidityState = IterExhausted
    1526            1 :                         return false
    1527            1 :                 }
    1528            1 :                 key = lowerBound
    1529            1 :         } else if upperBound := i.opts.GetUpperBound(); upperBound != nil && i.cmp(key, upperBound) > 0 {
    1530            1 :                 if p := i.comparer.Split.Prefix(upperBound); !bytes.Equal(i.prefixOrFullSeekKey, p) {
    1531            1 :                         i.err = errors.New("pebble: SeekPrefixGE supplied with key outside of upper bound")
    1532            1 :                         i.iterValidityState = IterExhausted
    1533            1 :                         return false
    1534            1 :                 }
    1535            1 :                 key = upperBound
    1536              :         }
    1537            1 :         i.iterKV = i.iter.SeekPrefixGE(i.prefixOrFullSeekKey, key, flags)
    1538            1 :         i.stats.ForwardSeekCount[InternalIterCall]++
    1539            1 :         i.findNextEntry(nil)
    1540            1 :         i.maybeSampleRead()
    1541            1 :         if i.Error() == nil {
    1542            1 :                 i.lastPositioningOp = seekPrefixGELastPositioningOp
    1543            1 :         }
    1544            1 :         return i.iterValidityState == IterValid
    1545              : }
    1546              : 
    1547              : // Deterministic disabling (in testing mode) of the seek optimizations. It uses
    1548              : // the iterator pointer, since we want diversity in iterator behavior for the
    1549              : // same key.  Used for tests.
    1550            1 : func testingDisableSeekOpt(key []byte, ptr uintptr) bool {
    1551            1 :         if !invariants.Enabled {
    1552            0 :                 return false
    1553            0 :         }
    1554              :         // Fibonacci hash https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
    1555            1 :         simpleHash := (11400714819323198485 * uint64(ptr)) >> 63
    1556            1 :         return key != nil && key[0]&byte(1) == 0 && simpleHash == 0
    1557              : }
    1558              : 
    1559              : // SeekLT moves the iterator to the last key/value pair whose key is less than
    1560              : // the given key. Returns true if the iterator is pointing at a valid entry and
    1561              : // false otherwise.
    1562            1 : func (i *Iterator) SeekLT(key []byte) bool {
    1563            1 :         return i.SeekLTWithLimit(key, nil) == IterValid
    1564            1 : }
    1565              : 
    1566              : // SeekLTWithLimit moves the iterator to the last key/value pair whose key is
    1567              : // less than the given key.
    1568              : //
    1569              : // If limit is provided, it serves as a best-effort inclusive limit. If the last
    1570              : // key less than the given search key is also less than limit, the Iterator may
    1571              : // pause and return IterAtLimit. Because limits are best-effort, SeekLTWithLimit
    1572              : // may return a key beyond limit.
    1573              : //
    1574              : // If the Iterator is configured to iterate over range keys, SeekLTWithLimit
    1575              : // guarantees it will surface any range keys with bounds overlapping the
    1576              : // keyspace up to limit.
    1577            1 : func (i *Iterator) SeekLTWithLimit(key []byte, limit []byte) IterValidityState {
    1578            1 :         if i.rangeKey != nil {
    1579            1 :                 // NB: Check Valid() before clearing requiresReposition.
    1580            1 :                 i.rangeKey.prevPosHadRangeKey = i.rangeKey.hasRangeKey && i.Valid()
    1581            1 :                 // If we have a range key but did not expose it at the previous iterator
    1582            1 :                 // position (because the iterator was not at a valid position), updated
    1583            1 :                 // must be true. This ensures that after an iterator op sequence like:
    1584            1 :                 //   - Next()               → (IterValid, RangeBounds() = [a,b))
    1585            1 :                 //   - NextWithLimit(...)   → (IterAtLimit, RangeBounds() = -)
    1586            1 :                 //   - SeekLTWithLimit(...) → (IterValid, RangeBounds() = [a,b))
    1587            1 :                 // the iterator returns RangeKeyChanged()=true.
    1588            1 :                 //
    1589            1 :                 // The remainder of this function will only update i.rangeKey.updated if
    1590            1 :                 // the iterator moves into a new range key, or out of the current range
    1591            1 :                 // key.
    1592            1 :                 i.rangeKey.updated = i.rangeKey.hasRangeKey && !i.Valid() && i.opts.rangeKeys()
    1593            1 :         }
    1594            1 :         lastPositioningOp := i.lastPositioningOp
    1595            1 :         // Set it to unknown, since this operation may not succeed, in which case
    1596            1 :         // the SeekLT following this should not make any assumption about iterator
    1597            1 :         // position.
    1598            1 :         i.lastPositioningOp = unknownLastPositionOp
    1599            1 :         i.batchJustRefreshed = false
    1600            1 :         i.requiresReposition = false
    1601            1 :         i.err = nil // clear cached iteration error
    1602            1 :         i.stats.ReverseSeekCount[InterfaceCall]++
    1603            1 :         if upperBound := i.opts.GetUpperBound(); upperBound != nil && i.cmp(key, upperBound) > 0 {
    1604            1 :                 key = upperBound
    1605            1 :         } else if lowerBound := i.opts.GetLowerBound(); lowerBound != nil && i.cmp(key, lowerBound) < 0 {
    1606            1 :                 key = lowerBound
    1607            1 :         }
    1608            1 :         i.hasPrefix = false
    1609            1 :         seekInternalIter := true
    1610            1 :         // The following noop optimization only applies when i.batch == nil, since
    1611            1 :         // an iterator over a batch is iterating over mutable data, that may have
    1612            1 :         // changed since the last seek.
    1613            1 :         if lastPositioningOp == seekLTLastPositioningOp && i.batch == nil {
    1614            1 :                 cmp := i.cmp(key, i.prefixOrFullSeekKey)
    1615            1 :                 // If this seek is to the same or earlier key, and the iterator is
    1616            1 :                 // already positioned there, this is a noop. This can be helpful for
    1617            1 :                 // sparse key spaces that have many deleted keys, where one can avoid
    1618            1 :                 // the overhead of iterating past them again and again.
    1619            1 :                 if cmp <= 0 {
    1620            1 :                         // NB: when pos != iterPosCurReversePaused, the invariant
    1621            1 :                         // documented earlier implies that iterValidityState !=
    1622            1 :                         // IterAtLimit.
    1623            1 :                         if i.iterValidityState == IterExhausted ||
    1624            1 :                                 (i.iterValidityState == IterValid && i.cmp(i.key, key) < 0 &&
    1625            1 :                                         (limit == nil || i.cmp(limit, i.key) <= 0)) {
    1626            1 :                                 if !testingDisableSeekOpt(key, uintptr(unsafe.Pointer(i))) {
    1627            1 :                                         i.lastPositioningOp = seekLTLastPositioningOp
    1628            1 :                                         return i.iterValidityState
    1629            1 :                                 }
    1630              :                         }
    1631            1 :                         if i.pos == iterPosCurReversePaused && i.cmp(i.iterKV.K.UserKey, key) < 0 {
    1632            1 :                                 // Have some work to do, but don't need to seek, and we can
    1633            1 :                                 // start doing findPrevEntry from i.iterKey.
    1634            1 :                                 seekInternalIter = false
    1635            1 :                         }
    1636              :                 }
    1637              :         }
    1638            1 :         if seekInternalIter {
    1639            1 :                 i.iterKV = i.iter.SeekLT(key, base.SeekLTFlagsNone)
    1640            1 :                 i.stats.ReverseSeekCount[InternalIterCall]++
    1641            1 :                 if err := i.iter.Error(); err != nil {
    1642            1 :                         i.err = err
    1643            1 :                         i.iterValidityState = IterExhausted
    1644            1 :                         return i.iterValidityState
    1645            1 :                 }
    1646              :         }
    1647            1 :         i.findPrevEntry(limit)
    1648            1 :         i.maybeSampleRead()
    1649            1 :         if i.Error() == nil && i.batch == nil {
    1650            1 :                 // Prepare state for a future noop optimization.
    1651            1 :                 i.prefixOrFullSeekKey = append(i.prefixOrFullSeekKey[:0], key...)
    1652            1 :                 i.lastPositioningOp = seekLTLastPositioningOp
    1653            1 :         }
    1654            1 :         return i.iterValidityState
    1655              : }
    1656              : 
    1657              : // First moves the iterator the first key/value pair. Returns true if the
    1658              : // iterator is pointing at a valid entry and false otherwise.
    1659            1 : func (i *Iterator) First() bool {
    1660            1 :         if i.rangeKey != nil {
    1661            1 :                 // NB: Check Valid() before clearing requiresReposition.
    1662            1 :                 i.rangeKey.prevPosHadRangeKey = i.rangeKey.hasRangeKey && i.Valid()
    1663            1 :                 // If we have a range key but did not expose it at the previous iterator
    1664            1 :                 // position (because the iterator was not at a valid position), updated
    1665            1 :                 // must be true. This ensures that after an iterator op sequence like:
    1666            1 :                 //   - Next()             → (IterValid, RangeBounds() = [a,b))
    1667            1 :                 //   - NextWithLimit(...) → (IterAtLimit, RangeBounds() = -)
    1668            1 :                 //   - First(...)         → (IterValid, RangeBounds() = [a,b))
    1669            1 :                 // the iterator returns RangeKeyChanged()=true.
    1670            1 :                 //
    1671            1 :                 // The remainder of this function will only update i.rangeKey.updated if
    1672            1 :                 // the iterator moves into a new range key, or out of the current range
    1673            1 :                 // key.
    1674            1 :                 i.rangeKey.updated = i.rangeKey.hasRangeKey && !i.Valid() && i.opts.rangeKeys()
    1675            1 :         }
    1676            1 :         i.err = nil // clear cached iteration error
    1677            1 :         i.hasPrefix = false
    1678            1 :         i.batchJustRefreshed = false
    1679            1 :         i.lastPositioningOp = unknownLastPositionOp
    1680            1 :         i.requiresReposition = false
    1681            1 :         i.stats.ForwardSeekCount[InterfaceCall]++
    1682            1 : 
    1683            1 :         i.err = i.iterFirstWithinBounds()
    1684            1 :         if i.err != nil {
    1685            1 :                 i.iterValidityState = IterExhausted
    1686            1 :                 return false
    1687            1 :         }
    1688            1 :         i.findNextEntry(nil)
    1689            1 :         i.maybeSampleRead()
    1690            1 :         return i.iterValidityState == IterValid
    1691              : }
    1692              : 
    1693              : // Last moves the iterator the last key/value pair. Returns true if the
    1694              : // iterator is pointing at a valid entry and false otherwise.
    1695            1 : func (i *Iterator) Last() bool {
    1696            1 :         if i.rangeKey != nil {
    1697            1 :                 // NB: Check Valid() before clearing requiresReposition.
    1698            1 :                 i.rangeKey.prevPosHadRangeKey = i.rangeKey.hasRangeKey && i.Valid()
    1699            1 :                 // If we have a range key but did not expose it at the previous iterator
    1700            1 :                 // position (because the iterator was not at a valid position), updated
    1701            1 :                 // must be true. This ensures that after an iterator op sequence like:
    1702            1 :                 //   - Next()             → (IterValid, RangeBounds() = [a,b))
    1703            1 :                 //   - NextWithLimit(...) → (IterAtLimit, RangeBounds() = -)
    1704            1 :                 //   - Last(...)          → (IterValid, RangeBounds() = [a,b))
    1705            1 :                 // the iterator returns RangeKeyChanged()=true.
    1706            1 :                 //
    1707            1 :                 // The remainder of this function will only update i.rangeKey.updated if
    1708            1 :                 // the iterator moves into a new range key, or out of the current range
    1709            1 :                 // key.
    1710            1 :                 i.rangeKey.updated = i.rangeKey.hasRangeKey && !i.Valid() && i.opts.rangeKeys()
    1711            1 :         }
    1712            1 :         i.err = nil // clear cached iteration error
    1713            1 :         i.hasPrefix = false
    1714            1 :         i.batchJustRefreshed = false
    1715            1 :         i.lastPositioningOp = unknownLastPositionOp
    1716            1 :         i.requiresReposition = false
    1717            1 :         i.stats.ReverseSeekCount[InterfaceCall]++
    1718            1 : 
    1719            1 :         if i.err = i.iterLastWithinBounds(); i.err != nil {
    1720            1 :                 i.iterValidityState = IterExhausted
    1721            1 :                 return false
    1722            1 :         }
    1723            1 :         i.findPrevEntry(nil)
    1724            1 :         i.maybeSampleRead()
    1725            1 :         return i.iterValidityState == IterValid
    1726              : }
    1727              : 
    1728              : // Next moves the iterator to the next key/value pair. Returns true if the
    1729              : // iterator is pointing at a valid entry and false otherwise.
    1730            1 : func (i *Iterator) Next() bool {
    1731            1 :         return i.nextWithLimit(nil) == IterValid
    1732            1 : }
    1733              : 
    1734              : // NextWithLimit moves the iterator to the next key/value pair.
    1735              : //
    1736              : // If limit is provided, it serves as a best-effort exclusive limit. If the next
    1737              : // key  is greater than or equal to limit, the Iterator may pause and return
    1738              : // IterAtLimit. Because limits are best-effort, NextWithLimit may return a key
    1739              : // beyond limit.
    1740              : //
    1741              : // If the Iterator is configured to iterate over range keys, NextWithLimit
    1742              : // guarantees it will surface any range keys with bounds overlapping the
    1743              : // keyspace up to limit.
    1744            1 : func (i *Iterator) NextWithLimit(limit []byte) IterValidityState {
    1745            1 :         return i.nextWithLimit(limit)
    1746            1 : }
    1747              : 
    1748              : // NextPrefix moves the iterator to the next key/value pair with a key
    1749              : // containing a different prefix than the current key. Prefixes are determined
    1750              : // by Comparer.Split. Exhausts the iterator if invoked while in prefix-iteration
    1751              : // mode.
    1752              : //
    1753              : // It is not permitted to invoke NextPrefix while at a IterAtLimit position.
    1754              : // When called in this condition, NextPrefix has non-deterministic behavior.
    1755              : //
    1756              : // It is not permitted to invoke NextPrefix when the Iterator has an
    1757              : // upper-bound that is a versioned MVCC key (see the comment for
    1758              : // Comparer.Split). It returns an error in this case.
    1759            1 : func (i *Iterator) NextPrefix() bool {
    1760            1 :         if i.nextPrefixNotPermittedByUpperBound {
    1761            1 :                 i.lastPositioningOp = unknownLastPositionOp
    1762            1 :                 i.requiresReposition = false
    1763            1 :                 i.err = errors.Errorf("NextPrefix not permitted with upper bound %s",
    1764            1 :                         i.comparer.FormatKey(i.opts.UpperBound))
    1765            1 :                 i.iterValidityState = IterExhausted
    1766            1 :                 return false
    1767            1 :         }
    1768            1 :         if i.hasPrefix {
    1769            1 :                 i.iterValidityState = IterExhausted
    1770            1 :                 return false
    1771            1 :         }
    1772            1 :         if i.Error() != nil {
    1773            1 :                 return false
    1774            1 :         }
    1775            1 :         return i.nextPrefix() == IterValid
    1776              : }
    1777              : 
    1778            1 : func (i *Iterator) nextPrefix() IterValidityState {
    1779            1 :         if i.rangeKey != nil {
    1780            1 :                 // NB: Check Valid() before clearing requiresReposition.
    1781            1 :                 i.rangeKey.prevPosHadRangeKey = i.rangeKey.hasRangeKey && i.Valid()
    1782            1 :                 // If we have a range key but did not expose it at the previous iterator
    1783            1 :                 // position (because the iterator was not at a valid position), updated
    1784            1 :                 // must be true. This ensures that after an iterator op sequence like:
    1785            1 :                 //   - Next()             → (IterValid, RangeBounds() = [a,b))
    1786            1 :                 //   - NextWithLimit(...) → (IterAtLimit, RangeBounds() = -)
    1787            1 :                 //   - NextWithLimit(...) → (IterValid, RangeBounds() = [a,b))
    1788            1 :                 // the iterator returns RangeKeyChanged()=true.
    1789            1 :                 //
    1790            1 :                 // The remainder of this function will only update i.rangeKey.updated if
    1791            1 :                 // the iterator moves into a new range key, or out of the current range
    1792            1 :                 // key.
    1793            1 :                 i.rangeKey.updated = i.rangeKey.hasRangeKey && !i.Valid() && i.opts.rangeKeys()
    1794            1 :         }
    1795              : 
    1796              :         // Although NextPrefix documents that behavior at IterAtLimit is undefined,
    1797              :         // this function handles these cases as a simple prefix-agnostic Next. This
    1798              :         // is done for deterministic behavior in the metamorphic tests.
    1799              :         //
    1800              :         // TODO(jackson): If the metamorphic test operation generator is adjusted to
    1801              :         // make generation of some operations conditional on the previous
    1802              :         // operations, then we can remove this behavior and explicitly error.
    1803              : 
    1804            1 :         i.lastPositioningOp = unknownLastPositionOp
    1805            1 :         i.requiresReposition = false
    1806            1 :         switch i.pos {
    1807            1 :         case iterPosCurForward:
    1808            1 :                 // Positioned on the current key. Advance to the next prefix.
    1809            1 :                 i.internalNextPrefix(i.comparer.Split(i.key))
    1810            0 :         case iterPosCurForwardPaused:
    1811              :                 // Positioned at a limit. Implement as a prefix-agnostic Next. See TODO
    1812              :                 // up above. The iterator is already positioned at the next key.
    1813            1 :         case iterPosCurReverse:
    1814            1 :                 // Switching directions.
    1815            1 :                 // Unless the iterator was exhausted, reverse iteration needs to
    1816            1 :                 // position the iterator at iterPosPrev.
    1817            1 :                 if i.iterKV != nil {
    1818            0 :                         i.err = errors.New("switching from reverse to forward but iter is not at prev")
    1819            0 :                         i.iterValidityState = IterExhausted
    1820            0 :                         return i.iterValidityState
    1821            0 :                 }
    1822              :                 // The Iterator is exhausted and i.iter is positioned before the first
    1823              :                 // key. Reposition to point to the first internal key.
    1824            1 :                 if i.err = i.iterFirstWithinBounds(); i.err != nil {
    1825            0 :                         i.iterValidityState = IterExhausted
    1826            0 :                         return i.iterValidityState
    1827            0 :                 }
    1828            1 :         case iterPosCurReversePaused:
    1829            1 :                 // Positioned at a limit. Implement as a prefix-agnostic Next. See TODO
    1830            1 :                 // up above.
    1831            1 :                 //
    1832            1 :                 // Switching directions; The iterator must not be exhausted since it
    1833            1 :                 // paused.
    1834            1 :                 if i.iterKV == nil {
    1835            0 :                         i.err = errors.New("switching paused from reverse to forward but iter is exhausted")
    1836            0 :                         i.iterValidityState = IterExhausted
    1837            0 :                         return i.iterValidityState
    1838            0 :                 }
    1839            1 :                 i.nextUserKey()
    1840            1 :         case iterPosPrev:
    1841            1 :                 // The underlying iterator is pointed to the previous key (this can
    1842            1 :                 // only happen when switching iteration directions).
    1843            1 :                 if i.iterKV == nil {
    1844            1 :                         // We're positioned before the first key. Need to reposition to point to
    1845            1 :                         // the first key.
    1846            1 :                         i.err = i.iterFirstWithinBounds()
    1847            1 :                         if i.iterKV == nil {
    1848            0 :                                 i.iterValidityState = IterExhausted
    1849            0 :                                 return i.iterValidityState
    1850            0 :                         }
    1851            1 :                         if invariants.Enabled && !i.equal(i.iterKV.K.UserKey, i.key) {
    1852            0 :                                 i.opts.getLogger().Fatalf("pebble: invariant violation: First internal iterator from iterPosPrev landed on %q, not %q",
    1853            0 :                                         i.iterKV.K.UserKey, i.key)
    1854            0 :                         }
    1855            1 :                 } else {
    1856            1 :                         // Move the internal iterator back onto the user key stored in
    1857            1 :                         // i.key. iterPosPrev guarantees that it's positioned at the last
    1858            1 :                         // key with the user key less than i.key, so we're guaranteed to
    1859            1 :                         // land on the correct key with a single Next.
    1860            1 :                         i.iterKV = i.iter.Next()
    1861            1 :                         if i.iterKV == nil {
    1862            1 :                                 // This should only be possible if i.iter.Next() encountered an
    1863            1 :                                 // error.
    1864            1 :                                 if i.iter.Error() == nil {
    1865            0 :                                         i.opts.getLogger().Fatalf("pebble: invariant violation: Nexting internal iterator from iterPosPrev found nothing")
    1866            0 :                                 }
    1867              :                                 // NB: Iterator.Error() will return i.iter.Error().
    1868            1 :                                 i.iterValidityState = IterExhausted
    1869            1 :                                 return i.iterValidityState
    1870              :                         }
    1871            1 :                         if invariants.Enabled && !i.equal(i.iterKV.K.UserKey, i.key) {
    1872            0 :                                 i.opts.getLogger().Fatalf("pebble: invariant violation: Nexting internal iterator from iterPosPrev landed on %q, not %q",
    1873            0 :                                         i.iterKV.K.UserKey, i.key)
    1874            0 :                         }
    1875              :                 }
    1876              :                 // The internal iterator is now positioned at i.key. Advance to the next
    1877              :                 // prefix.
    1878            1 :                 i.internalNextPrefix(i.comparer.Split(i.key))
    1879            1 :         case iterPosNext:
    1880            1 :                 // Already positioned on the next key. Only call nextPrefixKey if the
    1881            1 :                 // next key shares the same prefix.
    1882            1 :                 if i.iterKV != nil {
    1883            1 :                         currKeyPrefixLen := i.comparer.Split(i.key)
    1884            1 :                         if bytes.Equal(i.comparer.Split.Prefix(i.iterKV.K.UserKey), i.key[:currKeyPrefixLen]) {
    1885            1 :                                 i.internalNextPrefix(currKeyPrefixLen)
    1886            1 :                         }
    1887              :                 }
    1888              :         }
    1889              : 
    1890            1 :         i.stats.ForwardStepCount[InterfaceCall]++
    1891            1 :         i.findNextEntry(nil /* limit */)
    1892            1 :         i.maybeSampleRead()
    1893            1 :         return i.iterValidityState
    1894              : }
    1895              : 
    1896            1 : func (i *Iterator) internalNextPrefix(currKeyPrefixLen int) {
    1897            1 :         if i.iterKV == nil {
    1898            1 :                 return
    1899            1 :         }
    1900              :         // The Next "fast-path" is not really a fast-path when there is more than
    1901              :         // one version. However, even with TableFormatPebblev3, there is a small
    1902              :         // slowdown (~10%) for one version if we remove it and only call NextPrefix.
    1903              :         // When there are two versions, only calling NextPrefix is ~30% faster.
    1904            1 :         i.stats.ForwardStepCount[InternalIterCall]++
    1905            1 :         if i.iterKV = i.iter.Next(); i.iterKV == nil {
    1906            1 :                 return
    1907            1 :         }
    1908            1 :         if !bytes.Equal(i.comparer.Split.Prefix(i.iterKV.K.UserKey), i.key[:currKeyPrefixLen]) {
    1909            1 :                 return
    1910            1 :         }
    1911            1 :         i.stats.ForwardStepCount[InternalIterCall]++
    1912            1 :         i.prefixOrFullSeekKey = i.comparer.ImmediateSuccessor(i.prefixOrFullSeekKey[:0], i.key[:currKeyPrefixLen])
    1913            1 :         if i.iterKV.K.IsExclusiveSentinel() {
    1914            0 :                 panic(errors.AssertionFailedf("pebble: unexpected exclusive sentinel key: %q", i.iterKV.K))
    1915              :         }
    1916              : 
    1917            1 :         i.iterKV = i.iter.NextPrefix(i.prefixOrFullSeekKey)
    1918            1 :         if invariants.Enabled && i.iterKV != nil {
    1919            1 :                 if p := i.comparer.Split.Prefix(i.iterKV.K.UserKey); i.cmp(p, i.prefixOrFullSeekKey) < 0 {
    1920            0 :                         panic(errors.AssertionFailedf("pebble: iter.NextPrefix did not advance beyond the current prefix: now at %q; expected to be geq %q",
    1921            0 :                                 i.iterKV.K, i.prefixOrFullSeekKey))
    1922              :                 }
    1923              :         }
    1924              : }
    1925              : 
    1926            1 : func (i *Iterator) nextWithLimit(limit []byte) IterValidityState {
    1927            1 :         i.stats.ForwardStepCount[InterfaceCall]++
    1928            1 :         if i.hasPrefix {
    1929            1 :                 if limit != nil {
    1930            1 :                         i.err = errors.New("cannot use limit with prefix iteration")
    1931            1 :                         i.iterValidityState = IterExhausted
    1932            1 :                         return i.iterValidityState
    1933            1 :                 } else if i.iterValidityState == IterExhausted {
    1934            1 :                         // No-op, already exhausted. We avoid executing the Next because it
    1935            1 :                         // can break invariants: Specifically, a file that fails the bloom
    1936            1 :                         // filter test may result in its level being removed from the
    1937            1 :                         // merging iterator. The level's removal can cause a lazy combined
    1938            1 :                         // iterator to miss range keys and trigger a switch to combined
    1939            1 :                         // iteration at a larger key, breaking keyspan invariants.
    1940            1 :                         return i.iterValidityState
    1941            1 :                 }
    1942              :         }
    1943            1 :         if i.err != nil {
    1944            1 :                 return i.iterValidityState
    1945            1 :         }
    1946            1 :         if i.rangeKey != nil {
    1947            1 :                 // NB: Check Valid() before clearing requiresReposition.
    1948            1 :                 i.rangeKey.prevPosHadRangeKey = i.rangeKey.hasRangeKey && i.Valid()
    1949            1 :                 // If we have a range key but did not expose it at the previous iterator
    1950            1 :                 // position (because the iterator was not at a valid position), updated
    1951            1 :                 // must be true. This ensures that after an iterator op sequence like:
    1952            1 :                 //   - Next()             → (IterValid, RangeBounds() = [a,b))
    1953            1 :                 //   - NextWithLimit(...) → (IterAtLimit, RangeBounds() = -)
    1954            1 :                 //   - NextWithLimit(...) → (IterValid, RangeBounds() = [a,b))
    1955            1 :                 // the iterator returns RangeKeyChanged()=true.
    1956            1 :                 //
    1957            1 :                 // The remainder of this function will only update i.rangeKey.updated if
    1958            1 :                 // the iterator moves into a new range key, or out of the current range
    1959            1 :                 // key.
    1960            1 :                 i.rangeKey.updated = i.rangeKey.hasRangeKey && !i.Valid() && i.opts.rangeKeys()
    1961            1 :         }
    1962            1 :         i.lastPositioningOp = unknownLastPositionOp
    1963            1 :         i.requiresReposition = false
    1964            1 :         switch i.pos {
    1965            1 :         case iterPosCurForward:
    1966            1 :                 i.nextUserKey()
    1967            1 :         case iterPosCurForwardPaused:
    1968              :                 // Already at the right place.
    1969            1 :         case iterPosCurReverse:
    1970            1 :                 // Switching directions.
    1971            1 :                 // Unless the iterator was exhausted, reverse iteration needs to
    1972            1 :                 // position the iterator at iterPosPrev.
    1973            1 :                 if i.iterKV != nil {
    1974            0 :                         i.err = errors.New("switching from reverse to forward but iter is not at prev")
    1975            0 :                         i.iterValidityState = IterExhausted
    1976            0 :                         return i.iterValidityState
    1977            0 :                 }
    1978              :                 // We're positioned before the first key. Need to reposition to point to
    1979              :                 // the first key.
    1980            1 :                 if i.err = i.iterFirstWithinBounds(); i.err != nil {
    1981            1 :                         i.iterValidityState = IterExhausted
    1982            1 :                         return i.iterValidityState
    1983            1 :                 }
    1984            1 :         case iterPosCurReversePaused:
    1985            1 :                 // Switching directions.
    1986            1 :                 // The iterator must not be exhausted since it paused.
    1987            1 :                 if i.iterKV == nil {
    1988            0 :                         i.err = errors.New("switching paused from reverse to forward but iter is exhausted")
    1989            0 :                         i.iterValidityState = IterExhausted
    1990            0 :                         return i.iterValidityState
    1991            0 :                 }
    1992            1 :                 i.nextUserKey()
    1993            1 :         case iterPosPrev:
    1994            1 :                 // The underlying iterator is pointed to the previous key (this can
    1995            1 :                 // only happen when switching iteration directions). We set
    1996            1 :                 // i.iterValidityState to IterExhausted here to force the calls to
    1997            1 :                 // nextUserKey to save the current key i.iter is pointing at in order
    1998            1 :                 // to determine when the next user-key is reached.
    1999            1 :                 i.iterValidityState = IterExhausted
    2000            1 :                 if i.iterKV == nil {
    2001            1 :                         // We're positioned before the first key. Need to reposition to point to
    2002            1 :                         // the first key.
    2003            1 :                         i.err = i.iterFirstWithinBounds()
    2004            1 :                 } else {
    2005            1 :                         i.nextUserKey()
    2006            1 :                 }
    2007            1 :                 if i.err != nil {
    2008            1 :                         i.iterValidityState = IterExhausted
    2009            1 :                         return i.iterValidityState
    2010            1 :                 }
    2011            1 :                 i.nextUserKey()
    2012            1 :         case iterPosNext:
    2013              :                 // Already at the right place.
    2014              :         }
    2015            1 :         i.findNextEntry(limit)
    2016            1 :         i.maybeSampleRead()
    2017            1 :         return i.iterValidityState
    2018              : }
    2019              : 
    2020              : // Prev moves the iterator to the previous key/value pair. Returns true if the
    2021              : // iterator is pointing at a valid entry and false otherwise.
    2022            1 : func (i *Iterator) Prev() bool {
    2023            1 :         return i.PrevWithLimit(nil) == IterValid
    2024            1 : }
    2025              : 
    2026              : // PrevWithLimit moves the iterator to the previous key/value pair.
    2027              : //
    2028              : // If limit is provided, it serves as a best-effort inclusive limit. If the
    2029              : // previous key is less than limit, the Iterator may pause and return
    2030              : // IterAtLimit. Because limits are best-effort, PrevWithLimit may return a key
    2031              : // beyond limit.
    2032              : //
    2033              : // If the Iterator is configured to iterate over range keys, PrevWithLimit
    2034              : // guarantees it will surface any range keys with bounds overlapping the
    2035              : // keyspace up to limit.
    2036            1 : func (i *Iterator) PrevWithLimit(limit []byte) IterValidityState {
    2037            1 :         i.stats.ReverseStepCount[InterfaceCall]++
    2038            1 :         if i.err != nil {
    2039            1 :                 return i.iterValidityState
    2040            1 :         }
    2041            1 :         if i.rangeKey != nil {
    2042            1 :                 // NB: Check Valid() before clearing requiresReposition.
    2043            1 :                 i.rangeKey.prevPosHadRangeKey = i.rangeKey.hasRangeKey && i.Valid()
    2044            1 :                 // If we have a range key but did not expose it at the previous iterator
    2045            1 :                 // position (because the iterator was not at a valid position), updated
    2046            1 :                 // must be true. This ensures that after an iterator op sequence like:
    2047            1 :                 //   - Next()             → (IterValid, RangeBounds() = [a,b))
    2048            1 :                 //   - NextWithLimit(...) → (IterAtLimit, RangeBounds() = -)
    2049            1 :                 //   - PrevWithLimit(...) → (IterValid, RangeBounds() = [a,b))
    2050            1 :                 // the iterator returns RangeKeyChanged()=true.
    2051            1 :                 //
    2052            1 :                 // The remainder of this function will only update i.rangeKey.updated if
    2053            1 :                 // the iterator moves into a new range key, or out of the current range
    2054            1 :                 // key.
    2055            1 :                 i.rangeKey.updated = i.rangeKey.hasRangeKey && !i.Valid() && i.opts.rangeKeys()
    2056            1 :         }
    2057            1 :         i.lastPositioningOp = unknownLastPositionOp
    2058            1 :         i.requiresReposition = false
    2059            1 :         if i.hasPrefix {
    2060            1 :                 i.err = errReversePrefixIteration
    2061            1 :                 i.iterValidityState = IterExhausted
    2062            1 :                 return i.iterValidityState
    2063            1 :         }
    2064            1 :         switch i.pos {
    2065            1 :         case iterPosCurForward:
    2066              :                 // Switching directions, and will handle this below.
    2067            1 :         case iterPosCurForwardPaused:
    2068              :                 // Switching directions, and will handle this below.
    2069            1 :         case iterPosCurReverse:
    2070            1 :                 i.prevUserKey()
    2071            1 :         case iterPosCurReversePaused:
    2072              :                 // Already at the right place.
    2073            1 :         case iterPosNext:
    2074              :                 // The underlying iterator is pointed to the next key (this can only happen
    2075              :                 // when switching iteration directions). We will handle this below.
    2076            1 :         case iterPosPrev:
    2077              :                 // Already at the right place.
    2078              :         }
    2079            1 :         if i.pos == iterPosCurForward || i.pos == iterPosNext || i.pos == iterPosCurForwardPaused {
    2080            1 :                 // Switching direction.
    2081            1 :                 stepAgain := i.pos == iterPosNext
    2082            1 : 
    2083            1 :                 // Synthetic range key markers are a special case. Consider SeekGE(b)
    2084            1 :                 // which finds a range key [a, c). To ensure the user observes the range
    2085            1 :                 // key, the Iterator pauses at Key() = b. The iterator must advance the
    2086            1 :                 // internal iterator to see if there's also a coincident point key at
    2087            1 :                 // 'b', leaving the iterator at iterPosNext if there's not.
    2088            1 :                 //
    2089            1 :                 // This is a problem: Synthetic range key markers are only interleaved
    2090            1 :                 // during the original seek. A subsequent Prev() of i.iter will not move
    2091            1 :                 // back onto the synthetic range key marker. In this case where the
    2092            1 :                 // previous iterator position was a synthetic range key start boundary,
    2093            1 :                 // we must not step a second time.
    2094            1 :                 if i.isEphemeralPosition() {
    2095            1 :                         stepAgain = false
    2096            1 :                 }
    2097              : 
    2098              :                 // We set i.iterValidityState to IterExhausted here to force the calls
    2099              :                 // to prevUserKey to save the current key i.iter is pointing at in
    2100              :                 // order to determine when the prev user-key is reached.
    2101            1 :                 i.iterValidityState = IterExhausted
    2102            1 :                 if i.iterKV == nil {
    2103            1 :                         // We're positioned after the last key. Need to reposition to point to
    2104            1 :                         // the last key.
    2105            1 :                         i.err = i.iterLastWithinBounds()
    2106            1 :                 } else {
    2107            1 :                         i.prevUserKey()
    2108            1 :                 }
    2109            1 :                 if i.err != nil {
    2110            1 :                         return i.iterValidityState
    2111            1 :                 }
    2112            1 :                 if stepAgain {
    2113            1 :                         i.prevUserKey()
    2114            1 :                         if i.err != nil {
    2115            0 :                                 return i.iterValidityState
    2116            0 :                         }
    2117              :                 }
    2118              :         }
    2119            1 :         i.findPrevEntry(limit)
    2120            1 :         i.maybeSampleRead()
    2121            1 :         return i.iterValidityState
    2122              : }
    2123              : 
    2124              : // iterFirstWithinBounds moves the internal iterator to the first key,
    2125              : // respecting bounds.
    2126            1 : func (i *Iterator) iterFirstWithinBounds() error {
    2127            1 :         i.stats.ForwardSeekCount[InternalIterCall]++
    2128            1 :         if lowerBound := i.opts.GetLowerBound(); lowerBound != nil {
    2129            1 :                 i.iterKV = i.iter.SeekGE(lowerBound, base.SeekGEFlagsNone)
    2130            1 :         } else {
    2131            1 :                 i.iterKV = i.iter.First()
    2132            1 :         }
    2133            1 :         if i.iterKV == nil {
    2134            1 :                 return i.iter.Error()
    2135            1 :         }
    2136            1 :         return nil
    2137              : }
    2138              : 
    2139              : // iterLastWithinBounds moves the internal iterator to the last key, respecting
    2140              : // bounds.
    2141            1 : func (i *Iterator) iterLastWithinBounds() error {
    2142            1 :         i.stats.ReverseSeekCount[InternalIterCall]++
    2143            1 :         if upperBound := i.opts.GetUpperBound(); upperBound != nil {
    2144            1 :                 i.iterKV = i.iter.SeekLT(upperBound, base.SeekLTFlagsNone)
    2145            1 :         } else {
    2146            1 :                 i.iterKV = i.iter.Last()
    2147            1 :         }
    2148            1 :         if i.iterKV == nil {
    2149            1 :                 return i.iter.Error()
    2150            1 :         }
    2151            1 :         return nil
    2152              : }
    2153              : 
    2154              : // RangeKeyData describes a range key's data, set through RangeKeySet. The key
    2155              : // boundaries of the range key is provided by Iterator.RangeBounds.
    2156              : type RangeKeyData struct {
    2157              :         Suffix []byte
    2158              :         Value  []byte
    2159              : }
    2160              : 
    2161              : // rangeKeyWithinLimit is called during limited reverse iteration when
    2162              : // positioned over a key beyond the limit. If there exists a range key that lies
    2163              : // within the limit, the iterator must not pause in order to ensure the user has
    2164              : // an opportunity to observe the range key within limit.
    2165              : //
    2166              : // It would be valid to ignore the limit whenever there's a range key covering
    2167              : // the key, but that would introduce nondeterminism. To preserve determinism for
    2168              : // testing, the iterator ignores the limit only if the covering range key does
    2169              : // cover the keyspace within the limit.
    2170              : //
    2171              : // This awkwardness exists because range keys are interleaved at their inclusive
    2172              : // start positions. Note that limit is inclusive.
    2173            1 : func (i *Iterator) rangeKeyWithinLimit(limit []byte) bool {
    2174            1 :         if i.rangeKey == nil || !i.opts.rangeKeys() {
    2175            1 :                 return false
    2176            1 :         }
    2177            1 :         s := i.rangeKey.iiter.Span()
    2178            1 :         // If the range key ends beyond the limit, then the range key does not cover
    2179            1 :         // any portion of the keyspace within the limit and it is safe to pause.
    2180            1 :         return s != nil && i.cmp(s.End, limit) > 0
    2181              : }
    2182              : 
    2183              : // saveRangeKey saves the current range key to the underlying iterator's current
    2184              : // range key state. If the range key has not changed, saveRangeKey is a no-op.
    2185              : // If there is a new range key, saveRangeKey copies all of the key, value and
    2186              : // suffixes into Iterator-managed buffers.
    2187            1 : func (i *Iterator) saveRangeKey() {
    2188            1 :         if i.rangeKey == nil || i.opts.KeyTypes == IterKeyTypePointsOnly {
    2189            1 :                 return
    2190            1 :         }
    2191              : 
    2192            1 :         s := i.rangeKey.iiter.Span()
    2193            1 :         if s == nil {
    2194            1 :                 i.rangeKey.hasRangeKey = false
    2195            1 :                 i.rangeKey.updated = i.rangeKey.prevPosHadRangeKey
    2196            1 :                 return
    2197            1 :         } else if !i.rangeKey.stale {
    2198            1 :                 // The range key `s` is identical to the one currently saved. No-op.
    2199            1 :                 return
    2200            1 :         }
    2201              : 
    2202            1 :         if s.KeysOrder != keyspan.BySuffixAsc {
    2203            0 :                 panic("pebble: range key span's keys unexpectedly not in ascending suffix order")
    2204              :         }
    2205              : 
    2206              :         // Although `i.rangeKey.stale` is true, the span s may still be identical
    2207              :         // to the currently saved span. This is possible when seeking the iterator,
    2208              :         // which may land back on the same range key. If we previously had a range
    2209              :         // key and the new one has an identical start key, then it must be the same
    2210              :         // range key and we can avoid copying and keep `i.rangeKey.updated=false`.
    2211              :         //
    2212              :         // TODO(jackson): These key comparisons could be avoidable during relative
    2213              :         // positioning operations continuing in the same direction, because these
    2214              :         // ops will never encounter the previous position's range key while
    2215              :         // stale=true. However, threading whether the current op is a seek or step
    2216              :         // maybe isn't worth it. This key comparison is only necessary once when we
    2217              :         // step onto a new range key, which should be relatively rare.
    2218            1 :         if i.rangeKey.prevPosHadRangeKey && i.equal(i.rangeKey.start, s.Start) &&
    2219            1 :                 i.equal(i.rangeKey.end, s.End) {
    2220            1 :                 i.rangeKey.updated = false
    2221            1 :                 i.rangeKey.stale = false
    2222            1 :                 i.rangeKey.hasRangeKey = true
    2223            1 :                 return
    2224            1 :         }
    2225            1 :         i.stats.RangeKeyStats.Count += len(s.Keys)
    2226            1 :         i.rangeKey.buf.Reset()
    2227            1 :         i.rangeKey.hasRangeKey = true
    2228            1 :         i.rangeKey.updated = true
    2229            1 :         i.rangeKey.stale = false
    2230            1 :         i.rangeKey.buf, i.rangeKey.start = i.rangeKey.buf.Copy(s.Start)
    2231            1 :         i.rangeKey.buf, i.rangeKey.end = i.rangeKey.buf.Copy(s.End)
    2232            1 :         i.rangeKey.keys = i.rangeKey.keys[:0]
    2233            1 :         for j := 0; j < len(s.Keys); j++ {
    2234            1 :                 if invariants.Enabled {
    2235            1 :                         if s.Keys[j].Kind() != base.InternalKeyKindRangeKeySet {
    2236            0 :                                 panic("pebble: user iteration encountered non-RangeKeySet key kind")
    2237            1 :                         } else if j > 0 && i.comparer.CompareRangeSuffixes(s.Keys[j].Suffix, s.Keys[j-1].Suffix) < 0 {
    2238            0 :                                 panic("pebble: user iteration encountered range keys not in suffix order")
    2239              :                         }
    2240              :                 }
    2241            1 :                 var rkd RangeKeyData
    2242            1 :                 i.rangeKey.buf, rkd.Suffix = i.rangeKey.buf.Copy(s.Keys[j].Suffix)
    2243            1 :                 i.rangeKey.buf, rkd.Value = i.rangeKey.buf.Copy(s.Keys[j].Value)
    2244            1 :                 i.rangeKey.keys = append(i.rangeKey.keys, rkd)
    2245              :         }
    2246              : }
    2247              : 
    2248              : // RangeKeyChanged indicates whether the most recent iterator positioning
    2249              : // operation resulted in the iterator stepping into or out of a new range key.
    2250              : // If true, previously returned range key bounds and data has been invalidated.
    2251              : // If false, previously obtained range key bounds, suffix and value slices are
    2252              : // still valid and may continue to be read.
    2253              : //
    2254              : // Invalid iterator positions are considered to not hold range keys, meaning
    2255              : // that if an iterator steps from an IterExhausted or IterAtLimit position onto
    2256              : // a position with a range key, RangeKeyChanged will yield true.
    2257            1 : func (i *Iterator) RangeKeyChanged() bool {
    2258            1 :         return i.iterValidityState == IterValid && i.rangeKey != nil && i.rangeKey.updated
    2259            1 : }
    2260              : 
    2261              : // HasPointAndRange indicates whether there exists a point key, a range key or
    2262              : // both at the current iterator position.
    2263            1 : func (i *Iterator) HasPointAndRange() (hasPoint, hasRange bool) {
    2264            1 :         if i.iterValidityState != IterValid || i.requiresReposition {
    2265            1 :                 return false, false
    2266            1 :         }
    2267            1 :         if i.opts.KeyTypes == IterKeyTypePointsOnly {
    2268            1 :                 return true, false
    2269            1 :         }
    2270            1 :         return i.rangeKey == nil || !i.rangeKey.rangeKeyOnly, i.rangeKey != nil && i.rangeKey.hasRangeKey
    2271              : }
    2272              : 
    2273              : // RangeBounds returns the start (inclusive) and end (exclusive) bounds of the
    2274              : // range key covering the current iterator position. RangeBounds returns nil
    2275              : // bounds if there is no range key covering the current iterator position, or
    2276              : // the iterator is not configured to surface range keys.
    2277              : //
    2278              : // If valid, the returned start bound is less than or equal to Key() and the
    2279              : // returned end bound is greater than Key().
    2280            1 : func (i *Iterator) RangeBounds() (start, end []byte) {
    2281            1 :         if i.rangeKey == nil || !i.opts.rangeKeys() || !i.rangeKey.hasRangeKey {
    2282            0 :                 return nil, nil
    2283            0 :         }
    2284            1 :         return i.rangeKey.start, i.rangeKey.end
    2285              : }
    2286              : 
    2287              : // Key returns the key of the current key/value pair, or nil if done. The
    2288              : // caller should not modify the contents of the returned slice, and its
    2289              : // contents may change on the next call to Next.
    2290              : //
    2291              : // If positioned at an iterator position that only holds a range key, Key()
    2292              : // always returns the start bound of the range key. Otherwise, it returns the
    2293              : // point key's key.
    2294            1 : func (i *Iterator) Key() []byte {
    2295            1 :         return i.key
    2296            1 : }
    2297              : 
    2298              : // Value returns the value of the current key/value pair, or nil if done. The
    2299              : // caller should not modify the contents of the returned slice, and its
    2300              : // contents may change on the next call to Next.
    2301              : //
    2302              : // Only valid if HasPointAndRange() returns true for hasPoint.
    2303              : // Deprecated: use ValueAndErr instead.
    2304            1 : func (i *Iterator) Value() []byte {
    2305            1 :         val, _ := i.ValueAndErr()
    2306            1 :         return val
    2307            1 : }
    2308              : 
    2309              : // ValueAndErr returns the value, and any error encountered in extracting the value.
    2310              : // REQUIRES: i.Error()==nil and HasPointAndRange() returns true for hasPoint.
    2311              : //
    2312              : // The caller should not modify the contents of the returned slice, and its
    2313              : // contents may change on the next call to Next.
    2314            1 : func (i *Iterator) ValueAndErr() ([]byte, error) {
    2315            1 :         val, callerOwned, err := i.value.Value(i.lazyValueBuf)
    2316            1 :         if err != nil {
    2317            1 :                 i.err = err
    2318            1 :                 i.iterValidityState = IterExhausted
    2319            1 :         }
    2320            1 :         if callerOwned {
    2321            1 :                 i.lazyValueBuf = val[:0]
    2322            1 :         }
    2323            1 :         return val, err
    2324              : }
    2325              : 
    2326              : // LazyValue returns the LazyValue. Only for advanced use cases.
    2327              : // REQUIRES: i.Error()==nil and HasPointAndRange() returns true for hasPoint.
    2328            0 : func (i *Iterator) LazyValue() LazyValue {
    2329            0 :         return i.value.LazyValue()
    2330            0 : }
    2331              : 
    2332              : // RangeKeys returns the range key values and their suffixes covering the
    2333              : // current iterator position. The range bounds may be retrieved separately
    2334              : // through Iterator.RangeBounds().
    2335            1 : func (i *Iterator) RangeKeys() []RangeKeyData {
    2336            1 :         if i.rangeKey == nil || !i.opts.rangeKeys() || !i.rangeKey.hasRangeKey {
    2337            0 :                 return nil
    2338            0 :         }
    2339            1 :         return i.rangeKey.keys
    2340              : }
    2341              : 
    2342              : // Valid returns true if the iterator is positioned at a valid key/value pair
    2343              : // and false otherwise.
    2344            1 : func (i *Iterator) Valid() bool {
    2345            1 :         valid := i.iterValidityState == IterValid && !i.requiresReposition
    2346            1 :         if invariants.Enabled {
    2347            1 :                 if err := i.Error(); valid && err != nil {
    2348            0 :                         panic(errors.AssertionFailedf("pebble: iterator is valid with non-nil Error: %+v", err))
    2349              :                 }
    2350              :         }
    2351            1 :         return valid
    2352              : }
    2353              : 
    2354              : // Error returns any accumulated error.
    2355            1 : func (i *Iterator) Error() error {
    2356            1 :         if i.err != nil {
    2357            1 :                 return i.err
    2358            1 :         }
    2359            1 :         if i.iter != nil {
    2360            1 :                 return i.iter.Error()
    2361            1 :         }
    2362            0 :         return nil
    2363              : }
    2364              : 
    2365              : const maxKeyBufCacheSize = 4 << 10 // 4 KB
    2366              : 
    2367              : // Close closes the iterator and returns any accumulated error. Exhausting
    2368              : // all the key/value pairs in a table is not considered to be an error.
    2369              : // It is not valid to call any method, including Close, after the iterator
    2370              : // has been closed.
    2371            1 : func (i *Iterator) Close() error {
    2372            1 :         if i.trackerHandle.IsValid() {
    2373            1 :                 i.tracker.Stop(i.trackerHandle)
    2374            1 :                 i.trackerHandle = 0
    2375            1 :         }
    2376              :         // Close the child iterator before releasing the readState because when the
    2377              :         // readState is released sstables referenced by the readState may be deleted
    2378              :         // which will fail on Windows if the sstables are still open by the child
    2379              :         // iterator.
    2380            1 :         if i.iter != nil {
    2381            1 :                 i.err = firstError(i.err, i.iter.Close())
    2382            1 : 
    2383            1 :                 // Closing i.iter did not necessarily close the point and range key
    2384            1 :                 // iterators. Calls to SetOptions may have 'disconnected' either one
    2385            1 :                 // from i.iter if iteration key types were changed. Both point and range
    2386            1 :                 // key iterators are preserved in case the iterator needs to switch key
    2387            1 :                 // types again. We explicitly close both of these iterators here.
    2388            1 :                 //
    2389            1 :                 // NB: If the iterators were still connected to i.iter, they may be
    2390            1 :                 // closed, but calling Close on a closed internal iterator or fragment
    2391            1 :                 // iterator is allowed.
    2392            1 :                 if i.pointIter != nil {
    2393            1 :                         i.err = firstError(i.err, i.pointIter.Close())
    2394            1 :                 }
    2395            1 :                 if i.rangeKey != nil && i.rangeKey.rangeKeyIter != nil {
    2396            1 :                         i.rangeKey.rangeKeyIter.Close()
    2397            1 :                 }
    2398            1 :                 i.err = firstError(i.err, i.blobValueFetcher.Close())
    2399              :         }
    2400            1 :         err := i.err
    2401            1 : 
    2402            1 :         if i.readState != nil {
    2403            1 :                 if i.readSampling.pendingCompactions.size > 0 {
    2404            1 :                         // Copy pending read compactions using db.mu.Lock()
    2405            1 :                         i.readState.db.mu.Lock()
    2406            1 :                         i.readState.db.mu.compact.readCompactions.combine(&i.readSampling.pendingCompactions, i.cmp)
    2407            1 :                         reschedule := i.readState.db.mu.compact.rescheduleReadCompaction
    2408            1 :                         i.readState.db.mu.compact.rescheduleReadCompaction = false
    2409            1 :                         concurrentCompactions := i.readState.db.mu.compact.compactingCount
    2410            1 :                         i.readState.db.mu.Unlock()
    2411            1 : 
    2412            1 :                         if reschedule && concurrentCompactions == 0 {
    2413            0 :                                 // In a read heavy workload, flushes may not happen frequently enough to
    2414            0 :                                 // schedule compactions.
    2415            0 :                                 i.readState.db.compactionSchedulers.Add(1)
    2416            0 :                                 go i.readState.db.maybeScheduleCompactionAsync()
    2417            0 :                         }
    2418              :                 }
    2419              : 
    2420            1 :                 i.readState.unref()
    2421            1 :                 i.readState = nil
    2422              :         }
    2423              : 
    2424            1 :         if i.version != nil {
    2425            1 :                 i.version.Unref()
    2426            1 :         }
    2427            1 :         if i.externalIter != nil {
    2428            1 :                 err = firstError(err, i.externalIter.Close())
    2429            1 :         }
    2430              : 
    2431              :         // Close the closer for the current value if one was open.
    2432            1 :         if i.valueCloser != nil {
    2433            1 :                 err = firstError(err, i.valueCloser.Close())
    2434            1 :                 i.valueCloser = nil
    2435            1 :         }
    2436              : 
    2437            1 :         if i.rangeKey != nil {
    2438            1 :                 i.rangeKey.rangeKeyBuffers.PrepareForReuse()
    2439            1 :                 *i.rangeKey = iteratorRangeKeyState{
    2440            1 :                         rangeKeyBuffers: i.rangeKey.rangeKeyBuffers,
    2441            1 :                 }
    2442            1 :                 iterRangeKeyStateAllocPool.Put(i.rangeKey)
    2443            1 :                 i.rangeKey = nil
    2444            1 :         }
    2445              : 
    2446            1 :         alloc := i.alloc
    2447            1 :         if alloc == nil {
    2448            1 :                 // NB: When the Iterator is used as a part of a Get(), Close() is called by
    2449            1 :                 // getIterAlloc.Close which handles recycling the appropriate structure and
    2450            1 :                 // fields.
    2451            1 :                 return err
    2452            1 :         }
    2453              : 
    2454              :         // Reset the alloc struct, retaining any buffers within their maximum
    2455              :         // bounds, and return the iterAlloc struct to the pool.
    2456              : 
    2457              :         // Avoid caching the key buf if it is overly large. The constant is fairly
    2458              :         // arbitrary.
    2459            1 :         if cap(i.keyBuf) < maxKeyBufCacheSize {
    2460            1 :                 alloc.keyBuf = i.keyBuf
    2461            1 :         }
    2462            1 :         if cap(i.prefixOrFullSeekKey) < maxKeyBufCacheSize {
    2463            1 :                 alloc.prefixOrFullSeekKey = i.prefixOrFullSeekKey
    2464            1 :         }
    2465            1 :         for j := range i.boundsBuf {
    2466            1 :                 if cap(i.boundsBuf[j]) < maxKeyBufCacheSize {
    2467            1 :                         alloc.boundsBuf[j] = i.boundsBuf[j]
    2468            1 :                 }
    2469              :         }
    2470            1 :         mergingIterHeapItems := alloc.merging.heap.items
    2471            1 : 
    2472            1 :         // We zero each field piecemeal in part to avoid the use of a stack
    2473            1 :         // allocated autotmp iterAlloc variable, and in part to make it easier
    2474            1 :         // to make zeroing conditional on whether the field was actually used.
    2475            1 :         //
    2476            1 :         // TODO(jackson,nvanbenschoten): Iterator.Close continues to appear in
    2477            1 :         // profiles in part because of this zeroing.
    2478            1 :         if i.batch != nil {
    2479            1 :                 alloc.batchState = iteratorBatchState{}
    2480            1 :         }
    2481            1 :         alloc.dbi.clearForReuse()
    2482            1 :         alloc.merging = mergingIter{}
    2483            1 :         clear(mergingIterHeapItems[:cap(mergingIterHeapItems)])
    2484            1 :         alloc.merging.heap.items = mergingIterHeapItems
    2485            1 :         clear(alloc.mlevels[:])
    2486            1 :         clear(alloc.levels[:])
    2487            1 :         clear(alloc.levelsPositioned[:])
    2488            1 : 
    2489            1 :         iterAllocPool.Put(alloc)
    2490            1 :         return err
    2491              : }
    2492              : 
    2493              : // SetBounds sets the lower and upper bounds for the iterator. Once SetBounds
    2494              : // returns, the caller is free to mutate the provided slices.
    2495              : //
    2496              : // The iterator will always be invalidated and must be repositioned with a call
    2497              : // to SeekGE, SeekPrefixGE, SeekLT, First, or Last.
    2498            1 : func (i *Iterator) SetBounds(lower, upper []byte) {
    2499            1 :         // Ensure that the Iterator appears exhausted, regardless of whether we
    2500            1 :         // actually have to invalidate the internal iterator. Optimizations that
    2501            1 :         // avoid exhaustion are an internal implementation detail that shouldn't
    2502            1 :         // leak through the interface. The caller should still call an absolute
    2503            1 :         // positioning method to reposition the iterator.
    2504            1 :         i.requiresReposition = true
    2505            1 : 
    2506            1 :         if ((i.opts.LowerBound == nil) == (lower == nil)) &&
    2507            1 :                 ((i.opts.UpperBound == nil) == (upper == nil)) &&
    2508            1 :                 i.equal(i.opts.LowerBound, lower) &&
    2509            1 :                 i.equal(i.opts.UpperBound, upper) {
    2510            1 :                 // Unchanged, noop.
    2511            1 :                 return
    2512            1 :         }
    2513              : 
    2514              :         // Copy the user-provided bounds into an Iterator-owned buffer, and set them
    2515              :         // on i.opts.{Lower,Upper}Bound.
    2516            1 :         i.processBounds(lower, upper)
    2517            1 : 
    2518            1 :         i.iter.SetBounds(i.opts.LowerBound, i.opts.UpperBound)
    2519            1 :         // If the iterator has an open point iterator that's not currently being
    2520            1 :         // used, propagate the new bounds to it.
    2521            1 :         if i.pointIter != nil && !i.opts.pointKeys() {
    2522            1 :                 i.pointIter.SetBounds(i.opts.LowerBound, i.opts.UpperBound)
    2523            1 :         }
    2524              :         // If the iterator has a range key iterator, propagate bounds to it. The
    2525              :         // top-level SetBounds on the interleaving iterator (i.iter) won't propagate
    2526              :         // bounds to the range key iterator stack, because the FragmentIterator
    2527              :         // interface doesn't define a SetBounds method. We need to directly inform
    2528              :         // the iterConfig stack.
    2529            1 :         if i.rangeKey != nil {
    2530            1 :                 i.rangeKey.iterConfig.SetBounds(i.opts.LowerBound, i.opts.UpperBound)
    2531            1 :         }
    2532              : 
    2533              :         // Even though this is not a positioning operation, the alteration of the
    2534              :         // bounds means we cannot optimize Seeks by using Next.
    2535            1 :         i.invalidate()
    2536              : }
    2537              : 
    2538              : // SetContext replaces the context provided at iterator creation, or the last
    2539              : // one provided by SetContext. Even though iterators are expected to be
    2540              : // short-lived, there are some cases where either (a) iterators are used far
    2541              : // from the code that created them, (b) iterators are reused (while being
    2542              : // short-lived) for processing different requests. For such scenarios, we
    2543              : // allow the caller to replace the context.
    2544            0 : func (i *Iterator) SetContext(ctx context.Context) {
    2545            0 :         i.ctx = ctx
    2546            0 :         i.iter.SetContext(ctx)
    2547            0 :         // If the iterator has an open point iterator that's not currently being
    2548            0 :         // used, propagate the new context to it.
    2549            0 :         if i.pointIter != nil && !i.opts.pointKeys() {
    2550            0 :                 i.pointIter.SetContext(i.ctx)
    2551            0 :         }
    2552              : }
    2553              : 
    2554              : // Initialization and changing of the bounds must call processBounds.
    2555              : // processBounds saves the bounds and computes derived state from those
    2556              : // bounds.
    2557            1 : func (i *Iterator) processBounds(lower, upper []byte) {
    2558            1 :         // Copy the user-provided bounds into an Iterator-owned buffer. We can't
    2559            1 :         // overwrite the current bounds, because some internal iterators compare old
    2560            1 :         // and new bounds for optimizations.
    2561            1 : 
    2562            1 :         buf := i.boundsBuf[i.boundsBufIdx][:0]
    2563            1 :         if lower != nil {
    2564            1 :                 buf = append(buf, lower...)
    2565            1 :                 i.opts.LowerBound = buf
    2566            1 :         } else {
    2567            1 :                 i.opts.LowerBound = nil
    2568            1 :         }
    2569            1 :         i.nextPrefixNotPermittedByUpperBound = false
    2570            1 :         if upper != nil {
    2571            1 :                 buf = append(buf, upper...)
    2572            1 :                 i.opts.UpperBound = buf[len(buf)-len(upper):]
    2573            1 :                 if i.comparer.Split(i.opts.UpperBound) != len(i.opts.UpperBound) {
    2574            1 :                         // Setting an upper bound that is a versioned MVCC key. This means
    2575            1 :                         // that a key can have some MVCC versions before the upper bound and
    2576            1 :                         // some after. This causes significant complications for NextPrefix,
    2577            1 :                         // so we bar the user of NextPrefix.
    2578            1 :                         i.nextPrefixNotPermittedByUpperBound = true
    2579            1 :                 }
    2580            1 :         } else {
    2581            1 :                 i.opts.UpperBound = nil
    2582            1 :         }
    2583            1 :         i.boundsBuf[i.boundsBufIdx] = buf
    2584            1 :         i.boundsBufIdx = 1 - i.boundsBufIdx
    2585              : }
    2586              : 
    2587              : // SetOptions sets new iterator options for the iterator. Note that the lower
    2588              : // and upper bounds applied here will supersede any bounds set by previous calls
    2589              : // to SetBounds.
    2590              : //
    2591              : // Note that the slices provided in this SetOptions must not be changed by the
    2592              : // caller until the iterator is closed, or a subsequent SetBounds or SetOptions
    2593              : // has returned. This is because comparisons between the existing and new bounds
    2594              : // are sometimes used to optimize seeking. See the extended commentary on
    2595              : // SetBounds.
    2596              : //
    2597              : // If the iterator was created over an indexed mutable batch, the iterator's
    2598              : // view of the mutable batch is refreshed.
    2599              : //
    2600              : // The iterator will always be invalidated and must be repositioned with a call
    2601              : // to SeekGE, SeekPrefixGE, SeekLT, First, or Last.
    2602              : //
    2603              : // If only lower and upper bounds need to be modified, prefer SetBounds.
    2604            1 : func (i *Iterator) SetOptions(o *IterOptions) {
    2605            1 :         if i.externalIter != nil {
    2606            1 :                 if err := validateExternalIterOpts(o); err != nil {
    2607            0 :                         panic(err)
    2608              :                 }
    2609              :         }
    2610              : 
    2611              :         // Ensure that the Iterator appears exhausted, regardless of whether we
    2612              :         // actually have to invalidate the internal iterator. Optimizations that
    2613              :         // avoid exhaustion are an internal implementation detail that shouldn't
    2614              :         // leak through the interface. The caller should still call an absolute
    2615              :         // positioning method to reposition the iterator.
    2616            1 :         i.requiresReposition = true
    2617            1 : 
    2618            1 :         // Check if global state requires we close all internal iterators.
    2619            1 :         //
    2620            1 :         // If the Iterator is in an error state, invalidate the existing iterators
    2621            1 :         // so that we reconstruct an iterator state from scratch.
    2622            1 :         //
    2623            1 :         // If OnlyReadGuaranteedDurable changed, the iterator stacks are incorrect,
    2624            1 :         // improperly including or excluding memtables. Invalidate them so that
    2625            1 :         // finishInitializingIter will reconstruct them.
    2626            1 :         closeBoth := i.err != nil ||
    2627            1 :                 o.OnlyReadGuaranteedDurable != i.opts.OnlyReadGuaranteedDurable
    2628            1 : 
    2629            1 :         // If either options specify block property filters for an iterator stack,
    2630            1 :         // reconstruct it.
    2631            1 :         if i.pointIter != nil && (closeBoth || len(o.PointKeyFilters) > 0 || len(i.opts.PointKeyFilters) > 0 ||
    2632            1 :                 o.RangeKeyMasking.Filter != nil || i.opts.RangeKeyMasking.Filter != nil || o.SkipPoint != nil ||
    2633            1 :                 i.opts.SkipPoint != nil) {
    2634            1 :                 i.err = firstError(i.err, i.pointIter.Close())
    2635            1 :                 i.pointIter = nil
    2636            1 :         }
    2637            1 :         if i.rangeKey != nil {
    2638            1 :                 if closeBoth || len(o.RangeKeyFilters) > 0 || len(i.opts.RangeKeyFilters) > 0 {
    2639            1 :                         i.rangeKey.rangeKeyIter.Close()
    2640            1 :                         i.rangeKey = nil
    2641            1 :                 } else {
    2642            1 :                         // If there's still a range key iterator stack, invalidate the
    2643            1 :                         // iterator. This ensures RangeKeyChanged() returns true if a
    2644            1 :                         // subsequent positioning operation discovers a range key. It also
    2645            1 :                         // prevents seek no-op optimizations.
    2646            1 :                         i.invalidate()
    2647            1 :                 }
    2648              :         }
    2649              : 
    2650              :         // If the iterator is backed by a batch that's been mutated, refresh its
    2651              :         // existing point and range-key iterators, and invalidate the iterator to
    2652              :         // prevent seek-using-next optimizations. If we don't yet have a point-key
    2653              :         // iterator or range-key iterator but we require one, it'll be created in
    2654              :         // the slow path that reconstructs the iterator in finishInitializingIter.
    2655            1 :         if i.batch != nil {
    2656            1 :                 nextBatchSeqNum := (base.SeqNum(len(i.batch.batch.data)) | base.SeqNumBatchBit)
    2657            1 :                 if nextBatchSeqNum != i.batch.batchSeqNum {
    2658            1 :                         i.batch.batchSeqNum = nextBatchSeqNum
    2659            1 :                         if i.merging != nil {
    2660            1 :                                 i.merging.batchSnapshot = nextBatchSeqNum
    2661            1 :                         }
    2662              :                         // Prevent a no-op seek optimization on the next seek. We won't be
    2663              :                         // able to reuse the top-level Iterator state, because it may be
    2664              :                         // incorrect after the inclusion of new batch mutations.
    2665            1 :                         i.batchJustRefreshed = true
    2666            1 :                         if i.pointIter != nil && i.batch.batch.countRangeDels > 0 {
    2667            1 :                                 if i.batch.rangeDelIter.Count() == 0 {
    2668            1 :                                         // When we constructed this iterator, there were no
    2669            1 :                                         // rangedels in the batch. Iterator construction will
    2670            1 :                                         // have excluded the batch rangedel iterator from the
    2671            1 :                                         // point iterator stack. We need to reconstruct the
    2672            1 :                                         // point iterator to add i.batchRangeDelIter into the
    2673            1 :                                         // iterator stack.
    2674            1 :                                         i.err = firstError(i.err, i.pointIter.Close())
    2675            1 :                                         i.pointIter = nil
    2676            1 :                                 } else {
    2677            1 :                                         // There are range deletions in the batch and we already
    2678            1 :                                         // have a batch rangedel iterator. We can update the
    2679            1 :                                         // batch rangedel iterator in place.
    2680            1 :                                         //
    2681            1 :                                         // NB: There may or may not be new range deletions. We
    2682            1 :                                         // can't tell based on i.batchRangeDelIter.Count(),
    2683            1 :                                         // which is the count of fragmented range deletions, NOT
    2684            1 :                                         // the number of range deletions written to the batch
    2685            1 :                                         // [i.batch.countRangeDels].
    2686            1 :                                         i.batch.batch.initRangeDelIter(&i.opts, &i.batch.rangeDelIter, nextBatchSeqNum)
    2687            1 :                                 }
    2688              :                         }
    2689            1 :                         if i.rangeKey != nil && i.batch.batch.countRangeKeys > 0 {
    2690            1 :                                 if i.batch.rangeKeyIter.Count() == 0 {
    2691            1 :                                         // When we constructed this iterator, there were no range
    2692            1 :                                         // keys in the batch. Iterator construction will have
    2693            1 :                                         // excluded the batch rangekey iterator from the range key
    2694            1 :                                         // iterator stack. We need to reconstruct the range key
    2695            1 :                                         // iterator to add i.batchRangeKeyIter into the iterator
    2696            1 :                                         // stack.
    2697            1 :                                         i.rangeKey.rangeKeyIter.Close()
    2698            1 :                                         i.rangeKey = nil
    2699            1 :                                 } else {
    2700            1 :                                         // There are range keys in the batch and we already
    2701            1 :                                         // have a batch rangekey iterator. We can update the batch
    2702            1 :                                         // rangekey iterator in place.
    2703            1 :                                         //
    2704            1 :                                         // NB: There may or may not be new range keys. We can't
    2705            1 :                                         // tell based on i.batchRangeKeyIter.Count(), which is the
    2706            1 :                                         // count of fragmented range keys, NOT the number of
    2707            1 :                                         // range keys written to the batch [i.batch.countRangeKeys].
    2708            1 :                                         i.batch.batch.initRangeKeyIter(&i.opts, &i.batch.rangeKeyIter, nextBatchSeqNum)
    2709            1 :                                         i.invalidate()
    2710            1 :                                 }
    2711              :                         }
    2712              :                 }
    2713              :         }
    2714              : 
    2715              :         // Reset combinedIterState.initialized in case the iterator key types
    2716              :         // changed. If there's already a range key iterator stack, the combined
    2717              :         // iterator is already initialized.  Additionally, if the iterator is not
    2718              :         // configured to include range keys, mark it as initialized to signal that
    2719              :         // lower level iterators should not trigger a switch to combined iteration.
    2720            1 :         i.lazyCombinedIter.combinedIterState = combinedIterState{
    2721            1 :                 initialized: i.rangeKey != nil || !i.opts.rangeKeys(),
    2722            1 :         }
    2723            1 : 
    2724            1 :         boundsEqual := ((i.opts.LowerBound == nil) == (o.LowerBound == nil)) &&
    2725            1 :                 ((i.opts.UpperBound == nil) == (o.UpperBound == nil)) &&
    2726            1 :                 i.equal(i.opts.LowerBound, o.LowerBound) &&
    2727            1 :                 i.equal(i.opts.UpperBound, o.UpperBound)
    2728            1 : 
    2729            1 :         if boundsEqual && o.KeyTypes == i.opts.KeyTypes &&
    2730            1 :                 (i.pointIter != nil || !i.opts.pointKeys()) &&
    2731            1 :                 (i.rangeKey != nil || !i.opts.rangeKeys() || i.opts.KeyTypes == IterKeyTypePointsAndRanges) &&
    2732            1 :                 i.comparer.CompareRangeSuffixes(o.RangeKeyMasking.Suffix, i.opts.RangeKeyMasking.Suffix) == 0 &&
    2733            1 :                 o.UseL6Filters == i.opts.UseL6Filters {
    2734            1 :                 // The options are identical, so we can likely use the fast path. In
    2735            1 :                 // addition to all the above constraints, we cannot use the fast path if
    2736            1 :                 // configured to perform lazy combined iteration but an indexed batch
    2737            1 :                 // used by the iterator now contains range keys. Lazy combined iteration
    2738            1 :                 // is not compatible with batch range keys because we always need to
    2739            1 :                 // merge the batch's range keys into iteration.
    2740            1 :                 if i.rangeKey != nil || !i.opts.rangeKeys() || i.batch == nil || i.batch.batch.countRangeKeys == 0 {
    2741            1 :                         // Fast path. This preserves the Seek-using-Next optimizations as
    2742            1 :                         // long as the iterator wasn't already invalidated up above.
    2743            1 :                         return
    2744            1 :                 }
    2745              :         }
    2746              :         // Slow path.
    2747              : 
    2748              :         // The options changed. Save the new ones to i.opts.
    2749            1 :         if boundsEqual {
    2750            1 :                 // Copying the options into i.opts will overwrite LowerBound and
    2751            1 :                 // UpperBound fields with the user-provided slices. We need to hold on
    2752            1 :                 // to the Pebble-owned slices, so save them and re-set them after the
    2753            1 :                 // copy.
    2754            1 :                 lower, upper := i.opts.LowerBound, i.opts.UpperBound
    2755            1 :                 i.opts = *o
    2756            1 :                 i.opts.LowerBound, i.opts.UpperBound = lower, upper
    2757            1 :         } else {
    2758            1 :                 i.opts = *o
    2759            1 :                 i.processBounds(o.LowerBound, o.UpperBound)
    2760            1 :                 // Propagate the changed bounds to the existing point iterator.
    2761            1 :                 // NB: We propagate i.opts.{Lower,Upper}Bound, not o.{Lower,Upper}Bound
    2762            1 :                 // because i.opts now point to buffers owned by Pebble.
    2763            1 :                 if i.pointIter != nil {
    2764            1 :                         i.pointIter.SetBounds(i.opts.LowerBound, i.opts.UpperBound)
    2765            1 :                 }
    2766            1 :                 if i.rangeKey != nil {
    2767            1 :                         i.rangeKey.iterConfig.SetBounds(i.opts.LowerBound, i.opts.UpperBound)
    2768            1 :                 }
    2769              :         }
    2770              : 
    2771              :         // Even though this is not a positioning operation, the invalidation of the
    2772              :         // iterator stack means we cannot optimize Seeks by using Next.
    2773            1 :         i.invalidate()
    2774            1 : 
    2775            1 :         // Iterators created through NewExternalIter have a different iterator
    2776            1 :         // initialization process.
    2777            1 :         if i.externalIter != nil {
    2778            1 :                 _ = finishInitializingExternal(i.ctx, i)
    2779            1 :                 return
    2780            1 :         }
    2781            1 :         finishInitializingIter(i.ctx, i.alloc)
    2782              : }
    2783              : 
    2784            1 : func (i *Iterator) invalidate() {
    2785            1 :         i.lastPositioningOp = unknownLastPositionOp
    2786            1 :         i.hasPrefix = false
    2787            1 :         i.iterKV = nil
    2788            1 :         i.err = nil
    2789            1 :         // This switch statement isn't necessary for correctness since callers
    2790            1 :         // should call a repositioning method. We could have arbitrarily set i.pos
    2791            1 :         // to one of the values. But it results in more intuitive behavior in
    2792            1 :         // tests, which do not always reposition.
    2793            1 :         switch i.pos {
    2794            1 :         case iterPosCurForward, iterPosNext, iterPosCurForwardPaused:
    2795            1 :                 i.pos = iterPosCurForward
    2796            1 :         case iterPosCurReverse, iterPosPrev, iterPosCurReversePaused:
    2797            1 :                 i.pos = iterPosCurReverse
    2798              :         }
    2799            1 :         i.iterValidityState = IterExhausted
    2800            1 :         if i.rangeKey != nil {
    2801            1 :                 i.rangeKey.iiter.Invalidate()
    2802            1 :                 i.rangeKey.prevPosHadRangeKey = false
    2803            1 :         }
    2804              : }
    2805              : 
    2806              : // Metrics returns per-iterator metrics.
    2807            0 : func (i *Iterator) Metrics() IteratorMetrics {
    2808            0 :         m := IteratorMetrics{
    2809            0 :                 ReadAmp: 1,
    2810            0 :         }
    2811            0 :         if mi, ok := i.iter.(*mergingIter); ok {
    2812            0 :                 m.ReadAmp = len(mi.levels)
    2813            0 :         }
    2814            0 :         return m
    2815              : }
    2816              : 
    2817              : // ResetStats resets the stats to 0.
    2818            0 : func (i *Iterator) ResetStats() {
    2819            0 :         i.stats = IteratorStats{}
    2820            0 : }
    2821              : 
    2822              : // Stats returns the current stats.
    2823            1 : func (i *Iterator) Stats() IteratorStats {
    2824            1 :         return i.stats
    2825            1 : }
    2826              : 
    2827              : // CloneOptions configures an iterator constructed through Iterator.Clone.
    2828              : type CloneOptions struct {
    2829              :         // IterOptions, if non-nil, define the iterator options to configure a
    2830              :         // cloned iterator. If nil, the clone adopts the same IterOptions as the
    2831              :         // iterator being cloned.
    2832              :         IterOptions *IterOptions
    2833              :         // RefreshBatchView may be set to true when cloning an Iterator over an
    2834              :         // indexed batch. When false, the clone adopts the same (possibly stale)
    2835              :         // view of the indexed batch as the cloned Iterator. When true, the clone is
    2836              :         // constructed with a refreshed view of the batch, observing all of the
    2837              :         // batch's mutations at the time of the Clone. If the cloned iterator was
    2838              :         // not constructed to read over an indexed batch, RefreshVatchView has no
    2839              :         // effect.
    2840              :         RefreshBatchView bool
    2841              : }
    2842              : 
    2843              : // Clone creates a new Iterator over the same underlying data, i.e., over the
    2844              : // same {batch, memtables, sstables}). The resulting iterator is not positioned.
    2845              : // It starts with the same IterOptions, unless opts.IterOptions is set.
    2846              : //
    2847              : // When called on an Iterator over an indexed batch, the clone's visibility of
    2848              : // the indexed batch is determined by CloneOptions.RefreshBatchView. If false,
    2849              : // the clone inherits the iterator's current (possibly stale) view of the batch,
    2850              : // and callers may call SetOptions to subsequently refresh the clone's view to
    2851              : // include all batch mutations. If true, the clone is constructed with a
    2852              : // complete view of the indexed batch's mutations at the time of the Clone.
    2853              : //
    2854              : // Callers can use Clone if they need multiple iterators that need to see
    2855              : // exactly the same underlying state of the DB. This should not be used to
    2856              : // extend the lifetime of the data backing the original Iterator since that
    2857              : // will cause an increase in memory and disk usage (use NewSnapshot for that
    2858              : // purpose).
    2859            1 : func (i *Iterator) Clone(opts CloneOptions) (*Iterator, error) {
    2860            1 :         return i.CloneWithContext(context.Background(), opts)
    2861            1 : }
    2862              : 
    2863              : // CloneWithContext is like Clone, and additionally accepts a context for
    2864              : // tracing.
    2865            1 : func (i *Iterator) CloneWithContext(ctx context.Context, opts CloneOptions) (*Iterator, error) {
    2866            1 :         if opts.IterOptions == nil {
    2867            1 :                 opts.IterOptions = &i.opts
    2868            1 :         }
    2869            1 :         if i.batchOnlyIter {
    2870            1 :                 return nil, errors.Errorf("cannot Clone a batch-only Iterator")
    2871            1 :         }
    2872            1 :         readState := i.readState
    2873            1 :         vers := i.version
    2874            1 :         if readState == nil && vers == nil {
    2875            1 :                 return nil, errors.Errorf("cannot Clone a closed Iterator")
    2876            1 :         }
    2877              :         // i is already holding a ref, so there is no race with unref here.
    2878              :         //
    2879              :         // TODO(bilal): If the underlying iterator was created on a snapshot, we could
    2880              :         // grab a reference to the current readState instead of reffing the original
    2881              :         // readState. This allows us to release references to some zombie sstables
    2882              :         // and memtables.
    2883            1 :         if readState != nil {
    2884            1 :                 readState.ref()
    2885            1 :         }
    2886            1 :         if vers != nil {
    2887            1 :                 vers.Ref()
    2888            1 :         }
    2889              :         // Bundle various structures under a single umbrella in order to allocate
    2890              :         // them together.
    2891            1 :         buf := newIterAlloc()
    2892            1 :         dbi := &buf.dbi
    2893            1 :         *dbi = Iterator{
    2894            1 :                 ctx:                   ctx,
    2895            1 :                 opts:                  *opts.IterOptions,
    2896            1 :                 alloc:                 buf,
    2897            1 :                 merge:                 i.merge,
    2898            1 :                 comparer:              i.comparer,
    2899            1 :                 readState:             readState,
    2900            1 :                 version:               vers,
    2901            1 :                 keyBuf:                buf.keyBuf,
    2902            1 :                 prefixOrFullSeekKey:   buf.prefixOrFullSeekKey,
    2903            1 :                 boundsBuf:             buf.boundsBuf,
    2904            1 :                 fc:                    i.fc,
    2905            1 :                 newIters:              i.newIters,
    2906            1 :                 newIterRangeKey:       i.newIterRangeKey,
    2907            1 :                 valueRetrievalProfile: i.valueRetrievalProfile,
    2908            1 :                 seqNum:                i.seqNum,
    2909            1 :                 tracker:               i.tracker,
    2910            1 :         }
    2911            1 :         if i.tracker != nil && !dbi.opts.ExemptFromTracking {
    2912            1 :                 dbi.trackerHandle = i.tracker.Start()
    2913            1 :         }
    2914            1 :         if i.batch != nil {
    2915            1 :                 dbi.batch = &buf.batchState
    2916            1 :                 dbi.batch.batch = i.batch.batch
    2917            1 :                 dbi.batch.batchSeqNum = i.batch.batchSeqNum
    2918            1 :                 // If the caller requested the clone have a current view of the indexed
    2919            1 :                 // batch, set the clone's batch sequence number appropriately.
    2920            1 :                 if opts.RefreshBatchView {
    2921            1 :                         dbi.batch.batchSeqNum = (base.SeqNum(len(i.batch.batch.data)) | base.SeqNumBatchBit)
    2922            1 :                 }
    2923              :         }
    2924            1 :         dbi.processBounds(dbi.opts.LowerBound, dbi.opts.UpperBound)
    2925            1 :         return finishInitializingIter(ctx, buf), nil
    2926              : }
    2927              : 
    2928              : // Merge adds all of the argument's statistics to the receiver. It may be used
    2929              : // to accumulate stats across multiple iterators.
    2930            1 : func (stats *IteratorStats) Merge(o IteratorStats) {
    2931            1 :         for i := InterfaceCall; i < NumStatsKind; i++ {
    2932            1 :                 stats.ForwardSeekCount[i] += o.ForwardSeekCount[i]
    2933            1 :                 stats.ReverseSeekCount[i] += o.ReverseSeekCount[i]
    2934            1 :                 stats.ForwardStepCount[i] += o.ForwardStepCount[i]
    2935            1 :                 stats.ReverseStepCount[i] += o.ReverseStepCount[i]
    2936            1 :         }
    2937            1 :         stats.InternalStats.Merge(o.InternalStats)
    2938            1 :         stats.RangeKeyStats.Merge(o.RangeKeyStats)
    2939              : }
    2940              : 
    2941            1 : func (stats *IteratorStats) String() string {
    2942            1 :         return redact.StringWithoutMarkers(stats)
    2943            1 : }
    2944              : 
    2945              : // SafeFormat implements the redact.SafeFormatter interface.
    2946            1 : func (stats *IteratorStats) SafeFormat(s redact.SafePrinter, verb rune) {
    2947            1 :         if stats.ReverseSeekCount[InterfaceCall] == 0 && stats.ReverseSeekCount[InternalIterCall] == 0 {
    2948            1 :                 s.Printf("seeked %s times (%s internal)",
    2949            1 :                         humanize.Count.Uint64(uint64(stats.ForwardSeekCount[InterfaceCall])),
    2950            1 :                         humanize.Count.Uint64(uint64(stats.ForwardSeekCount[InternalIterCall])),
    2951            1 :                 )
    2952            1 :         } else {
    2953            1 :                 s.Printf("seeked %s times (%s fwd/%s rev, internal: %s fwd/%s rev)",
    2954            1 :                         humanize.Count.Uint64(uint64(stats.ForwardSeekCount[InterfaceCall]+stats.ReverseSeekCount[InterfaceCall])),
    2955            1 :                         humanize.Count.Uint64(uint64(stats.ForwardSeekCount[InterfaceCall])),
    2956            1 :                         humanize.Count.Uint64(uint64(stats.ReverseSeekCount[InterfaceCall])),
    2957            1 :                         humanize.Count.Uint64(uint64(stats.ForwardSeekCount[InternalIterCall])),
    2958            1 :                         humanize.Count.Uint64(uint64(stats.ReverseSeekCount[InternalIterCall])),
    2959            1 :                 )
    2960            1 :         }
    2961            1 :         s.SafeString("; ")
    2962            1 : 
    2963            1 :         if stats.ReverseStepCount[InterfaceCall] == 0 && stats.ReverseStepCount[InternalIterCall] == 0 {
    2964            1 :                 s.Printf("stepped %s times (%s internal)",
    2965            1 :                         humanize.Count.Uint64(uint64(stats.ForwardStepCount[InterfaceCall])),
    2966            1 :                         humanize.Count.Uint64(uint64(stats.ForwardStepCount[InternalIterCall])),
    2967            1 :                 )
    2968            1 :         } else {
    2969            1 :                 s.Printf("stepped %s times (%s fwd/%s rev, internal: %s fwd/%s rev)",
    2970            1 :                         humanize.Count.Uint64(uint64(stats.ForwardStepCount[InterfaceCall]+stats.ReverseStepCount[InterfaceCall])),
    2971            1 :                         humanize.Count.Uint64(uint64(stats.ForwardStepCount[InterfaceCall])),
    2972            1 :                         humanize.Count.Uint64(uint64(stats.ReverseStepCount[InterfaceCall])),
    2973            1 :                         humanize.Count.Uint64(uint64(stats.ForwardStepCount[InternalIterCall])),
    2974            1 :                         humanize.Count.Uint64(uint64(stats.ReverseStepCount[InternalIterCall])),
    2975            1 :                 )
    2976            1 :         }
    2977              : 
    2978            1 :         if stats.InternalStats != (InternalIteratorStats{}) {
    2979            1 :                 s.SafeString("; ")
    2980            1 :                 stats.InternalStats.SafeFormat(s, verb)
    2981            1 :         }
    2982            1 :         if stats.RangeKeyStats != (RangeKeyIteratorStats{}) {
    2983            1 :                 s.SafeString(", ")
    2984            1 :                 stats.RangeKeyStats.SafeFormat(s, verb)
    2985            1 :         }
    2986              : }
    2987              : 
    2988              : // CanDeterministicallySingleDelete takes a valid iterator and examines internal
    2989              : // state to determine if a SingleDelete deleting Iterator.Key() would
    2990              : // deterministically delete the key. CanDeterministicallySingleDelete requires
    2991              : // the iterator to be oriented in the forward direction (eg, the last
    2992              : // positioning operation must've been a First, a Seek[Prefix]GE, or a
    2993              : // Next[Prefix][WithLimit]).
    2994              : //
    2995              : // This function does not change the external position of the iterator, and all
    2996              : // positioning methods should behave the same as if it was never called. This
    2997              : // function will only return a meaningful result the first time it's invoked at
    2998              : // an iterator position. This function invalidates the iterator Value's memory,
    2999              : // and the caller must not rely on the memory safety of the previous Iterator
    3000              : // position.
    3001              : //
    3002              : // If CanDeterministicallySingleDelete returns true AND the key at the iterator
    3003              : // position is not modified between the creation of the Iterator and the commit
    3004              : // of a batch containing a SingleDelete over the key, then the caller can be
    3005              : // assured that SingleDelete is equivalent to Delete on the local engine, but it
    3006              : // may not be true on another engine that received the same writes and with
    3007              : // logically equivalent state since this engine may have collapsed multiple SETs
    3008              : // into one.
    3009            1 : func CanDeterministicallySingleDelete(it *Iterator) (bool, error) {
    3010            1 :         // This function may only be called once per external iterator position. We
    3011            1 :         // can validate this by checking the last positioning operation.
    3012            1 :         if it.lastPositioningOp == internalNextOp {
    3013            1 :                 return false, errors.New("pebble: CanDeterministicallySingleDelete called twice")
    3014            1 :         }
    3015            1 :         validity, kind := it.internalNext()
    3016            1 :         var shadowedBySingleDelete bool
    3017            1 :         for validity == internalNextValid {
    3018            1 :                 switch kind {
    3019            1 :                 case InternalKeyKindDelete, InternalKeyKindDeleteSized:
    3020            1 :                         // A DEL or DELSIZED tombstone is okay. An internal key
    3021            1 :                         // sequence like SINGLEDEL; SET; DEL; SET can be handled
    3022            1 :                         // deterministically. If there are SETs further down, we
    3023            1 :                         // don't care about them.
    3024            1 :                         return true, nil
    3025            1 :                 case InternalKeyKindSingleDelete:
    3026            1 :                         // A SingleDelete is okay as long as when that SingleDelete was
    3027            1 :                         // written, it was written deterministically (eg, with its own
    3028            1 :                         // CanDeterministicallySingleDelete check). Validate that it was
    3029            1 :                         // written deterministically. We'll allow one set to appear after
    3030            1 :                         // the SingleDelete.
    3031            1 :                         shadowedBySingleDelete = true
    3032            1 :                         validity, kind = it.internalNext()
    3033            1 :                         continue
    3034            1 :                 case InternalKeyKindSet, InternalKeyKindSetWithDelete, InternalKeyKindMerge:
    3035            1 :                         // If we observed a single delete, it's allowed to delete 1 key.
    3036            1 :                         // We'll keep looping to validate that the internal keys beneath the
    3037            1 :                         // already-written single delete are copacetic.
    3038            1 :                         if shadowedBySingleDelete {
    3039            1 :                                 shadowedBySingleDelete = false
    3040            1 :                                 validity, kind = it.internalNext()
    3041            1 :                                 continue
    3042              :                         }
    3043              :                         // We encountered a shadowed SET, SETWITHDEL, MERGE. A SINGLEDEL
    3044              :                         // that deleted the KV at the original iterator position could
    3045              :                         // result in this key becoming visible.
    3046            1 :                         return false, nil
    3047            0 :                 case InternalKeyKindRangeDelete:
    3048            0 :                         // RangeDeletes are handled by the merging iterator and should never
    3049            0 :                         // be observed by the top-level Iterator.
    3050            0 :                         panic(errors.AssertionFailedf("pebble: unexpected range delete"))
    3051            0 :                 case InternalKeyKindRangeKeySet, InternalKeyKindRangeKeyUnset, InternalKeyKindRangeKeyDelete:
    3052            0 :                         // Range keys are interleaved at the maximal sequence number and
    3053            0 :                         // should never be observed within a user key.
    3054            0 :                         panic(errors.AssertionFailedf("pebble: unexpected range key"))
    3055            0 :                 default:
    3056            0 :                         panic(errors.AssertionFailedf("pebble: unexpected key kind: %s", errors.Safe(kind)))
    3057              :                 }
    3058              :         }
    3059            1 :         if validity == internalNextError {
    3060            1 :                 return false, it.Error()
    3061            1 :         }
    3062            1 :         return true, nil
    3063              : }
    3064              : 
    3065              : // internalNextValidity enumerates the potential outcomes of a call to
    3066              : // internalNext.
    3067              : type internalNextValidity int8
    3068              : 
    3069              : const (
    3070              :         // internalNextError is returned by internalNext when an error occurred and
    3071              :         // the caller is responsible for checking iter.Error().
    3072              :         internalNextError internalNextValidity = iota
    3073              :         // internalNextExhausted is returned by internalNext when the next internal
    3074              :         // key is an internal key with a different user key than Iterator.Key().
    3075              :         internalNextExhausted
    3076              :         // internalNextValid is returned by internalNext when the internal next
    3077              :         // found a shadowed internal key with a user key equal to Iterator.Key().
    3078              :         internalNextValid
    3079              : )
    3080              : 
    3081              : // internalNext advances internal Iterator state forward to expose the
    3082              : // InternalKeyKind of the next internal key with a user key equal to Key().
    3083              : //
    3084              : // internalNext is a highly specialized operation and is unlikely to be
    3085              : // generally useful. See Iterator.Next for how to reposition the iterator to the
    3086              : // next key. internalNext requires the Iterator to be at a valid position in the
    3087              : // forward direction (the last positioning operation must've been a First, a
    3088              : // Seek[Prefix]GE, or a Next[Prefix][WithLimit] and Valid() must return true).
    3089              : //
    3090              : // internalNext, unlike all other Iterator methods, exposes internal LSM state.
    3091              : // internalNext advances the Iterator's internal iterator to the next shadowed
    3092              : // key with a user key equal to Key(). When a key is overwritten or deleted, its
    3093              : // removal from the LSM occurs lazily as a part of compactions. internalNext
    3094              : // allows the caller to see whether an obsolete internal key exists with the
    3095              : // current Key(), and what it's key kind is. Note that the existence of an
    3096              : // internal key is nondeterministic and dependent on internal LSM state. These
    3097              : // semantics are unlikely to be applicable to almost all use cases.
    3098              : //
    3099              : // If internalNext finds a key that shares the same user key as Key(), it
    3100              : // returns internalNextValid and the internal key's kind. If internalNext
    3101              : // encounters an error, it returns internalNextError and the caller is expected
    3102              : // to call Iterator.Error() to retrieve it. In all other circumstances,
    3103              : // internalNext returns internalNextExhausted, indicating that there are no more
    3104              : // additional internal keys with the user key Key().
    3105              : //
    3106              : // internalNext does not change the external position of the iterator, and a
    3107              : // Next operation should behave the same as if internalNext was never called.
    3108              : // internalNext does invalidate the iterator Value's memory, and the caller must
    3109              : // not rely on the memory safety of the previous Iterator position.
    3110            1 : func (i *Iterator) internalNext() (internalNextValidity, base.InternalKeyKind) {
    3111            1 :         i.stats.ForwardStepCount[InterfaceCall]++
    3112            1 :         if i.err != nil {
    3113            1 :                 return internalNextError, base.InternalKeyKindInvalid
    3114            1 :         } else if i.iterValidityState != IterValid {
    3115            1 :                 return internalNextExhausted, base.InternalKeyKindInvalid
    3116            1 :         }
    3117            1 :         i.lastPositioningOp = internalNextOp
    3118            1 : 
    3119            1 :         switch i.pos {
    3120            1 :         case iterPosCurForward:
    3121            1 :                 i.iterKV = i.iter.Next()
    3122            1 :                 if i.iterKV == nil {
    3123            1 :                         // We check i.iter.Error() here and return an internalNextError enum
    3124            1 :                         // variant so that the caller does not need to check i.iter.Error()
    3125            1 :                         // in the common case that the next internal key has a new user key.
    3126            1 :                         if i.err = i.iter.Error(); i.err != nil {
    3127            0 :                                 return internalNextError, base.InternalKeyKindInvalid
    3128            0 :                         }
    3129            1 :                         i.pos = iterPosNext
    3130            1 :                         return internalNextExhausted, base.InternalKeyKindInvalid
    3131            1 :                 } else if i.comparer.Equal(i.iterKV.K.UserKey, i.key) {
    3132            1 :                         return internalNextValid, i.iterKV.Kind()
    3133            1 :                 }
    3134            1 :                 i.pos = iterPosNext
    3135            1 :                 return internalNextExhausted, base.InternalKeyKindInvalid
    3136            1 :         case iterPosCurReverse, iterPosCurReversePaused, iterPosPrev:
    3137            1 :                 i.err = errors.New("switching from reverse to forward via internalNext is prohibited")
    3138            1 :                 i.iterValidityState = IterExhausted
    3139            1 :                 return internalNextError, base.InternalKeyKindInvalid
    3140            1 :         case iterPosNext, iterPosCurForwardPaused:
    3141            1 :                 // The previous method already moved onto the next user key. This is
    3142            1 :                 // only possible if
    3143            1 :                 //   - the last positioning method was a call to internalNext, and we
    3144            1 :                 //     advanced to a new user key.
    3145            1 :                 //   - the previous non-internalNext iterator operation encountered a
    3146            1 :                 //     range key or merge, forcing an internal Next that found a new
    3147            1 :                 //     user key that's not equal to i.Iterator.Key().
    3148            1 :                 return internalNextExhausted, base.InternalKeyKindInvalid
    3149            0 :         default:
    3150            0 :                 panic("unreachable")
    3151              :         }
    3152              : }
    3153              : 
    3154              : var _ base.IteratorDebug = (*Iterator)(nil)
    3155              : 
    3156              : // DebugTree implements the base.IteratorDebug interface.
    3157            0 : func (i *Iterator) DebugTree(tp treeprinter.Node) {
    3158            0 :         n := tp.Childf("%T(%p)", i, i)
    3159            0 :         if i.iter != nil {
    3160            0 :                 i.iter.DebugTree(n)
    3161            0 :         }
    3162            0 :         if i.pointIter != nil {
    3163            0 :                 i.pointIter.DebugTree(n)
    3164            0 :         }
    3165              : }
        

Generated by: LCOV version 2.0-1