Line data Source code
1 : // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package pebble
6 :
7 : import (
8 : "context"
9 : "fmt"
10 : "runtime/debug"
11 :
12 : "github.com/cockroachdb/pebble/internal/base"
13 : "github.com/cockroachdb/pebble/internal/invariants"
14 : "github.com/cockroachdb/pebble/internal/keyspan"
15 : "github.com/cockroachdb/pebble/internal/manifest"
16 : "github.com/cockroachdb/pebble/sstable"
17 : )
18 :
19 : // tableNewIters creates a new point and range-del iterator for the given file
20 : // number.
21 : //
22 : // On success, the internalIterator is not-nil and must be closed; the
23 : // FragmentIterator can be nil.
24 : // TODO(radu): always return a non-nil FragmentIterator.
25 : //
26 : // On error, the iterators are nil.
27 : //
28 : // The only (non-test) implementation of tableNewIters is tableCacheContainer.newIters().
29 : type tableNewIters func(
30 : ctx context.Context,
31 : file *manifest.FileMetadata,
32 : opts *IterOptions,
33 : internalOpts internalIterOpts,
34 : ) (internalIterator, keyspan.FragmentIterator, error)
35 :
36 : // tableNewRangeDelIter takes a tableNewIters and returns a TableNewSpanIter
37 : // for the rangedel iterator returned by tableNewIters.
38 1 : func tableNewRangeDelIter(ctx context.Context, newIters tableNewIters) keyspan.TableNewSpanIter {
39 1 : return func(file *manifest.FileMetadata, iterOptions keyspan.SpanIterOptions) (keyspan.FragmentIterator, error) {
40 1 : iter, rangeDelIter, err := newIters(ctx, file, nil, internalIterOpts{})
41 1 : if iter != nil {
42 1 : _ = iter.Close()
43 1 : }
44 1 : if rangeDelIter == nil {
45 1 : rangeDelIter = emptyKeyspanIter
46 1 : }
47 1 : return rangeDelIter, err
48 : }
49 : }
50 :
51 : type internalIterOpts struct {
52 : bytesIterated *uint64
53 : bufferPool *sstable.BufferPool
54 : stats *base.InternalIteratorStats
55 : boundLimitedFilter sstable.BoundLimitedBlockPropertyFilter
56 : }
57 :
58 : // levelIter provides a merged view of the sstables in a level.
59 : //
60 : // levelIter is used during compaction and as part of the Iterator
61 : // implementation. When used as part of the Iterator implementation, level
62 : // iteration needs to "pause" at sstable boundaries if a range deletion
63 : // tombstone is the source of that boundary. We know if a range tombstone is
64 : // the smallest or largest key in a file because the kind will be
65 : // InternalKeyKindRangeDeletion. If the boundary key is a range deletion
66 : // tombstone, we materialize a fake entry to return from levelIter. This
67 : // prevents mergingIter from advancing past the sstable until the sstable
68 : // contains the smallest (or largest for reverse iteration) key in the merged
69 : // heap. Note that mergingIter treats a range deletion tombstone returned by
70 : // the point iterator as a no-op.
71 : //
72 : // SeekPrefixGE presents the need for a second type of pausing. If an sstable
73 : // iterator returns "not found" for a SeekPrefixGE operation, we don't want to
74 : // advance to the next sstable as the "not found" does not indicate that all of
75 : // the keys in the sstable are less than the search key. Advancing to the next
76 : // sstable would cause us to skip over range tombstones, violating
77 : // correctness. Instead, SeekPrefixGE creates a synthetic boundary key with the
78 : // kind InternalKeyKindRangeDeletion which will be used to pause the levelIter
79 : // at the sstable until the mergingIter is ready to advance past it.
80 : type levelIter struct {
81 : // The context is stored here since (a) iterators are expected to be
82 : // short-lived (since they pin sstables), (b) plumbing a context into every
83 : // method is very painful, (c) they do not (yet) respect context
84 : // cancellation and are only used for tracing.
85 : ctx context.Context
86 : logger Logger
87 : comparer *Comparer
88 : cmp Compare
89 : split Split
90 : // The lower/upper bounds for iteration as specified at creation or the most
91 : // recent call to SetBounds.
92 : lower []byte
93 : upper []byte
94 : // The iterator options for the currently open table. If
95 : // tableOpts.{Lower,Upper}Bound are nil, the corresponding iteration boundary
96 : // does not lie within the table bounds.
97 : tableOpts IterOptions
98 : // The LSM level this levelIter is initialized for.
99 : level manifest.Level
100 : // The keys to return when iterating past an sstable boundary and that
101 : // boundary is a range deletion tombstone. The boundary could be smallest
102 : // (i.e. arrived at with Prev), or largest (arrived at with Next).
103 : smallestBoundary *InternalKey
104 : largestBoundary *InternalKey
105 : // combinedIterState may be set when a levelIter is used during user
106 : // iteration. Although levelIter only iterates over point keys, it's also
107 : // responsible for lazily constructing the combined range & point iterator
108 : // when it observes a file containing range keys. If the combined iter
109 : // state's initialized field is true, the iterator is already using combined
110 : // iterator, OR the iterator is not configured to use combined iteration. If
111 : // it's false, the levelIter must set the `triggered` and `key` fields when
112 : // the levelIter passes over a file containing range keys. See the
113 : // lazyCombinedIter for more details.
114 : combinedIterState *combinedIterState
115 : // A synthetic boundary key to return when SeekPrefixGE finds an sstable
116 : // which doesn't contain the search key, but which does contain range
117 : // tombstones.
118 : syntheticBoundary InternalKey
119 : // The iter for the current file. It is nil under any of the following conditions:
120 : // - files.Current() == nil
121 : // - err != nil
122 : // - some other constraint, like the bounds in opts, caused the file at index to not
123 : // be relevant to the iteration.
124 : iter internalIterator
125 : // iterFile holds the current file. It is always equal to l.files.Current().
126 : iterFile *fileMetadata
127 : // filteredIter is an optional interface that may be implemented by internal
128 : // iterators that perform filtering of keys. When a new file's iterator is
129 : // opened, it's tested to see if it implements filteredIter. If it does,
130 : // it's stored here to allow the level iterator to recognize when keys were
131 : // omitted from iteration results due to filtering. This is important when a
132 : // file contains range deletions that may delete keys from other files. The
133 : // levelIter must not advance to the next file until the mergingIter has
134 : // advanced beyond the file's bounds. See
135 : // levelIterBoundaryContext.isIgnorableBoundaryKey.
136 : filteredIter filteredIter
137 : newIters tableNewIters
138 : // When rangeDelIterPtr != nil, the caller requires that *rangeDelIterPtr must
139 : // point to a range del iterator corresponding to the current file. When this
140 : // iterator returns nil, *rangeDelIterPtr should also be set to nil. Whenever
141 : // a non-nil internalIterator is placed in rangeDelIterPtr, a copy is placed
142 : // in rangeDelIterCopy. This is done for the following special case:
143 : // when this iterator returns nil because of exceeding the bounds, we don't
144 : // close iter and *rangeDelIterPtr since we could reuse it in the next seek. But
145 : // we need to set *rangeDelIterPtr to nil because of the aforementioned contract.
146 : // This copy is used to revive the *rangeDelIterPtr in the case of reuse.
147 : rangeDelIterPtr *keyspan.FragmentIterator
148 : rangeDelIterCopy keyspan.FragmentIterator
149 : files manifest.LevelIterator
150 : err error
151 :
152 : // Pointer into this level's entry in `mergingIterLevel::levelIterBoundaryContext`.
153 : // We populate it with the corresponding bounds for the currently opened file. It is used for
154 : // two purposes (described for forward iteration. The explanation for backward iteration is
155 : // similar.)
156 : // - To limit the optimization that seeks lower-level iterators past keys shadowed by a range
157 : // tombstone. Limiting this seek to the file largestUserKey is necessary since
158 : // range tombstones are stored untruncated, while they only apply to keys within their
159 : // containing file's boundaries. For a detailed example, see comment above `mergingIter`.
160 : // - To constrain the tombstone to act-within the bounds of the sstable when checking
161 : // containment. For forward iteration we need the smallestUserKey.
162 : //
163 : // An example is sstable bounds [c#8, g#12] containing a tombstone [b, i)#7.
164 : // - When doing a SeekGE to user key X, the levelIter is at this sstable because X is either within
165 : // the sstable bounds or earlier than the start of the sstable (and there is no sstable in
166 : // between at this level). If X >= smallestUserKey, and the tombstone [b, i) contains X,
167 : // it is correct to SeekGE the sstables at lower levels to min(g, i) (i.e., min of
168 : // largestUserKey, tombstone.End) since any user key preceding min(g, i) must be covered by this
169 : // tombstone (since it cannot have a version younger than this tombstone as it is at a lower
170 : // level). And even if X = smallestUserKey or equal to the start user key of the tombstone,
171 : // if the above conditions are satisfied we know that the internal keys corresponding to X at
172 : // lower levels must have a version smaller than that in this file (again because of the level
173 : // argument). So we don't need to use sequence numbers for this comparison.
174 : // - When checking whether this tombstone deletes internal key X we know that the levelIter is at this
175 : // sstable so (repeating the above) X.UserKey is either within the sstable bounds or earlier than the
176 : // start of the sstable (and there is no sstable in between at this level).
177 : // - X is at at a lower level. If X.UserKey >= smallestUserKey, and the tombstone contains
178 : // X.UserKey, we know X is deleted. This argument also works when X is a user key (we use
179 : // it when seeking to test whether a user key is deleted).
180 : // - X is at the same level. X must be within the sstable bounds of the tombstone so the
181 : // X.UserKey >= smallestUserKey comparison is trivially true. In addition to the tombstone containing
182 : // X we need to compare the sequence number of X and the tombstone (we don't need to look
183 : // at how this tombstone is truncated to act-within the file bounds, which are InternalKeys,
184 : // since X and the tombstone are from the same file).
185 : //
186 : // Iterating backwards has one more complication when checking whether a tombstone deletes
187 : // internal key X at a lower level (the construction we do here also works for a user key X).
188 : // Consider sstable bounds [c#8, g#InternalRangeDelSentinel] containing a tombstone [b, i)#7.
189 : // If we are positioned at key g#10 at a lower sstable, the tombstone we will see is [b, i)#7,
190 : // since the higher sstable is positioned at a key <= g#10. We should not use this tombstone
191 : // to delete g#10. This requires knowing that the largestUserKey is a range delete sentinel,
192 : // which we set in a separate bool below.
193 : //
194 : // These fields differs from the `*Boundary` fields in a few ways:
195 : // - `*Boundary` is only populated when the iterator is positioned exactly on the sentinel key.
196 : // - `*Boundary` can hold either the lower- or upper-bound, depending on the iterator direction.
197 : // - `*Boundary` is not exposed to the next higher-level iterator, i.e., `mergingIter`.
198 : boundaryContext *levelIterBoundaryContext
199 :
200 : // internalOpts holds the internal iterator options to pass to the table
201 : // cache when constructing new table iterators.
202 : internalOpts internalIterOpts
203 :
204 : // Scratch space for the obsolete keys filter, when there are no other block
205 : // property filters specified. See the performance note where
206 : // IterOptions.PointKeyFilters is declared.
207 : filtersBuf [1]BlockPropertyFilter
208 :
209 : // Disable invariant checks even if they are otherwise enabled. Used by tests
210 : // which construct "impossible" situations (e.g. seeking to a key before the
211 : // lower bound).
212 : disableInvariants bool
213 : }
214 :
215 : // filteredIter is an additional interface implemented by iterators that may
216 : // skip over point keys during iteration. The sstable.Iterator implements this
217 : // interface.
218 : type filteredIter interface {
219 : // MaybeFilteredKeys may be called when an iterator is exhausted, indicating
220 : // whether or not the iterator's last positioning method may have skipped
221 : // any keys due to low-level filters.
222 : //
223 : // When an iterator is configured to use block-property filters, the
224 : // low-level iterator may skip over blocks or whole sstables of keys.
225 : // Implementations that implement skipping must implement this interface.
226 : // Higher-level iterators require it to preserve invariants (eg, a levelIter
227 : // used in a mergingIter must keep the file's range-del iterator open until
228 : // the mergingIter has moved past the file's bounds, even if all of the
229 : // file's point keys were filtered).
230 : //
231 : // MaybeFilteredKeys may always return false positives, that is it may
232 : // return true when no keys were filtered. It should only be called when the
233 : // iterator is exhausted. It must never return false negatives when the
234 : // iterator is exhausted.
235 : MaybeFilteredKeys() bool
236 : }
237 :
238 : // levelIter implements the base.InternalIterator interface.
239 : var _ base.InternalIterator = (*levelIter)(nil)
240 :
241 : // newLevelIter returns a levelIter. It is permissible to pass a nil split
242 : // parameter if the caller is never going to call SeekPrefixGE.
243 : func newLevelIter(
244 : opts IterOptions,
245 : comparer *Comparer,
246 : newIters tableNewIters,
247 : files manifest.LevelIterator,
248 : level manifest.Level,
249 : internalOpts internalIterOpts,
250 2 : ) *levelIter {
251 2 : l := &levelIter{}
252 2 : l.init(context.Background(), opts, comparer, newIters, files, level,
253 2 : internalOpts)
254 2 : return l
255 2 : }
256 :
257 : func (l *levelIter) init(
258 : ctx context.Context,
259 : opts IterOptions,
260 : comparer *Comparer,
261 : newIters tableNewIters,
262 : files manifest.LevelIterator,
263 : level manifest.Level,
264 : internalOpts internalIterOpts,
265 2 : ) {
266 2 : l.ctx = ctx
267 2 : l.err = nil
268 2 : l.level = level
269 2 : l.logger = opts.getLogger()
270 2 : l.lower = opts.LowerBound
271 2 : l.upper = opts.UpperBound
272 2 : l.tableOpts.TableFilter = opts.TableFilter
273 2 : l.tableOpts.PointKeyFilters = opts.PointKeyFilters
274 2 : if len(opts.PointKeyFilters) == 0 {
275 2 : l.tableOpts.PointKeyFilters = l.filtersBuf[:0:1]
276 2 : }
277 2 : l.tableOpts.UseL6Filters = opts.UseL6Filters
278 2 : l.tableOpts.level = l.level
279 2 : l.tableOpts.snapshotForHideObsoletePoints = opts.snapshotForHideObsoletePoints
280 2 : l.comparer = comparer
281 2 : l.cmp = comparer.Compare
282 2 : l.split = comparer.Split
283 2 : l.iterFile = nil
284 2 : l.newIters = newIters
285 2 : l.files = files
286 2 : l.internalOpts = internalOpts
287 : }
288 :
289 2 : func (l *levelIter) initRangeDel(rangeDelIter *keyspan.FragmentIterator) {
290 2 : l.rangeDelIterPtr = rangeDelIter
291 2 : }
292 :
293 2 : func (l *levelIter) initBoundaryContext(context *levelIterBoundaryContext) {
294 2 : l.boundaryContext = context
295 2 : }
296 :
297 2 : func (l *levelIter) initCombinedIterState(state *combinedIterState) {
298 2 : l.combinedIterState = state
299 2 : }
300 :
301 2 : func (l *levelIter) maybeTriggerCombinedIteration(file *fileMetadata, dir int) {
302 2 : // If we encounter a file that contains range keys, we may need to
303 2 : // trigger a switch to combined range-key and point-key iteration,
304 2 : // if the *pebble.Iterator is configured for it. This switch is done
305 2 : // lazily because range keys are intended to be rare, and
306 2 : // constructing the range-key iterator substantially adds to the
307 2 : // cost of iterator construction and seeking.
308 2 : //
309 2 : // If l.combinedIterState.initialized is already true, either the
310 2 : // iterator is already using combined iteration or the iterator is not
311 2 : // configured to observe range keys. Either way, there's nothing to do.
312 2 : // If false, trigger the switch to combined iteration, using the the
313 2 : // file's bounds to seek the range-key iterator appropriately.
314 2 : //
315 2 : // We only need to trigger combined iteration if the file contains
316 2 : // RangeKeySets: if there are only Unsets and Dels, the user will observe no
317 2 : // range keys regardless. If this file has table stats available, they'll
318 2 : // tell us whether the file has any RangeKeySets. Otherwise, we must
319 2 : // fallback to assuming it does if HasRangeKeys=true.
320 2 : if file != nil && file.HasRangeKeys && l.combinedIterState != nil && !l.combinedIterState.initialized &&
321 2 : (l.upper == nil || l.cmp(file.SmallestRangeKey.UserKey, l.upper) < 0) &&
322 2 : (l.lower == nil || l.cmp(file.LargestRangeKey.UserKey, l.lower) > 0) &&
323 2 : (!file.StatsValid() || file.Stats.NumRangeKeySets > 0) {
324 2 : // The file contains range keys, and we're not using combined iteration yet.
325 2 : // Trigger a switch to combined iteration. It's possible that a switch has
326 2 : // already been triggered if multiple levels encounter files containing
327 2 : // range keys while executing a single mergingIter operation. In this case,
328 2 : // we need to compare the existing key recorded to l.combinedIterState.key,
329 2 : // adjusting it if our key is smaller (forward iteration) or larger
330 2 : // (backward iteration) than the existing key.
331 2 : //
332 2 : // These key comparisons are only required during a single high-level
333 2 : // iterator operation. When the high-level iter op completes,
334 2 : // iinitialized will be true, and future calls to this function will be
335 2 : // no-ops.
336 2 : switch dir {
337 2 : case +1:
338 2 : if !l.combinedIterState.triggered {
339 2 : l.combinedIterState.triggered = true
340 2 : l.combinedIterState.key = file.SmallestRangeKey.UserKey
341 2 : } else if l.cmp(l.combinedIterState.key, file.SmallestRangeKey.UserKey) > 0 {
342 2 : l.combinedIterState.key = file.SmallestRangeKey.UserKey
343 2 : }
344 2 : case -1:
345 2 : if !l.combinedIterState.triggered {
346 2 : l.combinedIterState.triggered = true
347 2 : l.combinedIterState.key = file.LargestRangeKey.UserKey
348 2 : } else if l.cmp(l.combinedIterState.key, file.LargestRangeKey.UserKey) < 0 {
349 2 : l.combinedIterState.key = file.LargestRangeKey.UserKey
350 2 : }
351 : }
352 : }
353 : }
354 :
355 2 : func (l *levelIter) findFileGE(key []byte, flags base.SeekGEFlags) *fileMetadata {
356 2 : // Find the earliest file whose largest key is >= key.
357 2 :
358 2 : // NB: if flags.TrySeekUsingNext()=true, the levelIter must respect it. If
359 2 : // the levelIter is positioned at the key P, it must return a key ≥ P. If
360 2 : // used within a merging iterator, the merging iterator will depend on the
361 2 : // levelIter only moving forward to maintain heap invariants.
362 2 :
363 2 : // Ordinarily we seek the LevelIterator using SeekGE. In some instances, we
364 2 : // Next instead. In other instances, we try Next-ing first, falling back to
365 2 : // seek:
366 2 : // a) flags.TrySeekUsingNext(): The top-level Iterator knows we're seeking
367 2 : // to a key later than the current iterator position. We don't know how
368 2 : // much later the seek key is, so it's possible there are many sstables
369 2 : // between the current position and the seek key. However in most real-
370 2 : // world use cases, the seek key is likely to be nearby. Rather than
371 2 : // performing a log(N) seek through the file metadata, we next a few
372 2 : // times from from our existing location. If we don't find a file whose
373 2 : // largest is >= key within a few nexts, we fall back to seeking.
374 2 : //
375 2 : // Note that in this case, the file returned by findFileGE may be
376 2 : // different than the file returned by a raw binary search (eg, when
377 2 : // TrySeekUsingNext=false). This is possible because the most recent
378 2 : // positioning operation may have already determined that previous
379 2 : // files' keys that are ≥ key are all deleted. This information is
380 2 : // encoded within the iterator's current iterator position and is
381 2 : // unavailable to a fresh binary search.
382 2 : //
383 2 : // b) flags.RelativeSeek(): The merging iterator decided to re-seek this
384 2 : // level according to a range tombstone. When lazy combined iteration
385 2 : // is enabled, the level iterator is responsible for watching for
386 2 : // files containing range keys and triggering the switch to combined
387 2 : // iteration when such a file is observed. If a range deletion was
388 2 : // observed in a higher level causing the merging iterator to seek the
389 2 : // level to the range deletion's end key, we need to check whether all
390 2 : // of the files between the old position and the new position contain
391 2 : // any range keys.
392 2 : //
393 2 : // In this scenario, we don't seek the LevelIterator and instead we
394 2 : // Next it, one file at a time, checking each for range keys. The
395 2 : // merging iterator sets this flag to inform us that we're moving
396 2 : // forward relative to the existing position and that we must examine
397 2 : // each intermediate sstable's metadata for lazy-combined iteration.
398 2 : // In this case, we only Next and never Seek. We set nextsUntilSeek=-1
399 2 : // to signal this intention.
400 2 : //
401 2 : // NB: At most one of flags.RelativeSeek() and flags.TrySeekUsingNext() may
402 2 : // be set, because the merging iterator re-seeks relative seeks with
403 2 : // explicitly only the RelativeSeek flag set.
404 2 : var nextsUntilSeek int
405 2 : var nextInsteadOfSeek bool
406 2 : if flags.TrySeekUsingNext() {
407 2 : nextInsteadOfSeek = true
408 2 : nextsUntilSeek = 4 // arbitrary
409 2 : }
410 2 : if flags.RelativeSeek() && l.combinedIterState != nil && !l.combinedIterState.initialized {
411 2 : nextInsteadOfSeek = true
412 2 : nextsUntilSeek = -1
413 2 : }
414 :
415 2 : var m *fileMetadata
416 2 : if nextInsteadOfSeek {
417 2 : m = l.iterFile
418 2 : } else {
419 2 : m = l.files.SeekGE(l.cmp, key)
420 2 : }
421 : // The below loop has a bit of an unusual organization. There are several
422 : // conditions under which we need to Next to a later file. If none of those
423 : // conditions are met, the file in `m` is okay to return. The loop body is
424 : // structured with a series of if statements, each of which may continue the
425 : // loop to the next file. If none of the statements are met, the end of the
426 : // loop body is a break.
427 2 : for m != nil {
428 2 : if m.HasRangeKeys {
429 2 : l.maybeTriggerCombinedIteration(m, +1)
430 2 :
431 2 : // Some files may only contain range keys, which we can skip.
432 2 : // NB: HasPointKeys=true if the file contains any points or range
433 2 : // deletions (which delete points).
434 2 : if !m.HasPointKeys {
435 2 : m = l.files.Next()
436 2 : continue
437 : }
438 : }
439 :
440 : // This file has point keys.
441 : //
442 : // However, there are a couple reasons why `m` may not be positioned ≥
443 : // `key` yet:
444 : //
445 : // 1. If SeekGE(key) landed on a file containing range keys, the file
446 : // may contain range keys ≥ `key` but no point keys ≥ `key`.
447 : // 2. When nexting instead of seeking, we must check to see whether
448 : // we've nexted sufficiently far, or we need to next again.
449 : //
450 : // If the file does not contain point keys ≥ `key`, next to continue
451 : // looking for a file that does.
452 2 : if (m.HasRangeKeys || nextInsteadOfSeek) && l.cmp(m.LargestPointKey.UserKey, key) < 0 {
453 2 : // If nextInsteadOfSeek is set and nextsUntilSeek is non-negative,
454 2 : // the iterator has been nexting hoping to discover the relevant
455 2 : // file without seeking. It's exhausted the allotted nextsUntilSeek
456 2 : // and should seek to the sought key.
457 2 : if nextInsteadOfSeek && nextsUntilSeek == 0 {
458 1 : nextInsteadOfSeek = false
459 1 : m = l.files.SeekGE(l.cmp, key)
460 1 : continue
461 2 : } else if nextsUntilSeek > 0 {
462 2 : nextsUntilSeek--
463 2 : }
464 2 : m = l.files.Next()
465 2 : continue
466 : }
467 :
468 : // This file has a point key bound ≥ `key`. But the largest point key
469 : // bound may still be a range deletion sentinel, which is exclusive. In
470 : // this case, the file doesn't actually contain any point keys equal to
471 : // `key`. We next to keep searching for a file that actually contains
472 : // point keys ≥ key.
473 : //
474 : // Additionally, this prevents loading untruncated range deletions from
475 : // a table which can't possibly contain the target key and is required
476 : // for correctness by mergingIter.SeekGE (see the comment in that
477 : // function).
478 2 : if m.LargestPointKey.IsExclusiveSentinel() && l.cmp(m.LargestPointKey.UserKey, key) == 0 {
479 2 : m = l.files.Next()
480 2 : continue
481 : }
482 :
483 : // This file contains point keys ≥ `key`. Break and return it.
484 2 : break
485 : }
486 2 : return m
487 : }
488 :
489 2 : func (l *levelIter) findFileLT(key []byte, flags base.SeekLTFlags) *fileMetadata {
490 2 : // Find the last file whose smallest key is < ikey.
491 2 :
492 2 : // Ordinarily we seek the LevelIterator using SeekLT.
493 2 : //
494 2 : // When lazy combined iteration is enabled, there's a complication. The
495 2 : // level iterator is responsible for watching for files containing range
496 2 : // keys and triggering the switch to combined iteration when such a file is
497 2 : // observed. If a range deletion was observed in a higher level causing the
498 2 : // merging iterator to seek the level to the range deletion's start key, we
499 2 : // need to check whether all of the files between the old position and the
500 2 : // new position contain any range keys.
501 2 : //
502 2 : // In this scenario, we don't seek the LevelIterator and instead we Prev it,
503 2 : // one file at a time, checking each for range keys.
504 2 : prevInsteadOfSeek := flags.RelativeSeek() && l.combinedIterState != nil && !l.combinedIterState.initialized
505 2 :
506 2 : var m *fileMetadata
507 2 : if prevInsteadOfSeek {
508 2 : m = l.iterFile
509 2 : } else {
510 2 : m = l.files.SeekLT(l.cmp, key)
511 2 : }
512 : // The below loop has a bit of an unusual organization. There are several
513 : // conditions under which we need to Prev to a previous file. If none of
514 : // those conditions are met, the file in `m` is okay to return. The loop
515 : // body is structured with a series of if statements, each of which may
516 : // continue the loop to the previous file. If none of the statements are
517 : // met, the end of the loop body is a break.
518 2 : for m != nil {
519 2 : if m.HasRangeKeys {
520 2 : l.maybeTriggerCombinedIteration(m, -1)
521 2 :
522 2 : // Some files may only contain range keys, which we can skip.
523 2 : // NB: HasPointKeys=true if the file contains any points or range
524 2 : // deletions (which delete points).
525 2 : if !m.HasPointKeys {
526 2 : m = l.files.Prev()
527 2 : continue
528 : }
529 : }
530 :
531 : // This file has point keys.
532 : //
533 : // However, there are a couple reasons why `m` may not be positioned <
534 : // `key` yet:
535 : //
536 : // 1. If SeekLT(key) landed on a file containing range keys, the file
537 : // may contain range keys < `key` but no point keys < `key`.
538 : // 2. When preving instead of seeking, we must check to see whether
539 : // we've preved sufficiently far, or we need to prev again.
540 : //
541 : // If the file does not contain point keys < `key`, prev to continue
542 : // looking for a file that does.
543 2 : if (m.HasRangeKeys || prevInsteadOfSeek) && l.cmp(m.SmallestPointKey.UserKey, key) >= 0 {
544 2 : m = l.files.Prev()
545 2 : continue
546 : }
547 :
548 : // This file contains point keys < `key`. Break and return it.
549 2 : break
550 : }
551 2 : return m
552 : }
553 :
554 : // Init the iteration bounds for the current table. Returns -1 if the table
555 : // lies fully before the lower bound, +1 if the table lies fully after the
556 : // upper bound, and 0 if the table overlaps the iteration bounds.
557 2 : func (l *levelIter) initTableBounds(f *fileMetadata) int {
558 2 : l.tableOpts.LowerBound = l.lower
559 2 : if l.tableOpts.LowerBound != nil {
560 2 : if l.cmp(f.LargestPointKey.UserKey, l.tableOpts.LowerBound) < 0 {
561 2 : // The largest key in the sstable is smaller than the lower bound.
562 2 : return -1
563 2 : }
564 2 : if l.cmp(l.tableOpts.LowerBound, f.SmallestPointKey.UserKey) <= 0 {
565 2 : // The lower bound is smaller or equal to the smallest key in the
566 2 : // table. Iteration within the table does not need to check the lower
567 2 : // bound.
568 2 : l.tableOpts.LowerBound = nil
569 2 : }
570 : }
571 2 : l.tableOpts.UpperBound = l.upper
572 2 : if l.tableOpts.UpperBound != nil {
573 2 : if l.cmp(f.SmallestPointKey.UserKey, l.tableOpts.UpperBound) >= 0 {
574 2 : // The smallest key in the sstable is greater than or equal to the upper
575 2 : // bound.
576 2 : return 1
577 2 : }
578 2 : if l.cmp(l.tableOpts.UpperBound, f.LargestPointKey.UserKey) > 0 {
579 2 : // The upper bound is greater than the largest key in the
580 2 : // table. Iteration within the table does not need to check the upper
581 2 : // bound. NB: tableOpts.UpperBound is exclusive and f.LargestPointKey is
582 2 : // inclusive.
583 2 : l.tableOpts.UpperBound = nil
584 2 : }
585 : }
586 2 : return 0
587 : }
588 :
589 : type loadFileReturnIndicator int8
590 :
591 : const (
592 : noFileLoaded loadFileReturnIndicator = iota
593 : fileAlreadyLoaded
594 : newFileLoaded
595 : )
596 :
597 2 : func (l *levelIter) loadFile(file *fileMetadata, dir int) loadFileReturnIndicator {
598 2 : l.smallestBoundary = nil
599 2 : l.largestBoundary = nil
600 2 : if l.boundaryContext != nil {
601 2 : l.boundaryContext.isSyntheticIterBoundsKey = false
602 2 : l.boundaryContext.isIgnorableBoundaryKey = false
603 2 : }
604 2 : if l.iterFile == file {
605 2 : if l.err != nil {
606 0 : return noFileLoaded
607 0 : }
608 2 : if l.iter != nil {
609 2 : // We don't bother comparing the file bounds with the iteration bounds when we have
610 2 : // an already open iterator. It is possible that the iter may not be relevant given the
611 2 : // current iteration bounds, but it knows those bounds, so it will enforce them.
612 2 : if l.rangeDelIterPtr != nil {
613 2 : *l.rangeDelIterPtr = l.rangeDelIterCopy
614 2 : }
615 :
616 : // There are a few reasons we might not have triggered combined
617 : // iteration yet, even though we already had `file` open.
618 : // 1. If the bounds changed, we might have previously avoided
619 : // switching to combined iteration because the bounds excluded
620 : // the range keys contained in this file.
621 : // 2. If an existing iterator was reconfigured to iterate over range
622 : // keys (eg, using SetOptions), then we wouldn't have triggered
623 : // the switch to combined iteration yet.
624 2 : l.maybeTriggerCombinedIteration(file, dir)
625 2 : return fileAlreadyLoaded
626 : }
627 : // We were already at file, but don't have an iterator, probably because the file was
628 : // beyond the iteration bounds. It may still be, but it is also possible that the bounds
629 : // have changed. We handle that below.
630 : }
631 :
632 : // Close both iter and rangeDelIterPtr. While mergingIter knows about
633 : // rangeDelIterPtr, it can't call Close() on it because it does not know
634 : // when the levelIter will switch it. Note that levelIter.Close() can be
635 : // called multiple times.
636 2 : if err := l.Close(); err != nil {
637 1 : return noFileLoaded
638 1 : }
639 :
640 2 : for {
641 2 : l.iterFile = file
642 2 : if file == nil {
643 2 : return noFileLoaded
644 2 : }
645 :
646 2 : l.maybeTriggerCombinedIteration(file, dir)
647 2 : if !file.HasPointKeys {
648 2 : switch dir {
649 2 : case +1:
650 2 : file = l.files.Next()
651 2 : continue
652 2 : case -1:
653 2 : file = l.files.Prev()
654 2 : continue
655 : }
656 : }
657 :
658 2 : switch l.initTableBounds(file) {
659 2 : case -1:
660 2 : // The largest key in the sstable is smaller than the lower bound.
661 2 : if dir < 0 {
662 2 : return noFileLoaded
663 2 : }
664 1 : file = l.files.Next()
665 1 : continue
666 2 : case +1:
667 2 : // The smallest key in the sstable is greater than or equal to the upper
668 2 : // bound.
669 2 : if dir > 0 {
670 2 : return noFileLoaded
671 2 : }
672 1 : file = l.files.Prev()
673 1 : continue
674 : }
675 :
676 2 : var rangeDelIter keyspan.FragmentIterator
677 2 : var iter internalIterator
678 2 : iter, rangeDelIter, l.err = l.newIters(l.ctx, l.iterFile, &l.tableOpts, l.internalOpts)
679 2 : l.iter = iter
680 2 : if l.err != nil {
681 1 : return noFileLoaded
682 1 : }
683 2 : if rangeDelIter != nil {
684 2 : if fi, ok := iter.(filteredIter); ok {
685 2 : l.filteredIter = fi
686 2 : } else {
687 0 : l.filteredIter = nil
688 0 : }
689 2 : } else {
690 2 : l.filteredIter = nil
691 2 : }
692 2 : if l.rangeDelIterPtr != nil {
693 2 : *l.rangeDelIterPtr = rangeDelIter
694 2 : l.rangeDelIterCopy = rangeDelIter
695 2 : } else if rangeDelIter != nil {
696 2 : rangeDelIter.Close()
697 2 : }
698 2 : if l.boundaryContext != nil {
699 2 : l.boundaryContext.smallestUserKey = file.Smallest.UserKey
700 2 : l.boundaryContext.largestUserKey = file.Largest.UserKey
701 2 : l.boundaryContext.isLargestUserKeyExclusive = file.Largest.IsExclusiveSentinel()
702 2 : }
703 2 : return newFileLoaded
704 : }
705 : }
706 :
707 : // In race builds we verify that the keys returned by levelIter lie within
708 : // [lower,upper).
709 2 : func (l *levelIter) verify(key *InternalKey, val base.LazyValue) (*InternalKey, base.LazyValue) {
710 2 : // Note that invariants.Enabled is a compile time constant, which means the
711 2 : // block of code will be compiled out of normal builds making this method
712 2 : // eligible for inlining. Do not change this to use a variable.
713 2 : if invariants.Enabled && !l.disableInvariants && key != nil {
714 2 : // We allow returning a boundary key that is outside of the lower/upper
715 2 : // bounds as such keys are always range tombstones which will be skipped by
716 2 : // the Iterator.
717 2 : if l.lower != nil && key != l.smallestBoundary && l.cmp(key.UserKey, l.lower) < 0 {
718 0 : l.logger.Fatalf("levelIter %s: lower bound violation: %s < %s\n%s", l.level, key, l.lower, debug.Stack())
719 0 : }
720 2 : if l.upper != nil && key != l.largestBoundary && l.cmp(key.UserKey, l.upper) > 0 {
721 0 : l.logger.Fatalf("levelIter %s: upper bound violation: %s > %s\n%s", l.level, key, l.upper, debug.Stack())
722 0 : }
723 : }
724 2 : return key, val
725 : }
726 :
727 2 : func (l *levelIter) SeekGE(key []byte, flags base.SeekGEFlags) (*InternalKey, base.LazyValue) {
728 2 : l.err = nil // clear cached iteration error
729 2 : if l.boundaryContext != nil {
730 2 : l.boundaryContext.isSyntheticIterBoundsKey = false
731 2 : l.boundaryContext.isIgnorableBoundaryKey = false
732 2 : }
733 : // NB: the top-level Iterator has already adjusted key based on
734 : // IterOptions.LowerBound.
735 2 : loadFileIndicator := l.loadFile(l.findFileGE(key, flags), +1)
736 2 : if loadFileIndicator == noFileLoaded {
737 2 : return nil, base.LazyValue{}
738 2 : }
739 2 : if loadFileIndicator == newFileLoaded {
740 2 : // File changed, so l.iter has changed, and that iterator is not
741 2 : // positioned appropriately.
742 2 : flags = flags.DisableTrySeekUsingNext()
743 2 : }
744 2 : if ikey, val := l.iter.SeekGE(key, flags); ikey != nil {
745 2 : return l.verify(ikey, val)
746 2 : }
747 2 : return l.verify(l.skipEmptyFileForward())
748 : }
749 :
750 : func (l *levelIter) SeekPrefixGE(
751 : prefix, key []byte, flags base.SeekGEFlags,
752 2 : ) (*base.InternalKey, base.LazyValue) {
753 2 : l.err = nil // clear cached iteration error
754 2 : if l.boundaryContext != nil {
755 2 : l.boundaryContext.isSyntheticIterBoundsKey = false
756 2 : l.boundaryContext.isIgnorableBoundaryKey = false
757 2 : }
758 :
759 : // NB: the top-level Iterator has already adjusted key based on
760 : // IterOptions.LowerBound.
761 2 : loadFileIndicator := l.loadFile(l.findFileGE(key, flags), +1)
762 2 : if loadFileIndicator == noFileLoaded {
763 2 : return nil, base.LazyValue{}
764 2 : }
765 2 : if loadFileIndicator == newFileLoaded {
766 2 : // File changed, so l.iter has changed, and that iterator is not
767 2 : // positioned appropriately.
768 2 : flags = flags.DisableTrySeekUsingNext()
769 2 : }
770 2 : if key, val := l.iter.SeekPrefixGE(prefix, key, flags); key != nil {
771 2 : return l.verify(key, val)
772 2 : }
773 : // When SeekPrefixGE returns nil, we have not necessarily reached the end of
774 : // the sstable. All we know is that a key with prefix does not exist in the
775 : // current sstable. We do know that the key lies within the bounds of the
776 : // table as findFileGE found the table where key <= meta.Largest. We return
777 : // the table's bound with isIgnorableBoundaryKey set.
778 2 : if l.rangeDelIterPtr != nil && *l.rangeDelIterPtr != nil {
779 2 : if l.tableOpts.UpperBound != nil {
780 2 : l.syntheticBoundary.UserKey = l.tableOpts.UpperBound
781 2 : l.syntheticBoundary.Trailer = InternalKeyRangeDeleteSentinel
782 2 : l.largestBoundary = &l.syntheticBoundary
783 2 : if l.boundaryContext != nil {
784 2 : l.boundaryContext.isSyntheticIterBoundsKey = true
785 2 : l.boundaryContext.isIgnorableBoundaryKey = false
786 2 : }
787 2 : return l.verify(l.largestBoundary, base.LazyValue{})
788 : }
789 : // Return the file's largest bound, ensuring this file stays open until
790 : // the mergingIter advances beyond the file's bounds. We set
791 : // isIgnorableBoundaryKey to signal that the actual key returned should
792 : // be ignored, and does not represent a real key in the database.
793 2 : l.largestBoundary = &l.iterFile.LargestPointKey
794 2 : if l.boundaryContext != nil {
795 2 : l.boundaryContext.isSyntheticIterBoundsKey = false
796 2 : l.boundaryContext.isIgnorableBoundaryKey = true
797 2 : }
798 2 : return l.verify(l.largestBoundary, base.LazyValue{})
799 : }
800 : // It is possible that we are here because bloom filter matching failed. In
801 : // that case it is likely that all keys matching the prefix are wholly
802 : // within the current file and cannot be in the subsequent file. In that
803 : // case we don't want to go to the next file, since loading and seeking in
804 : // there has some cost. Additionally, for sparse key spaces, loading the
805 : // next file will defeat the optimization for the next SeekPrefixGE that is
806 : // called with flags.TrySeekUsingNext(), since for sparse key spaces it is
807 : // likely that the next key will also be contained in the current file.
808 2 : var n int
809 2 : if l.split != nil {
810 2 : // If the split function is specified, calculate the prefix length accordingly.
811 2 : n = l.split(l.iterFile.LargestPointKey.UserKey)
812 2 : } else {
813 0 : // If the split function is not specified, the entire key is used as the
814 0 : // prefix. This case can occur when getIter uses SeekPrefixGE.
815 0 : n = len(l.iterFile.LargestPointKey.UserKey)
816 0 : }
817 2 : if l.cmp(prefix, l.iterFile.LargestPointKey.UserKey[:n]) < 0 {
818 2 : return nil, base.LazyValue{}
819 2 : }
820 1 : return l.verify(l.skipEmptyFileForward())
821 : }
822 :
823 2 : func (l *levelIter) SeekLT(key []byte, flags base.SeekLTFlags) (*InternalKey, base.LazyValue) {
824 2 : l.err = nil // clear cached iteration error
825 2 : if l.boundaryContext != nil {
826 2 : l.boundaryContext.isSyntheticIterBoundsKey = false
827 2 : l.boundaryContext.isIgnorableBoundaryKey = false
828 2 : }
829 :
830 : // NB: the top-level Iterator has already adjusted key based on
831 : // IterOptions.UpperBound.
832 2 : if l.loadFile(l.findFileLT(key, flags), -1) == noFileLoaded {
833 2 : return nil, base.LazyValue{}
834 2 : }
835 2 : if key, val := l.iter.SeekLT(key, flags); key != nil {
836 2 : return l.verify(key, val)
837 2 : }
838 2 : return l.verify(l.skipEmptyFileBackward())
839 : }
840 :
841 2 : func (l *levelIter) First() (*InternalKey, base.LazyValue) {
842 2 : l.err = nil // clear cached iteration error
843 2 : if l.boundaryContext != nil {
844 2 : l.boundaryContext.isSyntheticIterBoundsKey = false
845 2 : l.boundaryContext.isIgnorableBoundaryKey = false
846 2 : }
847 :
848 : // NB: the top-level Iterator will call SeekGE if IterOptions.LowerBound is
849 : // set.
850 2 : if l.loadFile(l.files.First(), +1) == noFileLoaded {
851 2 : return nil, base.LazyValue{}
852 2 : }
853 2 : if key, val := l.iter.First(); key != nil {
854 2 : return l.verify(key, val)
855 2 : }
856 2 : return l.verify(l.skipEmptyFileForward())
857 : }
858 :
859 2 : func (l *levelIter) Last() (*InternalKey, base.LazyValue) {
860 2 : l.err = nil // clear cached iteration error
861 2 : if l.boundaryContext != nil {
862 2 : l.boundaryContext.isSyntheticIterBoundsKey = false
863 2 : l.boundaryContext.isIgnorableBoundaryKey = false
864 2 : }
865 :
866 : // NB: the top-level Iterator will call SeekLT if IterOptions.UpperBound is
867 : // set.
868 2 : if l.loadFile(l.files.Last(), -1) == noFileLoaded {
869 2 : return nil, base.LazyValue{}
870 2 : }
871 2 : if key, val := l.iter.Last(); key != nil {
872 2 : return l.verify(key, val)
873 2 : }
874 2 : return l.verify(l.skipEmptyFileBackward())
875 : }
876 :
877 2 : func (l *levelIter) Next() (*InternalKey, base.LazyValue) {
878 2 : if l.err != nil || l.iter == nil {
879 1 : return nil, base.LazyValue{}
880 1 : }
881 2 : if l.boundaryContext != nil {
882 2 : l.boundaryContext.isSyntheticIterBoundsKey = false
883 2 : l.boundaryContext.isIgnorableBoundaryKey = false
884 2 : }
885 :
886 2 : switch {
887 2 : case l.largestBoundary != nil:
888 2 : if l.tableOpts.UpperBound != nil {
889 1 : // The UpperBound was within this file, so don't load the next
890 1 : // file. We leave the largestBoundary unchanged so that subsequent
891 1 : // calls to Next() stay at this file. If a Seek/First/Last call is
892 1 : // made and this file continues to be relevant, loadFile() will
893 1 : // set the largestBoundary to nil.
894 1 : if l.rangeDelIterPtr != nil {
895 1 : *l.rangeDelIterPtr = nil
896 1 : }
897 1 : return nil, base.LazyValue{}
898 : }
899 : // We're stepping past the boundary key, so now we can load the next file.
900 2 : if l.loadFile(l.files.Next(), +1) != noFileLoaded {
901 2 : if key, val := l.iter.First(); key != nil {
902 2 : return l.verify(key, val)
903 2 : }
904 2 : return l.verify(l.skipEmptyFileForward())
905 : }
906 2 : return nil, base.LazyValue{}
907 :
908 2 : default:
909 2 : // Reset the smallest boundary since we're moving away from it.
910 2 : l.smallestBoundary = nil
911 2 : if key, val := l.iter.Next(); key != nil {
912 2 : return l.verify(key, val)
913 2 : }
914 : }
915 2 : return l.verify(l.skipEmptyFileForward())
916 : }
917 :
918 2 : func (l *levelIter) NextPrefix(succKey []byte) (*InternalKey, base.LazyValue) {
919 2 : if l.err != nil || l.iter == nil {
920 0 : return nil, base.LazyValue{}
921 0 : }
922 2 : if l.boundaryContext != nil {
923 2 : l.boundaryContext.isSyntheticIterBoundsKey = false
924 2 : l.boundaryContext.isIgnorableBoundaryKey = false
925 2 : }
926 :
927 2 : switch {
928 1 : case l.largestBoundary != nil:
929 1 : if l.tableOpts.UpperBound != nil {
930 0 : // The UpperBound was within this file, so don't load the next
931 0 : // file. We leave the largestBoundary unchanged so that subsequent
932 0 : // calls to Next() stay at this file. If a Seek/First/Last call is
933 0 : // made and this file continues to be relevant, loadFile() will
934 0 : // set the largestBoundary to nil.
935 0 : if l.rangeDelIterPtr != nil {
936 0 : *l.rangeDelIterPtr = nil
937 0 : }
938 0 : return nil, base.LazyValue{}
939 : }
940 : // We're stepping past the boundary key, so we need to load a later
941 : // file.
942 :
943 2 : default:
944 2 : // Reset the smallest boundary since we're moving away from it.
945 2 : l.smallestBoundary = nil
946 2 :
947 2 : if key, val := l.iter.NextPrefix(succKey); key != nil {
948 1 : return l.verify(key, val)
949 1 : }
950 : // Fall through to seeking.
951 : }
952 :
953 : // Seek the manifest level iterator using TrySeekUsingNext=true and
954 : // RelativeSeek=true so that we take advantage of the knowledge that
955 : // `succKey` can only be contained in later files.
956 2 : metadataSeekFlags := base.SeekGEFlagsNone.EnableTrySeekUsingNext().EnableRelativeSeek()
957 2 : if l.loadFile(l.findFileGE(succKey, metadataSeekFlags), +1) != noFileLoaded {
958 2 : // NB: The SeekGE on the file's iterator must not set TrySeekUsingNext,
959 2 : // because l.iter is unpositioned.
960 2 : if key, val := l.iter.SeekGE(succKey, base.SeekGEFlagsNone); key != nil {
961 2 : return l.verify(key, val)
962 2 : }
963 1 : return l.verify(l.skipEmptyFileForward())
964 : }
965 1 : return nil, base.LazyValue{}
966 : }
967 :
968 2 : func (l *levelIter) Prev() (*InternalKey, base.LazyValue) {
969 2 : if l.err != nil || l.iter == nil {
970 1 : return nil, base.LazyValue{}
971 1 : }
972 2 : if l.boundaryContext != nil {
973 2 : l.boundaryContext.isSyntheticIterBoundsKey = false
974 2 : l.boundaryContext.isIgnorableBoundaryKey = false
975 2 : }
976 :
977 2 : switch {
978 2 : case l.smallestBoundary != nil:
979 2 : if l.tableOpts.LowerBound != nil {
980 1 : // The LowerBound was within this file, so don't load the previous
981 1 : // file. We leave the smallestBoundary unchanged so that
982 1 : // subsequent calls to Prev() stay at this file. If a
983 1 : // Seek/First/Last call is made and this file continues to be
984 1 : // relevant, loadFile() will set the smallestBoundary to nil.
985 1 : if l.rangeDelIterPtr != nil {
986 1 : *l.rangeDelIterPtr = nil
987 1 : }
988 1 : return nil, base.LazyValue{}
989 : }
990 : // We're stepping past the boundary key, so now we can load the prev file.
991 2 : if l.loadFile(l.files.Prev(), -1) != noFileLoaded {
992 2 : if key, val := l.iter.Last(); key != nil {
993 2 : return l.verify(key, val)
994 2 : }
995 2 : return l.verify(l.skipEmptyFileBackward())
996 : }
997 2 : return nil, base.LazyValue{}
998 :
999 2 : default:
1000 2 : // Reset the largest boundary since we're moving away from it.
1001 2 : l.largestBoundary = nil
1002 2 : if key, val := l.iter.Prev(); key != nil {
1003 2 : return l.verify(key, val)
1004 2 : }
1005 : }
1006 2 : return l.verify(l.skipEmptyFileBackward())
1007 : }
1008 :
1009 2 : func (l *levelIter) skipEmptyFileForward() (*InternalKey, base.LazyValue) {
1010 2 : var key *InternalKey
1011 2 : var val base.LazyValue
1012 2 : // The first iteration of this loop starts with an already exhausted
1013 2 : // l.iter. The reason for the exhaustion is either that we iterated to the
1014 2 : // end of the sstable, or our iteration was terminated early due to the
1015 2 : // presence of an upper-bound or the use of SeekPrefixGE. If
1016 2 : // l.rangeDelIterPtr is non-nil, we may need to pretend the iterator is
1017 2 : // not exhausted to allow for the merging to finish consuming the
1018 2 : // l.rangeDelIterPtr before levelIter switches the rangeDelIter from
1019 2 : // under it. This pretense is done by either generating a synthetic
1020 2 : // boundary key or returning the largest key of the file, depending on the
1021 2 : // exhaustion reason.
1022 2 :
1023 2 : // Subsequent iterations will examine consecutive files such that the first
1024 2 : // file that does not have an exhausted iterator causes the code to return
1025 2 : // that key, else the behavior described above if there is a corresponding
1026 2 : // rangeDelIterPtr.
1027 2 : for ; key == nil; key, val = l.iter.First() {
1028 2 : if l.rangeDelIterPtr != nil {
1029 2 : // We're being used as part of a mergingIter and we've exhausted the
1030 2 : // current sstable. If an upper bound is present and the upper bound lies
1031 2 : // within the current sstable, then we will have reached the upper bound
1032 2 : // rather than the end of the sstable. We need to return a synthetic
1033 2 : // boundary key so that mergingIter can use the range tombstone iterator
1034 2 : // until the other levels have reached this boundary.
1035 2 : //
1036 2 : // It is safe to set the boundary key to the UpperBound user key
1037 2 : // with the RANGEDEL sentinel since it is the smallest InternalKey
1038 2 : // that matches the exclusive upper bound, and does not represent
1039 2 : // a real key.
1040 2 : if l.tableOpts.UpperBound != nil {
1041 2 : if *l.rangeDelIterPtr != nil {
1042 2 : l.syntheticBoundary.UserKey = l.tableOpts.UpperBound
1043 2 : l.syntheticBoundary.Trailer = InternalKeyRangeDeleteSentinel
1044 2 : l.largestBoundary = &l.syntheticBoundary
1045 2 : if l.boundaryContext != nil {
1046 2 : l.boundaryContext.isSyntheticIterBoundsKey = true
1047 2 : }
1048 2 : return l.largestBoundary, base.LazyValue{}
1049 : }
1050 : // Else there are no range deletions in this sstable. This
1051 : // helps with performance when many levels are populated with
1052 : // sstables and most don't have any actual keys within the
1053 : // bounds.
1054 2 : return nil, base.LazyValue{}
1055 : }
1056 : // If the boundary is a range deletion tombstone, return that key.
1057 2 : if l.iterFile.LargestPointKey.Kind() == InternalKeyKindRangeDelete {
1058 2 : l.largestBoundary = &l.iterFile.LargestPointKey
1059 2 : if l.boundaryContext != nil {
1060 2 : l.boundaryContext.isIgnorableBoundaryKey = true
1061 2 : }
1062 2 : return l.largestBoundary, base.LazyValue{}
1063 : }
1064 : // If the last point iterator positioning op might've skipped keys,
1065 : // it's possible the file's range deletions are still relevant to
1066 : // other levels. Return the largest boundary as a special ignorable
1067 : // marker to avoid advancing to the next file.
1068 : //
1069 : // The sstable iterator cannot guarantee that keys were skipped. A
1070 : // SeekGE that lands on a index separator k only knows that the
1071 : // block at the index entry contains keys ≤ k. We can't know whether
1072 : // there were actually keys between the seek key and the index
1073 : // separator key. If the block is then excluded due to block
1074 : // property filters, the iterator does not know whether keys were
1075 : // actually skipped by the block's exclusion.
1076 : //
1077 : // Since MaybeFilteredKeys cannot guarantee that keys were skipped,
1078 : // it's possible l.iterFile.Largest was already returned. Returning
1079 : // l.iterFile.Largest again is a violation of the strict
1080 : // monotonicity normally provided. The mergingIter's heap can
1081 : // tolerate this repeat key and in this case will keep the level at
1082 : // the top of the heap and immediately skip the entry, advancing to
1083 : // the next file.
1084 2 : if *l.rangeDelIterPtr != nil && l.filteredIter != nil &&
1085 2 : l.filteredIter.MaybeFilteredKeys() {
1086 1 : l.largestBoundary = &l.iterFile.Largest
1087 1 : if l.boundaryContext != nil {
1088 1 : l.boundaryContext.isIgnorableBoundaryKey = true
1089 1 : }
1090 1 : return l.largestBoundary, base.LazyValue{}
1091 : }
1092 : }
1093 :
1094 : // Current file was exhausted. Move to the next file.
1095 2 : if l.loadFile(l.files.Next(), +1) == noFileLoaded {
1096 2 : return nil, base.LazyValue{}
1097 2 : }
1098 : }
1099 2 : return key, val
1100 : }
1101 :
1102 2 : func (l *levelIter) skipEmptyFileBackward() (*InternalKey, base.LazyValue) {
1103 2 : var key *InternalKey
1104 2 : var val base.LazyValue
1105 2 : // The first iteration of this loop starts with an already exhausted
1106 2 : // l.iter. The reason for the exhaustion is either that we iterated to the
1107 2 : // end of the sstable, or our iteration was terminated early due to the
1108 2 : // presence of a lower-bound. If l.rangeDelIterPtr is non-nil, we may need
1109 2 : // to pretend the iterator is not exhausted to allow for the merging to
1110 2 : // finish consuming the l.rangeDelIterPtr before levelIter switches the
1111 2 : // rangeDelIter from under it. This pretense is done by either generating
1112 2 : // a synthetic boundary key or returning the smallest key of the file,
1113 2 : // depending on the exhaustion reason.
1114 2 :
1115 2 : // Subsequent iterations will examine consecutive files such that the first
1116 2 : // file that does not have an exhausted iterator causes the code to return
1117 2 : // that key, else the behavior described above if there is a corresponding
1118 2 : // rangeDelIterPtr.
1119 2 : for ; key == nil; key, val = l.iter.Last() {
1120 2 : if l.rangeDelIterPtr != nil {
1121 2 : // We're being used as part of a mergingIter and we've exhausted the
1122 2 : // current sstable. If a lower bound is present and the lower bound lies
1123 2 : // within the current sstable, then we will have reached the lower bound
1124 2 : // rather than the beginning of the sstable. We need to return a
1125 2 : // synthetic boundary key so that mergingIter can use the range tombstone
1126 2 : // iterator until the other levels have reached this boundary.
1127 2 : //
1128 2 : // It is safe to set the boundary key to the LowerBound user key
1129 2 : // with the RANGEDEL sentinel since it is the smallest InternalKey
1130 2 : // that is within the inclusive lower bound, and does not
1131 2 : // represent a real key.
1132 2 : if l.tableOpts.LowerBound != nil {
1133 2 : if *l.rangeDelIterPtr != nil {
1134 2 : l.syntheticBoundary.UserKey = l.tableOpts.LowerBound
1135 2 : l.syntheticBoundary.Trailer = InternalKeyRangeDeleteSentinel
1136 2 : l.smallestBoundary = &l.syntheticBoundary
1137 2 : if l.boundaryContext != nil {
1138 2 : l.boundaryContext.isSyntheticIterBoundsKey = true
1139 2 : }
1140 2 : return l.smallestBoundary, base.LazyValue{}
1141 : }
1142 : // Else there are no range deletions in this sstable. This
1143 : // helps with performance when many levels are populated with
1144 : // sstables and most don't have any actual keys within the
1145 : // bounds.
1146 2 : return nil, base.LazyValue{}
1147 : }
1148 : // If the boundary is a range deletion tombstone, return that key.
1149 2 : if l.iterFile.SmallestPointKey.Kind() == InternalKeyKindRangeDelete {
1150 2 : l.smallestBoundary = &l.iterFile.SmallestPointKey
1151 2 : if l.boundaryContext != nil {
1152 2 : l.boundaryContext.isIgnorableBoundaryKey = true
1153 2 : }
1154 2 : return l.smallestBoundary, base.LazyValue{}
1155 : }
1156 : // If the last point iterator positioning op skipped keys, it's
1157 : // possible the file's range deletions are still relevant to other
1158 : // levels. Return the smallest boundary as a special ignorable key
1159 : // to avoid advancing to the next file.
1160 : //
1161 : // The sstable iterator cannot guarantee that keys were skipped. A
1162 : // SeekGE that lands on a index separator k only knows that the
1163 : // block at the index entry contains keys ≤ k. We can't know whether
1164 : // there were actually keys between the seek key and the index
1165 : // separator key. If the block is then excluded due to block
1166 : // property filters, the iterator does not know whether keys were
1167 : // actually skipped by the block's exclusion.
1168 : //
1169 : // Since MaybeFilteredKeys cannot guarantee that keys were skipped,
1170 : // it's possible l.iterFile.Smallest was already returned. Returning
1171 : // l.iterFile.Smallest again is a violation of the strict
1172 : // monotonicity normally provided. The mergingIter's heap can
1173 : // tolerate this repeat key and in this case will keep the level at
1174 : // the top of the heap and immediately skip the entry, advancing to
1175 : // the next file.
1176 2 : if *l.rangeDelIterPtr != nil && l.filteredIter != nil && l.filteredIter.MaybeFilteredKeys() {
1177 1 : l.smallestBoundary = &l.iterFile.Smallest
1178 1 : if l.boundaryContext != nil {
1179 1 : l.boundaryContext.isIgnorableBoundaryKey = true
1180 1 : }
1181 1 : return l.smallestBoundary, base.LazyValue{}
1182 : }
1183 : }
1184 :
1185 : // Current file was exhausted. Move to the previous file.
1186 2 : if l.loadFile(l.files.Prev(), -1) == noFileLoaded {
1187 2 : return nil, base.LazyValue{}
1188 2 : }
1189 : }
1190 2 : return key, val
1191 : }
1192 :
1193 2 : func (l *levelIter) Error() error {
1194 2 : if l.err != nil || l.iter == nil {
1195 2 : return l.err
1196 2 : }
1197 2 : return l.iter.Error()
1198 : }
1199 :
1200 2 : func (l *levelIter) Close() error {
1201 2 : if l.iter != nil {
1202 2 : l.err = l.iter.Close()
1203 2 : l.iter = nil
1204 2 : }
1205 2 : if l.rangeDelIterPtr != nil {
1206 2 : if t := l.rangeDelIterCopy; t != nil {
1207 2 : l.err = firstError(l.err, t.Close())
1208 2 : }
1209 2 : *l.rangeDelIterPtr = nil
1210 2 : l.rangeDelIterCopy = nil
1211 : }
1212 2 : return l.err
1213 : }
1214 :
1215 2 : func (l *levelIter) SetBounds(lower, upper []byte) {
1216 2 : l.lower = lower
1217 2 : l.upper = upper
1218 2 :
1219 2 : if l.iter == nil {
1220 2 : return
1221 2 : }
1222 :
1223 : // Update tableOpts.{Lower,Upper}Bound in case the new boundaries fall within
1224 : // the boundaries of the current table.
1225 2 : if l.initTableBounds(l.iterFile) != 0 {
1226 2 : // The table does not overlap the bounds. Close() will set levelIter.err if
1227 2 : // an error occurs.
1228 2 : _ = l.Close()
1229 2 : return
1230 2 : }
1231 :
1232 2 : l.iter.SetBounds(l.tableOpts.LowerBound, l.tableOpts.UpperBound)
1233 : }
1234 :
1235 2 : func (l *levelIter) String() string {
1236 2 : if l.iterFile != nil {
1237 2 : return fmt.Sprintf("%s: fileNum=%s", l.level, l.iter.String())
1238 2 : }
1239 0 : return fmt.Sprintf("%s: fileNum=<nil>", l.level)
1240 : }
1241 :
1242 : var _ internalIterator = &levelIter{}
|