Line data Source code
1 : // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package pebble
6 :
7 : import (
8 : "context"
9 : "fmt"
10 : "runtime/debug"
11 :
12 : "github.com/cockroachdb/pebble/internal/base"
13 : "github.com/cockroachdb/pebble/internal/invariants"
14 : "github.com/cockroachdb/pebble/internal/keyspan"
15 : "github.com/cockroachdb/pebble/internal/manifest"
16 : "github.com/cockroachdb/pebble/sstable"
17 : )
18 :
19 : // tableNewIters creates a new point and range-del iterator for the given file
20 : // number.
21 : //
22 : // On success, the internalIterator is not-nil and must be closed; the
23 : // FragmentIterator can be nil.
24 : // TODO(radu): always return a non-nil FragmentIterator.
25 : //
26 : // On error, the iterators are nil.
27 : //
28 : // The only (non-test) implementation of tableNewIters is tableCacheContainer.newIters().
29 : type tableNewIters func(
30 : ctx context.Context,
31 : file *manifest.FileMetadata,
32 : opts *IterOptions,
33 : internalOpts internalIterOpts,
34 : ) (internalIterator, keyspan.FragmentIterator, error)
35 :
36 : // tableNewRangeDelIter takes a tableNewIters and returns a TableNewSpanIter
37 : // for the rangedel iterator returned by tableNewIters.
38 0 : func tableNewRangeDelIter(ctx context.Context, newIters tableNewIters) keyspan.TableNewSpanIter {
39 0 : return func(file *manifest.FileMetadata, iterOptions keyspan.SpanIterOptions) (keyspan.FragmentIterator, error) {
40 0 : iter, rangeDelIter, err := newIters(ctx, file, nil, internalIterOpts{})
41 0 : if iter != nil {
42 0 : _ = iter.Close()
43 0 : }
44 0 : if rangeDelIter == nil {
45 0 : rangeDelIter = emptyKeyspanIter
46 0 : }
47 0 : return rangeDelIter, err
48 : }
49 : }
50 :
51 : type internalIterOpts struct {
52 : bytesIterated *uint64
53 : bufferPool *sstable.BufferPool
54 : stats *base.InternalIteratorStats
55 : boundLimitedFilter sstable.BoundLimitedBlockPropertyFilter
56 : }
57 :
58 : // levelIter provides a merged view of the sstables in a level.
59 : //
60 : // levelIter is used during compaction and as part of the Iterator
61 : // implementation. When used as part of the Iterator implementation, level
62 : // iteration needs to "pause" at sstable boundaries if a range deletion
63 : // tombstone is the source of that boundary. We know if a range tombstone is
64 : // the smallest or largest key in a file because the kind will be
65 : // InternalKeyKindRangeDeletion. If the boundary key is a range deletion
66 : // tombstone, we materialize a fake entry to return from levelIter. This
67 : // prevents mergingIter from advancing past the sstable until the sstable
68 : // contains the smallest (or largest for reverse iteration) key in the merged
69 : // heap. Note that mergingIter treats a range deletion tombstone returned by
70 : // the point iterator as a no-op.
71 : //
72 : // SeekPrefixGE presents the need for a second type of pausing. If an sstable
73 : // iterator returns "not found" for a SeekPrefixGE operation, we don't want to
74 : // advance to the next sstable as the "not found" does not indicate that all of
75 : // the keys in the sstable are less than the search key. Advancing to the next
76 : // sstable would cause us to skip over range tombstones, violating
77 : // correctness. Instead, SeekPrefixGE creates a synthetic boundary key with the
78 : // kind InternalKeyKindRangeDeletion which will be used to pause the levelIter
79 : // at the sstable until the mergingIter is ready to advance past it.
80 : type levelIter struct {
81 : // The context is stored here since (a) iterators are expected to be
82 : // short-lived (since they pin sstables), (b) plumbing a context into every
83 : // method is very painful, (c) they do not (yet) respect context
84 : // cancellation and are only used for tracing.
85 : ctx context.Context
86 : logger Logger
87 : comparer *Comparer
88 : cmp Compare
89 : split Split
90 : // The lower/upper bounds for iteration as specified at creation or the most
91 : // recent call to SetBounds.
92 : lower []byte
93 : upper []byte
94 : // The iterator options for the currently open table. If
95 : // tableOpts.{Lower,Upper}Bound are nil, the corresponding iteration boundary
96 : // does not lie within the table bounds.
97 : tableOpts IterOptions
98 : // The LSM level this levelIter is initialized for.
99 : level manifest.Level
100 : // The keys to return when iterating past an sstable boundary and that
101 : // boundary is a range deletion tombstone. The boundary could be smallest
102 : // (i.e. arrived at with Prev), or largest (arrived at with Next).
103 : smallestBoundary *InternalKey
104 : largestBoundary *InternalKey
105 : // combinedIterState may be set when a levelIter is used during user
106 : // iteration. Although levelIter only iterates over point keys, it's also
107 : // responsible for lazily constructing the combined range & point iterator
108 : // when it observes a file containing range keys. If the combined iter
109 : // state's initialized field is true, the iterator is already using combined
110 : // iterator, OR the iterator is not configured to use combined iteration. If
111 : // it's false, the levelIter must set the `triggered` and `key` fields when
112 : // the levelIter passes over a file containing range keys. See the
113 : // lazyCombinedIter for more details.
114 : combinedIterState *combinedIterState
115 : // A synthetic boundary key to return when SeekPrefixGE finds an sstable
116 : // which doesn't contain the search key, but which does contain range
117 : // tombstones.
118 : syntheticBoundary InternalKey
119 : // The iter for the current file. It is nil under any of the following conditions:
120 : // - files.Current() == nil
121 : // - err != nil
122 : // - some other constraint, like the bounds in opts, caused the file at index to not
123 : // be relevant to the iteration.
124 : iter internalIterator
125 : // iterFile holds the current file. It is always equal to l.files.Current().
126 : iterFile *fileMetadata
127 : // filteredIter is an optional interface that may be implemented by internal
128 : // iterators that perform filtering of keys. When a new file's iterator is
129 : // opened, it's tested to see if it implements filteredIter. If it does,
130 : // it's stored here to allow the level iterator to recognize when keys were
131 : // omitted from iteration results due to filtering. This is important when a
132 : // file contains range deletions that may delete keys from other files. The
133 : // levelIter must not advance to the next file until the mergingIter has
134 : // advanced beyond the file's bounds. See
135 : // levelIterBoundaryContext.isIgnorableBoundaryKey.
136 : filteredIter filteredIter
137 : newIters tableNewIters
138 : // When rangeDelIterPtr != nil, the caller requires that *rangeDelIterPtr must
139 : // point to a range del iterator corresponding to the current file. When this
140 : // iterator returns nil, *rangeDelIterPtr should also be set to nil. Whenever
141 : // a non-nil internalIterator is placed in rangeDelIterPtr, a copy is placed
142 : // in rangeDelIterCopy. This is done for the following special case:
143 : // when this iterator returns nil because of exceeding the bounds, we don't
144 : // close iter and *rangeDelIterPtr since we could reuse it in the next seek. But
145 : // we need to set *rangeDelIterPtr to nil because of the aforementioned contract.
146 : // This copy is used to revive the *rangeDelIterPtr in the case of reuse.
147 : rangeDelIterPtr *keyspan.FragmentIterator
148 : rangeDelIterCopy keyspan.FragmentIterator
149 : files manifest.LevelIterator
150 : err error
151 :
152 : // Pointer into this level's entry in `mergingIterLevel::levelIterBoundaryContext`.
153 : // We populate it with the corresponding bounds for the currently opened file. It is used for
154 : // two purposes (described for forward iteration. The explanation for backward iteration is
155 : // similar.)
156 : // - To limit the optimization that seeks lower-level iterators past keys shadowed by a range
157 : // tombstone. Limiting this seek to the file largestUserKey is necessary since
158 : // range tombstones are stored untruncated, while they only apply to keys within their
159 : // containing file's boundaries. For a detailed example, see comment above `mergingIter`.
160 : // - To constrain the tombstone to act-within the bounds of the sstable when checking
161 : // containment. For forward iteration we need the smallestUserKey.
162 : //
163 : // An example is sstable bounds [c#8, g#12] containing a tombstone [b, i)#7.
164 : // - When doing a SeekGE to user key X, the levelIter is at this sstable because X is either within
165 : // the sstable bounds or earlier than the start of the sstable (and there is no sstable in
166 : // between at this level). If X >= smallestUserKey, and the tombstone [b, i) contains X,
167 : // it is correct to SeekGE the sstables at lower levels to min(g, i) (i.e., min of
168 : // largestUserKey, tombstone.End) since any user key preceding min(g, i) must be covered by this
169 : // tombstone (since it cannot have a version younger than this tombstone as it is at a lower
170 : // level). And even if X = smallestUserKey or equal to the start user key of the tombstone,
171 : // if the above conditions are satisfied we know that the internal keys corresponding to X at
172 : // lower levels must have a version smaller than that in this file (again because of the level
173 : // argument). So we don't need to use sequence numbers for this comparison.
174 : // - When checking whether this tombstone deletes internal key X we know that the levelIter is at this
175 : // sstable so (repeating the above) X.UserKey is either within the sstable bounds or earlier than the
176 : // start of the sstable (and there is no sstable in between at this level).
177 : // - X is at at a lower level. If X.UserKey >= smallestUserKey, and the tombstone contains
178 : // X.UserKey, we know X is deleted. This argument also works when X is a user key (we use
179 : // it when seeking to test whether a user key is deleted).
180 : // - X is at the same level. X must be within the sstable bounds of the tombstone so the
181 : // X.UserKey >= smallestUserKey comparison is trivially true. In addition to the tombstone containing
182 : // X we need to compare the sequence number of X and the tombstone (we don't need to look
183 : // at how this tombstone is truncated to act-within the file bounds, which are InternalKeys,
184 : // since X and the tombstone are from the same file).
185 : //
186 : // Iterating backwards has one more complication when checking whether a tombstone deletes
187 : // internal key X at a lower level (the construction we do here also works for a user key X).
188 : // Consider sstable bounds [c#8, g#InternalRangeDelSentinel] containing a tombstone [b, i)#7.
189 : // If we are positioned at key g#10 at a lower sstable, the tombstone we will see is [b, i)#7,
190 : // since the higher sstable is positioned at a key <= g#10. We should not use this tombstone
191 : // to delete g#10. This requires knowing that the largestUserKey is a range delete sentinel,
192 : // which we set in a separate bool below.
193 : //
194 : // These fields differs from the `*Boundary` fields in a few ways:
195 : // - `*Boundary` is only populated when the iterator is positioned exactly on the sentinel key.
196 : // - `*Boundary` can hold either the lower- or upper-bound, depending on the iterator direction.
197 : // - `*Boundary` is not exposed to the next higher-level iterator, i.e., `mergingIter`.
198 : boundaryContext *levelIterBoundaryContext
199 :
200 : // internalOpts holds the internal iterator options to pass to the table
201 : // cache when constructing new table iterators.
202 : internalOpts internalIterOpts
203 :
204 : // Scratch space for the obsolete keys filter, when there are no other block
205 : // property filters specified. See the performance note where
206 : // IterOptions.PointKeyFilters is declared.
207 : filtersBuf [1]BlockPropertyFilter
208 :
209 : // Disable invariant checks even if they are otherwise enabled. Used by tests
210 : // which construct "impossible" situations (e.g. seeking to a key before the
211 : // lower bound).
212 : disableInvariants bool
213 : }
214 :
215 : // filteredIter is an additional interface implemented by iterators that may
216 : // skip over point keys during iteration. The sstable.Iterator implements this
217 : // interface.
218 : type filteredIter interface {
219 : // MaybeFilteredKeys may be called when an iterator is exhausted, indicating
220 : // whether or not the iterator's last positioning method may have skipped
221 : // any keys due to low-level filters.
222 : //
223 : // When an iterator is configured to use block-property filters, the
224 : // low-level iterator may skip over blocks or whole sstables of keys.
225 : // Implementations that implement skipping must implement this interface.
226 : // Higher-level iterators require it to preserve invariants (eg, a levelIter
227 : // used in a mergingIter must keep the file's range-del iterator open until
228 : // the mergingIter has moved past the file's bounds, even if all of the
229 : // file's point keys were filtered).
230 : //
231 : // MaybeFilteredKeys may always return false positives, that is it may
232 : // return true when no keys were filtered. It should only be called when the
233 : // iterator is exhausted. It must never return false negatives when the
234 : // iterator is exhausted.
235 : MaybeFilteredKeys() bool
236 : }
237 :
238 : // levelIter implements the base.InternalIterator interface.
239 : var _ base.InternalIterator = (*levelIter)(nil)
240 :
241 : // newLevelIter returns a levelIter. It is permissible to pass a nil split
242 : // parameter if the caller is never going to call SeekPrefixGE.
243 : func newLevelIter(
244 : opts IterOptions,
245 : comparer *Comparer,
246 : newIters tableNewIters,
247 : files manifest.LevelIterator,
248 : level manifest.Level,
249 : internalOpts internalIterOpts,
250 1 : ) *levelIter {
251 1 : l := &levelIter{}
252 1 : l.init(context.Background(), opts, comparer, newIters, files, level,
253 1 : internalOpts)
254 1 : return l
255 1 : }
256 :
257 : func (l *levelIter) init(
258 : ctx context.Context,
259 : opts IterOptions,
260 : comparer *Comparer,
261 : newIters tableNewIters,
262 : files manifest.LevelIterator,
263 : level manifest.Level,
264 : internalOpts internalIterOpts,
265 1 : ) {
266 1 : l.ctx = ctx
267 1 : l.err = nil
268 1 : l.level = level
269 1 : l.logger = opts.getLogger()
270 1 : l.lower = opts.LowerBound
271 1 : l.upper = opts.UpperBound
272 1 : l.tableOpts.TableFilter = opts.TableFilter
273 1 : l.tableOpts.PointKeyFilters = opts.PointKeyFilters
274 1 : if len(opts.PointKeyFilters) == 0 {
275 1 : l.tableOpts.PointKeyFilters = l.filtersBuf[:0:1]
276 1 : }
277 1 : l.tableOpts.UseL6Filters = opts.UseL6Filters
278 1 : l.tableOpts.level = l.level
279 1 : l.tableOpts.snapshotForHideObsoletePoints = opts.snapshotForHideObsoletePoints
280 1 : l.comparer = comparer
281 1 : l.cmp = comparer.Compare
282 1 : l.split = comparer.Split
283 1 : l.iterFile = nil
284 1 : l.newIters = newIters
285 1 : l.files = files
286 1 : l.internalOpts = internalOpts
287 : }
288 :
289 1 : func (l *levelIter) initRangeDel(rangeDelIter *keyspan.FragmentIterator) {
290 1 : l.rangeDelIterPtr = rangeDelIter
291 1 : }
292 :
293 1 : func (l *levelIter) initBoundaryContext(context *levelIterBoundaryContext) {
294 1 : l.boundaryContext = context
295 1 : }
296 :
297 1 : func (l *levelIter) initCombinedIterState(state *combinedIterState) {
298 1 : l.combinedIterState = state
299 1 : }
300 :
301 1 : func (l *levelIter) maybeTriggerCombinedIteration(file *fileMetadata, dir int) {
302 1 : // If we encounter a file that contains range keys, we may need to
303 1 : // trigger a switch to combined range-key and point-key iteration,
304 1 : // if the *pebble.Iterator is configured for it. This switch is done
305 1 : // lazily because range keys are intended to be rare, and
306 1 : // constructing the range-key iterator substantially adds to the
307 1 : // cost of iterator construction and seeking.
308 1 : //
309 1 : // If l.combinedIterState.initialized is already true, either the
310 1 : // iterator is already using combined iteration or the iterator is not
311 1 : // configured to observe range keys. Either way, there's nothing to do.
312 1 : // If false, trigger the switch to combined iteration, using the the
313 1 : // file's bounds to seek the range-key iterator appropriately.
314 1 : //
315 1 : // We only need to trigger combined iteration if the file contains
316 1 : // RangeKeySets: if there are only Unsets and Dels, the user will observe no
317 1 : // range keys regardless. If this file has table stats available, they'll
318 1 : // tell us whether the file has any RangeKeySets. Otherwise, we must
319 1 : // fallback to assuming it does if HasRangeKeys=true.
320 1 : if file != nil && file.HasRangeKeys && l.combinedIterState != nil && !l.combinedIterState.initialized &&
321 1 : (l.upper == nil || l.cmp(file.SmallestRangeKey.UserKey, l.upper) < 0) &&
322 1 : (l.lower == nil || l.cmp(file.LargestRangeKey.UserKey, l.lower) > 0) &&
323 1 : (!file.StatsValid() || file.Stats.NumRangeKeySets > 0) {
324 1 : // The file contains range keys, and we're not using combined iteration yet.
325 1 : // Trigger a switch to combined iteration. It's possible that a switch has
326 1 : // already been triggered if multiple levels encounter files containing
327 1 : // range keys while executing a single mergingIter operation. In this case,
328 1 : // we need to compare the existing key recorded to l.combinedIterState.key,
329 1 : // adjusting it if our key is smaller (forward iteration) or larger
330 1 : // (backward iteration) than the existing key.
331 1 : //
332 1 : // These key comparisons are only required during a single high-level
333 1 : // iterator operation. When the high-level iter op completes,
334 1 : // iinitialized will be true, and future calls to this function will be
335 1 : // no-ops.
336 1 : switch dir {
337 1 : case +1:
338 1 : if !l.combinedIterState.triggered {
339 1 : l.combinedIterState.triggered = true
340 1 : l.combinedIterState.key = file.SmallestRangeKey.UserKey
341 1 : } else if l.cmp(l.combinedIterState.key, file.SmallestRangeKey.UserKey) > 0 {
342 1 : l.combinedIterState.key = file.SmallestRangeKey.UserKey
343 1 : }
344 1 : case -1:
345 1 : if !l.combinedIterState.triggered {
346 1 : l.combinedIterState.triggered = true
347 1 : l.combinedIterState.key = file.LargestRangeKey.UserKey
348 1 : } else if l.cmp(l.combinedIterState.key, file.LargestRangeKey.UserKey) < 0 {
349 1 : l.combinedIterState.key = file.LargestRangeKey.UserKey
350 1 : }
351 : }
352 : }
353 : }
354 :
355 1 : func (l *levelIter) findFileGE(key []byte, flags base.SeekGEFlags) *fileMetadata {
356 1 : // Find the earliest file whose largest key is >= key.
357 1 :
358 1 : // NB: if flags.TrySeekUsingNext()=true, the levelIter must respect it. If
359 1 : // the levelIter is positioned at the key P, it must return a key ≥ P. If
360 1 : // used within a merging iterator, the merging iterator will depend on the
361 1 : // levelIter only moving forward to maintain heap invariants.
362 1 :
363 1 : // Ordinarily we seek the LevelIterator using SeekGE. In some instances, we
364 1 : // Next instead. In other instances, we try Next-ing first, falling back to
365 1 : // seek:
366 1 : // a) flags.TrySeekUsingNext(): The top-level Iterator knows we're seeking
367 1 : // to a key later than the current iterator position. We don't know how
368 1 : // much later the seek key is, so it's possible there are many sstables
369 1 : // between the current position and the seek key. However in most real-
370 1 : // world use cases, the seek key is likely to be nearby. Rather than
371 1 : // performing a log(N) seek through the file metadata, we next a few
372 1 : // times from from our existing location. If we don't find a file whose
373 1 : // largest is >= key within a few nexts, we fall back to seeking.
374 1 : //
375 1 : // Note that in this case, the file returned by findFileGE may be
376 1 : // different than the file returned by a raw binary search (eg, when
377 1 : // TrySeekUsingNext=false). This is possible because the most recent
378 1 : // positioning operation may have already determined that previous
379 1 : // files' keys that are ≥ key are all deleted. This information is
380 1 : // encoded within the iterator's current iterator position and is
381 1 : // unavailable to a fresh binary search.
382 1 : //
383 1 : // b) flags.RelativeSeek(): The merging iterator decided to re-seek this
384 1 : // level according to a range tombstone. When lazy combined iteration
385 1 : // is enabled, the level iterator is responsible for watching for
386 1 : // files containing range keys and triggering the switch to combined
387 1 : // iteration when such a file is observed. If a range deletion was
388 1 : // observed in a higher level causing the merging iterator to seek the
389 1 : // level to the range deletion's end key, we need to check whether all
390 1 : // of the files between the old position and the new position contain
391 1 : // any range keys.
392 1 : //
393 1 : // In this scenario, we don't seek the LevelIterator and instead we
394 1 : // Next it, one file at a time, checking each for range keys. The
395 1 : // merging iterator sets this flag to inform us that we're moving
396 1 : // forward relative to the existing position and that we must examine
397 1 : // each intermediate sstable's metadata for lazy-combined iteration.
398 1 : // In this case, we only Next and never Seek. We set nextsUntilSeek=-1
399 1 : // to signal this intention.
400 1 : //
401 1 : // NB: At most one of flags.RelativeSeek() and flags.TrySeekUsingNext() may
402 1 : // be set, because the merging iterator re-seeks relative seeks with
403 1 : // explicitly only the RelativeSeek flag set.
404 1 : var nextsUntilSeek int
405 1 : var nextInsteadOfSeek bool
406 1 : if flags.TrySeekUsingNext() {
407 1 : nextInsteadOfSeek = true
408 1 : nextsUntilSeek = 4 // arbitrary
409 1 : }
410 1 : if flags.RelativeSeek() && l.combinedIterState != nil && !l.combinedIterState.initialized {
411 1 : nextInsteadOfSeek = true
412 1 : nextsUntilSeek = -1
413 1 : }
414 :
415 1 : var m *fileMetadata
416 1 : if nextInsteadOfSeek {
417 1 : m = l.iterFile
418 1 : } else {
419 1 : m = l.files.SeekGE(l.cmp, key)
420 1 : }
421 : // The below loop has a bit of an unusual organization. There are several
422 : // conditions under which we need to Next to a later file. If none of those
423 : // conditions are met, the file in `m` is okay to return. The loop body is
424 : // structured with a series of if statements, each of which may continue the
425 : // loop to the next file. If none of the statements are met, the end of the
426 : // loop body is a break.
427 1 : for m != nil {
428 1 : if m.HasRangeKeys {
429 1 : l.maybeTriggerCombinedIteration(m, +1)
430 1 :
431 1 : // Some files may only contain range keys, which we can skip.
432 1 : // NB: HasPointKeys=true if the file contains any points or range
433 1 : // deletions (which delete points).
434 1 : if !m.HasPointKeys {
435 1 : m = l.files.Next()
436 1 : continue
437 : }
438 : }
439 :
440 : // This file has point keys.
441 : //
442 : // However, there are a couple reasons why `m` may not be positioned ≥
443 : // `key` yet:
444 : //
445 : // 1. If SeekGE(key) landed on a file containing range keys, the file
446 : // may contain range keys ≥ `key` but no point keys ≥ `key`.
447 : // 2. When nexting instead of seeking, we must check to see whether
448 : // we've nexted sufficiently far, or we need to next again.
449 : //
450 : // If the file does not contain point keys ≥ `key`, next to continue
451 : // looking for a file that does.
452 1 : if (m.HasRangeKeys || nextInsteadOfSeek) && l.cmp(m.LargestPointKey.UserKey, key) < 0 {
453 1 : // If nextInsteadOfSeek is set and nextsUntilSeek is non-negative,
454 1 : // the iterator has been nexting hoping to discover the relevant
455 1 : // file without seeking. It's exhausted the allotted nextsUntilSeek
456 1 : // and should seek to the sought key.
457 1 : if nextInsteadOfSeek && nextsUntilSeek == 0 {
458 1 : nextInsteadOfSeek = false
459 1 : m = l.files.SeekGE(l.cmp, key)
460 1 : continue
461 1 : } else if nextsUntilSeek > 0 {
462 1 : nextsUntilSeek--
463 1 : }
464 1 : m = l.files.Next()
465 1 : continue
466 : }
467 :
468 : // This file has a point key bound ≥ `key`. But the largest point key
469 : // bound may still be a range deletion sentinel, which is exclusive. In
470 : // this case, the file doesn't actually contain any point keys equal to
471 : // `key`. We next to keep searching for a file that actually contains
472 : // point keys ≥ key.
473 : //
474 : // Additionally, this prevents loading untruncated range deletions from
475 : // a table which can't possibly contain the target key and is required
476 : // for correctness by mergingIter.SeekGE (see the comment in that
477 : // function).
478 1 : if m.LargestPointKey.IsExclusiveSentinel() && l.cmp(m.LargestPointKey.UserKey, key) == 0 {
479 1 : m = l.files.Next()
480 1 : continue
481 : }
482 :
483 : // This file contains point keys ≥ `key`. Break and return it.
484 1 : break
485 : }
486 1 : return m
487 : }
488 :
489 1 : func (l *levelIter) findFileLT(key []byte, flags base.SeekLTFlags) *fileMetadata {
490 1 : // Find the last file whose smallest key is < ikey.
491 1 :
492 1 : // Ordinarily we seek the LevelIterator using SeekLT.
493 1 : //
494 1 : // When lazy combined iteration is enabled, there's a complication. The
495 1 : // level iterator is responsible for watching for files containing range
496 1 : // keys and triggering the switch to combined iteration when such a file is
497 1 : // observed. If a range deletion was observed in a higher level causing the
498 1 : // merging iterator to seek the level to the range deletion's start key, we
499 1 : // need to check whether all of the files between the old position and the
500 1 : // new position contain any range keys.
501 1 : //
502 1 : // In this scenario, we don't seek the LevelIterator and instead we Prev it,
503 1 : // one file at a time, checking each for range keys.
504 1 : prevInsteadOfSeek := flags.RelativeSeek() && l.combinedIterState != nil && !l.combinedIterState.initialized
505 1 :
506 1 : var m *fileMetadata
507 1 : if prevInsteadOfSeek {
508 1 : m = l.iterFile
509 1 : } else {
510 1 : m = l.files.SeekLT(l.cmp, key)
511 1 : }
512 : // The below loop has a bit of an unusual organization. There are several
513 : // conditions under which we need to Prev to a previous file. If none of
514 : // those conditions are met, the file in `m` is okay to return. The loop
515 : // body is structured with a series of if statements, each of which may
516 : // continue the loop to the previous file. If none of the statements are
517 : // met, the end of the loop body is a break.
518 1 : for m != nil {
519 1 : if m.HasRangeKeys {
520 1 : l.maybeTriggerCombinedIteration(m, -1)
521 1 :
522 1 : // Some files may only contain range keys, which we can skip.
523 1 : // NB: HasPointKeys=true if the file contains any points or range
524 1 : // deletions (which delete points).
525 1 : if !m.HasPointKeys {
526 1 : m = l.files.Prev()
527 1 : continue
528 : }
529 : }
530 :
531 : // This file has point keys.
532 : //
533 : // However, there are a couple reasons why `m` may not be positioned <
534 : // `key` yet:
535 : //
536 : // 1. If SeekLT(key) landed on a file containing range keys, the file
537 : // may contain range keys < `key` but no point keys < `key`.
538 : // 2. When preving instead of seeking, we must check to see whether
539 : // we've preved sufficiently far, or we need to prev again.
540 : //
541 : // If the file does not contain point keys < `key`, prev to continue
542 : // looking for a file that does.
543 1 : if (m.HasRangeKeys || prevInsteadOfSeek) && l.cmp(m.SmallestPointKey.UserKey, key) >= 0 {
544 1 : m = l.files.Prev()
545 1 : continue
546 : }
547 :
548 : // This file contains point keys < `key`. Break and return it.
549 1 : break
550 : }
551 1 : return m
552 : }
553 :
554 : // Init the iteration bounds for the current table. Returns -1 if the table
555 : // lies fully before the lower bound, +1 if the table lies fully after the
556 : // upper bound, and 0 if the table overlaps the iteration bounds.
557 1 : func (l *levelIter) initTableBounds(f *fileMetadata) int {
558 1 : l.tableOpts.LowerBound = l.lower
559 1 : if l.tableOpts.LowerBound != nil {
560 1 : if l.cmp(f.LargestPointKey.UserKey, l.tableOpts.LowerBound) < 0 {
561 1 : // The largest key in the sstable is smaller than the lower bound.
562 1 : return -1
563 1 : }
564 1 : if l.cmp(l.tableOpts.LowerBound, f.SmallestPointKey.UserKey) <= 0 {
565 1 : // The lower bound is smaller or equal to the smallest key in the
566 1 : // table. Iteration within the table does not need to check the lower
567 1 : // bound.
568 1 : l.tableOpts.LowerBound = nil
569 1 : }
570 : }
571 1 : l.tableOpts.UpperBound = l.upper
572 1 : if l.tableOpts.UpperBound != nil {
573 1 : if l.cmp(f.SmallestPointKey.UserKey, l.tableOpts.UpperBound) >= 0 {
574 1 : // The smallest key in the sstable is greater than or equal to the upper
575 1 : // bound.
576 1 : return 1
577 1 : }
578 1 : if l.cmp(l.tableOpts.UpperBound, f.LargestPointKey.UserKey) > 0 {
579 1 : // The upper bound is greater than the largest key in the
580 1 : // table. Iteration within the table does not need to check the upper
581 1 : // bound. NB: tableOpts.UpperBound is exclusive and f.LargestPointKey is
582 1 : // inclusive.
583 1 : l.tableOpts.UpperBound = nil
584 1 : }
585 : }
586 1 : return 0
587 : }
588 :
589 : type loadFileReturnIndicator int8
590 :
591 : const (
592 : noFileLoaded loadFileReturnIndicator = iota
593 : fileAlreadyLoaded
594 : newFileLoaded
595 : )
596 :
597 1 : func (l *levelIter) loadFile(file *fileMetadata, dir int) loadFileReturnIndicator {
598 1 : l.smallestBoundary = nil
599 1 : l.largestBoundary = nil
600 1 : if l.boundaryContext != nil {
601 1 : l.boundaryContext.isSyntheticIterBoundsKey = false
602 1 : l.boundaryContext.isIgnorableBoundaryKey = false
603 1 : }
604 1 : if l.iterFile == file {
605 1 : if l.err != nil {
606 0 : return noFileLoaded
607 0 : }
608 1 : if l.iter != nil {
609 1 : // We don't bother comparing the file bounds with the iteration bounds when we have
610 1 : // an already open iterator. It is possible that the iter may not be relevant given the
611 1 : // current iteration bounds, but it knows those bounds, so it will enforce them.
612 1 : if l.rangeDelIterPtr != nil {
613 1 : *l.rangeDelIterPtr = l.rangeDelIterCopy
614 1 : }
615 :
616 : // There are a few reasons we might not have triggered combined
617 : // iteration yet, even though we already had `file` open.
618 : // 1. If the bounds changed, we might have previously avoided
619 : // switching to combined iteration because the bounds excluded
620 : // the range keys contained in this file.
621 : // 2. If an existing iterator was reconfigured to iterate over range
622 : // keys (eg, using SetOptions), then we wouldn't have triggered
623 : // the switch to combined iteration yet.
624 1 : l.maybeTriggerCombinedIteration(file, dir)
625 1 : return fileAlreadyLoaded
626 : }
627 : // We were already at file, but don't have an iterator, probably because the file was
628 : // beyond the iteration bounds. It may still be, but it is also possible that the bounds
629 : // have changed. We handle that below.
630 : }
631 :
632 : // Close both iter and rangeDelIterPtr. While mergingIter knows about
633 : // rangeDelIterPtr, it can't call Close() on it because it does not know
634 : // when the levelIter will switch it. Note that levelIter.Close() can be
635 : // called multiple times.
636 1 : if err := l.Close(); err != nil {
637 0 : return noFileLoaded
638 0 : }
639 :
640 1 : for {
641 1 : l.iterFile = file
642 1 : if file == nil {
643 1 : return noFileLoaded
644 1 : }
645 :
646 1 : l.maybeTriggerCombinedIteration(file, dir)
647 1 : if !file.HasPointKeys {
648 1 : switch dir {
649 1 : case +1:
650 1 : file = l.files.Next()
651 1 : continue
652 1 : case -1:
653 1 : file = l.files.Prev()
654 1 : continue
655 : }
656 : }
657 :
658 1 : switch l.initTableBounds(file) {
659 1 : case -1:
660 1 : // The largest key in the sstable is smaller than the lower bound.
661 1 : if dir < 0 {
662 1 : return noFileLoaded
663 1 : }
664 0 : file = l.files.Next()
665 0 : continue
666 1 : case +1:
667 1 : // The smallest key in the sstable is greater than or equal to the upper
668 1 : // bound.
669 1 : if dir > 0 {
670 1 : return noFileLoaded
671 1 : }
672 0 : file = l.files.Prev()
673 0 : continue
674 : }
675 :
676 1 : var rangeDelIter keyspan.FragmentIterator
677 1 : var iter internalIterator
678 1 : iter, rangeDelIter, l.err = l.newIters(l.ctx, l.iterFile, &l.tableOpts, l.internalOpts)
679 1 : l.iter = iter
680 1 : if l.err != nil {
681 0 : return noFileLoaded
682 0 : }
683 1 : if rangeDelIter != nil {
684 1 : if fi, ok := iter.(filteredIter); ok {
685 1 : l.filteredIter = fi
686 1 : } else {
687 0 : l.filteredIter = nil
688 0 : }
689 1 : } else {
690 1 : l.filteredIter = nil
691 1 : }
692 1 : if l.rangeDelIterPtr != nil {
693 1 : *l.rangeDelIterPtr = rangeDelIter
694 1 : l.rangeDelIterCopy = rangeDelIter
695 1 : } else if rangeDelIter != nil {
696 1 : rangeDelIter.Close()
697 1 : }
698 1 : if l.boundaryContext != nil {
699 1 : l.boundaryContext.smallestUserKey = file.Smallest.UserKey
700 1 : l.boundaryContext.largestUserKey = file.Largest.UserKey
701 1 : l.boundaryContext.isLargestUserKeyExclusive = file.Largest.IsExclusiveSentinel()
702 1 : }
703 1 : return newFileLoaded
704 : }
705 : }
706 :
707 : // In race builds we verify that the keys returned by levelIter lie within
708 : // [lower,upper).
709 1 : func (l *levelIter) verify(key *InternalKey, val base.LazyValue) (*InternalKey, base.LazyValue) {
710 1 : // Note that invariants.Enabled is a compile time constant, which means the
711 1 : // block of code will be compiled out of normal builds making this method
712 1 : // eligible for inlining. Do not change this to use a variable.
713 1 : if invariants.Enabled && !l.disableInvariants && key != nil {
714 1 : // We allow returning a boundary key that is outside of the lower/upper
715 1 : // bounds as such keys are always range tombstones which will be skipped by
716 1 : // the Iterator.
717 1 : if l.lower != nil && key != l.smallestBoundary && l.cmp(key.UserKey, l.lower) < 0 {
718 0 : l.logger.Fatalf("levelIter %s: lower bound violation: %s < %s\n%s", l.level, key, l.lower, debug.Stack())
719 0 : }
720 1 : if l.upper != nil && key != l.largestBoundary && l.cmp(key.UserKey, l.upper) > 0 {
721 0 : l.logger.Fatalf("levelIter %s: upper bound violation: %s > %s\n%s", l.level, key, l.upper, debug.Stack())
722 0 : }
723 : }
724 1 : return key, val
725 : }
726 :
727 1 : func (l *levelIter) SeekGE(key []byte, flags base.SeekGEFlags) (*InternalKey, base.LazyValue) {
728 1 : l.err = nil // clear cached iteration error
729 1 : if l.boundaryContext != nil {
730 1 : l.boundaryContext.isSyntheticIterBoundsKey = false
731 1 : l.boundaryContext.isIgnorableBoundaryKey = false
732 1 : }
733 : // NB: the top-level Iterator has already adjusted key based on
734 : // IterOptions.LowerBound.
735 1 : loadFileIndicator := l.loadFile(l.findFileGE(key, flags), +1)
736 1 : if loadFileIndicator == noFileLoaded {
737 1 : return nil, base.LazyValue{}
738 1 : }
739 1 : if loadFileIndicator == newFileLoaded {
740 1 : // File changed, so l.iter has changed, and that iterator is not
741 1 : // positioned appropriately.
742 1 : flags = flags.DisableTrySeekUsingNext()
743 1 : }
744 1 : if ikey, val := l.iter.SeekGE(key, flags); ikey != nil {
745 1 : return l.verify(ikey, val)
746 1 : }
747 1 : return l.verify(l.skipEmptyFileForward())
748 : }
749 :
750 : func (l *levelIter) SeekPrefixGE(
751 : prefix, key []byte, flags base.SeekGEFlags,
752 1 : ) (*base.InternalKey, base.LazyValue) {
753 1 : l.err = nil // clear cached iteration error
754 1 : if l.boundaryContext != nil {
755 1 : l.boundaryContext.isSyntheticIterBoundsKey = false
756 1 : l.boundaryContext.isIgnorableBoundaryKey = false
757 1 : }
758 :
759 : // NB: the top-level Iterator has already adjusted key based on
760 : // IterOptions.LowerBound.
761 1 : loadFileIndicator := l.loadFile(l.findFileGE(key, flags), +1)
762 1 : if loadFileIndicator == noFileLoaded {
763 1 : return nil, base.LazyValue{}
764 1 : }
765 1 : if loadFileIndicator == newFileLoaded {
766 1 : // File changed, so l.iter has changed, and that iterator is not
767 1 : // positioned appropriately.
768 1 : flags = flags.DisableTrySeekUsingNext()
769 1 : }
770 1 : if key, val := l.iter.SeekPrefixGE(prefix, key, flags); key != nil {
771 1 : return l.verify(key, val)
772 1 : }
773 : // When SeekPrefixGE returns nil, we have not necessarily reached the end of
774 : // the sstable. All we know is that a key with prefix does not exist in the
775 : // current sstable. We do know that the key lies within the bounds of the
776 : // table as findFileGE found the table where key <= meta.Largest. We return
777 : // the table's bound with isIgnorableBoundaryKey set.
778 1 : if l.rangeDelIterPtr != nil && *l.rangeDelIterPtr != nil {
779 1 : if l.tableOpts.UpperBound != nil {
780 1 : l.syntheticBoundary.UserKey = l.tableOpts.UpperBound
781 1 : l.syntheticBoundary.Trailer = InternalKeyRangeDeleteSentinel
782 1 : l.largestBoundary = &l.syntheticBoundary
783 1 : if l.boundaryContext != nil {
784 1 : l.boundaryContext.isSyntheticIterBoundsKey = true
785 1 : l.boundaryContext.isIgnorableBoundaryKey = false
786 1 : }
787 1 : return l.verify(l.largestBoundary, base.LazyValue{})
788 : }
789 : // Return the file's largest bound, ensuring this file stays open until
790 : // the mergingIter advances beyond the file's bounds. We set
791 : // isIgnorableBoundaryKey to signal that the actual key returned should
792 : // be ignored, and does not represent a real key in the database.
793 1 : l.largestBoundary = &l.iterFile.LargestPointKey
794 1 : if l.boundaryContext != nil {
795 1 : l.boundaryContext.isSyntheticIterBoundsKey = false
796 1 : l.boundaryContext.isIgnorableBoundaryKey = true
797 1 : }
798 1 : return l.verify(l.largestBoundary, base.LazyValue{})
799 : }
800 : // It is possible that we are here because bloom filter matching failed. In
801 : // that case it is likely that all keys matching the prefix are wholly
802 : // within the current file and cannot be in the subsequent file. In that
803 : // case we don't want to go to the next file, since loading and seeking in
804 : // there has some cost. Additionally, for sparse key spaces, loading the
805 : // next file will defeat the optimization for the next SeekPrefixGE that is
806 : // called with flags.TrySeekUsingNext(), since for sparse key spaces it is
807 : // likely that the next key will also be contained in the current file.
808 1 : var n int
809 1 : if l.split != nil {
810 1 : // If the split function is specified, calculate the prefix length accordingly.
811 1 : n = l.split(l.iterFile.LargestPointKey.UserKey)
812 1 : } else {
813 0 : // If the split function is not specified, the entire key is used as the
814 0 : // prefix. This case can occur when getIter uses SeekPrefixGE.
815 0 : n = len(l.iterFile.LargestPointKey.UserKey)
816 0 : }
817 1 : if l.cmp(prefix, l.iterFile.LargestPointKey.UserKey[:n]) < 0 {
818 1 : return nil, base.LazyValue{}
819 1 : }
820 1 : return l.verify(l.skipEmptyFileForward())
821 : }
822 :
823 1 : func (l *levelIter) SeekLT(key []byte, flags base.SeekLTFlags) (*InternalKey, base.LazyValue) {
824 1 : l.err = nil // clear cached iteration error
825 1 : if l.boundaryContext != nil {
826 1 : l.boundaryContext.isSyntheticIterBoundsKey = false
827 1 : l.boundaryContext.isIgnorableBoundaryKey = false
828 1 : }
829 :
830 : // NB: the top-level Iterator has already adjusted key based on
831 : // IterOptions.UpperBound.
832 1 : if l.loadFile(l.findFileLT(key, flags), -1) == noFileLoaded {
833 1 : return nil, base.LazyValue{}
834 1 : }
835 1 : if key, val := l.iter.SeekLT(key, flags); key != nil {
836 1 : return l.verify(key, val)
837 1 : }
838 1 : return l.verify(l.skipEmptyFileBackward())
839 : }
840 :
841 1 : func (l *levelIter) First() (*InternalKey, base.LazyValue) {
842 1 : l.err = nil // clear cached iteration error
843 1 : if l.boundaryContext != nil {
844 1 : l.boundaryContext.isSyntheticIterBoundsKey = false
845 1 : l.boundaryContext.isIgnorableBoundaryKey = false
846 1 : }
847 :
848 : // NB: the top-level Iterator will call SeekGE if IterOptions.LowerBound is
849 : // set.
850 1 : if l.loadFile(l.files.First(), +1) == noFileLoaded {
851 1 : return nil, base.LazyValue{}
852 1 : }
853 1 : if key, val := l.iter.First(); key != nil {
854 1 : return l.verify(key, val)
855 1 : }
856 1 : return l.verify(l.skipEmptyFileForward())
857 : }
858 :
859 1 : func (l *levelIter) Last() (*InternalKey, base.LazyValue) {
860 1 : l.err = nil // clear cached iteration error
861 1 : if l.boundaryContext != nil {
862 1 : l.boundaryContext.isSyntheticIterBoundsKey = false
863 1 : l.boundaryContext.isIgnorableBoundaryKey = false
864 1 : }
865 :
866 : // NB: the top-level Iterator will call SeekLT if IterOptions.UpperBound is
867 : // set.
868 1 : if l.loadFile(l.files.Last(), -1) == noFileLoaded {
869 1 : return nil, base.LazyValue{}
870 1 : }
871 1 : if key, val := l.iter.Last(); key != nil {
872 1 : return l.verify(key, val)
873 1 : }
874 1 : return l.verify(l.skipEmptyFileBackward())
875 : }
876 :
877 1 : func (l *levelIter) Next() (*InternalKey, base.LazyValue) {
878 1 : if l.err != nil || l.iter == nil {
879 0 : return nil, base.LazyValue{}
880 0 : }
881 1 : if l.boundaryContext != nil {
882 1 : l.boundaryContext.isSyntheticIterBoundsKey = false
883 1 : l.boundaryContext.isIgnorableBoundaryKey = false
884 1 : }
885 :
886 1 : switch {
887 1 : case l.largestBoundary != nil:
888 1 : if l.tableOpts.UpperBound != nil {
889 0 : // The UpperBound was within this file, so don't load the next
890 0 : // file. We leave the largestBoundary unchanged so that subsequent
891 0 : // calls to Next() stay at this file. If a Seek/First/Last call is
892 0 : // made and this file continues to be relevant, loadFile() will
893 0 : // set the largestBoundary to nil.
894 0 : if l.rangeDelIterPtr != nil {
895 0 : *l.rangeDelIterPtr = nil
896 0 : }
897 0 : return nil, base.LazyValue{}
898 : }
899 : // We're stepping past the boundary key, so now we can load the next file.
900 1 : if l.loadFile(l.files.Next(), +1) != noFileLoaded {
901 1 : if key, val := l.iter.First(); key != nil {
902 1 : return l.verify(key, val)
903 1 : }
904 1 : return l.verify(l.skipEmptyFileForward())
905 : }
906 1 : return nil, base.LazyValue{}
907 :
908 1 : default:
909 1 : // Reset the smallest boundary since we're moving away from it.
910 1 : l.smallestBoundary = nil
911 1 : if key, val := l.iter.Next(); key != nil {
912 1 : return l.verify(key, val)
913 1 : }
914 : }
915 1 : return l.verify(l.skipEmptyFileForward())
916 : }
917 :
918 1 : func (l *levelIter) NextPrefix(succKey []byte) (*InternalKey, base.LazyValue) {
919 1 : if l.err != nil || l.iter == nil {
920 0 : return nil, base.LazyValue{}
921 0 : }
922 1 : if l.boundaryContext != nil {
923 1 : l.boundaryContext.isSyntheticIterBoundsKey = false
924 1 : l.boundaryContext.isIgnorableBoundaryKey = false
925 1 : }
926 :
927 1 : switch {
928 1 : case l.largestBoundary != nil:
929 1 : if l.tableOpts.UpperBound != nil {
930 0 : // The UpperBound was within this file, so don't load the next
931 0 : // file. We leave the largestBoundary unchanged so that subsequent
932 0 : // calls to Next() stay at this file. If a Seek/First/Last call is
933 0 : // made and this file continues to be relevant, loadFile() will
934 0 : // set the largestBoundary to nil.
935 0 : if l.rangeDelIterPtr != nil {
936 0 : *l.rangeDelIterPtr = nil
937 0 : }
938 0 : return nil, base.LazyValue{}
939 : }
940 : // We're stepping past the boundary key, so we need to load a later
941 : // file.
942 :
943 1 : default:
944 1 : // Reset the smallest boundary since we're moving away from it.
945 1 : l.smallestBoundary = nil
946 1 :
947 1 : if key, val := l.iter.NextPrefix(succKey); key != nil {
948 1 : return l.verify(key, val)
949 1 : }
950 : // Fall through to seeking.
951 : }
952 :
953 : // Seek the manifest level iterator using TrySeekUsingNext=true and
954 : // RelativeSeek=true so that we take advantage of the knowledge that
955 : // `succKey` can only be contained in later files.
956 1 : metadataSeekFlags := base.SeekGEFlagsNone.EnableTrySeekUsingNext().EnableRelativeSeek()
957 1 : if l.loadFile(l.findFileGE(succKey, metadataSeekFlags), +1) != noFileLoaded {
958 1 : // NB: The SeekGE on the file's iterator must not set TrySeekUsingNext,
959 1 : // because l.iter is unpositioned.
960 1 : if key, val := l.iter.SeekGE(succKey, base.SeekGEFlagsNone); key != nil {
961 1 : return l.verify(key, val)
962 1 : }
963 1 : return l.verify(l.skipEmptyFileForward())
964 : }
965 1 : return nil, base.LazyValue{}
966 : }
967 :
968 1 : func (l *levelIter) Prev() (*InternalKey, base.LazyValue) {
969 1 : if l.err != nil || l.iter == nil {
970 0 : return nil, base.LazyValue{}
971 0 : }
972 1 : if l.boundaryContext != nil {
973 1 : l.boundaryContext.isSyntheticIterBoundsKey = false
974 1 : l.boundaryContext.isIgnorableBoundaryKey = false
975 1 : }
976 :
977 1 : switch {
978 1 : case l.smallestBoundary != nil:
979 1 : if l.tableOpts.LowerBound != nil {
980 0 : // The LowerBound was within this file, so don't load the previous
981 0 : // file. We leave the smallestBoundary unchanged so that
982 0 : // subsequent calls to Prev() stay at this file. If a
983 0 : // Seek/First/Last call is made and this file continues to be
984 0 : // relevant, loadFile() will set the smallestBoundary to nil.
985 0 : if l.rangeDelIterPtr != nil {
986 0 : *l.rangeDelIterPtr = nil
987 0 : }
988 0 : return nil, base.LazyValue{}
989 : }
990 : // We're stepping past the boundary key, so now we can load the prev file.
991 1 : if l.loadFile(l.files.Prev(), -1) != noFileLoaded {
992 1 : if key, val := l.iter.Last(); key != nil {
993 1 : return l.verify(key, val)
994 1 : }
995 1 : return l.verify(l.skipEmptyFileBackward())
996 : }
997 1 : return nil, base.LazyValue{}
998 :
999 1 : default:
1000 1 : // Reset the largest boundary since we're moving away from it.
1001 1 : l.largestBoundary = nil
1002 1 : if key, val := l.iter.Prev(); key != nil {
1003 1 : return l.verify(key, val)
1004 1 : }
1005 : }
1006 1 : return l.verify(l.skipEmptyFileBackward())
1007 : }
1008 :
1009 1 : func (l *levelIter) skipEmptyFileForward() (*InternalKey, base.LazyValue) {
1010 1 : var key *InternalKey
1011 1 : var val base.LazyValue
1012 1 : // The first iteration of this loop starts with an already exhausted
1013 1 : // l.iter. The reason for the exhaustion is either that we iterated to the
1014 1 : // end of the sstable, or our iteration was terminated early due to the
1015 1 : // presence of an upper-bound or the use of SeekPrefixGE. If
1016 1 : // l.rangeDelIterPtr is non-nil, we may need to pretend the iterator is
1017 1 : // not exhausted to allow for the merging to finish consuming the
1018 1 : // l.rangeDelIterPtr before levelIter switches the rangeDelIter from
1019 1 : // under it. This pretense is done by either generating a synthetic
1020 1 : // boundary key or returning the largest key of the file, depending on the
1021 1 : // exhaustion reason.
1022 1 :
1023 1 : // Subsequent iterations will examine consecutive files such that the first
1024 1 : // file that does not have an exhausted iterator causes the code to return
1025 1 : // that key, else the behavior described above if there is a corresponding
1026 1 : // rangeDelIterPtr.
1027 1 : for ; key == nil; key, val = l.iter.First() {
1028 1 : if l.rangeDelIterPtr != nil {
1029 1 : // We're being used as part of a mergingIter and we've exhausted the
1030 1 : // current sstable. If an upper bound is present and the upper bound lies
1031 1 : // within the current sstable, then we will have reached the upper bound
1032 1 : // rather than the end of the sstable. We need to return a synthetic
1033 1 : // boundary key so that mergingIter can use the range tombstone iterator
1034 1 : // until the other levels have reached this boundary.
1035 1 : //
1036 1 : // It is safe to set the boundary key to the UpperBound user key
1037 1 : // with the RANGEDEL sentinel since it is the smallest InternalKey
1038 1 : // that matches the exclusive upper bound, and does not represent
1039 1 : // a real key.
1040 1 : if l.tableOpts.UpperBound != nil {
1041 1 : if *l.rangeDelIterPtr != nil {
1042 1 : l.syntheticBoundary.UserKey = l.tableOpts.UpperBound
1043 1 : l.syntheticBoundary.Trailer = InternalKeyRangeDeleteSentinel
1044 1 : l.largestBoundary = &l.syntheticBoundary
1045 1 : if l.boundaryContext != nil {
1046 1 : l.boundaryContext.isSyntheticIterBoundsKey = true
1047 1 : }
1048 1 : return l.largestBoundary, base.LazyValue{}
1049 : }
1050 : // Else there are no range deletions in this sstable. This
1051 : // helps with performance when many levels are populated with
1052 : // sstables and most don't have any actual keys within the
1053 : // bounds.
1054 1 : return nil, base.LazyValue{}
1055 : }
1056 : // If the boundary is a range deletion tombstone, return that key.
1057 1 : if l.iterFile.LargestPointKey.Kind() == InternalKeyKindRangeDelete {
1058 1 : l.largestBoundary = &l.iterFile.LargestPointKey
1059 1 : if l.boundaryContext != nil {
1060 1 : l.boundaryContext.isIgnorableBoundaryKey = true
1061 1 : }
1062 1 : return l.largestBoundary, base.LazyValue{}
1063 : }
1064 : // If the last point iterator positioning op might've skipped keys,
1065 : // it's possible the file's range deletions are still relevant to
1066 : // other levels. Return the largest boundary as a special ignorable
1067 : // marker to avoid advancing to the next file.
1068 : //
1069 : // The sstable iterator cannot guarantee that keys were skipped. A
1070 : // SeekGE that lands on a index separator k only knows that the
1071 : // block at the index entry contains keys ≤ k. We can't know whether
1072 : // there were actually keys between the seek key and the index
1073 : // separator key. If the block is then excluded due to block
1074 : // property filters, the iterator does not know whether keys were
1075 : // actually skipped by the block's exclusion.
1076 : //
1077 : // Since MaybeFilteredKeys cannot guarantee that keys were skipped,
1078 : // it's possible l.iterFile.Largest was already returned. Returning
1079 : // l.iterFile.Largest again is a violation of the strict
1080 : // monotonicity normally provided. The mergingIter's heap can
1081 : // tolerate this repeat key and in this case will keep the level at
1082 : // the top of the heap and immediately skip the entry, advancing to
1083 : // the next file.
1084 1 : if *l.rangeDelIterPtr != nil && l.filteredIter != nil &&
1085 1 : l.filteredIter.MaybeFilteredKeys() {
1086 1 : l.largestBoundary = &l.iterFile.Largest
1087 1 : if l.boundaryContext != nil {
1088 1 : l.boundaryContext.isIgnorableBoundaryKey = true
1089 1 : }
1090 1 : return l.largestBoundary, base.LazyValue{}
1091 : }
1092 : }
1093 :
1094 : // Current file was exhausted. Move to the next file.
1095 1 : if l.loadFile(l.files.Next(), +1) == noFileLoaded {
1096 1 : return nil, base.LazyValue{}
1097 1 : }
1098 : }
1099 1 : return key, val
1100 : }
1101 :
1102 1 : func (l *levelIter) skipEmptyFileBackward() (*InternalKey, base.LazyValue) {
1103 1 : var key *InternalKey
1104 1 : var val base.LazyValue
1105 1 : // The first iteration of this loop starts with an already exhausted
1106 1 : // l.iter. The reason for the exhaustion is either that we iterated to the
1107 1 : // end of the sstable, or our iteration was terminated early due to the
1108 1 : // presence of a lower-bound. If l.rangeDelIterPtr is non-nil, we may need
1109 1 : // to pretend the iterator is not exhausted to allow for the merging to
1110 1 : // finish consuming the l.rangeDelIterPtr before levelIter switches the
1111 1 : // rangeDelIter from under it. This pretense is done by either generating
1112 1 : // a synthetic boundary key or returning the smallest key of the file,
1113 1 : // depending on the exhaustion reason.
1114 1 :
1115 1 : // Subsequent iterations will examine consecutive files such that the first
1116 1 : // file that does not have an exhausted iterator causes the code to return
1117 1 : // that key, else the behavior described above if there is a corresponding
1118 1 : // rangeDelIterPtr.
1119 1 : for ; key == nil; key, val = l.iter.Last() {
1120 1 : if l.rangeDelIterPtr != nil {
1121 1 : // We're being used as part of a mergingIter and we've exhausted the
1122 1 : // current sstable. If a lower bound is present and the lower bound lies
1123 1 : // within the current sstable, then we will have reached the lower bound
1124 1 : // rather than the beginning of the sstable. We need to return a
1125 1 : // synthetic boundary key so that mergingIter can use the range tombstone
1126 1 : // iterator until the other levels have reached this boundary.
1127 1 : //
1128 1 : // It is safe to set the boundary key to the LowerBound user key
1129 1 : // with the RANGEDEL sentinel since it is the smallest InternalKey
1130 1 : // that is within the inclusive lower bound, and does not
1131 1 : // represent a real key.
1132 1 : if l.tableOpts.LowerBound != nil {
1133 1 : if *l.rangeDelIterPtr != nil {
1134 1 : l.syntheticBoundary.UserKey = l.tableOpts.LowerBound
1135 1 : l.syntheticBoundary.Trailer = InternalKeyRangeDeleteSentinel
1136 1 : l.smallestBoundary = &l.syntheticBoundary
1137 1 : if l.boundaryContext != nil {
1138 1 : l.boundaryContext.isSyntheticIterBoundsKey = true
1139 1 : }
1140 1 : return l.smallestBoundary, base.LazyValue{}
1141 : }
1142 : // Else there are no range deletions in this sstable. This
1143 : // helps with performance when many levels are populated with
1144 : // sstables and most don't have any actual keys within the
1145 : // bounds.
1146 1 : return nil, base.LazyValue{}
1147 : }
1148 : // If the boundary is a range deletion tombstone, return that key.
1149 1 : if l.iterFile.SmallestPointKey.Kind() == InternalKeyKindRangeDelete {
1150 1 : l.smallestBoundary = &l.iterFile.SmallestPointKey
1151 1 : if l.boundaryContext != nil {
1152 1 : l.boundaryContext.isIgnorableBoundaryKey = true
1153 1 : }
1154 1 : return l.smallestBoundary, base.LazyValue{}
1155 : }
1156 : // If the last point iterator positioning op skipped keys, it's
1157 : // possible the file's range deletions are still relevant to other
1158 : // levels. Return the smallest boundary as a special ignorable key
1159 : // to avoid advancing to the next file.
1160 : //
1161 : // The sstable iterator cannot guarantee that keys were skipped. A
1162 : // SeekGE that lands on a index separator k only knows that the
1163 : // block at the index entry contains keys ≤ k. We can't know whether
1164 : // there were actually keys between the seek key and the index
1165 : // separator key. If the block is then excluded due to block
1166 : // property filters, the iterator does not know whether keys were
1167 : // actually skipped by the block's exclusion.
1168 : //
1169 : // Since MaybeFilteredKeys cannot guarantee that keys were skipped,
1170 : // it's possible l.iterFile.Smallest was already returned. Returning
1171 : // l.iterFile.Smallest again is a violation of the strict
1172 : // monotonicity normally provided. The mergingIter's heap can
1173 : // tolerate this repeat key and in this case will keep the level at
1174 : // the top of the heap and immediately skip the entry, advancing to
1175 : // the next file.
1176 1 : if *l.rangeDelIterPtr != nil && l.filteredIter != nil && l.filteredIter.MaybeFilteredKeys() {
1177 1 : l.smallestBoundary = &l.iterFile.Smallest
1178 1 : if l.boundaryContext != nil {
1179 1 : l.boundaryContext.isIgnorableBoundaryKey = true
1180 1 : }
1181 1 : return l.smallestBoundary, base.LazyValue{}
1182 : }
1183 : }
1184 :
1185 : // Current file was exhausted. Move to the previous file.
1186 1 : if l.loadFile(l.files.Prev(), -1) == noFileLoaded {
1187 1 : return nil, base.LazyValue{}
1188 1 : }
1189 : }
1190 1 : return key, val
1191 : }
1192 :
1193 1 : func (l *levelIter) Error() error {
1194 1 : if l.err != nil || l.iter == nil {
1195 1 : return l.err
1196 1 : }
1197 1 : return l.iter.Error()
1198 : }
1199 :
1200 1 : func (l *levelIter) Close() error {
1201 1 : if l.iter != nil {
1202 1 : l.err = l.iter.Close()
1203 1 : l.iter = nil
1204 1 : }
1205 1 : if l.rangeDelIterPtr != nil {
1206 1 : if t := l.rangeDelIterCopy; t != nil {
1207 1 : l.err = firstError(l.err, t.Close())
1208 1 : }
1209 1 : *l.rangeDelIterPtr = nil
1210 1 : l.rangeDelIterCopy = nil
1211 : }
1212 1 : return l.err
1213 : }
1214 :
1215 1 : func (l *levelIter) SetBounds(lower, upper []byte) {
1216 1 : l.lower = lower
1217 1 : l.upper = upper
1218 1 :
1219 1 : if l.iter == nil {
1220 1 : return
1221 1 : }
1222 :
1223 : // Update tableOpts.{Lower,Upper}Bound in case the new boundaries fall within
1224 : // the boundaries of the current table.
1225 1 : if l.initTableBounds(l.iterFile) != 0 {
1226 1 : // The table does not overlap the bounds. Close() will set levelIter.err if
1227 1 : // an error occurs.
1228 1 : _ = l.Close()
1229 1 : return
1230 1 : }
1231 :
1232 1 : l.iter.SetBounds(l.tableOpts.LowerBound, l.tableOpts.UpperBound)
1233 : }
1234 :
1235 1 : func (l *levelIter) String() string {
1236 1 : if l.iterFile != nil {
1237 1 : return fmt.Sprintf("%s: fileNum=%s", l.level, l.iter.String())
1238 1 : }
1239 0 : return fmt.Sprintf("%s: fileNum=<nil>", l.level)
1240 : }
1241 :
1242 : var _ internalIterator = &levelIter{}
|