Line data Source code
1 : // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package pebble
6 :
7 : import (
8 : "context"
9 : "fmt"
10 : "runtime/debug"
11 :
12 : "github.com/cockroachdb/pebble/internal/base"
13 : "github.com/cockroachdb/pebble/internal/invariants"
14 : "github.com/cockroachdb/pebble/internal/keyspan"
15 : "github.com/cockroachdb/pebble/internal/manifest"
16 : "github.com/cockroachdb/pebble/sstable"
17 : )
18 :
19 : type internalIterOpts struct {
20 : bytesIterated *uint64
21 : bufferPool *sstable.BufferPool
22 : stats *base.InternalIteratorStats
23 : boundLimitedFilter sstable.BoundLimitedBlockPropertyFilter
24 : }
25 :
26 : // levelIter provides a merged view of the sstables in a level.
27 : //
28 : // levelIter is used during compaction and as part of the Iterator
29 : // implementation. When used as part of the Iterator implementation, level
30 : // iteration needs to "pause" at sstable boundaries if a range deletion
31 : // tombstone is the source of that boundary. We know if a range tombstone is
32 : // the smallest or largest key in a file because the kind will be
33 : // InternalKeyKindRangeDeletion. If the boundary key is a range deletion
34 : // tombstone, we materialize a fake entry to return from levelIter. This
35 : // prevents mergingIter from advancing past the sstable until the sstable
36 : // contains the smallest (or largest for reverse iteration) key in the merged
37 : // heap. Note that mergingIter treats a range deletion tombstone returned by
38 : // the point iterator as a no-op.
39 : //
40 : // SeekPrefixGE presents the need for a second type of pausing. If an sstable
41 : // iterator returns "not found" for a SeekPrefixGE operation, we don't want to
42 : // advance to the next sstable as the "not found" does not indicate that all of
43 : // the keys in the sstable are less than the search key. Advancing to the next
44 : // sstable would cause us to skip over range tombstones, violating
45 : // correctness. Instead, SeekPrefixGE creates a synthetic boundary key with the
46 : // kind InternalKeyKindRangeDeletion which will be used to pause the levelIter
47 : // at the sstable until the mergingIter is ready to advance past it.
48 : type levelIter struct {
49 : // The context is stored here since (a) iterators are expected to be
50 : // short-lived (since they pin sstables), (b) plumbing a context into every
51 : // method is very painful, (c) they do not (yet) respect context
52 : // cancellation and are only used for tracing.
53 : ctx context.Context
54 : logger Logger
55 : comparer *Comparer
56 : cmp Compare
57 : split Split
58 : // The lower/upper bounds for iteration as specified at creation or the most
59 : // recent call to SetBounds.
60 : lower []byte
61 : upper []byte
62 : // The iterator options for the currently open table. If
63 : // tableOpts.{Lower,Upper}Bound are nil, the corresponding iteration boundary
64 : // does not lie within the table bounds.
65 : tableOpts IterOptions
66 : // The LSM level this levelIter is initialized for.
67 : level manifest.Level
68 : // The keys to return when iterating past an sstable boundary and that
69 : // boundary is a range deletion tombstone. The boundary could be smallest
70 : // (i.e. arrived at with Prev), or largest (arrived at with Next).
71 : smallestBoundary *InternalKey
72 : largestBoundary *InternalKey
73 : // combinedIterState may be set when a levelIter is used during user
74 : // iteration. Although levelIter only iterates over point keys, it's also
75 : // responsible for lazily constructing the combined range & point iterator
76 : // when it observes a file containing range keys. If the combined iter
77 : // state's initialized field is true, the iterator is already using combined
78 : // iterator, OR the iterator is not configured to use combined iteration. If
79 : // it's false, the levelIter must set the `triggered` and `key` fields when
80 : // the levelIter passes over a file containing range keys. See the
81 : // lazyCombinedIter for more details.
82 : combinedIterState *combinedIterState
83 : // A synthetic boundary key to return when SeekPrefixGE finds an sstable
84 : // which doesn't contain the search key, but which does contain range
85 : // tombstones.
86 : syntheticBoundary InternalKey
87 : // The iter for the current file. It is nil under any of the following conditions:
88 : // - files.Current() == nil
89 : // - err != nil
90 : // - some other constraint, like the bounds in opts, caused the file at index to not
91 : // be relevant to the iteration.
92 : iter internalIterator
93 : // iterFile holds the current file. It is always equal to l.files.Current().
94 : iterFile *fileMetadata
95 : // filteredIter is an optional interface that may be implemented by internal
96 : // iterators that perform filtering of keys. When a new file's iterator is
97 : // opened, it's tested to see if it implements filteredIter. If it does,
98 : // it's stored here to allow the level iterator to recognize when keys were
99 : // omitted from iteration results due to filtering. This is important when a
100 : // file contains range deletions that may delete keys from other files. The
101 : // levelIter must not advance to the next file until the mergingIter has
102 : // advanced beyond the file's bounds. See
103 : // levelIterBoundaryContext.isIgnorableBoundaryKey.
104 : filteredIter filteredIter
105 : newIters tableNewIters
106 : // When rangeDelIterPtr != nil, the caller requires that *rangeDelIterPtr must
107 : // point to a range del iterator corresponding to the current file. When this
108 : // iterator returns nil, *rangeDelIterPtr should also be set to nil. Whenever
109 : // a non-nil internalIterator is placed in rangeDelIterPtr, a copy is placed
110 : // in rangeDelIterCopy. This is done for the following special case:
111 : // when this iterator returns nil because of exceeding the bounds, we don't
112 : // close iter and *rangeDelIterPtr since we could reuse it in the next seek. But
113 : // we need to set *rangeDelIterPtr to nil because of the aforementioned contract.
114 : // This copy is used to revive the *rangeDelIterPtr in the case of reuse.
115 : rangeDelIterPtr *keyspan.FragmentIterator
116 : rangeDelIterCopy keyspan.FragmentIterator
117 : files manifest.LevelIterator
118 : err error
119 :
120 : // Pointer into this level's entry in `mergingIterLevel::levelIterBoundaryContext`.
121 : // We populate it with the corresponding bounds for the currently opened file. It is used for
122 : // two purposes (described for forward iteration. The explanation for backward iteration is
123 : // similar.)
124 : // - To limit the optimization that seeks lower-level iterators past keys shadowed by a range
125 : // tombstone. Limiting this seek to the file largestUserKey is necessary since
126 : // range tombstones are stored untruncated, while they only apply to keys within their
127 : // containing file's boundaries. For a detailed example, see comment above `mergingIter`.
128 : // - To constrain the tombstone to act-within the bounds of the sstable when checking
129 : // containment. For forward iteration we need the smallestUserKey.
130 : //
131 : // An example is sstable bounds [c#8, g#12] containing a tombstone [b, i)#7.
132 : // - When doing a SeekGE to user key X, the levelIter is at this sstable because X is either within
133 : // the sstable bounds or earlier than the start of the sstable (and there is no sstable in
134 : // between at this level). If X >= smallestUserKey, and the tombstone [b, i) contains X,
135 : // it is correct to SeekGE the sstables at lower levels to min(g, i) (i.e., min of
136 : // largestUserKey, tombstone.End) since any user key preceding min(g, i) must be covered by this
137 : // tombstone (since it cannot have a version younger than this tombstone as it is at a lower
138 : // level). And even if X = smallestUserKey or equal to the start user key of the tombstone,
139 : // if the above conditions are satisfied we know that the internal keys corresponding to X at
140 : // lower levels must have a version smaller than that in this file (again because of the level
141 : // argument). So we don't need to use sequence numbers for this comparison.
142 : // - When checking whether this tombstone deletes internal key X we know that the levelIter is at this
143 : // sstable so (repeating the above) X.UserKey is either within the sstable bounds or earlier than the
144 : // start of the sstable (and there is no sstable in between at this level).
145 : // - X is at at a lower level. If X.UserKey >= smallestUserKey, and the tombstone contains
146 : // X.UserKey, we know X is deleted. This argument also works when X is a user key (we use
147 : // it when seeking to test whether a user key is deleted).
148 : // - X is at the same level. X must be within the sstable bounds of the tombstone so the
149 : // X.UserKey >= smallestUserKey comparison is trivially true. In addition to the tombstone containing
150 : // X we need to compare the sequence number of X and the tombstone (we don't need to look
151 : // at how this tombstone is truncated to act-within the file bounds, which are InternalKeys,
152 : // since X and the tombstone are from the same file).
153 : //
154 : // Iterating backwards has one more complication when checking whether a tombstone deletes
155 : // internal key X at a lower level (the construction we do here also works for a user key X).
156 : // Consider sstable bounds [c#8, g#InternalRangeDelSentinel] containing a tombstone [b, i)#7.
157 : // If we are positioned at key g#10 at a lower sstable, the tombstone we will see is [b, i)#7,
158 : // since the higher sstable is positioned at a key <= g#10. We should not use this tombstone
159 : // to delete g#10. This requires knowing that the largestUserKey is a range delete sentinel,
160 : // which we set in a separate bool below.
161 : //
162 : // These fields differs from the `*Boundary` fields in a few ways:
163 : // - `*Boundary` is only populated when the iterator is positioned exactly on the sentinel key.
164 : // - `*Boundary` can hold either the lower- or upper-bound, depending on the iterator direction.
165 : // - `*Boundary` is not exposed to the next higher-level iterator, i.e., `mergingIter`.
166 : boundaryContext *levelIterBoundaryContext
167 :
168 : // internalOpts holds the internal iterator options to pass to the table
169 : // cache when constructing new table iterators.
170 : internalOpts internalIterOpts
171 :
172 : // Scratch space for the obsolete keys filter, when there are no other block
173 : // property filters specified. See the performance note where
174 : // IterOptions.PointKeyFilters is declared.
175 : filtersBuf [1]BlockPropertyFilter
176 :
177 : // Disable invariant checks even if they are otherwise enabled. Used by tests
178 : // which construct "impossible" situations (e.g. seeking to a key before the
179 : // lower bound).
180 : disableInvariants bool
181 : }
182 :
183 : // filteredIter is an additional interface implemented by iterators that may
184 : // skip over point keys during iteration. The sstable.Iterator implements this
185 : // interface.
186 : type filteredIter interface {
187 : // MaybeFilteredKeys may be called when an iterator is exhausted, indicating
188 : // whether or not the iterator's last positioning method may have skipped
189 : // any keys due to low-level filters.
190 : //
191 : // When an iterator is configured to use block-property filters, the
192 : // low-level iterator may skip over blocks or whole sstables of keys.
193 : // Implementations that implement skipping must implement this interface.
194 : // Higher-level iterators require it to preserve invariants (eg, a levelIter
195 : // used in a mergingIter must keep the file's range-del iterator open until
196 : // the mergingIter has moved past the file's bounds, even if all of the
197 : // file's point keys were filtered).
198 : //
199 : // MaybeFilteredKeys may always return false positives, that is it may
200 : // return true when no keys were filtered. It should only be called when the
201 : // iterator is exhausted. It must never return false negatives when the
202 : // iterator is exhausted.
203 : MaybeFilteredKeys() bool
204 : }
205 :
206 : // levelIter implements the base.InternalIterator interface.
207 : var _ base.InternalIterator = (*levelIter)(nil)
208 :
209 : // newLevelIter returns a levelIter. It is permissible to pass a nil split
210 : // parameter if the caller is never going to call SeekPrefixGE.
211 : func newLevelIter(
212 : ctx context.Context,
213 : opts IterOptions,
214 : comparer *Comparer,
215 : newIters tableNewIters,
216 : files manifest.LevelIterator,
217 : level manifest.Level,
218 : internalOpts internalIterOpts,
219 1 : ) *levelIter {
220 1 : l := &levelIter{}
221 1 : l.init(ctx, opts, comparer, newIters, files, level, internalOpts)
222 1 : return l
223 1 : }
224 :
225 : func (l *levelIter) init(
226 : ctx context.Context,
227 : opts IterOptions,
228 : comparer *Comparer,
229 : newIters tableNewIters,
230 : files manifest.LevelIterator,
231 : level manifest.Level,
232 : internalOpts internalIterOpts,
233 1 : ) {
234 1 : l.ctx = ctx
235 1 : l.err = nil
236 1 : l.level = level
237 1 : l.logger = opts.getLogger()
238 1 : l.lower = opts.LowerBound
239 1 : l.upper = opts.UpperBound
240 1 : l.tableOpts.TableFilter = opts.TableFilter
241 1 : l.tableOpts.PointKeyFilters = opts.PointKeyFilters
242 1 : if len(opts.PointKeyFilters) == 0 {
243 1 : l.tableOpts.PointKeyFilters = l.filtersBuf[:0:1]
244 1 : }
245 1 : l.tableOpts.UseL6Filters = opts.UseL6Filters
246 1 : l.tableOpts.CategoryAndQoS = opts.CategoryAndQoS
247 1 : l.tableOpts.level = l.level
248 1 : l.tableOpts.snapshotForHideObsoletePoints = opts.snapshotForHideObsoletePoints
249 1 : l.comparer = comparer
250 1 : l.cmp = comparer.Compare
251 1 : l.split = comparer.Split
252 1 : l.iterFile = nil
253 1 : l.newIters = newIters
254 1 : l.files = files
255 1 : l.internalOpts = internalOpts
256 : }
257 :
258 1 : func (l *levelIter) initRangeDel(rangeDelIter *keyspan.FragmentIterator) {
259 1 : l.rangeDelIterPtr = rangeDelIter
260 1 : }
261 :
262 1 : func (l *levelIter) initBoundaryContext(context *levelIterBoundaryContext) {
263 1 : l.boundaryContext = context
264 1 : }
265 :
266 1 : func (l *levelIter) initCombinedIterState(state *combinedIterState) {
267 1 : l.combinedIterState = state
268 1 : }
269 :
270 1 : func (l *levelIter) maybeTriggerCombinedIteration(file *fileMetadata, dir int) {
271 1 : // If we encounter a file that contains range keys, we may need to
272 1 : // trigger a switch to combined range-key and point-key iteration,
273 1 : // if the *pebble.Iterator is configured for it. This switch is done
274 1 : // lazily because range keys are intended to be rare, and
275 1 : // constructing the range-key iterator substantially adds to the
276 1 : // cost of iterator construction and seeking.
277 1 : //
278 1 : // If l.combinedIterState.initialized is already true, either the
279 1 : // iterator is already using combined iteration or the iterator is not
280 1 : // configured to observe range keys. Either way, there's nothing to do.
281 1 : // If false, trigger the switch to combined iteration, using the the
282 1 : // file's bounds to seek the range-key iterator appropriately.
283 1 : //
284 1 : // We only need to trigger combined iteration if the file contains
285 1 : // RangeKeySets: if there are only Unsets and Dels, the user will observe no
286 1 : // range keys regardless. If this file has table stats available, they'll
287 1 : // tell us whether the file has any RangeKeySets. Otherwise, we must
288 1 : // fallback to assuming it does if HasRangeKeys=true.
289 1 : if file != nil && file.HasRangeKeys && l.combinedIterState != nil && !l.combinedIterState.initialized &&
290 1 : (l.upper == nil || l.cmp(file.SmallestRangeKey.UserKey, l.upper) < 0) &&
291 1 : (l.lower == nil || l.cmp(file.LargestRangeKey.UserKey, l.lower) > 0) &&
292 1 : (!file.StatsValid() || file.Stats.NumRangeKeySets > 0) {
293 1 : // The file contains range keys, and we're not using combined iteration yet.
294 1 : // Trigger a switch to combined iteration. It's possible that a switch has
295 1 : // already been triggered if multiple levels encounter files containing
296 1 : // range keys while executing a single mergingIter operation. In this case,
297 1 : // we need to compare the existing key recorded to l.combinedIterState.key,
298 1 : // adjusting it if our key is smaller (forward iteration) or larger
299 1 : // (backward iteration) than the existing key.
300 1 : //
301 1 : // These key comparisons are only required during a single high-level
302 1 : // iterator operation. When the high-level iter op completes,
303 1 : // iinitialized will be true, and future calls to this function will be
304 1 : // no-ops.
305 1 : switch dir {
306 1 : case +1:
307 1 : if !l.combinedIterState.triggered {
308 1 : l.combinedIterState.triggered = true
309 1 : l.combinedIterState.key = file.SmallestRangeKey.UserKey
310 1 : } else if l.cmp(l.combinedIterState.key, file.SmallestRangeKey.UserKey) > 0 {
311 1 : l.combinedIterState.key = file.SmallestRangeKey.UserKey
312 1 : }
313 1 : case -1:
314 1 : if !l.combinedIterState.triggered {
315 1 : l.combinedIterState.triggered = true
316 1 : l.combinedIterState.key = file.LargestRangeKey.UserKey
317 1 : } else if l.cmp(l.combinedIterState.key, file.LargestRangeKey.UserKey) < 0 {
318 1 : l.combinedIterState.key = file.LargestRangeKey.UserKey
319 1 : }
320 : }
321 : }
322 : }
323 :
324 1 : func (l *levelIter) findFileGE(key []byte, flags base.SeekGEFlags) *fileMetadata {
325 1 : // Find the earliest file whose largest key is >= key.
326 1 :
327 1 : // NB: if flags.TrySeekUsingNext()=true, the levelIter must respect it. If
328 1 : // the levelIter is positioned at the key P, it must return a key ≥ P. If
329 1 : // used within a merging iterator, the merging iterator will depend on the
330 1 : // levelIter only moving forward to maintain heap invariants.
331 1 :
332 1 : // Ordinarily we seek the LevelIterator using SeekGE. In some instances, we
333 1 : // Next instead. In other instances, we try Next-ing first, falling back to
334 1 : // seek:
335 1 : // a) flags.TrySeekUsingNext(): The top-level Iterator knows we're seeking
336 1 : // to a key later than the current iterator position. We don't know how
337 1 : // much later the seek key is, so it's possible there are many sstables
338 1 : // between the current position and the seek key. However in most real-
339 1 : // world use cases, the seek key is likely to be nearby. Rather than
340 1 : // performing a log(N) seek through the file metadata, we next a few
341 1 : // times from from our existing location. If we don't find a file whose
342 1 : // largest is >= key within a few nexts, we fall back to seeking.
343 1 : //
344 1 : // Note that in this case, the file returned by findFileGE may be
345 1 : // different than the file returned by a raw binary search (eg, when
346 1 : // TrySeekUsingNext=false). This is possible because the most recent
347 1 : // positioning operation may have already determined that previous
348 1 : // files' keys that are ≥ key are all deleted. This information is
349 1 : // encoded within the iterator's current iterator position and is
350 1 : // unavailable to a fresh binary search.
351 1 : //
352 1 : // b) flags.RelativeSeek(): The merging iterator decided to re-seek this
353 1 : // level according to a range tombstone. When lazy combined iteration
354 1 : // is enabled, the level iterator is responsible for watching for
355 1 : // files containing range keys and triggering the switch to combined
356 1 : // iteration when such a file is observed. If a range deletion was
357 1 : // observed in a higher level causing the merging iterator to seek the
358 1 : // level to the range deletion's end key, we need to check whether all
359 1 : // of the files between the old position and the new position contain
360 1 : // any range keys.
361 1 : //
362 1 : // In this scenario, we don't seek the LevelIterator and instead we
363 1 : // Next it, one file at a time, checking each for range keys. The
364 1 : // merging iterator sets this flag to inform us that we're moving
365 1 : // forward relative to the existing position and that we must examine
366 1 : // each intermediate sstable's metadata for lazy-combined iteration.
367 1 : // In this case, we only Next and never Seek. We set nextsUntilSeek=-1
368 1 : // to signal this intention.
369 1 : //
370 1 : // NB: At most one of flags.RelativeSeek() and flags.TrySeekUsingNext() may
371 1 : // be set, because the merging iterator re-seeks relative seeks with
372 1 : // explicitly only the RelativeSeek flag set.
373 1 : var nextsUntilSeek int
374 1 : var nextInsteadOfSeek bool
375 1 : if flags.TrySeekUsingNext() {
376 1 : nextInsteadOfSeek = true
377 1 : nextsUntilSeek = 4 // arbitrary
378 1 : }
379 1 : if flags.RelativeSeek() && l.combinedIterState != nil && !l.combinedIterState.initialized {
380 1 : nextInsteadOfSeek = true
381 1 : nextsUntilSeek = -1
382 1 : }
383 :
384 1 : var m *fileMetadata
385 1 : if nextInsteadOfSeek {
386 1 : m = l.iterFile
387 1 : } else {
388 1 : m = l.files.SeekGE(l.cmp, key)
389 1 : }
390 : // The below loop has a bit of an unusual organization. There are several
391 : // conditions under which we need to Next to a later file. If none of those
392 : // conditions are met, the file in `m` is okay to return. The loop body is
393 : // structured with a series of if statements, each of which may continue the
394 : // loop to the next file. If none of the statements are met, the end of the
395 : // loop body is a break.
396 1 : for m != nil {
397 1 : if m.HasRangeKeys {
398 1 : l.maybeTriggerCombinedIteration(m, +1)
399 1 :
400 1 : // Some files may only contain range keys, which we can skip.
401 1 : // NB: HasPointKeys=true if the file contains any points or range
402 1 : // deletions (which delete points).
403 1 : if !m.HasPointKeys {
404 1 : m = l.files.Next()
405 1 : continue
406 : }
407 : }
408 :
409 : // This file has point keys.
410 : //
411 : // However, there are a couple reasons why `m` may not be positioned ≥
412 : // `key` yet:
413 : //
414 : // 1. If SeekGE(key) landed on a file containing range keys, the file
415 : // may contain range keys ≥ `key` but no point keys ≥ `key`.
416 : // 2. When nexting instead of seeking, we must check to see whether
417 : // we've nexted sufficiently far, or we need to next again.
418 : //
419 : // If the file does not contain point keys ≥ `key`, next to continue
420 : // looking for a file that does.
421 1 : if (m.HasRangeKeys || nextInsteadOfSeek) && l.cmp(m.LargestPointKey.UserKey, key) < 0 {
422 1 : // If nextInsteadOfSeek is set and nextsUntilSeek is non-negative,
423 1 : // the iterator has been nexting hoping to discover the relevant
424 1 : // file without seeking. It's exhausted the allotted nextsUntilSeek
425 1 : // and should seek to the sought key.
426 1 : if nextInsteadOfSeek && nextsUntilSeek == 0 {
427 1 : nextInsteadOfSeek = false
428 1 : m = l.files.SeekGE(l.cmp, key)
429 1 : continue
430 1 : } else if nextsUntilSeek > 0 {
431 1 : nextsUntilSeek--
432 1 : }
433 1 : m = l.files.Next()
434 1 : continue
435 : }
436 :
437 : // This file has a point key bound ≥ `key`. But the largest point key
438 : // bound may still be a range deletion sentinel, which is exclusive. In
439 : // this case, the file doesn't actually contain any point keys equal to
440 : // `key`. We next to keep searching for a file that actually contains
441 : // point keys ≥ key.
442 : //
443 : // Additionally, this prevents loading untruncated range deletions from
444 : // a table which can't possibly contain the target key and is required
445 : // for correctness by mergingIter.SeekGE (see the comment in that
446 : // function).
447 1 : if m.LargestPointKey.IsExclusiveSentinel() && l.cmp(m.LargestPointKey.UserKey, key) == 0 {
448 1 : m = l.files.Next()
449 1 : continue
450 : }
451 :
452 : // This file contains point keys ≥ `key`. Break and return it.
453 1 : break
454 : }
455 1 : return m
456 : }
457 :
458 1 : func (l *levelIter) findFileLT(key []byte, flags base.SeekLTFlags) *fileMetadata {
459 1 : // Find the last file whose smallest key is < ikey.
460 1 :
461 1 : // Ordinarily we seek the LevelIterator using SeekLT.
462 1 : //
463 1 : // When lazy combined iteration is enabled, there's a complication. The
464 1 : // level iterator is responsible for watching for files containing range
465 1 : // keys and triggering the switch to combined iteration when such a file is
466 1 : // observed. If a range deletion was observed in a higher level causing the
467 1 : // merging iterator to seek the level to the range deletion's start key, we
468 1 : // need to check whether all of the files between the old position and the
469 1 : // new position contain any range keys.
470 1 : //
471 1 : // In this scenario, we don't seek the LevelIterator and instead we Prev it,
472 1 : // one file at a time, checking each for range keys.
473 1 : prevInsteadOfSeek := flags.RelativeSeek() && l.combinedIterState != nil && !l.combinedIterState.initialized
474 1 :
475 1 : var m *fileMetadata
476 1 : if prevInsteadOfSeek {
477 1 : m = l.iterFile
478 1 : } else {
479 1 : m = l.files.SeekLT(l.cmp, key)
480 1 : }
481 : // The below loop has a bit of an unusual organization. There are several
482 : // conditions under which we need to Prev to a previous file. If none of
483 : // those conditions are met, the file in `m` is okay to return. The loop
484 : // body is structured with a series of if statements, each of which may
485 : // continue the loop to the previous file. If none of the statements are
486 : // met, the end of the loop body is a break.
487 1 : for m != nil {
488 1 : if m.HasRangeKeys {
489 1 : l.maybeTriggerCombinedIteration(m, -1)
490 1 :
491 1 : // Some files may only contain range keys, which we can skip.
492 1 : // NB: HasPointKeys=true if the file contains any points or range
493 1 : // deletions (which delete points).
494 1 : if !m.HasPointKeys {
495 1 : m = l.files.Prev()
496 1 : continue
497 : }
498 : }
499 :
500 : // This file has point keys.
501 : //
502 : // However, there are a couple reasons why `m` may not be positioned <
503 : // `key` yet:
504 : //
505 : // 1. If SeekLT(key) landed on a file containing range keys, the file
506 : // may contain range keys < `key` but no point keys < `key`.
507 : // 2. When preving instead of seeking, we must check to see whether
508 : // we've preved sufficiently far, or we need to prev again.
509 : //
510 : // If the file does not contain point keys < `key`, prev to continue
511 : // looking for a file that does.
512 1 : if (m.HasRangeKeys || prevInsteadOfSeek) && l.cmp(m.SmallestPointKey.UserKey, key) >= 0 {
513 1 : m = l.files.Prev()
514 1 : continue
515 : }
516 :
517 : // This file contains point keys < `key`. Break and return it.
518 1 : break
519 : }
520 1 : return m
521 : }
522 :
523 : // Init the iteration bounds for the current table. Returns -1 if the table
524 : // lies fully before the lower bound, +1 if the table lies fully after the
525 : // upper bound, and 0 if the table overlaps the iteration bounds.
526 1 : func (l *levelIter) initTableBounds(f *fileMetadata) int {
527 1 : l.tableOpts.LowerBound = l.lower
528 1 : if l.tableOpts.LowerBound != nil {
529 1 : if l.cmp(f.LargestPointKey.UserKey, l.tableOpts.LowerBound) < 0 {
530 1 : // The largest key in the sstable is smaller than the lower bound.
531 1 : return -1
532 1 : }
533 1 : if l.cmp(l.tableOpts.LowerBound, f.SmallestPointKey.UserKey) <= 0 {
534 1 : // The lower bound is smaller or equal to the smallest key in the
535 1 : // table. Iteration within the table does not need to check the lower
536 1 : // bound.
537 1 : l.tableOpts.LowerBound = nil
538 1 : }
539 : }
540 1 : l.tableOpts.UpperBound = l.upper
541 1 : if l.tableOpts.UpperBound != nil {
542 1 : if l.cmp(f.SmallestPointKey.UserKey, l.tableOpts.UpperBound) >= 0 {
543 1 : // The smallest key in the sstable is greater than or equal to the upper
544 1 : // bound.
545 1 : return 1
546 1 : }
547 1 : if l.cmp(l.tableOpts.UpperBound, f.LargestPointKey.UserKey) > 0 {
548 1 : // The upper bound is greater than the largest key in the
549 1 : // table. Iteration within the table does not need to check the upper
550 1 : // bound. NB: tableOpts.UpperBound is exclusive and f.LargestPointKey is
551 1 : // inclusive.
552 1 : l.tableOpts.UpperBound = nil
553 1 : }
554 : }
555 1 : return 0
556 : }
557 :
558 : type loadFileReturnIndicator int8
559 :
560 : const (
561 : noFileLoaded loadFileReturnIndicator = iota
562 : fileAlreadyLoaded
563 : newFileLoaded
564 : )
565 :
566 1 : func (l *levelIter) loadFile(file *fileMetadata, dir int) loadFileReturnIndicator {
567 1 : l.smallestBoundary = nil
568 1 : l.largestBoundary = nil
569 1 : if l.boundaryContext != nil {
570 1 : l.boundaryContext.isSyntheticIterBoundsKey = false
571 1 : l.boundaryContext.isIgnorableBoundaryKey = false
572 1 : }
573 1 : if l.iterFile == file {
574 1 : if l.err != nil {
575 0 : return noFileLoaded
576 0 : }
577 1 : if l.iter != nil {
578 1 : // We don't bother comparing the file bounds with the iteration bounds when we have
579 1 : // an already open iterator. It is possible that the iter may not be relevant given the
580 1 : // current iteration bounds, but it knows those bounds, so it will enforce them.
581 1 : if l.rangeDelIterPtr != nil {
582 1 : *l.rangeDelIterPtr = l.rangeDelIterCopy
583 1 : }
584 :
585 : // There are a few reasons we might not have triggered combined
586 : // iteration yet, even though we already had `file` open.
587 : // 1. If the bounds changed, we might have previously avoided
588 : // switching to combined iteration because the bounds excluded
589 : // the range keys contained in this file.
590 : // 2. If an existing iterator was reconfigured to iterate over range
591 : // keys (eg, using SetOptions), then we wouldn't have triggered
592 : // the switch to combined iteration yet.
593 1 : l.maybeTriggerCombinedIteration(file, dir)
594 1 : return fileAlreadyLoaded
595 : }
596 : // We were already at file, but don't have an iterator, probably because the file was
597 : // beyond the iteration bounds. It may still be, but it is also possible that the bounds
598 : // have changed. We handle that below.
599 : }
600 :
601 : // Close both iter and rangeDelIterPtr. While mergingIter knows about
602 : // rangeDelIterPtr, it can't call Close() on it because it does not know
603 : // when the levelIter will switch it. Note that levelIter.Close() can be
604 : // called multiple times.
605 1 : if err := l.Close(); err != nil {
606 0 : return noFileLoaded
607 0 : }
608 :
609 1 : for {
610 1 : l.iterFile = file
611 1 : if file == nil {
612 1 : return noFileLoaded
613 1 : }
614 :
615 1 : l.maybeTriggerCombinedIteration(file, dir)
616 1 : if !file.HasPointKeys {
617 1 : switch dir {
618 1 : case +1:
619 1 : file = l.files.Next()
620 1 : continue
621 1 : case -1:
622 1 : file = l.files.Prev()
623 1 : continue
624 : }
625 : }
626 :
627 1 : switch l.initTableBounds(file) {
628 1 : case -1:
629 1 : // The largest key in the sstable is smaller than the lower bound.
630 1 : if dir < 0 {
631 1 : return noFileLoaded
632 1 : }
633 0 : file = l.files.Next()
634 0 : continue
635 1 : case +1:
636 1 : // The smallest key in the sstable is greater than or equal to the upper
637 1 : // bound.
638 1 : if dir > 0 {
639 1 : return noFileLoaded
640 1 : }
641 0 : file = l.files.Prev()
642 0 : continue
643 : }
644 :
645 1 : var rangeDelIter keyspan.FragmentIterator
646 1 : var iter internalIterator
647 1 : iter, rangeDelIter, l.err = l.newIters.TODO(l.ctx, l.iterFile, &l.tableOpts, l.internalOpts)
648 1 : l.iter = iter
649 1 : if l.err != nil {
650 0 : return noFileLoaded
651 0 : }
652 1 : if rangeDelIter != nil {
653 1 : if fi, ok := iter.(filteredIter); ok {
654 1 : l.filteredIter = fi
655 1 : } else {
656 0 : l.filteredIter = nil
657 0 : }
658 1 : } else {
659 1 : l.filteredIter = nil
660 1 : }
661 1 : if l.rangeDelIterPtr != nil {
662 1 : *l.rangeDelIterPtr = rangeDelIter
663 1 : l.rangeDelIterCopy = rangeDelIter
664 1 : } else if rangeDelIter != nil {
665 1 : rangeDelIter.Close()
666 1 : }
667 1 : return newFileLoaded
668 : }
669 : }
670 :
671 : // In race builds we verify that the keys returned by levelIter lie within
672 : // [lower,upper).
673 1 : func (l *levelIter) verify(key *InternalKey, val base.LazyValue) (*InternalKey, base.LazyValue) {
674 1 : // Note that invariants.Enabled is a compile time constant, which means the
675 1 : // block of code will be compiled out of normal builds making this method
676 1 : // eligible for inlining. Do not change this to use a variable.
677 1 : if invariants.Enabled && !l.disableInvariants && key != nil {
678 1 : // We allow returning a boundary key that is outside of the lower/upper
679 1 : // bounds as such keys are always range tombstones which will be skipped by
680 1 : // the Iterator.
681 1 : if l.lower != nil && key != l.smallestBoundary && l.cmp(key.UserKey, l.lower) < 0 {
682 0 : l.logger.Fatalf("levelIter %s: lower bound violation: %s < %s\n%s", l.level, key, l.lower, debug.Stack())
683 0 : }
684 1 : if l.upper != nil && key != l.largestBoundary && l.cmp(key.UserKey, l.upper) > 0 {
685 0 : l.logger.Fatalf("levelIter %s: upper bound violation: %s > %s\n%s", l.level, key, l.upper, debug.Stack())
686 0 : }
687 : }
688 1 : return key, val
689 : }
690 :
691 1 : func (l *levelIter) SeekGE(key []byte, flags base.SeekGEFlags) (*InternalKey, base.LazyValue) {
692 1 : l.err = nil // clear cached iteration error
693 1 : if l.boundaryContext != nil {
694 1 : l.boundaryContext.isSyntheticIterBoundsKey = false
695 1 : l.boundaryContext.isIgnorableBoundaryKey = false
696 1 : }
697 : // NB: the top-level Iterator has already adjusted key based on
698 : // IterOptions.LowerBound.
699 1 : loadFileIndicator := l.loadFile(l.findFileGE(key, flags), +1)
700 1 : if loadFileIndicator == noFileLoaded {
701 1 : return nil, base.LazyValue{}
702 1 : }
703 1 : if loadFileIndicator == newFileLoaded {
704 1 : // File changed, so l.iter has changed, and that iterator is not
705 1 : // positioned appropriately.
706 1 : flags = flags.DisableTrySeekUsingNext()
707 1 : }
708 1 : if ikey, val := l.iter.SeekGE(key, flags); ikey != nil {
709 1 : return l.verify(ikey, val)
710 1 : }
711 1 : return l.verify(l.skipEmptyFileForward())
712 : }
713 :
714 : func (l *levelIter) SeekPrefixGE(
715 : prefix, key []byte, flags base.SeekGEFlags,
716 1 : ) (*base.InternalKey, base.LazyValue) {
717 1 : l.err = nil // clear cached iteration error
718 1 : if l.boundaryContext != nil {
719 1 : l.boundaryContext.isSyntheticIterBoundsKey = false
720 1 : l.boundaryContext.isIgnorableBoundaryKey = false
721 1 : }
722 :
723 : // NB: the top-level Iterator has already adjusted key based on
724 : // IterOptions.LowerBound.
725 1 : loadFileIndicator := l.loadFile(l.findFileGE(key, flags), +1)
726 1 : if loadFileIndicator == noFileLoaded {
727 1 : return nil, base.LazyValue{}
728 1 : }
729 1 : if loadFileIndicator == newFileLoaded {
730 1 : // File changed, so l.iter has changed, and that iterator is not
731 1 : // positioned appropriately.
732 1 : flags = flags.DisableTrySeekUsingNext()
733 1 : }
734 1 : if key, val := l.iter.SeekPrefixGE(prefix, key, flags); key != nil {
735 1 : return l.verify(key, val)
736 1 : }
737 1 : if err := l.iter.Error(); err != nil {
738 0 : return nil, base.LazyValue{}
739 0 : }
740 : // When SeekPrefixGE returns nil, we have not necessarily reached the end of
741 : // the sstable. All we know is that a key with prefix does not exist in the
742 : // current sstable. We do know that the key lies within the bounds of the
743 : // table as findFileGE found the table where key <= meta.Largest. We return
744 : // the table's bound with isIgnorableBoundaryKey set.
745 1 : if l.rangeDelIterPtr != nil && *l.rangeDelIterPtr != nil {
746 1 : if l.tableOpts.UpperBound != nil {
747 1 : l.syntheticBoundary.UserKey = l.tableOpts.UpperBound
748 1 : l.syntheticBoundary.Trailer = InternalKeyRangeDeleteSentinel
749 1 : l.largestBoundary = &l.syntheticBoundary
750 1 : if l.boundaryContext != nil {
751 1 : l.boundaryContext.isSyntheticIterBoundsKey = true
752 1 : l.boundaryContext.isIgnorableBoundaryKey = false
753 1 : }
754 1 : return l.verify(l.largestBoundary, base.LazyValue{})
755 : }
756 : // Return the file's largest bound, ensuring this file stays open until
757 : // the mergingIter advances beyond the file's bounds. We set
758 : // isIgnorableBoundaryKey to signal that the actual key returned should
759 : // be ignored, and does not represent a real key in the database.
760 1 : l.largestBoundary = &l.iterFile.LargestPointKey
761 1 : if l.boundaryContext != nil {
762 1 : l.boundaryContext.isSyntheticIterBoundsKey = false
763 1 : l.boundaryContext.isIgnorableBoundaryKey = true
764 1 : }
765 1 : return l.verify(l.largestBoundary, base.LazyValue{})
766 : }
767 : // It is possible that we are here because bloom filter matching failed. In
768 : // that case it is likely that all keys matching the prefix are wholly
769 : // within the current file and cannot be in the subsequent file. In that
770 : // case we don't want to go to the next file, since loading and seeking in
771 : // there has some cost. Additionally, for sparse key spaces, loading the
772 : // next file will defeat the optimization for the next SeekPrefixGE that is
773 : // called with flags.TrySeekUsingNext(), since for sparse key spaces it is
774 : // likely that the next key will also be contained in the current file.
775 1 : var n int
776 1 : if l.split != nil {
777 1 : // If the split function is specified, calculate the prefix length accordingly.
778 1 : n = l.split(l.iterFile.LargestPointKey.UserKey)
779 1 : } else {
780 0 : // If the split function is not specified, the entire key is used as the
781 0 : // prefix. This case can occur when getIter uses SeekPrefixGE.
782 0 : n = len(l.iterFile.LargestPointKey.UserKey)
783 0 : }
784 1 : if l.cmp(prefix, l.iterFile.LargestPointKey.UserKey[:n]) < 0 {
785 1 : return nil, base.LazyValue{}
786 1 : }
787 1 : return l.verify(l.skipEmptyFileForward())
788 : }
789 :
790 1 : func (l *levelIter) SeekLT(key []byte, flags base.SeekLTFlags) (*InternalKey, base.LazyValue) {
791 1 : l.err = nil // clear cached iteration error
792 1 : if l.boundaryContext != nil {
793 1 : l.boundaryContext.isSyntheticIterBoundsKey = false
794 1 : l.boundaryContext.isIgnorableBoundaryKey = false
795 1 : }
796 :
797 : // NB: the top-level Iterator has already adjusted key based on
798 : // IterOptions.UpperBound.
799 1 : if l.loadFile(l.findFileLT(key, flags), -1) == noFileLoaded {
800 1 : return nil, base.LazyValue{}
801 1 : }
802 1 : if key, val := l.iter.SeekLT(key, flags); key != nil {
803 1 : return l.verify(key, val)
804 1 : }
805 1 : return l.verify(l.skipEmptyFileBackward())
806 : }
807 :
808 1 : func (l *levelIter) First() (*InternalKey, base.LazyValue) {
809 1 : l.err = nil // clear cached iteration error
810 1 : if l.boundaryContext != nil {
811 1 : l.boundaryContext.isSyntheticIterBoundsKey = false
812 1 : l.boundaryContext.isIgnorableBoundaryKey = false
813 1 : }
814 :
815 : // NB: the top-level Iterator will call SeekGE if IterOptions.LowerBound is
816 : // set.
817 1 : if l.loadFile(l.files.First(), +1) == noFileLoaded {
818 1 : return nil, base.LazyValue{}
819 1 : }
820 1 : if key, val := l.iter.First(); key != nil {
821 1 : return l.verify(key, val)
822 1 : }
823 1 : return l.verify(l.skipEmptyFileForward())
824 : }
825 :
826 1 : func (l *levelIter) Last() (*InternalKey, base.LazyValue) {
827 1 : l.err = nil // clear cached iteration error
828 1 : if l.boundaryContext != nil {
829 1 : l.boundaryContext.isSyntheticIterBoundsKey = false
830 1 : l.boundaryContext.isIgnorableBoundaryKey = false
831 1 : }
832 :
833 : // NB: the top-level Iterator will call SeekLT if IterOptions.UpperBound is
834 : // set.
835 1 : if l.loadFile(l.files.Last(), -1) == noFileLoaded {
836 1 : return nil, base.LazyValue{}
837 1 : }
838 1 : if key, val := l.iter.Last(); key != nil {
839 1 : return l.verify(key, val)
840 1 : }
841 1 : return l.verify(l.skipEmptyFileBackward())
842 : }
843 :
844 1 : func (l *levelIter) Next() (*InternalKey, base.LazyValue) {
845 1 : if l.err != nil || l.iter == nil {
846 0 : return nil, base.LazyValue{}
847 0 : }
848 1 : if l.boundaryContext != nil {
849 1 : l.boundaryContext.isSyntheticIterBoundsKey = false
850 1 : l.boundaryContext.isIgnorableBoundaryKey = false
851 1 : }
852 :
853 1 : switch {
854 1 : case l.largestBoundary != nil:
855 1 : if l.tableOpts.UpperBound != nil {
856 0 : // The UpperBound was within this file, so don't load the next
857 0 : // file. We leave the largestBoundary unchanged so that subsequent
858 0 : // calls to Next() stay at this file. If a Seek/First/Last call is
859 0 : // made and this file continues to be relevant, loadFile() will
860 0 : // set the largestBoundary to nil.
861 0 : if l.rangeDelIterPtr != nil {
862 0 : *l.rangeDelIterPtr = nil
863 0 : }
864 0 : return nil, base.LazyValue{}
865 : }
866 : // We're stepping past the boundary key, so now we can load the next file.
867 1 : if l.loadFile(l.files.Next(), +1) != noFileLoaded {
868 1 : if key, val := l.iter.First(); key != nil {
869 1 : return l.verify(key, val)
870 1 : }
871 1 : return l.verify(l.skipEmptyFileForward())
872 : }
873 1 : return nil, base.LazyValue{}
874 :
875 1 : default:
876 1 : // Reset the smallest boundary since we're moving away from it.
877 1 : l.smallestBoundary = nil
878 1 : if key, val := l.iter.Next(); key != nil {
879 1 : return l.verify(key, val)
880 1 : }
881 : }
882 1 : return l.verify(l.skipEmptyFileForward())
883 : }
884 :
885 1 : func (l *levelIter) NextPrefix(succKey []byte) (*InternalKey, base.LazyValue) {
886 1 : if l.err != nil || l.iter == nil {
887 0 : return nil, base.LazyValue{}
888 0 : }
889 1 : if l.boundaryContext != nil {
890 1 : l.boundaryContext.isSyntheticIterBoundsKey = false
891 1 : l.boundaryContext.isIgnorableBoundaryKey = false
892 1 : }
893 :
894 1 : switch {
895 1 : case l.largestBoundary != nil:
896 1 : if l.tableOpts.UpperBound != nil {
897 0 : // The UpperBound was within this file, so don't load the next
898 0 : // file. We leave the largestBoundary unchanged so that subsequent
899 0 : // calls to Next() stay at this file. If a Seek/First/Last call is
900 0 : // made and this file continues to be relevant, loadFile() will
901 0 : // set the largestBoundary to nil.
902 0 : if l.rangeDelIterPtr != nil {
903 0 : *l.rangeDelIterPtr = nil
904 0 : }
905 0 : return nil, base.LazyValue{}
906 : }
907 : // We're stepping past the boundary key, so we need to load a later
908 : // file.
909 :
910 1 : default:
911 1 : // Reset the smallest boundary since we're moving away from it.
912 1 : l.smallestBoundary = nil
913 1 :
914 1 : if key, val := l.iter.NextPrefix(succKey); key != nil {
915 1 : return l.verify(key, val)
916 1 : }
917 1 : if l.iter.Error() != nil {
918 0 : return nil, base.LazyValue{}
919 0 : }
920 : // Fall through to seeking.
921 : }
922 :
923 : // Seek the manifest level iterator using TrySeekUsingNext=true and
924 : // RelativeSeek=true so that we take advantage of the knowledge that
925 : // `succKey` can only be contained in later files.
926 1 : metadataSeekFlags := base.SeekGEFlagsNone.EnableTrySeekUsingNext().EnableRelativeSeek()
927 1 : if l.loadFile(l.findFileGE(succKey, metadataSeekFlags), +1) != noFileLoaded {
928 1 : // NB: The SeekGE on the file's iterator must not set TrySeekUsingNext,
929 1 : // because l.iter is unpositioned.
930 1 : if key, val := l.iter.SeekGE(succKey, base.SeekGEFlagsNone); key != nil {
931 1 : return l.verify(key, val)
932 1 : }
933 1 : return l.verify(l.skipEmptyFileForward())
934 : }
935 1 : return nil, base.LazyValue{}
936 : }
937 :
938 1 : func (l *levelIter) Prev() (*InternalKey, base.LazyValue) {
939 1 : if l.err != nil || l.iter == nil {
940 0 : return nil, base.LazyValue{}
941 0 : }
942 1 : if l.boundaryContext != nil {
943 1 : l.boundaryContext.isSyntheticIterBoundsKey = false
944 1 : l.boundaryContext.isIgnorableBoundaryKey = false
945 1 : }
946 :
947 1 : switch {
948 1 : case l.smallestBoundary != nil:
949 1 : if l.tableOpts.LowerBound != nil {
950 0 : // The LowerBound was within this file, so don't load the previous
951 0 : // file. We leave the smallestBoundary unchanged so that
952 0 : // subsequent calls to Prev() stay at this file. If a
953 0 : // Seek/First/Last call is made and this file continues to be
954 0 : // relevant, loadFile() will set the smallestBoundary to nil.
955 0 : if l.rangeDelIterPtr != nil {
956 0 : *l.rangeDelIterPtr = nil
957 0 : }
958 0 : return nil, base.LazyValue{}
959 : }
960 : // We're stepping past the boundary key, so now we can load the prev file.
961 1 : if l.loadFile(l.files.Prev(), -1) != noFileLoaded {
962 1 : if key, val := l.iter.Last(); key != nil {
963 1 : return l.verify(key, val)
964 1 : }
965 1 : return l.verify(l.skipEmptyFileBackward())
966 : }
967 1 : return nil, base.LazyValue{}
968 :
969 1 : default:
970 1 : // Reset the largest boundary since we're moving away from it.
971 1 : l.largestBoundary = nil
972 1 : if key, val := l.iter.Prev(); key != nil {
973 1 : return l.verify(key, val)
974 1 : }
975 : }
976 1 : return l.verify(l.skipEmptyFileBackward())
977 : }
978 :
979 1 : func (l *levelIter) skipEmptyFileForward() (*InternalKey, base.LazyValue) {
980 1 : var key *InternalKey
981 1 : var val base.LazyValue
982 1 : // The first iteration of this loop starts with an already exhausted
983 1 : // l.iter. The reason for the exhaustion is either that we iterated to the
984 1 : // end of the sstable, or our iteration was terminated early due to the
985 1 : // presence of an upper-bound or the use of SeekPrefixGE. If
986 1 : // l.rangeDelIterPtr is non-nil, we may need to pretend the iterator is
987 1 : // not exhausted to allow for the merging to finish consuming the
988 1 : // l.rangeDelIterPtr before levelIter switches the rangeDelIter from
989 1 : // under it. This pretense is done by either generating a synthetic
990 1 : // boundary key or returning the largest key of the file, depending on the
991 1 : // exhaustion reason.
992 1 :
993 1 : // Subsequent iterations will examine consecutive files such that the first
994 1 : // file that does not have an exhausted iterator causes the code to return
995 1 : // that key, else the behavior described above if there is a corresponding
996 1 : // rangeDelIterPtr.
997 1 : for ; key == nil; key, val = l.iter.First() {
998 1 : if l.iter.Error() != nil {
999 0 : return nil, base.LazyValue{}
1000 0 : }
1001 1 : if l.rangeDelIterPtr != nil {
1002 1 : // We're being used as part of a mergingIter and we've exhausted the
1003 1 : // current sstable. If an upper bound is present and the upper bound lies
1004 1 : // within the current sstable, then we will have reached the upper bound
1005 1 : // rather than the end of the sstable. We need to return a synthetic
1006 1 : // boundary key so that mergingIter can use the range tombstone iterator
1007 1 : // until the other levels have reached this boundary.
1008 1 : //
1009 1 : // It is safe to set the boundary key to the UpperBound user key
1010 1 : // with the RANGEDEL sentinel since it is the smallest InternalKey
1011 1 : // that matches the exclusive upper bound, and does not represent
1012 1 : // a real key.
1013 1 : if l.tableOpts.UpperBound != nil {
1014 1 : if *l.rangeDelIterPtr != nil {
1015 1 : l.syntheticBoundary.UserKey = l.tableOpts.UpperBound
1016 1 : l.syntheticBoundary.Trailer = InternalKeyRangeDeleteSentinel
1017 1 : l.largestBoundary = &l.syntheticBoundary
1018 1 : if l.boundaryContext != nil {
1019 1 : l.boundaryContext.isSyntheticIterBoundsKey = true
1020 1 : }
1021 1 : return l.largestBoundary, base.LazyValue{}
1022 : }
1023 : // Else there are no range deletions in this sstable. This
1024 : // helps with performance when many levels are populated with
1025 : // sstables and most don't have any actual keys within the
1026 : // bounds.
1027 1 : return nil, base.LazyValue{}
1028 : }
1029 : // If the boundary is a range deletion tombstone, return that key.
1030 1 : if l.iterFile.LargestPointKey.Kind() == InternalKeyKindRangeDelete {
1031 1 : l.largestBoundary = &l.iterFile.LargestPointKey
1032 1 : if l.boundaryContext != nil {
1033 1 : l.boundaryContext.isIgnorableBoundaryKey = true
1034 1 : }
1035 1 : return l.largestBoundary, base.LazyValue{}
1036 : }
1037 : // If the last point iterator positioning op might've skipped keys,
1038 : // it's possible the file's range deletions are still relevant to
1039 : // other levels. Return the largest boundary as a special ignorable
1040 : // marker to avoid advancing to the next file.
1041 : //
1042 : // The sstable iterator cannot guarantee that keys were skipped. A
1043 : // SeekGE that lands on a index separator k only knows that the
1044 : // block at the index entry contains keys ≤ k. We can't know whether
1045 : // there were actually keys between the seek key and the index
1046 : // separator key. If the block is then excluded due to block
1047 : // property filters, the iterator does not know whether keys were
1048 : // actually skipped by the block's exclusion.
1049 : //
1050 : // Since MaybeFilteredKeys cannot guarantee that keys were skipped,
1051 : // it's possible l.iterFile.Largest was already returned. Returning
1052 : // l.iterFile.Largest again is a violation of the strict
1053 : // monotonicity normally provided. The mergingIter's heap can
1054 : // tolerate this repeat key and in this case will keep the level at
1055 : // the top of the heap and immediately skip the entry, advancing to
1056 : // the next file.
1057 1 : if *l.rangeDelIterPtr != nil && l.filteredIter != nil &&
1058 1 : l.filteredIter.MaybeFilteredKeys() {
1059 1 : l.largestBoundary = &l.iterFile.Largest
1060 1 : if l.boundaryContext != nil {
1061 1 : l.boundaryContext.isIgnorableBoundaryKey = true
1062 1 : }
1063 1 : return l.largestBoundary, base.LazyValue{}
1064 : }
1065 : }
1066 :
1067 : // Current file was exhausted. Move to the next file.
1068 1 : if l.loadFile(l.files.Next(), +1) == noFileLoaded {
1069 1 : return nil, base.LazyValue{}
1070 1 : }
1071 : }
1072 1 : return key, val
1073 : }
1074 :
1075 1 : func (l *levelIter) skipEmptyFileBackward() (*InternalKey, base.LazyValue) {
1076 1 : var key *InternalKey
1077 1 : var val base.LazyValue
1078 1 : // The first iteration of this loop starts with an already exhausted
1079 1 : // l.iter. The reason for the exhaustion is either that we iterated to the
1080 1 : // end of the sstable, or our iteration was terminated early due to the
1081 1 : // presence of a lower-bound. If l.rangeDelIterPtr is non-nil, we may need
1082 1 : // to pretend the iterator is not exhausted to allow for the merging to
1083 1 : // finish consuming the l.rangeDelIterPtr before levelIter switches the
1084 1 : // rangeDelIter from under it. This pretense is done by either generating
1085 1 : // a synthetic boundary key or returning the smallest key of the file,
1086 1 : // depending on the exhaustion reason.
1087 1 :
1088 1 : // Subsequent iterations will examine consecutive files such that the first
1089 1 : // file that does not have an exhausted iterator causes the code to return
1090 1 : // that key, else the behavior described above if there is a corresponding
1091 1 : // rangeDelIterPtr.
1092 1 : for ; key == nil; key, val = l.iter.Last() {
1093 1 : if l.iter.Error() != nil {
1094 0 : return nil, base.LazyValue{}
1095 0 : }
1096 1 : if l.rangeDelIterPtr != nil {
1097 1 : // We're being used as part of a mergingIter and we've exhausted the
1098 1 : // current sstable. If a lower bound is present and the lower bound lies
1099 1 : // within the current sstable, then we will have reached the lower bound
1100 1 : // rather than the beginning of the sstable. We need to return a
1101 1 : // synthetic boundary key so that mergingIter can use the range tombstone
1102 1 : // iterator until the other levels have reached this boundary.
1103 1 : //
1104 1 : // It is safe to set the boundary key to the LowerBound user key
1105 1 : // with the RANGEDEL sentinel since it is the smallest InternalKey
1106 1 : // that is within the inclusive lower bound, and does not
1107 1 : // represent a real key.
1108 1 : if l.tableOpts.LowerBound != nil {
1109 1 : if *l.rangeDelIterPtr != nil {
1110 1 : l.syntheticBoundary.UserKey = l.tableOpts.LowerBound
1111 1 : l.syntheticBoundary.Trailer = InternalKeyRangeDeleteSentinel
1112 1 : l.smallestBoundary = &l.syntheticBoundary
1113 1 : if l.boundaryContext != nil {
1114 1 : l.boundaryContext.isSyntheticIterBoundsKey = true
1115 1 : }
1116 1 : return l.smallestBoundary, base.LazyValue{}
1117 : }
1118 : // Else there are no range deletions in this sstable. This
1119 : // helps with performance when many levels are populated with
1120 : // sstables and most don't have any actual keys within the
1121 : // bounds.
1122 1 : return nil, base.LazyValue{}
1123 : }
1124 : // If the boundary could be a range deletion tombstone, return the
1125 : // smallest point key as a special ignorable key to avoid advancing to the
1126 : // next file.
1127 : //
1128 : // It's possible the SmallestPointKey was already returned. Returning it
1129 : // again is a violation of the strict monotonicity normally provided. The
1130 : // mergingIter's heap can tolerate this repeat key and in this case will
1131 : // keep the level at the top of the heap and immediately skip the entry,
1132 : // advancing to the next file.
1133 1 : if *l.rangeDelIterPtr != nil {
1134 1 : l.smallestBoundary = &l.iterFile.SmallestPointKey
1135 1 : if l.boundaryContext != nil {
1136 1 : l.boundaryContext.isIgnorableBoundaryKey = true
1137 1 : }
1138 1 : return l.smallestBoundary, base.LazyValue{}
1139 : }
1140 : }
1141 :
1142 : // Current file was exhausted. Move to the previous file.
1143 1 : if l.loadFile(l.files.Prev(), -1) == noFileLoaded {
1144 1 : return nil, base.LazyValue{}
1145 1 : }
1146 : }
1147 1 : return key, val
1148 : }
1149 :
1150 1 : func (l *levelIter) Error() error {
1151 1 : if l.err != nil || l.iter == nil {
1152 1 : return l.err
1153 1 : }
1154 1 : return l.iter.Error()
1155 : }
1156 :
1157 1 : func (l *levelIter) Close() error {
1158 1 : if l.iter != nil {
1159 1 : l.err = l.iter.Close()
1160 1 : l.iter = nil
1161 1 : }
1162 1 : if l.rangeDelIterPtr != nil {
1163 1 : if t := l.rangeDelIterCopy; t != nil {
1164 1 : l.err = firstError(l.err, t.Close())
1165 1 : }
1166 1 : *l.rangeDelIterPtr = nil
1167 1 : l.rangeDelIterCopy = nil
1168 : }
1169 1 : return l.err
1170 : }
1171 :
1172 1 : func (l *levelIter) SetBounds(lower, upper []byte) {
1173 1 : l.lower = lower
1174 1 : l.upper = upper
1175 1 :
1176 1 : if l.iter == nil {
1177 1 : return
1178 1 : }
1179 :
1180 : // Update tableOpts.{Lower,Upper}Bound in case the new boundaries fall within
1181 : // the boundaries of the current table.
1182 1 : if l.initTableBounds(l.iterFile) != 0 {
1183 1 : // The table does not overlap the bounds. Close() will set levelIter.err if
1184 1 : // an error occurs.
1185 1 : _ = l.Close()
1186 1 : return
1187 1 : }
1188 :
1189 1 : l.iter.SetBounds(l.tableOpts.LowerBound, l.tableOpts.UpperBound)
1190 : }
1191 :
1192 0 : func (l *levelIter) SetContext(ctx context.Context) {
1193 0 : l.ctx = ctx
1194 0 : if l.iter != nil {
1195 0 : // TODO(sumeer): this is losing the ctx = objiotracing.WithLevel(ctx,
1196 0 : // manifest.LevelToInt(opts.level)) that happens in table_cache.go.
1197 0 : l.iter.SetContext(ctx)
1198 0 : }
1199 : }
1200 :
1201 1 : func (l *levelIter) String() string {
1202 1 : if l.iterFile != nil {
1203 1 : return fmt.Sprintf("%s: fileNum=%s", l.level, l.iter.String())
1204 1 : }
1205 0 : return fmt.Sprintf("%s: fileNum=<nil>", l.level)
1206 : }
1207 :
1208 : var _ internalIterator = &levelIter{}
|