Line data Source code
1 : // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package pebble
6 :
7 : import (
8 : "context"
9 : "fmt"
10 : "runtime/debug"
11 :
12 : "github.com/cockroachdb/errors"
13 : "github.com/cockroachdb/pebble/internal/base"
14 : "github.com/cockroachdb/pebble/internal/invariants"
15 : "github.com/cockroachdb/pebble/internal/keyspan"
16 : "github.com/cockroachdb/pebble/internal/manifest"
17 : "github.com/cockroachdb/pebble/internal/treeprinter"
18 : "github.com/cockroachdb/pebble/sstable"
19 : )
20 :
21 : type internalIterOpts struct {
22 : // if compaction is set, sstable-level iterators will be created using
23 : // NewCompactionIter; these iterators have a more constrained interface
24 : // and are optimized for the sequential scan of a compaction.
25 : compaction bool
26 : bufferPool *sstable.BufferPool
27 : stats *base.InternalIteratorStats
28 : iterStatsAccumulator sstable.IterStatsAccumulator
29 : boundLimitedFilter sstable.BoundLimitedBlockPropertyFilter
30 : }
31 :
32 : // levelIter provides a merged view of the sstables in a level.
33 : //
34 : // levelIter is used during compaction and as part of the Iterator
35 : // implementation. When used as part of the Iterator implementation, level
36 : // iteration needs to "pause" at range deletion boundaries if file contains
37 : // range deletions. In this case, the levelIter uses a keyspan.InterleavingIter
38 : // to materialize InternalKVs at start and end boundaries of range deletions.
39 : // This prevents mergingIter from advancing past the sstable until the sstable
40 : // contains the smallest (or largest for reverse iteration) key in the merged
41 : // heap. Note that mergingIter treats a range deletion tombstone returned by the
42 : // point iterator as a no-op.
43 : type levelIter struct {
44 : // The context is stored here since (a) iterators are expected to be
45 : // short-lived (since they pin sstables), (b) plumbing a context into every
46 : // method is very painful, (c) they do not (yet) respect context
47 : // cancellation and are only used for tracing.
48 : ctx context.Context
49 : logger Logger
50 : comparer *Comparer
51 : cmp Compare
52 : split Split
53 : // The lower/upper bounds for iteration as specified at creation or the most
54 : // recent call to SetBounds.
55 : lower []byte
56 : upper []byte
57 : // prefix holds the iteration prefix when the most recent absolute
58 : // positioning method was a SeekPrefixGE.
59 : prefix []byte
60 : // The iterator options for the currently open table. If
61 : // tableOpts.{Lower,Upper}Bound are nil, the corresponding iteration boundary
62 : // does not lie within the table bounds.
63 : tableOpts IterOptions
64 : // The layer this levelIter is initialized for. This can be either
65 : // a level L1+, an L0 sublevel, or a flushable ingests layer.
66 : layer manifest.Layer
67 : // combinedIterState may be set when a levelIter is used during user
68 : // iteration. Although levelIter only iterates over point keys, it's also
69 : // responsible for lazily constructing the combined range & point iterator
70 : // when it observes a file containing range keys. If the combined iter
71 : // state's initialized field is true, the iterator is already using combined
72 : // iterator, OR the iterator is not configured to use combined iteration. If
73 : // it's false, the levelIter must set the `triggered` and `key` fields when
74 : // the levelIter passes over a file containing range keys. See the
75 : // lazyCombinedIter for more details.
76 : combinedIterState *combinedIterState
77 : // The iter for the current file. It is nil under any of the following conditions:
78 : // - files.Current() == nil
79 : // - err != nil
80 : // - some other constraint, like the bounds in opts, caused the file at index to not
81 : // be relevant to the iteration.
82 : iter internalIterator
83 : // iterFile holds the current file. It is always equal to l.files.Current().
84 : iterFile *fileMetadata
85 : newIters tableNewIters
86 : files manifest.LevelIterator
87 : err error
88 :
89 : // When rangeDelIterSetter != nil, the caller requires that this function
90 : // gets called with a range deletion iterator whenever the current file
91 : // changes. The iterator is relinquished to the caller which is responsible
92 : // for closing it.
93 : //
94 : // When rangeDelIterSetter != nil, the levelIter will also interleave the
95 : // boundaries of range deletions among point keys.
96 : rangeDelIterSetter rangeDelIterSetter
97 :
98 : // interleaving is used when rangeDelIterFn != nil to interleave the
99 : // boundaries of range deletions among point keys. When the leve iterator is
100 : // used by a merging iterator, this ensures that we don't advance to a new
101 : // file until the range deletions are no longer needed by other levels.
102 : interleaving keyspan.InterleavingIter
103 :
104 : // internalOpts holds the internal iterator options to pass to the table
105 : // cache when constructing new table iterators.
106 : internalOpts internalIterOpts
107 :
108 : // Scratch space for the obsolete keys filter, when there are no other block
109 : // property filters specified. See the performance note where
110 : // IterOptions.PointKeyFilters is declared.
111 : filtersBuf [1]BlockPropertyFilter
112 :
113 : // exhaustedDir is set to +1 or -1 when the levelIter has been exhausted in
114 : // the forward or backward direction respectively. It is set when the
115 : // underlying data is exhausted or when iteration has reached the upper or
116 : // lower boundary and interleaved a synthetic iterator bound key. When the
117 : // iterator is exhausted and Next or Prev is called, the levelIter uses
118 : // exhaustedDir to determine whether the iterator should step on to the
119 : // first or last key within iteration bounds.
120 : exhaustedDir int8
121 :
122 : // Disable invariant checks even if they are otherwise enabled. Used by tests
123 : // which construct "impossible" situations (e.g. seeking to a key before the
124 : // lower bound).
125 : disableInvariants bool
126 : }
127 :
128 : type rangeDelIterSetter interface {
129 : setRangeDelIter(rangeDelIter keyspan.FragmentIterator)
130 : }
131 :
132 : // levelIter implements the base.InternalIterator interface.
133 : var _ base.InternalIterator = (*levelIter)(nil)
134 :
135 : // newLevelIter returns a levelIter. It is permissible to pass a nil split
136 : // parameter if the caller is never going to call SeekPrefixGE.
137 : func newLevelIter(
138 : ctx context.Context,
139 : opts IterOptions,
140 : comparer *Comparer,
141 : newIters tableNewIters,
142 : files manifest.LevelIterator,
143 : layer manifest.Layer,
144 : internalOpts internalIterOpts,
145 1 : ) *levelIter {
146 1 : l := &levelIter{}
147 1 : l.init(ctx, opts, comparer, newIters, files, layer, internalOpts)
148 1 : return l
149 1 : }
150 :
151 : func (l *levelIter) init(
152 : ctx context.Context,
153 : opts IterOptions,
154 : comparer *Comparer,
155 : newIters tableNewIters,
156 : files manifest.LevelIterator,
157 : layer manifest.Layer,
158 : internalOpts internalIterOpts,
159 1 : ) {
160 1 : l.ctx = ctx
161 1 : l.err = nil
162 1 : l.layer = layer
163 1 : l.logger = opts.getLogger()
164 1 : l.prefix = nil
165 1 : l.lower = opts.LowerBound
166 1 : l.upper = opts.UpperBound
167 1 : l.tableOpts.PointKeyFilters = opts.PointKeyFilters
168 1 : if len(opts.PointKeyFilters) == 0 {
169 1 : l.tableOpts.PointKeyFilters = l.filtersBuf[:0:1]
170 1 : }
171 1 : l.tableOpts.UseL6Filters = opts.UseL6Filters
172 1 : l.tableOpts.Category = opts.Category
173 1 : l.tableOpts.layer = l.layer
174 1 : l.tableOpts.snapshotForHideObsoletePoints = opts.snapshotForHideObsoletePoints
175 1 : l.comparer = comparer
176 1 : l.cmp = comparer.Compare
177 1 : l.split = comparer.Split
178 1 : l.iterFile = nil
179 1 : l.newIters = newIters
180 1 : l.files = files
181 1 : l.exhaustedDir = 0
182 1 : l.internalOpts = internalOpts
183 : }
184 :
185 : // initRangeDel puts the level iterator into a mode where it interleaves range
186 : // deletion boundaries with point keys and provides a range deletion iterator
187 : // (through rangeDelIterFn) whenever the current file changes.
188 : //
189 : // The range deletion iterator passed to rangeDelIterFn is relinquished to the
190 : // implementor who is responsible for closing it.
191 1 : func (l *levelIter) initRangeDel(rangeDelSetter rangeDelIterSetter) {
192 1 : l.rangeDelIterSetter = rangeDelSetter
193 1 : }
194 :
195 1 : func (l *levelIter) initCombinedIterState(state *combinedIterState) {
196 1 : l.combinedIterState = state
197 1 : }
198 :
199 1 : func (l *levelIter) maybeTriggerCombinedIteration(file *fileMetadata, dir int) {
200 1 : // If we encounter a file that contains range keys, we may need to
201 1 : // trigger a switch to combined range-key and point-key iteration,
202 1 : // if the *pebble.Iterator is configured for it. This switch is done
203 1 : // lazily because range keys are intended to be rare, and
204 1 : // constructing the range-key iterator substantially adds to the
205 1 : // cost of iterator construction and seeking.
206 1 : //
207 1 : // If l.combinedIterState.initialized is already true, either the
208 1 : // iterator is already using combined iteration or the iterator is not
209 1 : // configured to observe range keys. Either way, there's nothing to do.
210 1 : // If false, trigger the switch to combined iteration, using the the
211 1 : // file's bounds to seek the range-key iterator appropriately.
212 1 : //
213 1 : // We only need to trigger combined iteration if the file contains
214 1 : // RangeKeySets: if there are only Unsets and Dels, the user will observe no
215 1 : // range keys regardless. If this file has table stats available, they'll
216 1 : // tell us whether the file has any RangeKeySets. Otherwise, we must
217 1 : // fallback to assuming it does if HasRangeKeys=true.
218 1 : if file != nil && file.HasRangeKeys && l.combinedIterState != nil && !l.combinedIterState.initialized &&
219 1 : (l.upper == nil || l.cmp(file.SmallestRangeKey.UserKey, l.upper) < 0) &&
220 1 : (l.lower == nil || l.cmp(file.LargestRangeKey.UserKey, l.lower) > 0) &&
221 1 : (!file.StatsValid() || file.Stats.NumRangeKeySets > 0) {
222 1 : // The file contains range keys, and we're not using combined iteration yet.
223 1 : // Trigger a switch to combined iteration. It's possible that a switch has
224 1 : // already been triggered if multiple levels encounter files containing
225 1 : // range keys while executing a single mergingIter operation. In this case,
226 1 : // we need to compare the existing key recorded to l.combinedIterState.key,
227 1 : // adjusting it if our key is smaller (forward iteration) or larger
228 1 : // (backward iteration) than the existing key.
229 1 : //
230 1 : // These key comparisons are only required during a single high-level
231 1 : // iterator operation. When the high-level iter op completes,
232 1 : // iinitialized will be true, and future calls to this function will be
233 1 : // no-ops.
234 1 : switch dir {
235 1 : case +1:
236 1 : if !l.combinedIterState.triggered {
237 1 : l.combinedIterState.triggered = true
238 1 : l.combinedIterState.key = file.SmallestRangeKey.UserKey
239 1 : } else if l.cmp(l.combinedIterState.key, file.SmallestRangeKey.UserKey) > 0 {
240 1 : l.combinedIterState.key = file.SmallestRangeKey.UserKey
241 1 : }
242 1 : case -1:
243 1 : if !l.combinedIterState.triggered {
244 1 : l.combinedIterState.triggered = true
245 1 : l.combinedIterState.key = file.LargestRangeKey.UserKey
246 1 : } else if l.cmp(l.combinedIterState.key, file.LargestRangeKey.UserKey) < 0 {
247 1 : l.combinedIterState.key = file.LargestRangeKey.UserKey
248 1 : }
249 : }
250 : }
251 : }
252 :
253 1 : func (l *levelIter) findFileGE(key []byte, flags base.SeekGEFlags) *fileMetadata {
254 1 : // Find the earliest file whose largest key is >= key.
255 1 :
256 1 : // NB: if flags.TrySeekUsingNext()=true, the levelIter must respect it. If
257 1 : // the levelIter is positioned at the key P, it must return a key ≥ P. If
258 1 : // used within a merging iterator, the merging iterator will depend on the
259 1 : // levelIter only moving forward to maintain heap invariants.
260 1 :
261 1 : // Ordinarily we seek the LevelIterator using SeekGE. In some instances, we
262 1 : // Next instead. In other instances, we try Next-ing first, falling back to
263 1 : // seek:
264 1 : // a) flags.TrySeekUsingNext(): The top-level Iterator knows we're seeking
265 1 : // to a key later than the current iterator position. We don't know how
266 1 : // much later the seek key is, so it's possible there are many sstables
267 1 : // between the current position and the seek key. However in most real-
268 1 : // world use cases, the seek key is likely to be nearby. Rather than
269 1 : // performing a log(N) seek through the file metadata, we next a few
270 1 : // times from our existing location. If we don't find a file whose
271 1 : // largest is >= key within a few nexts, we fall back to seeking.
272 1 : //
273 1 : // Note that in this case, the file returned by findFileGE may be
274 1 : // different than the file returned by a raw binary search (eg, when
275 1 : // TrySeekUsingNext=false). This is possible because the most recent
276 1 : // positioning operation may have already determined that previous
277 1 : // files' keys that are ≥ key are all deleted. This information is
278 1 : // encoded within the iterator's current iterator position and is
279 1 : // unavailable to a fresh binary search.
280 1 : //
281 1 : // b) flags.RelativeSeek(): The merging iterator decided to re-seek this
282 1 : // level according to a range tombstone. When lazy combined iteration
283 1 : // is enabled, the level iterator is responsible for watching for
284 1 : // files containing range keys and triggering the switch to combined
285 1 : // iteration when such a file is observed. If a range deletion was
286 1 : // observed in a higher level causing the merging iterator to seek the
287 1 : // level to the range deletion's end key, we need to check whether all
288 1 : // of the files between the old position and the new position contain
289 1 : // any range keys.
290 1 : //
291 1 : // In this scenario, we don't seek the LevelIterator and instead we
292 1 : // Next it, one file at a time, checking each for range keys. The
293 1 : // merging iterator sets this flag to inform us that we're moving
294 1 : // forward relative to the existing position and that we must examine
295 1 : // each intermediate sstable's metadata for lazy-combined iteration.
296 1 : // In this case, we only Next and never Seek. We set nextsUntilSeek=-1
297 1 : // to signal this intention.
298 1 : //
299 1 : // NB: At most one of flags.RelativeSeek() and flags.TrySeekUsingNext() may
300 1 : // be set, because the merging iterator re-seeks relative seeks with
301 1 : // explicitly only the RelativeSeek flag set.
302 1 : var nextsUntilSeek int
303 1 : var nextInsteadOfSeek bool
304 1 : if flags.TrySeekUsingNext() {
305 1 : nextInsteadOfSeek = true
306 1 : nextsUntilSeek = 4 // arbitrary
307 1 : }
308 1 : if flags.RelativeSeek() && l.combinedIterState != nil && !l.combinedIterState.initialized {
309 1 : nextInsteadOfSeek = true
310 1 : nextsUntilSeek = -1
311 1 : }
312 :
313 1 : var m *fileMetadata
314 1 : if nextInsteadOfSeek {
315 1 : m = l.iterFile
316 1 : } else {
317 1 : m = l.files.SeekGE(l.cmp, key)
318 1 : }
319 : // The below loop has a bit of an unusual organization. There are several
320 : // conditions under which we need to Next to a later file. If none of those
321 : // conditions are met, the file in `m` is okay to return. The loop body is
322 : // structured with a series of if statements, each of which may continue the
323 : // loop to the next file. If none of the statements are met, the end of the
324 : // loop body is a break.
325 1 : for m != nil {
326 1 : if m.HasRangeKeys {
327 1 : l.maybeTriggerCombinedIteration(m, +1)
328 1 :
329 1 : // Some files may only contain range keys, which we can skip.
330 1 : // NB: HasPointKeys=true if the file contains any points or range
331 1 : // deletions (which delete points).
332 1 : if !m.HasPointKeys {
333 1 : m = l.files.Next()
334 1 : continue
335 : }
336 : }
337 :
338 : // This file has point keys.
339 : //
340 : // However, there are a couple reasons why `m` may not be positioned ≥
341 : // `key` yet:
342 : //
343 : // 1. If SeekGE(key) landed on a file containing range keys, the file
344 : // may contain range keys ≥ `key` but no point keys ≥ `key`.
345 : // 2. When nexting instead of seeking, we must check to see whether
346 : // we've nexted sufficiently far, or we need to next again.
347 : //
348 : // If the file does not contain point keys ≥ `key`, next to continue
349 : // looking for a file that does.
350 1 : if (m.HasRangeKeys || nextInsteadOfSeek) && l.cmp(m.LargestPointKey.UserKey, key) < 0 {
351 1 : // If nextInsteadOfSeek is set and nextsUntilSeek is non-negative,
352 1 : // the iterator has been nexting hoping to discover the relevant
353 1 : // file without seeking. It's exhausted the allotted nextsUntilSeek
354 1 : // and should seek to the sought key.
355 1 : if nextInsteadOfSeek && nextsUntilSeek == 0 {
356 0 : nextInsteadOfSeek = false
357 0 : m = l.files.SeekGE(l.cmp, key)
358 0 : continue
359 1 : } else if nextsUntilSeek > 0 {
360 1 : nextsUntilSeek--
361 1 : }
362 1 : m = l.files.Next()
363 1 : continue
364 : }
365 :
366 : // This file has a point key bound ≥ `key`. But the largest point key
367 : // bound may still be a range deletion sentinel, which is exclusive. In
368 : // this case, the file doesn't actually contain any point keys equal to
369 : // `key`. We next to keep searching for a file that actually contains
370 : // point keys ≥ key.
371 : //
372 : // Additionally, this prevents loading untruncated range deletions from
373 : // a table which can't possibly contain the target key and is required
374 : // for correctness by mergingIter.SeekGE (see the comment in that
375 : // function).
376 1 : if m.LargestPointKey.IsExclusiveSentinel() && l.cmp(m.LargestPointKey.UserKey, key) == 0 {
377 1 : m = l.files.Next()
378 1 : continue
379 : }
380 :
381 : // This file contains point keys ≥ `key`. Break and return it.
382 1 : break
383 : }
384 1 : return m
385 : }
386 :
387 1 : func (l *levelIter) findFileLT(key []byte, flags base.SeekLTFlags) *fileMetadata {
388 1 : // Find the last file whose smallest key is < ikey.
389 1 :
390 1 : // Ordinarily we seek the LevelIterator using SeekLT.
391 1 : //
392 1 : // When lazy combined iteration is enabled, there's a complication. The
393 1 : // level iterator is responsible for watching for files containing range
394 1 : // keys and triggering the switch to combined iteration when such a file is
395 1 : // observed. If a range deletion was observed in a higher level causing the
396 1 : // merging iterator to seek the level to the range deletion's start key, we
397 1 : // need to check whether all of the files between the old position and the
398 1 : // new position contain any range keys.
399 1 : //
400 1 : // In this scenario, we don't seek the LevelIterator and instead we Prev it,
401 1 : // one file at a time, checking each for range keys.
402 1 : prevInsteadOfSeek := flags.RelativeSeek() && l.combinedIterState != nil && !l.combinedIterState.initialized
403 1 :
404 1 : var m *fileMetadata
405 1 : if prevInsteadOfSeek {
406 1 : m = l.iterFile
407 1 : } else {
408 1 : m = l.files.SeekLT(l.cmp, key)
409 1 : }
410 : // The below loop has a bit of an unusual organization. There are several
411 : // conditions under which we need to Prev to a previous file. If none of
412 : // those conditions are met, the file in `m` is okay to return. The loop
413 : // body is structured with a series of if statements, each of which may
414 : // continue the loop to the previous file. If none of the statements are
415 : // met, the end of the loop body is a break.
416 1 : for m != nil {
417 1 : if m.HasRangeKeys {
418 1 : l.maybeTriggerCombinedIteration(m, -1)
419 1 :
420 1 : // Some files may only contain range keys, which we can skip.
421 1 : // NB: HasPointKeys=true if the file contains any points or range
422 1 : // deletions (which delete points).
423 1 : if !m.HasPointKeys {
424 1 : m = l.files.Prev()
425 1 : continue
426 : }
427 : }
428 :
429 : // This file has point keys.
430 : //
431 : // However, there are a couple reasons why `m` may not be positioned <
432 : // `key` yet:
433 : //
434 : // 1. If SeekLT(key) landed on a file containing range keys, the file
435 : // may contain range keys < `key` but no point keys < `key`.
436 : // 2. When preving instead of seeking, we must check to see whether
437 : // we've preved sufficiently far, or we need to prev again.
438 : //
439 : // If the file does not contain point keys < `key`, prev to continue
440 : // looking for a file that does.
441 1 : if (m.HasRangeKeys || prevInsteadOfSeek) && l.cmp(m.SmallestPointKey.UserKey, key) >= 0 {
442 1 : m = l.files.Prev()
443 1 : continue
444 : }
445 :
446 : // This file contains point keys < `key`. Break and return it.
447 1 : break
448 : }
449 1 : return m
450 : }
451 :
452 : // Init the iteration bounds for the current table. Returns -1 if the table
453 : // lies fully before the lower bound, +1 if the table lies fully after the
454 : // upper bound, and 0 if the table overlaps the iteration bounds.
455 1 : func (l *levelIter) initTableBounds(f *fileMetadata) int {
456 1 : l.tableOpts.LowerBound = l.lower
457 1 : if l.tableOpts.LowerBound != nil {
458 1 : if l.cmp(f.LargestPointKey.UserKey, l.tableOpts.LowerBound) < 0 {
459 1 : // The largest key in the sstable is smaller than the lower bound.
460 1 : return -1
461 1 : }
462 1 : if l.cmp(l.tableOpts.LowerBound, f.SmallestPointKey.UserKey) <= 0 {
463 1 : // The lower bound is smaller or equal to the smallest key in the
464 1 : // table. Iteration within the table does not need to check the lower
465 1 : // bound.
466 1 : l.tableOpts.LowerBound = nil
467 1 : }
468 : }
469 1 : l.tableOpts.UpperBound = l.upper
470 1 : if l.tableOpts.UpperBound != nil {
471 1 : if l.cmp(f.SmallestPointKey.UserKey, l.tableOpts.UpperBound) >= 0 {
472 1 : // The smallest key in the sstable is greater than or equal to the upper
473 1 : // bound.
474 1 : return 1
475 1 : }
476 1 : if l.cmp(l.tableOpts.UpperBound, f.LargestPointKey.UserKey) > 0 {
477 1 : // The upper bound is greater than the largest key in the
478 1 : // table. Iteration within the table does not need to check the upper
479 1 : // bound. NB: tableOpts.UpperBound is exclusive and f.LargestPointKey is
480 1 : // inclusive.
481 1 : l.tableOpts.UpperBound = nil
482 1 : }
483 : }
484 1 : return 0
485 : }
486 :
487 : type loadFileReturnIndicator int8
488 :
489 : const (
490 : noFileLoaded loadFileReturnIndicator = iota
491 : fileAlreadyLoaded
492 : newFileLoaded
493 : )
494 :
495 1 : func (l *levelIter) loadFile(file *fileMetadata, dir int) loadFileReturnIndicator {
496 1 : if l.iterFile == file {
497 1 : if l.err != nil {
498 0 : return noFileLoaded
499 0 : }
500 1 : if l.iter != nil {
501 1 : // We don't bother comparing the file bounds with the iteration bounds when we have
502 1 : // an already open iterator. It is possible that the iter may not be relevant given the
503 1 : // current iteration bounds, but it knows those bounds, so it will enforce them.
504 1 :
505 1 : // There are a few reasons we might not have triggered combined
506 1 : // iteration yet, even though we already had `file` open.
507 1 : // 1. If the bounds changed, we might have previously avoided
508 1 : // switching to combined iteration because the bounds excluded
509 1 : // the range keys contained in this file.
510 1 : // 2. If an existing iterator was reconfigured to iterate over range
511 1 : // keys (eg, using SetOptions), then we wouldn't have triggered
512 1 : // the switch to combined iteration yet.
513 1 : l.maybeTriggerCombinedIteration(file, dir)
514 1 : return fileAlreadyLoaded
515 1 : }
516 : // We were already at file, but don't have an iterator, probably because the file was
517 : // beyond the iteration bounds. It may still be, but it is also possible that the bounds
518 : // have changed. We handle that below.
519 : }
520 :
521 : // Close iter and send a nil iterator through rangeDelIterFn.rangeDelIterFn.
522 1 : if err := l.Close(); err != nil {
523 1 : return noFileLoaded
524 1 : }
525 :
526 1 : for {
527 1 : l.iterFile = file
528 1 : if file == nil {
529 1 : return noFileLoaded
530 1 : }
531 :
532 1 : l.maybeTriggerCombinedIteration(file, dir)
533 1 : if !file.HasPointKeys {
534 1 : switch dir {
535 1 : case +1:
536 1 : file = l.files.Next()
537 1 : continue
538 1 : case -1:
539 1 : file = l.files.Prev()
540 1 : continue
541 : }
542 : }
543 :
544 1 : switch l.initTableBounds(file) {
545 1 : case -1:
546 1 : // The largest key in the sstable is smaller than the lower bound.
547 1 : if dir < 0 {
548 1 : return noFileLoaded
549 1 : }
550 0 : file = l.files.Next()
551 0 : continue
552 1 : case +1:
553 1 : // The smallest key in the sstable is greater than or equal to the upper
554 1 : // bound.
555 1 : if dir > 0 {
556 1 : return noFileLoaded
557 1 : }
558 0 : file = l.files.Prev()
559 0 : continue
560 : }
561 : // If we're in prefix iteration, it's possible this file's smallest
562 : // boundary is large enough to prove the file cannot possibly contain
563 : // any keys within the iteration prefix. Loading the next file is
564 : // unnecessary. This has been observed in practice on slow shared
565 : // storage. See #3575.
566 1 : if l.prefix != nil && l.cmp(l.split.Prefix(file.SmallestPointKey.UserKey), l.prefix) > 0 {
567 1 : // Note that because l.iter is nil, a subsequent call to
568 1 : // SeekPrefixGE with TrySeekUsingNext()=true will load the file
569 1 : // (returning newFileLoaded) and disable TrySeekUsingNext before
570 1 : // performing a seek in the file.
571 1 : return noFileLoaded
572 1 : }
573 :
574 1 : iterKinds := iterPointKeys
575 1 : if l.rangeDelIterSetter != nil {
576 1 : iterKinds |= iterRangeDeletions
577 1 : }
578 :
579 1 : var iters iterSet
580 1 : iters, l.err = l.newIters(l.ctx, l.iterFile, &l.tableOpts, l.internalOpts, iterKinds)
581 1 : if l.err != nil {
582 1 : if l.rangeDelIterSetter != nil {
583 1 : l.rangeDelIterSetter.setRangeDelIter(nil)
584 1 : }
585 1 : return noFileLoaded
586 : }
587 1 : l.iter = iters.Point()
588 1 : if l.rangeDelIterSetter != nil && iters.rangeDeletion != nil {
589 1 : // If this file has range deletions, interleave the bounds of the
590 1 : // range deletions among the point keys. When used with a
591 1 : // mergingIter, this ensures we don't move beyond a file with range
592 1 : // deletions until its range deletions are no longer relevant.
593 1 : //
594 1 : // For now, we open a second range deletion iterator. Future work
595 1 : // will avoid the need to open a second range deletion iterator, and
596 1 : // avoid surfacing the file's range deletion iterator via rangeDelIterFn.
597 1 : itersForBounds, err := l.newIters(l.ctx, l.iterFile, &l.tableOpts, l.internalOpts, iterRangeDeletions)
598 1 : if err != nil {
599 0 : l.iter = nil
600 0 : l.err = errors.CombineErrors(err, iters.CloseAll())
601 0 : return noFileLoaded
602 0 : }
603 1 : l.interleaving.Init(l.comparer, l.iter, itersForBounds.RangeDeletion(), keyspan.InterleavingIterOpts{
604 1 : LowerBound: l.tableOpts.LowerBound,
605 1 : UpperBound: l.tableOpts.UpperBound,
606 1 : InterleaveEndKeys: true,
607 1 : })
608 1 : l.iter = &l.interleaving
609 1 :
610 1 : // Relinquish iters.rangeDeletion to the caller.
611 1 : l.rangeDelIterSetter.setRangeDelIter(iters.rangeDeletion)
612 : }
613 1 : return newFileLoaded
614 : }
615 : }
616 :
617 : // In race builds we verify that the keys returned by levelIter lie within
618 : // [lower,upper).
619 1 : func (l *levelIter) verify(kv *base.InternalKV) *base.InternalKV {
620 1 : // Note that invariants.Enabled is a compile time constant, which means the
621 1 : // block of code will be compiled out of normal builds making this method
622 1 : // eligible for inlining. Do not change this to use a variable.
623 1 : if invariants.Enabled && !l.disableInvariants && kv != nil {
624 1 : // We allow returning a boundary key that is outside of the lower/upper
625 1 : // bounds as such keys are always range tombstones which will be skipped
626 1 : // by the Iterator.
627 1 : if l.lower != nil && kv != nil && !kv.K.IsExclusiveSentinel() && l.cmp(kv.K.UserKey, l.lower) < 0 {
628 0 : l.logger.Fatalf("levelIter %s: lower bound violation: %s < %s\n%s", l.layer, kv, l.lower, debug.Stack())
629 0 : }
630 1 : if l.upper != nil && kv != nil && !kv.K.IsExclusiveSentinel() && l.cmp(kv.K.UserKey, l.upper) > 0 {
631 0 : l.logger.Fatalf("levelIter %s: upper bound violation: %s > %s\n%s", l.layer, kv, l.upper, debug.Stack())
632 0 : }
633 : }
634 1 : return kv
635 : }
636 :
637 1 : func (l *levelIter) SeekGE(key []byte, flags base.SeekGEFlags) *base.InternalKV {
638 1 : if invariants.Enabled && l.lower != nil && l.cmp(key, l.lower) < 0 {
639 0 : panic(errors.AssertionFailedf("levelIter SeekGE to key %q violates lower bound %q", key, l.lower))
640 : }
641 :
642 1 : l.err = nil // clear cached iteration error
643 1 : l.exhaustedDir = 0
644 1 : l.prefix = nil
645 1 : // NB: the top-level Iterator has already adjusted key based on
646 1 : // IterOptions.LowerBound.
647 1 : loadFileIndicator := l.loadFile(l.findFileGE(key, flags), +1)
648 1 : if loadFileIndicator == noFileLoaded {
649 1 : l.exhaustedForward()
650 1 : return nil
651 1 : }
652 1 : if loadFileIndicator == newFileLoaded {
653 1 : // File changed, so l.iter has changed, and that iterator is not
654 1 : // positioned appropriately.
655 1 : flags = flags.DisableTrySeekUsingNext()
656 1 : }
657 1 : if kv := l.iter.SeekGE(key, flags); kv != nil {
658 1 : return l.verify(kv)
659 1 : }
660 1 : return l.verify(l.skipEmptyFileForward())
661 : }
662 :
663 1 : func (l *levelIter) SeekPrefixGE(prefix, key []byte, flags base.SeekGEFlags) *base.InternalKV {
664 1 : if invariants.Enabled && l.lower != nil && l.cmp(key, l.lower) < 0 {
665 0 : panic(errors.AssertionFailedf("levelIter SeekGE to key %q violates lower bound %q", key, l.lower))
666 : }
667 1 : l.err = nil // clear cached iteration error
668 1 : l.exhaustedDir = 0
669 1 : l.prefix = prefix
670 1 :
671 1 : // NB: the top-level Iterator has already adjusted key based on
672 1 : // IterOptions.LowerBound.
673 1 : loadFileIndicator := l.loadFile(l.findFileGE(key, flags), +1)
674 1 : if loadFileIndicator == noFileLoaded {
675 1 : l.exhaustedForward()
676 1 : return nil
677 1 : }
678 1 : if loadFileIndicator == newFileLoaded {
679 1 : // File changed, so l.iter has changed, and that iterator is not
680 1 : // positioned appropriately.
681 1 : flags = flags.DisableTrySeekUsingNext()
682 1 : }
683 1 : if kv := l.iter.SeekPrefixGE(prefix, key, flags); kv != nil {
684 1 : return l.verify(kv)
685 1 : }
686 1 : if err := l.iter.Error(); err != nil {
687 1 : return nil
688 1 : }
689 1 : return l.verify(l.skipEmptyFileForward())
690 : }
691 :
692 1 : func (l *levelIter) SeekLT(key []byte, flags base.SeekLTFlags) *base.InternalKV {
693 1 : if invariants.Enabled && l.upper != nil && l.cmp(key, l.upper) > 0 {
694 0 : panic(errors.AssertionFailedf("levelIter SeekLT to key %q violates upper bound %q", key, l.upper))
695 : }
696 :
697 1 : l.err = nil // clear cached iteration error
698 1 : l.exhaustedDir = 0
699 1 : l.prefix = nil
700 1 :
701 1 : // NB: the top-level Iterator has already adjusted key based on
702 1 : // IterOptions.UpperBound.
703 1 : if l.loadFile(l.findFileLT(key, flags), -1) == noFileLoaded {
704 1 : l.exhaustedBackward()
705 1 : return nil
706 1 : }
707 1 : if kv := l.iter.SeekLT(key, flags); kv != nil {
708 1 : return l.verify(kv)
709 1 : }
710 1 : return l.verify(l.skipEmptyFileBackward())
711 : }
712 :
713 1 : func (l *levelIter) First() *base.InternalKV {
714 1 : if invariants.Enabled && l.lower != nil {
715 0 : panic(errors.AssertionFailedf("levelIter First called while lower bound %q is set", l.lower))
716 : }
717 :
718 1 : l.err = nil // clear cached iteration error
719 1 : l.exhaustedDir = 0
720 1 : l.prefix = nil
721 1 :
722 1 : // NB: the top-level Iterator will call SeekGE if IterOptions.LowerBound is
723 1 : // set.
724 1 : if l.loadFile(l.files.First(), +1) == noFileLoaded {
725 1 : l.exhaustedForward()
726 1 : return nil
727 1 : }
728 1 : if kv := l.iter.First(); kv != nil {
729 1 : return l.verify(kv)
730 1 : }
731 1 : return l.verify(l.skipEmptyFileForward())
732 : }
733 :
734 1 : func (l *levelIter) Last() *base.InternalKV {
735 1 : if invariants.Enabled && l.upper != nil {
736 0 : panic(errors.AssertionFailedf("levelIter Last called while upper bound %q is set", l.upper))
737 : }
738 :
739 1 : l.err = nil // clear cached iteration error
740 1 : l.exhaustedDir = 0
741 1 : l.prefix = nil
742 1 :
743 1 : // NB: the top-level Iterator will call SeekLT if IterOptions.UpperBound is
744 1 : // set.
745 1 : if l.loadFile(l.files.Last(), -1) == noFileLoaded {
746 1 : l.exhaustedBackward()
747 1 : return nil
748 1 : }
749 1 : if kv := l.iter.Last(); kv != nil {
750 1 : return l.verify(kv)
751 1 : }
752 1 : return l.verify(l.skipEmptyFileBackward())
753 : }
754 :
755 1 : func (l *levelIter) Next() *base.InternalKV {
756 1 : if l.exhaustedDir == -1 {
757 1 : if l.lower != nil {
758 1 : return l.SeekGE(l.lower, base.SeekGEFlagsNone)
759 1 : }
760 1 : return l.First()
761 : }
762 1 : if l.err != nil || l.iter == nil {
763 1 : return nil
764 1 : }
765 1 : if kv := l.iter.Next(); kv != nil {
766 1 : return l.verify(kv)
767 1 : }
768 1 : return l.verify(l.skipEmptyFileForward())
769 : }
770 :
771 1 : func (l *levelIter) NextPrefix(succKey []byte) *base.InternalKV {
772 1 : if l.err != nil || l.iter == nil {
773 0 : return nil
774 0 : }
775 :
776 1 : if kv := l.iter.NextPrefix(succKey); kv != nil {
777 1 : return l.verify(kv)
778 1 : }
779 1 : if l.iter.Error() != nil {
780 1 : return nil
781 1 : }
782 1 : if l.tableOpts.UpperBound != nil {
783 0 : // The UpperBound was within this file, so don't load the next file.
784 0 : l.exhaustedForward()
785 0 : return nil
786 0 : }
787 :
788 : // Seek the manifest level iterator using TrySeekUsingNext=true and
789 : // RelativeSeek=true so that we take advantage of the knowledge that
790 : // `succKey` can only be contained in later files.
791 1 : metadataSeekFlags := base.SeekGEFlagsNone.EnableTrySeekUsingNext().EnableRelativeSeek()
792 1 : if l.loadFile(l.findFileGE(succKey, metadataSeekFlags), +1) != noFileLoaded {
793 1 : // NB: The SeekGE on the file's iterator must not set TrySeekUsingNext,
794 1 : // because l.iter is unpositioned.
795 1 : if kv := l.iter.SeekGE(succKey, base.SeekGEFlagsNone); kv != nil {
796 1 : return l.verify(kv)
797 1 : }
798 0 : return l.verify(l.skipEmptyFileForward())
799 : }
800 1 : l.exhaustedForward()
801 1 : return nil
802 : }
803 :
804 1 : func (l *levelIter) Prev() *base.InternalKV {
805 1 : if l.exhaustedDir == +1 {
806 1 : if l.upper != nil {
807 1 : return l.SeekLT(l.upper, base.SeekLTFlagsNone)
808 1 : }
809 1 : return l.Last()
810 : }
811 1 : if l.err != nil || l.iter == nil {
812 1 : return nil
813 1 : }
814 1 : if kv := l.iter.Prev(); kv != nil {
815 1 : return l.verify(kv)
816 1 : }
817 1 : return l.verify(l.skipEmptyFileBackward())
818 : }
819 :
820 1 : func (l *levelIter) skipEmptyFileForward() *base.InternalKV {
821 1 : var kv *base.InternalKV
822 1 : // The first iteration of this loop starts with an already exhausted l.iter.
823 1 : // The reason for the exhaustion is either that we iterated to the end of
824 1 : // the sstable, or our iteration was terminated early due to the presence of
825 1 : // an upper-bound or the use of SeekPrefixGE.
826 1 : //
827 1 : // Subsequent iterations will examine consecutive files such that the first
828 1 : // file that does not have an exhausted iterator causes the code to return
829 1 : // that key.
830 1 : for ; kv == nil; kv = l.iter.First() {
831 1 : if l.iter.Error() != nil {
832 1 : return nil
833 1 : }
834 : // If an upper bound is present and the upper bound lies within the
835 : // current sstable, then we will have reached the upper bound rather
836 : // than the end of the sstable.
837 1 : if l.tableOpts.UpperBound != nil {
838 1 : l.exhaustedForward()
839 1 : return nil
840 1 : }
841 :
842 : // If the iterator is in prefix iteration mode, it's possible that we
843 : // are here because bloom filter matching failed. In that case it is
844 : // likely that all keys matching the prefix are wholly within the
845 : // current file and cannot be in a subsequent file. In that case we
846 : // don't want to go to the next file, since loading and seeking in there
847 : // has some cost.
848 : //
849 : // This is not just an optimization. We must not advance to the next
850 : // file if the current file might possibly contain keys relevant to any
851 : // prefix greater than our current iteration prefix. If we did, a
852 : // subsequent SeekPrefixGE with TrySeekUsingNext could mistakenly skip
853 : // the file's relevant keys.
854 1 : if l.prefix != nil {
855 1 : if l.cmp(l.split.Prefix(l.iterFile.LargestPointKey.UserKey), l.prefix) > 0 {
856 1 : l.exhaustedForward()
857 1 : return nil
858 1 : }
859 : }
860 :
861 : // Current file was exhausted. Move to the next file.
862 1 : if l.loadFile(l.files.Next(), +1) == noFileLoaded {
863 1 : l.exhaustedForward()
864 1 : return nil
865 1 : }
866 : }
867 1 : return kv
868 : }
869 :
870 1 : func (l *levelIter) skipEmptyFileBackward() *base.InternalKV {
871 1 : var kv *base.InternalKV
872 1 : // The first iteration of this loop starts with an already exhausted
873 1 : // l.iter. The reason for the exhaustion is either that we iterated to the
874 1 : // end of the sstable, or our iteration was terminated early due to the
875 1 : // presence of a lower-bound.
876 1 : //
877 1 : // Subsequent iterations will examine consecutive files such that the first
878 1 : // file that does not have an exhausted iterator causes the code to return
879 1 : // that key.
880 1 : for ; kv == nil; kv = l.iter.Last() {
881 1 : if l.iter.Error() != nil {
882 1 : return nil
883 1 : }
884 : // If a lower bound is present and the lower bound lies within the
885 : // current sstable, then we will have reached the lowerr bound rather
886 : // than the end of the sstable.
887 1 : if l.tableOpts.LowerBound != nil {
888 1 : l.exhaustedBackward()
889 1 : return nil
890 1 : }
891 : // Current file was exhausted. Move to the previous file.
892 1 : if l.loadFile(l.files.Prev(), -1) == noFileLoaded {
893 1 : l.exhaustedBackward()
894 1 : return nil
895 1 : }
896 : }
897 1 : return kv
898 : }
899 :
900 1 : func (l *levelIter) exhaustedForward() {
901 1 : l.exhaustedDir = +1
902 1 : }
903 :
904 1 : func (l *levelIter) exhaustedBackward() {
905 1 : l.exhaustedDir = -1
906 1 : }
907 :
908 1 : func (l *levelIter) Error() error {
909 1 : if l.err != nil || l.iter == nil {
910 1 : return l.err
911 1 : }
912 1 : return l.iter.Error()
913 : }
914 :
915 1 : func (l *levelIter) Close() error {
916 1 : if l.iter != nil {
917 1 : l.err = l.iter.Close()
918 1 : l.iter = nil
919 1 : }
920 1 : if l.rangeDelIterSetter != nil {
921 1 : l.rangeDelIterSetter.setRangeDelIter(nil)
922 1 : }
923 1 : return l.err
924 : }
925 :
926 1 : func (l *levelIter) SetBounds(lower, upper []byte) {
927 1 : l.lower = lower
928 1 : l.upper = upper
929 1 :
930 1 : if l.iter == nil {
931 1 : return
932 1 : }
933 :
934 : // Update tableOpts.{Lower,Upper}Bound in case the new boundaries fall within
935 : // the boundaries of the current table.
936 1 : if l.initTableBounds(l.iterFile) != 0 {
937 1 : // The table does not overlap the bounds. Close() will set levelIter.err if
938 1 : // an error occurs.
939 1 : _ = l.Close()
940 1 : return
941 1 : }
942 :
943 1 : l.iter.SetBounds(l.tableOpts.LowerBound, l.tableOpts.UpperBound)
944 : }
945 :
946 1 : func (l *levelIter) SetContext(ctx context.Context) {
947 1 : l.ctx = ctx
948 1 : if l.iter != nil {
949 0 : // TODO(sumeer): this is losing the ctx = objiotracing.WithLevel(ctx,
950 0 : // manifest.LevelToInt(opts.level)) that happens in table_cache.go.
951 0 : l.iter.SetContext(ctx)
952 0 : }
953 : }
954 :
955 : // DebugTree is part of the InternalIterator interface.
956 0 : func (l *levelIter) DebugTree(tp treeprinter.Node) {
957 0 : n := tp.Childf("%T(%p) %s", l, l, l.String())
958 0 : if l.iter != nil {
959 0 : l.iter.DebugTree(n)
960 0 : }
961 : }
962 :
963 1 : func (l *levelIter) String() string {
964 1 : if l.iterFile != nil {
965 1 : return fmt.Sprintf("%s: fileNum=%s", l.layer, l.iterFile.FileNum.String())
966 1 : }
967 0 : return fmt.Sprintf("%s: fileNum=<nil>", l.layer)
968 : }
969 :
970 : var _ internalIterator = &levelIter{}
|