Line data Source code
1 : // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package sstable
6 :
7 : import (
8 : "bytes"
9 : "context"
10 : "fmt"
11 : "sync"
12 :
13 : "github.com/cockroachdb/errors"
14 : "github.com/cockroachdb/pebble/internal/base"
15 : "github.com/cockroachdb/pebble/internal/invariants"
16 : "github.com/cockroachdb/pebble/internal/treeprinter"
17 : "github.com/cockroachdb/pebble/objstorage"
18 : "github.com/cockroachdb/pebble/objstorage/objstorageprovider"
19 : "github.com/cockroachdb/pebble/sstable/block"
20 : "github.com/cockroachdb/pebble/sstable/valblk"
21 : )
22 :
23 : type twoLevelIterator[I any, PI indexBlockIterator[I], D any, PD dataBlockIterator[D]] struct {
24 : secondLevel singleLevelIterator[I, PI, D, PD]
25 : topLevelIndex I
26 : // pool is the pool from which the iterator was allocated and to which the
27 : // iterator should be returned on Close. Because the iterator is
28 : // parameterized by the type of the data block iterator, pools must be
29 : // specific to the type of the data block iterator.
30 : pool *sync.Pool
31 :
32 : // useFilterBlock controls whether we consult the bloom filter in the
33 : // twoLevelIterator code. Note that secondLevel.useFilterBlock is always
34 : // false - any filtering happens at the top level.
35 : useFilterBlock bool
36 : lastBloomFilterMatched bool
37 : }
38 :
39 : var _ Iterator = (*twoLevelIteratorRowBlocks)(nil)
40 :
41 : // loadSecondLevelIndexBlock loads the index block at the current top level
42 : // index position and leaves i.index unpositioned. If unsuccessful, it gets
43 : // i.secondLevel.err to any error encountered, which may be nil if we have
44 : // simply exhausted the entire table. This is used for two level indexes.
45 2 : func (i *twoLevelIterator[I, PI, D, PD]) loadSecondLevelIndexBlock(dir int8) loadBlockResult {
46 2 : // Ensure the index data block iterators are invalidated even if loading of
47 2 : // the index fails.
48 2 : PD(&i.secondLevel.data).Invalidate()
49 2 : PI(&i.secondLevel.index).Invalidate()
50 2 : if !PI(&i.topLevelIndex).Valid() {
51 0 : return loadBlockFailed
52 0 : }
53 2 : bhp, err := PI(&i.topLevelIndex).BlockHandleWithProperties()
54 2 : if err != nil {
55 0 : i.secondLevel.err = base.CorruptionErrorf("pebble/table: corrupt top level index entry (%v)", err)
56 0 : return loadBlockFailed
57 0 : }
58 2 : if i.secondLevel.bpfs != nil {
59 2 : intersects, err := i.secondLevel.bpfs.intersects(bhp.Props)
60 2 : if err != nil {
61 0 : i.secondLevel.err = errCorruptIndexEntry(err)
62 0 : return loadBlockFailed
63 0 : }
64 2 : if intersects == blockMaybeExcluded {
65 1 : intersects = i.resolveMaybeExcluded(dir)
66 1 : }
67 2 : if intersects == blockExcluded {
68 2 : return loadBlockIrrelevant
69 2 : }
70 : // blockIntersects
71 : }
72 2 : indexBlock, err := i.secondLevel.reader.readIndexBlock(i.secondLevel.ctx, i.secondLevel.readBlockEnv, i.secondLevel.indexFilterRH, bhp.Handle)
73 2 : if err != nil {
74 1 : i.secondLevel.err = err
75 1 : return loadBlockFailed
76 1 : }
77 2 : err = PI(&i.secondLevel.index).InitHandle(i.secondLevel.reader.Comparer, indexBlock, i.secondLevel.transforms)
78 2 : if err != nil {
79 0 : PI(&i.secondLevel.index).Invalidate()
80 0 : i.secondLevel.err = err
81 0 : return loadBlockFailed
82 0 : }
83 2 : return loadBlockOK
84 : }
85 :
86 : // resolveMaybeExcluded is invoked when the block-property filterer has found
87 : // that an index block is excluded according to its properties but only if its
88 : // bounds fall within the filter's current bounds. This function consults the
89 : // appropriate bound, depending on the iteration direction, and returns either
90 : // `blockIntersects` or `blockExcluded`.
91 1 : func (i *twoLevelIterator[I, PI, D, PD]) resolveMaybeExcluded(dir int8) intersectsResult {
92 1 : // This iterator is configured with a bound-limited block property filter.
93 1 : // The bpf determined this entire index block could be excluded from
94 1 : // iteration based on the property encoded in the block handle. However, we
95 1 : // still need to determine if the index block is wholly contained within the
96 1 : // filter's key bounds.
97 1 : //
98 1 : // External guarantees ensure all its data blocks' keys are ≥ the filter's
99 1 : // lower bound during forward iteration, and that all its data blocks' keys
100 1 : // are < the filter's upper bound during backward iteration. We only need to
101 1 : // determine if the opposite bound is also met.
102 1 : //
103 1 : // The index separator in topLevelIndex.Separator() provides an inclusive
104 1 : // upper-bound for the index block's keys, guaranteeing that all its keys
105 1 : // are ≤ topLevelIndex.Separator(). For forward iteration, this is all we
106 1 : // need.
107 1 : if dir > 0 {
108 1 : // Forward iteration.
109 1 : if i.secondLevel.bpfs.boundLimitedFilter.KeyIsWithinUpperBound(PI(&i.topLevelIndex).Separator()) {
110 1 : return blockExcluded
111 1 : }
112 1 : return blockIntersects
113 : }
114 :
115 : // Reverse iteration.
116 : //
117 : // Because we're iterating in the reverse direction, we don't yet have
118 : // enough context available to determine if the block is wholly contained
119 : // within its bounds. This case arises only during backward iteration,
120 : // because of the way the index is structured.
121 : //
122 : // Consider a bound-limited bpf limited to the bounds [b,d), loading the
123 : // block with separator `c`. During reverse iteration, the guarantee that
124 : // all the block's keys are < `d` is externally provided, but no guarantee
125 : // is made on the bpf's lower bound. The separator `c` only provides an
126 : // inclusive upper bound on the block's keys, indicating that the
127 : // corresponding block handle points to a block containing only keys ≤ `c`.
128 : //
129 : // To establish a lower bound, we step the top-level index backwards to read
130 : // the previous block's separator, which provides an inclusive lower bound
131 : // on the original index block's keys. Afterwards, we step forward to
132 : // restore our top-level index position.
133 1 : if !PI(&i.topLevelIndex).Prev() {
134 1 : // The original block points to the first index block of this table. If
135 1 : // we knew the lower bound for the entire table, it could provide a
136 1 : // lower bound, but the code refactoring necessary to read it doesn't
137 1 : // seem worth the payoff. We fall through to loading the block.
138 1 : } else if i.secondLevel.bpfs.boundLimitedFilter.KeyIsWithinLowerBound(PI(&i.topLevelIndex).Separator()) {
139 1 : // The lower-bound on the original index block falls within the filter's
140 1 : // bounds, and we can skip the block (after restoring our current
141 1 : // top-level index position).
142 1 : _ = PI(&i.topLevelIndex).Next()
143 1 : return blockExcluded
144 1 : }
145 1 : _ = PI(&i.topLevelIndex).Next()
146 1 : return blockIntersects
147 : }
148 :
149 : // newColumnBlockTwoLevelIterator reads the top-level index block and creates and
150 : // initializes a two-level iterator over an sstable with column-oriented data
151 : // blocks.
152 : //
153 : // Note that lower, upper are iterator bounds and are separate from virtual
154 : // sstable bounds. If the virtualState passed in is not nil, then virtual
155 : // sstable bounds will be enforced.
156 : func newColumnBlockTwoLevelIterator(
157 : ctx context.Context,
158 : r *Reader,
159 : v *virtualState,
160 : transforms IterTransforms,
161 : lower, upper []byte,
162 : filterer *BlockPropertiesFilterer,
163 : filterBlockSizeLimit FilterBlockSizeLimit,
164 : stats *base.InternalIteratorStats,
165 : statsAccum IterStatsAccumulator,
166 : rp valblk.ReaderProvider,
167 : bufferPool *block.BufferPool,
168 2 : ) (*twoLevelIteratorColumnBlocks, error) {
169 2 : if r.err != nil {
170 0 : return nil, r.err
171 0 : }
172 2 : if !r.tableFormat.BlockColumnar() {
173 0 : panic(errors.AssertionFailedf("table format %d should not use columnar block format", r.tableFormat))
174 : }
175 2 : i := twoLevelIterColumnBlockPool.Get().(*twoLevelIteratorColumnBlocks)
176 2 : i.secondLevel.init(ctx, r, v, transforms, lower, upper, filterer,
177 2 : false, // Disable the use of the filter block in the second level.
178 2 : stats, statsAccum, bufferPool)
179 2 : var getLazyValuer block.GetLazyValueForPrefixAndValueHandler
180 2 : if r.Properties.NumValueBlocks > 0 {
181 2 : // NB: we cannot avoid this ~248 byte allocation, since valueBlockReader
182 2 : // can outlive the singleLevelIterator due to be being embedded in a
183 2 : // LazyValue. This consumes ~2% in microbenchmark CPU profiles, but we
184 2 : // should only optimize this if it shows up as significant in end-to-end
185 2 : // CockroachDB benchmarks, since it is tricky to do so. One possibility
186 2 : // is that if many sstable iterators only get positioned at latest
187 2 : // versions of keys, and therefore never expose a LazyValue that is
188 2 : // separated to their callers, they can put this valueBlockReader into a
189 2 : // sync.Pool.
190 2 : i.secondLevel.vbReader = valblk.MakeReader(&i.secondLevel, rp, r.valueBIH, stats)
191 2 : getLazyValuer = &i.secondLevel.vbReader
192 2 : i.secondLevel.vbRH = objstorageprovider.UsePreallocatedReadHandle(r.readable, objstorage.NoReadBefore, &i.secondLevel.vbRHPrealloc)
193 2 : }
194 2 : i.secondLevel.data.InitOnce(r.keySchema, r.Comparer, getLazyValuer)
195 2 : i.useFilterBlock = shouldUseFilterBlock(r, filterBlockSizeLimit)
196 2 : topLevelIndexH, err := r.readTopLevelIndexBlock(ctx, i.secondLevel.readBlockEnv, i.secondLevel.indexFilterRH)
197 2 : if err == nil {
198 2 : err = i.topLevelIndex.InitHandle(r.Comparer, topLevelIndexH, transforms)
199 2 : }
200 2 : if err != nil {
201 0 : _ = i.Close()
202 0 : return nil, err
203 0 : }
204 2 : return i, nil
205 : }
206 :
207 : // newRowBlockTwoLevelIterator reads the top-level index block and creates and
208 : // initializes a two-level iterator over an sstable with row-oriented data
209 : // blocks.
210 : //
211 : // Note that lower, upper are iterator bounds and are separate from virtual
212 : // sstable bounds. If the virtualState passed in is not nil, then virtual
213 : // sstable bounds will be enforced.
214 : func newRowBlockTwoLevelIterator(
215 : ctx context.Context,
216 : r *Reader,
217 : v *virtualState,
218 : transforms IterTransforms,
219 : lower, upper []byte,
220 : filterer *BlockPropertiesFilterer,
221 : filterBlockSizeLimit FilterBlockSizeLimit,
222 : stats *base.InternalIteratorStats,
223 : statsAccum IterStatsAccumulator,
224 : rp valblk.ReaderProvider,
225 : bufferPool *block.BufferPool,
226 2 : ) (*twoLevelIteratorRowBlocks, error) {
227 2 : if r.err != nil {
228 0 : return nil, r.err
229 0 : }
230 2 : if r.tableFormat.BlockColumnar() {
231 0 : panic(errors.AssertionFailedf("table format %s uses block columnar format", r.tableFormat))
232 : }
233 2 : i := twoLevelIterRowBlockPool.Get().(*twoLevelIteratorRowBlocks)
234 2 : i.secondLevel.init(ctx, r, v, transforms, lower, upper, filterer,
235 2 : false, // Disable the use of the filter block in the second level.
236 2 : stats, statsAccum, bufferPool)
237 2 : if r.tableFormat >= TableFormatPebblev3 {
238 2 : if r.Properties.NumValueBlocks > 0 {
239 2 : // NB: we cannot avoid this ~248 byte allocation, since valueBlockReader
240 2 : // can outlive the singleLevelIterator due to be being embedded in a
241 2 : // LazyValue. This consumes ~2% in microbenchmark CPU profiles, but we
242 2 : // should only optimize this if it shows up as significant in end-to-end
243 2 : // CockroachDB benchmarks, since it is tricky to do so. One possibility
244 2 : // is that if many sstable iterators only get positioned at latest
245 2 : // versions of keys, and therefore never expose a LazyValue that is
246 2 : // separated to their callers, they can put this valueBlockReader into a
247 2 : // sync.Pool.
248 2 : i.secondLevel.vbReader = valblk.MakeReader(&i.secondLevel, rp, r.valueBIH, stats)
249 2 : i.secondLevel.data.SetGetLazyValuer(&i.secondLevel.vbReader)
250 2 : i.secondLevel.vbRH = objstorageprovider.UsePreallocatedReadHandle(r.readable, objstorage.NoReadBefore, &i.secondLevel.vbRHPrealloc)
251 2 : }
252 2 : i.secondLevel.data.SetHasValuePrefix(true)
253 : }
254 :
255 2 : i.useFilterBlock = shouldUseFilterBlock(r, filterBlockSizeLimit)
256 2 :
257 2 : topLevelIndexH, err := r.readTopLevelIndexBlock(ctx, i.secondLevel.readBlockEnv, i.secondLevel.indexFilterRH)
258 2 : if err == nil {
259 2 : err = i.topLevelIndex.InitHandle(r.Comparer, topLevelIndexH, transforms)
260 2 : }
261 2 : if err != nil {
262 1 : _ = i.Close()
263 1 : return nil, err
264 1 : }
265 2 : return i, nil
266 : }
267 :
268 0 : func (i *twoLevelIterator[I, PI, D, PD]) String() string {
269 0 : return i.secondLevel.String()
270 0 : }
271 :
272 : // DebugTree is part of the InternalIterator interface.
273 0 : func (i *twoLevelIterator[I, PI, D, PD]) DebugTree(tp treeprinter.Node) {
274 0 : tp.Childf("%T(%p) fileNum=%s", i, i, i.String())
275 0 : }
276 :
277 : // SeekGE implements internalIterator.SeekGE, as documented in the pebble
278 : // package. Note that SeekGE only checks the upper bound. It is up to the
279 : // caller to ensure that key is greater than or equal to the lower bound.
280 : func (i *twoLevelIterator[I, PI, D, PD]) SeekGE(
281 : key []byte, flags base.SeekGEFlags,
282 2 : ) *base.InternalKV {
283 2 : if i.secondLevel.vState != nil {
284 2 : // Callers of SeekGE don't know about virtual sstable bounds, so we may
285 2 : // have to internally restrict the bounds.
286 2 : //
287 2 : // TODO(bananabrick): We can optimize away this check for the level iter
288 2 : // if necessary.
289 2 : if i.secondLevel.cmp(key, i.secondLevel.lower) < 0 {
290 2 : key = i.secondLevel.lower
291 2 : }
292 : }
293 :
294 2 : err := i.secondLevel.err
295 2 : i.secondLevel.err = nil // clear cached iteration error
296 2 :
297 2 : // The twoLevelIterator could be already exhausted. Utilize that when
298 2 : // trySeekUsingNext is true. See the comment about data-exhausted, PGDE, and
299 2 : // bounds-exhausted near the top of the file.
300 2 : if flags.TrySeekUsingNext() &&
301 2 : (i.secondLevel.exhaustedBounds == +1 || (PD(&i.secondLevel.data).IsDataInvalidated() && PI(&i.secondLevel.index).IsDataInvalidated())) &&
302 2 : err == nil {
303 2 : // Already exhausted, so return nil.
304 2 : return nil
305 2 : }
306 :
307 : // SeekGE performs various step-instead-of-seeking optimizations: eg enabled
308 : // by trySeekUsingNext, or by monotonically increasing bounds (i.boundsCmp).
309 :
310 : // We fall into the slow path if i.index.IsDataInvalidated() even if the
311 : // top-level iterator is already positioned correctly and all other
312 : // conditions are met. An alternative structure could reuse topLevelIndex's
313 : // current position and reload the index block to which it points. Arguably,
314 : // an index block load is expensive and the index block may still be earlier
315 : // than the index block containing the sought key, resulting in a wasteful
316 : // block load.
317 :
318 2 : var dontSeekWithinSingleLevelIter bool
319 2 : if PI(&i.topLevelIndex).IsDataInvalidated() || !PI(&i.topLevelIndex).Valid() || PI(&i.secondLevel.index).IsDataInvalidated() || err != nil ||
320 2 : (i.secondLevel.boundsCmp <= 0 && !flags.TrySeekUsingNext()) || PI(&i.topLevelIndex).SeparatorLT(key) {
321 2 : // Slow-path: need to position the topLevelIndex.
322 2 :
323 2 : // The previous exhausted state of singleLevelIterator is no longer
324 2 : // relevant, since we may be moving to a different index block.
325 2 : i.secondLevel.exhaustedBounds = 0
326 2 : flags = flags.DisableTrySeekUsingNext()
327 2 : if !PI(&i.topLevelIndex).SeekGE(key) {
328 2 : PD(&i.secondLevel.data).Invalidate()
329 2 : PI(&i.secondLevel.index).Invalidate()
330 2 : return nil
331 2 : }
332 :
333 2 : result := i.loadSecondLevelIndexBlock(+1)
334 2 : if result == loadBlockFailed {
335 1 : i.secondLevel.boundsCmp = 0
336 1 : return nil
337 1 : }
338 2 : if result == loadBlockIrrelevant {
339 2 : // Enforce the upper bound here since don't want to bother moving to
340 2 : // the next entry in the top level index if upper bound is already
341 2 : // exceeded. Note that the next entry starts with keys >=
342 2 : // topLevelIndex.Separator() since even though this is the block
343 2 : // separator, the same user key can span multiple index blocks. If
344 2 : // upper is exclusive we pass orEqual=true below, else we require
345 2 : // the separator to be strictly greater than upper.
346 2 : if i.secondLevel.upper != nil && PI(&i.topLevelIndex).SeparatorGT(
347 2 : i.secondLevel.upper, !i.secondLevel.endKeyInclusive) {
348 2 : i.secondLevel.exhaustedBounds = +1
349 2 : }
350 : // Fall through to skipForward.
351 2 : dontSeekWithinSingleLevelIter = true
352 2 : // Clear boundsCmp.
353 2 : //
354 2 : // In the typical cases where dontSeekWithinSingleLevelIter=false,
355 2 : // the singleLevelIterator.SeekGE call will clear boundsCmp.
356 2 : // However, in this case where dontSeekWithinSingleLevelIter=true,
357 2 : // we never seek on the single-level iterator. This call will fall
358 2 : // through to skipForward, which may improperly leave boundsCmp=+1
359 2 : // unless we clear it here.
360 2 : i.secondLevel.boundsCmp = 0
361 : }
362 2 : } else {
363 2 : // INVARIANT: err == nil.
364 2 : //
365 2 : // Else fast-path: There are two possible cases, from
366 2 : // (i.boundsCmp > 0 || flags.TrySeekUsingNext()):
367 2 : //
368 2 : // 1) The bounds have moved forward (i.boundsCmp > 0) and this SeekGE is
369 2 : // respecting the lower bound (guaranteed by Iterator). We know that the
370 2 : // iterator must already be positioned within or just outside the previous
371 2 : // bounds. Therefore, the topLevelIndex iter cannot be positioned at an
372 2 : // entry ahead of the seek position (though it can be positioned behind).
373 2 : // The !i.cmp(key, i.topLevelIndex.Separator) > 0 confirms that it is
374 2 : // not behind. Since it is not ahead and not behind it must be at the
375 2 : // right position.
376 2 : //
377 2 : // 2) This SeekGE will land on a key that is greater than the key we are
378 2 : // currently at (guaranteed by trySeekUsingNext), but since i.cmp(key,
379 2 : // i.topLevelIndex.Separator()) <= 0, we are at the correct lower level
380 2 : // index block. No need to reset the state of singleLevelIterator.
381 2 : //
382 2 : // Note that cases 1 and 2 never overlap, and one of them must be true.
383 2 : // This invariant checking is important enough that we do not gate it
384 2 : // behind invariants.Enabled.
385 2 : if i.secondLevel.boundsCmp > 0 == flags.TrySeekUsingNext() {
386 0 : panic(fmt.Sprintf("inconsistency in optimization case 1 %t and case 2 %t",
387 0 : i.secondLevel.boundsCmp > 0, flags.TrySeekUsingNext()))
388 : }
389 :
390 2 : if !flags.TrySeekUsingNext() {
391 2 : // Case 1. Bounds have changed so the previous exhausted bounds state is
392 2 : // irrelevant.
393 2 : // WARNING-data-exhausted: this is safe to do only because the monotonic
394 2 : // bounds optimizations only work when !data-exhausted. If they also
395 2 : // worked with data-exhausted, we have made it unclear whether
396 2 : // data-exhausted is actually true. See the comment at the top of the
397 2 : // file.
398 2 : i.secondLevel.exhaustedBounds = 0
399 2 : }
400 : // Else flags.TrySeekUsingNext(). The i.exhaustedBounds is important to
401 : // preserve for singleLevelIterator, and twoLevelIterator.skipForward. See
402 : // bug https://github.com/cockroachdb/pebble/issues/2036.
403 : }
404 :
405 2 : if !dontSeekWithinSingleLevelIter {
406 2 : // Note that while trySeekUsingNext could be false here, singleLevelIterator
407 2 : // could do its own boundsCmp-based optimization to seek using next.
408 2 : if ikv := i.secondLevel.SeekGE(key, flags); ikv != nil {
409 2 : return ikv
410 2 : }
411 : }
412 2 : return i.skipForward()
413 : }
414 :
415 : // SeekPrefixGE implements internalIterator.SeekPrefixGE, as documented in the
416 : // pebble package. Note that SeekPrefixGE only checks the upper bound. It is up
417 : // to the caller to ensure that key is greater than or equal to the lower bound.
418 : func (i *twoLevelIterator[I, PI, D, PD]) SeekPrefixGE(
419 : prefix, key []byte, flags base.SeekGEFlags,
420 2 : ) *base.InternalKV {
421 2 : if i.secondLevel.vState != nil {
422 2 : // Callers of SeekGE don't know about virtual sstable bounds, so we may
423 2 : // have to internally restrict the bounds.
424 2 : //
425 2 : // TODO(bananabrick): We can optimize away this check for the level iter
426 2 : // if necessary.
427 2 : if i.secondLevel.cmp(key, i.secondLevel.lower) < 0 {
428 2 : key = i.secondLevel.lower
429 2 : }
430 : }
431 :
432 : // NOTE: prefix is only used for bloom filter checking and not later work in
433 : // this method. Hence, we can use the existing iterator position if the last
434 : // SeekPrefixGE did not fail bloom filter matching.
435 :
436 2 : err := i.secondLevel.err
437 2 : i.secondLevel.err = nil // clear cached iteration error
438 2 :
439 2 : // The twoLevelIterator could be already exhausted. Utilize that when
440 2 : // trySeekUsingNext is true. See the comment about data-exhausted, PGDE, and
441 2 : // bounds-exhausted near the top of the file.
442 2 : filterUsedAndDidNotMatch := i.useFilterBlock && !i.lastBloomFilterMatched
443 2 : if flags.TrySeekUsingNext() && !filterUsedAndDidNotMatch &&
444 2 : (i.secondLevel.exhaustedBounds == +1 || (PD(&i.secondLevel.data).IsDataInvalidated() && PI(&i.secondLevel.index).IsDataInvalidated())) &&
445 2 : err == nil {
446 1 : // Already exhausted, so return nil.
447 1 : return nil
448 1 : }
449 :
450 : // Check prefix bloom filter.
451 2 : if i.useFilterBlock {
452 2 : if !i.lastBloomFilterMatched {
453 2 : // Iterator is not positioned based on last seek.
454 2 : flags = flags.DisableTrySeekUsingNext()
455 2 : }
456 2 : i.lastBloomFilterMatched = false
457 2 : var mayContain bool
458 2 : mayContain, i.secondLevel.err = i.secondLevel.bloomFilterMayContain(prefix)
459 2 : if i.secondLevel.err != nil || !mayContain {
460 2 : // In the i.secondLevel.err == nil case, this invalidation may not be necessary for
461 2 : // correctness, and may be a place to optimize later by reusing the
462 2 : // already loaded block. It was necessary in earlier versions of the code
463 2 : // since the caller was allowed to call Next when SeekPrefixGE returned
464 2 : // nil. This is no longer allowed.
465 2 : PD(&i.secondLevel.data).Invalidate()
466 2 : return nil
467 2 : }
468 2 : i.lastBloomFilterMatched = true
469 : }
470 :
471 : // Bloom filter matches.
472 :
473 : // SeekPrefixGE performs various step-instead-of-seeking optimizations: eg
474 : // enabled by trySeekUsingNext, or by monotonically increasing bounds
475 : // (i.boundsCmp).
476 :
477 : // We fall into the slow path if i.index.IsDataInvalidated() even if the
478 : // top-level iterator is already positioned correctly and all other
479 : // conditions are met. An alternative structure could reuse topLevelIndex's
480 : // current position and reload the index block to which it points. Arguably,
481 : // an index block load is expensive and the index block may still be earlier
482 : // than the index block containing the sought key, resulting in a wasteful
483 : // block load.
484 :
485 2 : var dontSeekWithinSingleLevelIter bool
486 2 : if PI(&i.topLevelIndex).IsDataInvalidated() || !PI(&i.topLevelIndex).Valid() || PI(&i.secondLevel.index).IsDataInvalidated() || err != nil ||
487 2 : (i.secondLevel.boundsCmp <= 0 && !flags.TrySeekUsingNext()) || PI(&i.topLevelIndex).SeparatorLT(key) {
488 2 : // Slow-path: need to position the topLevelIndex.
489 2 :
490 2 : // The previous exhausted state of singleLevelIterator is no longer
491 2 : // relevant, since we may be moving to a different index block.
492 2 : i.secondLevel.exhaustedBounds = 0
493 2 : flags = flags.DisableTrySeekUsingNext()
494 2 : if !PI(&i.topLevelIndex).SeekGE(key) {
495 2 : PD(&i.secondLevel.data).Invalidate()
496 2 : PI(&i.secondLevel.index).Invalidate()
497 2 : return nil
498 2 : }
499 :
500 2 : result := i.loadSecondLevelIndexBlock(+1)
501 2 : if result == loadBlockFailed {
502 1 : i.secondLevel.boundsCmp = 0
503 1 : return nil
504 1 : }
505 2 : if result == loadBlockIrrelevant {
506 2 : // Enforce the upper bound here since don't want to bother moving to
507 2 : // the next entry in the top level index if upper bound is already
508 2 : // exceeded. Note that the next entry starts with keys >=
509 2 : // topLevelIndex.Separator() since even though this is the block
510 2 : // separator, the same user key can span multiple index blocks. If
511 2 : // upper is exclusive we pass orEqual=true below, else we require
512 2 : // the separator to be strictly greater than upper.
513 2 : if i.secondLevel.upper != nil && PI(&i.topLevelIndex).SeparatorGT(
514 2 : i.secondLevel.upper, !i.secondLevel.endKeyInclusive) {
515 2 : i.secondLevel.exhaustedBounds = +1
516 2 : }
517 : // Fall through to skipForward.
518 2 : dontSeekWithinSingleLevelIter = true
519 2 : // Clear boundsCmp.
520 2 : //
521 2 : // In the typical cases where dontSeekWithinSingleLevelIter=false,
522 2 : // the singleLevelIterator.SeekPrefixGE call will clear boundsCmp.
523 2 : // However, in this case where dontSeekWithinSingleLevelIter=true,
524 2 : // we never seek on the single-level iterator. This call will fall
525 2 : // through to skipForward, which may improperly leave boundsCmp=+1
526 2 : // unless we clear it here.
527 2 : i.secondLevel.boundsCmp = 0
528 : }
529 2 : } else {
530 2 : // INVARIANT: err == nil.
531 2 : //
532 2 : // Else fast-path: There are two possible cases, from
533 2 : // (i.boundsCmp > 0 || flags.TrySeekUsingNext()):
534 2 : //
535 2 : // 1) The bounds have moved forward (i.boundsCmp > 0) and this
536 2 : // SeekPrefixGE is respecting the lower bound (guaranteed by Iterator). We
537 2 : // know that the iterator must already be positioned within or just
538 2 : // outside the previous bounds. Therefore, the topLevelIndex iter cannot
539 2 : // be positioned at an entry ahead of the seek position (though it can be
540 2 : // positioned behind). The !i.cmp(key, i.topLevelIndex.Separator()) > 0
541 2 : // confirms that it is not behind. Since it is not ahead and not behind it
542 2 : // must be at the right position.
543 2 : //
544 2 : // 2) This SeekPrefixGE will land on a key that is greater than the key we
545 2 : // are currently at (guaranteed by trySeekUsingNext), but since i.cmp(key,
546 2 : // i.topLevelIndex.Separator()) <= 0, we are at the correct lower level
547 2 : // index block. No need to reset the state of singleLevelIterator.
548 2 : //
549 2 : // Note that cases 1 and 2 never overlap, and one of them must be true.
550 2 : // This invariant checking is important enough that we do not gate it
551 2 : // behind invariants.Enabled.
552 2 : if i.secondLevel.boundsCmp > 0 == flags.TrySeekUsingNext() {
553 0 : panic(fmt.Sprintf("inconsistency in optimization case 1 %t and case 2 %t",
554 0 : i.secondLevel.boundsCmp > 0, flags.TrySeekUsingNext()))
555 : }
556 :
557 2 : if !flags.TrySeekUsingNext() {
558 1 : // Case 1. Bounds have changed so the previous exhausted bounds state is
559 1 : // irrelevant.
560 1 : // WARNING-data-exhausted: this is safe to do only because the monotonic
561 1 : // bounds optimizations only work when !data-exhausted. If they also
562 1 : // worked with data-exhausted, we have made it unclear whether
563 1 : // data-exhausted is actually true. See the comment at the top of the
564 1 : // file.
565 1 : i.secondLevel.exhaustedBounds = 0
566 1 : }
567 : // Else flags.TrySeekUsingNext(). The i.exhaustedBounds is important to
568 : // preserve for singleLevelIterator, and twoLevelIterator.skipForward. See
569 : // bug https://github.com/cockroachdb/pebble/issues/2036.
570 : }
571 :
572 2 : if !dontSeekWithinSingleLevelIter {
573 2 : if ikv := i.secondLevel.seekPrefixGE(prefix, key, flags); ikv != nil {
574 2 : return ikv
575 2 : }
576 : }
577 : // NB: skipForward checks whether exhaustedBounds is already +1.
578 2 : return i.skipForward()
579 : }
580 :
581 : // virtualLast should only be called if i.vReader != nil.
582 2 : func (i *twoLevelIterator[I, PI, D, PD]) virtualLast() *base.InternalKV {
583 2 : if i.secondLevel.vState == nil {
584 0 : panic("pebble: invalid call to virtualLast")
585 : }
586 2 : if !i.secondLevel.endKeyInclusive {
587 2 : // Trivial case.
588 2 : return i.SeekLT(i.secondLevel.upper, base.SeekLTFlagsNone)
589 2 : }
590 2 : return i.virtualLastSeekLE()
591 : }
592 :
593 : // virtualLastSeekLE implements a SeekLE() that can be used as part
594 : // of reverse-iteration calls such as a Last() on a virtual sstable. Does a
595 : // SeekLE on the upper bound of the file/iterator.
596 2 : func (i *twoLevelIterator[I, PI, D, PD]) virtualLastSeekLE() *base.InternalKV {
597 2 : // Callers of SeekLE don't know about virtual sstable bounds, so we may
598 2 : // have to internally restrict the bounds.
599 2 : //
600 2 : // TODO(bananabrick): We can optimize this check away for the level iter
601 2 : // if necessary.
602 2 : if !i.secondLevel.endKeyInclusive {
603 0 : panic("unexpected virtualLastSeekLE with exclusive upper bounds")
604 : }
605 2 : key := i.secondLevel.upper
606 2 : // Need to position the topLevelIndex.
607 2 : //
608 2 : // The previous exhausted state of singleLevelIterator is no longer
609 2 : // relevant, since we may be moving to a different index block.
610 2 : i.secondLevel.exhaustedBounds = 0
611 2 : // Seek optimization only applies until iterator is first positioned with a
612 2 : // SeekGE or SeekLT after SetBounds.
613 2 : i.secondLevel.boundsCmp = 0
614 2 : topLevelOk := PI(&i.topLevelIndex).SeekGE(key)
615 2 : // We can have multiple internal keys with the same user key as the seek
616 2 : // key. In that case, we want the last (greatest) internal key.
617 2 : for topLevelOk && bytes.Equal(PI(&i.topLevelIndex).Separator(), key) {
618 2 : topLevelOk = PI(&i.topLevelIndex).Next()
619 2 : }
620 2 : if !topLevelOk {
621 2 : return i.skipBackward()
622 2 : }
623 2 : result := i.loadSecondLevelIndexBlock(-1)
624 2 : if result == loadBlockFailed {
625 0 : i.secondLevel.boundsCmp = 0
626 0 : return nil
627 0 : }
628 2 : if result == loadBlockIrrelevant {
629 1 : // Load the previous block.
630 1 : return i.skipBackward()
631 1 : }
632 2 : if ikv := i.secondLevel.virtualLastSeekLE(); ikv != nil {
633 2 : return ikv
634 2 : }
635 2 : return i.skipBackward()
636 : }
637 :
638 : // SeekLT implements internalIterator.SeekLT, as documented in the pebble
639 : // package. Note that SeekLT only checks the lower bound. It is up to the
640 : // caller to ensure that key is less than the upper bound.
641 : func (i *twoLevelIterator[I, PI, D, PD]) SeekLT(
642 : key []byte, flags base.SeekLTFlags,
643 2 : ) *base.InternalKV {
644 2 : if i.secondLevel.vState != nil {
645 2 : // Might have to fix upper bound since virtual sstable bounds are not
646 2 : // known to callers of SeekLT.
647 2 : //
648 2 : // TODO(bananabrick): We can optimize away this check for the level iter
649 2 : // if necessary.
650 2 : cmp := i.secondLevel.cmp(key, i.secondLevel.upper)
651 2 : // key == i.secondLevel.upper is fine. We'll do the right thing and return the
652 2 : // first internal key with user key < key.
653 2 : if cmp > 0 {
654 2 : return i.virtualLast()
655 2 : }
656 : }
657 :
658 2 : i.secondLevel.exhaustedBounds = 0
659 2 : i.secondLevel.err = nil // clear cached iteration error
660 2 : // Seek optimization only applies until iterator is first positioned after SetBounds.
661 2 : i.secondLevel.boundsCmp = 0
662 2 :
663 2 : var result loadBlockResult
664 2 : // NB: Unlike SeekGE, we don't have a fast-path here since we don't know
665 2 : // whether the topLevelIndex is positioned after the position that would
666 2 : // be returned by doing i.topLevelIndex.SeekGE(). To know this we would
667 2 : // need to know the index key preceding the current one.
668 2 : // NB: If a bound-limited block property filter is configured, it's
669 2 : // externally ensured that the filter is disabled (through returning
670 2 : // Intersects=false irrespective of the block props provided) during seeks.
671 2 : if !PI(&i.topLevelIndex).SeekGE(key) {
672 2 : if !PI(&i.topLevelIndex).Last() {
673 0 : PD(&i.secondLevel.data).Invalidate()
674 0 : PI(&i.secondLevel.index).Invalidate()
675 0 : return nil
676 0 : }
677 :
678 2 : result = i.loadSecondLevelIndexBlock(-1)
679 2 : if result == loadBlockFailed {
680 1 : return nil
681 1 : }
682 2 : if result == loadBlockOK {
683 2 : if ikv := i.secondLevel.lastInternal(); ikv != nil {
684 2 : return i.secondLevel.maybeVerifyKey(ikv)
685 2 : }
686 : // Fall through to skipBackward since the singleLevelIterator did
687 : // not have any blocks that satisfy the block interval
688 : // constraints, or the lower bound was reached.
689 : }
690 : // Else loadBlockIrrelevant, so fall through.
691 2 : } else {
692 2 : result = i.loadSecondLevelIndexBlock(-1)
693 2 : if result == loadBlockFailed {
694 1 : return nil
695 1 : }
696 2 : if result == loadBlockOK {
697 2 : if ikv := i.secondLevel.SeekLT(key, flags); ikv != nil {
698 2 : return i.secondLevel.maybeVerifyKey(ikv)
699 2 : }
700 : // Fall through to skipBackward since the singleLevelIterator did
701 : // not have any blocks that satisfy the block interval
702 : // constraint, or the lower bound was reached.
703 : }
704 : // Else loadBlockIrrelevant, so fall through.
705 : }
706 2 : if result == loadBlockIrrelevant {
707 2 : // Enforce the lower bound here since don't want to bother moving to
708 2 : // the previous entry in the top level index if lower bound is already
709 2 : // exceeded. Note that the previous entry starts with keys <=
710 2 : // ikey.InternalKey.UserKey since even though this is the current block's
711 2 : // separator, the same user key can span multiple index blocks.
712 2 : if i.secondLevel.lower != nil && PI(&i.topLevelIndex).SeparatorLT(i.secondLevel.lower) {
713 1 : i.secondLevel.exhaustedBounds = -1
714 1 : }
715 : }
716 : // NB: skipBackward checks whether exhaustedBounds is already -1.
717 2 : return i.skipBackward()
718 : }
719 :
720 : // First implements internalIterator.First, as documented in the pebble
721 : // package. Note that First only checks the upper bound. It is up to the caller
722 : // to ensure that key is greater than or equal to the lower bound (e.g. via a
723 : // call to SeekGE(lower)).
724 2 : func (i *twoLevelIterator[I, PI, D, PD]) First() *base.InternalKV {
725 2 : // If we have a lower bound, use SeekGE. Note that in general this is not
726 2 : // supported usage, except when the lower bound is there because the table is
727 2 : // virtual.
728 2 : if i.secondLevel.lower != nil {
729 2 : return i.SeekGE(i.secondLevel.lower, base.SeekGEFlagsNone)
730 2 : }
731 2 : i.secondLevel.exhaustedBounds = 0
732 2 : i.secondLevel.err = nil // clear cached iteration error
733 2 : // Seek optimization only applies until iterator is first positioned after SetBounds.
734 2 : i.secondLevel.boundsCmp = 0
735 2 :
736 2 : if !PI(&i.topLevelIndex).First() {
737 0 : return nil
738 0 : }
739 2 : result := i.loadSecondLevelIndexBlock(+1)
740 2 : if result == loadBlockFailed {
741 1 : return nil
742 1 : }
743 2 : if result == loadBlockOK {
744 2 : if ikv := i.secondLevel.First(); ikv != nil {
745 2 : return ikv
746 2 : }
747 : // Else fall through to skipForward.
748 2 : } else {
749 2 : // result == loadBlockIrrelevant. Enforce the upper bound here since
750 2 : // don't want to bother moving to the next entry in the top level index
751 2 : // if upper bound is already exceeded. Note that the next entry starts
752 2 : // with keys >= topLevelIndex.Separator() since even though this is the
753 2 : // block separator, the same user key can span multiple index blocks.
754 2 : // If upper is exclusive we pass orEqual=true below, else we require the
755 2 : // separator to be strictly greater than upper.
756 2 : if i.secondLevel.upper != nil && PI(&i.topLevelIndex).SeparatorGT(
757 2 : i.secondLevel.upper, !i.secondLevel.endKeyInclusive) {
758 1 : i.secondLevel.exhaustedBounds = +1
759 1 : }
760 : }
761 : // NB: skipForward checks whether exhaustedBounds is already +1.
762 2 : return i.skipForward()
763 : }
764 :
765 : // Last implements internalIterator.Last, as documented in the pebble
766 : // package. Note that Last only checks the lower bound. It is up to the caller
767 : // to ensure that key is less than the upper bound (e.g. via a call to
768 : // SeekLT(upper))
769 2 : func (i *twoLevelIterator[I, PI, D, PD]) Last() *base.InternalKV {
770 2 : if i.secondLevel.vState != nil {
771 2 : if i.secondLevel.endKeyInclusive {
772 2 : return i.virtualLast()
773 2 : }
774 2 : return i.SeekLT(i.secondLevel.upper, base.SeekLTFlagsNone)
775 : }
776 :
777 2 : if i.secondLevel.upper != nil {
778 0 : panic("twoLevelIterator.Last() used despite upper bound")
779 : }
780 2 : i.secondLevel.exhaustedBounds = 0
781 2 : i.secondLevel.err = nil // clear cached iteration error
782 2 : // Seek optimization only applies until iterator is first positioned after SetBounds.
783 2 : i.secondLevel.boundsCmp = 0
784 2 :
785 2 : if !PI(&i.topLevelIndex).Last() {
786 0 : return nil
787 0 : }
788 2 : result := i.loadSecondLevelIndexBlock(-1)
789 2 : if result == loadBlockFailed {
790 1 : return nil
791 1 : }
792 2 : if result == loadBlockOK {
793 2 : if ikv := i.secondLevel.Last(); ikv != nil {
794 2 : return ikv
795 2 : }
796 : // Else fall through to skipBackward.
797 2 : } else {
798 2 : // result == loadBlockIrrelevant. Enforce the lower bound here since
799 2 : // don't want to bother moving to the previous entry in the top level
800 2 : // index if lower bound is already exceeded. Note that the previous
801 2 : // entry starts with keys <= ikv.InternalKey.UserKey since even though
802 2 : // this is the current block's separator, the same user key can span
803 2 : // multiple index blocks.
804 2 : if i.secondLevel.lower != nil && PI(&i.topLevelIndex).SeparatorLT(i.secondLevel.lower) {
805 1 : i.secondLevel.exhaustedBounds = -1
806 1 : }
807 : }
808 : // NB: skipBackward checks whether exhaustedBounds is already -1.
809 2 : return i.skipBackward()
810 : }
811 :
812 : // Next implements internalIterator.Next, as documented in the pebble
813 : // package.
814 : // Note: twoLevelCompactionIterator.Next mirrors the implementation of
815 : // twoLevelIterator.Next due to performance. Keep the two in sync.
816 2 : func (i *twoLevelIterator[I, PI, D, PD]) Next() *base.InternalKV {
817 2 : // Seek optimization only applies until iterator is first positioned after SetBounds.
818 2 : i.secondLevel.boundsCmp = 0
819 2 : if i.secondLevel.err != nil {
820 1 : // TODO(jackson): Can this case be turned into a panic? Once an error is
821 1 : // encountered, the iterator must be re-seeked.
822 1 : return nil
823 1 : }
824 2 : if ikv := i.secondLevel.Next(); ikv != nil {
825 2 : return ikv
826 2 : }
827 2 : return i.skipForward()
828 : }
829 :
830 : // NextPrefix implements (base.InternalIterator).NextPrefix.
831 2 : func (i *twoLevelIterator[I, PI, D, PD]) NextPrefix(succKey []byte) *base.InternalKV {
832 2 : if i.secondLevel.exhaustedBounds == +1 {
833 0 : panic("Next called even though exhausted upper bound")
834 : }
835 : // Seek optimization only applies until iterator is first positioned after SetBounds.
836 2 : i.secondLevel.boundsCmp = 0
837 2 : if i.secondLevel.err != nil {
838 0 : // TODO(jackson): Can this case be turned into a panic? Once an error is
839 0 : // encountered, the iterator must be re-seeked.
840 0 : return nil
841 0 : }
842 2 : if ikv := i.secondLevel.NextPrefix(succKey); ikv != nil {
843 2 : return ikv
844 2 : }
845 : // ikv == nil
846 2 : if i.secondLevel.err != nil {
847 1 : return nil
848 1 : }
849 :
850 : // Did not find prefix in the existing second-level index block. This is the
851 : // slow-path where we seek the iterator.
852 2 : if !PI(&i.topLevelIndex).SeekGE(succKey) {
853 0 : PD(&i.secondLevel.data).Invalidate()
854 0 : PI(&i.secondLevel.index).Invalidate()
855 0 : return nil
856 0 : }
857 2 : result := i.loadSecondLevelIndexBlock(+1)
858 2 : if result == loadBlockFailed {
859 1 : return nil
860 1 : }
861 2 : if result == loadBlockIrrelevant {
862 2 : // Enforce the upper bound here since don't want to bother moving to the
863 2 : // next entry in the top level index if upper bound is already exceeded.
864 2 : // Note that the next entry starts with keys >=
865 2 : // topLevelIndex.Separator() since even though this is the block
866 2 : // separator, the same user key can span multiple index blocks. If upper
867 2 : // is exclusive we pass orEqual=true below, else we require the
868 2 : // separator to be strictly greater than upper.
869 2 : if i.secondLevel.upper != nil && PI(&i.topLevelIndex).SeparatorGT(
870 2 : i.secondLevel.upper, !i.secondLevel.endKeyInclusive) {
871 0 : i.secondLevel.exhaustedBounds = +1
872 0 : }
873 2 : } else if kv := i.secondLevel.SeekGE(succKey, base.SeekGEFlagsNone); kv != nil {
874 2 : return i.secondLevel.maybeVerifyKey(kv)
875 2 : }
876 2 : return i.skipForward()
877 : }
878 :
879 : // Prev implements internalIterator.Prev, as documented in the pebble
880 : // package.
881 2 : func (i *twoLevelIterator[I, PI, D, PD]) Prev() *base.InternalKV {
882 2 : // Seek optimization only applies until iterator is first positioned after SetBounds.
883 2 : i.secondLevel.boundsCmp = 0
884 2 : if i.secondLevel.err != nil {
885 1 : return nil
886 1 : }
887 2 : if kv := i.secondLevel.Prev(); kv != nil {
888 2 : return kv
889 2 : }
890 2 : return i.skipBackward()
891 : }
892 :
893 2 : func (i *twoLevelIterator[I, PI, D, PD]) skipForward() *base.InternalKV {
894 2 : for {
895 2 : if i.secondLevel.err != nil || i.secondLevel.exhaustedBounds > 0 {
896 2 : return nil
897 2 : }
898 :
899 : // It is possible that skipBackward went too far and the virtual table lower
900 : // bound is after the first key in the block we are about to load, in which
901 : // case we must use SeekGE below. The keys in the block we are about to load
902 : // start right after the topLevelIndex key (before we Next).
903 : //
904 : // An example of how this can happen:
905 : //
906 : // Second-level index block 1 - contains keys a@1, c@1
907 : // Second-level index block 2 - contains keys e@1, g@1
908 : // Second-level index block 3 - contains keys i@2, k@2
909 : //
910 : // The virtual table lower bound is f. We have a range key masking filter
911 : // that filters keys with @1 suffix. We are positioned inside block 3 then
912 : // we Prev(). Block 2 is entirely filtered out, which makes us move to
913 : // block 1. Now the range key masking filter gets an update (via
914 : // SpanChanged) and it no longer filters out any keys. At this point if a
915 : // Next happens, we will load block 2 but it would not be legal to return
916 : // "e@1" which is outside the virtual bounds.
917 : //
918 : // The core of the problem is that skipBackward doesn't know it can stop
919 : // at block 2, because it doesn't know what keys are at the start of that
920 : // block. This is why we don't have this problem in the opposite
921 : // direction: skipForward will never go beyond the last relevant block
922 : // because it looks at the separator key which is an upper bound for the
923 : // block.
924 : //
925 : // Note that this is only a problem with virtual tables; we make no
926 : // guarantees wrt an iterator lower bound when we iterate forward. But we
927 : // must never return keys that are not inside the virtual table.
928 2 : useSeek := i.secondLevel.vState != nil && (!PI(&i.topLevelIndex).Valid() ||
929 2 : PI(&i.topLevelIndex).SeparatorLT(i.secondLevel.vState.lower.UserKey))
930 2 :
931 2 : i.secondLevel.exhaustedBounds = 0
932 2 : if !PI(&i.topLevelIndex).Next() {
933 2 : PD(&i.secondLevel.data).Invalidate()
934 2 : PI(&i.secondLevel.index).Invalidate()
935 2 : return nil
936 2 : }
937 2 : result := i.loadSecondLevelIndexBlock(+1)
938 2 : if result == loadBlockFailed {
939 1 : return nil
940 1 : }
941 2 : if result == loadBlockOK {
942 2 : var ikv *base.InternalKV
943 2 : if useSeek {
944 2 : ikv = i.secondLevel.SeekGE(i.secondLevel.lower, base.SeekGEFlagsNone)
945 2 : } else {
946 2 : ikv = i.secondLevel.firstInternal()
947 2 : }
948 2 : if ikv != nil {
949 2 : return i.secondLevel.maybeVerifyKey(ikv)
950 2 : }
951 : // Next iteration will return if singleLevelIterator set
952 : // exhaustedBounds = +1.
953 2 : } else {
954 2 : // result == loadBlockIrrelevant. Enforce the upper bound here since
955 2 : // don't want to bother moving to the next entry in the top level
956 2 : // index if upper bound is already exceeded. Note that the next
957 2 : // entry starts with keys >= i.topLevelIndex.Separator() since even
958 2 : // though this is the block separator, the same user key can span
959 2 : // multiple index blocks. If upper is exclusive we pass orEqual=true
960 2 : // below, else we require the separator to be strictly greater than
961 2 : // upper.
962 2 : if i.secondLevel.upper != nil && PI(&i.topLevelIndex).SeparatorGT(
963 2 : i.secondLevel.upper, !i.secondLevel.endKeyInclusive) {
964 2 : i.secondLevel.exhaustedBounds = +1
965 2 : // Next iteration will return.
966 2 : }
967 : }
968 : }
969 : }
970 :
971 2 : func (i *twoLevelIterator[I, PI, D, PD]) skipBackward() *base.InternalKV {
972 2 : for {
973 2 : if i.secondLevel.err != nil || i.secondLevel.exhaustedBounds < 0 {
974 2 : return nil
975 2 : }
976 2 : i.secondLevel.exhaustedBounds = 0
977 2 : if !PI(&i.topLevelIndex).Prev() {
978 2 : PD(&i.secondLevel.data).Invalidate()
979 2 : PI(&i.secondLevel.index).Invalidate()
980 2 : return nil
981 2 : }
982 2 : result := i.loadSecondLevelIndexBlock(-1)
983 2 : if result == loadBlockFailed {
984 1 : return nil
985 1 : }
986 2 : if result == loadBlockOK {
987 2 : ikv := i.secondLevel.lastInternal()
988 2 : if ikv != nil {
989 2 : return i.secondLevel.maybeVerifyKey(ikv)
990 2 : }
991 :
992 : // Next iteration will return if singleLevelIterator set
993 : // exhaustedBounds = -1.
994 2 : } else {
995 2 : // result == loadBlockIrrelevant. Enforce the lower bound here since
996 2 : // don't want to bother moving to the previous entry in the top
997 2 : // level index if lower bound is already exceeded. Note that the
998 2 : // previous entry starts with keys <= i.topLevelIndex.Separator() since
999 2 : // even though this is the current block's separator, the same user
1000 2 : // key can span multiple index blocks.
1001 2 : if i.secondLevel.lower != nil && PI(&i.topLevelIndex).SeparatorLT(i.secondLevel.lower) {
1002 2 : i.secondLevel.exhaustedBounds = -1
1003 2 : // Next iteration will return.
1004 2 : }
1005 : }
1006 : }
1007 : }
1008 :
1009 2 : func (i *twoLevelIterator[I, PI, D, PD]) Error() error {
1010 2 : return i.secondLevel.Error()
1011 2 : }
1012 :
1013 2 : func (i *twoLevelIterator[I, PI, D, PD]) SetBounds(lower, upper []byte) {
1014 2 : i.secondLevel.SetBounds(lower, upper)
1015 2 : }
1016 :
1017 0 : func (i *twoLevelIterator[I, PI, D, PD]) SetContext(ctx context.Context) {
1018 0 : i.secondLevel.SetContext(ctx)
1019 0 : }
1020 :
1021 2 : func (i *twoLevelIterator[I, PI, D, PD]) SetCloseHook(fn func(i Iterator) error) {
1022 2 : i.secondLevel.SetCloseHook(fn)
1023 2 : }
1024 :
1025 2 : func (i *twoLevelIterator[I, PI, D, PD]) SetupForCompaction() {
1026 2 : i.secondLevel.SetupForCompaction()
1027 2 : }
1028 :
1029 : // Close implements internalIterator.Close, as documented in the pebble
1030 : // package.
1031 2 : func (i *twoLevelIterator[I, PI, D, PD]) Close() error {
1032 2 : if invariants.Enabled && i.secondLevel.pool != nil {
1033 0 : panic("twoLevelIterator's singleLevelIterator has its own non-nil pool")
1034 : }
1035 2 : pool := i.pool
1036 2 : err := i.secondLevel.closeInternal()
1037 2 : i.secondLevel.resetForReuse()
1038 2 : err = firstError(err, PI(&i.topLevelIndex).Close())
1039 2 : i.useFilterBlock = false
1040 2 : i.lastBloomFilterMatched = false
1041 2 : if pool != nil {
1042 2 : pool.Put(i)
1043 2 : }
1044 2 : return err
1045 : }
|