Line data Source code
1 : // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package sstable
6 :
7 : import (
8 : "bytes"
9 : "context"
10 : "fmt"
11 : "sync"
12 :
13 : "github.com/cockroachdb/errors"
14 : "github.com/cockroachdb/pebble/internal/base"
15 : "github.com/cockroachdb/pebble/internal/invariants"
16 : "github.com/cockroachdb/pebble/internal/treeprinter"
17 : "github.com/cockroachdb/pebble/objstorage"
18 : "github.com/cockroachdb/pebble/objstorage/objstorageprovider"
19 : "github.com/cockroachdb/pebble/sstable/block"
20 : )
21 :
22 : type twoLevelIterator[I any, PI indexBlockIterator[I], D any, PD dataBlockIterator[D]] struct {
23 : secondLevel singleLevelIterator[I, PI, D, PD]
   : // topLevelIndex iterates over the table's top-level index block; each of
   : // its entries points to a second-level index block.
24 : topLevelIndex I
25 : // pool is the pool from which the iterator was allocated and to which the
26 : // iterator should be returned on Close. Because the iterator is
27 : // parameterized by the type of the data block iterator, pools must be
28 : // specific to the type of the data block iterator.
29 : pool *sync.Pool
30 :
31 : // useFilterBlock controls whether we consult the bloom filter in the
32 : // twoLevelIterator code. Note that secondLevel.useFilterBlock is always
33 : // false - any filtering happens at the top level.
34 : useFilterBlock bool
   : // lastBloomFilterMatched records whether the most recent SeekPrefixGE's
   : // bloom filter probe found a possible match. If it did not, the next
   : // SeekPrefixGE cannot reuse the current iterator position
   : // (trySeekUsingNext is disabled).
35 : lastBloomFilterMatched bool
36 : }
37 :
   : // Compile-time assertion that the row-block instantiation satisfies Iterator.
38 : var _ Iterator = (*twoLevelIteratorRowBlocks)(nil)
39 :
40 : // loadSecondLevelIndexBlock loads the index block at the current top level
41 : // index position and leaves i.index unpositioned. If unsuccessful, it sets
42 : // i.secondLevel.err to any error encountered, which may be nil if we have
43 : // simply exhausted the entire table. This is used for two level indexes.
   : //
   : // dir is the iteration direction (+1 forward, -1 backward); it is only
   : // consulted when a block-property filter reports blockMaybeExcluded and we
   : // need to resolve it via resolveMaybeExcluded.
44 1 : func (i *twoLevelIterator[I, PI, D, PD]) loadSecondLevelIndexBlock(dir int8) loadBlockResult {
45 1 : // Ensure the index data block iterators are invalidated even if loading of
46 1 : // the index fails.
47 1 : PD(&i.secondLevel.data).Invalidate()
48 1 : PI(&i.secondLevel.index).Invalidate()
49 1 : if !PI(&i.topLevelIndex).Valid() {
50 0 : return loadBlockFailed
51 0 : }
52 1 : bhp, err := PI(&i.topLevelIndex).BlockHandleWithProperties()
53 1 : if err != nil {
54 0 : i.secondLevel.err = base.CorruptionErrorf("pebble/table: corrupt top level index entry (%v)", err)
55 0 : return loadBlockFailed
56 0 : }
57 1 : if i.secondLevel.bpfs != nil {
58 1 : intersects, err := i.secondLevel.bpfs.intersects(bhp.Props)
59 1 : if err != nil {
60 0 : i.secondLevel.err = errCorruptIndexEntry(err)
61 0 : return loadBlockFailed
62 0 : }
63 1 : if intersects == blockMaybeExcluded {
64 1 : intersects = i.resolveMaybeExcluded(dir)
65 1 : }
66 1 : if intersects == blockExcluded {
67 1 : return loadBlockIrrelevant
68 1 : }
69 : // blockIntersects
70 : }
71 1 : indexBlock, err := i.secondLevel.reader.readIndexBlock(i.secondLevel.ctx, i.secondLevel.readBlockEnv, i.secondLevel.indexFilterRH, bhp.Handle)
72 1 : if err != nil {
73 0 : i.secondLevel.err = err
74 0 : return loadBlockFailed
75 0 : }
76 1 : err = PI(&i.secondLevel.index).InitHandle(i.secondLevel.cmp, i.secondLevel.reader.Split, indexBlock, i.secondLevel.transforms)
77 1 : if err != nil {
78 0 : PI(&i.secondLevel.index).Invalidate()
79 0 : i.secondLevel.err = err
80 0 : return loadBlockFailed
81 0 : }
82 1 : return loadBlockOK
83 : }
84 :
85 : // resolveMaybeExcluded is invoked when the block-property filterer has found
86 : // that an index block is excluded according to its properties but only if its
87 : // bounds fall within the filter's current bounds. This function consults the
88 : // appropriate bound, depending on the iteration direction, and returns either
89 : // `blockIntersects` or `blockExcluded`.
   : //
   : // dir > 0 indicates forward iteration; dir <= 0 indicates backward iteration.
90 1 : func (i *twoLevelIterator[I, PI, D, PD]) resolveMaybeExcluded(dir int8) intersectsResult {
91 1 : // This iterator is configured with a bound-limited block property filter.
92 1 : // The bpf determined this entire index block could be excluded from
93 1 : // iteration based on the property encoded in the block handle. However, we
94 1 : // still need to determine if the index block is wholly contained within the
95 1 : // filter's key bounds.
96 1 : //
97 1 : // External guarantees ensure all its data blocks' keys are ≥ the filter's
98 1 : // lower bound during forward iteration, and that all its data blocks' keys
99 1 : // are < the filter's upper bound during backward iteration. We only need to
100 1 : // determine if the opposite bound is also met.
101 1 : //
102 1 : // The index separator in topLevelIndex.Separator() provides an inclusive
103 1 : // upper-bound for the index block's keys, guaranteeing that all its keys
104 1 : // are ≤ topLevelIndex.Separator(). For forward iteration, this is all we
105 1 : // need.
106 1 : if dir > 0 {
107 1 : // Forward iteration.
108 1 : if i.secondLevel.bpfs.boundLimitedFilter.KeyIsWithinUpperBound(PI(&i.topLevelIndex).Separator()) {
109 1 : return blockExcluded
110 1 : }
111 1 : return blockIntersects
112 : }
113 :
114 : // Reverse iteration.
115 : //
116 : // Because we're iterating in the reverse direction, we don't yet have
117 : // enough context available to determine if the block is wholly contained
118 : // within its bounds. This case arises only during backward iteration,
119 : // because of the way the index is structured.
120 : //
121 : // Consider a bound-limited bpf limited to the bounds [b,d), loading the
122 : // block with separator `c`. During reverse iteration, the guarantee that
123 : // all the block's keys are < `d` is externally provided, but no guarantee
124 : // is made on the bpf's lower bound. The separator `c` only provides an
125 : // inclusive upper bound on the block's keys, indicating that the
126 : // corresponding block handle points to a block containing only keys ≤ `c`.
127 : //
128 : // To establish a lower bound, we step the top-level index backwards to read
129 : // the previous block's separator, which provides an inclusive lower bound
130 : // on the original index block's keys. Afterwards, we step forward to
131 : // restore our top-level index position.
132 1 : if !PI(&i.topLevelIndex).Prev() {
133 1 : // The original block points to the first index block of this table. If
134 1 : // we knew the lower bound for the entire table, it could provide a
135 1 : // lower bound, but the code refactoring necessary to read it doesn't
136 1 : // seem worth the payoff. We fall through to loading the block.
137 1 : } else if i.secondLevel.bpfs.boundLimitedFilter.KeyIsWithinLowerBound(PI(&i.topLevelIndex).Separator()) {
138 1 : // The lower-bound on the original index block falls within the filter's
139 1 : // bounds, and we can skip the block (after restoring our current
140 1 : // top-level index position).
141 1 : _ = PI(&i.topLevelIndex).Next()
142 1 : return blockExcluded
143 1 : }
144 1 : _ = PI(&i.topLevelIndex).Next()
145 1 : return blockIntersects
146 : }
147 :
148 : // newColumnBlockTwoLevelIterator reads the top-level index block and creates and
149 : // initializes a two-level iterator over an sstable with column-oriented data
150 : // blocks.
151 : //
152 : // Note that lower, upper are iterator bounds and are separate from virtual
153 : // sstable bounds. If the virtualState passed in is not nil, then virtual
154 : // sstable bounds will be enforced.
155 : func newColumnBlockTwoLevelIterator(
156 : ctx context.Context,
157 : r *Reader,
158 : v *virtualState,
159 : transforms IterTransforms,
160 : lower, upper []byte,
161 : filterer *BlockPropertiesFilterer,
162 : filterBlockSizeLimit FilterBlockSizeLimit,
163 : stats *base.InternalIteratorStats,
164 : statsAccum IterStatsAccumulator,
165 : rp ReaderProvider,
166 : bufferPool *block.BufferPool,
167 1 : ) (*twoLevelIteratorColumnBlocks, error) {
168 1 : if r.err != nil {
169 0 : return nil, r.err
170 0 : }
171 1 : if !r.tableFormat.BlockColumnar() {
   : // NOTE(review): newRowBlockTwoLevelIterator formats tableFormat with %s in
   : // its equivalent panic; consider %s here too for consistency — confirm
   : // which rendering is intended.
172 0 : panic(errors.AssertionFailedf("table format %d should not use columnar block format", r.tableFormat))
173 : }
174 1 : i := twoLevelIterColumnBlockPool.Get().(*twoLevelIteratorColumnBlocks)
175 1 : i.secondLevel.init(ctx, r, v, transforms, lower, upper, filterer,
176 1 : false, // Disable the use of the filter block in the second level.
177 1 : stats, statsAccum, bufferPool)
178 1 : var getLazyValuer block.GetLazyValueForPrefixAndValueHandler
179 1 : if r.Properties.NumValueBlocks > 0 {
180 1 : // NB: we cannot avoid this ~248 byte allocation, since valueBlockReader
181 1 : // can outlive the singleLevelIterator due to being embedded in a
182 1 : // LazyValue. This consumes ~2% in microbenchmark CPU profiles, but we
183 1 : // should only optimize this if it shows up as significant in end-to-end
184 1 : // CockroachDB benchmarks, since it is tricky to do so. One possibility
185 1 : // is that if many sstable iterators only get positioned at latest
186 1 : // versions of keys, and therefore never expose a LazyValue that is
187 1 : // separated to their callers, they can put this valueBlockReader into a
188 1 : // sync.Pool.
189 1 : i.secondLevel.vbReader = valueBlockReader{
190 1 : bpOpen: &i.secondLevel,
191 1 : rp: rp,
192 1 : vbih: r.valueBIH,
193 1 : stats: stats,
194 1 : }
195 1 : getLazyValuer = &i.secondLevel.vbReader
196 1 : i.secondLevel.vbRH = objstorageprovider.UsePreallocatedReadHandle(r.readable, objstorage.NoReadBefore, &i.secondLevel.vbRHPrealloc)
197 1 : }
198 1 : i.secondLevel.data.InitOnce(r.keySchema, r.Compare, r.Split, getLazyValuer)
199 1 : i.useFilterBlock = shouldUseFilterBlock(r, filterBlockSizeLimit)
200 1 : topLevelIndexH, err := r.readTopLevelIndexBlock(ctx, i.secondLevel.readBlockEnv, i.secondLevel.indexFilterRH)
201 1 : if err == nil {
202 1 : err = i.topLevelIndex.InitHandle(i.secondLevel.cmp, i.secondLevel.reader.Split, topLevelIndexH, transforms)
203 1 : }
204 1 : if err != nil {
   : // Close releases the partially-initialized iterator (back to its pool).
205 0 : _ = i.Close()
206 0 : return nil, err
207 0 : }
208 1 : return i, nil
209 : }
210 :
211 : // newRowBlockTwoLevelIterator reads the top-level index block and creates and
212 : // initializes a two-level iterator over an sstable with row-oriented data
213 : // blocks.
214 : //
215 : // Note that lower, upper are iterator bounds and are separate from virtual
216 : // sstable bounds. If the virtualState passed in is not nil, then virtual
217 : // sstable bounds will be enforced.
218 : func newRowBlockTwoLevelIterator(
219 : ctx context.Context,
220 : r *Reader,
221 : v *virtualState,
222 : transforms IterTransforms,
223 : lower, upper []byte,
224 : filterer *BlockPropertiesFilterer,
225 : filterBlockSizeLimit FilterBlockSizeLimit,
226 : stats *base.InternalIteratorStats,
227 : statsAccum IterStatsAccumulator,
228 : rp ReaderProvider,
229 : bufferPool *block.BufferPool,
230 1 : ) (*twoLevelIteratorRowBlocks, error) {
231 1 : if r.err != nil {
232 0 : return nil, r.err
233 0 : }
234 1 : if r.tableFormat.BlockColumnar() {
235 0 : panic(errors.AssertionFailedf("table format %s uses block columnar format", r.tableFormat))
236 : }
237 1 : i := twoLevelIterRowBlockPool.Get().(*twoLevelIteratorRowBlocks)
238 1 : i.secondLevel.init(ctx, r, v, transforms, lower, upper, filterer,
239 1 : false, // Disable the use of the filter block in the second level.
240 1 : stats, statsAccum, bufferPool)
241 1 : if r.tableFormat >= TableFormatPebblev3 {
242 1 : if r.Properties.NumValueBlocks > 0 {
243 1 : // NB: we cannot avoid this ~248 byte allocation, since valueBlockReader
244 1 : // can outlive the singleLevelIterator due to being embedded in a
245 1 : // LazyValue. This consumes ~2% in microbenchmark CPU profiles, but we
246 1 : // should only optimize this if it shows up as significant in end-to-end
247 1 : // CockroachDB benchmarks, since it is tricky to do so. One possibility
248 1 : // is that if many sstable iterators only get positioned at latest
249 1 : // versions of keys, and therefore never expose a LazyValue that is
250 1 : // separated to their callers, they can put this valueBlockReader into a
251 1 : // sync.Pool.
252 1 : i.secondLevel.vbReader = valueBlockReader{
253 1 : bpOpen: &i.secondLevel,
254 1 : rp: rp,
255 1 : vbih: r.valueBIH,
256 1 : stats: stats,
257 1 : }
258 1 : i.secondLevel.data.SetGetLazyValuer(&i.secondLevel.vbReader)
259 1 : i.secondLevel.vbRH = objstorageprovider.UsePreallocatedReadHandle(r.readable, objstorage.NoReadBefore, &i.secondLevel.vbRHPrealloc)
260 1 : }
261 1 : i.secondLevel.data.SetHasValuePrefix(true)
262 : }
263 :
264 1 : i.useFilterBlock = shouldUseFilterBlock(r, filterBlockSizeLimit)
265 1 :
266 1 : topLevelIndexH, err := r.readTopLevelIndexBlock(ctx, i.secondLevel.readBlockEnv, i.secondLevel.indexFilterRH)
267 1 : if err == nil {
268 1 : err = i.topLevelIndex.InitHandle(i.secondLevel.cmp, i.secondLevel.reader.Split, topLevelIndexH, transforms)
269 1 : }
270 1 : if err != nil {
   : // Close releases the partially-initialized iterator (back to its pool).
271 0 : _ = i.Close()
272 0 : return nil, err
273 0 : }
274 1 : return i, nil
275 : }
276 :
   : // String implements fmt.Stringer, delegating to the embedded second-level
   : // iterator.
277 0 : func (i *twoLevelIterator[I, PI, D, PD]) String() string {
278 0 : return i.secondLevel.String()
279 0 : }
280 :
281 : // DebugTree is part of the InternalIterator interface. It prints a single
   : // child node to tp identifying this iterator's type, address, and file.
282 0 : func (i *twoLevelIterator[I, PI, D, PD]) DebugTree(tp treeprinter.Node) {
283 0 : tp.Childf("%T(%p) fileNum=%s", i, i, i.String())
284 0 : }
285 :
286 : // SeekGE implements internalIterator.SeekGE, as documented in the pebble
287 : // package. Note that SeekGE only checks the upper bound. It is up to the
288 : // caller to ensure that key is greater than or equal to the lower bound.
289 : func (i *twoLevelIterator[I, PI, D, PD]) SeekGE(
290 : key []byte, flags base.SeekGEFlags,
291 1 : ) *base.InternalKV {
292 1 : if i.secondLevel.vState != nil {
293 1 : // Callers of SeekGE don't know about virtual sstable bounds, so we may
294 1 : // have to internally restrict the bounds.
295 1 : //
296 1 : // TODO(bananabrick): We can optimize away this check for the level iter
297 1 : // if necessary.
298 1 : if i.secondLevel.cmp(key, i.secondLevel.lower) < 0 {
299 1 : key = i.secondLevel.lower
300 1 : }
301 : }
302 :
303 1 : err := i.secondLevel.err
304 1 : i.secondLevel.err = nil // clear cached iteration error
305 1 :
306 1 : // The twoLevelIterator could be already exhausted. Utilize that when
307 1 : // trySeekUsingNext is true. See the comment about data-exhausted, PGDE, and
308 1 : // bounds-exhausted near the top of the file.
309 1 : if flags.TrySeekUsingNext() &&
310 1 : (i.secondLevel.exhaustedBounds == +1 || (PD(&i.secondLevel.data).IsDataInvalidated() && PI(&i.secondLevel.index).IsDataInvalidated())) &&
311 1 : err == nil {
312 1 : // Already exhausted, so return nil.
313 1 : return nil
314 1 : }
315 :
316 : // SeekGE performs various step-instead-of-seeking optimizations: eg enabled
317 : // by trySeekUsingNext, or by monotonically increasing bounds (i.boundsCmp).
318 :
319 : // We fall into the slow path if i.index.IsDataInvalidated() even if the
320 : // top-level iterator is already positioned correctly and all other
321 : // conditions are met. An alternative structure could reuse topLevelIndex's
322 : // current position and reload the index block to which it points. Arguably,
323 : // an index block load is expensive and the index block may still be earlier
324 : // than the index block containing the sought key, resulting in a wasteful
325 : // block load.
326 :
327 1 : var dontSeekWithinSingleLevelIter bool
328 1 : if PI(&i.topLevelIndex).IsDataInvalidated() || !PI(&i.topLevelIndex).Valid() || PI(&i.secondLevel.index).IsDataInvalidated() || err != nil ||
329 1 : (i.secondLevel.boundsCmp <= 0 && !flags.TrySeekUsingNext()) || PI(&i.topLevelIndex).SeparatorLT(key) {
330 1 : // Slow-path: need to position the topLevelIndex.
331 1 :
332 1 : // The previous exhausted state of singleLevelIterator is no longer
333 1 : // relevant, since we may be moving to a different index block.
334 1 : i.secondLevel.exhaustedBounds = 0
335 1 : flags = flags.DisableTrySeekUsingNext()
336 1 : if !PI(&i.topLevelIndex).SeekGE(key) {
337 1 : PD(&i.secondLevel.data).Invalidate()
338 1 : PI(&i.secondLevel.index).Invalidate()
339 1 : return nil
340 1 : }
341 :
342 1 : result := i.loadSecondLevelIndexBlock(+1)
343 1 : if result == loadBlockFailed {
344 0 : i.secondLevel.boundsCmp = 0
345 0 : return nil
346 0 : }
347 1 : if result == loadBlockIrrelevant {
348 1 : // Enforce the upper bound here since we don't want to bother moving
349 1 : // to the next entry in the top level index if upper bound is already
350 1 : // exceeded. Note that the next entry starts with keys >=
351 1 : // topLevelIndex.Separator() since even though this is the block
352 1 : // separator, the same user key can span multiple index blocks. If
353 1 : // upper is exclusive we pass orEqual=true below, else we require
354 1 : // the separator to be strictly greater than upper.
355 1 : if i.secondLevel.upper != nil && PI(&i.topLevelIndex).SeparatorGT(
356 1 : i.secondLevel.upper, !i.secondLevel.endKeyInclusive) {
357 1 : i.secondLevel.exhaustedBounds = +1
358 1 : }
359 : // Fall through to skipForward.
360 1 : dontSeekWithinSingleLevelIter = true
361 1 : // Clear boundsCmp.
362 1 : //
363 1 : // In the typical cases where dontSeekWithinSingleLevelIter=false,
364 1 : // the singleLevelIterator.SeekGE call will clear boundsCmp.
365 1 : // However, in this case where dontSeekWithinSingleLevelIter=true,
366 1 : // we never seek on the single-level iterator. This call will fall
367 1 : // through to skipForward, which may improperly leave boundsCmp=+1
368 1 : // unless we clear it here.
369 1 : i.secondLevel.boundsCmp = 0
370 : }
371 1 : } else {
372 1 : // INVARIANT: err == nil.
373 1 : //
374 1 : // Else fast-path: There are two possible cases, from
375 1 : // (i.boundsCmp > 0 || flags.TrySeekUsingNext()):
376 1 : //
377 1 : // 1) The bounds have moved forward (i.boundsCmp > 0) and this SeekGE is
378 1 : // respecting the lower bound (guaranteed by Iterator). We know that the
379 1 : // iterator must already be positioned within or just outside the previous
380 1 : // bounds. Therefore, the topLevelIndex iter cannot be positioned at an
381 1 : // entry ahead of the seek position (though it can be positioned behind).
382 1 : // The !SeparatorLT(key) check in the condition above confirms that it is
383 1 : // not behind. Since it is not ahead and not behind it must be at the
384 1 : // right position.
385 1 : //
386 1 : // 2) This SeekGE will land on a key that is greater than the key we are
387 1 : // currently at (guaranteed by trySeekUsingNext), but since the separator
388 1 : // is not < key (!SeparatorLT(key)), we are at the correct lower level
389 1 : // index block. No need to reset the state of singleLevelIterator.
390 1 : //
391 1 : // Note that cases 1 and 2 never overlap, and one of them must be true.
392 1 : // This invariant checking is important enough that we do not gate it
393 1 : // behind invariants.Enabled.
394 1 : if i.secondLevel.boundsCmp > 0 == flags.TrySeekUsingNext() {
395 0 : panic(fmt.Sprintf("inconsistency in optimization case 1 %t and case 2 %t",
396 0 : i.secondLevel.boundsCmp > 0, flags.TrySeekUsingNext()))
397 : }
398 :
399 1 : if !flags.TrySeekUsingNext() {
400 1 : // Case 1. Bounds have changed so the previous exhausted bounds state is
401 1 : // irrelevant.
402 1 : // WARNING-data-exhausted: this is safe to do only because the monotonic
403 1 : // bounds optimizations only work when !data-exhausted. If they also
404 1 : // worked with data-exhausted, we have made it unclear whether
405 1 : // data-exhausted is actually true. See the comment at the top of the
406 1 : // file.
407 1 : i.secondLevel.exhaustedBounds = 0
408 1 : }
409 : // Else flags.TrySeekUsingNext(). The i.exhaustedBounds is important to
410 : // preserve for singleLevelIterator, and twoLevelIterator.skipForward. See
411 : // bug https://github.com/cockroachdb/pebble/issues/2036.
412 : }
413 :
414 1 : if !dontSeekWithinSingleLevelIter {
415 1 : // Note that while trySeekUsingNext could be false here, singleLevelIterator
416 1 : // could do its own boundsCmp-based optimization to seek using next.
417 1 : if ikv := i.secondLevel.SeekGE(key, flags); ikv != nil {
418 1 : return ikv
419 1 : }
420 : }
421 1 : return i.skipForward()
422 : }
423 :
424 : // SeekPrefixGE implements internalIterator.SeekPrefixGE, as documented in the
425 : // pebble package. Note that SeekPrefixGE only checks the upper bound. It is up
426 : // to the caller to ensure that key is greater than or equal to the lower bound.
427 : func (i *twoLevelIterator[I, PI, D, PD]) SeekPrefixGE(
428 : prefix, key []byte, flags base.SeekGEFlags,
429 1 : ) *base.InternalKV {
430 1 : if i.secondLevel.vState != nil {
431 1 : // Callers of SeekPrefixGE don't know about virtual sstable bounds, so we
432 1 : // may have to internally restrict the bounds.
433 1 : //
434 1 : // TODO(bananabrick): We can optimize away this check for the level iter
435 1 : // if necessary.
436 1 : if i.secondLevel.cmp(key, i.secondLevel.lower) < 0 {
437 1 : key = i.secondLevel.lower
438 1 : }
439 : }
440 :
441 : // NOTE: prefix is only used for bloom filter checking and not later work in
442 : // this method. Hence, we can use the existing iterator position if the last
443 : // SeekPrefixGE did not fail bloom filter matching.
444 :
445 1 : err := i.secondLevel.err
446 1 : i.secondLevel.err = nil // clear cached iteration error
447 1 :
448 1 : // The twoLevelIterator could be already exhausted. Utilize that when
449 1 : // trySeekUsingNext is true. See the comment about data-exhausted, PGDE, and
450 1 : // bounds-exhausted near the top of the file.
451 1 : filterUsedAndDidNotMatch := i.useFilterBlock && !i.lastBloomFilterMatched
452 1 : if flags.TrySeekUsingNext() && !filterUsedAndDidNotMatch &&
453 1 : (i.secondLevel.exhaustedBounds == +1 || (PD(&i.secondLevel.data).IsDataInvalidated() && PI(&i.secondLevel.index).IsDataInvalidated())) &&
454 1 : err == nil {
455 1 : // Already exhausted, so return nil.
456 1 : return nil
457 1 : }
458 :
459 : // Check prefix bloom filter.
460 1 : if i.useFilterBlock {
461 1 : if !i.lastBloomFilterMatched {
462 1 : // Iterator is not positioned based on last seek.
463 1 : flags = flags.DisableTrySeekUsingNext()
464 1 : }
465 1 : i.lastBloomFilterMatched = false
466 1 : var mayContain bool
467 1 : mayContain, i.secondLevel.err = i.secondLevel.bloomFilterMayContain(prefix)
468 1 : if i.secondLevel.err != nil || !mayContain {
469 1 : // In the i.secondLevel.err == nil case, this invalidation may not be necessary for
470 1 : // correctness, and may be a place to optimize later by reusing the
471 1 : // already loaded block. It was necessary in earlier versions of the code
472 1 : // since the caller was allowed to call Next when SeekPrefixGE returned
473 1 : // nil. This is no longer allowed.
474 1 : PD(&i.secondLevel.data).Invalidate()
475 1 : return nil
476 1 : }
477 1 : i.lastBloomFilterMatched = true
478 : }
479 :
480 : // Bloom filter matches.
481 :
482 : // SeekPrefixGE performs various step-instead-of-seeking optimizations: eg
483 : // enabled by trySeekUsingNext, or by monotonically increasing bounds
484 : // (i.boundsCmp).
485 :
486 : // We fall into the slow path if i.index.IsDataInvalidated() even if the
487 : // top-level iterator is already positioned correctly and all other
488 : // conditions are met. An alternative structure could reuse topLevelIndex's
489 : // current position and reload the index block to which it points. Arguably,
490 : // an index block load is expensive and the index block may still be earlier
491 : // than the index block containing the sought key, resulting in a wasteful
492 : // block load.
493 :
494 1 : var dontSeekWithinSingleLevelIter bool
495 1 : if PI(&i.topLevelIndex).IsDataInvalidated() || !PI(&i.topLevelIndex).Valid() || PI(&i.secondLevel.index).IsDataInvalidated() || err != nil ||
496 1 : (i.secondLevel.boundsCmp <= 0 && !flags.TrySeekUsingNext()) || PI(&i.topLevelIndex).SeparatorLT(key) {
497 1 : // Slow-path: need to position the topLevelIndex.
498 1 :
499 1 : // The previous exhausted state of singleLevelIterator is no longer
500 1 : // relevant, since we may be moving to a different index block.
501 1 : i.secondLevel.exhaustedBounds = 0
502 1 : flags = flags.DisableTrySeekUsingNext()
503 1 : if !PI(&i.topLevelIndex).SeekGE(key) {
504 1 : PD(&i.secondLevel.data).Invalidate()
505 1 : PI(&i.secondLevel.index).Invalidate()
506 1 : return nil
507 1 : }
508 :
509 1 : result := i.loadSecondLevelIndexBlock(+1)
510 1 : if result == loadBlockFailed {
511 0 : i.secondLevel.boundsCmp = 0
512 0 : return nil
513 0 : }
514 1 : if result == loadBlockIrrelevant {
515 1 : // Enforce the upper bound here since we don't want to bother moving
516 1 : // to the next entry in the top level index if upper bound is already
517 1 : // exceeded. Note that the next entry starts with keys >=
518 1 : // topLevelIndex.Separator() since even though this is the block
519 1 : // separator, the same user key can span multiple index blocks. If
520 1 : // upper is exclusive we pass orEqual=true below, else we require
521 1 : // the separator to be strictly greater than upper.
522 1 : if i.secondLevel.upper != nil && PI(&i.topLevelIndex).SeparatorGT(
523 1 : i.secondLevel.upper, !i.secondLevel.endKeyInclusive) {
524 1 : i.secondLevel.exhaustedBounds = +1
525 1 : }
526 : // Fall through to skipForward.
527 1 : dontSeekWithinSingleLevelIter = true
528 1 : // Clear boundsCmp.
529 1 : //
530 1 : // In the typical cases where dontSeekWithinSingleLevelIter=false,
531 1 : // the singleLevelIterator.SeekPrefixGE call will clear boundsCmp.
532 1 : // However, in this case where dontSeekWithinSingleLevelIter=true,
533 1 : // we never seek on the single-level iterator. This call will fall
534 1 : // through to skipForward, which may improperly leave boundsCmp=+1
535 1 : // unless we clear it here.
536 1 : i.secondLevel.boundsCmp = 0
537 : }
538 1 : } else {
539 1 : // INVARIANT: err == nil.
540 1 : //
541 1 : // Else fast-path: There are two possible cases, from
542 1 : // (i.boundsCmp > 0 || flags.TrySeekUsingNext()):
543 1 : //
544 1 : // 1) The bounds have moved forward (i.boundsCmp > 0) and this
545 1 : // SeekPrefixGE is respecting the lower bound (guaranteed by Iterator). We
546 1 : // know that the iterator must already be positioned within or just
547 1 : // outside the previous bounds. Therefore, the topLevelIndex iter cannot
548 1 : // be positioned at an entry ahead of the seek position (though it can be
549 1 : // positioned behind). The !SeparatorLT(key) check in the condition above
550 1 : // confirms that it is not behind. Since it is not ahead and not behind it
551 1 : // must be at the right position.
552 1 : //
553 1 : // 2) This SeekPrefixGE will land on a key that is greater than the key we
554 1 : // are currently at (guaranteed by trySeekUsingNext), but since the
555 1 : // separator is not < key (!SeparatorLT(key)), we are at the correct lower
556 1 : // level index block. No need to reset the state of singleLevelIterator.
557 1 : //
558 1 : // Note that cases 1 and 2 never overlap, and one of them must be true.
559 1 : // This invariant checking is important enough that we do not gate it
560 1 : // behind invariants.Enabled.
561 1 : if i.secondLevel.boundsCmp > 0 == flags.TrySeekUsingNext() {
562 0 : panic(fmt.Sprintf("inconsistency in optimization case 1 %t and case 2 %t",
563 0 : i.secondLevel.boundsCmp > 0, flags.TrySeekUsingNext()))
564 : }
565 :
566 1 : if !flags.TrySeekUsingNext() {
567 1 : // Case 1. Bounds have changed so the previous exhausted bounds state is
568 1 : // irrelevant.
569 1 : // WARNING-data-exhausted: this is safe to do only because the monotonic
570 1 : // bounds optimizations only work when !data-exhausted. If they also
571 1 : // worked with data-exhausted, we have made it unclear whether
572 1 : // data-exhausted is actually true. See the comment at the top of the
573 1 : // file.
574 1 : i.secondLevel.exhaustedBounds = 0
575 1 : }
576 : // Else flags.TrySeekUsingNext(). The i.exhaustedBounds is important to
577 : // preserve for singleLevelIterator, and twoLevelIterator.skipForward. See
578 : // bug https://github.com/cockroachdb/pebble/issues/2036.
579 : }
580 :
581 1 : if !dontSeekWithinSingleLevelIter {
582 1 : if ikv := i.secondLevel.seekPrefixGE(prefix, key, flags); ikv != nil {
583 1 : return ikv
584 1 : }
585 : }
586 : // NB: skipForward checks whether exhaustedBounds is already +1.
587 1 : return i.skipForward()
588 : }
589 :
590 : // virtualLast positions the iterator at the last key within the virtual
   : // sstable's bounds. It should only be called if i.secondLevel.vState != nil
   : // (the comment previously referenced a non-existent i.vReader field).
591 1 : func (i *twoLevelIterator[I, PI, D, PD]) virtualLast() *base.InternalKV {
592 1 : if i.secondLevel.vState == nil {
593 0 : panic("pebble: invalid call to virtualLast")
594 : }
595 1 : if !i.secondLevel.endKeyInclusive {
596 1 : // Trivial case: with an exclusive upper bound, the last key is simply
597 1 : // the largest key strictly below upper.
598 1 : return i.SeekLT(i.secondLevel.upper, base.SeekLTFlagsNone)
599 1 : }
600 1 : return i.virtualLastSeekLE()
601 : }
601 :
602 : // virtualLastSeekLE implements a SeekLE() that can be used as part
603 : // of reverse-iteration calls such as a Last() on a virtual sstable. Does a
604 : // SeekLE on the (inclusive) upper bound of the file/iterator.
605 1 : func (i *twoLevelIterator[I, PI, D, PD]) virtualLastSeekLE() *base.InternalKV {
606 1 : // Callers of SeekLE don't know about virtual sstable bounds, so we may
607 1 : // have to internally restrict the bounds.
608 1 : //
609 1 : // TODO(bananabrick): We can optimize this check away for the level iter
610 1 : // if necessary.
611 1 : if !i.secondLevel.endKeyInclusive {
612 0 : panic("unexpected virtualLastSeekLE with exclusive upper bounds")
613 : }
614 1 : key := i.secondLevel.upper
615 1 : // Need to position the topLevelIndex.
616 1 : //
617 1 : // The previous exhausted state of singleLevelIterator is no longer
618 1 : // relevant, since we may be moving to a different index block.
619 1 : i.secondLevel.exhaustedBounds = 0
620 1 : // Seek optimization only applies until iterator is first positioned with a
621 1 : // SeekGE or SeekLT after SetBounds.
622 1 : i.secondLevel.boundsCmp = 0
623 1 : topLevelOk := PI(&i.topLevelIndex).SeekGE(key)
624 1 : // We can have multiple internal keys with the same user key as the seek
625 1 : // key. In that case, we want the last (greatest) internal key, so step
626 1 : // past every index entry whose separator equals the seek key.
627 1 : for topLevelOk && bytes.Equal(PI(&i.topLevelIndex).Separator(), key) {
628 1 : topLevelOk = PI(&i.topLevelIndex).Next()
629 1 : }
630 1 : if !topLevelOk {
631 1 : // Top-level index exhausted; step back to the preceding blocks.
632 1 : return i.skipBackward()
633 1 : }
634 1 : result := i.loadSecondLevelIndexBlock(-1)
635 1 : if result == loadBlockFailed {
636 0 : i.secondLevel.boundsCmp = 0
637 0 : return nil
638 0 : }
639 1 : if result == loadBlockIrrelevant {
640 1 : // Load the previous block.
641 1 : return i.skipBackward()
642 1 : }
643 1 : if ikv := i.secondLevel.virtualLastSeekLE(); ikv != nil {
644 1 : return ikv
645 1 : }
646 1 : return i.skipBackward()
647 : }
646 :
647 : // SeekLT implements internalIterator.SeekLT, as documented in the pebble
648 : // package. Note that SeekLT only checks the lower bound. It is up to the
649 : // caller to ensure that key is less than the upper bound.
650 : func (i *twoLevelIterator[I, PI, D, PD]) SeekLT(
651 : key []byte, flags base.SeekLTFlags,
652 1 : ) *base.InternalKV {
653 1 : if i.secondLevel.vState != nil {
654 1 : // Might have to fix upper bound since virtual sstable bounds are not
655 1 : // known to callers of SeekLT.
656 1 : //
657 1 : // TODO(bananabrick): We can optimize away this check for the level iter
658 1 : // if necessary.
659 1 : cmp := i.secondLevel.cmp(key, i.secondLevel.upper)
660 1 : // key == i.secondLevel.upper is fine. We'll do the right thing and return the
661 1 : // first internal key with user key < key.
662 1 : if cmp > 0 {
663 1 : return i.virtualLast()
664 1 : }
665 : }
666 :
667 1 : i.secondLevel.exhaustedBounds = 0
668 1 : i.secondLevel.err = nil // clear cached iteration error
669 1 : // Seek optimization only applies until iterator is first positioned after SetBounds.
670 1 : i.secondLevel.boundsCmp = 0
671 1 :
672 1 : var result loadBlockResult
673 1 : // NB: Unlike SeekGE, we don't have a fast-path here since we don't know
674 1 : // whether the topLevelIndex is positioned after the position that would
675 1 : // be returned by doing i.topLevelIndex.SeekGE(). To know this we would
676 1 : // need to know the index key preceding the current one.
677 1 : // NB: If a bound-limited block property filter is configured, it's
678 1 : // externally ensured that the filter is disabled (through returning
679 1 : // Intersects=false irrespective of the block props provided) during seeks.
      : // SeekGE on the top-level index positions us at the first second-level
      : // index block whose separator is >= key; any keys < key live in that
      : // block or earlier ones. If SeekGE finds nothing, every separator is
      : // < key, so we start from the last top-level entry instead.
680 1 : if !PI(&i.topLevelIndex).SeekGE(key) {
681 1 : if !PI(&i.topLevelIndex).Last() {
682 0 : PD(&i.secondLevel.data).Invalidate()
683 0 : PI(&i.secondLevel.index).Invalidate()
684 0 : return nil
685 0 : }
686 :
687 1 : result = i.loadSecondLevelIndexBlock(-1)
688 1 : if result == loadBlockFailed {
689 0 : return nil
690 0 : }
691 1 : if result == loadBlockOK {
692 1 : if ikv := i.secondLevel.lastInternal(); ikv != nil {
693 1 : return i.secondLevel.maybeVerifyKey(ikv)
694 1 : }
695 : // Fall through to skipBackward since the singleLevelIterator did
696 : // not have any blocks that satisfy the block interval
697 : // constraints, or the lower bound was reached.
698 : }
699 : // Else loadBlockIrrelevant, so fall through.
700 1 : } else {
701 1 : result = i.loadSecondLevelIndexBlock(-1)
702 1 : if result == loadBlockFailed {
703 0 : return nil
704 0 : }
705 1 : if result == loadBlockOK {
706 1 : if ikv := i.secondLevel.SeekLT(key, flags); ikv != nil {
707 1 : return i.secondLevel.maybeVerifyKey(ikv)
708 1 : }
709 : // Fall through to skipBackward since the singleLevelIterator did
710 : // not have any blocks that satisfy the block interval
711 : // constraint, or the lower bound was reached.
712 : }
713 : // Else loadBlockIrrelevant, so fall through.
714 : }
715 1 : if result == loadBlockIrrelevant {
716 1 : // Enforce the lower bound here since we don't want to bother moving to
717 1 : // the previous entry in the top level index if lower bound is already
718 1 : // exceeded. Note that the previous entry starts with keys <=
719 1 : // ikey.InternalKey.UserKey since even though this is the current block's
720 1 : // separator, the same user key can span multiple index blocks.
721 1 : if i.secondLevel.lower != nil && PI(&i.topLevelIndex).SeparatorLT(i.secondLevel.lower) {
722 1 : i.secondLevel.exhaustedBounds = -1
723 1 : }
724 : }
725 : // NB: skipBackward checks whether exhaustedBounds is already -1.
726 1 : return i.skipBackward()
727 : }
728 :
729 : // First implements internalIterator.First, as documented in the pebble
730 : // package. Note that First only checks the upper bound. It is up to the caller
731 : // to ensure that key is greater than or equal to the lower bound (e.g. via a
732 : // call to SeekGE(lower)).
733 1 : func (i *twoLevelIterator[I, PI, D, PD]) First() *base.InternalKV {
734 1 : // If we have a lower bound, use SeekGE. Note that in general this is not
735 1 : // supported usage, except when the lower bound is there because the table is
736 1 : // virtual.
737 1 : if i.secondLevel.lower != nil {
738 1 : return i.SeekGE(i.secondLevel.lower, base.SeekGEFlagsNone)
739 1 : }
740 1 : i.secondLevel.exhaustedBounds = 0
741 1 : i.secondLevel.err = nil // clear cached iteration error
742 1 : // Seek optimization only applies until iterator is first positioned after SetBounds.
743 1 : i.secondLevel.boundsCmp = 0
744 1 :
      : // Position the top-level index at its first entry; an empty top-level
      : // index means the table has nothing to return.
745 1 : if !PI(&i.topLevelIndex).First() {
746 0 : return nil
747 0 : }
748 1 : result := i.loadSecondLevelIndexBlock(+1)
749 1 : if result == loadBlockFailed {
750 0 : return nil
751 0 : }
752 1 : if result == loadBlockOK {
753 1 : if ikv := i.secondLevel.First(); ikv != nil {
754 1 : return ikv
755 1 : }
756 : // Else fall through to skipForward.
757 1 : } else {
758 1 : // result == loadBlockIrrelevant. Enforce the upper bound here since
759 1 : // we don't want to bother moving to the next entry in the top level index
760 1 : // if upper bound is already exceeded. Note that the next entry starts
761 1 : // with keys >= topLevelIndex.Separator() since even though this is the
762 1 : // block separator, the same user key can span multiple index blocks.
763 1 : // If upper is exclusive we pass orEqual=true below, else we require the
764 1 : // separator to be strictly greater than upper.
765 1 : if i.secondLevel.upper != nil && PI(&i.topLevelIndex).SeparatorGT(
766 1 : i.secondLevel.upper, !i.secondLevel.endKeyInclusive) {
767 1 : i.secondLevel.exhaustedBounds = +1
768 1 : }
769 : }
770 : // NB: skipForward checks whether exhaustedBounds is already +1.
771 1 : return i.skipForward()
772 : }
773 :
774 : // Last implements internalIterator.Last, as documented in the pebble
775 : // package. Note that Last only checks the lower bound. It is up to the caller
776 : // to ensure that key is less than the upper bound (e.g. via a call to
777 : // SeekLT(upper))
778 1 : func (i *twoLevelIterator[I, PI, D, PD]) Last() *base.InternalKV {
779 1 : if i.secondLevel.vState != nil {
780 1 : if i.secondLevel.endKeyInclusive {
781 1 : return i.virtualLast()
782 1 : }
783 1 : return i.SeekLT(i.secondLevel.upper, base.SeekLTFlagsNone)
784 : }
785 :
      : // Last ignores the upper bound (see the method comment above), so being
      : // called with an upper bound set is a programming error.
786 1 : if i.secondLevel.upper != nil {
787 0 : panic("twoLevelIterator.Last() used despite upper bound")
788 : }
789 1 : i.secondLevel.exhaustedBounds = 0
790 1 : i.secondLevel.err = nil // clear cached iteration error
791 1 : // Seek optimization only applies until iterator is first positioned after SetBounds.
792 1 : i.secondLevel.boundsCmp = 0
793 1 :
794 1 : if !PI(&i.topLevelIndex).Last() {
795 0 : return nil
796 0 : }
797 1 : result := i.loadSecondLevelIndexBlock(-1)
798 1 : if result == loadBlockFailed {
799 0 : return nil
800 0 : }
801 1 : if result == loadBlockOK {
802 1 : if ikv := i.secondLevel.Last(); ikv != nil {
803 1 : return ikv
804 1 : }
805 : // Else fall through to skipBackward.
806 1 : } else {
807 1 : // result == loadBlockIrrelevant. Enforce the lower bound here since
808 1 : // we don't want to bother moving to the previous entry in the top level
809 1 : // index if lower bound is already exceeded. Note that the previous
810 1 : // entry starts with keys <= ikv.InternalKey.UserKey since even though
811 1 : // this is the current block's separator, the same user key can span
812 1 : // multiple index blocks.
813 1 : if i.secondLevel.lower != nil && PI(&i.topLevelIndex).SeparatorLT(i.secondLevel.lower) {
814 1 : i.secondLevel.exhaustedBounds = -1
815 1 : }
816 : }
817 : // NB: skipBackward checks whether exhaustedBounds is already -1.
818 1 : return i.skipBackward()
819 : }
820 :
821 : // Next implements internalIterator.Next, as documented in the pebble
822 : // package.
823 : // Note: twoLevelCompactionIterator.Next mirrors the implementation of
824 : // twoLevelIterator.Next due to performance. Keep the two in sync.
825 1 : func (i *twoLevelIterator[I, PI, D, PD]) Next() *base.InternalKV {
826 1 : // Seek optimization only applies until iterator is first positioned after SetBounds.
827 1 : i.secondLevel.boundsCmp = 0
828 1 : if i.secondLevel.err != nil {
829 0 : // TODO(jackson): Can this case be turned into a panic? Once an error is
830 0 : // encountered, the iterator must be re-seeked.
831 0 : return nil
832 0 : }
833 1 : if ikv := i.secondLevel.Next(); ikv != nil {
834 1 : return ikv
835 1 : }
      : // The second level is exhausted in the forward direction; advance the
      : // top-level index to the next second-level index block.
836 1 : return i.skipForward()
837 : }
838 :
839 : // NextPrefix implements (base.InternalIterator).NextPrefix.
840 1 : func (i *twoLevelIterator[I, PI, D, PD]) NextPrefix(succKey []byte) *base.InternalKV {
841 1 : if i.secondLevel.exhaustedBounds == +1 {
842 0 : panic("Next called even though exhausted upper bound")
843 : }
844 : // Seek optimization only applies until iterator is first positioned after SetBounds.
845 1 : i.secondLevel.boundsCmp = 0
846 1 : if i.secondLevel.err != nil {
847 0 : // TODO(jackson): Can this case be turned into a panic? Once an error is
848 0 : // encountered, the iterator must be re-seeked.
849 0 : return nil
850 0 : }
      : // Fast path: try to find succKey within the current second-level block.
851 1 : if ikv := i.secondLevel.NextPrefix(succKey); ikv != nil {
852 1 : return ikv
853 1 : }
854 : // ikv == nil
      : // Distinguish "not in this block" from an iteration error; on error we
      : // must not continue seeking.
855 1 : if i.secondLevel.err != nil {
856 0 : return nil
857 0 : }
858 :
859 : // Did not find prefix in the existing second-level index block. This is the
860 : // slow-path where we seek the iterator.
861 1 : if !PI(&i.topLevelIndex).SeekGE(succKey) {
862 1 : PD(&i.secondLevel.data).Invalidate()
863 1 : PI(&i.secondLevel.index).Invalidate()
864 1 : return nil
865 1 : }
866 1 : result := i.loadSecondLevelIndexBlock(+1)
867 1 : if result == loadBlockFailed {
868 0 : return nil
869 0 : }
870 1 : if result == loadBlockIrrelevant {
871 1 : // Enforce the upper bound here since we don't want to bother moving to the
872 1 : // next entry in the top level index if upper bound is already exceeded.
873 1 : // Note that the next entry starts with keys >=
874 1 : // topLevelIndex.Separator() since even though this is the block
875 1 : // separator, the same user key can span multiple index blocks. If upper
876 1 : // is exclusive we pass orEqual=true below, else we require the
877 1 : // separator to be strictly greater than upper.
878 1 : if i.secondLevel.upper != nil && PI(&i.topLevelIndex).SeparatorGT(
879 1 : i.secondLevel.upper, !i.secondLevel.endKeyInclusive) {
880 0 : i.secondLevel.exhaustedBounds = +1
881 0 : }
882 1 : } else if kv := i.secondLevel.SeekGE(succKey, base.SeekGEFlagsNone); kv != nil {
883 1 : return i.secondLevel.maybeVerifyKey(kv)
884 1 : }
885 1 : return i.skipForward()
886 : }
887 :
888 : // Prev implements internalIterator.Prev, as documented in the pebble
889 : // package.
890 1 : func (i *twoLevelIterator[I, PI, D, PD]) Prev() *base.InternalKV {
891 1 : // Seek optimization only applies until iterator is first positioned after SetBounds.
892 1 : i.secondLevel.boundsCmp = 0
893 1 : if i.secondLevel.err != nil {
894 0 : return nil
895 0 : }
896 1 : if kv := i.secondLevel.Prev(); kv != nil {
897 1 : return kv
898 1 : }
      : // The second level is exhausted in the backward direction; step the
      : // top-level index back to the previous second-level index block.
899 1 : return i.skipBackward()
900 : }
901 :
      : // skipForward advances the top-level index, loading successive
      : // second-level index blocks, until it finds a key to return; it returns
      : // nil once the table is exhausted, the upper bound is reached, or an
      : // error occurs.
902 1 : func (i *twoLevelIterator[I, PI, D, PD]) skipForward() *base.InternalKV {
903 1 : for {
904 1 : if i.secondLevel.err != nil || i.secondLevel.exhaustedBounds > 0 {
905 1 : return nil
906 1 : }
907 :
908 : // It is possible that skipBackward went too far and the virtual table lower
909 : // bound is after the first key in the block we are about to load, in which
910 : // case we must use SeekGE below. The keys in the block we are about to load
911 : // start right after the topLevelIndex key (before we Next).
912 : //
913 : // An example of how this can happen:
914 : //
915 : // Second-level index block 1 - contains keys a@1, c@1
916 : // Second-level index block 2 - contains keys e@1, g@1
917 : // Second-level index block 3 - contains keys i@2, k@2
918 : //
919 : // The virtual table lower bound is f. We have a range key masking filter
920 : // that filters keys with @1 suffix. We are positioned inside block 3 then
921 : // we Prev(). Block 2 is entirely filtered out, which makes us move to
922 : // block 1. Now the range key masking filter gets an update (via
923 : // SpanChanged) and it no longer filters out any keys. At this point if a
924 : // Next happens, we will load block 2 but it would not be legal to return
925 : // "e@1" which is outside the virtual bounds.
926 : //
927 : // The core of the problem is that skipBackward doesn't know it can stop
928 : // at block 2, because it doesn't know what keys are at the start of that
929 : // block. This is why we don't have this problem in the opposite
930 : // direction: skipForward will never go beyond the last relevant block
931 : // because it looks at the separator key which is an upper bound for the
932 : // block.
933 : //
934 : // Note that this is only a problem with virtual tables; we make no
935 : // guarantees wrt an iterator lower bound when we iterate forward. But we
936 : // must never return keys that are not inside the virtual table.
937 1 : useSeek := i.secondLevel.vState != nil && (!PI(&i.topLevelIndex).Valid() ||
938 1 : PI(&i.topLevelIndex).SeparatorLT(i.secondLevel.vState.lower.UserKey))
939 1 :
940 1 : i.secondLevel.exhaustedBounds = 0
941 1 : if !PI(&i.topLevelIndex).Next() {
942 1 : PD(&i.secondLevel.data).Invalidate()
943 1 : PI(&i.secondLevel.index).Invalidate()
944 1 : return nil
945 1 : }
946 1 : result := i.loadSecondLevelIndexBlock(+1)
947 1 : if result == loadBlockFailed {
948 0 : return nil
949 0 : }
950 1 : if result == loadBlockOK {
951 1 : var ikv *base.InternalKV
952 1 : if useSeek {
953 1 : ikv = i.secondLevel.SeekGE(i.secondLevel.lower, base.SeekGEFlagsNone)
954 1 : } else {
955 1 : ikv = i.secondLevel.firstInternal()
956 1 : }
957 1 : if ikv != nil {
958 1 : return i.secondLevel.maybeVerifyKey(ikv)
959 1 : }
960 : // Next iteration will return if singleLevelIterator set
961 : // exhaustedBounds = +1.
962 1 : } else {
963 1 : // result == loadBlockIrrelevant. Enforce the upper bound here since
964 1 : // we don't want to bother moving to the next entry in the top level
965 1 : // index if upper bound is already exceeded. Note that the next
966 1 : // entry starts with keys >= i.topLevelIndex.Separator() since even
967 1 : // though this is the block separator, the same user key can span
968 1 : // multiple index blocks. If upper is exclusive we pass orEqual=true
969 1 : // below, else we require the separator to be strictly greater than
970 1 : // upper.
971 1 : if i.secondLevel.upper != nil && PI(&i.topLevelIndex).SeparatorGT(
972 1 : i.secondLevel.upper, !i.secondLevel.endKeyInclusive) {
973 1 : i.secondLevel.exhaustedBounds = +1
974 1 : // Next iteration will return.
975 1 : }
976 : }
977 : }
978 : }
979 :
      : // skipBackward steps the top-level index back, loading preceding
      : // second-level index blocks, until it finds a key to return; it returns
      : // nil once the table is exhausted, the lower bound is reached, or an
      : // error occurs.
980 1 : func (i *twoLevelIterator[I, PI, D, PD]) skipBackward() *base.InternalKV {
981 1 : for {
982 1 : if i.secondLevel.err != nil || i.secondLevel.exhaustedBounds < 0 {
983 1 : return nil
984 1 : }
985 1 : i.secondLevel.exhaustedBounds = 0
986 1 : if !PI(&i.topLevelIndex).Prev() {
987 1 : PD(&i.secondLevel.data).Invalidate()
988 1 : PI(&i.secondLevel.index).Invalidate()
989 1 : return nil
990 1 : }
991 1 : result := i.loadSecondLevelIndexBlock(-1)
992 1 : if result == loadBlockFailed {
993 0 : return nil
994 0 : }
995 1 : if result == loadBlockOK {
996 1 : ikv := i.secondLevel.lastInternal()
997 1 : if ikv != nil {
998 1 : return i.secondLevel.maybeVerifyKey(ikv)
999 1 : }
1000 :
1001 : // Next iteration will return if singleLevelIterator set
1002 : // exhaustedBounds = -1.
1003 1 : } else {
1004 1 : // result == loadBlockIrrelevant. Enforce the lower bound here since
1005 1 : // we don't want to bother moving to the previous entry in the top
1006 1 : // level index if lower bound is already exceeded. Note that the
1007 1 : // previous entry starts with keys <= i.topLevelIndex.Separator() since
1008 1 : // even though this is the current block's separator, the same user
1009 1 : // key can span multiple index blocks.
1010 1 : if i.secondLevel.lower != nil && PI(&i.topLevelIndex).SeparatorLT(i.secondLevel.lower) {
1011 1 : i.secondLevel.exhaustedBounds = -1
1012 1 : // Next iteration will return.
1013 1 : }
1014 : }
1015 : }
1016 : }
1017 :
      : // Error implements internalIterator.Error, returning any iteration error
      : // accumulated by the embedded second-level iterator.
1018 1 : func (i *twoLevelIterator[I, PI, D, PD]) Error() error {
1019 1 : return i.secondLevel.Error()
1020 1 : }
1021 :
      : // SetBounds implements internalIterator.SetBounds; bounds state is kept
      : // on the embedded second-level iterator.
1022 1 : func (i *twoLevelIterator[I, PI, D, PD]) SetBounds(lower, upper []byte) {
1023 1 : i.secondLevel.SetBounds(lower, upper)
1024 1 : }
1025 :
      : // SetContext implements internalIterator.SetContext, delegating to the
      : // embedded second-level iterator.
1026 0 : func (i *twoLevelIterator[I, PI, D, PD]) SetContext(ctx context.Context) {
1027 0 : i.secondLevel.SetContext(ctx)
1028 0 : }
1029 :
      : // SetCloseHook installs a hook to run when the iterator is closed,
      : // delegating to the embedded second-level iterator.
1030 1 : func (i *twoLevelIterator[I, PI, D, PD]) SetCloseHook(fn func(i Iterator) error) {
1031 1 : i.secondLevel.SetCloseHook(fn)
1032 1 : }
1033 :
      : // SetupForCompaction configures the iterator for compaction use,
      : // delegating to the embedded second-level iterator.
1034 1 : func (i *twoLevelIterator[I, PI, D, PD]) SetupForCompaction() {
1035 1 : i.secondLevel.SetupForCompaction()
1036 1 : }
1037 :
1038 : // Close implements internalIterator.Close, as documented in the pebble
1039 : // package.
1040 1 : func (i *twoLevelIterator[I, PI, D, PD]) Close() error {
      : // Only the two-level iterator owns a pool (see the pool field doc); the
      : // embedded singleLevelIterator must never carry one of its own.
1041 1 : if invariants.Enabled && i.secondLevel.pool != nil {
1042 0 : panic("twoLevelIterator's singleLevelIterator has its own non-nil pool")
1043 : }
      : // Capture the pool before resetting state: the iterator is wiped for
      : // reuse prior to being returned to the pool below.
1044 1 : pool := i.pool
1045 1 : err := i.secondLevel.closeInternal()
1046 1 : i.secondLevel.resetForReuse()
      : // Also close the top-level index iterator, combining any errors.
1047 1 : err = firstError(err, PI(&i.topLevelIndex).Close())
1048 1 : i.useFilterBlock = false
1049 1 : i.lastBloomFilterMatched = false
1050 1 : if pool != nil {
1051 1 : pool.Put(i)
1052 1 : }
1053 1 : return err
1054 : }
|