Line data Source code
1 : // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package sstable
6 :
7 : import (
8 : "bytes"
9 : "context"
10 : "fmt"
11 : "sync"
12 :
13 : "github.com/cockroachdb/errors"
14 : "github.com/cockroachdb/pebble/internal/base"
15 : "github.com/cockroachdb/pebble/internal/invariants"
16 : "github.com/cockroachdb/pebble/internal/treeprinter"
17 : "github.com/cockroachdb/pebble/objstorage"
18 : "github.com/cockroachdb/pebble/objstorage/objstorageprovider"
19 : "github.com/cockroachdb/pebble/objstorage/objstorageprovider/objiotracing"
20 : "github.com/cockroachdb/pebble/sstable/block"
21 : )
22 :
23 : type twoLevelIterator[I any, PI indexBlockIterator[I], D any, PD dataBlockIterator[D]] struct {
24 : secondLevel singleLevelIterator[I, PI, D, PD]
25 : topLevelIndex I
26 : // pool is the pool from which the iterator was allocated and to which the
27 : // iterator should be returned on Close. Because the iterator is
28 : // parameterized by the type of the data block iterator, pools must be
29 : // specific to the type of the data block iterator.
30 : pool *sync.Pool
31 :
32 : // useFilterBlock controls whether we consult the bloom filter in the
33 : // twoLevelIterator code. Note that secondLevel.useFilterBlock is always
34 : // false - any filtering happens at the top level.
35 : useFilterBlock bool
36 : lastBloomFilterMatched bool
37 : }
38 :
39 : var _ Iterator = (*twoLevelIteratorRowBlocks)(nil)
40 :
41 : // loadIndex loads the index block at the current top level index position and
42 : // leaves i.index unpositioned. If unsuccessful, it gets i.secondLevel.err to any error
43 : // encountered, which may be nil if we have simply exhausted the entire table.
44 : // This is used for two level indexes.
45 2 : func (i *twoLevelIterator[I, PI, D, PD]) loadIndex(dir int8) loadBlockResult {
46 2 : // Ensure the index data block iterators are invalidated even if loading of
47 2 : // the index fails.
48 2 : PD(&i.secondLevel.data).Invalidate()
49 2 : PI(&i.secondLevel.index).Invalidate()
50 2 : if !PI(&i.topLevelIndex).Valid() {
51 0 : return loadBlockFailed
52 0 : }
53 2 : bhp, err := PI(&i.topLevelIndex).BlockHandleWithProperties()
54 2 : if err != nil {
55 0 : i.secondLevel.err = base.CorruptionErrorf("pebble/table: corrupt top level index entry (%v)", err)
56 0 : return loadBlockFailed
57 0 : }
58 2 : if i.secondLevel.bpfs != nil {
59 2 : intersects, err := i.secondLevel.bpfs.intersects(bhp.Props)
60 2 : if err != nil {
61 0 : i.secondLevel.err = errCorruptIndexEntry(err)
62 0 : return loadBlockFailed
63 0 : }
64 2 : if intersects == blockMaybeExcluded {
65 1 : intersects = i.resolveMaybeExcluded(dir)
66 1 : }
67 2 : if intersects == blockExcluded {
68 2 : return loadBlockIrrelevant
69 2 : }
70 : // blockIntersects
71 : }
72 2 : ctx := objiotracing.WithBlockType(i.secondLevel.ctx, objiotracing.MetadataBlock)
73 2 : indexBlock, err := i.secondLevel.reader.readBlock(
74 2 : ctx, bhp.Handle, nil /* transform */, i.secondLevel.indexFilterRH, i.secondLevel.stats, &i.secondLevel.iterStats, i.secondLevel.bufferPool)
75 2 : if err == nil {
76 2 : err = PI(&i.secondLevel.index).InitHandle(i.secondLevel.cmp, i.secondLevel.reader.Split, indexBlock, i.secondLevel.transforms)
77 2 : }
78 2 : if err != nil {
79 1 : i.secondLevel.err = err
80 1 : return loadBlockFailed
81 1 : }
82 2 : return loadBlockOK
83 : }
84 :
85 : // resolveMaybeExcluded is invoked when the block-property filterer has found
86 : // that an index block is excluded according to its properties but only if its
87 : // bounds fall within the filter's current bounds. This function consults the
88 : // appropriate bound, depending on the iteration direction, and returns either
89 : // `blockIntersects` or `blockExcluded`.
90 1 : func (i *twoLevelIterator[I, PI, D, PD]) resolveMaybeExcluded(dir int8) intersectsResult {
91 1 : // This iterator is configured with a bound-limited block property filter.
92 1 : // The bpf determined this entire index block could be excluded from
93 1 : // iteration based on the property encoded in the block handle. However, we
94 1 : // still need to determine if the index block is wholly contained within the
95 1 : // filter's key bounds.
96 1 : //
97 1 : // External guarantees ensure all its data blocks' keys are ≥ the filter's
98 1 : // lower bound during forward iteration, and that all its data blocks' keys
99 1 : // are < the filter's upper bound during backward iteration. We only need to
100 1 : // determine if the opposite bound is also met.
101 1 : //
102 1 : // The index separator in topLevelIndex.Separator() provides an inclusive
103 1 : // upper-bound for the index block's keys, guaranteeing that all its keys
104 1 : // are ≤ topLevelIndex.Separator(). For forward iteration, this is all we
105 1 : // need.
106 1 : if dir > 0 {
107 1 : // Forward iteration.
108 1 : if i.secondLevel.bpfs.boundLimitedFilter.KeyIsWithinUpperBound(PI(&i.topLevelIndex).Separator()) {
109 1 : return blockExcluded
110 1 : }
111 1 : return blockIntersects
112 : }
113 :
114 : // Reverse iteration.
115 : //
116 : // Because we're iterating in the reverse direction, we don't yet have
117 : // enough context available to determine if the block is wholly contained
118 : // within its bounds. This case arises only during backward iteration,
119 : // because of the way the index is structured.
120 : //
121 : // Consider a bound-limited bpf limited to the bounds [b,d), loading the
122 : // block with separator `c`. During reverse iteration, the guarantee that
123 : // all the block's keys are < `d` is externally provided, but no guarantee
124 : // is made on the bpf's lower bound. The separator `c` only provides an
125 : // inclusive upper bound on the block's keys, indicating that the
126 : // corresponding block handle points to a block containing only keys ≤ `c`.
127 : //
128 : // To establish a lower bound, we step the top-level index backwards to read
129 : // the previous block's separator, which provides an inclusive lower bound
130 : // on the original index block's keys. Afterwards, we step forward to
131 : // restore our top-level index position.
132 1 : if !PI(&i.topLevelIndex).Prev() {
133 1 : // The original block points to the first index block of this table. If
134 1 : // we knew the lower bound for the entire table, it could provide a
135 1 : // lower bound, but the code refactoring necessary to read it doesn't
136 1 : // seem worth the payoff. We fall through to loading the block.
137 1 : } else if i.secondLevel.bpfs.boundLimitedFilter.KeyIsWithinLowerBound(PI(&i.topLevelIndex).Separator()) {
138 1 : // The lower-bound on the original index block falls within the filter's
139 1 : // bounds, and we can skip the block (after restoring our current
140 1 : // top-level index position).
141 1 : _ = PI(&i.topLevelIndex).Next()
142 1 : return blockExcluded
143 1 : }
144 1 : _ = PI(&i.topLevelIndex).Next()
145 1 : return blockIntersects
146 : }
147 :
148 : // newColumnBlockTwoLevelIterator reads the top-level index block and creates and
149 : // initializes a two-level iterator over an sstable with column-oriented data
150 : // blocks.
151 : //
152 : // Note that lower, upper are iterator bounds and are separate from virtual
153 : // sstable bounds. If the virtualState passed in is not nil, then virtual
154 : // sstable bounds will be enforced.
155 : func newColumnBlockTwoLevelIterator(
156 : ctx context.Context,
157 : r *Reader,
158 : v *virtualState,
159 : transforms IterTransforms,
160 : lower, upper []byte,
161 : filterer *BlockPropertiesFilterer,
162 : filterBlockSizeLimit FilterBlockSizeLimit,
163 : stats *base.InternalIteratorStats,
164 : categoryAndQoS CategoryAndQoS,
165 : statsCollector *CategoryStatsCollector,
166 : rp ReaderProvider,
167 : bufferPool *block.BufferPool,
168 0 : ) (*twoLevelIteratorColumnBlocks, error) {
169 0 : if r.err != nil {
170 0 : return nil, r.err
171 0 : }
172 0 : if !r.tableFormat.BlockColumnar() {
173 0 : panic(errors.AssertionFailedf("table format %d should not use columnar block format", r.tableFormat))
174 : }
175 0 : i := twoLevelIterColumnBlockPool.Get().(*twoLevelIteratorColumnBlocks)
176 0 : i.secondLevel.init(ctx, r, v, transforms, lower, upper, filterer,
177 0 : false, // Disable the use of the filter block in the second level.
178 0 : stats, categoryAndQoS, statsCollector, bufferPool)
179 0 : if r.Properties.NumValueBlocks > 0 {
180 0 : // NB: we cannot avoid this ~248 byte allocation, since valueBlockReader
181 0 : // can outlive the singleLevelIterator due to be being embedded in a
182 0 : // LazyValue. This consumes ~2% in microbenchmark CPU profiles, but we
183 0 : // should only optimize this if it shows up as significant in end-to-end
184 0 : // CockroachDB benchmarks, since it is tricky to do so. One possibility
185 0 : // is that if many sstable iterators only get positioned at latest
186 0 : // versions of keys, and therefore never expose a LazyValue that is
187 0 : // separated to their callers, they can put this valueBlockReader into a
188 0 : // sync.Pool.
189 0 : i.secondLevel.vbReader = &valueBlockReader{
190 0 : bpOpen: &i.secondLevel,
191 0 : rp: rp,
192 0 : vbih: r.valueBIH,
193 0 : stats: stats,
194 0 : }
195 0 : i.secondLevel.data.GetLazyValuer = i.secondLevel.vbReader
196 0 : i.secondLevel.vbRH = objstorageprovider.UsePreallocatedReadHandle(r.readable, objstorage.NoReadBefore, &i.secondLevel.vbRHPrealloc)
197 0 : }
198 0 : i.secondLevel.data.KeySchema = r.keySchema
199 0 : i.useFilterBlock = shouldUseFilterBlock(r, filterBlockSizeLimit)
200 0 : topLevelIndexH, err := r.readIndex(ctx, i.secondLevel.indexFilterRH, stats, &i.secondLevel.iterStats)
201 0 : if err == nil {
202 0 : err = i.topLevelIndex.InitHandle(i.secondLevel.cmp, i.secondLevel.reader.Split, topLevelIndexH, transforms)
203 0 : }
204 0 : if err != nil {
205 0 : _ = i.Close()
206 0 : return nil, err
207 0 : }
208 0 : return i, nil
209 : }
210 :
211 : // newRowBlockTwoLevelIterator reads the top-level index block and creates and
212 : // initializes a two-level iterator over an sstable with row-oriented data
213 : // blocks.
214 : //
215 : // Note that lower, upper are iterator bounds and are separate from virtual
216 : // sstable bounds. If the virtualState passed in is not nil, then virtual
217 : // sstable bounds will be enforced.
218 : func newRowBlockTwoLevelIterator(
219 : ctx context.Context,
220 : r *Reader,
221 : v *virtualState,
222 : transforms IterTransforms,
223 : lower, upper []byte,
224 : filterer *BlockPropertiesFilterer,
225 : filterBlockSizeLimit FilterBlockSizeLimit,
226 : stats *base.InternalIteratorStats,
227 : categoryAndQoS CategoryAndQoS,
228 : statsCollector *CategoryStatsCollector,
229 : rp ReaderProvider,
230 : bufferPool *block.BufferPool,
231 2 : ) (*twoLevelIteratorRowBlocks, error) {
232 2 : if r.err != nil {
233 0 : return nil, r.err
234 0 : }
235 2 : if r.tableFormat.BlockColumnar() {
236 0 : panic(errors.AssertionFailedf("table format %s uses block columnar format", r.tableFormat))
237 : }
238 2 : i := twoLevelIterRowBlockPool.Get().(*twoLevelIteratorRowBlocks)
239 2 : i.secondLevel.init(ctx, r, v, transforms, lower, upper, filterer,
240 2 : false, // Disable the use of the filter block in the second level.
241 2 : stats, categoryAndQoS, statsCollector, bufferPool)
242 2 : if r.tableFormat >= TableFormatPebblev3 {
243 2 : if r.Properties.NumValueBlocks > 0 {
244 2 : // NB: we cannot avoid this ~248 byte allocation, since valueBlockReader
245 2 : // can outlive the singleLevelIterator due to be being embedded in a
246 2 : // LazyValue. This consumes ~2% in microbenchmark CPU profiles, but we
247 2 : // should only optimize this if it shows up as significant in end-to-end
248 2 : // CockroachDB benchmarks, since it is tricky to do so. One possibility
249 2 : // is that if many sstable iterators only get positioned at latest
250 2 : // versions of keys, and therefore never expose a LazyValue that is
251 2 : // separated to their callers, they can put this valueBlockReader into a
252 2 : // sync.Pool.
253 2 : i.secondLevel.vbReader = &valueBlockReader{
254 2 : bpOpen: &i.secondLevel,
255 2 : rp: rp,
256 2 : vbih: r.valueBIH,
257 2 : stats: stats,
258 2 : }
259 2 : i.secondLevel.data.SetGetLazyValuer(i.secondLevel.vbReader)
260 2 : i.secondLevel.vbRH = objstorageprovider.UsePreallocatedReadHandle(r.readable, objstorage.NoReadBefore, &i.secondLevel.vbRHPrealloc)
261 2 : }
262 2 : i.secondLevel.data.SetHasValuePrefix(true)
263 : }
264 :
265 2 : i.useFilterBlock = shouldUseFilterBlock(r, filterBlockSizeLimit)
266 2 :
267 2 : topLevelIndexH, err := r.readIndex(ctx, i.secondLevel.indexFilterRH, stats, &i.secondLevel.iterStats)
268 2 : if err == nil {
269 2 : err = i.topLevelIndex.InitHandle(i.secondLevel.cmp, i.secondLevel.reader.Split, topLevelIndexH, transforms)
270 2 : }
271 2 : if err != nil {
272 1 : _ = i.Close()
273 1 : return nil, err
274 1 : }
275 2 : return i, nil
276 : }
277 :
278 0 : func (i *twoLevelIterator[I, PI, D, PD]) String() string {
279 0 : return i.secondLevel.String()
280 0 : }
281 :
282 : // DebugTree is part of the InternalIterator interface.
283 0 : func (i *twoLevelIterator[I, PI, D, PD]) DebugTree(tp treeprinter.Node) {
284 0 : tp.Childf("%T(%p) fileNum=%s", i, i, i.String())
285 0 : }
286 :
287 : // SeekGE implements internalIterator.SeekGE, as documented in the pebble
288 : // package. Note that SeekGE only checks the upper bound. It is up to the
289 : // caller to ensure that key is greater than or equal to the lower bound.
290 : func (i *twoLevelIterator[I, PI, D, PD]) SeekGE(
291 : key []byte, flags base.SeekGEFlags,
292 2 : ) *base.InternalKV {
293 2 : if i.secondLevel.vState != nil {
294 2 : // Callers of SeekGE don't know about virtual sstable bounds, so we may
295 2 : // have to internally restrict the bounds.
296 2 : //
297 2 : // TODO(bananabrick): We can optimize away this check for the level iter
298 2 : // if necessary.
299 2 : if i.secondLevel.cmp(key, i.secondLevel.lower) < 0 {
300 2 : key = i.secondLevel.lower
301 2 : }
302 : }
303 :
304 2 : err := i.secondLevel.err
305 2 : i.secondLevel.err = nil // clear cached iteration error
306 2 :
307 2 : // The twoLevelIterator could be already exhausted. Utilize that when
308 2 : // trySeekUsingNext is true. See the comment about data-exhausted, PGDE, and
309 2 : // bounds-exhausted near the top of the file.
310 2 : if flags.TrySeekUsingNext() &&
311 2 : (i.secondLevel.exhaustedBounds == +1 || (PD(&i.secondLevel.data).IsDataInvalidated() && PI(&i.secondLevel.index).IsDataInvalidated())) &&
312 2 : err == nil {
313 2 : // Already exhausted, so return nil.
314 2 : return nil
315 2 : }
316 :
317 : // SeekGE performs various step-instead-of-seeking optimizations: eg enabled
318 : // by trySeekUsingNext, or by monotonically increasing bounds (i.boundsCmp).
319 :
320 : // We fall into the slow path if i.index.IsDataInvalidated() even if the
321 : // top-level iterator is already positioned correctly and all other
322 : // conditions are met. An alternative structure could reuse topLevelIndex's
323 : // current position and reload the index block to which it points. Arguably,
324 : // an index block load is expensive and the index block may still be earlier
325 : // than the index block containing the sought key, resulting in a wasteful
326 : // block load.
327 :
328 2 : var dontSeekWithinSingleLevelIter bool
329 2 : if PI(&i.topLevelIndex).IsDataInvalidated() || !PI(&i.topLevelIndex).Valid() || PI(&i.secondLevel.index).IsDataInvalidated() || err != nil ||
330 2 : (i.secondLevel.boundsCmp <= 0 && !flags.TrySeekUsingNext()) || i.secondLevel.cmp(key, PI(&i.topLevelIndex).Separator()) > 0 {
331 2 : // Slow-path: need to position the topLevelIndex.
332 2 :
333 2 : // The previous exhausted state of singleLevelIterator is no longer
334 2 : // relevant, since we may be moving to a different index block.
335 2 : i.secondLevel.exhaustedBounds = 0
336 2 : flags = flags.DisableTrySeekUsingNext()
337 2 : if !PI(&i.topLevelIndex).SeekGE(key) {
338 2 : PD(&i.secondLevel.data).Invalidate()
339 2 : PI(&i.secondLevel.index).Invalidate()
340 2 : return nil
341 2 : }
342 :
343 2 : result := i.loadIndex(+1)
344 2 : if result == loadBlockFailed {
345 1 : i.secondLevel.boundsCmp = 0
346 1 : return nil
347 1 : }
348 2 : if result == loadBlockIrrelevant {
349 2 : // Enforce the upper bound here since don't want to bother moving
350 2 : // to the next entry in the top level index if upper bound is
351 2 : // already exceeded. Note that the next entry starts with keys >=
352 2 : // ikey.InternalKey.UserKey since even though this is the block separator, the
353 2 : // same user key can span multiple index blocks. If upper is
354 2 : // exclusive we use >= below, else we use >.
355 2 : if i.secondLevel.upper != nil {
356 2 : cmp := i.secondLevel.cmp(PI(&i.topLevelIndex).Separator(), i.secondLevel.upper)
357 2 : if (!i.secondLevel.endKeyInclusive && cmp >= 0) || cmp > 0 {
358 2 : i.secondLevel.exhaustedBounds = +1
359 2 : }
360 : }
361 : // Fall through to skipForward.
362 2 : dontSeekWithinSingleLevelIter = true
363 2 : // Clear boundsCmp.
364 2 : //
365 2 : // In the typical cases where dontSeekWithinSingleLevelIter=false,
366 2 : // the singleLevelIterator.SeekGE call will clear boundsCmp.
367 2 : // However, in this case where dontSeekWithinSingleLevelIter=true,
368 2 : // we never seek on the single-level iterator. This call will fall
369 2 : // through to skipForward, which may improperly leave boundsCmp=+1
370 2 : // unless we clear it here.
371 2 : i.secondLevel.boundsCmp = 0
372 : }
373 2 : } else {
374 2 : // INVARIANT: err == nil.
375 2 : //
376 2 : // Else fast-path: There are two possible cases, from
377 2 : // (i.boundsCmp > 0 || flags.TrySeekUsingNext()):
378 2 : //
379 2 : // 1) The bounds have moved forward (i.boundsCmp > 0) and this SeekGE is
380 2 : // respecting the lower bound (guaranteed by Iterator). We know that the
381 2 : // iterator must already be positioned within or just outside the previous
382 2 : // bounds. Therefore, the topLevelIndex iter cannot be positioned at an
383 2 : // entry ahead of the seek position (though it can be positioned behind).
384 2 : // The !i.cmp(key, i.topLevelIndex.Separator) > 0 confirms that it is
385 2 : // not behind. Since it is not ahead and not behind it must be at the
386 2 : // right position.
387 2 : //
388 2 : // 2) This SeekGE will land on a key that is greater than the key we are
389 2 : // currently at (guaranteed by trySeekUsingNext), but since i.cmp(key,
390 2 : // i.topLevelIndex.Separator()) <= 0, we are at the correct lower level
391 2 : // index block. No need to reset the state of singleLevelIterator.
392 2 : //
393 2 : // Note that cases 1 and 2 never overlap, and one of them must be true.
394 2 : // This invariant checking is important enough that we do not gate it
395 2 : // behind invariants.Enabled.
396 2 : if i.secondLevel.boundsCmp > 0 == flags.TrySeekUsingNext() {
397 0 : panic(fmt.Sprintf("inconsistency in optimization case 1 %t and case 2 %t",
398 0 : i.secondLevel.boundsCmp > 0, flags.TrySeekUsingNext()))
399 : }
400 :
401 2 : if !flags.TrySeekUsingNext() {
402 2 : // Case 1. Bounds have changed so the previous exhausted bounds state is
403 2 : // irrelevant.
404 2 : // WARNING-data-exhausted: this is safe to do only because the monotonic
405 2 : // bounds optimizations only work when !data-exhausted. If they also
406 2 : // worked with data-exhausted, we have made it unclear whether
407 2 : // data-exhausted is actually true. See the comment at the top of the
408 2 : // file.
409 2 : i.secondLevel.exhaustedBounds = 0
410 2 : }
411 : // Else flags.TrySeekUsingNext(). The i.exhaustedBounds is important to
412 : // preserve for singleLevelIterator, and twoLevelIterator.skipForward. See
413 : // bug https://github.com/cockroachdb/pebble/issues/2036.
414 : }
415 :
416 2 : if !dontSeekWithinSingleLevelIter {
417 2 : // Note that while trySeekUsingNext could be false here, singleLevelIterator
418 2 : // could do its own boundsCmp-based optimization to seek using next.
419 2 : if ikv := i.secondLevel.SeekGE(key, flags); ikv != nil {
420 2 : return ikv
421 2 : }
422 : }
423 2 : return i.skipForward()
424 : }
425 :
426 : // SeekPrefixGE implements internalIterator.SeekPrefixGE, as documented in the
427 : // pebble package. Note that SeekPrefixGE only checks the upper bound. It is up
428 : // to the caller to ensure that key is greater than or equal to the lower bound.
429 : func (i *twoLevelIterator[I, PI, D, PD]) SeekPrefixGE(
430 : prefix, key []byte, flags base.SeekGEFlags,
431 2 : ) *base.InternalKV {
432 2 : if i.secondLevel.vState != nil {
433 2 : // Callers of SeekGE don't know about virtual sstable bounds, so we may
434 2 : // have to internally restrict the bounds.
435 2 : //
436 2 : // TODO(bananabrick): We can optimize away this check for the level iter
437 2 : // if necessary.
438 2 : if i.secondLevel.cmp(key, i.secondLevel.lower) < 0 {
439 2 : key = i.secondLevel.lower
440 2 : }
441 : }
442 :
443 : // NOTE: prefix is only used for bloom filter checking and not later work in
444 : // this method. Hence, we can use the existing iterator position if the last
445 : // SeekPrefixGE did not fail bloom filter matching.
446 :
447 2 : err := i.secondLevel.err
448 2 : i.secondLevel.err = nil // clear cached iteration error
449 2 :
450 2 : // The twoLevelIterator could be already exhausted. Utilize that when
451 2 : // trySeekUsingNext is true. See the comment about data-exhausted, PGDE, and
452 2 : // bounds-exhausted near the top of the file.
453 2 : filterUsedAndDidNotMatch := i.useFilterBlock && !i.lastBloomFilterMatched
454 2 : if flags.TrySeekUsingNext() && !filterUsedAndDidNotMatch &&
455 2 : (i.secondLevel.exhaustedBounds == +1 || (PD(&i.secondLevel.data).IsDataInvalidated() && PI(&i.secondLevel.index).IsDataInvalidated())) &&
456 2 : err == nil {
457 1 : // Already exhausted, so return nil.
458 1 : return nil
459 1 : }
460 :
461 : // Check prefix bloom filter.
462 2 : if i.useFilterBlock {
463 2 : if !i.lastBloomFilterMatched {
464 2 : // Iterator is not positioned based on last seek.
465 2 : flags = flags.DisableTrySeekUsingNext()
466 2 : }
467 2 : i.lastBloomFilterMatched = false
468 2 : var mayContain bool
469 2 : mayContain, i.secondLevel.err = i.secondLevel.bloomFilterMayContain(prefix)
470 2 : if i.secondLevel.err != nil || !mayContain {
471 2 : // In the i.secondLevel.err == nil case, this invalidation may not be necessary for
472 2 : // correctness, and may be a place to optimize later by reusing the
473 2 : // already loaded block. It was necessary in earlier versions of the code
474 2 : // since the caller was allowed to call Next when SeekPrefixGE returned
475 2 : // nil. This is no longer allowed.
476 2 : PD(&i.secondLevel.data).Invalidate()
477 2 : return nil
478 2 : }
479 2 : i.lastBloomFilterMatched = true
480 : }
481 :
482 : // Bloom filter matches.
483 :
484 : // SeekPrefixGE performs various step-instead-of-seeking optimizations: eg
485 : // enabled by trySeekUsingNext, or by monotonically increasing bounds
486 : // (i.boundsCmp).
487 :
488 : // We fall into the slow path if i.index.IsDataInvalidated() even if the
489 : // top-level iterator is already positioned correctly and all other
490 : // conditions are met. An alternative structure could reuse topLevelIndex's
491 : // current position and reload the index block to which it points. Arguably,
492 : // an index block load is expensive and the index block may still be earlier
493 : // than the index block containing the sought key, resulting in a wasteful
494 : // block load.
495 :
496 2 : var dontSeekWithinSingleLevelIter bool
497 2 : if PI(&i.topLevelIndex).IsDataInvalidated() || !PI(&i.topLevelIndex).Valid() || PI(&i.secondLevel.index).IsDataInvalidated() || err != nil ||
498 2 : (i.secondLevel.boundsCmp <= 0 && !flags.TrySeekUsingNext()) || i.secondLevel.cmp(key, PI(&i.topLevelIndex).Separator()) > 0 {
499 2 : // Slow-path: need to position the topLevelIndex.
500 2 :
501 2 : // The previous exhausted state of singleLevelIterator is no longer
502 2 : // relevant, since we may be moving to a different index block.
503 2 : i.secondLevel.exhaustedBounds = 0
504 2 : flags = flags.DisableTrySeekUsingNext()
505 2 : if !PI(&i.topLevelIndex).SeekGE(key) {
506 2 : PD(&i.secondLevel.data).Invalidate()
507 2 : PI(&i.secondLevel.index).Invalidate()
508 2 : return nil
509 2 : }
510 :
511 2 : result := i.loadIndex(+1)
512 2 : if result == loadBlockFailed {
513 1 : i.secondLevel.boundsCmp = 0
514 1 : return nil
515 1 : }
516 2 : if result == loadBlockIrrelevant {
517 2 : // Enforce the upper bound here since don't want to bother moving
518 2 : // to the next entry in the top level index if upper bound is
519 2 : // already exceeded. Note that the next entry starts with keys >=
520 2 : // ikey.InternalKey.UserKey since even though this is the block separator, the
521 2 : // same user key can span multiple index blocks. If upper is
522 2 : // exclusive we use >= below, else we use >.
523 2 : if i.secondLevel.upper != nil {
524 2 : cmp := i.secondLevel.cmp(PI(&i.topLevelIndex).Separator(), i.secondLevel.upper)
525 2 : if (!i.secondLevel.endKeyInclusive && cmp >= 0) || cmp > 0 {
526 2 : i.secondLevel.exhaustedBounds = +1
527 2 : }
528 : }
529 : // Fall through to skipForward.
530 2 : dontSeekWithinSingleLevelIter = true
531 2 : // Clear boundsCmp.
532 2 : //
533 2 : // In the typical cases where dontSeekWithinSingleLevelIter=false,
534 2 : // the singleLevelIterator.SeekPrefixGE call will clear boundsCmp.
535 2 : // However, in this case where dontSeekWithinSingleLevelIter=true,
536 2 : // we never seek on the single-level iterator. This call will fall
537 2 : // through to skipForward, which may improperly leave boundsCmp=+1
538 2 : // unless we clear it here.
539 2 : i.secondLevel.boundsCmp = 0
540 : }
541 2 : } else {
542 2 : // INVARIANT: err == nil.
543 2 : //
544 2 : // Else fast-path: There are two possible cases, from
545 2 : // (i.boundsCmp > 0 || flags.TrySeekUsingNext()):
546 2 : //
547 2 : // 1) The bounds have moved forward (i.boundsCmp > 0) and this
548 2 : // SeekPrefixGE is respecting the lower bound (guaranteed by Iterator). We
549 2 : // know that the iterator must already be positioned within or just
550 2 : // outside the previous bounds. Therefore, the topLevelIndex iter cannot
551 2 : // be positioned at an entry ahead of the seek position (though it can be
552 2 : // positioned behind). The !i.cmp(key, i.topLevelIndex.Separator()) > 0
553 2 : // confirms that it is not behind. Since it is not ahead and not behind it
554 2 : // must be at the right position.
555 2 : //
556 2 : // 2) This SeekPrefixGE will land on a key that is greater than the key we
557 2 : // are currently at (guaranteed by trySeekUsingNext), but since i.cmp(key,
558 2 : // i.topLevelIndex.Separator()) <= 0, we are at the correct lower level
559 2 : // index block. No need to reset the state of singleLevelIterator.
560 2 : //
561 2 : // Note that cases 1 and 2 never overlap, and one of them must be true.
562 2 : // This invariant checking is important enough that we do not gate it
563 2 : // behind invariants.Enabled.
564 2 : if i.secondLevel.boundsCmp > 0 == flags.TrySeekUsingNext() {
565 0 : panic(fmt.Sprintf("inconsistency in optimization case 1 %t and case 2 %t",
566 0 : i.secondLevel.boundsCmp > 0, flags.TrySeekUsingNext()))
567 : }
568 :
569 2 : if !flags.TrySeekUsingNext() {
570 1 : // Case 1. Bounds have changed so the previous exhausted bounds state is
571 1 : // irrelevant.
572 1 : // WARNING-data-exhausted: this is safe to do only because the monotonic
573 1 : // bounds optimizations only work when !data-exhausted. If they also
574 1 : // worked with data-exhausted, we have made it unclear whether
575 1 : // data-exhausted is actually true. See the comment at the top of the
576 1 : // file.
577 1 : i.secondLevel.exhaustedBounds = 0
578 1 : }
579 : // Else flags.TrySeekUsingNext(). The i.exhaustedBounds is important to
580 : // preserve for singleLevelIterator, and twoLevelIterator.skipForward. See
581 : // bug https://github.com/cockroachdb/pebble/issues/2036.
582 : }
583 :
584 2 : if !dontSeekWithinSingleLevelIter {
585 2 : if ikv := i.secondLevel.seekPrefixGE(prefix, key, flags); ikv != nil {
586 2 : return ikv
587 2 : }
588 : }
589 : // NB: skipForward checks whether exhaustedBounds is already +1.
590 2 : return i.skipForward()
591 : }
592 :
593 : // virtualLast should only be called if i.vReader != nil.
594 2 : func (i *twoLevelIterator[I, PI, D, PD]) virtualLast() *base.InternalKV {
595 2 : if i.secondLevel.vState == nil {
596 0 : panic("pebble: invalid call to virtualLast")
597 : }
598 2 : if !i.secondLevel.endKeyInclusive {
599 2 : // Trivial case.
600 2 : return i.SeekLT(i.secondLevel.upper, base.SeekLTFlagsNone)
601 2 : }
602 2 : return i.virtualLastSeekLE()
603 : }
604 :
605 : // virtualLastSeekLE implements a SeekLE() that can be used as part
606 : // of reverse-iteration calls such as a Last() on a virtual sstable. Does a
607 : // SeekLE on the upper bound of the file/iterator.
608 2 : func (i *twoLevelIterator[I, PI, D, PD]) virtualLastSeekLE() *base.InternalKV {
609 2 : // Callers of SeekLE don't know about virtual sstable bounds, so we may
610 2 : // have to internally restrict the bounds.
611 2 : //
612 2 : // TODO(bananabrick): We can optimize this check away for the level iter
613 2 : // if necessary.
614 2 : if !i.secondLevel.endKeyInclusive {
615 0 : panic("unexpected virtualLastSeekLE with exclusive upper bounds")
616 : }
617 2 : key := i.secondLevel.upper
618 2 : // Need to position the topLevelIndex.
619 2 : //
620 2 : // The previous exhausted state of singleLevelIterator is no longer
621 2 : // relevant, since we may be moving to a different index block.
622 2 : i.secondLevel.exhaustedBounds = 0
623 2 : // Seek optimization only applies until iterator is first positioned with a
624 2 : // SeekGE or SeekLT after SetBounds.
625 2 : i.secondLevel.boundsCmp = 0
626 2 : topLevelOk := PI(&i.topLevelIndex).SeekGE(key)
627 2 : // We can have multiple internal keys with the same user key as the seek
628 2 : // key. In that case, we want the last (greatest) internal key.
629 2 : for topLevelOk && bytes.Equal(PI(&i.topLevelIndex).Separator(), key) {
630 2 : topLevelOk = PI(&i.topLevelIndex).Next()
631 2 : }
632 2 : if !topLevelOk {
633 2 : return i.skipBackward()
634 2 : }
635 2 : result := i.loadIndex(-1)
636 2 : if result == loadBlockFailed {
637 0 : i.secondLevel.boundsCmp = 0
638 0 : return nil
639 0 : }
640 2 : if result == loadBlockIrrelevant {
641 1 : // Load the previous block.
642 1 : return i.skipBackward()
643 1 : }
644 2 : if ikv := i.secondLevel.virtualLastSeekLE(); ikv != nil {
645 2 : return ikv
646 2 : }
647 2 : return i.skipBackward()
648 : }
649 :
650 : // SeekLT implements internalIterator.SeekLT, as documented in the pebble
651 : // package. Note that SeekLT only checks the lower bound. It is up to the
652 : // caller to ensure that key is less than the upper bound.
653 : func (i *twoLevelIterator[I, PI, D, PD]) SeekLT(
654 : key []byte, flags base.SeekLTFlags,
655 2 : ) *base.InternalKV {
656 2 : if i.secondLevel.vState != nil {
657 2 : // Might have to fix upper bound since virtual sstable bounds are not
658 2 : // known to callers of SeekLT.
659 2 : //
660 2 : // TODO(bananabrick): We can optimize away this check for the level iter
661 2 : // if necessary.
662 2 : cmp := i.secondLevel.cmp(key, i.secondLevel.upper)
663 2 : // key == i.secondLevel.upper is fine. We'll do the right thing and return the
664 2 : // first internal key with user key < key.
665 2 : if cmp > 0 {
666 2 : return i.virtualLast()
667 2 : }
668 : }
669 :
670 2 : i.secondLevel.exhaustedBounds = 0
671 2 : i.secondLevel.err = nil // clear cached iteration error
672 2 : // Seek optimization only applies until iterator is first positioned after SetBounds.
673 2 : i.secondLevel.boundsCmp = 0
674 2 :
675 2 : var result loadBlockResult
676 2 : // NB: Unlike SeekGE, we don't have a fast-path here since we don't know
677 2 : // whether the topLevelIndex is positioned after the position that would
678 2 : // be returned by doing i.topLevelIndex.SeekGE(). To know this we would
679 2 : // need to know the index key preceding the current one.
680 2 : // NB: If a bound-limited block property filter is configured, it's
681 2 : // externally ensured that the filter is disabled (through returning
682 2 : // Intersects=false irrespective of the block props provided) during seeks.
683 2 : if !PI(&i.topLevelIndex).SeekGE(key) {
684 2 : if !PI(&i.topLevelIndex).Last() {
685 0 : PD(&i.secondLevel.data).Invalidate()
686 0 : PI(&i.secondLevel.index).Invalidate()
687 0 : return nil
688 0 : }
689 :
690 2 : result = i.loadIndex(-1)
691 2 : if result == loadBlockFailed {
692 1 : return nil
693 1 : }
694 2 : if result == loadBlockOK {
695 2 : if ikv := i.secondLevel.lastInternal(); ikv != nil {
696 2 : return i.secondLevel.maybeVerifyKey(ikv)
697 2 : }
698 : // Fall through to skipBackward since the singleLevelIterator did
699 : // not have any blocks that satisfy the block interval
700 : // constraints, or the lower bound was reached.
701 : }
702 : // Else loadBlockIrrelevant, so fall through.
703 2 : } else {
704 2 : result = i.loadIndex(-1)
705 2 : if result == loadBlockFailed {
706 1 : return nil
707 1 : }
708 2 : if result == loadBlockOK {
709 2 : if ikv := i.secondLevel.SeekLT(key, flags); ikv != nil {
710 2 : return i.secondLevel.maybeVerifyKey(ikv)
711 2 : }
712 : // Fall through to skipBackward since the singleLevelIterator did
713 : // not have any blocks that satisfy the block interval
714 : // constraint, or the lower bound was reached.
715 : }
716 : // Else loadBlockIrrelevant, so fall through.
717 : }
718 2 : if result == loadBlockIrrelevant {
719 2 : // Enforce the lower bound here since don't want to bother moving to
720 2 : // the previous entry in the top level index if lower bound is already
721 2 : // exceeded. Note that the previous entry starts with keys <=
722 2 : // ikey.InternalKey.UserKey since even though this is the current block's
723 2 : // separator, the same user key can span multiple index blocks.
724 2 : if i.secondLevel.lower != nil && i.secondLevel.cmp(PI(&i.topLevelIndex).Separator(), i.secondLevel.lower) < 0 {
725 1 : i.secondLevel.exhaustedBounds = -1
726 1 : }
727 : }
728 : // NB: skipBackward checks whether exhaustedBounds is already -1.
729 2 : return i.skipBackward()
730 : }
731 :
732 : // First implements internalIterator.First, as documented in the pebble
733 : // package. Note that First only checks the upper bound. It is up to the caller
734 : // to ensure that key is greater than or equal to the lower bound (e.g. via a
735 : // call to SeekGE(lower)).
736 2 : func (i *twoLevelIterator[I, PI, D, PD]) First() *base.InternalKV {
737 2 : // If we have a lower bound, use SeekGE. Note that in general this is not
738 2 : // supported usage, except when the lower bound is there because the table is
739 2 : // virtual.
740 2 : if i.secondLevel.lower != nil {
741 2 : return i.SeekGE(i.secondLevel.lower, base.SeekGEFlagsNone)
742 2 : }
743 2 : i.secondLevel.exhaustedBounds = 0
744 2 : i.secondLevel.err = nil // clear cached iteration error
745 2 : // Seek optimization only applies until iterator is first positioned after SetBounds.
746 2 : i.secondLevel.boundsCmp = 0
747 2 :
748 2 : if !PI(&i.topLevelIndex).First() {
749 0 : return nil
750 0 : }
751 2 : result := i.loadIndex(+1)
752 2 : if result == loadBlockFailed {
753 1 : return nil
754 1 : }
755 2 : if result == loadBlockOK {
756 2 : if ikv := i.secondLevel.First(); ikv != nil {
757 2 : return ikv
758 2 : }
759 : // Else fall through to skipForward.
760 2 : } else {
761 2 : // result == loadBlockIrrelevant. Enforce the upper bound here since
762 2 : // don't want to bother moving to the next entry in the top level
763 2 : // index if upper bound is already exceeded. Note that the next entry
764 2 : // starts with keys >= ikv.InternalKey.UserKey since even though this is the
765 2 : // block separator, the same user key can span multiple index blocks.
766 2 : // If upper is exclusive we use >= below, else we use >.
767 2 : if i.secondLevel.upper != nil {
768 1 : cmp := i.secondLevel.cmp(PI(&i.topLevelIndex).Separator(), i.secondLevel.upper)
769 1 : if (!i.secondLevel.endKeyInclusive && cmp >= 0) || cmp > 0 {
770 1 : i.secondLevel.exhaustedBounds = +1
771 1 : }
772 : }
773 : }
774 : // NB: skipForward checks whether exhaustedBounds is already +1.
775 2 : return i.skipForward()
776 : }
777 :
778 : // Last implements internalIterator.Last, as documented in the pebble
779 : // package. Note that Last only checks the lower bound. It is up to the caller
780 : // to ensure that key is less than the upper bound (e.g. via a call to
781 : // SeekLT(upper))
782 2 : func (i *twoLevelIterator[I, PI, D, PD]) Last() *base.InternalKV {
783 2 : if i.secondLevel.vState != nil {
784 2 : if i.secondLevel.endKeyInclusive {
785 2 : return i.virtualLast()
786 2 : }
787 2 : return i.SeekLT(i.secondLevel.upper, base.SeekLTFlagsNone)
788 : }
789 :
790 2 : if i.secondLevel.upper != nil {
791 0 : panic("twoLevelIterator.Last() used despite upper bound")
792 : }
793 2 : i.secondLevel.exhaustedBounds = 0
794 2 : i.secondLevel.err = nil // clear cached iteration error
795 2 : // Seek optimization only applies until iterator is first positioned after SetBounds.
796 2 : i.secondLevel.boundsCmp = 0
797 2 :
798 2 : if !PI(&i.topLevelIndex).Last() {
799 0 : return nil
800 0 : }
801 2 : result := i.loadIndex(-1)
802 2 : if result == loadBlockFailed {
803 1 : return nil
804 1 : }
805 2 : if result == loadBlockOK {
806 2 : if ikv := i.secondLevel.Last(); ikv != nil {
807 2 : return ikv
808 2 : }
809 : // Else fall through to skipBackward.
810 2 : } else {
811 2 : // result == loadBlockIrrelevant. Enforce the lower bound here since
812 2 : // don't want to bother moving to the previous entry in the top level
813 2 : // index if lower bound is already exceeded. Note that the previous
814 2 : // entry starts with keys <= ikv.InternalKey.UserKey since even though
815 2 : // this is the current block's separator, the same user key can span
816 2 : // multiple index blocks.
817 2 : if i.secondLevel.lower != nil && i.secondLevel.cmp(PI(&i.topLevelIndex).Separator(), i.secondLevel.lower) < 0 {
818 1 : i.secondLevel.exhaustedBounds = -1
819 1 : }
820 : }
821 : // NB: skipBackward checks whether exhaustedBounds is already -1.
822 2 : return i.skipBackward()
823 : }
824 :
825 : // Next implements internalIterator.Next, as documented in the pebble
826 : // package.
827 : // Note: twoLevelCompactionIterator.Next mirrors the implementation of
828 : // twoLevelIterator.Next due to performance. Keep the two in sync.
829 2 : func (i *twoLevelIterator[I, PI, D, PD]) Next() *base.InternalKV {
830 2 : // Seek optimization only applies until iterator is first positioned after SetBounds.
831 2 : i.secondLevel.boundsCmp = 0
832 2 : if i.secondLevel.err != nil {
833 1 : // TODO(jackson): Can this case be turned into a panic? Once an error is
834 1 : // encountered, the iterator must be re-seeked.
835 1 : return nil
836 1 : }
837 2 : if ikv := i.secondLevel.Next(); ikv != nil {
838 2 : return ikv
839 2 : }
840 2 : return i.skipForward()
841 : }
842 :
843 : // NextPrefix implements (base.InternalIterator).NextPrefix.
844 2 : func (i *twoLevelIterator[I, PI, D, PD]) NextPrefix(succKey []byte) *base.InternalKV {
845 2 : if i.secondLevel.exhaustedBounds == +1 {
846 0 : panic("Next called even though exhausted upper bound")
847 : }
848 : // Seek optimization only applies until iterator is first positioned after SetBounds.
849 2 : i.secondLevel.boundsCmp = 0
850 2 : if i.secondLevel.err != nil {
851 0 : // TODO(jackson): Can this case be turned into a panic? Once an error is
852 0 : // encountered, the iterator must be re-seeked.
853 0 : return nil
854 0 : }
855 2 : if ikv := i.secondLevel.NextPrefix(succKey); ikv != nil {
856 2 : return ikv
857 2 : }
858 : // ikv == nil
859 2 : if i.secondLevel.err != nil {
860 1 : return nil
861 1 : }
862 :
863 : // Did not find prefix in the existing second-level index block. This is the
864 : // slow-path where we seek the iterator.
865 2 : if !PI(&i.topLevelIndex).SeekGE(succKey) {
866 0 : PD(&i.secondLevel.data).Invalidate()
867 0 : PI(&i.secondLevel.index).Invalidate()
868 0 : return nil
869 0 : }
870 2 : result := i.loadIndex(+1)
871 2 : if result == loadBlockFailed {
872 1 : return nil
873 1 : }
874 2 : if result == loadBlockIrrelevant {
875 2 : // Enforce the upper bound here since don't want to bother moving to the
876 2 : // next entry in the top level index if upper bound is already exceeded.
877 2 : // Note that the next entry starts with keys >= ikv.InternalKey.UserKey
878 2 : // since even though this is the block separator, the same user key can
879 2 : // span multiple index blocks. If upper is exclusive we use >= below,
880 2 : // else we use >.
881 2 : if i.secondLevel.upper != nil {
882 2 : cmp := i.secondLevel.cmp(PI(&i.topLevelIndex).Separator(), i.secondLevel.upper)
883 2 : if (!i.secondLevel.endKeyInclusive && cmp >= 0) || cmp > 0 {
884 0 : i.secondLevel.exhaustedBounds = +1
885 0 : }
886 : }
887 2 : } else if kv := i.secondLevel.SeekGE(succKey, base.SeekGEFlagsNone); kv != nil {
888 2 : return i.secondLevel.maybeVerifyKey(kv)
889 2 : }
890 2 : return i.skipForward()
891 : }
892 :
893 : // Prev implements internalIterator.Prev, as documented in the pebble
894 : // package.
895 2 : func (i *twoLevelIterator[I, PI, D, PD]) Prev() *base.InternalKV {
896 2 : // Seek optimization only applies until iterator is first positioned after SetBounds.
897 2 : i.secondLevel.boundsCmp = 0
898 2 : if i.secondLevel.err != nil {
899 1 : return nil
900 1 : }
901 2 : if kv := i.secondLevel.Prev(); kv != nil {
902 2 : return kv
903 2 : }
904 2 : return i.skipBackward()
905 : }
906 :
907 2 : func (i *twoLevelIterator[I, PI, D, PD]) skipForward() *base.InternalKV {
908 2 : for {
909 2 : if i.secondLevel.err != nil || i.secondLevel.exhaustedBounds > 0 {
910 2 : return nil
911 2 : }
912 :
913 : // It is possible that skipBackward went too far and the virtual table lower
914 : // bound is after the first key in the block we are about to load, in which
915 : // case we must use SeekGE below. The keys in the block we are about to load
916 : // start right after the topLevelIndex key (before we Next).
917 : //
918 : // An example of how this can happen:
919 : //
920 : // Second-level index block 1 - contains keys a@1, c@1
921 : // Second-level index block 2 - contains keys e@1, g@1
922 : // Second-level index block 3 - contains keys i@2, k@2
923 : //
924 : // The virtual table lower bound is f. We have a range key masking filter
925 : // that filters keys with @1 suffix. We are positioned inside block 3 then
926 : // we Prev(). Block 2 is entirely filtered out, which makes us move to
927 : // block 1. Now the range key masking filter gets an update (via
928 : // SpanChanged) and it no longer filters out any keys. At this point if a
929 : // Next happens, we will load block 2 but it would not be legal to return
930 : // "e@1" which is outside the virtual bounds.
931 : //
932 : // The core of the problem is that skipBackward doesn't know it can stop
933 : // at block 2, because it doesn't know what keys are at the start of that
934 : // block. This is why we don't have this problem in the opposite
935 : // direction: skipForward will never go beyond the last relevant block
936 : // because it looks at the separator key which is an upper bound for the
937 : // block.
938 : //
939 : // Note that this is only a problem with virtual tables; we make no
940 : // guarantees wrt an iterator lower bound when we iterate forward. But we
941 : // must never return keys that are not inside the virtual table.
942 2 : useSeek := i.secondLevel.vState != nil && (!PI(&i.topLevelIndex).Valid() ||
943 2 : i.secondLevel.cmp(PI(&i.topLevelIndex).Separator(), i.secondLevel.vState.lower.UserKey) < 0)
944 2 :
945 2 : i.secondLevel.exhaustedBounds = 0
946 2 : if !PI(&i.topLevelIndex).Next() {
947 2 : PD(&i.secondLevel.data).Invalidate()
948 2 : PI(&i.secondLevel.index).Invalidate()
949 2 : return nil
950 2 : }
951 2 : result := i.loadIndex(+1)
952 2 : if result == loadBlockFailed {
953 1 : return nil
954 1 : }
955 2 : if result == loadBlockOK {
956 2 : var ikv *base.InternalKV
957 2 : if useSeek {
958 2 : ikv = i.secondLevel.SeekGE(i.secondLevel.lower, base.SeekGEFlagsNone)
959 2 : } else {
960 2 : ikv = i.secondLevel.firstInternal()
961 2 : }
962 2 : if ikv != nil {
963 2 : return i.secondLevel.maybeVerifyKey(ikv)
964 2 : }
965 : // Next iteration will return if singleLevelIterator set
966 : // exhaustedBounds = +1.
967 2 : } else {
968 2 : // result == loadBlockIrrelevant. Enforce the upper bound here since
969 2 : // don't want to bother moving to the next entry in the top level
970 2 : // index if upper bound is already exceeded. Note that the next
971 2 : // entry starts with keys >= i.topLevelIndex.Separator() since even
972 2 : // though this is the block separator, the same user key can span
973 2 : // multiple index blocks. If upper is exclusive we use >= below,
974 2 : // else we use >.
975 2 : if i.secondLevel.upper != nil {
976 2 : cmp := i.secondLevel.cmp(PI(&i.topLevelIndex).Separator(), i.secondLevel.upper)
977 2 : if (!i.secondLevel.endKeyInclusive && cmp >= 0) || cmp > 0 {
978 2 : i.secondLevel.exhaustedBounds = +1
979 2 : // Next iteration will return.
980 2 : }
981 : }
982 : }
983 : }
984 : }
985 :
986 2 : func (i *twoLevelIterator[I, PI, D, PD]) skipBackward() *base.InternalKV {
987 2 : for {
988 2 : if i.secondLevel.err != nil || i.secondLevel.exhaustedBounds < 0 {
989 2 : return nil
990 2 : }
991 2 : i.secondLevel.exhaustedBounds = 0
992 2 : if !PI(&i.topLevelIndex).Prev() {
993 2 : PD(&i.secondLevel.data).Invalidate()
994 2 : PI(&i.secondLevel.index).Invalidate()
995 2 : return nil
996 2 : }
997 2 : result := i.loadIndex(-1)
998 2 : if result == loadBlockFailed {
999 1 : return nil
1000 1 : }
1001 2 : if result == loadBlockOK {
1002 2 : ikv := i.secondLevel.lastInternal()
1003 2 : if ikv != nil {
1004 2 : return i.secondLevel.maybeVerifyKey(ikv)
1005 2 : }
1006 :
1007 : // Next iteration will return if singleLevelIterator set
1008 : // exhaustedBounds = -1.
1009 2 : } else {
1010 2 : // result == loadBlockIrrelevant. Enforce the lower bound here since
1011 2 : // don't want to bother moving to the previous entry in the top
1012 2 : // level index if lower bound is already exceeded. Note that the
1013 2 : // previous entry starts with keys <= i.topLevelIndex.Separator() since
1014 2 : // even though this is the current block's separator, the same user
1015 2 : // key can span multiple index blocks.
1016 2 : if i.secondLevel.lower != nil && i.secondLevel.cmp(PI(&i.topLevelIndex).Separator(), i.secondLevel.lower) < 0 {
1017 2 : i.secondLevel.exhaustedBounds = -1
1018 2 : // Next iteration will return.
1019 2 : }
1020 : }
1021 : }
1022 : }
1023 :
1024 2 : func (i *twoLevelIterator[I, PI, D, PD]) Error() error {
1025 2 : return i.secondLevel.Error()
1026 2 : }
1027 :
1028 2 : func (i *twoLevelIterator[I, PI, D, PD]) SetBounds(lower, upper []byte) {
1029 2 : i.secondLevel.SetBounds(lower, upper)
1030 2 : }
1031 :
1032 0 : func (i *twoLevelIterator[I, PI, D, PD]) SetContext(ctx context.Context) {
1033 0 : i.secondLevel.SetContext(ctx)
1034 0 : }
1035 :
1036 2 : func (i *twoLevelIterator[I, PI, D, PD]) SetCloseHook(fn func(i Iterator) error) {
1037 2 : i.secondLevel.SetCloseHook(fn)
1038 2 : }
1039 :
1040 2 : func (i *twoLevelIterator[I, PI, D, PD]) SetupForCompaction() {
1041 2 : i.secondLevel.SetupForCompaction()
1042 2 : }
1043 :
1044 : // Close implements internalIterator.Close, as documented in the pebble
1045 : // package.
1046 2 : func (i *twoLevelIterator[I, PI, D, PD]) Close() error {
1047 2 : if invariants.Enabled && i.secondLevel.pool != nil {
1048 0 : panic("twoLevelIterator's singleLevelIterator has its own non-nil pool")
1049 : }
1050 2 : pool := i.pool
1051 2 : err := i.secondLevel.closeInternal()
1052 2 : err = firstError(err, PI(&i.topLevelIndex).Close())
1053 2 : *i = twoLevelIterator[I, PI, D, PD]{
1054 2 : secondLevel: i.secondLevel.resetForReuse(),
1055 2 : topLevelIndex: PI(&i.topLevelIndex).ResetForReuse(),
1056 2 : pool: pool,
1057 2 : }
1058 2 : if pool != nil {
1059 2 : pool.Put(i)
1060 2 : }
1061 2 : return err
1062 : }
|