Line data Source code
1 : // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package sstable
6 :
7 : import (
8 : "context"
9 : "fmt"
10 : "unsafe"
11 :
12 : "github.com/cockroachdb/pebble/internal/base"
13 : "github.com/cockroachdb/pebble/internal/invariants"
14 : "github.com/cockroachdb/pebble/objstorage"
15 : "github.com/cockroachdb/pebble/objstorage/objstorageprovider"
16 : "github.com/cockroachdb/pebble/objstorage/objstorageprovider/objiotracing"
17 : )
18 :
// singleLevelIterator iterates over an entire table of data. To seek for a given
// key, it first looks in the index for the block that contains that key, and then
// looks inside that block.
type singleLevelIterator struct {
	// ctx is used to annotate I/O for tracing; it is captured at init time.
	ctx context.Context
	// cmp is the user-key comparison function, taken from the Reader.
	cmp Compare
	// Global lower/upper bound for the iterator.
	lower []byte
	upper []byte
	// bpfs holds the block-property filters used to skip irrelevant blocks;
	// nil when no filtering is configured.
	bpfs *BlockPropertiesFilterer
	// Per-block lower/upper bound. Nil if the bound does not apply to the block
	// because we determined the block lies completely within the bound.
	blockLower []byte
	blockUpper []byte
	reader     *Reader
	// vState will be set iff the iterator is constructed for virtual sstable
	// iteration.
	vState *virtualState
	// endKeyInclusive is set to force the iterator to treat the upper field as
	// inclusive while iterating instead of exclusive.
	endKeyInclusive bool
	// index iterates over the (single-level) index block; data iterates over
	// the currently loaded data block.
	index blockIter
	data  blockIter
	// dataRH is the read handle used for data-block reads; dataRHPrealloc
	// provides its backing storage to avoid a separate allocation.
	dataRH         objstorage.ReadHandle
	dataRHPrealloc objstorageprovider.PreallocatedReadHandle
	// dataBH refers to the last data block that the iterator considered
	// loading. It may not actually have loaded the block, due to an error or
	// because it was considered irrelevant.
	dataBH BlockHandle
	// vbReader resolves values stored in value blocks; nil unless the sstable
	// contains value blocks.
	vbReader *valueBlockReader
	// vbRH is the read handle for value blocks, which are in a different
	// part of the sstable than data blocks.
	vbRH         objstorage.ReadHandle
	vbRHPrealloc objstorageprovider.PreallocatedReadHandle
	// err holds the first error encountered; once set, positioning operations
	// return nil until it is cleared by an absolute seek.
	err error
	// closeHook, if set, is invoked when the iterator is closed.
	closeHook func(i Iterator) error
	// stats accumulates internal iterator statistics; may be nil.
	stats *base.InternalIteratorStats
	// bufferPool supplies block buffers when reads bypass the cache.
	bufferPool *BufferPool

	// boundsCmp and positionedUsingLatestBounds are for optimizing iteration
	// that uses multiple adjacent bounds. The seek after setting a new bound
	// can use the fact that the iterator is either within the previous bounds
	// or exactly one key before or after the bounds. If the new bounds is
	// after/before the previous bounds, and we are already positioned at a
	// block that is relevant for the new bounds, we can try to first position
	// using Next/Prev (repeatedly) instead of doing a more expensive seek.
	//
	// When there are wide files at higher levels that match the bounds
	// but don't have any data for the bound, we will already be
	// positioned at the key beyond the bounds and won't need to do much
	// work -- given that most data is in L6, such files are likely to
	// dominate the performance of the mergingIter, and may be the main
	// benefit of this performance optimization (of course it also helps
	// when the file that has the data has successive seeks that stay in
	// the same block).
	//
	// Specifically, boundsCmp captures the relationship between the previous
	// and current bounds, if the iterator had been positioned after setting
	// the previous bounds. If it was not positioned, i.e., Seek/First/Last
	// were not called, we don't know where it is positioned and cannot
	// optimize.
	//
	// Example: Bounds moving forward, and iterator exhausted in forward direction.
	// bounds = [f, h), ^ shows block iterator position
	// file contents [ a  b  c  d  e  f  g  h  i  j  k ]
	//                                       ^
	// new bounds = [j, k). Since positionedUsingLatestBounds=true, boundsCmp is
	// set to +1. SeekGE(j) can use next (the optimization also requires that j
	// is within the block, but that is not for correctness, but to limit the
	// optimization to when it will actually be an optimization).
	//
	// Example: Bounds moving forward.
	// bounds = [f, h), ^ shows block iterator position
	// file contents [ a  b  c  d  e  f  g  h  i  j  k ]
	//                              ^
	// new bounds = [j, k). Since positionedUsingLatestBounds=true, boundsCmp is
	// set to +1. SeekGE(j) can use next.
	//
	// Example: Bounds moving forward, but iterator not positioned using previous
	// bounds.
	// bounds = [f, h), ^ shows block iterator position
	// file contents [ a  b  c  d  e  f  g  h  i  j  k ]
	//                                             ^
	// new bounds = [i, j). Iterator is at j since it was never positioned using
	// [f, h). So positionedUsingLatestBounds=false, and boundsCmp is set to 0.
	// SeekGE(i) will not use next.
	//
	// Example: Bounds moving forward and sparse file
	// bounds = [f, h), ^ shows block iterator position
	// file contents [ a z ]
	//                    ^
	// new bounds = [j, k). Since positionedUsingLatestBounds=true, boundsCmp is
	// set to +1. SeekGE(j) notices that the iterator is already past j and does
	// not need to do anything.
	//
	// Similar examples can be constructed for backward iteration.
	//
	// This notion of exactly one key before or after the bounds is not quite
	// true when block properties are used to ignore blocks. In that case we
	// can't stop precisely at the first block that is past the bounds since
	// we are using the index entries to enforce the bounds.
	//
	// e.g. 3 blocks with keys [b, c]  [f, g], [i, j, k] with index entries d,
	// h, l. And let the lower bound be k, and we are reverse iterating. If
	// the block [i, j, k] is ignored due to the block interval annotations we
	// do need to move the index to block [f, g] since the index entry for the
	// [i, j, k] block is l which is not less than the lower bound of k. So we
	// have passed the entries i, j.
	//
	// This behavior is harmless since the block property filters are fixed
	// for the lifetime of the iterator so i, j are irrelevant. In addition,
	// the current code will not load the [f, g] block, so the seek
	// optimization that attempts to use Next/Prev do not apply anyway.
	boundsCmp                   int
	positionedUsingLatestBounds bool

	// exhaustedBounds represents whether the iterator is exhausted for
	// iteration by reaching the upper or lower bound. +1 when exhausted
	// the upper bound, -1 when exhausted the lower bound, and 0 when
	// neither. exhaustedBounds is also used for the TrySeekUsingNext
	// optimization in twoLevelIterator and singleLevelIterator. Care should be
	// taken in setting this in twoLevelIterator before calling into
	// singleLevelIterator, given that these two iterators share this field.
	exhaustedBounds int8

	// maybeFilteredKeysSingleLevel indicates whether the last iterator
	// positioning operation may have skipped any data blocks due to
	// block-property filters when positioning the index.
	maybeFilteredKeysSingleLevel bool

	// useFilter specifies whether the filter block in this sstable, if present,
	// should be used for prefix seeks or not. In some cases it is beneficial
	// to skip a filter block even if it exists (eg. if probability of a match
	// is high).
	useFilter bool
	// lastBloomFilterMatched records whether the most recent bloom filter
	// probe reported a (possible) match.
	lastBloomFilterMatched bool

	// hideObsoletePoints, when true, causes point keys marked obsolete to be
	// skipped by the data block iterator.
	hideObsoletePoints bool
}
158 :
// Compile-time assertion that singleLevelIterator implements the
// base.InternalIterator interface.
var _ base.InternalIterator = (*singleLevelIterator)(nil)
161 :
// init initializes a singleLevelIterator for reading from the table. It is
// synonymous with Reader.NewIter, but allows for reusing of the iterator
// between different Readers.
//
// Note that lower, upper passed into init has nothing to do with virtual sstable
// bounds. If the virtualState passed in is not nil, then virtual sstable bounds
// will be enforced.
//
// Returns the first error encountered while reading the index block or
// initializing the index iterator; on error the iterator must not be used.
func (i *singleLevelIterator) init(
	ctx context.Context,
	r *Reader,
	v *virtualState,
	lower, upper []byte,
	filterer *BlockPropertiesFilterer,
	useFilter, hideObsoletePoints bool,
	stats *base.InternalIteratorStats,
	rp ReaderProvider,
	bufferPool *BufferPool,
) error {
	// Propagate a previously recorded reader-level error rather than
	// attempting any reads.
	if r.err != nil {
		return r.err
	}
	indexH, err := r.readIndex(ctx, stats)
	if err != nil {
		return err
	}
	if v != nil {
		// Virtual sstable: clamp the requested bounds to the virtual bounds.
		i.vState = v
		i.endKeyInclusive, lower, upper = v.constrainBounds(lower, upper, false /* endInclusive */)
	}

	i.ctx = ctx
	i.lower = lower
	i.upper = upper
	i.bpfs = filterer
	i.useFilter = useFilter
	i.reader = r
	i.cmp = r.Compare
	i.stats = stats
	i.hideObsoletePoints = hideObsoletePoints
	i.bufferPool = bufferPool
	err = i.index.initHandle(i.cmp, indexH, r.Properties.GlobalSeqNum, false)
	if err != nil {
		// blockIter.Close releases indexH and always returns a nil error
		_ = i.index.Close()
		return err
	}
	i.dataRH = objstorageprovider.UsePreallocatedReadHandle(ctx, r.readable, &i.dataRHPrealloc)
	if r.tableFormat >= TableFormatPebblev3 {
		if r.Properties.NumValueBlocks > 0 {
			// NB: we cannot avoid this ~248 byte allocation, since valueBlockReader
			// can outlive the singleLevelIterator due to be being embedded in a
			// LazyValue. This consumes ~2% in microbenchmark CPU profiles, but we
			// should only optimize this if it shows up as significant in end-to-end
			// CockroachDB benchmarks, since it is tricky to do so. One possibility
			// is that if many sstable iterators only get positioned at latest
			// versions of keys, and therefore never expose a LazyValue that is
			// separated to their callers, they can put this valueBlockReader into a
			// sync.Pool.
			i.vbReader = &valueBlockReader{
				ctx:    ctx,
				bpOpen: i,
				rp:     rp,
				vbih:   r.valueBIH,
				stats:  stats,
			}
			i.data.lazyValueHandling.vbr = i.vbReader
			i.vbRH = objstorageprovider.UsePreallocatedReadHandle(ctx, r.readable, &i.vbRHPrealloc)
		}
		i.data.lazyValueHandling.hasValuePrefix = true
	}
	return nil
}
234 :
235 : // Helper function to check if keys returned from iterator are within global and virtual bounds.
236 : func (i *singleLevelIterator) maybeVerifyKey(
237 : iKey *InternalKey, val base.LazyValue,
238 2 : ) (*InternalKey, base.LazyValue) {
239 2 : // maybeVerify key is only used for virtual sstable iterators.
240 2 : if invariants.Enabled && i.vState != nil && iKey != nil {
241 1 : key := iKey.UserKey
242 1 :
243 1 : uc, vuc := i.cmp(key, i.upper), i.cmp(key, i.vState.upper.UserKey)
244 1 : lc, vlc := i.cmp(key, i.lower), i.cmp(key, i.vState.lower.UserKey)
245 1 :
246 1 : if (i.vState.upper.IsExclusiveSentinel() && vuc == 0) || (!i.endKeyInclusive && uc == 0) || uc > 0 || vuc > 0 || lc < 0 || vlc < 0 {
247 0 : panic(fmt.Sprintf("key: %s out of bounds of singleLevelIterator", key))
248 : }
249 : }
250 2 : return iKey, val
251 : }
252 :
// setupForCompaction sets up the singleLevelIterator for use with compactionIter.
// Currently, it skips readahead ramp-up. It should be called after init is called.
func (i *singleLevelIterator) setupForCompaction() {
	i.dataRH.SetupForCompaction()
	// vbRH is only allocated when the sstable contains value blocks.
	if i.vbRH != nil {
		i.vbRH.SetupForCompaction()
	}
}
261 :
// resetForReuse returns a zeroed singleLevelIterator, preserving only the
// reusable state of the embedded index and data block iterators so their
// buffers can be recycled by a subsequent init.
func (i *singleLevelIterator) resetForReuse() singleLevelIterator {
	return singleLevelIterator{
		index: i.index.resetForReuse(),
		data:  i.data.resetForReuse(),
	}
}
268 :
// initBounds initializes blockLower/blockUpper for the currently loaded data
// block, clearing whichever global bound provably cannot be violated within
// this block so per-key bound checks can be skipped while iterating it.
// NB: it repositions i.data (via First); callers must make an absolute
// positioning call on i.data afterwards.
func (i *singleLevelIterator) initBounds() {
	// Trim the iteration bounds for the current block. We don't have to check
	// the bounds on each iteration if the block is entirely contained within the
	// iteration bounds.
	i.blockLower = i.lower
	if i.blockLower != nil {
		key, _ := i.data.First()
		if key != nil && i.cmp(i.blockLower, key.UserKey) < 0 {
			// The lower-bound is less than the first key in the block. No need
			// to check the lower-bound again for this block.
			i.blockLower = nil
		}
	}
	i.blockUpper = i.upper
	if i.blockUpper != nil && i.cmp(i.blockUpper, i.index.Key().UserKey) > 0 {
		// The upper-bound is greater than the index key which itself is greater
		// than or equal to every key in the block. No need to check the
		// upper-bound again for this block. Even if blockUpper is inclusive
		// because of upper being inclusive, we can still safely set blockUpper
		// to nil here.
		//
		// TODO(bananabrick): We could also set blockUpper to nil for the >=
		// case, if blockUpper is inclusive.
		i.blockUpper = nil
	}
}
295 :
// disableBoundsOpt deterministically disables the bounds-based optimization
// that avoids seeking, for roughly a quarter of (bound, iterator) pairs. It
// uses the iterator pointer, since we want diversity in iterator behavior for
// the same SetBounds call. Used for tests.
func disableBoundsOpt(bound []byte, ptr uintptr) bool {
	// Fibonacci hashing of the pointer; keep only the top bit.
	// https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
	const fibonacciMultiplier = 11400714819323198485
	topBit := (fibonacciMultiplier * uint64(ptr)) >> 63
	lastByteEven := bound[len(bound)-1]&byte(1) == 0
	return lastByteEven && topBit == 0
}
304 :
// ensureBoundsOptDeterminism provides a facility for disabling of the bounds
// optimizations performed by disableBoundsOpt for tests that require
// deterministic iterator behavior. Some unit tests examine internal iterator
// state and require this behavior to be deterministic.
// When true, disableBoundsOpt is never consulted by SetBounds.
var ensureBoundsOptDeterminism bool
310 :
// SetBounds implements internalIterator.SetBounds, as documented in the pebble
// package. Note that the upper field is exclusive.
//
// For physical sstables it additionally computes boundsCmp (+1 when the new
// bounds are entirely after the old, -1 when entirely before) to enable the
// step-instead-of-seek optimization on the next positioning call.
func (i *singleLevelIterator) SetBounds(lower, upper []byte) {
	i.boundsCmp = 0
	if i.vState != nil {
		// If the reader is constructed for a virtual sstable, then we must
		// constrain the bounds of the reader. For physical sstables, the bounds
		// can be wider than the actual sstable's bounds because we won't
		// accidentally expose additional keys as there are no additional keys.
		i.endKeyInclusive, lower, upper = i.vState.constrainBounds(
			lower, upper, false,
		)
	} else {
		// TODO(bananabrick): Figure out the logic here to enable the boundsCmp
		// optimization for virtual sstables.
		if i.positionedUsingLatestBounds {
			if i.upper != nil && lower != nil && i.cmp(i.upper, lower) <= 0 {
				// New bounds are strictly after the old bounds.
				i.boundsCmp = +1
				if invariants.Enabled && !ensureBoundsOptDeterminism &&
					disableBoundsOpt(lower, uintptr(unsafe.Pointer(i))) {
					i.boundsCmp = 0
				}
			} else if i.lower != nil && upper != nil && i.cmp(upper, i.lower) <= 0 {
				// New bounds are strictly before the old bounds.
				i.boundsCmp = -1
				if invariants.Enabled && !ensureBoundsOptDeterminism &&
					disableBoundsOpt(upper, uintptr(unsafe.Pointer(i))) {
					i.boundsCmp = 0
				}
			}
		}
	}

	// The new bounds have not been used to position the iterator yet; the next
	// Seek/First/Last will set this back to true.
	i.positionedUsingLatestBounds = false
	i.lower = lower
	i.upper = upper
	i.blockLower = nil
	i.blockUpper = nil
}
349 :
// loadBlock loads the block at the current index position and leaves i.data
// unpositioned. If unsuccessful, it sets i.err to any error encountered, which
// may be nil if we have simply exhausted the entire table.
//
// dir indicates the iteration direction (+1 forward, -1 backward) and is only
// consulted when resolving a blockMaybeExcluded block-property result.
func (i *singleLevelIterator) loadBlock(dir int8) loadBlockResult {
	if !i.index.valid() {
		// Ensure the data block iterator is invalidated even if loading of the block
		// fails.
		i.data.invalidate()
		return loadBlockFailed
	}
	// Load the next block.
	v := i.index.value()
	bhp, err := decodeBlockHandleWithProperties(v.InPlaceValue())
	// NB: the decode error is deliberately checked only after the fast path
	// below; a failed decode yields a zero BlockHandle which cannot match a
	// valid i.dataBH.
	if i.dataBH == bhp.BlockHandle && i.data.valid() {
		// We're already at the data block we want to load. Reset bounds in case
		// they changed since the last seek, but don't reload the block from cache
		// or disk.
		//
		// It's safe to leave i.data in its original state here, as all callers to
		// loadBlock make an absolute positioning call (i.e. a seek, first, or last)
		// to `i.data` right after loadBlock returns loadBlockOK.
		i.initBounds()
		return loadBlockOK
	}
	// Ensure the data block iterator is invalidated even if loading of the block
	// fails.
	i.data.invalidate()
	i.dataBH = bhp.BlockHandle
	if err != nil {
		i.err = errCorruptIndexEntry
		return loadBlockFailed
	}
	if i.bpfs != nil {
		intersects, err := i.bpfs.intersects(bhp.Props)
		if err != nil {
			i.err = errCorruptIndexEntry
			return loadBlockFailed
		}
		if intersects == blockMaybeExcluded {
			intersects = i.resolveMaybeExcluded(dir)
		}
		if intersects == blockExcluded {
			// Record that a positioning operation may have skipped keys.
			i.maybeFilteredKeysSingleLevel = true
			return loadBlockIrrelevant
		}
		// blockIntersects
	}
	ctx := objiotracing.WithBlockType(i.ctx, objiotracing.DataBlock)
	block, err := i.reader.readBlock(ctx, i.dataBH, nil /* transform */, i.dataRH, i.stats, i.bufferPool)
	if err != nil {
		i.err = err
		return loadBlockFailed
	}
	i.err = i.data.initHandle(i.cmp, block, i.reader.Properties.GlobalSeqNum, i.hideObsoletePoints)
	if i.err != nil {
		// The block is partially loaded, and we don't want it to appear valid.
		i.data.invalidate()
		return loadBlockFailed
	}
	i.initBounds()
	return loadBlockOK
}
412 :
413 : // readBlockForVBR implements the blockProviderWhenOpen interface for use by
414 : // the valueBlockReader.
415 : func (i *singleLevelIterator) readBlockForVBR(
416 : ctx context.Context, h BlockHandle, stats *base.InternalIteratorStats,
417 2 : ) (bufferHandle, error) {
418 2 : ctx = objiotracing.WithBlockType(ctx, objiotracing.ValueBlock)
419 2 : return i.reader.readBlock(ctx, h, nil, i.vbRH, stats, i.bufferPool)
420 2 : }
421 :
// resolveMaybeExcluded is invoked when the block-property filterer has found
// that a block is excluded according to its properties but only if its bounds
// fall within the filter's current bounds. This function consults the
// appropriate bound, depending on the iteration direction, and returns either
// `blockIntersects` or `blockExcluded`.
func (i *singleLevelIterator) resolveMaybeExcluded(dir int8) intersectsResult {
	// TODO(jackson): We could first try comparing to top-level index block's
	// key, and if within bounds avoid per-data block key comparisons.

	// This iterator is configured with a bound-limited block property
	// filter. The bpf determined this block could be excluded from
	// iteration based on the property encoded in the block handle.
	// However, we still need to determine if the block is wholly
	// contained within the filter's key bounds.
	//
	// External guarantees ensure all the block's keys are ≥ the
	// filter's lower bound during forward iteration, and that all the
	// block's keys are < the filter's upper bound during backward
	// iteration. We only need to determine if the opposite bound is
	// also met.
	//
	// The index separator in index.Key() provides an inclusive
	// upper-bound for the data block's keys, guaranteeing that all its
	// keys are ≤ index.Key(). For forward iteration, this is all we
	// need.
	if dir > 0 {
		// Forward iteration.
		if i.bpfs.boundLimitedFilter.KeyIsWithinUpperBound(i.index.Key().UserKey) {
			return blockExcluded
		}
		return blockIntersects
	}

	// Reverse iteration.
	//
	// Because we're iterating in the reverse direction, we don't yet have
	// enough context available to determine if the block is wholly contained
	// within its bounds. This case arises only during backward iteration,
	// because of the way the index is structured.
	//
	// Consider a bound-limited bpf limited to the bounds [b,d), loading the
	// block with separator `c`. During reverse iteration, the guarantee that
	// all the block's keys are < `d` is externally provided, but no guarantee
	// is made on the bpf's lower bound. The separator `c` only provides an
	// inclusive upper bound on the block's keys, indicating that the
	// corresponding block handle points to a block containing only keys ≤ `c`.
	//
	// To establish a lower bound, we step the index backwards to read the
	// previous block's separator, which provides an inclusive lower bound on
	// the original block's keys. Afterwards, we step forward to restore our
	// index position.
	if peekKey, _ := i.index.Prev(); peekKey == nil {
		// The original block points to the first block of this index block. If
		// there's a two-level index, it could potentially provide a lower
		// bound, but the code refactoring necessary to read it doesn't seem
		// worth the payoff. We fall through to loading the block.
	} else if i.bpfs.boundLimitedFilter.KeyIsWithinLowerBound(peekKey.UserKey) {
		// The lower-bound on the original block falls within the filter's
		// bounds, and we can skip the block (after restoring our current index
		// position).
		_, _ = i.index.Next()
		return blockExcluded
	}
	// Restore the index position disturbed by the Prev above.
	_, _ = i.index.Next()
	return blockIntersects
}
488 :
// initBoundsForAlreadyLoadedBlock is like initBounds, but avoids repositioning
// i.data: it inspects the block's first user key via getFirstUserKey instead
// of calling First, so the data iterator's current position is preserved.
// It must only be called when a valid block is loaded.
func (i *singleLevelIterator) initBoundsForAlreadyLoadedBlock() {
	if i.data.getFirstUserKey() == nil {
		panic("initBoundsForAlreadyLoadedBlock must not be called on empty or corrupted block")
	}
	i.blockLower = i.lower
	if i.blockLower != nil {
		firstUserKey := i.data.getFirstUserKey()
		if firstUserKey != nil && i.cmp(i.blockLower, firstUserKey) < 0 {
			// The lower-bound is less than the first key in the block. No need
			// to check the lower-bound again for this block.
			i.blockLower = nil
		}
	}
	i.blockUpper = i.upper
	if i.blockUpper != nil && i.cmp(i.blockUpper, i.index.Key().UserKey) > 0 {
		// The upper-bound is greater than the index key which itself is greater
		// than or equal to every key in the block. No need to check the
		// upper-bound again for this block.
		i.blockUpper = nil
	}
}
510 :
// numStepsBeforeSeek is the number of times to call Next/Prev in a block
// before giving up and seeking. The value of 4 is arbitrary.
// TODO(sumeer): experiment with dynamic adjustment based on the history of
// seeks for a particular iterator.
const numStepsBeforeSeek = 4
516 :
// trySeekGEUsingNextWithinBlock attempts to satisfy a SeekGE(key) by stepping
// forward within the currently loaded data block, up to numStepsBeforeSeek
// times. done=true means the result (k, v) is final — including the
// bounds-exhausted case where k is nil; done=false means the caller must fall
// back to seeking (k==nil additionally means the block was exhausted).
func (i *singleLevelIterator) trySeekGEUsingNextWithinBlock(
	key []byte,
) (k *InternalKey, v base.LazyValue, done bool) {
	k, v = i.data.Key(), i.data.value()
	for j := 0; j < numStepsBeforeSeek; j++ {
		curKeyCmp := i.cmp(k.UserKey, key)
		if curKeyCmp >= 0 {
			// Reached a key >= the seek key; enforce the per-block upper bound
			// before returning it.
			if i.blockUpper != nil {
				cmp := i.cmp(k.UserKey, i.blockUpper)
				if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
					i.exhaustedBounds = +1
					return nil, base.LazyValue{}, true
				}
			}
			return k, v, true
		}
		k, v = i.data.Next()
		if k == nil {
			break
		}
	}
	return k, v, false
}
540 :
// trySeekLTUsingPrevWithinBlock is the reverse-direction analogue of
// trySeekGEUsingNextWithinBlock: it attempts to satisfy a SeekLT(key) by
// stepping backward within the currently loaded data block, up to
// numStepsBeforeSeek times. done=true means the result is final (including
// the lower-bound-exhausted case); done=false means the caller must seek.
func (i *singleLevelIterator) trySeekLTUsingPrevWithinBlock(
	key []byte,
) (k *InternalKey, v base.LazyValue, done bool) {
	k, v = i.data.Key(), i.data.value()
	for j := 0; j < numStepsBeforeSeek; j++ {
		curKeyCmp := i.cmp(k.UserKey, key)
		if curKeyCmp < 0 {
			// Reached a key < the seek key; enforce the per-block lower bound
			// before returning it.
			if i.blockLower != nil && i.cmp(k.UserKey, i.blockLower) < 0 {
				i.exhaustedBounds = -1
				return nil, base.LazyValue{}, true
			}
			return k, v, true
		}
		k, v = i.data.Prev()
		if k == nil {
			break
		}
	}
	return k, v, false
}
561 :
562 2 : func (i *singleLevelIterator) recordOffset() uint64 {
563 2 : offset := i.dataBH.Offset
564 2 : if i.data.valid() {
565 2 : // - i.dataBH.Length/len(i.data.data) is the compression ratio. If
566 2 : // uncompressed, this is 1.
567 2 : // - i.data.nextOffset is the uncompressed position of the current record
568 2 : // in the block.
569 2 : // - i.dataBH.Offset is the offset of the block in the sstable before
570 2 : // decompression.
571 2 : offset += (uint64(i.data.nextOffset) * i.dataBH.Length) / uint64(len(i.data.data))
572 2 : } else {
573 2 : // Last entry in the block must increment bytes iterated by the size of the block trailer
574 2 : // and restart points.
575 2 : offset += i.dataBH.Length + blockTrailerLen
576 2 : }
577 2 : return offset
578 : }
579 :
// SeekGE implements internalIterator.SeekGE, as documented in the pebble
// package. Note that SeekGE only checks the upper bound. It is up to the
// caller to ensure that key is greater than or equal to the lower bound.
func (i *singleLevelIterator) SeekGE(
	key []byte, flags base.SeekGEFlags,
) (*InternalKey, base.LazyValue) {
	if i.vState != nil {
		// Callers of SeekGE don't know about virtual sstable bounds, so we may
		// have to internally restrict the bounds.
		//
		// TODO(bananabrick): We can optimize this check away for the level iter
		// if necessary.
		if i.cmp(key, i.lower) < 0 {
			key = i.lower
		}
	}

	if flags.TrySeekUsingNext() {
		// The i.exhaustedBounds comparison indicates that the upper bound was
		// reached. The i.data.isDataInvalidated() indicates that the sstable was
		// exhausted.
		if (i.exhaustedBounds == +1 || i.data.isDataInvalidated()) && i.err == nil {
			// Already exhausted, so return nil.
			return nil, base.LazyValue{}
		}
		if i.err != nil {
			// The current iterator position cannot be used.
			flags = flags.DisableTrySeekUsingNext()
		}
		// INVARIANT: flags.TrySeekUsingNext() => i.err == nil &&
		// !i.exhaustedBounds==+1 && !i.data.isDataInvalidated(). That is,
		// data-exhausted and bounds-exhausted, as defined earlier, are both
		// false. This makes it safe to clear out i.exhaustedBounds and i.err
		// before calling into seekGEHelper.
	}

	i.exhaustedBounds = 0
	i.err = nil // clear cached iteration error
	boundsCmp := i.boundsCmp
	// Seek optimization only applies until iterator is first positioned after SetBounds.
	i.boundsCmp = 0
	i.positionedUsingLatestBounds = true
	return i.seekGEHelper(key, boundsCmp, flags)
}
624 :
// seekGEHelper contains the common functionality for SeekGE and SeekPrefixGE.
// boundsCmp is the pre-clearing value of i.boundsCmp captured by the caller,
// enabling the monotonic-bounds fast path.
func (i *singleLevelIterator) seekGEHelper(
	key []byte, boundsCmp int, flags base.SeekGEFlags,
) (*InternalKey, base.LazyValue) {
	// Invariant: trySeekUsingNext => !i.data.isDataInvalidated() && i.exhaustedBounds != +1

	// SeekGE performs various step-instead-of-seeking optimizations: eg enabled
	// by trySeekUsingNext, or by monotonically increasing bounds (i.boundsCmp).
	// Care must be taken to ensure that when performing these optimizations and
	// the iterator becomes exhausted, i.maybeFilteredKeys is set appropriately.
	// Consider a previous SeekGE that filtered keys from k until the current
	// iterator position.
	//
	// If the previous SeekGE exhausted the iterator, it's possible keys greater
	// than or equal to the current search key were filtered. We must not reuse
	// the current iterator position without remembering the previous value of
	// maybeFilteredKeys.

	var dontSeekWithinBlock bool
	if !i.data.isDataInvalidated() && !i.index.isDataInvalidated() && i.data.valid() && i.index.valid() &&
		boundsCmp > 0 && i.cmp(key, i.index.Key().UserKey) <= 0 {
		// Fast-path: The bounds have moved forward and this SeekGE is
		// respecting the lower bound (guaranteed by Iterator). We know that
		// the iterator must already be positioned within or just outside the
		// previous bounds. Therefore it cannot be positioned at a block (or
		// the position within that block) that is ahead of the seek position.
		// However it can be positioned at an earlier block. This fast-path to
		// use Next() on the block is only applied when we are already at the
		// block that the slow-path (the else-clause) would load -- this is
		// the motivation for the i.cmp(key, i.index.Key().UserKey) <= 0
		// predicate.
		i.initBoundsForAlreadyLoadedBlock()
		ikey, val, done := i.trySeekGEUsingNextWithinBlock(key)
		if done {
			return ikey, val
		}
		if ikey == nil {
			// Done with this block.
			dontSeekWithinBlock = true
		}
	} else {
		// Cannot use bounds monotonicity. But may be able to optimize if
		// caller claimed externally known invariant represented by
		// flags.TrySeekUsingNext().
		if flags.TrySeekUsingNext() {
			// seekPrefixGE or SeekGE has already ensured
			// !i.data.isDataInvalidated() && i.exhaustedBounds != +1
			currKey := i.data.Key()
			value := i.data.value()
			less := i.cmp(currKey.UserKey, key) < 0
			// We could be more sophisticated and confirm that the seek
			// position is within the current block before applying this
			// optimization. But there may be some benefit even if it is in
			// the next block, since we can avoid seeking i.index.
			for j := 0; less && j < numStepsBeforeSeek; j++ {
				currKey, value = i.Next()
				if currKey == nil {
					return nil, base.LazyValue{}
				}
				less = i.cmp(currKey.UserKey, key) < 0
			}
			if !less {
				// Stepped to a key >= the seek key; enforce the block upper
				// bound before returning.
				if i.blockUpper != nil {
					cmp := i.cmp(currKey.UserKey, i.blockUpper)
					if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
						i.exhaustedBounds = +1
						return nil, base.LazyValue{}
					}
				}
				return currKey, value
			}
		}

		// Slow-path.
		// Since we're re-seeking the iterator, the previous value of
		// maybeFilteredKeysSingleLevel is irrelevant. If we filter out blocks
		// during seeking, loadBlock will set it to true.
		i.maybeFilteredKeysSingleLevel = false

		var ikey *InternalKey
		if ikey, _ = i.index.SeekGE(key, flags.DisableTrySeekUsingNext()); ikey == nil {
			// The target key is greater than any key in the index block.
			// Invalidate the block iterator so that a subsequent call to Prev()
			// will return the last key in the table.
			i.data.invalidate()
			return nil, base.LazyValue{}
		}
		result := i.loadBlock(+1)
		if result == loadBlockFailed {
			return nil, base.LazyValue{}
		}
		if result == loadBlockIrrelevant {
			// Enforce the upper bound here since don't want to bother moving
			// to the next block if upper bound is already exceeded. Note that
			// the next block starts with keys >= ikey.UserKey since even
			// though this is the block separator, the same user key can span
			// multiple blocks. If upper is exclusive we use >= below, else
			// we use >.
			if i.upper != nil {
				cmp := i.cmp(ikey.UserKey, i.upper)
				if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
					i.exhaustedBounds = +1
					return nil, base.LazyValue{}
				}
			}
			// Want to skip to the next block.
			dontSeekWithinBlock = true
		}
	}
	if !dontSeekWithinBlock {
		if ikey, val := i.data.SeekGE(key, flags.DisableTrySeekUsingNext()); ikey != nil {
			// Found a key in the block; enforce the per-block upper bound.
			if i.blockUpper != nil {
				cmp := i.cmp(ikey.UserKey, i.blockUpper)
				if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
					i.exhaustedBounds = +1
					return nil, base.LazyValue{}
				}
			}
			return ikey, val
		}
	}
	// The current block is exhausted (or was skipped as irrelevant); advance
	// through subsequent blocks.
	return i.skipForward()
}
748 :
749 : // SeekPrefixGE implements internalIterator.SeekPrefixGE, as documented in the
750 : // pebble package. Note that SeekPrefixGE only checks the upper bound. It is up
751 : // to the caller to ensure that key is greater than or equal to the lower bound.
752 : func (i *singleLevelIterator) SeekPrefixGE(
753 : prefix, key []byte, flags base.SeekGEFlags,
754 2 : ) (*base.InternalKey, base.LazyValue) {
755 2 : if i.vState != nil {
756 1 : // Callers of SeekPrefixGE aren't aware of virtual sstable bounds, so
757 1 : // we may have to internally restrict the bounds.
758 1 : //
759 1 : // TODO(bananabrick): We can optimize away this check for the level iter
760 1 : // if necessary.
761 1 : if i.cmp(key, i.lower) < 0 {
762 1 : key = i.lower
763 1 : }
764 : }
765 2 : return i.seekPrefixGE(prefix, key, flags, i.useFilter)
766 : }
767 :
// seekPrefixGE is the implementation behind SeekPrefixGE. checkFilter
// indicates whether the table's prefix bloom filter (if any) should be
// consulted first; when the filter proves the prefix absent, the iterator is
// invalidated and nil is returned without touching any data block.
func (i *singleLevelIterator) seekPrefixGE(
	prefix, key []byte, flags base.SeekGEFlags, checkFilter bool,
) (k *InternalKey, value base.LazyValue) {
	// NOTE: prefix is only used for bloom filter checking and not later work in
	// this method. Hence, we can use the existing iterator position if the last
	// SeekPrefixGE did not fail bloom filter matching.

	err := i.err
	i.err = nil // clear cached iteration error
	if checkFilter && i.reader.tableFilter != nil {
		if !i.lastBloomFilterMatched {
			// Iterator is not positioned based on last seek.
			flags = flags.DisableTrySeekUsingNext()
		}
		i.lastBloomFilterMatched = false
		// Check prefix bloom filter.
		var dataH bufferHandle
		dataH, i.err = i.reader.readFilter(i.ctx, i.stats)
		if i.err != nil {
			i.data.invalidate()
			return nil, base.LazyValue{}
		}
		mayContain := i.reader.tableFilter.mayContain(dataH.Get(), prefix)
		dataH.Release()
		if !mayContain {
			// This invalidation may not be necessary for correctness, and may
			// be a place to optimize later by reusing the already loaded
			// block. It was necessary in earlier versions of the code since
			// the caller was allowed to call Next when SeekPrefixGE returned
			// nil. This is no longer allowed.
			i.data.invalidate()
			return nil, base.LazyValue{}
		}
		i.lastBloomFilterMatched = true
	}
	if flags.TrySeekUsingNext() {
		// The i.exhaustedBounds comparison indicates that the upper bound was
		// reached. The i.data.isDataInvalidated() indicates that the sstable was
		// exhausted.
		if (i.exhaustedBounds == +1 || i.data.isDataInvalidated()) && err == nil {
			// Already exhausted, so return nil.
			return nil, base.LazyValue{}
		}
		if err != nil {
			// The current iterator position cannot be used.
			flags = flags.DisableTrySeekUsingNext()
		}
		// INVARIANT: flags.TrySeekUsingNext() => err == nil &&
		// !i.exhaustedBounds==+1 && !i.data.isDataInvalidated(). That is,
		// data-exhausted and bounds-exhausted, as defined earlier, are both
		// false. This makes it safe to clear out i.exhaustedBounds and i.err
		// before calling into seekGEHelper.
	}
	// Bloom filter matches, or skipped, so this method will position the
	// iterator.
	i.exhaustedBounds = 0
	boundsCmp := i.boundsCmp
	// Seek optimization only applies until iterator is first positioned after SetBounds.
	i.boundsCmp = 0
	i.positionedUsingLatestBounds = true
	k, value = i.seekGEHelper(key, boundsCmp, flags)
	return i.maybeVerifyKey(k, value)
}
831 :
// virtualLast should only be called if i.vState != nil. It positions the
// iterator at the last internal key within the virtual sstable's bounds
// (the upper bound i.upper, inclusive iff i.endKeyInclusive).
func (i *singleLevelIterator) virtualLast() (*InternalKey, base.LazyValue) {
	if i.vState == nil {
		panic("pebble: invalid call to virtualLast")
	}

	// Seek to the first internal key >= the virtual upper bound.
	ikey, _ := i.SeekGE(i.upper, base.SeekGEFlagsNone)
	if i.endKeyInclusive {
		// Let's say the virtual sstable upper bound is c#1, with the keys c#3, c#2,
		// c#1, d, e, ... in the sstable. So, the last key in the virtual sstable is
		// c#1. We can perform SeekGE(i.upper) and then keep nexting until we find
		// the last key with userkey == i.upper.
		//
		// TODO(bananabrick): Think about how to improve this. If many internal keys
		// with the same user key at the upper bound then this could be slow, but
		// maybe the odds of having many internal keys with the same user key at the
		// upper bound are low.
		for ikey != nil && i.cmp(ikey.UserKey, i.upper) == 0 {
			ikey, _ = i.Next()
		}
		// Now positioned at the first key past the inclusive upper bound (or
		// exhausted); one step back lands on the last in-bounds key.
		return i.Prev()
	}

	// We seeked to the first key >= i.upper.
	return i.Prev()
}
859 :
// SeekLT implements internalIterator.SeekLT, as documented in the pebble
// package. Note that SeekLT only checks the lower bound. It is up to the
// caller to ensure that key is less than or equal to the upper bound.
func (i *singleLevelIterator) SeekLT(
	key []byte, flags base.SeekLTFlags,
) (*InternalKey, base.LazyValue) {
	if i.vState != nil {
		// Might have to fix upper bound since virtual sstable bounds are not
		// known to callers of SeekLT.
		//
		// TODO(bananabrick): We can optimize away this check for the level iter
		// if necessary.
		cmp := i.cmp(key, i.upper)
		// key == i.upper is fine. We'll do the right thing and return the
		// first internal key with user key < key.
		if cmp > 0 {
			// Return the last key in the virtual sstable.
			return i.virtualLast()
		}
	}

	i.exhaustedBounds = 0
	i.err = nil // clear cached iteration error
	boundsCmp := i.boundsCmp
	// Seek optimization only applies until iterator is first positioned after SetBounds.
	i.boundsCmp = 0

	// Seeking operations perform various step-instead-of-seeking optimizations:
	// eg by considering monotonically increasing bounds (i.boundsCmp). Care
	// must be taken to ensure that when performing these optimizations and the
	// iterator becomes exhausted i.maybeFilteredKeysSingleLevel is set
	// appropriately. Consider a previous SeekLT that filtered keys from k
	// until the current iterator position.
	//
	// If the previous SeekLT exhausted the iterator, it's possible keys
	// less than the current search key were filtered. We must not reuse the
	// current iterator position without remembering the previous value of
	// maybeFilteredKeysSingleLevel.

	i.positionedUsingLatestBounds = true

	var dontSeekWithinBlock bool
	if !i.data.isDataInvalidated() && !i.index.isDataInvalidated() && i.data.valid() && i.index.valid() &&
		boundsCmp < 0 && i.cmp(i.data.getFirstUserKey(), key) < 0 {
		// Fast-path: The bounds have moved backward, and this SeekLT is
		// respecting the upper bound (guaranteed by Iterator). We know that
		// the iterator must already be positioned within or just outside the
		// previous bounds. Therefore it cannot be positioned at a block (or
		// the position within that block) that is behind the seek position.
		// However it can be positioned at a later block. This fast-path to
		// use Prev() on the block is only applied when we are already at the
		// block that can satisfy this seek -- this is the motivation for the
		// the i.cmp(i.data.firstKey.UserKey, key) < 0 predicate.
		i.initBoundsForAlreadyLoadedBlock()
		ikey, val, done := i.trySeekLTUsingPrevWithinBlock(key)
		if done {
			return ikey, val
		}
		if ikey == nil {
			// Done with this block.
			dontSeekWithinBlock = true
		}
	} else {
		// Slow-path: seek the index block for the target key.
		i.maybeFilteredKeysSingleLevel = false
		var ikey *InternalKey

		// NB: If a bound-limited block property filter is configured, it's
		// externally ensured that the filter is disabled (through returning
		// Intersects=false irrespective of the block props provided) during
		// seeks.
		if ikey, _ = i.index.SeekGE(key, base.SeekGEFlagsNone); ikey == nil {
			// key is beyond all index separators; the candidate block is the
			// last one in the table.
			ikey, _ = i.index.Last()
			if ikey == nil {
				return nil, base.LazyValue{}
			}
		}
		// INVARIANT: ikey != nil.
		result := i.loadBlock(-1)
		if result == loadBlockFailed {
			return nil, base.LazyValue{}
		}
		if result == loadBlockIrrelevant {
			// Enforce the lower bound here since don't want to bother moving
			// to the previous block if lower bound is already exceeded. Note
			// that the previous block starts with keys <= ikey.UserKey since
			// even though this is the current block's separator, the same
			// user key can span multiple blocks.
			if i.lower != nil && i.cmp(ikey.UserKey, i.lower) < 0 {
				i.exhaustedBounds = -1
				return nil, base.LazyValue{}
			}
			// Want to skip to the previous block.
			dontSeekWithinBlock = true
		}
	}
	if !dontSeekWithinBlock {
		if ikey, val := i.data.SeekLT(key, flags); ikey != nil {
			if i.blockLower != nil && i.cmp(ikey.UserKey, i.blockLower) < 0 {
				i.exhaustedBounds = -1
				return nil, base.LazyValue{}
			}
			return ikey, val
		}
	}
	// The index contains separator keys which may lie between
	// user-keys. Consider the user-keys:
	//
	//   complete
	// ---- new block ---
	//   complexion
	//
	// If these two keys end one block and start the next, the index key may
	// be chosen as "compleu". The SeekGE in the index block will then point
	// us to the block containing "complexion". If this happens, we want the
	// last key from the previous data block.
	return i.maybeVerifyKey(i.skipBackward())
}
978 :
979 : // First implements internalIterator.First, as documented in the pebble
980 : // package. Note that First only checks the upper bound. It is up to the caller
981 : // to ensure that key is greater than or equal to the lower bound (e.g. via a
982 : // call to SeekGE(lower)).
983 2 : func (i *singleLevelIterator) First() (*InternalKey, base.LazyValue) {
984 2 : // If the iterator was created on a virtual sstable, we will SeekGE to the
985 2 : // lower bound instead of using First, because First does not respect
986 2 : // bounds.
987 2 : if i.vState != nil {
988 1 : return i.SeekGE(i.lower, base.SeekGEFlagsNone)
989 1 : }
990 :
991 2 : if i.lower != nil {
992 0 : panic("singleLevelIterator.First() used despite lower bound")
993 : }
994 2 : i.positionedUsingLatestBounds = true
995 2 : i.maybeFilteredKeysSingleLevel = false
996 2 :
997 2 : return i.firstInternal()
998 : }
999 :
// firstInternal is a helper used for absolute positioning in a single-level
// index file, or for positioning in the second-level index in a two-level
// index file. For the latter, one cannot make any claims about absolute
// positioning. It loads the first data block and returns its first key, or
// skips forward past irrelevant/empty blocks.
func (i *singleLevelIterator) firstInternal() (*InternalKey, base.LazyValue) {
	i.exhaustedBounds = 0
	i.err = nil // clear cached iteration error
	// Seek optimization only applies until iterator is first positioned after SetBounds.
	i.boundsCmp = 0

	var ikey *InternalKey
	if ikey, _ = i.index.First(); ikey == nil {
		// Empty index: nothing to iterate.
		i.data.invalidate()
		return nil, base.LazyValue{}
	}
	result := i.loadBlock(+1)
	if result == loadBlockFailed {
		return nil, base.LazyValue{}
	}
	if result == loadBlockOK {
		if ikey, val := i.data.First(); ikey != nil {
			if i.blockUpper != nil {
				cmp := i.cmp(ikey.UserKey, i.blockUpper)
				// If upper is exclusive we use >= below, else we use >.
				if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
					i.exhaustedBounds = +1
					return nil, base.LazyValue{}
				}
			}
			return ikey, val
		}
		// Else fall through to skipForward.
	} else {
		// result == loadBlockIrrelevant. Enforce the upper bound here since
		// don't want to bother moving to the next block if upper bound is
		// already exceeded. Note that the next block starts with keys >=
		// ikey.UserKey since even though this is the block separator, the
		// same user key can span multiple blocks. If upper is exclusive we
		// use >= below, else we use >.
		if i.upper != nil {
			cmp := i.cmp(ikey.UserKey, i.upper)
			if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
				i.exhaustedBounds = +1
				return nil, base.LazyValue{}
			}
		}
		// Else fall through to skipForward.
	}

	return i.skipForward()
}
1050 :
1051 : // Last implements internalIterator.Last, as documented in the pebble
1052 : // package. Note that Last only checks the lower bound. It is up to the caller
1053 : // to ensure that key is less than the upper bound (e.g. via a call to
1054 : // SeekLT(upper))
1055 2 : func (i *singleLevelIterator) Last() (*InternalKey, base.LazyValue) {
1056 2 : if i.vState != nil {
1057 1 : return i.virtualLast()
1058 1 : }
1059 :
1060 2 : if i.upper != nil {
1061 0 : panic("singleLevelIterator.Last() used despite upper bound")
1062 : }
1063 2 : i.positionedUsingLatestBounds = true
1064 2 : i.maybeFilteredKeysSingleLevel = false
1065 2 : return i.lastInternal()
1066 : }
1067 :
// lastInternal is a helper used for absolute positioning in a single-level
// index file, or for positioning in the second-level index in a two-level
// index file. For the latter, one cannot make any claims about absolute
// positioning. It loads the last data block and returns its last key, or
// skips backward past irrelevant/empty blocks.
func (i *singleLevelIterator) lastInternal() (*InternalKey, base.LazyValue) {
	i.exhaustedBounds = 0
	i.err = nil // clear cached iteration error
	// Seek optimization only applies until iterator is first positioned after SetBounds.
	i.boundsCmp = 0

	var ikey *InternalKey
	if ikey, _ = i.index.Last(); ikey == nil {
		// Empty index: nothing to iterate.
		i.data.invalidate()
		return nil, base.LazyValue{}
	}
	result := i.loadBlock(-1)
	if result == loadBlockFailed {
		return nil, base.LazyValue{}
	}
	if result == loadBlockOK {
		if ikey, val := i.data.Last(); ikey != nil {
			if i.blockLower != nil && i.cmp(ikey.UserKey, i.blockLower) < 0 {
				i.exhaustedBounds = -1
				return nil, base.LazyValue{}
			}
			return ikey, val
		}
		// Else fall through to skipBackward.
	} else {
		// result == loadBlockIrrelevant. Enforce the lower bound here since
		// don't want to bother moving to the previous block if lower bound is
		// already exceeded. Note that the previous block starts with keys <=
		// key.UserKey since even though this is the current block's
		// separator, the same user key can span multiple blocks.
		if i.lower != nil && i.cmp(ikey.UserKey, i.lower) < 0 {
			i.exhaustedBounds = -1
			return nil, base.LazyValue{}
		}
	}

	return i.skipBackward()
}
1110 :
1111 : // Next implements internalIterator.Next, as documented in the pebble
1112 : // package.
1113 : // Note: compactionIterator.Next mirrors the implementation of Iterator.Next
1114 : // due to performance. Keep the two in sync.
1115 2 : func (i *singleLevelIterator) Next() (*InternalKey, base.LazyValue) {
1116 2 : if i.exhaustedBounds == +1 {
1117 0 : panic("Next called even though exhausted upper bound")
1118 : }
1119 2 : i.exhaustedBounds = 0
1120 2 : i.maybeFilteredKeysSingleLevel = false
1121 2 : // Seek optimization only applies until iterator is first positioned after SetBounds.
1122 2 : i.boundsCmp = 0
1123 2 :
1124 2 : if i.err != nil {
1125 0 : return nil, base.LazyValue{}
1126 0 : }
1127 2 : if key, val := i.data.Next(); key != nil {
1128 2 : if i.blockUpper != nil {
1129 2 : cmp := i.cmp(key.UserKey, i.blockUpper)
1130 2 : if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
1131 2 : i.exhaustedBounds = +1
1132 2 : return nil, base.LazyValue{}
1133 2 : }
1134 : }
1135 2 : return key, val
1136 : }
1137 2 : return i.skipForward()
1138 : }
1139 :
// NextPrefix implements (base.InternalIterator).NextPrefix. It advances the
// iterator to the first key >= succKey, preferring cheap in-block stepping
// over re-seeking the index.
func (i *singleLevelIterator) NextPrefix(succKey []byte) (*InternalKey, base.LazyValue) {
	if i.exhaustedBounds == +1 {
		panic("NextPrefix called even though exhausted upper bound")
	}
	i.exhaustedBounds = 0
	i.maybeFilteredKeysSingleLevel = false
	// Seek optimization only applies until iterator is first positioned after SetBounds.
	i.boundsCmp = 0
	if i.err != nil {
		return nil, base.LazyValue{}
	}
	// Fast-path: the key for the next prefix may be in the current data block.
	if key, val := i.data.NextPrefix(succKey); key != nil {
		if i.blockUpper != nil {
			cmp := i.cmp(key.UserKey, i.blockUpper)
			// If upper is exclusive we use >= below, else we use >.
			if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
				i.exhaustedBounds = +1
				return nil, base.LazyValue{}
			}
		}
		return key, val
	}
	// Did not find prefix in the existing data block. This is the slow-path
	// where we effectively seek the iterator.
	var ikey *InternalKey
	// The key is likely to be in the next data block, so try one step.
	if ikey, _ = i.index.Next(); ikey == nil {
		// The target key is greater than any key in the index block.
		// Invalidate the block iterator so that a subsequent call to Prev()
		// will return the last key in the table.
		i.data.invalidate()
		return nil, base.LazyValue{}
	}
	if i.cmp(succKey, ikey.UserKey) > 0 {
		// Not in the next data block, so seek the index.
		if ikey, _ = i.index.SeekGE(succKey, base.SeekGEFlagsNone); ikey == nil {
			// The target key is greater than any key in the index block.
			// Invalidate the block iterator so that a subsequent call to Prev()
			// will return the last key in the table.
			i.data.invalidate()
			return nil, base.LazyValue{}
		}
	}
	result := i.loadBlock(+1)
	if result == loadBlockFailed {
		return nil, base.LazyValue{}
	}
	if result == loadBlockIrrelevant {
		// Enforce the upper bound here since don't want to bother moving
		// to the next block if upper bound is already exceeded. Note that
		// the next block starts with keys >= ikey.UserKey since even
		// though this is the block separator, the same user key can span
		// multiple blocks. If upper is exclusive we use >= below, else we use
		// >.
		if i.upper != nil {
			cmp := i.cmp(ikey.UserKey, i.upper)
			if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
				i.exhaustedBounds = +1
				return nil, base.LazyValue{}
			}
		}
	} else if key, val := i.data.SeekGE(succKey, base.SeekGEFlagsNone); key != nil {
		if i.blockUpper != nil {
			cmp := i.cmp(key.UserKey, i.blockUpper)
			if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
				i.exhaustedBounds = +1
				return nil, base.LazyValue{}
			}
		}
		return i.maybeVerifyKey(key, val)
	}

	return i.skipForward()
}
1214 :
1215 : // Prev implements internalIterator.Prev, as documented in the pebble
1216 : // package.
1217 2 : func (i *singleLevelIterator) Prev() (*InternalKey, base.LazyValue) {
1218 2 : if i.exhaustedBounds == -1 {
1219 0 : panic("Prev called even though exhausted lower bound")
1220 : }
1221 2 : i.exhaustedBounds = 0
1222 2 : i.maybeFilteredKeysSingleLevel = false
1223 2 : // Seek optimization only applies until iterator is first positioned after SetBounds.
1224 2 : i.boundsCmp = 0
1225 2 :
1226 2 : if i.err != nil {
1227 0 : return nil, base.LazyValue{}
1228 0 : }
1229 2 : if key, val := i.data.Prev(); key != nil {
1230 2 : if i.blockLower != nil && i.cmp(key.UserKey, i.blockLower) < 0 {
1231 2 : i.exhaustedBounds = -1
1232 2 : return nil, base.LazyValue{}
1233 2 : }
1234 2 : return key, val
1235 : }
1236 2 : return i.skipBackward()
1237 : }
1238 :
// skipForward steps to successive data blocks until it finds a relevant,
// non-empty block and returns its first key, or returns nil once the index
// (or the upper bound) is exhausted.
func (i *singleLevelIterator) skipForward() (*InternalKey, base.LazyValue) {
	for {
		var key *InternalKey
		if key, _ = i.index.Next(); key == nil {
			i.data.invalidate()
			break
		}
		result := i.loadBlock(+1)
		if result != loadBlockOK {
			if i.err != nil {
				break
			}
			if result == loadBlockFailed {
				// We checked that i.index was at a valid entry, so
				// loadBlockFailed could not have happened due to i.index
				// being exhausted, and must be due to an error.
				panic("loadBlock should not have failed with no error")
			}
			// result == loadBlockIrrelevant. Enforce the upper bound here
			// since don't want to bother moving to the next block if upper
			// bound is already exceeded. Note that the next block starts with
			// keys >= key.UserKey since even though this is the block
			// separator, the same user key can span multiple blocks. If upper
			// is exclusive we use >= below, else we use >.
			if i.upper != nil {
				cmp := i.cmp(key.UserKey, i.upper)
				if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
					i.exhaustedBounds = +1
					return nil, base.LazyValue{}
				}
			}
			continue
		}
		if key, val := i.data.First(); key != nil {
			if i.blockUpper != nil {
				cmp := i.cmp(key.UserKey, i.blockUpper)
				// If upper is exclusive we use >= below, else we use >.
				if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
					i.exhaustedBounds = +1
					return nil, base.LazyValue{}
				}
			}
			return i.maybeVerifyKey(key, val)
		}
		// Empty data block: loop and try the next index entry.
	}
	return nil, base.LazyValue{}
}
1285 :
// skipBackward steps to preceding data blocks until it finds a relevant,
// non-empty block and returns its last key, or returns nil once the index
// (or the lower bound) is exhausted.
func (i *singleLevelIterator) skipBackward() (*InternalKey, base.LazyValue) {
	for {
		var key *InternalKey
		if key, _ = i.index.Prev(); key == nil {
			i.data.invalidate()
			break
		}
		result := i.loadBlock(-1)
		if result != loadBlockOK {
			if i.err != nil {
				break
			}
			if result == loadBlockFailed {
				// We checked that i.index was at a valid entry, so
				// loadBlockFailed could not have happened due to i.index
				// being exhausted, and must be due to an error.
				panic("loadBlock should not have failed with no error")
			}
			// result == loadBlockIrrelevant. Enforce the lower bound here
			// since don't want to bother moving to the previous block if lower
			// bound is already exceeded. Note that the previous block starts with
			// keys <= key.UserKey since even though this is the current block's
			// separator, the same user key can span multiple blocks.
			if i.lower != nil && i.cmp(key.UserKey, i.lower) < 0 {
				i.exhaustedBounds = -1
				return nil, base.LazyValue{}
			}
			continue
		}
		key, val := i.data.Last()
		if key == nil {
			// Empty data block: nothing to return from this block.
			return nil, base.LazyValue{}
		}
		if i.blockLower != nil && i.cmp(key.UserKey, i.blockLower) < 0 {
			i.exhaustedBounds = -1
			return nil, base.LazyValue{}
		}
		return i.maybeVerifyKey(key, val)
	}
	return nil, base.LazyValue{}
}
1327 :
1328 : // Error implements internalIterator.Error, as documented in the pebble
1329 : // package.
1330 2 : func (i *singleLevelIterator) Error() error {
1331 2 : if err := i.data.Error(); err != nil {
1332 0 : return err
1333 0 : }
1334 2 : return i.err
1335 : }
1336 :
// MaybeFilteredKeys may be called when an iterator is exhausted to indicate
// whether or not the last positioning method may have skipped any keys due to
// block-property filters. A false return guarantees no keys were filtered;
// true means filtering may have occurred.
func (i *singleLevelIterator) MaybeFilteredKeys() bool {
	return i.maybeFilteredKeysSingleLevel
}
1343 :
// SetCloseHook sets a function that will be called when the iterator is
// closed. The hook receives the iterator itself and runs before the
// iterator's own resources are released in Close.
func (i *singleLevelIterator) SetCloseHook(fn func(i Iterator) error) {
	i.closeHook = fn
}
1349 :
1350 2 : func firstError(err0, err1 error) error {
1351 2 : if err0 != nil {
1352 1 : return err0
1353 1 : }
1354 2 : return err1
1355 : }
1356 :
// Close implements internalIterator.Close, as documented in the pebble
// package. It runs the close hook (if any), closes the data and index block
// iterators and read handles, releases the block-property filterer and the
// value-block reader, then resets the iterator and returns it to the pool.
// The first error encountered is returned; later cleanup still runs.
func (i *singleLevelIterator) Close() error {
	var err error
	if i.closeHook != nil {
		err = firstError(err, i.closeHook(i))
	}
	err = firstError(err, i.data.Close())
	err = firstError(err, i.index.Close())
	if i.dataRH != nil {
		err = firstError(err, i.dataRH.Close())
		i.dataRH = nil
	}
	// Surface any cached iteration error as well.
	err = firstError(err, i.err)
	if i.bpfs != nil {
		releaseBlockPropertiesFilterer(i.bpfs)
	}
	if i.vbReader != nil {
		i.vbReader.close()
	}
	if i.vbRH != nil {
		err = firstError(err, i.vbRH.Close())
		i.vbRH = nil
	}
	// Reset all state before pooling so a reused iterator starts clean.
	*i = i.resetForReuse()
	singleLevelIterPool.Put(i)
	return err
}
1385 :
1386 2 : func (i *singleLevelIterator) String() string {
1387 2 : if i.vState != nil {
1388 1 : return i.vState.fileNum.String()
1389 1 : }
1390 2 : return i.reader.fileNum.String()
1391 : }
|