1 : // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package sstable
6 :
7 : import (
8 : "bytes"
9 : "context"
10 : "fmt"
11 : "unsafe"
12 :
13 : "github.com/cockroachdb/pebble/internal/base"
14 : "github.com/cockroachdb/pebble/internal/invariants"
15 : "github.com/cockroachdb/pebble/objstorage"
16 : "github.com/cockroachdb/pebble/objstorage/objstorageprovider"
17 : "github.com/cockroachdb/pebble/objstorage/objstorageprovider/objiotracing"
18 : )
19 :
20 : // singleLevelIterator iterates over an entire table of data. To seek for a given
21 : // key, it first looks in the index for the block that contains that key, and then
22 : // looks inside that block.
23 : type singleLevelIterator struct {
24 : ctx context.Context
25 : cmp Compare
26 : // Global lower/upper bound for the iterator.
27 : lower []byte
28 : upper []byte
29 : bpfs *BlockPropertiesFilterer
30 : // Per-block lower/upper bound. Nil if the bound does not apply to the block
31 : // because we determined the block lies completely within the bound.
32 : blockLower []byte
33 : blockUpper []byte
34 : reader *Reader
35 : // vState will be set iff the iterator is constructed for virtual sstable
36 : // iteration.
37 : vState *virtualState
38 : // endKeyInclusive is set to force the iterator to treat the upper field as
39 : // inclusive while iterating instead of exclusive.
40 : endKeyInclusive bool
41 : index blockIter
42 : data blockIter
43 : dataRH objstorage.ReadHandle
44 : dataRHPrealloc objstorageprovider.PreallocatedReadHandle
45 : // dataBH refers to the last data block that the iterator considered
46 : // loading. It may not actually have loaded the block, due to an error or
47 : // because it was considered irrelevant.
48 : dataBH BlockHandle
49 : vbReader *valueBlockReader
50 : // vbRH is the read handle for value blocks, which are in a different
51 : // part of the sstable than data blocks.
52 : vbRH objstorage.ReadHandle
53 : vbRHPrealloc objstorageprovider.PreallocatedReadHandle
54 : err error
55 : closeHook func(i Iterator) error
56 : // stats and iterStats are slightly different. stats is a shared struct
57 : // supplied from the outside, and represents stats for the whole iterator
58 : // tree and can be reset from the outside (e.g. when the pebble.Iterator is
59 : // being reused). It is currently only provided when the iterator tree is
60 : // rooted at pebble.Iterator. iterStats is this sstable iterator's private
61 : // stats that are reported to a CategoryStatsCollector when this iterator is
62 : // closed. More paths are instrumented with iterStats because the
63 : // CategoryStatsCollector it needs is provided by the
64 : // tableCacheContainer (which is more universally used).
65 : stats *base.InternalIteratorStats
66 : iterStats iterStatsAccumulator
67 : bufferPool *BufferPool
68 :
69 : // boundsCmp and positionedUsingLatestBounds are for optimizing iteration
70 : // that uses multiple adjacent bounds. The seek after setting a new bound
71 : // can use the fact that the iterator is either within the previous bounds
72 : // or exactly one key before or after the bounds. If the new bounds are
73 : // after/before the previous bounds, and we are already positioned at a
74 : // block that is relevant for the new bounds, we can try to first position
75 : // using Next/Prev (repeatedly) instead of doing a more expensive seek.
76 : //
77 : // When there are wide files at higher levels that match the bounds
78 : // but don't have any data within the bounds, we will already be
79 : // positioned at the key beyond the bounds and won't need to do much
80 : // work -- given that most data is in L6, such files are likely to
81 : // dominate the performance of the mergingIter, and may be the main
82 : // benefit of this performance optimization (of course it also helps
83 : // when the file that has the data has successive seeks that stay in
84 : // the same block).
85 : //
86 : // Specifically, boundsCmp captures the relationship between the previous
87 : // and current bounds, if the iterator had been positioned after setting
88 : // the previous bounds. If it was not positioned, i.e., Seek/First/Last
89 : // were not called, we don't know where it is positioned and cannot
90 : // optimize.
91 : //
92 : // Example: Bounds moving forward, and iterator exhausted in forward direction.
93 : // bounds = [f, h), ^ shows block iterator position
94 : // file contents [ a b c d e f g h i j k ]
95 : // ^
96 : // new bounds = [j, k). Since positionedUsingLatestBounds=true, boundsCmp is
97 : // set to +1. SeekGE(j) can use next (the optimization also requires that j
98 : // is within the block; that is not needed for correctness, but limits the
99 : // optimization to cases where it will actually help).
100 : //
101 : // Example: Bounds moving forward.
102 : // bounds = [f, h), ^ shows block iterator position
103 : // file contents [ a b c d e f g h i j k ]
104 : // ^
105 : // new bounds = [j, k). Since positionedUsingLatestBounds=true, boundsCmp is
106 : // set to +1. SeekGE(j) can use next.
107 : //
108 : // Example: Bounds moving forward, but iterator not positioned using previous
109 : // bounds.
110 : // bounds = [f, h), ^ shows block iterator position
111 : // file contents [ a b c d e f g h i j k ]
112 : // ^
113 : // new bounds = [i, j). Iterator is at j since it was never positioned using
114 : // [f, h). So positionedUsingLatestBounds=false, and boundsCmp is set to 0.
115 : // SeekGE(i) will not use next.
116 : //
117 : // Example: Bounds moving forward and sparse file
118 : // bounds = [f, h), ^ shows block iterator position
119 : // file contents [ a z ]
120 : // ^
121 : // new bounds = [j, k). Since positionedUsingLatestBounds=true, boundsCmp is
122 : // set to +1. SeekGE(j) notices that the iterator is already past j and does
123 : // not need to do anything.
124 : //
125 : // Similar examples can be constructed for backward iteration.
126 : //
127 : // This notion of exactly one key before or after the bounds is not quite
128 : // true when block properties are used to ignore blocks. In that case we
129 : // can't stop precisely at the first block that is past the bounds since
130 : // we are using the index entries to enforce the bounds.
131 : //
132 : // e.g. 3 blocks with keys [b, c], [f, g], [i, j, k] with index entries d,
133 : // h, l. Let the lower bound be k, and suppose we are reverse iterating. If
134 : // the block [i, j, k] is ignored due to the block interval annotations we
135 : // do need to move the index to block [f, g] since the index entry for the
136 : // [i, j, k] block is l which is not less than the lower bound of k. So we
137 : // have passed the entries i, j.
138 : //
139 : // This behavior is harmless since the block property filters are fixed
140 : // for the lifetime of the iterator so i, j are irrelevant. In addition,
141 : // the current code will not load the [f, g] block, so the seek
142 : // optimization that attempts to use Next/Prev does not apply anyway.
143 : boundsCmp int
144 : positionedUsingLatestBounds bool
145 :
146 : // exhaustedBounds represents whether the iterator is exhausted for
147 : // iteration by reaching the upper or lower bound: +1 when the upper bound
148 : // has been exhausted, -1 when the lower bound has been exhausted, and 0 when
149 : // neither. exhaustedBounds is also used for the TrySeekUsingNext
150 : // optimization in twoLevelIterator and singleLevelIterator. Care should be
151 : // taken in setting this in twoLevelIterator before calling into
152 : // singleLevelIterator, given that these two iterators share this field.
153 : exhaustedBounds int8
154 :
155 : // maybeFilteredKeysSingleLevel indicates whether the last iterator
156 : // positioning operation may have skipped any data blocks due to
157 : // block-property filters when positioning the index.
158 : maybeFilteredKeysSingleLevel bool
159 :
160 : // useFilter specifies whether the filter block in this sstable, if present,
161 : // should be used for prefix seeks or not. In some cases it is beneficial
162 : // to skip a filter block even if it exists (e.g. if the probability of a match
163 : // is high).
164 : useFilter bool
165 : lastBloomFilterMatched bool
166 :
167 : hideObsoletePoints bool
168 : }
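// A minimal sketch of the caller sequence that enables the boundsCmp
// fast-path described above, assuming a byte-ordered comparator (`it` and the
// bound values here are hypothetical, for illustration only):
//
//	it.SetBounds([]byte("f"), []byte("h"))
//	it.SeekGE([]byte("f"), base.SeekGEFlagsNone) // positionedUsingLatestBounds = true
//	it.SetBounds([]byte("j"), []byte("k"))       // cmp(old upper "h", new lower "j") <= 0, so boundsCmp = +1
//	it.SeekGE([]byte("j"), base.SeekGEFlagsNone) // may Next() within the current block instead of re-seeking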
169 :
170 : // singleLevelIterator implements the base.InternalIterator interface.
171 : var _ base.InternalIterator = (*singleLevelIterator)(nil)
172 :
173 : // init initializes a singleLevelIterator for reading from the table. It is
174 : // synonymous with Reader.NewIter, but allows reuse of the iterator
175 : // between different Readers.
176 : //
177 : // Note that the lower, upper bounds passed into init have nothing to do with virtual sstable
178 : // bounds. If the virtualState passed in is not nil, then virtual sstable bounds
179 : // will be enforced.
180 : func (i *singleLevelIterator) init(
181 : ctx context.Context,
182 : r *Reader,
183 : v *virtualState,
184 : lower, upper []byte,
185 : filterer *BlockPropertiesFilterer,
186 : useFilter, hideObsoletePoints bool,
187 : stats *base.InternalIteratorStats,
188 : categoryAndQoS CategoryAndQoS,
189 : statsCollector *CategoryStatsCollector,
190 : rp ReaderProvider,
191 : bufferPool *BufferPool,
192 1 : ) error {
193 1 : if r.err != nil {
194 0 : return r.err
195 0 : }
196 1 : i.iterStats.init(categoryAndQoS, statsCollector)
197 1 : indexH, err := r.readIndex(ctx, stats, &i.iterStats)
198 1 : if err != nil {
199 1 : return err
200 1 : }
201 1 : if v != nil {
202 1 : i.vState = v
203 1 : i.endKeyInclusive, lower, upper = v.constrainBounds(lower, upper, false /* endInclusive */)
204 1 : }
205 :
206 1 : i.ctx = ctx
207 1 : i.lower = lower
208 1 : i.upper = upper
209 1 : i.bpfs = filterer
210 1 : i.useFilter = useFilter
211 1 : i.reader = r
212 1 : i.cmp = r.Compare
213 1 : i.stats = stats
214 1 : i.hideObsoletePoints = hideObsoletePoints
215 1 : i.bufferPool = bufferPool
216 1 : err = i.index.initHandle(i.cmp, indexH, r.Properties.GlobalSeqNum, false)
217 1 : if err != nil {
218 0 : // blockIter.Close releases indexH and always returns a nil error
219 0 : _ = i.index.Close()
220 0 : return err
221 0 : }
222 1 : i.dataRH = objstorageprovider.UsePreallocatedReadHandle(ctx, r.readable, &i.dataRHPrealloc)
223 1 : if r.tableFormat >= TableFormatPebblev3 {
224 1 : if r.Properties.NumValueBlocks > 0 {
225 1 : // NB: we cannot avoid this ~248 byte allocation, since valueBlockReader
226 1 : // can outlive the singleLevelIterator due to being embedded in a
227 1 : // LazyValue. This consumes ~2% in microbenchmark CPU profiles, but we
228 1 : // should only optimize this if it shows up as significant in end-to-end
229 1 : // CockroachDB benchmarks, since it is tricky to do so. One possibility
230 1 : // is that if many sstable iterators only get positioned at latest
231 1 : // versions of keys, and therefore never expose a LazyValue that is
232 1 : // separated to their callers, they can put this valueBlockReader into a
233 1 : // sync.Pool.
234 1 : i.vbReader = &valueBlockReader{
235 1 : bpOpen: i,
236 1 : rp: rp,
237 1 : vbih: r.valueBIH,
238 1 : stats: stats,
239 1 : }
240 1 : i.data.lazyValueHandling.vbr = i.vbReader
241 1 : i.vbRH = objstorageprovider.UsePreallocatedReadHandle(ctx, r.readable, &i.vbRHPrealloc)
242 1 : }
243 1 : i.data.lazyValueHandling.hasValuePrefix = true
244 : }
245 1 : return nil
246 : }
247 :
248 : // Helper function to check that keys returned from the iterator are within the global and virtual bounds.
249 : func (i *singleLevelIterator) maybeVerifyKey(
250 : iKey *InternalKey, val base.LazyValue,
251 1 : ) (*InternalKey, base.LazyValue) {
252 1 : // maybeVerifyKey is only used for virtual sstable iterators.
253 1 : if invariants.Enabled && i.vState != nil && iKey != nil {
254 1 : key := iKey.UserKey
255 1 :
256 1 : uc, vuc := i.cmp(key, i.upper), i.cmp(key, i.vState.upper.UserKey)
257 1 : lc, vlc := i.cmp(key, i.lower), i.cmp(key, i.vState.lower.UserKey)
258 1 :
259 1 : if (i.vState.upper.IsExclusiveSentinel() && vuc == 0) || (!i.endKeyInclusive && uc == 0) || uc > 0 || vuc > 0 || lc < 0 || vlc < 0 {
260 0 : panic(fmt.Sprintf("key: %s out of bounds of singleLevelIterator", key))
261 : }
262 : }
263 1 : return iKey, val
264 : }
265 :
266 : // setupForCompaction sets up the singleLevelIterator for use with compactionIter.
267 : // Currently, it skips readahead ramp-up. It should be called after init is called.
268 1 : func (i *singleLevelIterator) setupForCompaction() {
269 1 : i.dataRH.SetupForCompaction()
270 1 : if i.vbRH != nil {
271 1 : i.vbRH.SetupForCompaction()
272 1 : }
273 : }
274 :
275 1 : func (i *singleLevelIterator) resetForReuse() singleLevelIterator {
276 1 : return singleLevelIterator{
277 1 : index: i.index.resetForReuse(),
278 1 : data: i.data.resetForReuse(),
279 1 : }
280 1 : }
281 :
282 1 : func (i *singleLevelIterator) initBounds() {
283 1 : // Trim the iteration bounds for the current block. We don't have to check
284 1 : // the bounds on each iteration if the block is entirely contained within the
285 1 : // iteration bounds.
286 1 : i.blockLower = i.lower
287 1 : if i.blockLower != nil {
288 1 : key, _ := i.data.First()
289 1 : if key != nil && i.cmp(i.blockLower, key.UserKey) < 0 {
290 1 : // The lower-bound is less than the first key in the block. No need
291 1 : // to check the lower-bound again for this block.
292 1 : i.blockLower = nil
293 1 : }
294 : }
295 1 : i.blockUpper = i.upper
296 1 : if i.blockUpper != nil && i.cmp(i.blockUpper, i.index.Key().UserKey) > 0 {
297 1 : // The upper-bound is greater than the index key which itself is greater
298 1 : // than or equal to every key in the block. No need to check the
299 1 : // upper-bound again for this block. Even if blockUpper is inclusive
300 1 : // because of upper being inclusive, we can still safely set blockUpper
301 1 : // to nil here.
302 1 : //
303 1 : // TODO(bananabrick): We could also set blockUpper to nil for the >=
304 1 : // case, if blockUpper is inclusive.
305 1 : i.blockUpper = nil
306 1 : }
307 : }
308 :
309 : // Deterministic disabling of the bounds-based optimization that avoids seeking.
310 : // Uses the iterator pointer, since we want diversity in iterator behavior for
311 : // the same SetBounds call. Used for tests.
312 1 : func disableBoundsOpt(bound []byte, ptr uintptr) bool {
313 1 : // Fibonacci hash https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
314 1 : simpleHash := (11400714819323198485 * uint64(ptr)) >> 63
315 1 : return bound[len(bound)-1]&byte(1) == 0 && simpleHash == 0
316 1 : }
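// A note on the arithmetic above: 11400714819323198485 is the 64-bit
// Fibonacci hashing constant (roughly 2^64/phi), and shifting the product
// right by 63 keeps only its top bit, so simpleHash is always 0 or 1.
// Combined with the parity test on the bound's last byte, roughly one in four
// (iterator, bound) combinations disable the optimization.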
317 :
318 : // ensureBoundsOptDeterminism provides a facility for disabling the randomized
319 : // suppression of the bounds optimization by disableBoundsOpt, for tests that require
320 : // deterministic iterator behavior. Some unit tests examine internal iterator
321 : // state and require this behavior to be deterministic.
322 : var ensureBoundsOptDeterminism bool
323 :
324 : // SetBoundsWithSyntheticPrefix indicates whether this iterator requires keys
325 : // passed to its SetBounds() method by a prefix rewriting wrapper to be *not*
326 : // rewritten to be in terms of this iterator's content, but instead be passed
327 : // as-is, i.e. with the synthetic prefix still on them.
328 : //
329 : // This allows an optimization when this iterator is passing these bounds on to
330 : // a vState to additionally constrain them. In said vState, passed bounds are
331 : // combined with the vState bounds which are in terms of the rewritten prefix.
332 : // If the caller rewrote bounds to be in terms of content prefix and SetBounds
333 : // passed those to vState, the vState would need to *un*rewrite them back to the
334 : // synthetic prefix in order to combine them with the vState bounds. Thus, if
335 : // this iterator knows bounds will be passed to vState, it can signal that
336 : // they should be passed without being rewritten to skip converting to and fro.
337 1 : func (i singleLevelIterator) SetBoundsWithSyntheticPrefix() bool {
338 1 : return i.vState != nil
339 1 : }
340 :
341 : // SetBounds implements internalIterator.SetBounds, as documented in the pebble
342 : // package. Note that the upper field is exclusive.
343 1 : func (i *singleLevelIterator) SetBounds(lower, upper []byte) {
344 1 : i.boundsCmp = 0
345 1 : if i.vState != nil {
346 1 : // If the reader is constructed for a virtual sstable, then we must
347 1 : // constrain the bounds of the reader. For physical sstables, the bounds
348 1 : // can be wider than the actual sstable's bounds because we won't
349 1 : // accidentally expose additional keys as there are no additional keys.
350 1 : i.endKeyInclusive, lower, upper = i.vState.constrainBounds(
351 1 : lower, upper, false,
352 1 : )
353 1 : } else {
354 1 : // TODO(bananabrick): Figure out the logic here to enable the boundsCmp
355 1 : // optimization for virtual sstables.
356 1 : if i.positionedUsingLatestBounds {
357 1 : if i.upper != nil && lower != nil && i.cmp(i.upper, lower) <= 0 {
358 1 : i.boundsCmp = +1
359 1 : if invariants.Enabled && !ensureBoundsOptDeterminism &&
360 1 : disableBoundsOpt(lower, uintptr(unsafe.Pointer(i))) {
361 1 : i.boundsCmp = 0
362 1 : }
363 1 : } else if i.lower != nil && upper != nil && i.cmp(upper, i.lower) <= 0 {
364 1 : i.boundsCmp = -1
365 1 : if invariants.Enabled && !ensureBoundsOptDeterminism &&
366 1 : disableBoundsOpt(upper, uintptr(unsafe.Pointer(i))) {
367 1 : i.boundsCmp = 0
368 1 : }
369 : }
370 : }
371 : }
372 :
373 1 : i.positionedUsingLatestBounds = false
374 1 : i.lower = lower
375 1 : i.upper = upper
376 1 : i.blockLower = nil
377 1 : i.blockUpper = nil
378 : }
379 :
380 1 : func (i *singleLevelIterator) SetContext(ctx context.Context) {
381 1 : i.ctx = ctx
382 1 : }
383 :
384 : // loadBlock loads the block at the current index position and leaves i.data
385 : // unpositioned. If unsuccessful, it sets i.err to any error encountered, which
386 : // may be nil if we have simply exhausted the entire table.
387 1 : func (i *singleLevelIterator) loadBlock(dir int8) loadBlockResult {
388 1 : if !i.index.valid() {
389 0 : // Ensure the data block iterator is invalidated even if loading of the block
390 0 : // fails.
391 0 : i.data.invalidate()
392 0 : return loadBlockFailed
393 0 : }
394 : // Load the next block.
395 1 : v := i.index.value()
396 1 : bhp, err := decodeBlockHandleWithProperties(v.InPlaceValue())
397 1 : if i.dataBH == bhp.BlockHandle && i.data.valid() {
398 1 : // We're already at the data block we want to load. Reset bounds in case
399 1 : // they changed since the last seek, but don't reload the block from cache
400 1 : // or disk.
401 1 : //
402 1 : // It's safe to leave i.data in its original state here, as all callers to
403 1 : // loadBlock make an absolute positioning call (i.e. a seek, first, or last)
404 1 : // to `i.data` right after loadBlock returns loadBlockOK.
405 1 : i.initBounds()
406 1 : return loadBlockOK
407 1 : }
408 : // Ensure the data block iterator is invalidated even if loading of the block
409 : // fails.
410 1 : i.data.invalidate()
411 1 : i.dataBH = bhp.BlockHandle
412 1 : if err != nil {
413 0 : i.err = errCorruptIndexEntry
414 0 : return loadBlockFailed
415 0 : }
416 1 : if i.bpfs != nil {
417 1 : intersects, err := i.bpfs.intersects(bhp.Props)
418 1 : if err != nil {
419 0 : i.err = errCorruptIndexEntry
420 0 : return loadBlockFailed
421 0 : }
422 1 : if intersects == blockMaybeExcluded {
423 1 : intersects = i.resolveMaybeExcluded(dir)
424 1 : }
425 1 : if intersects == blockExcluded {
426 1 : i.maybeFilteredKeysSingleLevel = true
427 1 : return loadBlockIrrelevant
428 1 : }
429 : // blockIntersects
430 : }
431 1 : ctx := objiotracing.WithBlockType(i.ctx, objiotracing.DataBlock)
432 1 : block, err := i.reader.readBlock(
433 1 : ctx, i.dataBH, nil /* transform */, i.dataRH, i.stats, &i.iterStats, i.bufferPool)
434 1 : if err != nil {
435 1 : i.err = err
436 1 : return loadBlockFailed
437 1 : }
438 1 : i.err = i.data.initHandle(i.cmp, block, i.reader.Properties.GlobalSeqNum, i.hideObsoletePoints)
439 1 : if i.err != nil {
440 0 : // The block is partially loaded, and we don't want it to appear valid.
441 0 : i.data.invalidate()
442 0 : return loadBlockFailed
443 0 : }
444 1 : i.initBounds()
445 1 : return loadBlockOK
446 : }
447 :
448 : // readBlockForVBR implements the blockProviderWhenOpen interface for use by
449 : // the valueBlockReader.
450 : func (i *singleLevelIterator) readBlockForVBR(
451 : h BlockHandle, stats *base.InternalIteratorStats,
452 1 : ) (bufferHandle, error) {
453 1 : ctx := objiotracing.WithBlockType(i.ctx, objiotracing.ValueBlock)
454 1 : return i.reader.readBlock(ctx, h, nil, i.vbRH, stats, &i.iterStats, i.bufferPool)
455 1 : }
456 :
457 : // resolveMaybeExcluded is invoked when the block-property filterer has found
458 : // that a block is excluded according to its properties but only if its bounds
459 : // fall within the filter's current bounds. This function consults the
460 : // appropriate bound, depending on the iteration direction, and returns either
461 : // `blockIntersects` or `blockExcluded`.
462 1 : func (i *singleLevelIterator) resolveMaybeExcluded(dir int8) intersectsResult {
463 1 : // TODO(jackson): We could first try comparing to top-level index block's
464 1 : // key, and if within bounds avoid per-data block key comparisons.
465 1 :
466 1 : // This iterator is configured with a bound-limited block property
467 1 : // filter. The bpf determined this block could be excluded from
468 1 : // iteration based on the property encoded in the block handle.
469 1 : // However, we still need to determine if the block is wholly
470 1 : // contained within the filter's key bounds.
471 1 : //
472 1 : // External guarantees ensure all the block's keys are ≥ the
473 1 : // filter's lower bound during forward iteration, and that all the
474 1 : // block's keys are < the filter's upper bound during backward
475 1 : // iteration. We only need to determine if the opposite bound is
476 1 : // also met.
477 1 : //
478 1 : // The index separator in index.Key() provides an inclusive
479 1 : // upper-bound for the data block's keys, guaranteeing that all its
480 1 : // keys are ≤ index.Key(). For forward iteration, this is all we
481 1 : // need.
482 1 : if dir > 0 {
483 1 : // Forward iteration.
484 1 : if i.bpfs.boundLimitedFilter.KeyIsWithinUpperBound(i.index.Key().UserKey) {
485 1 : return blockExcluded
486 1 : }
487 1 : return blockIntersects
488 : }
489 :
490 : // Reverse iteration.
491 : //
492 : // Because we're iterating in the reverse direction, we don't yet have
493 : // enough context available to determine if the block is wholly contained
494 : // within its bounds. This case arises only during backward iteration,
495 : // because of the way the index is structured.
496 : //
497 : // Consider a bound-limited bpf limited to the bounds [b,d), loading the
498 : // block with separator `c`. During reverse iteration, the guarantee that
499 : // all the block's keys are < `d` is externally provided, but no guarantee
500 : // is made on the bpf's lower bound. The separator `c` only provides an
501 : // inclusive upper bound on the block's keys, indicating that the
502 : // corresponding block handle points to a block containing only keys ≤ `c`.
503 : //
504 : // To establish a lower bound, we step the index backwards to read the
505 : // previous block's separator, which provides an inclusive lower bound on
506 : // the original block's keys. Afterwards, we step forward to restore our
507 : // index position.
508 1 : if peekKey, _ := i.index.Prev(); peekKey == nil {
509 1 : // The original block points to the first block of this index block. If
510 1 : // there's a two-level index, it could potentially provide a lower
511 1 : // bound, but the code refactoring necessary to read it doesn't seem
512 1 : // worth the payoff. We fall through to loading the block.
513 1 : } else if i.bpfs.boundLimitedFilter.KeyIsWithinLowerBound(peekKey.UserKey) {
514 1 : // The lower-bound on the original block falls within the filter's
515 1 : // bounds, and we can skip the block (after restoring our current index
516 1 : // position).
517 1 : _, _ = i.index.Next()
518 1 : return blockExcluded
519 1 : }
520 1 : _, _ = i.index.Next()
521 1 : return blockIntersects
522 : }
523 :
524 1 : func (i *singleLevelIterator) initBoundsForAlreadyLoadedBlock() {
525 1 : if i.data.getFirstUserKey() == nil {
526 0 : panic("initBoundsForAlreadyLoadedBlock must not be called on empty or corrupted block")
527 : }
528 1 : i.blockLower = i.lower
529 1 : if i.blockLower != nil {
530 1 : firstUserKey := i.data.getFirstUserKey()
531 1 : if firstUserKey != nil && i.cmp(i.blockLower, firstUserKey) < 0 {
532 1 : // The lower-bound is less than the first key in the block. No need
533 1 : // to check the lower-bound again for this block.
534 1 : i.blockLower = nil
535 1 : }
536 : }
537 1 : i.blockUpper = i.upper
538 1 : if i.blockUpper != nil && i.cmp(i.blockUpper, i.index.Key().UserKey) > 0 {
539 1 : // The upper-bound is greater than the index key which itself is greater
540 1 : // than or equal to every key in the block. No need to check the
541 1 : // upper-bound again for this block.
542 1 : i.blockUpper = nil
543 1 : }
544 : }
545 :
546 : // The number of times to call Next/Prev in a block before giving up and seeking.
547 : // The value of 4 is arbitrary.
548 : // TODO(sumeer): experiment with dynamic adjustment based on the history of
549 : // seeks for a particular iterator.
550 : const numStepsBeforeSeek = 4
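// As a rough, illustrative cost model (not from the original source): if a
// within-block Next/Prev costs c_step and a fresh seek costs c_seek, stepping
// wins whenever the target lies within about c_seek/c_step entries, and the
// constant above caps the wasted work at numStepsBeforeSeek*c_step before
// falling back to a seek.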
551 :
552 : func (i *singleLevelIterator) trySeekGEUsingNextWithinBlock(
553 : key []byte,
554 1 : ) (k *InternalKey, v base.LazyValue, done bool) {
555 1 : k, v = i.data.Key(), i.data.value()
556 1 : for j := 0; j < numStepsBeforeSeek; j++ {
557 1 : curKeyCmp := i.cmp(k.UserKey, key)
558 1 : if curKeyCmp >= 0 {
559 1 : if i.blockUpper != nil {
560 1 : cmp := i.cmp(k.UserKey, i.blockUpper)
561 1 : if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
562 1 : i.exhaustedBounds = +1
563 1 : return nil, base.LazyValue{}, true
564 1 : }
565 : }
566 1 : return k, v, true
567 : }
568 1 : k, v = i.data.Next()
569 1 : if k == nil {
570 0 : break
571 : }
572 : }
573 1 : return k, v, false
574 : }
575 :
576 : func (i *singleLevelIterator) trySeekLTUsingPrevWithinBlock(
577 : key []byte,
578 1 : ) (k *InternalKey, v base.LazyValue, done bool) {
579 1 : k, v = i.data.Key(), i.data.value()
580 1 : for j := 0; j < numStepsBeforeSeek; j++ {
581 1 : curKeyCmp := i.cmp(k.UserKey, key)
582 1 : if curKeyCmp < 0 {
583 1 : if i.blockLower != nil && i.cmp(k.UserKey, i.blockLower) < 0 {
584 1 : i.exhaustedBounds = -1
585 1 : return nil, base.LazyValue{}, true
586 1 : }
587 1 : return k, v, true
588 : }
589 1 : k, v = i.data.Prev()
590 1 : if k == nil {
591 0 : break
592 : }
593 : }
594 1 : return k, v, false
595 : }
596 :
597 1 : func (i *singleLevelIterator) recordOffset() uint64 {
598 1 : offset := i.dataBH.Offset
599 1 : if i.data.valid() {
600 1 : // - i.dataBH.Length/len(i.data.data) is the compression ratio. If
601 1 : // uncompressed, this is 1.
602 1 : // - i.data.nextOffset is the uncompressed position of the current record
603 1 : // in the block.
604 1 : // - i.dataBH.Offset is the offset of the block in the sstable before
605 1 : // decompression.
606 1 : offset += (uint64(i.data.nextOffset) * i.dataBH.Length) / uint64(len(i.data.data))
607 1 : } else {
608 1 : // For the last entry in the block, bytes iterated must also include the block trailer
609 1 : // and restart points.
610 1 : offset += i.dataBH.Length + blockTrailerLen
611 1 : }
612 1 : return offset
613 : }
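// A worked example of the interpolation above, with hypothetical numbers: for
// a block at dataBH.Offset=1000 whose compressed length dataBH.Length=100
// decompresses to len(i.data.data)=400 bytes, a record at
// i.data.nextOffset=200 reports 1000 + 200*100/400 = 1050, i.e. halfway
// through the compressed block.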
614 :
615 : // SeekGE implements internalIterator.SeekGE, as documented in the pebble
616 : // package. Note that SeekGE only checks the upper bound. It is up to the
617 : // caller to ensure that key is greater than or equal to the lower bound.
618 : func (i *singleLevelIterator) SeekGE(
619 : key []byte, flags base.SeekGEFlags,
620 1 : ) (*InternalKey, base.LazyValue) {
621 1 : if i.vState != nil {
622 1 : // Callers of SeekGE don't know about virtual sstable bounds, so we may
623 1 : // have to internally restrict the bounds.
624 1 : //
625 1 : // TODO(bananabrick): We can optimize this check away for the level iter
626 1 : // if necessary.
627 1 : if i.cmp(key, i.lower) < 0 {
628 1 : key = i.lower
629 1 : }
630 : }
631 :
632 1 : if flags.TrySeekUsingNext() {
633 1 : // The i.exhaustedBounds comparison indicates that the upper bound was
634 1 : // reached. The i.data.isDataInvalidated() indicates that the sstable was
635 1 : // exhausted.
636 1 : if (i.exhaustedBounds == +1 || i.data.isDataInvalidated()) && i.err == nil {
637 1 : // Already exhausted, so return nil.
638 1 : return nil, base.LazyValue{}
639 1 : }
640 1 : if i.err != nil {
641 0 : // The current iterator position cannot be used.
642 0 : flags = flags.DisableTrySeekUsingNext()
643 0 : }
644 : // INVARIANT: flags.TrySeekUsingNext() => i.err == nil &&
645 : // i.exhaustedBounds != +1 && !i.data.isDataInvalidated(). That is,
646 : // data-exhausted and bounds-exhausted, as defined earlier, are both
647 : // false. This makes it safe to clear out i.exhaustedBounds and i.err
648 : // before calling into seekGEHelper.
649 : }
650 :
651 1 : i.exhaustedBounds = 0
652 1 : i.err = nil // clear cached iteration error
653 1 : boundsCmp := i.boundsCmp
654 1 : // Seek optimization only applies until iterator is first positioned after SetBounds.
655 1 : i.boundsCmp = 0
656 1 : i.positionedUsingLatestBounds = true
657 1 : return i.seekGEHelper(key, boundsCmp, flags)
658 : }
659 :
660 : // seekGEHelper contains the common functionality for SeekGE and SeekPrefixGE.
661 : func (i *singleLevelIterator) seekGEHelper(
662 : key []byte, boundsCmp int, flags base.SeekGEFlags,
663 1 : ) (*InternalKey, base.LazyValue) {
664 1 : // Invariant: trySeekUsingNext => !i.data.isDataInvalidated() && i.exhaustedBounds != +1
665 1 :
666 1 : // SeekGE performs various step-instead-of-seeking optimizations: e.g. enabled
667 1 : // by trySeekUsingNext, or by monotonically increasing bounds (i.boundsCmp).
668 1 : // Care must be taken to ensure that when performing these optimizations and
669 1 : // the iterator becomes exhausted, i.maybeFilteredKeysSingleLevel is set appropriately.
670 1 : // Consider a previous SeekGE that filtered keys from k until the current
671 1 : // iterator position.
672 1 : //
673 1 : // If the previous SeekGE exhausted the iterator, it's possible keys greater
674 1 : // than or equal to the current search key were filtered. We must not reuse
675 1 : // the current iterator position without remembering the previous value of
676 1 : // maybeFilteredKeysSingleLevel.
677 1 :
678 1 : var dontSeekWithinBlock bool
679 1 : if !i.data.isDataInvalidated() && !i.index.isDataInvalidated() && i.data.valid() && i.index.valid() &&
680 1 : boundsCmp > 0 && i.cmp(key, i.index.Key().UserKey) <= 0 {
681 1 : // Fast-path: The bounds have moved forward and this SeekGE is
682 1 : // respecting the lower bound (guaranteed by Iterator). We know that
683 1 : // the iterator must already be positioned within or just outside the
684 1 : // previous bounds. Therefore it cannot be positioned at a block (or
685 1 : // the position within that block) that is ahead of the seek position.
686 1 : // However it can be positioned at an earlier block. This fast-path to
687 1 : // use Next() on the block is only applied when we are already at the
688 1 : // block that the slow-path (the else-clause) would load -- this is
689 1 : // the motivation for the i.cmp(key, i.index.Key().UserKey) <= 0
690 1 : // predicate.
691 1 : i.initBoundsForAlreadyLoadedBlock()
692 1 : ikey, val, done := i.trySeekGEUsingNextWithinBlock(key)
693 1 : if done {
694 1 : return ikey, val
695 1 : }
696 1 : if ikey == nil {
697 0 : // Done with this block.
698 0 : dontSeekWithinBlock = true
699 0 : }
700 1 : } else {
701 1 : // Cannot use bounds monotonicity. But we may be able to optimize if the
702 1 : // caller claimed an externally known invariant represented by
703 1 : // flags.TrySeekUsingNext().
704 1 : if flags.TrySeekUsingNext() {
705 1 : // seekPrefixGE or SeekGE has already ensured
706 1 : // !i.data.isDataInvalidated() && i.exhaustedBounds != +1
707 1 : currKey := i.data.Key()
708 1 : value := i.data.value()
709 1 : less := i.cmp(currKey.UserKey, key) < 0
710 1 : // We could be more sophisticated and confirm that the seek
711 1 : // position is within the current block before applying this
712 1 : // optimization. But there may be some benefit even if it is in
713 1 : // the next block, since we can avoid seeking i.index.
714 1 : for j := 0; less && j < numStepsBeforeSeek; j++ {
715 1 : currKey, value = i.Next()
716 1 : if currKey == nil {
717 1 : return nil, base.LazyValue{}
718 1 : }
719 1 : less = i.cmp(currKey.UserKey, key) < 0
720 : }
721 1 : if !less {
722 1 : if i.blockUpper != nil {
723 1 : cmp := i.cmp(currKey.UserKey, i.blockUpper)
724 1 : if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
725 0 : i.exhaustedBounds = +1
726 0 : return nil, base.LazyValue{}
727 0 : }
728 : }
729 1 : return currKey, value
730 : }
731 : }
732 :
733 : // Slow-path.
734 : // Since we're re-seeking the iterator, the previous value of
735 : // maybeFilteredKeysSingleLevel is irrelevant. If we filter out blocks
736 : // during seeking, loadBlock will set it to true.
737 1 : i.maybeFilteredKeysSingleLevel = false
738 1 :
739 1 : var ikey *InternalKey
740 1 : if ikey, _ = i.index.SeekGE(key, flags.DisableTrySeekUsingNext()); ikey == nil {
741 1 : // The target key is greater than any key in the index block.
742 1 : // Invalidate the block iterator so that a subsequent call to Prev()
743 1 : // will return the last key in the table.
744 1 : i.data.invalidate()
745 1 : return nil, base.LazyValue{}
746 1 : }
747 1 : result := i.loadBlock(+1)
748 1 : if result == loadBlockFailed {
749 1 : return nil, base.LazyValue{}
750 1 : }
751 1 : if result == loadBlockIrrelevant {
752 1 : // Enforce the upper bound here, since we don't want to bother moving
753 1 : // to the next block if the upper bound is already exceeded. Note that
754 1 : // the next block starts with keys >= ikey.UserKey since even
755 1 : // though this is the block separator, the same user key can span
756 1 : // multiple blocks. If upper is exclusive we use >= below, else
757 1 : // we use >.
758 1 : if i.upper != nil {
759 1 : cmp := i.cmp(ikey.UserKey, i.upper)
760 1 : if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
761 1 : i.exhaustedBounds = +1
762 1 : return nil, base.LazyValue{}
763 1 : }
764 : }
765 : // Want to skip to the next block.
766 1 : dontSeekWithinBlock = true
767 : }
768 : }
769 1 : if !dontSeekWithinBlock {
770 1 : if ikey, val := i.data.SeekGE(key, flags.DisableTrySeekUsingNext()); ikey != nil {
771 1 : if i.blockUpper != nil {
772 1 : cmp := i.cmp(ikey.UserKey, i.blockUpper)
773 1 : if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
774 1 : i.exhaustedBounds = +1
775 1 : return nil, base.LazyValue{}
776 1 : }
777 : }
778 1 : return ikey, val
779 : }
780 : }
781 1 : return i.skipForward()
782 : }
783 :
784 : // SeekPrefixGE implements internalIterator.SeekPrefixGE, as documented in the
785 : // pebble package. Note that SeekPrefixGE only checks the upper bound. It is up
786 : // to the caller to ensure that key is greater than or equal to the lower bound.
787 : func (i *singleLevelIterator) SeekPrefixGE(
788 : prefix, key []byte, flags base.SeekGEFlags,
789 1 : ) (*base.InternalKey, base.LazyValue) {
790 1 : if i.vState != nil {
791 1 : // Callers of SeekPrefixGE aren't aware of virtual sstable bounds, so
792 1 : // we may have to internally restrict the bounds.
793 1 : //
794 1 : // TODO(bananabrick): We can optimize away this check for the level iter
795 1 : // if necessary.
796 1 : if i.cmp(key, i.lower) < 0 {
797 1 : key = i.lower
798 1 : }
799 : }
800 1 : return i.seekPrefixGE(prefix, key, flags, i.useFilter)
801 : }
802 :
803 : func (i *singleLevelIterator) seekPrefixGE(
804 : prefix, key []byte, flags base.SeekGEFlags, checkFilter bool,
805 1 : ) (k *InternalKey, value base.LazyValue) {
806 1 : // NOTE: prefix is only used for bloom filter checking, not for later work in
807 1 : // this method. Hence, we can use the existing iterator position if the last
808 1 : // SeekPrefixGE did not fail bloom filter matching.
809 1 :
810 1 : err := i.err
811 1 : i.err = nil // clear cached iteration error
812 1 : if checkFilter && i.reader.tableFilter != nil {
813 1 : if !i.lastBloomFilterMatched {
814 1 : // Iterator is not positioned based on last seek.
815 1 : flags = flags.DisableTrySeekUsingNext()
816 1 : }
817 1 : i.lastBloomFilterMatched = false
818 1 : // Check prefix bloom filter.
819 1 : var dataH bufferHandle
820 1 : dataH, i.err = i.reader.readFilter(i.ctx, i.stats, &i.iterStats)
821 1 : if i.err != nil {
822 1 : i.data.invalidate()
823 1 : return nil, base.LazyValue{}
824 1 : }
825 1 : mayContain := i.reader.tableFilter.mayContain(dataH.Get(), prefix)
826 1 : dataH.Release()
827 1 : if !mayContain {
828 1 : // This invalidation may not be necessary for correctness, and may
829 1 : // be a place to optimize later by reusing the already loaded
830 1 : // block. It was necessary in earlier versions of the code since
831 1 : // the caller was allowed to call Next when SeekPrefixGE returned
832 1 : // nil. This is no longer allowed.
833 1 : i.data.invalidate()
834 1 : return nil, base.LazyValue{}
835 1 : }
836 1 : i.lastBloomFilterMatched = true
837 : }
838 1 : if flags.TrySeekUsingNext() {
839 1 : // The i.exhaustedBounds comparison indicates that the upper bound was
840 1 : // reached. The i.data.isDataInvalidated() indicates that the sstable was
841 1 : // exhausted.
842 1 : if (i.exhaustedBounds == +1 || i.data.isDataInvalidated()) && err == nil {
843 0 : // Already exhausted, so return nil.
844 0 : return nil, base.LazyValue{}
845 0 : }
846 1 : if err != nil {
847 1 : // The current iterator position cannot be used.
848 1 : flags = flags.DisableTrySeekUsingNext()
849 1 : }
850 : // INVARIANT: flags.TrySeekUsingNext() => err == nil &&
851 : // i.exhaustedBounds != +1 && !i.data.isDataInvalidated(). That is,
852 : // data-exhausted and bounds-exhausted, as defined earlier, are both
853 : // false. This makes it safe to clear out i.exhaustedBounds and i.err
854 : // before calling into seekGEHelper.
855 : }
856 : // The bloom filter matched, or was skipped, so this method will position the
857 : // iterator.
858 1 : i.exhaustedBounds = 0
859 1 : boundsCmp := i.boundsCmp
860 1 : // Seek optimization only applies until iterator is first positioned after SetBounds.
861 1 : i.boundsCmp = 0
862 1 : i.positionedUsingLatestBounds = true
863 1 : k, value = i.seekGEHelper(key, boundsCmp, flags)
864 1 : return i.maybeVerifyKey(k, value)
865 : }
866 :
867 1 : // virtualLast should only be called if i.vState != nil.
868 1 : func (i *singleLevelIterator) virtualLast() (*InternalKey, base.LazyValue) {
869 1 : if i.vState == nil {
870 0 : panic("pebble: invalid call to virtualLast")
871 : }
872 :
873 1 : if !i.endKeyInclusive {
874 1 : // Trivial case.
875 1 : return i.SeekLT(i.upper, base.SeekLTFlagsNone)
876 1 : }
877 1 : return i.virtualLastSeekLE(i.upper)
878 : }
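// Why the split above is needed: with an exclusive upper bound, SeekLT(upper)
// yields exactly the last position. With an inclusive upper bound f,
// SeekLT(f) would skip keys equal to f itself, so virtualLastSeekLE is used
// to land on the greatest key <= f.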
879 :
880 : // virtualLastSeekLE is called by virtualLast to perform a SeekLE on the
881 : // virtual sstable's upper bound. Consider generalizing this into a SeekLE() if there are other
882 : // uses of this method in the future.
883 1 : func (i *singleLevelIterator) virtualLastSeekLE(key []byte) (*InternalKey, base.LazyValue) {
884 1 : // Callers of SeekLE don't know about virtual sstable bounds, so we may
885 1 : // have to internally restrict the bounds.
886 1 : //
887 1 : // TODO(bananabrick): We can optimize this check away for the level iter
888 1 : // if necessary.
889 1 : if i.cmp(key, i.upper) >= 0 {
890 1 : if !i.endKeyInclusive {
891 0 : panic("unexpected virtualLastSeekLE with exclusive upper bounds")
892 : }
893 1 : key = i.upper
894 : }
895 :
896 1 : i.exhaustedBounds = 0
897 1 : i.err = nil // clear cached iteration error
898 1 : // Seek optimization only applies until iterator is first positioned with a
899 1 : // SeekGE or SeekLT after SetBounds.
900 1 : i.boundsCmp = 0
901 1 : i.positionedUsingLatestBounds = true
902 1 : i.maybeFilteredKeysSingleLevel = false
903 1 :
904 1 : ikey, _ := i.index.SeekGE(key, base.SeekGEFlagsNone)
905 1 : // We can have multiple internal keys with the same user key as the seek
906 1 : // key. In that case, we want the last (greatest) internal key.
907 1 : //
908 1 : // NB: We can avoid this Next()ing if we just implement a blockIter.SeekLE().
909 1 : // This might be challenging to do correctly, so we use regular operations
910 1 : // for now.
911 1 : for ikey != nil && bytes.Equal(ikey.UserKey, key) {
912 1 : ikey, _ = i.index.Next()
913 1 : }
914 1 : if ikey == nil {
915 1 : return i.skipBackward()
916 1 : }
917 1 : result := i.loadBlock(-1)
918 1 : if result == loadBlockFailed {
919 0 : return nil, base.LazyValue{}
920 0 : }
921 1 : if result == loadBlockIrrelevant {
922 0 : // Want to skip to the previous block.
923 0 : return i.skipBackward()
924 0 : }
925 1 : ikey, _ = i.data.SeekGE(key, base.SeekGEFlagsNone)
926 1 : var val base.LazyValue
927 1 : // Go to the last user key that matches key, and then Prev() on the data
928 1 : // block.
929 1 : for ikey != nil && bytes.Equal(ikey.UserKey, key) {
930 1 : ikey, _ = i.data.Next()
931 1 : }
932 1 : ikey, val = i.data.Prev()
933 1 : if ikey != nil {
934 1 : return ikey, val
935 1 : }
936 1 : return i.skipBackward()
937 : }
938 :
939 : // SeekLT implements internalIterator.SeekLT, as documented in the pebble
940 : // package. Note that SeekLT only checks the lower bound. It is up to the
941 : // caller to ensure that key is less than or equal to the upper bound.
942 : func (i *singleLevelIterator) SeekLT(
943 : key []byte, flags base.SeekLTFlags,
944 1 : ) (*InternalKey, base.LazyValue) {
945 1 : if i.vState != nil {
946 1 : // Might have to fix upper bound since virtual sstable bounds are not
947 1 : // known to callers of SeekLT.
948 1 : //
949 1 : // TODO(bananabrick): We can optimize away this check for the level iter
950 1 : // if necessary.
951 1 : cmp := i.cmp(key, i.upper)
952 1 : // key == i.upper is fine. We'll do the right thing and return the
953 1 : // first internal key with user key < key.
954 1 : if cmp > 0 {
955 1 : // Return the last key in the virtual sstable.
956 1 : return i.virtualLast()
957 1 : }
958 : }
959 :
960 1 : i.exhaustedBounds = 0
961 1 : i.err = nil // clear cached iteration error
962 1 : boundsCmp := i.boundsCmp
963 1 : // Seek optimization only applies until iterator is first positioned after SetBounds.
964 1 : i.boundsCmp = 0
965 1 :
966 1 : // Seeking operations perform various step-instead-of-seeking optimizations:
967 1 : // e.g. by considering monotonically increasing bounds (i.boundsCmp). Care
968 1 : // must be taken to ensure that when performing these optimizations and the
969 1 : // iterator becomes exhausted i.maybeFilteredKeysSingleLevel is set
970 1 : // appropriately. Consider a previous SeekLT that filtered keys from k
971 1 : // until the current iterator position.
972 1 : //
973 1 : // If the previous SeekLT exhausted the iterator, it's possible keys
974 1 : // less than the current search key were filtered. We must not reuse the
975 1 : // current iterator position without remembering the previous value of
976 1 : // maybeFilteredKeysSingleLevel.
977 1 :
978 1 : i.positionedUsingLatestBounds = true
979 1 :
980 1 : var dontSeekWithinBlock bool
981 1 : if !i.data.isDataInvalidated() && !i.index.isDataInvalidated() && i.data.valid() && i.index.valid() &&
982 1 : boundsCmp < 0 && i.cmp(i.data.getFirstUserKey(), key) < 0 {
983 1 : // Fast-path: The bounds have moved backward, and this SeekLT is
984 1 : // respecting the upper bound (guaranteed by Iterator). We know that
985 1 : // the iterator must already be positioned within or just outside the
986 1 : // previous bounds. Therefore it cannot be positioned at a block (or
987 1 : // the position within that block) that is behind the seek position.
988 1 : // However it can be positioned at a later block. This fast-path to
989 1 : // use Prev() on the block is only applied when we are already at the
990 1 : // block that can satisfy this seek -- this is the motivation for the
991 1 : // i.cmp(i.data.firstKey.UserKey, key) < 0 predicate.
992 1 : i.initBoundsForAlreadyLoadedBlock()
993 1 : ikey, val, done := i.trySeekLTUsingPrevWithinBlock(key)
994 1 : if done {
995 1 : return ikey, val
996 1 : }
997 1 : if ikey == nil {
998 0 : // Done with this block.
999 0 : dontSeekWithinBlock = true
1000 0 : }
1001 1 : } else {
1002 1 : // Slow-path.
1003 1 : i.maybeFilteredKeysSingleLevel = false
1004 1 : var ikey *InternalKey
1005 1 :
1006 1 : // NB: If a bound-limited block property filter is configured, it's
1007 1 : // externally ensured that the filter is disabled (through returning
1008 1 : // Intersects=false irrespective of the block props provided) during
1009 1 : // seeks.
1010 1 : if ikey, _ = i.index.SeekGE(key, base.SeekGEFlagsNone); ikey == nil {
1011 1 : ikey, _ = i.index.Last()
1012 1 : if ikey == nil {
1013 0 : return nil, base.LazyValue{}
1014 0 : }
1015 : }
1016 : // INVARIANT: ikey != nil.
1017 1 : result := i.loadBlock(-1)
1018 1 : if result == loadBlockFailed {
1019 1 : return nil, base.LazyValue{}
1020 1 : }
1021 1 : if result == loadBlockIrrelevant {
1022 1 : // Enforce the lower bound here, since we don't want to bother moving
1023 1 : // to the previous block if the lower bound is already exceeded. Note
1024 1 : // that the previous block starts with keys <= ikey.UserKey since
1025 1 : // even though this is the current block's separator, the same
1026 1 : // user key can span multiple blocks.
1027 1 : if i.lower != nil && i.cmp(ikey.UserKey, i.lower) < 0 {
1028 0 : i.exhaustedBounds = -1
1029 0 : return nil, base.LazyValue{}
1030 0 : }
1031 : // Want to skip to the previous block.
1032 1 : dontSeekWithinBlock = true
1033 : }
1034 : }
1035 1 : if !dontSeekWithinBlock {
1036 1 : if ikey, val := i.data.SeekLT(key, flags); ikey != nil {
1037 1 : if i.blockLower != nil && i.cmp(ikey.UserKey, i.blockLower) < 0 {
1038 1 : i.exhaustedBounds = -1
1039 1 : return nil, base.LazyValue{}
1040 1 : }
1041 1 : return ikey, val
1042 : }
1043 : }
1044 : // The index contains separator keys which may lie between
1045 : // user-keys. Consider the user-keys:
1046 : //
1047 : // complete
1048 : // ---- new block ---
1049 : // complexion
1050 : //
1051 : // If these two keys end one block and start the next, the index key may
1052 : // be chosen as "compleu". The SeekGE in the index block will then point
1053 : // us to the block containing "complexion". If this happens, we want the
1054 : // last key from the previous data block.
1055 1 : return i.maybeVerifyKey(i.skipBackward())
1056 : }
1057 :
1058 : // First implements internalIterator.First, as documented in the pebble
1059 : // package. Note that First only checks the upper bound. If a lower bound is
1060 : // set, it is up to the caller to position the iterator at or above it (e.g.
1061 : // via a call to SeekGE(lower) instead of First).
1062 1 : func (i *singleLevelIterator) First() (*InternalKey, base.LazyValue) {
1063 1 : // If the iterator was created on a virtual sstable, we will SeekGE to the
1064 1 : // lower bound instead of using First, because First does not respect
1065 1 : // bounds.
1066 1 : if i.vState != nil {
1067 1 : return i.SeekGE(i.lower, base.SeekGEFlagsNone)
1068 1 : }
1069 :
1070 1 : if i.lower != nil {
1071 0 : panic("singleLevelIterator.First() used despite lower bound")
1072 : }
1073 1 : i.positionedUsingLatestBounds = true
1074 1 : i.maybeFilteredKeysSingleLevel = false
1075 1 :
1076 1 : return i.firstInternal()
1077 : }
1078 :
1079 : // firstInternal is a helper used for absolute positioning in a single-level
1080 : // index file, or for positioning in the second-level index in a two-level
1081 : // index file. For the latter, one cannot make any claims about absolute
1082 : // positioning.
1083 1 : func (i *singleLevelIterator) firstInternal() (*InternalKey, base.LazyValue) {
1084 1 : i.exhaustedBounds = 0
1085 1 : i.err = nil // clear cached iteration error
1086 1 : // Seek optimization only applies until iterator is first positioned after SetBounds.
1087 1 : i.boundsCmp = 0
1088 1 :
1089 1 : var ikey *InternalKey
1090 1 : if ikey, _ = i.index.First(); ikey == nil {
1091 0 : i.data.invalidate()
1092 0 : return nil, base.LazyValue{}
1093 0 : }
1094 1 : result := i.loadBlock(+1)
1095 1 : if result == loadBlockFailed {
1096 1 : return nil, base.LazyValue{}
1097 1 : }
1098 1 : if result == loadBlockOK {
1099 1 : if ikey, val := i.data.First(); ikey != nil {
1100 1 : if i.blockUpper != nil {
1101 1 : cmp := i.cmp(ikey.UserKey, i.blockUpper)
1102 1 : if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
1103 1 : i.exhaustedBounds = +1
1104 1 : return nil, base.LazyValue{}
1105 1 : }
1106 : }
1107 1 : return ikey, val
1108 : }
1109 : // Else fall through to skipForward.
1110 1 : } else {
1111 1 : // result == loadBlockIrrelevant. Enforce the upper bound here since we
1112 1 : // don't want to bother moving to the next block if the upper bound is
1113 1 : // already exceeded. Note that the next block starts with keys >=
1114 1 : // ikey.UserKey since even though this is the block separator, the
1115 1 : // same user key can span multiple blocks. If upper is exclusive we
1116 1 : // use >= below, else we use >.
1117 1 : if i.upper != nil {
1118 1 : cmp := i.cmp(ikey.UserKey, i.upper)
1119 1 : if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
1120 0 : i.exhaustedBounds = +1
1121 0 : return nil, base.LazyValue{}
1122 0 : }
1123 : }
1124 : // Else fall through to skipForward.
1125 : }
1126 :
1127 1 : return i.skipForward()
1128 : }
1129 :
1130 : // Last implements internalIterator.Last, as documented in the pebble
1131 : // package. Note that Last only checks the lower bound. If an upper bound is
1132 : // set, it is up to the caller to position the iterator below it (e.g. via a
1133 : // call to SeekLT(upper) instead of Last).
1134 1 : func (i *singleLevelIterator) Last() (*InternalKey, base.LazyValue) {
1135 1 : if i.vState != nil {
1136 1 : return i.virtualLast()
1137 1 : }
1138 :
1139 1 : if i.upper != nil {
1140 0 : panic("singleLevelIterator.Last() used despite upper bound")
1141 : }
1142 1 : i.positionedUsingLatestBounds = true
1143 1 : i.maybeFilteredKeysSingleLevel = false
1144 1 : return i.lastInternal()
1145 : }
1146 :
1147 : // lastInternal is a helper used for absolute positioning in a single-level
1148 : // index file, or for positioning in the second-level index in a two-level
1149 : // index file. For the latter, one cannot make any claims about absolute
1150 : // positioning.
1151 1 : func (i *singleLevelIterator) lastInternal() (*InternalKey, base.LazyValue) {
1152 1 : i.exhaustedBounds = 0
1153 1 : i.err = nil // clear cached iteration error
1154 1 : // Seek optimization only applies until iterator is first positioned after SetBounds.
1155 1 : i.boundsCmp = 0
1156 1 :
1157 1 : var ikey *InternalKey
1158 1 : if ikey, _ = i.index.Last(); ikey == nil {
1159 0 : i.data.invalidate()
1160 0 : return nil, base.LazyValue{}
1161 0 : }
1162 1 : result := i.loadBlock(-1)
1163 1 : if result == loadBlockFailed {
1164 1 : return nil, base.LazyValue{}
1165 1 : }
1166 1 : if result == loadBlockOK {
1167 1 : if ikey, val := i.data.Last(); ikey != nil {
1168 1 : if i.blockLower != nil && i.cmp(ikey.UserKey, i.blockLower) < 0 {
1169 1 : i.exhaustedBounds = -1
1170 1 : return nil, base.LazyValue{}
1171 1 : }
1172 1 : return ikey, val
1173 : }
1174 : // Else fall through to skipBackward.
1175 1 : } else {
1176 1 : // result == loadBlockIrrelevant. Enforce the lower bound here since we
1177 1 : // don't want to bother moving to the previous block if the lower bound is
1178 1 : // already exceeded. Note that the previous block starts with keys <=
1179 1 : // key.UserKey since even though this is the current block's
1180 1 : // separator, the same user key can span multiple blocks.
1181 1 : if i.lower != nil && i.cmp(ikey.UserKey, i.lower) < 0 {
1182 0 : i.exhaustedBounds = -1
1183 0 : return nil, base.LazyValue{}
1184 0 : }
1185 : }
1186 :
1187 1 : return i.skipBackward()
1188 : }
1189 :
1190 : // Next implements internalIterator.Next, as documented in the pebble
1191 : // package.
1192 : // Note: compactionIterator.Next mirrors the implementation of Iterator.Next
1193 : // for performance reasons. Keep the two in sync.
1194 1 : func (i *singleLevelIterator) Next() (*InternalKey, base.LazyValue) {
1195 1 : if i.exhaustedBounds == +1 {
1196 0 : panic("Next called even though exhausted upper bound")
1197 : }
1198 1 : i.exhaustedBounds = 0
1199 1 : i.maybeFilteredKeysSingleLevel = false
1200 1 : // Seek optimization only applies until iterator is first positioned after SetBounds.
1201 1 : i.boundsCmp = 0
1202 1 :
1203 1 : if i.err != nil {
1204 1 : // TODO(jackson): Can this case be turned into a panic? Once an error is
1205 1 : // encountered, the iterator must be re-seeked.
1206 1 : return nil, base.LazyValue{}
1207 1 : }
1208 1 : if key, val := i.data.Next(); key != nil {
1209 1 : if i.blockUpper != nil {
1210 1 : cmp := i.cmp(key.UserKey, i.blockUpper)
1211 1 : if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
1212 1 : i.exhaustedBounds = +1
1213 1 : return nil, base.LazyValue{}
1214 1 : }
1215 : }
1216 1 : return key, val
1217 : }
1218 1 : return i.skipForward()
1219 : }
1220 :
1221 : // NextPrefix implements (base.InternalIterator).NextPrefix.
1222 1 : func (i *singleLevelIterator) NextPrefix(succKey []byte) (*InternalKey, base.LazyValue) {
1223 1 : if i.exhaustedBounds == +1 {
1224 0 : panic("NextPrefix called even though exhausted upper bound")
1225 : }
1226 1 : i.exhaustedBounds = 0
1227 1 : i.maybeFilteredKeysSingleLevel = false
1228 1 : // Seek optimization only applies until iterator is first positioned after SetBounds.
1229 1 : i.boundsCmp = 0
1230 1 : if i.err != nil {
1231 0 : // TODO(jackson): Can this case be turned into a panic? Once an error is
1232 0 : // encountered, the iterator must be re-seeked.
1233 0 : return nil, base.LazyValue{}
1234 0 : }
1235 1 : if key, val := i.data.NextPrefix(succKey); key != nil {
1236 1 : if i.blockUpper != nil {
1237 1 : cmp := i.cmp(key.UserKey, i.blockUpper)
1238 1 : if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
1239 1 : i.exhaustedBounds = +1
1240 1 : return nil, base.LazyValue{}
1241 1 : }
1242 : }
1243 1 : return key, val
1244 : }
1245 : // Did not find prefix in the existing data block. This is the slow-path
1246 : // where we effectively seek the iterator.
1247 1 : var ikey *InternalKey
1248 1 : // The key is likely to be in the next data block, so try one step.
1249 1 : if ikey, _ = i.index.Next(); ikey == nil {
1250 1 : // The target key is greater than any key in the index block.
1251 1 : // Invalidate the block iterator so that a subsequent call to Prev()
1252 1 : // will return the last key in the table.
1253 1 : i.data.invalidate()
1254 1 : return nil, base.LazyValue{}
1255 1 : }
1256 1 : if i.cmp(succKey, ikey.UserKey) > 0 {
1257 1 : // Not in the next data block, so seek the index.
1258 1 : if ikey, _ = i.index.SeekGE(succKey, base.SeekGEFlagsNone); ikey == nil {
1259 1 : // The target key is greater than any key in the index block.
1260 1 : // Invalidate the block iterator so that a subsequent call to Prev()
1261 1 : // will return the last key in the table.
1262 1 : i.data.invalidate()
1263 1 : return nil, base.LazyValue{}
1264 1 : }
1265 : }
1266 1 : result := i.loadBlock(+1)
1267 1 : if result == loadBlockFailed {
1268 1 : return nil, base.LazyValue{}
1269 1 : }
1270 1 : if result == loadBlockIrrelevant {
1271 0 : // Enforce the upper bound here, since we don't want to bother moving
1272 0 : // to the next block if the upper bound is already exceeded. Note that
1273 0 : // the next block starts with keys >= ikey.UserKey since even
1274 0 : // though this is the block separator, the same user key can span
1275 0 : // multiple blocks. If upper is exclusive we use >= below, else we use
1276 0 : // >.
1277 0 : if i.upper != nil {
1278 0 : cmp := i.cmp(ikey.UserKey, i.upper)
1279 0 : if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
1280 0 : i.exhaustedBounds = +1
1281 0 : return nil, base.LazyValue{}
1282 0 : }
1283 : }
1284 1 : } else if key, val := i.data.SeekGE(succKey, base.SeekGEFlagsNone); key != nil {
1285 1 : if i.blockUpper != nil {
1286 0 : cmp := i.cmp(key.UserKey, i.blockUpper)
1287 0 : if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
1288 0 : i.exhaustedBounds = +1
1289 0 : return nil, base.LazyValue{}
1290 0 : }
1291 : }
1292 1 : return i.maybeVerifyKey(key, val)
1293 : }
1294 :
1295 1 : return i.skipForward()
1296 : }
1297 :
1298 : // Prev implements internalIterator.Prev, as documented in the pebble
1299 : // package.
1300 1 : func (i *singleLevelIterator) Prev() (*InternalKey, base.LazyValue) {
1301 1 : if i.exhaustedBounds == -1 {
1302 0 : panic("Prev called even though exhausted lower bound")
1303 : }
1304 1 : i.exhaustedBounds = 0
1305 1 : i.maybeFilteredKeysSingleLevel = false
1306 1 : // Seek optimization only applies until iterator is first positioned after SetBounds.
1307 1 : i.boundsCmp = 0
1308 1 :
1309 1 : if i.err != nil {
1310 1 : return nil, base.LazyValue{}
1311 1 : }
1312 1 : if key, val := i.data.Prev(); key != nil {
1313 1 : if i.blockLower != nil && i.cmp(key.UserKey, i.blockLower) < 0 {
1314 1 : i.exhaustedBounds = -1
1315 1 : return nil, base.LazyValue{}
1316 1 : }
1317 1 : return key, val
1318 : }
1319 1 : return i.skipBackward()
1320 : }
1321 :
1322 1 : func (i *singleLevelIterator) skipForward() (*InternalKey, base.LazyValue) {
1323 1 : for {
1324 1 : var key *InternalKey
1325 1 : if key, _ = i.index.Next(); key == nil {
1326 1 : i.data.invalidate()
1327 1 : break
1328 : }
1329 1 : result := i.loadBlock(+1)
1330 1 : if result != loadBlockOK {
1331 1 : if i.err != nil {
1332 1 : break
1333 : }
1334 1 : if result == loadBlockFailed {
1335 0 : // We checked that i.index was at a valid entry, so
1336 0 : // loadBlockFailed could not have happened due to i.index
1337 0 : // being exhausted, and must be due to an error.
1338 0 : panic("loadBlock should not have failed with no error")
1339 : }
1340 : // result == loadBlockIrrelevant. Enforce the upper bound here
1341 : // since we don't want to bother moving to the next block if the upper
1342 : // bound is already exceeded. Note that the next block starts with
1343 : // keys >= key.UserKey since even though this is the block
1344 : // separator, the same user key can span multiple blocks. If upper
1345 : // is exclusive we use >= below, else we use >.
1346 1 : if i.upper != nil {
1347 1 : cmp := i.cmp(key.UserKey, i.upper)
1348 1 : if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
1349 1 : i.exhaustedBounds = +1
1350 1 : return nil, base.LazyValue{}
1351 1 : }
1352 : }
1353 1 : continue
1354 : }
1355 1 : if key, val := i.data.First(); key != nil {
1356 1 : if i.blockUpper != nil {
1357 1 : cmp := i.cmp(key.UserKey, i.blockUpper)
1358 1 : if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
1359 1 : i.exhaustedBounds = +1
1360 1 : return nil, base.LazyValue{}
1361 1 : }
1362 : }
1363 1 : return i.maybeVerifyKey(key, val)
1364 : }
1365 : }
1366 1 : return nil, base.LazyValue{}
1367 : }
1368 :
1369 1 : func (i *singleLevelIterator) skipBackward() (*InternalKey, base.LazyValue) {
1370 1 : for {
1371 1 : var key *InternalKey
1372 1 : if key, _ = i.index.Prev(); key == nil {
1373 1 : i.data.invalidate()
1374 1 : break
1375 : }
1376 1 : result := i.loadBlock(-1)
1377 1 : if result != loadBlockOK {
1378 1 : if i.err != nil {
1379 1 : break
1380 : }
1381 1 : if result == loadBlockFailed {
1382 0 : // We checked that i.index was at a valid entry, so
1383 0 : // loadBlockFailed could not have happened due to i.index
1384 0 : // being exhausted, and must be due to an error.
1385 0 : panic("loadBlock should not have failed with no error")
1386 : }
1387 : // result == loadBlockIrrelevant. Enforce the lower bound here
1388 : // since we don't want to bother moving to the previous block if the lower
1389 : // bound is already exceeded. Note that the previous block starts with
1390 : // keys <= key.UserKey since even though this is the current block's
1391 : // separator, the same user key can span multiple blocks.
1392 1 : if i.lower != nil && i.cmp(key.UserKey, i.lower) < 0 {
1393 0 : i.exhaustedBounds = -1
1394 0 : return nil, base.LazyValue{}
1395 0 : }
1396 1 : continue
1397 : }
1398 1 : key, val := i.data.Last()
1399 1 : if key == nil {
1400 1 : return nil, base.LazyValue{}
1401 1 : }
1402 1 : if i.blockLower != nil && i.cmp(key.UserKey, i.blockLower) < 0 {
1403 1 : i.exhaustedBounds = -1
1404 1 : return nil, base.LazyValue{}
1405 1 : }
1406 1 : return i.maybeVerifyKey(key, val)
1407 : }
1408 1 : return nil, base.LazyValue{}
1409 : }
1410 :
1411 : // Error implements internalIterator.Error, as documented in the pebble
1412 : // package.
1413 1 : func (i *singleLevelIterator) Error() error {
1414 1 : if err := i.data.Error(); err != nil {
1415 0 : return err
1416 0 : }
1417 1 : return i.err
1418 : }
1419 :
1420 : // MaybeFilteredKeys may be called when an iterator is exhausted to indicate
1421 : // whether or not the last positioning method may have skipped any keys due to
1422 : // block-property filters.
1423 1 : func (i *singleLevelIterator) MaybeFilteredKeys() bool {
1424 1 : return i.maybeFilteredKeysSingleLevel
1425 1 : }
1426 :
1427 : // SetCloseHook sets a function that will be called when the iterator is
1428 : // closed.
1429 1 : func (i *singleLevelIterator) SetCloseHook(fn func(i Iterator) error) {
1430 1 : i.closeHook = fn
1431 1 : }
1432 :
1433 1 : func firstError(err0, err1 error) error {
1434 1 : if err0 != nil {
1435 1 : return err0
1436 1 : }
1437 1 : return err1
1438 : }
1439 :
1440 : // Close implements internalIterator.Close, as documented in the pebble
1441 : // package.
1442 1 : func (i *singleLevelIterator) Close() error {
1443 1 : i.iterStats.close()
1444 1 : var err error
1445 1 : if i.closeHook != nil {
1446 1 : err = firstError(err, i.closeHook(i))
1447 1 : }
1448 1 : err = firstError(err, i.data.Close())
1449 1 : err = firstError(err, i.index.Close())
1450 1 : if i.dataRH != nil {
1451 1 : err = firstError(err, i.dataRH.Close())
1452 1 : i.dataRH = nil
1453 1 : }
1454 1 : err = firstError(err, i.err)
1455 1 : if i.bpfs != nil {
1456 1 : releaseBlockPropertiesFilterer(i.bpfs)
1457 1 : }
1458 1 : if i.vbReader != nil {
1459 1 : i.vbReader.close()
1460 1 : }
1461 1 : if i.vbRH != nil {
1462 1 : err = firstError(err, i.vbRH.Close())
1463 1 : i.vbRH = nil
1464 1 : }
1465 1 : *i = i.resetForReuse()
1466 1 : singleLevelIterPool.Put(i)
1467 1 : return err
1468 : }
1469 :
1470 1 : func (i *singleLevelIterator) String() string {
1471 1 : if i.vState != nil {
1472 1 : return i.vState.fileNum.String()
1473 1 : }
1474 1 : return i.reader.fileNum.String()
1475 : }
|