Line data Source code
1 : // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package sstable
6 :
7 : import (
8 : "context"
9 : "fmt"
10 : "unsafe"
11 :
12 : "github.com/cockroachdb/pebble/internal/base"
13 : "github.com/cockroachdb/pebble/internal/invariants"
14 : "github.com/cockroachdb/pebble/objstorage"
15 : "github.com/cockroachdb/pebble/objstorage/objstorageprovider"
16 : "github.com/cockroachdb/pebble/objstorage/objstorageprovider/objiotracing"
17 : )
18 :
// singleLevelIterator iterates over an entire table of data. To seek for a given
// key, it first looks in the index for the block that contains that key, and then
// looks inside that block.
type singleLevelIterator struct {
	// ctx is used for tracing block reads; see loadBlock and readBlockForVBR.
	ctx context.Context
	cmp Compare
	// Global lower/upper bound for the iterator.
	lower []byte
	upper []byte
	bpfs  *BlockPropertiesFilterer
	// Per-block lower/upper bound. Nil if the bound does not apply to the block
	// because we determined the block lies completely within the bound.
	blockLower []byte
	blockUpper []byte
	reader     *Reader
	// vState will be set iff the iterator is constructed for virtual sstable
	// iteration.
	vState *virtualState
	// endKeyInclusive is set to force the iterator to treat the upper field as
	// inclusive while iterating instead of exclusive.
	endKeyInclusive bool
	index           blockIter
	data            blockIter
	dataRH          objstorage.ReadHandle
	dataRHPrealloc  objstorageprovider.PreallocatedReadHandle
	// dataBH refers to the last data block that the iterator considered
	// loading. It may not actually have loaded the block, due to an error or
	// because it was considered irrelevant.
	dataBH   BlockHandle
	vbReader *valueBlockReader
	// vbRH is the read handle for value blocks, which are in a different
	// part of the sstable than data blocks.
	vbRH         objstorage.ReadHandle
	vbRHPrealloc objstorageprovider.PreallocatedReadHandle
	err          error
	closeHook    func(i Iterator) error
	// stats and iterStats are slightly different. stats is a shared struct
	// supplied from the outside, and represents stats for the whole iterator
	// tree and can be reset from the outside (e.g. when the pebble.Iterator is
	// being reused). It is currently only provided when the iterator tree is
	// rooted at pebble.Iterator. iterStats is this sstable iterator's private
	// stats that are reported to a CategoryStatsCollector when this iterator is
	// closed. More paths are instrumented with this as the
	// CategoryStatsCollector needed for this is provided by the
	// tableCacheContainer (which is more universally used).
	stats      *base.InternalIteratorStats
	iterStats  iterStatsAccumulator
	bufferPool *BufferPool

	// boundsCmp and positionedUsingLatestBounds are for optimizing iteration
	// that uses multiple adjacent bounds. The seek after setting a new bound
	// can use the fact that the iterator is either within the previous bounds
	// or exactly one key before or after the bounds. If the new bounds is
	// after/before the previous bounds, and we are already positioned at a
	// block that is relevant for the new bounds, we can try to first position
	// using Next/Prev (repeatedly) instead of doing a more expensive seek.
	//
	// When there are wide files at higher levels that match the bounds
	// but don't have any data for the bound, we will already be
	// positioned at the key beyond the bounds and won't need to do much
	// work -- given that most data is in L6, such files are likely to
	// dominate the performance of the mergingIter, and may be the main
	// benefit of this performance optimization (of course it also helps
	// when the file that has the data has successive seeks that stay in
	// the same block).
	//
	// Specifically, boundsCmp captures the relationship between the previous
	// and current bounds, if the iterator had been positioned after setting
	// the previous bounds. If it was not positioned, i.e., Seek/First/Last
	// were not called, we don't know where it is positioned and cannot
	// optimize.
	//
	// Example: Bounds moving forward, and iterator exhausted in forward direction.
	//      bounds = [f, h), ^ shows block iterator position
	//  file contents [ a  b  c  d  e  f  g  h  i  j  k ]
	//                                       ^
	//  new bounds = [j, k). Since positionedUsingLatestBounds=true, boundsCmp is
	//  set to +1. SeekGE(j) can use next (the optimization also requires that j
	//  is within the block, but that is not for correctness, but to limit the
	//  optimization to when it will actually be an optimization).
	//
	// Example: Bounds moving forward.
	//      bounds = [f, h), ^ shows block iterator position
	//  file contents [ a  b  c  d  e  f  g  h  i  j  k ]
	//                              ^
	//  new bounds = [j, k). Since positionedUsingLatestBounds=true, boundsCmp is
	//  set to +1. SeekGE(j) can use next.
	//
	// Example: Bounds moving forward, but iterator not positioned using previous
	//  bounds.
	//      bounds = [f, h), ^ shows block iterator position
	//  file contents [ a  b  c  d  e  f  g  h  i  j  k ]
	//                                             ^
	//  new bounds = [i, j). Iterator is at j since it was never positioned using
	//  [f, h). So positionedUsingLatestBounds=false, and boundsCmp is set to 0.
	//  SeekGE(i) will not use next.
	//
	// Example: Bounds moving forward and sparse file
	//      bounds = [f, h), ^ shows block iterator position
	//  file contents [ a z ]
	//                    ^
	//  new bounds = [j, k). Since positionedUsingLatestBounds=true, boundsCmp is
	//  set to +1. SeekGE(j) notices that the iterator is already past j and does
	//  not need to do anything.
	//
	// Similar examples can be constructed for backward iteration.
	//
	// This notion of exactly one key before or after the bounds is not quite
	// true when block properties are used to ignore blocks. In that case we
	// can't stop precisely at the first block that is past the bounds since
	// we are using the index entries to enforce the bounds.
	//
	// e.g. 3 blocks with keys [b, c]  [f, g], [i, j, k] with index entries d,
	// h, l. And let the lower bound be k, and we are reverse iterating. If
	// the block [i, j, k] is ignored due to the block interval annotations we
	// do need to move the index to block [f, g] since the index entry for the
	// [i, j, k] block is l which is not less than the lower bound of k. So we
	// have passed the entries i, j.
	//
	// This behavior is harmless since the block property filters are fixed
	// for the lifetime of the iterator so i, j are irrelevant. In addition,
	// the current code will not load the [f, g] block, so the seek
	// optimization that attempts to use Next/Prev do not apply anyway.
	boundsCmp                   int
	positionedUsingLatestBounds bool

	// exhaustedBounds represents whether the iterator is exhausted for
	// iteration by reaching the upper or lower bound. +1 when exhausted
	// the upper bound, -1 when exhausted the lower bound, and 0 when
	// neither. exhaustedBounds is also used for the TrySeekUsingNext
	// optimization in twoLevelIterator and singleLevelIterator. Care should be
	// taken in setting this in twoLevelIterator before calling into
	// singleLevelIterator, given that these two iterators share this field.
	exhaustedBounds int8

	// maybeFilteredKeysSingleLevel indicates whether the last iterator
	// positioning operation may have skipped any data blocks due to
	// block-property filters when positioning the index.
	maybeFilteredKeysSingleLevel bool

	// useFilter specifies whether the filter block in this sstable, if present,
	// should be used for prefix seeks or not. In some cases it is beneficial
	// to skip a filter block even if it exists (eg. if probability of a match
	// is high).
	useFilter              bool
	lastBloomFilterMatched bool

	hideObsoletePoints bool
}
168 :
// singleLevelIterator implements the base.InternalIterator interface.
// (Compile-time assertion; a build failure here means the interface drifted.)
var _ base.InternalIterator = (*singleLevelIterator)(nil)
171 :
// init initializes a singleLevelIterator for reading from the table. It is
// synonymous with Reader.NewIter, but allows for reusing of the iterator
// between different Readers.
//
// Note that lower, upper passed into init has nothing to do with virtual sstable
// bounds. If the virtualState passed in is not nil, then virtual sstable bounds
// will be enforced.
//
// On error the iterator is left partially initialized; callers must not use it.
func (i *singleLevelIterator) init(
	ctx context.Context,
	r *Reader,
	v *virtualState,
	lower, upper []byte,
	filterer *BlockPropertiesFilterer,
	useFilter, hideObsoletePoints bool,
	stats *base.InternalIteratorStats,
	categoryAndQoS CategoryAndQoS,
	statsCollector *CategoryStatsCollector,
	rp ReaderProvider,
	bufferPool *BufferPool,
) error {
	if r.err != nil {
		return r.err
	}
	i.iterStats.init(categoryAndQoS, statsCollector)
	indexH, err := r.readIndex(ctx, stats, &i.iterStats)
	if err != nil {
		return err
	}
	if v != nil {
		i.vState = v
		// Clamp the caller-provided bounds to the virtual sstable's bounds.
		// Note that upper is treated as exclusive here.
		i.endKeyInclusive, lower, upper = v.constrainBounds(lower, upper, false /* endInclusive */)
	}

	i.ctx = ctx
	i.lower = lower
	i.upper = upper
	i.bpfs = filterer
	i.useFilter = useFilter
	i.reader = r
	i.cmp = r.Compare
	i.stats = stats
	i.hideObsoletePoints = hideObsoletePoints
	i.bufferPool = bufferPool
	err = i.index.initHandle(i.cmp, indexH, r.Properties.GlobalSeqNum, false)
	if err != nil {
		// blockIter.Close releases indexH and always returns a nil error
		_ = i.index.Close()
		return err
	}
	i.dataRH = objstorageprovider.UsePreallocatedReadHandle(ctx, r.readable, &i.dataRHPrealloc)
	if r.tableFormat >= TableFormatPebblev3 {
		if r.Properties.NumValueBlocks > 0 {
			// NB: we cannot avoid this ~248 byte allocation, since valueBlockReader
			// can outlive the singleLevelIterator due to be being embedded in a
			// LazyValue. This consumes ~2% in microbenchmark CPU profiles, but we
			// should only optimize this if it shows up as significant in end-to-end
			// CockroachDB benchmarks, since it is tricky to do so. One possibility
			// is that if many sstable iterators only get positioned at latest
			// versions of keys, and therefore never expose a LazyValue that is
			// separated to their callers, they can put this valueBlockReader into a
			// sync.Pool.
			i.vbReader = &valueBlockReader{
				bpOpen: i,
				rp:     rp,
				vbih:   r.valueBIH,
				stats:  stats,
			}
			i.data.lazyValueHandling.vbr = i.vbReader
			i.vbRH = objstorageprovider.UsePreallocatedReadHandle(ctx, r.readable, &i.vbRHPrealloc)
		}
		i.data.lazyValueHandling.hasValuePrefix = true
	}
	return nil
}
246 :
// maybeVerifyKey checks that keys returned from the iterator are within the
// global and virtual bounds. It is a pass-through (returns its arguments
// unchanged) unless invariants are enabled and the iterator is backed by a
// virtual sstable, in which case an out-of-bounds key panics.
func (i *singleLevelIterator) maybeVerifyKey(
	iKey *InternalKey, val base.LazyValue,
) (*InternalKey, base.LazyValue) {
	// maybeVerifyKey is only used for virtual sstable iterators.
	if invariants.Enabled && i.vState != nil && iKey != nil {
		key := iKey.UserKey

		// Compare the key against both the iterator's (constrained) bounds and
		// the virtual sstable's own bounds.
		uc, vuc := i.cmp(key, i.upper), i.cmp(key, i.vState.upper.UserKey)
		lc, vlc := i.cmp(key, i.lower), i.cmp(key, i.vState.lower.UserKey)

		// Upper bounds are exclusive (unless endKeyInclusive, or the virtual
		// upper is not an exclusive sentinel); lower bounds are inclusive.
		if (i.vState.upper.IsExclusiveSentinel() && vuc == 0) || (!i.endKeyInclusive && uc == 0) || uc > 0 || vuc > 0 || lc < 0 || vlc < 0 {
			panic(fmt.Sprintf("key: %s out of bounds of singleLevelIterator", key))
		}
	}
	return iKey, val
}
264 :
// setupForCompaction sets up the singleLevelIterator for use with compactionIter.
// Currently, it skips readahead ramp-up. It should be called after init is called.
func (i *singleLevelIterator) setupForCompaction() {
	i.dataRH.SetupForCompaction()
	if i.vbRH != nil {
		// vbRH is only non-nil when the sstable contains value blocks; see init.
		i.vbRH.SetupForCompaction()
	}
}
273 :
// resetForReuse returns a zero-valued singleLevelIterator that retains only
// the index and data block iterators' reusable state, allowing a recycled
// iterator to avoid reallocating those structures.
func (i *singleLevelIterator) resetForReuse() singleLevelIterator {
	return singleLevelIterator{
		index: i.index.resetForReuse(),
		data:  i.data.resetForReuse(),
	}
}
280 :
// initBounds computes the per-block bounds (blockLower/blockUpper) for the
// currently loaded data block. NB: it positions i.data at the block's first
// entry (via First) as a side effect when a lower bound is set; callers are
// expected to reposition i.data afterwards.
func (i *singleLevelIterator) initBounds() {
	// Trim the iteration bounds for the current block. We don't have to check
	// the bounds on each iteration if the block is entirely contained within the
	// iteration bounds.
	i.blockLower = i.lower
	if i.blockLower != nil {
		key, _ := i.data.First()
		if key != nil && i.cmp(i.blockLower, key.UserKey) < 0 {
			// The lower-bound is less than the first key in the block. No need
			// to check the lower-bound again for this block.
			i.blockLower = nil
		}
	}
	i.blockUpper = i.upper
	if i.blockUpper != nil && i.cmp(i.blockUpper, i.index.Key().UserKey) > 0 {
		// The upper-bound is greater than the index key which itself is greater
		// than or equal to every key in the block. No need to check the
		// upper-bound again for this block. Even if blockUpper is inclusive
		// because of upper being inclusive, we can still safely set blockUpper
		// to nil here.
		//
		// TODO(bananabrick): We could also set blockUpper to nil for the >=
		// case, if blockUpper is inclusive.
		i.blockUpper = nil
	}
}
307 :
// disableBoundsOpt deterministically disables the bounds-based optimization
// that avoids seeking, for a pseudo-random subset of (bound, iterator) pairs.
// The iterator pointer is mixed in so that distinct iterators behave
// differently for the same SetBounds call. Used for tests.
func disableBoundsOpt(bound []byte, ptr uintptr) bool {
	// Fibonacci hashing of the pointer; see
	// https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
	const fibonacciMultiplier = 11400714819323198485
	topBit := (fibonacciMultiplier * uint64(ptr)) >> 63
	lastByteIsEven := bound[len(bound)-1]&byte(1) == 0
	return lastByteIsEven && topBit == 0
}
316 :
// ensureBoundsOptDeterminism provides a facility for disabling of the bounds
// optimizations performed by disableBoundsOpt for tests that require
// deterministic iterator behavior. Some unit tests examine internal iterator
// state and require this behavior to be deterministic.
var ensureBoundsOptDeterminism bool
322 :
323 : // SetBounds implements internalIterator.SetBounds, as documented in the pebble
324 : // package. Note that the upper field is exclusive.
325 1 : func (i *singleLevelIterator) SetBounds(lower, upper []byte) {
326 1 : i.boundsCmp = 0
327 1 : if i.vState != nil {
328 1 : // If the reader is constructed for a virtual sstable, then we must
329 1 : // constrain the bounds of the reader. For physical sstables, the bounds
330 1 : // can be wider than the actual sstable's bounds because we won't
331 1 : // accidentally expose additional keys as there are no additional keys.
332 1 : i.endKeyInclusive, lower, upper = i.vState.constrainBounds(
333 1 : lower, upper, false,
334 1 : )
335 1 : } else {
336 1 : // TODO(bananabrick): Figure out the logic here to enable the boundsCmp
337 1 : // optimization for virtual sstables.
338 1 : if i.positionedUsingLatestBounds {
339 1 : if i.upper != nil && lower != nil && i.cmp(i.upper, lower) <= 0 {
340 1 : i.boundsCmp = +1
341 1 : if invariants.Enabled && !ensureBoundsOptDeterminism &&
342 1 : disableBoundsOpt(lower, uintptr(unsafe.Pointer(i))) {
343 1 : i.boundsCmp = 0
344 1 : }
345 1 : } else if i.lower != nil && upper != nil && i.cmp(upper, i.lower) <= 0 {
346 1 : i.boundsCmp = -1
347 1 : if invariants.Enabled && !ensureBoundsOptDeterminism &&
348 1 : disableBoundsOpt(upper, uintptr(unsafe.Pointer(i))) {
349 1 : i.boundsCmp = 0
350 1 : }
351 : }
352 : }
353 : }
354 :
355 1 : i.positionedUsingLatestBounds = false
356 1 : i.lower = lower
357 1 : i.upper = upper
358 1 : i.blockLower = nil
359 1 : i.blockUpper = nil
360 : }
361 :
// SetContext replaces the context used for tracing subsequent block reads.
func (i *singleLevelIterator) SetContext(ctx context.Context) {
	i.ctx = ctx
}
365 :
// loadBlock loads the block at the current index position and leaves i.data
// unpositioned. If unsuccessful, it sets i.err to any error encountered, which
// may be nil if we have simply exhausted the entire table.
func (i *singleLevelIterator) loadBlock(dir int8) loadBlockResult {
	if !i.index.valid() {
		// Ensure the data block iterator is invalidated even if loading of the block
		// fails.
		i.data.invalidate()
		return loadBlockFailed
	}
	// Load the next block.
	v := i.index.value()
	bhp, err := decodeBlockHandleWithProperties(v.InPlaceValue())
	// NOTE(review): err is deliberately checked only after the fast path below.
	// On decode failure bhp is zero-valued; the fast path additionally requires
	// i.data.valid(), so a stale match appears impossible — confirm upstream.
	if i.dataBH == bhp.BlockHandle && i.data.valid() {
		// We're already at the data block we want to load. Reset bounds in case
		// they changed since the last seek, but don't reload the block from cache
		// or disk.
		//
		// It's safe to leave i.data in its original state here, as all callers to
		// loadBlock make an absolute positioning call (i.e. a seek, first, or last)
		// to `i.data` right after loadBlock returns loadBlockOK.
		i.initBounds()
		return loadBlockOK
	}
	// Ensure the data block iterator is invalidated even if loading of the block
	// fails.
	i.data.invalidate()
	i.dataBH = bhp.BlockHandle
	if err != nil {
		i.err = errCorruptIndexEntry
		return loadBlockFailed
	}
	if i.bpfs != nil {
		intersects, err := i.bpfs.intersects(bhp.Props)
		if err != nil {
			i.err = errCorruptIndexEntry
			return loadBlockFailed
		}
		if intersects == blockMaybeExcluded {
			intersects = i.resolveMaybeExcluded(dir)
		}
		if intersects == blockExcluded {
			i.maybeFilteredKeysSingleLevel = true
			return loadBlockIrrelevant
		}
		// blockIntersects
	}
	ctx := objiotracing.WithBlockType(i.ctx, objiotracing.DataBlock)
	block, err := i.reader.readBlock(
		ctx, i.dataBH, nil /* transform */, i.dataRH, i.stats, &i.iterStats, i.bufferPool)
	if err != nil {
		i.err = err
		return loadBlockFailed
	}
	i.err = i.data.initHandle(i.cmp, block, i.reader.Properties.GlobalSeqNum, i.hideObsoletePoints)
	if i.err != nil {
		// The block is partially loaded, and we don't want it to appear valid.
		i.data.invalidate()
		return loadBlockFailed
	}
	i.initBounds()
	return loadBlockOK
}
429 :
// readBlockForVBR implements the blockProviderWhenOpen interface for use by
// the valueBlockReader. It reads the value block identified by h, tagging the
// read with the ValueBlock block type for IO tracing.
func (i *singleLevelIterator) readBlockForVBR(
	h BlockHandle, stats *base.InternalIteratorStats,
) (bufferHandle, error) {
	ctx := objiotracing.WithBlockType(i.ctx, objiotracing.ValueBlock)
	return i.reader.readBlock(ctx, h, nil, i.vbRH, stats, &i.iterStats, i.bufferPool)
}
438 :
// resolveMaybeExcluded is invoked when the block-property filterer has found
// that a block is excluded according to its properties but only if its bounds
// fall within the filter's current bounds. This function consults the
// appropriate bound, depending on the iteration direction, and returns either
// `blockIntersects` or `blockExcluded`.
func (i *singleLevelIterator) resolveMaybeExcluded(dir int8) intersectsResult {
	// TODO(jackson): We could first try comparing to top-level index block's
	// key, and if within bounds avoid per-data block key comparisons.

	// This iterator is configured with a bound-limited block property
	// filter. The bpf determined this block could be excluded from
	// iteration based on the property encoded in the block handle.
	// However, we still need to determine if the block is wholly
	// contained within the filter's key bounds.
	//
	// External guarantees ensure all the block's keys are ≥ the
	// filter's lower bound during forward iteration, and that all the
	// block's keys are < the filter's upper bound during backward
	// iteration. We only need to determine if the opposite bound is
	// also met.
	//
	// The index separator in index.Key() provides an inclusive
	// upper-bound for the data block's keys, guaranteeing that all its
	// keys are ≤ index.Key(). For forward iteration, this is all we
	// need.
	if dir > 0 {
		// Forward iteration.
		if i.bpfs.boundLimitedFilter.KeyIsWithinUpperBound(i.index.Key().UserKey) {
			return blockExcluded
		}
		return blockIntersects
	}

	// Reverse iteration.
	//
	// Because we're iterating in the reverse direction, we don't yet have
	// enough context available to determine if the block is wholly contained
	// within its bounds. This case arises only during backward iteration,
	// because of the way the index is structured.
	//
	// Consider a bound-limited bpf limited to the bounds [b,d), loading the
	// block with separator `c`. During reverse iteration, the guarantee that
	// all the block's keys are < `d` is externally provided, but no guarantee
	// is made on the bpf's lower bound. The separator `c` only provides an
	// inclusive upper bound on the block's keys, indicating that the
	// corresponding block handle points to a block containing only keys ≤ `c`.
	//
	// To establish a lower bound, we step the index backwards to read the
	// previous block's separator, which provides an inclusive lower bound on
	// the original block's keys. Afterwards, we step forward to restore our
	// index position.
	if peekKey, _ := i.index.Prev(); peekKey == nil {
		// The original block points to the first block of this index block. If
		// there's a two-level index, it could potentially provide a lower
		// bound, but the code refactoring necessary to read it doesn't seem
		// worth the payoff. We fall through to loading the block.
	} else if i.bpfs.boundLimitedFilter.KeyIsWithinLowerBound(peekKey.UserKey) {
		// The lower-bound on the original block falls within the filter's
		// bounds, and we can skip the block (after restoring our current index
		// position).
		_, _ = i.index.Next()
		return blockExcluded
	}
	_, _ = i.index.Next()
	return blockIntersects
}
505 :
// initBoundsForAlreadyLoadedBlock is like initBounds but assumes the data
// block iterator is already loaded and positioned: it inspects the block's
// first user key via getFirstUserKey instead of repositioning with First,
// preserving the current position for the fast-path seek optimizations.
func (i *singleLevelIterator) initBoundsForAlreadyLoadedBlock() {
	if i.data.getFirstUserKey() == nil {
		panic("initBoundsForAlreadyLoadedBlock must not be called on empty or corrupted block")
	}
	i.blockLower = i.lower
	if i.blockLower != nil {
		firstUserKey := i.data.getFirstUserKey()
		if firstUserKey != nil && i.cmp(i.blockLower, firstUserKey) < 0 {
			// The lower-bound is less than the first key in the block. No need
			// to check the lower-bound again for this block.
			i.blockLower = nil
		}
	}
	i.blockUpper = i.upper
	if i.blockUpper != nil && i.cmp(i.blockUpper, i.index.Key().UserKey) > 0 {
		// The upper-bound is greater than the index key which itself is greater
		// than or equal to every key in the block. No need to check the
		// upper-bound again for this block.
		i.blockUpper = nil
	}
}
527 :
// The number of times to call Next/Prev in a block before giving up and seeking.
// The value of 4 is arbitrary.
// TODO(sumeer): experiment with dynamic adjustment based on the history of
// seeks for a particular iterator.
const numStepsBeforeSeek = 4
533 :
534 : func (i *singleLevelIterator) trySeekGEUsingNextWithinBlock(
535 : key []byte,
536 1 : ) (k *InternalKey, v base.LazyValue, done bool) {
537 1 : k, v = i.data.Key(), i.data.value()
538 1 : for j := 0; j < numStepsBeforeSeek; j++ {
539 1 : curKeyCmp := i.cmp(k.UserKey, key)
540 1 : if curKeyCmp >= 0 {
541 1 : if i.blockUpper != nil {
542 1 : cmp := i.cmp(k.UserKey, i.blockUpper)
543 1 : if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
544 1 : i.exhaustedBounds = +1
545 1 : return nil, base.LazyValue{}, true
546 1 : }
547 : }
548 1 : return k, v, true
549 : }
550 1 : k, v = i.data.Next()
551 1 : if k == nil {
552 1 : break
553 : }
554 : }
555 1 : return k, v, false
556 : }
557 :
558 : func (i *singleLevelIterator) trySeekLTUsingPrevWithinBlock(
559 : key []byte,
560 1 : ) (k *InternalKey, v base.LazyValue, done bool) {
561 1 : k, v = i.data.Key(), i.data.value()
562 1 : for j := 0; j < numStepsBeforeSeek; j++ {
563 1 : curKeyCmp := i.cmp(k.UserKey, key)
564 1 : if curKeyCmp < 0 {
565 1 : if i.blockLower != nil && i.cmp(k.UserKey, i.blockLower) < 0 {
566 1 : i.exhaustedBounds = -1
567 1 : return nil, base.LazyValue{}, true
568 1 : }
569 1 : return k, v, true
570 : }
571 1 : k, v = i.data.Prev()
572 1 : if k == nil {
573 1 : break
574 : }
575 : }
576 1 : return k, v, false
577 : }
578 :
579 1 : func (i *singleLevelIterator) recordOffset() uint64 {
580 1 : offset := i.dataBH.Offset
581 1 : if i.data.valid() {
582 1 : // - i.dataBH.Length/len(i.data.data) is the compression ratio. If
583 1 : // uncompressed, this is 1.
584 1 : // - i.data.nextOffset is the uncompressed position of the current record
585 1 : // in the block.
586 1 : // - i.dataBH.Offset is the offset of the block in the sstable before
587 1 : // decompression.
588 1 : offset += (uint64(i.data.nextOffset) * i.dataBH.Length) / uint64(len(i.data.data))
589 1 : } else {
590 1 : // Last entry in the block must increment bytes iterated by the size of the block trailer
591 1 : // and restart points.
592 1 : offset += i.dataBH.Length + blockTrailerLen
593 1 : }
594 1 : return offset
595 : }
596 :
// SeekGE implements internalIterator.SeekGE, as documented in the pebble
// package. Note that SeekGE only checks the upper bound. It is up to the
// caller to ensure that key is greater than or equal to the lower bound.
func (i *singleLevelIterator) SeekGE(
	key []byte, flags base.SeekGEFlags,
) (*InternalKey, base.LazyValue) {
	if i.vState != nil {
		// Callers of SeekGE don't know about virtual sstable bounds, so we may
		// have to internally restrict the bounds.
		//
		// TODO(bananabrick): We can optimize this check away for the level iter
		// if necessary.
		if i.cmp(key, i.lower) < 0 {
			key = i.lower
		}
	}

	if flags.TrySeekUsingNext() {
		// The i.exhaustedBounds comparison indicates that the upper bound was
		// reached. The i.data.isDataInvalidated() indicates that the sstable was
		// exhausted.
		if (i.exhaustedBounds == +1 || i.data.isDataInvalidated()) && i.err == nil {
			// Already exhausted, so return nil.
			return nil, base.LazyValue{}
		}
		if i.err != nil {
			// The current iterator position cannot be used.
			flags = flags.DisableTrySeekUsingNext()
		}
		// INVARIANT: flags.TrySeekUsingNext() => i.err == nil &&
		// !i.exhaustedBounds==+1 && !i.data.isDataInvalidated(). That is,
		// data-exhausted and bounds-exhausted, as defined earlier, are both
		// false. This makes it safe to clear out i.exhaustedBounds and i.err
		// before calling into seekGEHelper.
	}

	i.exhaustedBounds = 0
	i.err = nil // clear cached iteration error
	boundsCmp := i.boundsCmp
	// Seek optimization only applies until iterator is first positioned after SetBounds.
	i.boundsCmp = 0
	i.positionedUsingLatestBounds = true
	return i.seekGEHelper(key, boundsCmp, flags)
}
641 :
642 : // seekGEHelper contains the common functionality for SeekGE and SeekPrefixGE.
643 : func (i *singleLevelIterator) seekGEHelper(
644 : key []byte, boundsCmp int, flags base.SeekGEFlags,
645 1 : ) (*InternalKey, base.LazyValue) {
646 1 : // Invariant: trySeekUsingNext => !i.data.isDataInvalidated() && i.exhaustedBounds != +1
647 1 :
648 1 : // SeekGE performs various step-instead-of-seeking optimizations: eg enabled
649 1 : // by trySeekUsingNext, or by monotonically increasing bounds (i.boundsCmp).
650 1 : // Care must be taken to ensure that when performing these optimizations and
651 1 : // the iterator becomes exhausted, i.maybeFilteredKeys is set appropriately.
652 1 : // Consider a previous SeekGE that filtered keys from k until the current
653 1 : // iterator position.
654 1 : //
655 1 : // If the previous SeekGE exhausted the iterator, it's possible keys greater
656 1 : // than or equal to the current search key were filtered. We must not reuse
657 1 : // the current iterator position without remembering the previous value of
658 1 : // maybeFilteredKeys.
659 1 :
660 1 : var dontSeekWithinBlock bool
661 1 : if !i.data.isDataInvalidated() && !i.index.isDataInvalidated() && i.data.valid() && i.index.valid() &&
662 1 : boundsCmp > 0 && i.cmp(key, i.index.Key().UserKey) <= 0 {
663 1 : // Fast-path: The bounds have moved forward and this SeekGE is
664 1 : // respecting the lower bound (guaranteed by Iterator). We know that
665 1 : // the iterator must already be positioned within or just outside the
666 1 : // previous bounds. Therefore it cannot be positioned at a block (or
667 1 : // the position within that block) that is ahead of the seek position.
668 1 : // However it can be positioned at an earlier block. This fast-path to
669 1 : // use Next() on the block is only applied when we are already at the
670 1 : // block that the slow-path (the else-clause) would load -- this is
671 1 : // the motivation for the i.cmp(key, i.index.Key().UserKey) <= 0
672 1 : // predicate.
673 1 : i.initBoundsForAlreadyLoadedBlock()
674 1 : ikey, val, done := i.trySeekGEUsingNextWithinBlock(key)
675 1 : if done {
676 1 : return ikey, val
677 1 : }
678 1 : if ikey == nil {
679 1 : // Done with this block.
680 1 : dontSeekWithinBlock = true
681 1 : }
682 1 : } else {
683 1 : // Cannot use bounds monotonicity. But may be able to optimize if
684 1 : // caller claimed externally known invariant represented by
685 1 : // flags.TrySeekUsingNext().
686 1 : if flags.TrySeekUsingNext() {
687 1 : // seekPrefixGE or SeekGE has already ensured
688 1 : // !i.data.isDataInvalidated() && i.exhaustedBounds != +1
689 1 : currKey := i.data.Key()
690 1 : value := i.data.value()
691 1 : less := i.cmp(currKey.UserKey, key) < 0
692 1 : // We could be more sophisticated and confirm that the seek
693 1 : // position is within the current block before applying this
694 1 : // optimization. But there may be some benefit even if it is in
695 1 : // the next block, since we can avoid seeking i.index.
696 1 : for j := 0; less && j < numStepsBeforeSeek; j++ {
697 1 : currKey, value = i.Next()
698 1 : if currKey == nil {
699 1 : return nil, base.LazyValue{}
700 1 : }
701 1 : less = i.cmp(currKey.UserKey, key) < 0
702 : }
703 1 : if !less {
704 1 : if i.blockUpper != nil {
705 1 : cmp := i.cmp(currKey.UserKey, i.blockUpper)
706 1 : if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
707 0 : i.exhaustedBounds = +1
708 0 : return nil, base.LazyValue{}
709 0 : }
710 : }
711 1 : return currKey, value
712 : }
713 : }
714 :
715 : // Slow-path.
716 : // Since we're re-seeking the iterator, the previous value of
717 : // maybeFilteredKeysSingleLevel is irrelevant. If we filter out blocks
718 : // during seeking, loadBlock will set it to true.
719 1 : i.maybeFilteredKeysSingleLevel = false
720 1 :
721 1 : var ikey *InternalKey
722 1 : if ikey, _ = i.index.SeekGE(key, flags.DisableTrySeekUsingNext()); ikey == nil {
723 1 : // The target key is greater than any key in the index block.
724 1 : // Invalidate the block iterator so that a subsequent call to Prev()
725 1 : // will return the last key in the table.
726 1 : i.data.invalidate()
727 1 : return nil, base.LazyValue{}
728 1 : }
729 1 : result := i.loadBlock(+1)
730 1 : if result == loadBlockFailed {
731 0 : return nil, base.LazyValue{}
732 0 : }
733 1 : if result == loadBlockIrrelevant {
734 1 : // Enforce the upper bound here since don't want to bother moving
735 1 : // to the next block if upper bound is already exceeded. Note that
736 1 : // the next block starts with keys >= ikey.UserKey since even
737 1 : // though this is the block separator, the same user key can span
738 1 : // multiple blocks. If upper is exclusive we use >= below, else
739 1 : // we use >.
740 1 : if i.upper != nil {
741 1 : cmp := i.cmp(ikey.UserKey, i.upper)
742 1 : if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
743 1 : i.exhaustedBounds = +1
744 1 : return nil, base.LazyValue{}
745 1 : }
746 : }
747 : // Want to skip to the next block.
748 1 : dontSeekWithinBlock = true
749 : }
750 : }
751 1 : if !dontSeekWithinBlock {
752 1 : if ikey, val := i.data.SeekGE(key, flags.DisableTrySeekUsingNext()); ikey != nil {
753 1 : if i.blockUpper != nil {
754 1 : cmp := i.cmp(ikey.UserKey, i.blockUpper)
755 1 : if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
756 1 : i.exhaustedBounds = +1
757 1 : return nil, base.LazyValue{}
758 1 : }
759 : }
760 1 : return ikey, val
761 : }
762 : }
763 1 : return i.skipForward()
764 : }
765 :
// SeekPrefixGE implements internalIterator.SeekPrefixGE, as documented in the
// pebble package. Note that SeekPrefixGE only checks the upper bound. It is up
// to the caller to ensure that key is greater than or equal to the lower bound.
func (i *singleLevelIterator) SeekPrefixGE(
	prefix, key []byte, flags base.SeekGEFlags,
) (*base.InternalKey, base.LazyValue) {
	if i.vState != nil {
		// Callers of SeekPrefixGE aren't aware of virtual sstable bounds, so
		// we may have to internally restrict the bounds.
		//
		// TODO(bananabrick): We can optimize away this check for the level iter
		// if necessary.
		if i.cmp(key, i.lower) < 0 {
			// Clamp the seek key to the virtual sstable's lower bound.
			key = i.lower
		}
	}
	return i.seekPrefixGE(prefix, key, flags, i.useFilter)
}
784 :
// seekPrefixGE is the implementation of SeekPrefixGE. checkFilter controls
// whether the table's bloom filter (if present) is consulted: when the filter
// rules out prefix, the data iterator is invalidated and nil is returned
// without performing any seek.
func (i *singleLevelIterator) seekPrefixGE(
	prefix, key []byte, flags base.SeekGEFlags, checkFilter bool,
) (k *InternalKey, value base.LazyValue) {
	// NOTE: prefix is only used for bloom filter checking and not later work in
	// this method. Hence, we can use the existing iterator position if the last
	// SeekPrefixGE did not fail bloom filter matching.

	err := i.err
	i.err = nil // clear cached iteration error
	if checkFilter && i.reader.tableFilter != nil {
		if !i.lastBloomFilterMatched {
			// Iterator is not positioned based on last seek.
			flags = flags.DisableTrySeekUsingNext()
		}
		i.lastBloomFilterMatched = false
		// Check prefix bloom filter.
		var dataH bufferHandle
		dataH, i.err = i.reader.readFilter(i.ctx, i.stats, &i.iterStats)
		if i.err != nil {
			i.data.invalidate()
			return nil, base.LazyValue{}
		}
		mayContain := i.reader.tableFilter.mayContain(dataH.Get(), prefix)
		dataH.Release()
		if !mayContain {
			// This invalidation may not be necessary for correctness, and may
			// be a place to optimize later by reusing the already loaded
			// block. It was necessary in earlier versions of the code since
			// the caller was allowed to call Next when SeekPrefixGE returned
			// nil. This is no longer allowed.
			i.data.invalidate()
			return nil, base.LazyValue{}
		}
		i.lastBloomFilterMatched = true
	}
	if flags.TrySeekUsingNext() {
		// The i.exhaustedBounds comparison indicates that the upper bound was
		// reached. The i.data.isDataInvalidated() indicates that the sstable was
		// exhausted.
		if (i.exhaustedBounds == +1 || i.data.isDataInvalidated()) && err == nil {
			// Already exhausted, so return nil.
			return nil, base.LazyValue{}
		}
		if err != nil {
			// The current iterator position cannot be used.
			flags = flags.DisableTrySeekUsingNext()
		}
		// INVARIANT: flags.TrySeekUsingNext() => err == nil &&
		// !i.exhaustedBounds==+1 && !i.data.isDataInvalidated(). That is,
		// data-exhausted and bounds-exhausted, as defined earlier, are both
		// false. This makes it safe to clear out i.exhaustedBounds and i.err
		// before calling into seekGEHelper.
	}
	// Bloom filter matches, or skipped, so this method will position the
	// iterator.
	i.exhaustedBounds = 0
	boundsCmp := i.boundsCmp
	// Seek optimization only applies until iterator is first positioned after SetBounds.
	i.boundsCmp = 0
	i.positionedUsingLatestBounds = true
	k, value = i.seekGEHelper(key, boundsCmp, flags)
	return i.maybeVerifyKey(k, value)
}
848 :
// virtualLast positions the iterator at the last key of the virtual sstable.
// It should only be called if i.vState != nil.
func (i *singleLevelIterator) virtualLast() (*InternalKey, base.LazyValue) {
	if i.vState == nil {
		panic("pebble: invalid call to virtualLast")
	}

	// Seek to the first internal key >= the virtual upper bound.
	ikey, _ := i.SeekGE(i.upper, base.SeekGEFlagsNone)
	if i.endKeyInclusive {
		// Let's say the virtual sstable upper bound is c#1, with the keys c#3, c#2,
		// c#1, d, e, ... in the sstable. So, the last key in the virtual sstable is
		// c#1. We can perform SeekGE(i.upper) and then keep nexting until we find
		// the last key with userkey == i.upper.
		//
		// TODO(bananabrick): Think about how to improve this. If many internal keys
		// with the same user key at the upper bound then this could be slow, but
		// maybe the odds of having many internal keys with the same user key at the
		// upper bound are low.
		for ikey != nil && i.cmp(ikey.UserKey, i.upper) == 0 {
			ikey, _ = i.Next()
		}
		return i.Prev()
	}

	// We seeked to the first key >= i.upper.
	return i.Prev()
}
876 :
// SeekLT implements internalIterator.SeekLT, as documented in the pebble
// package. Note that SeekLT only checks the lower bound. It is up to the
// caller to ensure that key is less than or equal to the upper bound.
func (i *singleLevelIterator) SeekLT(
	key []byte, flags base.SeekLTFlags,
) (*InternalKey, base.LazyValue) {
	if i.vState != nil {
		// Might have to fix upper bound since virtual sstable bounds are not
		// known to callers of SeekLT.
		//
		// TODO(bananabrick): We can optimize away this check for the level iter
		// if necessary.
		cmp := i.cmp(key, i.upper)
		// key == i.upper is fine. We'll do the right thing and return the
		// first internal key with user key < key.
		if cmp > 0 {
			// Return the last key in the virtual sstable.
			return i.virtualLast()
		}
	}

	i.exhaustedBounds = 0
	i.err = nil // clear cached iteration error
	boundsCmp := i.boundsCmp
	// Seek optimization only applies until iterator is first positioned after SetBounds.
	i.boundsCmp = 0

	// Seeking operations perform various step-instead-of-seeking optimizations:
	// eg by considering monotonically increasing bounds (i.boundsCmp). Care
	// must be taken to ensure that when performing these optimizations and the
	// iterator becomes exhausted i.maybeFilteredKeysSingleLevel is set
	// appropriately. Consider a previous SeekLT that filtered keys from k
	// until the current iterator position.
	//
	// If the previous SeekLT exhausted the iterator, it's possible keys
	// less than the current search key were filtered. We must not reuse the
	// current iterator position without remembering the previous value of
	// maybeFilteredKeysSingleLevel.

	i.positionedUsingLatestBounds = true

	var dontSeekWithinBlock bool
	if !i.data.isDataInvalidated() && !i.index.isDataInvalidated() && i.data.valid() && i.index.valid() &&
		boundsCmp < 0 && i.cmp(i.data.getFirstUserKey(), key) < 0 {
		// Fast-path: The bounds have moved backward, and this SeekLT is
		// respecting the upper bound (guaranteed by Iterator). We know that
		// the iterator must already be positioned within or just outside the
		// previous bounds. Therefore it cannot be positioned at a block (or
		// the position within that block) that is behind the seek position.
		// However it can be positioned at a later block. This fast-path to
		// use Prev() on the block is only applied when we are already at the
		// block that can satisfy this seek -- this is the motivation for
		// the i.cmp(i.data.getFirstUserKey(), key) < 0 predicate.
		i.initBoundsForAlreadyLoadedBlock()
		ikey, val, done := i.trySeekLTUsingPrevWithinBlock(key)
		if done {
			return ikey, val
		}
		if ikey == nil {
			// Done with this block.
			dontSeekWithinBlock = true
		}
	} else {
		// Slow-path.
		i.maybeFilteredKeysSingleLevel = false
		var ikey *InternalKey

		// NB: If a bound-limited block property filter is configured, it's
		// externally ensured that the filter is disabled (through returning
		// Intersects=false irrespective of the block props provided) during
		// seeks.
		if ikey, _ = i.index.SeekGE(key, base.SeekGEFlagsNone); ikey == nil {
			ikey, _ = i.index.Last()
			if ikey == nil {
				return nil, base.LazyValue{}
			}
		}
		// INVARIANT: ikey != nil.
		result := i.loadBlock(-1)
		if result == loadBlockFailed {
			return nil, base.LazyValue{}
		}
		if result == loadBlockIrrelevant {
			// Enforce the lower bound here since don't want to bother moving
			// to the previous block if lower bound is already exceeded. Note
			// that the previous block starts with keys <= ikey.UserKey since
			// even though this is the current block's separator, the same
			// user key can span multiple blocks.
			if i.lower != nil && i.cmp(ikey.UserKey, i.lower) < 0 {
				i.exhaustedBounds = -1
				return nil, base.LazyValue{}
			}
			// Want to skip to the previous block.
			dontSeekWithinBlock = true
		}
	}
	if !dontSeekWithinBlock {
		if ikey, val := i.data.SeekLT(key, flags); ikey != nil {
			if i.blockLower != nil && i.cmp(ikey.UserKey, i.blockLower) < 0 {
				i.exhaustedBounds = -1
				return nil, base.LazyValue{}
			}
			return ikey, val
		}
	}
	// The index contains separator keys which may lie between
	// user-keys. Consider the user-keys:
	//
	//    complete
	// ---- new block ---
	//    complexion
	//
	// If these two keys end one block and start the next, the index key may
	// be chosen as "compleu". The SeekGE in the index block will then point
	// us to the block containing "complexion". If this happens, we want the
	// last key from the previous data block.
	return i.maybeVerifyKey(i.skipBackward())
}
995 :
// First implements internalIterator.First, as documented in the pebble
// package. Note that First only checks the upper bound. It is up to the caller
// to ensure that key is greater than or equal to the lower bound (e.g. via a
// call to SeekGE(lower)).
func (i *singleLevelIterator) First() (*InternalKey, base.LazyValue) {
	// If the iterator was created on a virtual sstable, we will SeekGE to the
	// lower bound instead of using First, because First does not respect
	// bounds.
	if i.vState != nil {
		return i.SeekGE(i.lower, base.SeekGEFlagsNone)
	}

	if i.lower != nil {
		// A configured lower bound means the caller violated the documented
		// contract above.
		panic("singleLevelIterator.First() used despite lower bound")
	}
	i.positionedUsingLatestBounds = true
	i.maybeFilteredKeysSingleLevel = false

	return i.firstInternal()
}
1016 :
// firstInternal is a helper used for absolute positioning in a single-level
// index file, or for positioning in the second-level index in a two-level
// index file. For the latter, one cannot make any claims about absolute
// positioning.
func (i *singleLevelIterator) firstInternal() (*InternalKey, base.LazyValue) {
	i.exhaustedBounds = 0
	i.err = nil // clear cached iteration error
	// Seek optimization only applies until iterator is first positioned after SetBounds.
	i.boundsCmp = 0

	var ikey *InternalKey
	if ikey, _ = i.index.First(); ikey == nil {
		// The index is empty; there is nothing to iterate over.
		i.data.invalidate()
		return nil, base.LazyValue{}
	}
	result := i.loadBlock(+1)
	if result == loadBlockFailed {
		return nil, base.LazyValue{}
	}
	if result == loadBlockOK {
		if ikey, val := i.data.First(); ikey != nil {
			if i.blockUpper != nil {
				cmp := i.cmp(ikey.UserKey, i.blockUpper)
				if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
					i.exhaustedBounds = +1
					return nil, base.LazyValue{}
				}
			}
			return ikey, val
		}
		// Else fall through to skipForward.
	} else {
		// result == loadBlockIrrelevant. Enforce the upper bound here since
		// don't want to bother moving to the next block if upper bound is
		// already exceeded. Note that the next block starts with keys >=
		// ikey.UserKey since even though this is the block separator, the
		// same user key can span multiple blocks. If upper is exclusive we
		// use >= below, else we use >.
		if i.upper != nil {
			cmp := i.cmp(ikey.UserKey, i.upper)
			if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
				i.exhaustedBounds = +1
				return nil, base.LazyValue{}
			}
		}
		// Else fall through to skipForward.
	}

	return i.skipForward()
}
1067 :
// Last implements internalIterator.Last, as documented in the pebble
// package. Note that Last only checks the lower bound. It is up to the caller
// to ensure that key is less than the upper bound (e.g. via a call to
// SeekLT(upper)).
func (i *singleLevelIterator) Last() (*InternalKey, base.LazyValue) {
	// Virtual sstables must respect their synthetic bounds, which Last alone
	// would not; virtualLast handles the bounded positioning.
	if i.vState != nil {
		return i.virtualLast()
	}

	if i.upper != nil {
		// A configured upper bound means the caller violated the documented
		// contract above.
		panic("singleLevelIterator.Last() used despite upper bound")
	}
	i.positionedUsingLatestBounds = true
	i.maybeFilteredKeysSingleLevel = false
	return i.lastInternal()
}
1084 :
// lastInternal is a helper used for absolute positioning in a single-level
// index file, or for positioning in the second-level index in a two-level
// index file. For the latter, one cannot make any claims about absolute
// positioning.
func (i *singleLevelIterator) lastInternal() (*InternalKey, base.LazyValue) {
	i.exhaustedBounds = 0
	i.err = nil // clear cached iteration error
	// Seek optimization only applies until iterator is first positioned after SetBounds.
	i.boundsCmp = 0

	var ikey *InternalKey
	if ikey, _ = i.index.Last(); ikey == nil {
		// The index is empty; there is nothing to iterate over.
		i.data.invalidate()
		return nil, base.LazyValue{}
	}
	result := i.loadBlock(-1)
	if result == loadBlockFailed {
		return nil, base.LazyValue{}
	}
	if result == loadBlockOK {
		if ikey, val := i.data.Last(); ikey != nil {
			if i.blockLower != nil && i.cmp(ikey.UserKey, i.blockLower) < 0 {
				i.exhaustedBounds = -1
				return nil, base.LazyValue{}
			}
			return ikey, val
		}
		// Else fall through to skipBackward.
	} else {
		// result == loadBlockIrrelevant. Enforce the lower bound here since
		// don't want to bother moving to the previous block if lower bound is
		// already exceeded. Note that the previous block starts with keys <=
		// key.UserKey since even though this is the current block's
		// separator, the same user key can span multiple blocks.
		if i.lower != nil && i.cmp(ikey.UserKey, i.lower) < 0 {
			i.exhaustedBounds = -1
			return nil, base.LazyValue{}
		}
	}

	return i.skipBackward()
}
1127 :
// Next implements internalIterator.Next, as documented in the pebble
// package.
// Note: compactionIterator.Next mirrors the implementation of Iterator.Next
// due to performance. Keep the two in sync.
func (i *singleLevelIterator) Next() (*InternalKey, base.LazyValue) {
	if i.exhaustedBounds == +1 {
		panic("Next called even though exhausted upper bound")
	}
	i.exhaustedBounds = 0
	i.maybeFilteredKeysSingleLevel = false
	// Seek optimization only applies until iterator is first positioned after SetBounds.
	i.boundsCmp = 0

	if i.err != nil {
		// A cached error makes the current position unusable.
		return nil, base.LazyValue{}
	}
	if key, val := i.data.Next(); key != nil {
		if i.blockUpper != nil {
			cmp := i.cmp(key.UserKey, i.blockUpper)
			if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
				i.exhaustedBounds = +1
				return nil, base.LazyValue{}
			}
		}
		return key, val
	}
	// Current data block is exhausted; advance to the next block.
	return i.skipForward()
}
1156 :
// NextPrefix implements (base.InternalIterator).NextPrefix. It positions the
// iterator at the first key >= succKey, first trying within the current data
// block, then the next block, and finally re-seeking the index.
func (i *singleLevelIterator) NextPrefix(succKey []byte) (*InternalKey, base.LazyValue) {
	if i.exhaustedBounds == +1 {
		panic("NextPrefix called even though exhausted upper bound")
	}
	i.exhaustedBounds = 0
	i.maybeFilteredKeysSingleLevel = false
	// Seek optimization only applies until iterator is first positioned after SetBounds.
	i.boundsCmp = 0
	if i.err != nil {
		// A cached error makes the current position unusable.
		return nil, base.LazyValue{}
	}
	if key, val := i.data.NextPrefix(succKey); key != nil {
		if i.blockUpper != nil {
			cmp := i.cmp(key.UserKey, i.blockUpper)
			if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
				i.exhaustedBounds = +1
				return nil, base.LazyValue{}
			}
		}
		return key, val
	}
	// Did not find prefix in the existing data block. This is the slow-path
	// where we effectively seek the iterator.
	var ikey *InternalKey
	// The key is likely to be in the next data block, so try one step.
	if ikey, _ = i.index.Next(); ikey == nil {
		// The target key is greater than any key in the index block.
		// Invalidate the block iterator so that a subsequent call to Prev()
		// will return the last key in the table.
		i.data.invalidate()
		return nil, base.LazyValue{}
	}
	if i.cmp(succKey, ikey.UserKey) > 0 {
		// Not in the next data block, so seek the index.
		if ikey, _ = i.index.SeekGE(succKey, base.SeekGEFlagsNone); ikey == nil {
			// The target key is greater than any key in the index block.
			// Invalidate the block iterator so that a subsequent call to Prev()
			// will return the last key in the table.
			i.data.invalidate()
			return nil, base.LazyValue{}
		}
	}
	result := i.loadBlock(+1)
	if result == loadBlockFailed {
		return nil, base.LazyValue{}
	}
	if result == loadBlockIrrelevant {
		// Enforce the upper bound here since don't want to bother moving
		// to the next block if upper bound is already exceeded. Note that
		// the next block starts with keys >= ikey.UserKey since even
		// though this is the block separator, the same user key can span
		// multiple blocks. If upper is exclusive we use >= below, else we use
		// >.
		if i.upper != nil {
			cmp := i.cmp(ikey.UserKey, i.upper)
			if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
				i.exhaustedBounds = +1
				return nil, base.LazyValue{}
			}
		}
	} else if key, val := i.data.SeekGE(succKey, base.SeekGEFlagsNone); key != nil {
		if i.blockUpper != nil {
			cmp := i.cmp(key.UserKey, i.blockUpper)
			if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
				i.exhaustedBounds = +1
				return nil, base.LazyValue{}
			}
		}
		return i.maybeVerifyKey(key, val)
	}

	return i.skipForward()
}
1231 :
// Prev implements internalIterator.Prev, as documented in the pebble
// package.
func (i *singleLevelIterator) Prev() (*InternalKey, base.LazyValue) {
	if i.exhaustedBounds == -1 {
		panic("Prev called even though exhausted lower bound")
	}
	i.exhaustedBounds = 0
	i.maybeFilteredKeysSingleLevel = false
	// Seek optimization only applies until iterator is first positioned after SetBounds.
	i.boundsCmp = 0

	if i.err != nil {
		// A cached error makes the current position unusable.
		return nil, base.LazyValue{}
	}
	if key, val := i.data.Prev(); key != nil {
		if i.blockLower != nil && i.cmp(key.UserKey, i.blockLower) < 0 {
			i.exhaustedBounds = -1
			return nil, base.LazyValue{}
		}
		return key, val
	}
	// Current data block is exhausted; step back to the previous block.
	return i.skipBackward()
}
1255 :
// skipForward advances the index to successive data blocks until it finds a
// non-empty, relevant block and returns that block's first key, or returns
// nil when the table or the upper bound is exhausted.
func (i *singleLevelIterator) skipForward() (*InternalKey, base.LazyValue) {
	for {
		var key *InternalKey
		if key, _ = i.index.Next(); key == nil {
			// Index exhausted: no more data blocks in the table.
			i.data.invalidate()
			break
		}
		result := i.loadBlock(+1)
		if result != loadBlockOK {
			if i.err != nil {
				break
			}
			if result == loadBlockFailed {
				// We checked that i.index was at a valid entry, so
				// loadBlockFailed could not have happened due to i.index
				// being exhausted, and must be due to an error.
				panic("loadBlock should not have failed with no error")
			}
			// result == loadBlockIrrelevant. Enforce the upper bound here
			// since don't want to bother moving to the next block if upper
			// bound is already exceeded. Note that the next block starts with
			// keys >= key.UserKey since even though this is the block
			// separator, the same user key can span multiple blocks. If upper
			// is exclusive we use >= below, else we use >.
			if i.upper != nil {
				cmp := i.cmp(key.UserKey, i.upper)
				if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
					i.exhaustedBounds = +1
					return nil, base.LazyValue{}
				}
			}
			continue
		}
		if key, val := i.data.First(); key != nil {
			if i.blockUpper != nil {
				cmp := i.cmp(key.UserKey, i.blockUpper)
				if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
					i.exhaustedBounds = +1
					return nil, base.LazyValue{}
				}
			}
			return i.maybeVerifyKey(key, val)
		}
	}
	return nil, base.LazyValue{}
}
1302 :
// skipBackward steps the index to preceding data blocks until it finds a
// non-empty, relevant block and returns that block's last key, or returns
// nil when the table or the lower bound is exhausted.
func (i *singleLevelIterator) skipBackward() (*InternalKey, base.LazyValue) {
	for {
		var key *InternalKey
		if key, _ = i.index.Prev(); key == nil {
			// Index exhausted: no earlier data blocks in the table.
			i.data.invalidate()
			break
		}
		result := i.loadBlock(-1)
		if result != loadBlockOK {
			if i.err != nil {
				break
			}
			if result == loadBlockFailed {
				// We checked that i.index was at a valid entry, so
				// loadBlockFailed could not have happened due to i.index
				// being exhausted, and must be due to an error.
				panic("loadBlock should not have failed with no error")
			}
			// result == loadBlockIrrelevant. Enforce the lower bound here
			// since don't want to bother moving to the previous block if lower
			// bound is already exceeded. Note that the previous block starts with
			// keys <= key.UserKey since even though this is the current block's
			// separator, the same user key can span multiple blocks.
			if i.lower != nil && i.cmp(key.UserKey, i.lower) < 0 {
				i.exhaustedBounds = -1
				return nil, base.LazyValue{}
			}
			continue
		}
		key, val := i.data.Last()
		if key == nil {
			return nil, base.LazyValue{}
		}
		if i.blockLower != nil && i.cmp(key.UserKey, i.blockLower) < 0 {
			i.exhaustedBounds = -1
			return nil, base.LazyValue{}
		}
		return i.maybeVerifyKey(key, val)
	}
	return nil, base.LazyValue{}
}
1344 :
1345 : // Error implements internalIterator.Error, as documented in the pebble
1346 : // package.
1347 1 : func (i *singleLevelIterator) Error() error {
1348 1 : if err := i.data.Error(); err != nil {
1349 0 : return err
1350 0 : }
1351 1 : return i.err
1352 : }
1353 :
// MaybeFilteredKeys may be called when an iterator is exhausted to indicate
// whether or not the last positioning method may have skipped any keys due to
// block-property filters.
func (i *singleLevelIterator) MaybeFilteredKeys() bool {
	// The flag is maintained by the positioning methods (set by loadBlock via
	// the seek paths, cleared by absolute/relative positioning).
	return i.maybeFilteredKeysSingleLevel
}
1360 :
// SetCloseHook sets a function that will be called when the iterator is
// closed. Close invokes the hook (if set) before releasing the iterator's
// resources.
func (i *singleLevelIterator) SetCloseHook(fn func(i Iterator) error) {
	i.closeHook = fn
}
1366 :
1367 1 : func firstError(err0, err1 error) error {
1368 1 : if err0 != nil {
1369 0 : return err0
1370 0 : }
1371 1 : return err1
1372 : }
1373 :
// Close implements internalIterator.Close, as documented in the pebble
// package. It runs the close hook (if any), closes the data and index block
// iterators and read handles, releases filter/value-block resources, and
// returns the iterator to its pool. The first error encountered is returned.
func (i *singleLevelIterator) Close() error {
	i.iterStats.close()
	var err error
	if i.closeHook != nil {
		err = firstError(err, i.closeHook(i))
	}
	err = firstError(err, i.data.Close())
	err = firstError(err, i.index.Close())
	if i.dataRH != nil {
		err = firstError(err, i.dataRH.Close())
		i.dataRH = nil
	}
	// Surface any cached iteration error even if every Close succeeded.
	err = firstError(err, i.err)
	if i.bpfs != nil {
		releaseBlockPropertiesFilterer(i.bpfs)
	}
	if i.vbReader != nil {
		i.vbReader.close()
	}
	if i.vbRH != nil {
		err = firstError(err, i.vbRH.Close())
		i.vbRH = nil
	}
	// Zero the iterator state and return it to the pool for reuse.
	*i = i.resetForReuse()
	singleLevelIterPool.Put(i)
	return err
}
1403 :
1404 1 : func (i *singleLevelIterator) String() string {
1405 1 : if i.vState != nil {
1406 1 : return i.vState.fileNum.String()
1407 1 : }
1408 1 : return i.reader.fileNum.String()
1409 : }
|