Line data Source code
1 : // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package sstable
6 :
7 : import (
8 : "context"
9 : "fmt"
10 : "unsafe"
11 :
12 : "github.com/cockroachdb/pebble/internal/base"
13 : "github.com/cockroachdb/pebble/internal/invariants"
14 : "github.com/cockroachdb/pebble/objstorage"
15 : "github.com/cockroachdb/pebble/objstorage/objstorageprovider"
16 : "github.com/cockroachdb/pebble/objstorage/objstorageprovider/objiotracing"
17 : )
18 :
19 : // singleLevelIterator iterates over an entire table of data. To seek for a given
20 : // key, it first looks in the index for the block that contains that key, and then
21 : // looks inside that block.
22 : type singleLevelIterator struct {
23 : ctx context.Context
24 : cmp Compare
25 : // Global lower/upper bound for the iterator.
26 : lower []byte
27 : upper []byte
28 : bpfs *BlockPropertiesFilterer
29 : // Per-block lower/upper bound. Nil if the bound does not apply to the block
30 : // because we determined the block lies completely within the bound.
31 : blockLower []byte
32 : blockUpper []byte
33 : reader *Reader
34 : // vState will be set iff the iterator is constructed for virtual sstable
35 : // iteration.
36 : vState *virtualState
37 : // endKeyInclusive is set to force the iterator to treat the upper field as
38 : // inclusive while iterating instead of exclusive.
39 : endKeyInclusive bool
40 : index blockIter
41 : data blockIter
42 : dataRH objstorage.ReadHandle
43 : dataRHPrealloc objstorageprovider.PreallocatedReadHandle
44 : // dataBH refers to the last data block that the iterator considered
45 : // loading. It may not actually have loaded the block, due to an error or
46 : // because it was considered irrelevant.
47 : dataBH BlockHandle
48 : vbReader *valueBlockReader
49 : // vbRH is the read handle for value blocks, which are in a different
50 : // part of the sstable than data blocks.
51 : vbRH objstorage.ReadHandle
52 : vbRHPrealloc objstorageprovider.PreallocatedReadHandle
53 : err error
54 : closeHook func(i Iterator) error
55 : // stats and iterStats are slightly different. stats is a shared struct
56 : // supplied from the outside, and represents stats for the whole iterator
57 : // tree and can be reset from the outside (e.g. when the pebble.Iterator is
58 : // being reused). It is currently only provided when the iterator tree is
59 : // rooted at pebble.Iterator. iterStats is this sstable iterator's private
60 : // stats that are reported to a CategoryStatsCollector when this iterator is
61 : // closed. iterStats is instrumented in more code paths because the
62 : // CategoryStatsCollector it needs is provided by the tableCacheContainer
63 : // (which is more universally used).
64 : stats *base.InternalIteratorStats
65 : iterStats iterStatsAccumulator
66 : bufferPool *BufferPool
67 :
68 : // boundsCmp and positionedUsingLatestBounds are for optimizing iteration
69 : // that uses multiple adjacent bounds. The seek after setting a new bound
70 : // can use the fact that the iterator is either within the previous bounds
71 : // or exactly one key before or after the bounds. If the new bounds are
72 : // after/before the previous bounds, and we are already positioned at a
73 : // block that is relevant for the new bounds, we can try to first position
74 : // using Next/Prev (repeatedly) instead of doing a more expensive seek.
75 : //
76 : // When there are wide files at higher levels that match the bounds
77 : // but don't have any data for the bound, we will already be
78 : // positioned at the key beyond the bounds and won't need to do much
79 : // work -- given that most data is in L6, such files are likely to
80 : // dominate the performance of the mergingIter, and may be the main
81 : // benefit of this performance optimization (of course it also helps
82 : // when the file that has the data has successive seeks that stay in
83 : // the same block).
84 : //
85 : // Specifically, boundsCmp captures the relationship between the previous
86 : // and current bounds, if the iterator had been positioned after setting
87 : // the previous bounds. If it was not positioned, i.e., Seek/First/Last
88 : // were not called, we don't know where it is positioned and cannot
89 : // optimize.
90 : //
91 : // Example: Bounds moving forward, and iterator exhausted in forward direction.
92 : // bounds = [f, h), ^ shows block iterator position
93 : // file contents [ a b c d e f g h i j k ]
94 : // ^
95 : // new bounds = [j, k). Since positionedUsingLatestBounds=true, boundsCmp is
96 : // set to +1. SeekGE(j) can use next (the optimization also requires that j
97 : // is within the block; that requirement is not for correctness, but to
98 : // limit the optimization to cases where it will actually help).
99 : //
100 : // Example: Bounds moving forward.
101 : // bounds = [f, h), ^ shows block iterator position
102 : // file contents [ a b c d e f g h i j k ]
103 : // ^
104 : // new bounds = [j, k). Since positionedUsingLatestBounds=true, boundsCmp is
105 : // set to +1. SeekGE(j) can use next.
106 : //
107 : // Example: Bounds moving forward, but iterator not positioned using previous
108 : // bounds.
109 : // bounds = [f, h), ^ shows block iterator position
110 : // file contents [ a b c d e f g h i j k ]
111 : // ^
112 : // new bounds = [i, j). Iterator is at j since it was never positioned using
113 : // [f, h). So positionedUsingLatestBounds=false, and boundsCmp is set to 0.
114 : // SeekGE(i) will not use next.
115 : //
116 : // Example: Bounds moving forward and sparse file
117 : // bounds = [f, h), ^ shows block iterator position
118 : // file contents [ a z ]
119 : // ^
120 : // new bounds = [j, k). Since positionedUsingLatestBounds=true, boundsCmp is
121 : // set to +1. SeekGE(j) notices that the iterator is already past j and does
122 : // not need to do anything.
123 : //
124 : // Similar examples can be constructed for backward iteration.
125 : //
126 : // This notion of exactly one key before or after the bounds is not quite
127 : // true when block properties are used to ignore blocks. In that case we
128 : // can't stop precisely at the first block that is past the bounds since
129 : // we are using the index entries to enforce the bounds.
130 : //
131 : // e.g. 3 blocks with keys [b, c], [f, g], [i, j, k] with index entries d,
132 : // h, l. And let the lower bound be k, and we are reverse iterating. If
133 : // the block [i, j, k] is ignored due to the block interval annotations we
134 : // do need to move the index to block [f, g] since the index entry for the
135 : // [i, j, k] block is l which is not less than the lower bound of k. So we
136 : // have passed the entries i, j.
137 : //
138 : // This behavior is harmless since the block property filters are fixed
139 : // for the lifetime of the iterator, so i, j are irrelevant. In addition,
140 : // the current code will not load the [f, g] block, so the seek
141 : // optimization that attempts to use Next/Prev does not apply anyway.
142 : boundsCmp int
143 : positionedUsingLatestBounds bool
144 :
145 : // exhaustedBounds represents whether the iterator is exhausted for
146 : // iteration by reaching the upper or lower bound. +1 when exhausted
147 : // the upper bound, -1 when exhausted the lower bound, and 0 when
148 : // neither. exhaustedBounds is also used for the TrySeekUsingNext
149 : // optimization in twoLevelIterator and singleLevelIterator. Care should be
150 : // taken in setting this in twoLevelIterator before calling into
151 : // singleLevelIterator, given that these two iterators share this field.
152 : exhaustedBounds int8
153 :
154 : // maybeFilteredKeysSingleLevel indicates whether the last iterator
155 : // positioning operation may have skipped any data blocks due to
156 : // block-property filters when positioning the index.
157 : maybeFilteredKeysSingleLevel bool
158 :
159 : // useFilter specifies whether the filter block in this sstable, if present,
160 : // should be used for prefix seeks or not. In some cases it is beneficial
161 : // to skip a filter block even if it exists (e.g. if the probability of a
162 : // match is high).
163 : useFilter bool
164 : lastBloomFilterMatched bool
165 :
166 : hideObsoletePoints bool
167 : }
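
To make the boundsCmp discussion above concrete, here is a minimal standalone sketch (a toy function, not Pebble's API; the parameter names are illustrative) of how a new pair of bounds is classified against the previous pair, mirroring the logic in SetBounds:

package main

import (
	"bytes"
	"fmt"
)

// classifyBounds mirrors the idea behind boundsCmp: if the iterator was
// positioned using the previous bounds, and the new bounds lie entirely at or
// after (or entirely at or before) the old ones, a later seek may step with
// Next (or Prev) instead of re-seeking.
func classifyBounds(prevLower, prevUpper, lower, upper []byte, positioned bool) int {
	if !positioned {
		return 0 // position unknown; cannot optimize
	}
	if prevUpper != nil && lower != nil && bytes.Compare(prevUpper, lower) <= 0 {
		return +1 // new bounds start at or after the old upper bound
	}
	if prevLower != nil && upper != nil && bytes.Compare(upper, prevLower) <= 0 {
		return -1 // new bounds end at or before the old lower bound
	}
	return 0
}

func main() {
	// Bounds moving forward, [f, h) -> [j, k): returns +1, so SeekGE(j)
	// may first try Next from the current position.
	fmt.Println(classifyBounds([]byte("f"), []byte("h"), []byte("j"), []byte("k"), true))
}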
168 :
169 : // singleLevelIterator implements the base.InternalIterator interface.
170 : var _ base.InternalIterator = (*singleLevelIterator)(nil)
171 :
172 : // init initializes a singleLevelIterator for reading from the table. It is
173 : // synonymous with Reader.NewIter, but allows reusing the iterator
174 : // between different Readers.
175 : //
176 : // Note that lower, upper passed into init have nothing to do with virtual sstable
177 : // bounds. If the virtualState passed in is not nil, then virtual sstable bounds
178 : // will be enforced.
179 : func (i *singleLevelIterator) init(
180 : ctx context.Context,
181 : r *Reader,
182 : v *virtualState,
183 : lower, upper []byte,
184 : filterer *BlockPropertiesFilterer,
185 : useFilter, hideObsoletePoints bool,
186 : stats *base.InternalIteratorStats,
187 : categoryAndQoS CategoryAndQoS,
188 : statsCollector *CategoryStatsCollector,
189 : rp ReaderProvider,
190 : bufferPool *BufferPool,
191 2 : ) error {
192 2 : if r.err != nil {
193 0 : return r.err
194 0 : }
195 2 : i.iterStats.init(categoryAndQoS, statsCollector)
196 2 : indexH, err := r.readIndex(ctx, stats, &i.iterStats)
197 2 : if err != nil {
198 1 : return err
199 1 : }
200 2 : if v != nil {
201 2 : i.vState = v
202 2 : i.endKeyInclusive, lower, upper = v.constrainBounds(lower, upper, false /* endInclusive */)
203 2 : }
204 :
205 2 : i.ctx = ctx
206 2 : i.lower = lower
207 2 : i.upper = upper
208 2 : i.bpfs = filterer
209 2 : i.useFilter = useFilter
210 2 : i.reader = r
211 2 : i.cmp = r.Compare
212 2 : i.stats = stats
213 2 : i.hideObsoletePoints = hideObsoletePoints
214 2 : i.bufferPool = bufferPool
215 2 : err = i.index.initHandle(i.cmp, indexH, r.Properties.GlobalSeqNum, false)
216 2 : if err != nil {
217 0 : // blockIter.Close releases indexH and always returns a nil error
218 0 : _ = i.index.Close()
219 0 : return err
220 0 : }
221 2 : i.dataRH = objstorageprovider.UsePreallocatedReadHandle(ctx, r.readable, &i.dataRHPrealloc)
222 2 : if r.tableFormat >= TableFormatPebblev3 {
223 2 : if r.Properties.NumValueBlocks > 0 {
224 2 : // NB: we cannot avoid this ~248 byte allocation, since valueBlockReader
225 2 : // can outlive the singleLevelIterator due to being embedded in a
226 2 : // LazyValue. This consumes ~2% in microbenchmark CPU profiles, but we
227 2 : // should only optimize this if it shows up as significant in end-to-end
228 2 : // CockroachDB benchmarks, since it is tricky to do so. One possibility
229 2 : // is that if many sstable iterators only get positioned at latest
230 2 : // versions of keys, and therefore never expose a LazyValue that is
231 2 : // separated to their callers, they can put this valueBlockReader into a
232 2 : // sync.Pool.
233 2 : i.vbReader = &valueBlockReader{
234 2 : bpOpen: i,
235 2 : rp: rp,
236 2 : vbih: r.valueBIH,
237 2 : stats: stats,
238 2 : }
239 2 : i.data.lazyValueHandling.vbr = i.vbReader
240 2 : i.vbRH = objstorageprovider.UsePreallocatedReadHandle(ctx, r.readable, &i.vbRHPrealloc)
241 2 : }
242 2 : i.data.lazyValueHandling.hasValuePrefix = true
243 : }
244 2 : return nil
245 : }
246 :
247 : // Helper function to check if keys returned from the iterator are within global and virtual bounds.
248 : func (i *singleLevelIterator) maybeVerifyKey(
249 : iKey *InternalKey, val base.LazyValue,
250 2 : ) (*InternalKey, base.LazyValue) {
251 2 : // maybeVerifyKey is only used for virtual sstable iterators.
252 2 : if invariants.Enabled && i.vState != nil && iKey != nil {
253 2 : key := iKey.UserKey
254 2 :
255 2 : uc, vuc := i.cmp(key, i.upper), i.cmp(key, i.vState.upper.UserKey)
256 2 : lc, vlc := i.cmp(key, i.lower), i.cmp(key, i.vState.lower.UserKey)
257 2 :
258 2 : if (i.vState.upper.IsExclusiveSentinel() && vuc == 0) || (!i.endKeyInclusive && uc == 0) || uc > 0 || vuc > 0 || lc < 0 || vlc < 0 {
259 0 : panic(fmt.Sprintf("key: %s out of bounds of singleLevelIterator", key))
260 : }
261 : }
262 2 : return iKey, val
263 : }
264 :
265 : // setupForCompaction sets up the singleLevelIterator for use with compactionIter.
266 : // Currently, it skips readahead ramp-up. It should be called after init is called.
267 2 : func (i *singleLevelIterator) setupForCompaction() {
268 2 : i.dataRH.SetupForCompaction()
269 2 : if i.vbRH != nil {
270 2 : i.vbRH.SetupForCompaction()
271 2 : }
272 : }
273 :
274 2 : func (i *singleLevelIterator) resetForReuse() singleLevelIterator {
275 2 : return singleLevelIterator{
276 2 : index: i.index.resetForReuse(),
277 2 : data: i.data.resetForReuse(),
278 2 : }
279 2 : }
280 :
281 2 : func (i *singleLevelIterator) initBounds() {
282 2 : // Trim the iteration bounds for the current block. We don't have to check
283 2 : // the bounds on each iteration if the block is entirely contained within the
284 2 : // iteration bounds.
285 2 : i.blockLower = i.lower
286 2 : if i.blockLower != nil {
287 2 : key, _ := i.data.First()
288 2 : if key != nil && i.cmp(i.blockLower, key.UserKey) < 0 {
289 2 : // The lower-bound is less than the first key in the block. No need
290 2 : // to check the lower-bound again for this block.
291 2 : i.blockLower = nil
292 2 : }
293 : }
294 2 : i.blockUpper = i.upper
295 2 : if i.blockUpper != nil && i.cmp(i.blockUpper, i.index.Key().UserKey) > 0 {
296 2 : // The upper-bound is greater than the index key which itself is greater
297 2 : // than or equal to every key in the block. No need to check the
298 2 : // upper-bound again for this block. Even if blockUpper is inclusive
299 2 : // because of upper being inclusive, we can still safely set blockUpper
300 2 : // to nil here.
301 2 : //
302 2 : // TODO(bananabrick): We could also set blockUpper to nil for the >=
303 2 : // case, if blockUpper is inclusive.
304 2 : i.blockUpper = nil
305 2 : }
306 : }
307 :
308 : // Deterministic disabling of the bounds-based optimization that avoids seeking.
309 : // Uses the iterator pointer, since we want diversity in iterator behavior for
310 : // the same SetBounds call. Used for tests.
311 2 : func disableBoundsOpt(bound []byte, ptr uintptr) bool {
312 2 : // Fibonacci hash https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
313 2 : simpleHash := (11400714819323198485 * uint64(ptr)) >> 63
314 2 : return bound[len(bound)-1]&byte(1) == 0 && simpleHash == 0
315 2 : }
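
As a standalone illustration of the Fibonacci hashing used by disableBoundsOpt, the sketch below multiplies a pointer-like integer by the same 64-bit constant (2^64 divided by the golden ratio) and keeps only the top bit, yielding a cheap deterministic coin flip per iterator instance (the inputs are made up for the example):

package main

import "fmt"

// fibHashTopBit uses the constant from disableBoundsOpt; multiplying by
// 2^64/phi scrambles the bits, and shifting by 63 keeps only the top bit.
func fibHashTopBit(ptr uint64) uint64 {
	return (11400714819323198485 * ptr) >> 63
}

func main() {
	// Print the top-bit hash for a few pointer-like values.
	for _, p := range []uint64{0x1000, 0x1008, 0x1010, 0x1018} {
		fmt.Printf("%#x -> %d\n", p, fibHashTopBit(p))
	}
}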
316 :
317 : // ensureBoundsOptDeterminism provides a facility for disabling the bounds
318 : // optimizations performed by disableBoundsOpt for tests that require
319 : // deterministic iterator behavior. Some unit tests examine internal iterator
320 : // state and require this behavior to be deterministic.
321 : var ensureBoundsOptDeterminism bool
322 :
323 : // SetBounds implements internalIterator.SetBounds, as documented in the pebble
324 : // package. Note that the upper field is exclusive.
325 2 : func (i *singleLevelIterator) SetBounds(lower, upper []byte) {
326 2 : i.boundsCmp = 0
327 2 : if i.vState != nil {
328 2 : // If the reader is constructed for a virtual sstable, then we must
329 2 : // constrain the bounds of the reader. For physical sstables, the bounds
330 2 : // can be wider than the actual sstable's bounds because we won't
331 2 : // accidentally expose additional keys as there are no additional keys.
332 2 : i.endKeyInclusive, lower, upper = i.vState.constrainBounds(
333 2 : lower, upper, false,
334 2 : )
335 2 : } else {
336 2 : // TODO(bananabrick): Figure out the logic here to enable the boundsCmp
337 2 : // optimization for virtual sstables.
338 2 : if i.positionedUsingLatestBounds {
339 2 : if i.upper != nil && lower != nil && i.cmp(i.upper, lower) <= 0 {
340 2 : i.boundsCmp = +1
341 2 : if invariants.Enabled && !ensureBoundsOptDeterminism &&
342 2 : disableBoundsOpt(lower, uintptr(unsafe.Pointer(i))) {
343 2 : i.boundsCmp = 0
344 2 : }
345 2 : } else if i.lower != nil && upper != nil && i.cmp(upper, i.lower) <= 0 {
346 2 : i.boundsCmp = -1
347 2 : if invariants.Enabled && !ensureBoundsOptDeterminism &&
348 2 : disableBoundsOpt(upper, uintptr(unsafe.Pointer(i))) {
349 2 : i.boundsCmp = 0
350 2 : }
351 : }
352 : }
353 : }
354 :
355 2 : i.positionedUsingLatestBounds = false
356 2 : i.lower = lower
357 2 : i.upper = upper
358 2 : i.blockLower = nil
359 2 : i.blockUpper = nil
360 : }
361 :
362 0 : func (i *singleLevelIterator) SetContext(ctx context.Context) {
363 0 : i.ctx = ctx
364 0 : }
365 :
366 : // loadBlock loads the block at the current index position and leaves i.data
367 : // unpositioned. If unsuccessful, it sets i.err to any error encountered, which
368 : // may be nil if we have simply exhausted the entire table.
369 2 : func (i *singleLevelIterator) loadBlock(dir int8) loadBlockResult {
370 2 : if !i.index.valid() {
371 0 : // Ensure the data block iterator is invalidated even if loading of the block
372 0 : // fails.
373 0 : i.data.invalidate()
374 0 : return loadBlockFailed
375 0 : }
376 : // Load the next block.
377 2 : v := i.index.value()
378 2 : bhp, err := decodeBlockHandleWithProperties(v.InPlaceValue())
379 2 : if i.dataBH == bhp.BlockHandle && i.data.valid() {
380 2 : // We're already at the data block we want to load. Reset bounds in case
381 2 : // they changed since the last seek, but don't reload the block from cache
382 2 : // or disk.
383 2 : //
384 2 : // It's safe to leave i.data in its original state here, as all callers to
385 2 : // loadBlock make an absolute positioning call (i.e. a seek, first, or last)
386 2 : // to `i.data` right after loadBlock returns loadBlockOK.
387 2 : i.initBounds()
388 2 : return loadBlockOK
389 2 : }
390 : // Ensure the data block iterator is invalidated even if loading of the block
391 : // fails.
392 2 : i.data.invalidate()
393 2 : i.dataBH = bhp.BlockHandle
394 2 : if err != nil {
395 0 : i.err = errCorruptIndexEntry
396 0 : return loadBlockFailed
397 0 : }
398 2 : if i.bpfs != nil {
399 2 : intersects, err := i.bpfs.intersects(bhp.Props)
400 2 : if err != nil {
401 0 : i.err = errCorruptIndexEntry
402 0 : return loadBlockFailed
403 0 : }
404 2 : if intersects == blockMaybeExcluded {
405 2 : intersects = i.resolveMaybeExcluded(dir)
406 2 : }
407 2 : if intersects == blockExcluded {
408 2 : i.maybeFilteredKeysSingleLevel = true
409 2 : return loadBlockIrrelevant
410 2 : }
411 : // blockIntersects
412 : }
413 2 : ctx := objiotracing.WithBlockType(i.ctx, objiotracing.DataBlock)
414 2 : block, err := i.reader.readBlock(
415 2 : ctx, i.dataBH, nil /* transform */, i.dataRH, i.stats, &i.iterStats, i.bufferPool)
416 2 : if err != nil {
417 1 : i.err = err
418 1 : return loadBlockFailed
419 1 : }
420 2 : i.err = i.data.initHandle(i.cmp, block, i.reader.Properties.GlobalSeqNum, i.hideObsoletePoints)
421 2 : if i.err != nil {
422 0 : // The block is partially loaded, and we don't want it to appear valid.
423 0 : i.data.invalidate()
424 0 : return loadBlockFailed
425 0 : }
426 2 : i.initBounds()
427 2 : return loadBlockOK
428 : }
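
The fast path at the top of loadBlock can be reduced to a caching idiom: remember which handle the current block was loaded from, and skip the read when the index points at the same handle again. A minimal sketch with hypothetical types (handle and cachedLoader are illustrative, not Pebble types):

package main

import "fmt"

// handle stands in for a BlockHandle: enough to identify a block on disk.
type handle struct{ offset, length uint64 }

type cachedLoader struct {
	loaded handle
	valid  bool
	reads  int // counts simulated block reads
}

// load skips re-reading when the requested handle matches the currently
// loaded block, as loadBlock does via the i.dataBH == bhp.BlockHandle check.
func (c *cachedLoader) load(h handle) {
	if c.valid && c.loaded == h {
		return // already positioned at this block; only bounds need refreshing
	}
	c.loaded, c.valid = h, true
	c.reads++ // stand-in for reader.readBlock
}

func main() {
	var c cachedLoader
	h := handle{offset: 4096, length: 1024}
	c.load(h)
	c.load(h) // hits the fast path; no second read
	fmt.Println(c.reads) // 1
}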
429 :
430 : // readBlockForVBR implements the blockProviderWhenOpen interface for use by
431 : // the valueBlockReader.
432 : func (i *singleLevelIterator) readBlockForVBR(
433 : h BlockHandle, stats *base.InternalIteratorStats,
434 2 : ) (bufferHandle, error) {
435 2 : ctx := objiotracing.WithBlockType(i.ctx, objiotracing.ValueBlock)
436 2 : return i.reader.readBlock(ctx, h, nil, i.vbRH, stats, &i.iterStats, i.bufferPool)
437 2 : }
438 :
439 : // resolveMaybeExcluded is invoked when the block-property filterer has found
440 : // that a block is excluded according to its properties but only if its bounds
441 : // fall within the filter's current bounds. This function consults the
442 : // appropriate bound, depending on the iteration direction, and returns either
443 : // `blockIntersects` or `blockExcluded`.
444 2 : func (i *singleLevelIterator) resolveMaybeExcluded(dir int8) intersectsResult {
445 2 : // TODO(jackson): We could first try comparing to top-level index block's
446 2 : // key, and if within bounds avoid per-data block key comparisons.
447 2 :
448 2 : // This iterator is configured with a bound-limited block property
449 2 : // filter. The bpf determined this block could be excluded from
450 2 : // iteration based on the property encoded in the block handle.
451 2 : // However, we still need to determine if the block is wholly
452 2 : // contained within the filter's key bounds.
453 2 : //
454 2 : // External guarantees ensure all the block's keys are ≥ the
455 2 : // filter's lower bound during forward iteration, and that all the
456 2 : // block's keys are < the filter's upper bound during backward
457 2 : // iteration. We only need to determine if the opposite bound is
458 2 : // also met.
459 2 : //
460 2 : // The index separator in index.Key() provides an inclusive
461 2 : // upper-bound for the data block's keys, guaranteeing that all its
462 2 : // keys are ≤ index.Key(). For forward iteration, this is all we
463 2 : // need.
464 2 : if dir > 0 {
465 2 : // Forward iteration.
466 2 : if i.bpfs.boundLimitedFilter.KeyIsWithinUpperBound(i.index.Key().UserKey) {
467 2 : return blockExcluded
468 2 : }
469 2 : return blockIntersects
470 : }
471 :
472 : // Reverse iteration.
473 : //
474 : // Because we're iterating in the reverse direction, we don't yet have
475 : // enough context available to determine if the block is wholly contained
476 : // within its bounds. This case arises only during backward iteration,
477 : // because of the way the index is structured.
478 : //
479 : // Consider a bound-limited bpf limited to the bounds [b,d), loading the
480 : // block with separator `c`. During reverse iteration, the guarantee that
481 : // all the block's keys are < `d` is externally provided, but no guarantee
482 : // is made on the bpf's lower bound. The separator `c` only provides an
483 : // inclusive upper bound on the block's keys, indicating that the
484 : // corresponding block handle points to a block containing only keys ≤ `c`.
485 : //
486 : // To establish a lower bound, we step the index backwards to read the
487 : // previous block's separator, which provides an inclusive lower bound on
488 : // the original block's keys. Afterwards, we step forward to restore our
489 : // index position.
490 2 : if peekKey, _ := i.index.Prev(); peekKey == nil {
491 2 : // The original block points to the first block of this index block. If
492 2 : // there's a two-level index, it could potentially provide a lower
493 2 : // bound, but the code refactoring necessary to read it doesn't seem
494 2 : // worth the payoff. We fall through to loading the block.
495 2 : } else if i.bpfs.boundLimitedFilter.KeyIsWithinLowerBound(peekKey.UserKey) {
496 2 : // The lower-bound on the original block falls within the filter's
497 2 : // bounds, and we can skip the block (after restoring our current index
498 2 : // position).
499 2 : _, _ = i.index.Next()
500 2 : return blockExcluded
501 2 : }
502 2 : _, _ = i.index.Next()
503 2 : return blockIntersects
504 : }
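
The peek-backward trick above can be demonstrated with a toy index. In this sketch (toy data, not the blockIter API), separators[i] is an inclusive upper bound on block i's keys, so separators[i-1] serves as an inclusive lower bound on block i's keys, just as the Prev/Next dance reads the previous separator:

package main

import "fmt"

// Blocks [b,c], [f,g], [i,j,k] with index separators d, h, l, matching the
// example in the struct comment earlier in this file.
var separators = []string{"d", "h", "l"}

// blockWhollyAboveLowerBound reports whether every key in block idx is
// known to be >= lowerBound using only the separators.
func blockWhollyAboveLowerBound(idx int, lowerBound string) bool {
	if idx == 0 {
		// No previous separator: a lower bound cannot be established, so
		// conservatively report false (the block must be loaded).
		return false
	}
	return separators[idx-1] >= lowerBound
}

func main() {
	// With lower bound "f": block 2 ([i,j,k]) has previous separator "h" >= "f",
	// so it is wholly within the bound; block 1 ([f,g]) has previous separator
	// "d" < "f", so it might straddle the bound and must be loaded.
	fmt.Println(blockWhollyAboveLowerBound(2, "f")) // true
	fmt.Println(blockWhollyAboveLowerBound(1, "f")) // false
}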
505 :
506 2 : func (i *singleLevelIterator) initBoundsForAlreadyLoadedBlock() {
507 2 : if i.data.getFirstUserKey() == nil {
508 0 : panic("initBoundsForAlreadyLoadedBlock must not be called on empty or corrupted block")
509 : }
510 2 : i.blockLower = i.lower
511 2 : if i.blockLower != nil {
512 2 : firstUserKey := i.data.getFirstUserKey()
513 2 : if firstUserKey != nil && i.cmp(i.blockLower, firstUserKey) < 0 {
514 2 : // The lower-bound is less than the first key in the block. No need
515 2 : // to check the lower-bound again for this block.
516 2 : i.blockLower = nil
517 2 : }
518 : }
519 2 : i.blockUpper = i.upper
520 2 : if i.blockUpper != nil && i.cmp(i.blockUpper, i.index.Key().UserKey) > 0 {
521 2 : // The upper-bound is greater than the index key which itself is greater
522 2 : // than or equal to every key in the block. No need to check the
523 2 : // upper-bound again for this block.
524 2 : i.blockUpper = nil
525 2 : }
526 : }
527 :
528 : // The number of times to call Next/Prev in a block before giving up and seeking.
529 : // The value of 4 is arbitrary.
530 : // TODO(sumeer): experiment with dynamic adjustment based on the history of
531 : // seeks for a particular iterator.
532 : const numStepsBeforeSeek = 4
533 :
534 : func (i *singleLevelIterator) trySeekGEUsingNextWithinBlock(
535 : key []byte,
536 2 : ) (k *InternalKey, v base.LazyValue, done bool) {
537 2 : k, v = i.data.Key(), i.data.value()
538 2 : for j := 0; j < numStepsBeforeSeek; j++ {
539 2 : curKeyCmp := i.cmp(k.UserKey, key)
540 2 : if curKeyCmp >= 0 {
541 2 : if i.blockUpper != nil {
542 2 : cmp := i.cmp(k.UserKey, i.blockUpper)
543 2 : if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
544 2 : i.exhaustedBounds = +1
545 2 : return nil, base.LazyValue{}, true
546 2 : }
547 : }
548 2 : return k, v, true
549 : }
550 2 : k, v = i.data.Next()
551 2 : if k == nil {
552 1 : break
553 : }
554 : }
555 2 : return k, v, false
556 : }
557 :
558 : func (i *singleLevelIterator) trySeekLTUsingPrevWithinBlock(
559 : key []byte,
560 2 : ) (k *InternalKey, v base.LazyValue, done bool) {
561 2 : k, v = i.data.Key(), i.data.value()
562 2 : for j := 0; j < numStepsBeforeSeek; j++ {
563 2 : curKeyCmp := i.cmp(k.UserKey, key)
564 2 : if curKeyCmp < 0 {
565 2 : if i.blockLower != nil && i.cmp(k.UserKey, i.blockLower) < 0 {
566 2 : i.exhaustedBounds = -1
567 2 : return nil, base.LazyValue{}, true
568 2 : }
569 2 : return k, v, true
570 : }
571 2 : k, v = i.data.Prev()
572 2 : if k == nil {
573 1 : break
574 : }
575 : }
576 2 : return k, v, false
577 : }
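
Both helpers above follow the same pattern: take at most numStepsBeforeSeek steps toward the target and fall back to a real seek when that is not enough. A minimal sketch of the forward case over a sorted slice (toy iterator state, not the blockIter API):

package main

import (
	"fmt"
	"sort"
)

const numStepsBeforeSeek = 4

// seekGEWithSteps positions at the first element >= key, trying a few Next
// steps from pos before falling back to a binary search, echoing
// trySeekGEUsingNextWithinBlock.
func seekGEWithSteps(data []string, pos int, key string) int {
	for j := 0; j < numStepsBeforeSeek; j++ {
		if pos >= len(data) {
			return pos // exhausted the block
		}
		if data[pos] >= key {
			return pos // reached the target by stepping
		}
		pos++
	}
	// The target is too far away; do the real seek.
	return sort.SearchStrings(data, key)
}

func main() {
	data := []string{"a", "b", "c", "d", "e", "f", "g", "h"}
	fmt.Println(seekGEWithSteps(data, 1, "c")) // 2: reached by stepping
	fmt.Println(seekGEWithSteps(data, 0, "h")) // 7: fell back to binary search
}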
578 :
579 2 : func (i *singleLevelIterator) recordOffset() uint64 {
580 2 : offset := i.dataBH.Offset
581 2 : if i.data.valid() {
582 2 : // - i.dataBH.Length/len(i.data.data) is the compression ratio. If
583 2 : // uncompressed, this is 1.
584 2 : // - i.data.nextOffset is the uncompressed position of the current record
585 2 : // in the block.
586 2 : // - i.dataBH.Offset is the offset of the block in the sstable before
587 2 : // decompression.
588 2 : offset += (uint64(i.data.nextOffset) * i.dataBH.Length) / uint64(len(i.data.data))
589 2 : } else {
590 2 : // Last entry in the block must increment bytes iterated by the size of the block trailer
591 2 : // and restart points.
592 2 : offset += i.dataBH.Length + blockTrailerLen
593 2 : }
594 2 : return offset
595 : }
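
recordOffset's interpolation can be checked with concrete (made-up) numbers. For a block stored at file offset 1000 whose 500 compressed bytes decompress to 2000 bytes, a record at uncompressed position 800 is attributed file offset 1000 + 800*500/2000 = 1200:

package main

import "fmt"

// estimatedOffset mirrors recordOffset's arithmetic: scale the record's
// uncompressed position within the block by the compression ratio, then add
// the block's file offset.
func estimatedOffset(blockOffset, compressedLen, uncompressedLen, recordPos uint64) uint64 {
	return blockOffset + (recordPos*compressedLen)/uncompressedLen
}

func main() {
	fmt.Println(estimatedOffset(1000, 500, 2000, 800)) // 1200
}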
596 :
597 : // SeekGE implements internalIterator.SeekGE, as documented in the pebble
598 : // package. Note that SeekGE only checks the upper bound. It is up to the
599 : // caller to ensure that key is greater than or equal to the lower bound.
600 : func (i *singleLevelIterator) SeekGE(
601 : key []byte, flags base.SeekGEFlags,
602 2 : ) (*InternalKey, base.LazyValue) {
603 2 : if i.vState != nil {
604 2 : // Callers of SeekGE don't know about virtual sstable bounds, so we may
605 2 : // have to internally restrict the bounds.
606 2 : //
607 2 : // TODO(bananabrick): We can optimize this check away for the level iter
608 2 : // if necessary.
609 2 : if i.cmp(key, i.lower) < 0 {
610 2 : key = i.lower
611 2 : }
612 : }
613 :
614 2 : if flags.TrySeekUsingNext() {
615 2 : // The i.exhaustedBounds comparison indicates that the upper bound was
616 2 : // reached. The i.data.isDataInvalidated() check indicates that the sstable was
617 2 : // exhausted.
618 2 : if (i.exhaustedBounds == +1 || i.data.isDataInvalidated()) && i.err == nil {
619 2 : // Already exhausted, so return nil.
620 2 : return nil, base.LazyValue{}
621 2 : }
622 2 : if i.err != nil {
623 0 : // The current iterator position cannot be used.
624 0 : flags = flags.DisableTrySeekUsingNext()
625 0 : }
626 : // INVARIANT: flags.TrySeekUsingNext() => i.err == nil &&
627 : // i.exhaustedBounds != +1 && !i.data.isDataInvalidated(). That is,
628 : // data-exhausted and bounds-exhausted, as defined earlier, are both
629 : // false. This makes it safe to clear out i.exhaustedBounds and i.err
630 : // before calling into seekGEHelper.
631 : }
632 :
633 2 : i.exhaustedBounds = 0
634 2 : i.err = nil // clear cached iteration error
635 2 : boundsCmp := i.boundsCmp
636 2 : // Seek optimization only applies until iterator is first positioned after SetBounds.
637 2 : i.boundsCmp = 0
638 2 : i.positionedUsingLatestBounds = true
639 2 : return i.seekGEHelper(key, boundsCmp, flags)
640 : }
641 :
642 : // seekGEHelper contains the common functionality for SeekGE and SeekPrefixGE.
643 : func (i *singleLevelIterator) seekGEHelper(
644 : key []byte, boundsCmp int, flags base.SeekGEFlags,
645 2 : ) (*InternalKey, base.LazyValue) {
646 2 : // Invariant: trySeekUsingNext => !i.data.isDataInvalidated() && i.exhaustedBounds != +1
647 2 :
648 2 : // SeekGE performs various step-instead-of-seeking optimizations: e.g. enabled
649 2 : // by trySeekUsingNext, or by monotonically increasing bounds (i.boundsCmp).
650 2 : // Care must be taken to ensure that when performing these optimizations and
651 2 : // the iterator becomes exhausted, i.maybeFilteredKeysSingleLevel is set appropriately.
652 2 : // Consider a previous SeekGE that filtered keys from k until the current
653 2 : // iterator position.
654 2 : //
655 2 : // If the previous SeekGE exhausted the iterator, it's possible keys greater
656 2 : // than or equal to the current search key were filtered. We must not reuse
657 2 : // the current iterator position without remembering the previous value of
658 2 : // maybeFilteredKeys.
659 2 :
660 2 : var dontSeekWithinBlock bool
661 2 : if !i.data.isDataInvalidated() && !i.index.isDataInvalidated() && i.data.valid() && i.index.valid() &&
662 2 : boundsCmp > 0 && i.cmp(key, i.index.Key().UserKey) <= 0 {
663 2 : // Fast-path: The bounds have moved forward and this SeekGE is
664 2 : // respecting the lower bound (guaranteed by Iterator). We know that
665 2 : // the iterator must already be positioned within or just outside the
666 2 : // previous bounds. Therefore it cannot be positioned at a block (or
667 2 : // the position within that block) that is ahead of the seek position.
668 2 : // However it can be positioned at an earlier block. This fast-path to
669 2 : // use Next() on the block is only applied when we are already at the
670 2 : // block that the slow-path (the else-clause) would load -- this is
671 2 : // the motivation for the i.cmp(key, i.index.Key().UserKey) <= 0
672 2 : // predicate.
673 2 : i.initBoundsForAlreadyLoadedBlock()
674 2 : ikey, val, done := i.trySeekGEUsingNextWithinBlock(key)
675 2 : if done {
676 2 : return ikey, val
677 2 : }
678 2 : if ikey == nil {
679 1 : // Done with this block.
680 1 : dontSeekWithinBlock = true
681 1 : }
682 2 : } else {
683 2 : // Cannot use bounds monotonicity. But may be able to optimize if
684 2 : // caller claimed externally known invariant represented by
685 2 : // flags.TrySeekUsingNext().
686 2 : if flags.TrySeekUsingNext() {
687 2 : // seekPrefixGE or SeekGE has already ensured
688 2 : // !i.data.isDataInvalidated() && i.exhaustedBounds != +1
689 2 : currKey := i.data.Key()
690 2 : value := i.data.value()
691 2 : less := i.cmp(currKey.UserKey, key) < 0
692 2 : // We could be more sophisticated and confirm that the seek
693 2 : // position is within the current block before applying this
694 2 : // optimization. But there may be some benefit even if it is in
695 2 : // the next block, since we can avoid seeking i.index.
696 2 : for j := 0; less && j < numStepsBeforeSeek; j++ {
697 2 : currKey, value = i.Next()
698 2 : if currKey == nil {
699 2 : return nil, base.LazyValue{}
700 2 : }
701 2 : less = i.cmp(currKey.UserKey, key) < 0
702 : }
703 2 : if !less {
704 2 : if i.blockUpper != nil {
705 2 : cmp := i.cmp(currKey.UserKey, i.blockUpper)
706 2 : if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
707 0 : i.exhaustedBounds = +1
708 0 : return nil, base.LazyValue{}
709 0 : }
710 : }
711 2 : return currKey, value
712 : }
713 : }
714 :
715 : // Slow-path.
716 : // Since we're re-seeking the iterator, the previous value of
717 : // maybeFilteredKeysSingleLevel is irrelevant. If we filter out blocks
718 : // during seeking, loadBlock will set it to true.
719 2 : i.maybeFilteredKeysSingleLevel = false
720 2 :
721 2 : var ikey *InternalKey
722 2 : if ikey, _ = i.index.SeekGE(key, flags.DisableTrySeekUsingNext()); ikey == nil {
723 2 : // The target key is greater than any key in the index block.
724 2 : // Invalidate the block iterator so that a subsequent call to Prev()
725 2 : // will return the last key in the table.
726 2 : i.data.invalidate()
727 2 : return nil, base.LazyValue{}
728 2 : }
729 2 : result := i.loadBlock(+1)
730 2 : if result == loadBlockFailed {
731 1 : return nil, base.LazyValue{}
732 1 : }
733 2 : if result == loadBlockIrrelevant {
734 2 : // Enforce the upper bound here since we don't want to bother moving
735 2 : // to the next block if upper bound is already exceeded. Note that
736 2 : // the next block starts with keys >= ikey.UserKey since even
737 2 : // though this is the block separator, the same user key can span
738 2 : // multiple blocks. If upper is exclusive we use >= below, else
739 2 : // we use >.
740 2 : if i.upper != nil {
741 2 : cmp := i.cmp(ikey.UserKey, i.upper)
742 2 : if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
743 2 : i.exhaustedBounds = +1
744 2 : return nil, base.LazyValue{}
745 2 : }
746 : }
747 : // Want to skip to the next block.
748 2 : dontSeekWithinBlock = true
749 : }
750 : }
751 2 : if !dontSeekWithinBlock {
752 2 : if ikey, val := i.data.SeekGE(key, flags.DisableTrySeekUsingNext()); ikey != nil {
753 2 : if i.blockUpper != nil {
754 2 : cmp := i.cmp(ikey.UserKey, i.blockUpper)
755 2 : if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
756 2 : i.exhaustedBounds = +1
757 2 : return nil, base.LazyValue{}
758 2 : }
759 : }
760 2 : return ikey, val
761 : }
762 : }
763 2 : return i.skipForward()
764 : }
765 :
766 : // SeekPrefixGE implements internalIterator.SeekPrefixGE, as documented in the
767 : // pebble package. Note that SeekPrefixGE only checks the upper bound. It is up
768 : // to the caller to ensure that key is greater than or equal to the lower bound.
769 : func (i *singleLevelIterator) SeekPrefixGE(
770 : prefix, key []byte, flags base.SeekGEFlags,
771 2 : ) (*base.InternalKey, base.LazyValue) {
772 2 : if i.vState != nil {
773 2 : // Callers of SeekPrefixGE aren't aware of virtual sstable bounds, so
774 2 : // we may have to internally restrict the bounds.
775 2 : //
776 2 : // TODO(bananabrick): We can optimize away this check for the level iter
777 2 : // if necessary.
778 2 : if i.cmp(key, i.lower) < 0 {
779 2 : key = i.lower
780 2 : }
781 : }
782 2 : return i.seekPrefixGE(prefix, key, flags, i.useFilter)
783 : }
784 :
785 : func (i *singleLevelIterator) seekPrefixGE(
786 : prefix, key []byte, flags base.SeekGEFlags, checkFilter bool,
787 2 : ) (k *InternalKey, value base.LazyValue) {
788 2 : // NOTE: prefix is only used for bloom filter checking and not for later work
789 2 : // in this method. Hence, we can use the existing iterator position if the last
790 2 : // SeekPrefixGE did not fail bloom filter matching.
791 2 :
792 2 : err := i.err
793 2 : i.err = nil // clear cached iteration error
794 2 : if checkFilter && i.reader.tableFilter != nil {
795 2 : if !i.lastBloomFilterMatched {
796 2 : // Iterator is not positioned based on last seek.
797 2 : flags = flags.DisableTrySeekUsingNext()
798 2 : }
799 2 : i.lastBloomFilterMatched = false
800 2 : // Check prefix bloom filter.
801 2 : var dataH bufferHandle
802 2 : dataH, i.err = i.reader.readFilter(i.ctx, i.stats, &i.iterStats)
803 2 : if i.err != nil {
804 1 : i.data.invalidate()
805 1 : return nil, base.LazyValue{}
806 1 : }
807 2 : mayContain := i.reader.tableFilter.mayContain(dataH.Get(), prefix)
808 2 : dataH.Release()
809 2 : if !mayContain {
810 2 : // This invalidation may not be necessary for correctness, and may
811 2 : // be a place to optimize later by reusing the already loaded
812 2 : // block. It was necessary in earlier versions of the code since
813 2 : // the caller was allowed to call Next when SeekPrefixGE returned
814 2 : // nil. This is no longer allowed.
815 2 : i.data.invalidate()
816 2 : return nil, base.LazyValue{}
817 2 : }
818 2 : i.lastBloomFilterMatched = true
819 : }
820 2 : if flags.TrySeekUsingNext() {
821 2 : // The i.exhaustedBounds comparison indicates that the upper bound was
822 2 : // reached. The i.data.isDataInvalidated() check indicates that the sstable was
823 2 : // exhausted.
824 2 : if (i.exhaustedBounds == +1 || i.data.isDataInvalidated()) && err == nil {
825 2 : // Already exhausted, so return nil.
826 2 : return nil, base.LazyValue{}
827 2 : }
828 2 : if err != nil {
829 1 : // The current iterator position cannot be used.
830 1 : flags = flags.DisableTrySeekUsingNext()
831 1 : }
832 : // INVARIANT: flags.TrySeekUsingNext() => err == nil &&
833 : // i.exhaustedBounds != +1 && !i.data.isDataInvalidated(). That is,
834 : // data-exhausted and bounds-exhausted, as defined earlier, are both
835 : // false. This makes it safe to clear out i.exhaustedBounds and i.err
836 : // before calling into seekGEHelper.
837 : }
838 : // The bloom filter matched, or was skipped, so this method will position the
839 : // iterator.
840 2 : i.exhaustedBounds = 0
841 2 : boundsCmp := i.boundsCmp
842 2 : // Seek optimization only applies until iterator is first positioned after SetBounds.
843 2 : i.boundsCmp = 0
844 2 : i.positionedUsingLatestBounds = true
845 2 : k, value = i.seekGEHelper(key, boundsCmp, flags)
846 2 : return i.maybeVerifyKey(k, value)
847 : }
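
The bloom-filter gate at the top of seekPrefixGE, reduced to its essence: consult a may-contain structure first and skip the expensive positioning work on a definite miss. A sketch with a stand-in filter (a map plays the role of the filter block here; a real bloom filter can return false positives but never false negatives):

package main

import "fmt"

// seekPrefix gates the (simulated) seek on a may-contain check, mirroring
// how seekPrefixGE returns early with an invalidated block iterator when
// the table filter rules the prefix out.
func seekPrefix(filter map[string]bool, table map[string]string, prefix string) (string, bool) {
	if !filter[prefix] {
		return "", false // definite miss: no index or data block reads
	}
	v, ok := table[prefix] // stand-in for the real positioning work
	return v, ok
}

func main() {
	filter := map[string]bool{"apple": true}
	table := map[string]string{"apple": "v1"}
	fmt.Println(seekPrefix(filter, table, "apple"))  // v1 true
	fmt.Println(seekPrefix(filter, table, "banana")) // "" false, seek skipped
}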
848 :
849 : // virtualLast should only be called if i.vState != nil.
850 2 : func (i *singleLevelIterator) virtualLast() (*InternalKey, base.LazyValue) {
851 2 : if i.vState == nil {
852 0 : panic("pebble: invalid call to virtualLast")
853 : }
854 :
855 : // Seek to the first internal key >= i.upper.
856 2 : ikey, _ := i.SeekGE(i.upper, base.SeekGEFlagsNone)
857 2 : if i.endKeyInclusive {
858 2 : // Let's say the virtual sstable upper bound is c#1, with the keys c#3, c#2,
859 2 : // c#1, d, e, ... in the sstable. So, the last key in the virtual sstable is
860 2 : // c#1. We can perform SeekGE(i.upper) and then keep nexting until we find
861 2 : // the last key with userkey == i.upper.
862 2 : //
863 2 : // TODO(bananabrick): Think about how to improve this. If there are many
864 2 : // internal keys with the same user key at the upper bound, this could be
865 2 : // slow, but maybe the odds of having many internal keys with the same user
866 2 : // key at the upper bound are low.
867 2 : for ikey != nil && i.cmp(ikey.UserKey, i.upper) == 0 {
868 2 : ikey, _ = i.Next()
869 2 : }
870 2 : return i.Prev()
871 : }
872 :
873 : // We seeked to the first key >= i.upper.
874 2 : return i.Prev()
875 : }
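
The inclusive-upper-bound case in virtualLast, sketched over a toy ordered key list: seek to the first key >= upper, step forward past every key equal to upper, then step back once (duplicate strings stand in for multiple internal keys sharing a user key):

package main

import (
	"fmt"
	"sort"
)

// lastWithinInclusiveUpper returns the index of the last element <= upper,
// mirroring virtualLast's SeekGE(upper) + Next loop + Prev, or -1 if every
// element is greater than upper.
func lastWithinInclusiveUpper(keys []string, upper string) int {
	i := sort.SearchStrings(keys, upper) // SeekGE(upper)
	for i < len(keys) && keys[i] == upper {
		i++ // Next past keys equal to the inclusive bound
	}
	return i - 1 // Prev
}

func main() {
	keys := []string{"a", "b", "c", "c", "c", "d"}
	fmt.Println(lastWithinInclusiveUpper(keys, "c")) // 4: the last "c"
	fmt.Println(lastWithinInclusiveUpper(keys, "0")) // -1: bound below all keys
}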
876 :
877 : // SeekLT implements internalIterator.SeekLT, as documented in the pebble
878 : // package. Note that SeekLT only checks the lower bound. It is up to the
879 : // caller to ensure that key is less than or equal to the upper bound.
880 : func (i *singleLevelIterator) SeekLT(
881 : key []byte, flags base.SeekLTFlags,
882 2 : ) (*InternalKey, base.LazyValue) {
883 2 : if i.vState != nil {
884 2 : // Might have to fix upper bound since virtual sstable bounds are not
885 2 : // known to callers of SeekLT.
886 2 : //
887 2 : // TODO(bananabrick): We can optimize away this check for the level iter
888 2 : // if necessary.
889 2 : cmp := i.cmp(key, i.upper)
890 2 : // key == i.upper is fine. We'll do the right thing and return the
891 2 : // first internal key with user key < key.
892 2 : if cmp > 0 {
893 2 : // Return the last key in the virtual sstable.
894 2 : return i.virtualLast()
895 2 : }
896 : }
897 :
898 2 : i.exhaustedBounds = 0
899 2 : i.err = nil // clear cached iteration error
900 2 : boundsCmp := i.boundsCmp
901 2 : // Seek optimization only applies until iterator is first positioned after SetBounds.
902 2 : i.boundsCmp = 0
903 2 :
904 2 : // Seeking operations perform various step-instead-of-seeking optimizations:
905 2 : // eg by considering monotonically increasing bounds (i.boundsCmp). Care
906 2 : // must be taken to ensure that when performing these optimizations and the
907 2 : // iterator becomes exhausted i.maybeFilteredKeysSingleLevel is set
908 2 : // appropriately. Consider a previous SeekLT that filtered keys from k
909 2 : // until the current iterator position.
910 2 : //
911 2 : // If the previous SeekLT exhausted the iterator, it's possible keys
912 2 : // less than the current search key were filtered. We must not reuse the
913 2 : // current iterator position without remembering the previous value of
914 2 : // maybeFilteredKeysSingleLevel.
915 2 :
916 2 : i.positionedUsingLatestBounds = true
917 2 :
918 2 : var dontSeekWithinBlock bool
919 2 : if !i.data.isDataInvalidated() && !i.index.isDataInvalidated() && i.data.valid() && i.index.valid() &&
920 2 : boundsCmp < 0 && i.cmp(i.data.getFirstUserKey(), key) < 0 {
921 2 : // Fast-path: The bounds have moved backward, and this SeekLT is
922 2 : // respecting the upper bound (guaranteed by Iterator). We know that
923 2 : // the iterator must already be positioned within or just outside the
924 2 : // previous bounds. Therefore it cannot be positioned at a block (or
925 2 : // the position within that block) that is behind the seek position.
926 2 : // However it can be positioned at a later block. This fast-path to
927 2 : // use Prev() on the block is only applied when we are already at the
928 2 : // block that can satisfy this seek -- this is the motivation for the
929 2 : // i.cmp(i.data.firstKey.UserKey, key) < 0 predicate.
930 2 : i.initBoundsForAlreadyLoadedBlock()
931 2 : ikey, val, done := i.trySeekLTUsingPrevWithinBlock(key)
932 2 : if done {
933 2 : return ikey, val
934 2 : }
935 2 : if ikey == nil {
936 1 : // Done with this block.
937 1 : dontSeekWithinBlock = true
938 1 : }
939 2 : } else {
940 2 : // Slow-path.
941 2 : i.maybeFilteredKeysSingleLevel = false
942 2 : var ikey *InternalKey
943 2 :
944 2 : // NB: If a bound-limited block property filter is configured, it's
945 2 : // externally ensured that the filter is disabled (through returning
946 2 : // Intersects=false irrespective of the block props provided) during
947 2 : // seeks.
948 2 : if ikey, _ = i.index.SeekGE(key, base.SeekGEFlagsNone); ikey == nil {
949 2 : ikey, _ = i.index.Last()
950 2 : if ikey == nil {
951 0 : return nil, base.LazyValue{}
952 0 : }
953 : }
954 : // INVARIANT: ikey != nil.
955 2 : result := i.loadBlock(-1)
956 2 : if result == loadBlockFailed {
957 1 : return nil, base.LazyValue{}
958 1 : }
959 2 : if result == loadBlockIrrelevant {
960 2 : // Enforce the lower bound here since we don't want to bother moving
961 2 : // to the previous block if lower bound is already exceeded. Note
962 2 : // that the previous block starts with keys <= ikey.UserKey since
963 2 : // even though this is the current block's separator, the same
964 2 : // user key can span multiple blocks.
965 2 : if i.lower != nil && i.cmp(ikey.UserKey, i.lower) < 0 {
966 1 : i.exhaustedBounds = -1
967 1 : return nil, base.LazyValue{}
968 1 : }
969 : // Want to skip to the previous block.
970 2 : dontSeekWithinBlock = true
971 : }
972 : }
973 2 : if !dontSeekWithinBlock {
974 2 : if ikey, val := i.data.SeekLT(key, flags); ikey != nil {
975 2 : if i.blockLower != nil && i.cmp(ikey.UserKey, i.blockLower) < 0 {
976 2 : i.exhaustedBounds = -1
977 2 : return nil, base.LazyValue{}
978 2 : }
979 2 : return ikey, val
980 : }
981 : }
982 : // The index contains separator keys which may lie between
983 : // user-keys. Consider the user-keys:
984 : //
985 : // complete
986 : // ---- new block ---
987 : // complexion
988 : //
989 : // If these two keys end one block and start the next, the index key may
990 : // be chosen as "compleu". The SeekGE in the index block will then point
991 : // us to the block containing "complexion". If this happens, we want the
992 : // last key from the previous data block.
993 2 : return i.maybeVerifyKey(i.skipBackward())
994 : }
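
The separator behavior in the closing comment can be made concrete. An index separator s only needs to satisfy a <= s < b for the last key a of one block and the first key b of the next, so it may be a string that is not an actual key. A toy shortest-separator (illustrative only, not Pebble's Comparer.Separator; byte overflow is ignored for simplicity) reproduces the "compleu" example:

package main

import "fmt"

// shortestSeparator returns a string s with a <= s < b, built from the
// shared prefix plus one incremented byte when that suffices.
func shortestSeparator(a, b string) string {
	n := 0
	for n < len(a) && n < len(b) && a[n] == b[n] {
		n++
	}
	if n < len(a) && n < len(b) && a[n]+1 < b[n] {
		return a[:n] + string([]byte{a[n] + 1})
	}
	return a // fall back to the key itself
}

func main() {
	// Shared prefix "comple"; 't'+1 = 'u' < 'x', so the separator is
	// "compleu", which sorts between "complete" and "complexion".
	fmt.Println(shortestSeparator("complete", "complexion"))
}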
995 :
996 : // First implements internalIterator.First, as documented in the pebble
997 : // package. Note that First only checks the upper bound. It is up to the caller
998 : // to ensure that key is greater than or equal to the lower bound (e.g. via a
999 : // call to SeekGE(lower)).
1000 2 : func (i *singleLevelIterator) First() (*InternalKey, base.LazyValue) {
1001 2 : // If the iterator was created on a virtual sstable, we will SeekGE to the
1002 2 : // lower bound instead of using First, because First does not respect
1003 2 : // bounds.
1004 2 : if i.vState != nil {
1005 2 : return i.SeekGE(i.lower, base.SeekGEFlagsNone)
1006 2 : }
1007 :
1008 2 : if i.lower != nil {
1009 0 : panic("singleLevelIterator.First() used despite lower bound")
1010 : }
1011 2 : i.positionedUsingLatestBounds = true
1012 2 : i.maybeFilteredKeysSingleLevel = false
1013 2 :
1014 2 : return i.firstInternal()
1015 : }
1016 :
1017 : // firstInternal is a helper used for absolute positioning in a single-level
1018 : // index file, or for positioning in the second-level index in a two-level
1019 : // index file. For the latter, one cannot make any claims about absolute
1020 : // positioning.
1021 2 : func (i *singleLevelIterator) firstInternal() (*InternalKey, base.LazyValue) {
1022 2 : i.exhaustedBounds = 0
1023 2 : i.err = nil // clear cached iteration error
1024 2 : // Seek optimization only applies until iterator is first positioned after SetBounds.
1025 2 : i.boundsCmp = 0
1026 2 :
1027 2 : var ikey *InternalKey
1028 2 : if ikey, _ = i.index.First(); ikey == nil {
1029 0 : i.data.invalidate()
1030 0 : return nil, base.LazyValue{}
1031 0 : }
1032 2 : result := i.loadBlock(+1)
1033 2 : if result == loadBlockFailed {
1034 1 : return nil, base.LazyValue{}
1035 1 : }
1036 2 : if result == loadBlockOK {
1037 2 : if ikey, val := i.data.First(); ikey != nil {
1038 2 : if i.blockUpper != nil {
1039 2 : cmp := i.cmp(ikey.UserKey, i.blockUpper)
1040 2 : if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
1041 2 : i.exhaustedBounds = +1
1042 2 : return nil, base.LazyValue{}
1043 2 : }
1044 : }
1045 2 : return ikey, val
1046 : }
1047 : // Else fall through to skipForward.
1048 2 : } else {
1049 2 : // result == loadBlockIrrelevant. Enforce the upper bound here since
1050 2 : // we don't want to bother moving to the next block if upper bound is
1051 2 : // already exceeded. Note that the next block starts with keys >=
1052 2 : // ikey.UserKey since even though this is the block separator, the
1053 2 : // same user key can span multiple blocks. If upper is exclusive we
1054 2 : // use >= below, else we use >.
1055 2 : if i.upper != nil {
1056 2 : cmp := i.cmp(ikey.UserKey, i.upper)
1057 2 : if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
1058 1 : i.exhaustedBounds = +1
1059 1 : return nil, base.LazyValue{}
1060 1 : }
1061 : }
1062 : // Else fall through to skipForward.
1063 : }
1064 :
1065 2 : return i.skipForward()
1066 : }
1067 :
1068 : // Last implements internalIterator.Last, as documented in the pebble
1069 : // package. Note that Last only checks the lower bound. It is up to the caller
1070 : // to ensure that key is less than the upper bound (e.g. via a call to
1071 : // SeekLT(upper))
1072 2 : func (i *singleLevelIterator) Last() (*InternalKey, base.LazyValue) {
1073 2 : if i.vState != nil {
1074 2 : return i.virtualLast()
1075 2 : }
1076 :
1077 2 : if i.upper != nil {
1078 0 : panic("singleLevelIterator.Last() used despite upper bound")
1079 : }
1080 2 : i.positionedUsingLatestBounds = true
1081 2 : i.maybeFilteredKeysSingleLevel = false
1082 2 : return i.lastInternal()
1083 : }
1084 :
1085 : // lastInternal is a helper used for absolute positioning in a single-level
1086 : // index file, or for positioning in the second-level index in a two-level
1087 : // index file. For the latter, one cannot make any claims about absolute
1088 : // positioning.
1089 2 : func (i *singleLevelIterator) lastInternal() (*InternalKey, base.LazyValue) {
1090 2 : i.exhaustedBounds = 0
1091 2 : i.err = nil // clear cached iteration error
1092 2 : // Seek optimization only applies until iterator is first positioned after SetBounds.
1093 2 : i.boundsCmp = 0
1094 2 :
1095 2 : var ikey *InternalKey
1096 2 : if ikey, _ = i.index.Last(); ikey == nil {
1097 0 : i.data.invalidate()
1098 0 : return nil, base.LazyValue{}
1099 0 : }
1100 2 : result := i.loadBlock(-1)
1101 2 : if result == loadBlockFailed {
1102 1 : return nil, base.LazyValue{}
1103 1 : }
1104 2 : if result == loadBlockOK {
1105 2 : if ikey, val := i.data.Last(); ikey != nil {
1106 2 : if i.blockLower != nil && i.cmp(ikey.UserKey, i.blockLower) < 0 {
1107 2 : i.exhaustedBounds = -1
1108 2 : return nil, base.LazyValue{}
1109 2 : }
1110 2 : return ikey, val
1111 : }
1112 : // Else fall through to skipBackward.
1113 2 : } else {
1114 2 : // result == loadBlockIrrelevant. Enforce the lower bound here since
1115 2 : // we don't want to bother moving to the previous block if lower bound is
1116 2 : // already exceeded. Note that the previous block starts with keys <=
1117 2 : // key.UserKey since even though this is the current block's
1118 2 : // separator, the same user key can span multiple blocks.
1119 2 : if i.lower != nil && i.cmp(ikey.UserKey, i.lower) < 0 {
1120 1 : i.exhaustedBounds = -1
1121 1 : return nil, base.LazyValue{}
1122 1 : }
1123 : }
1124 :
1125 2 : return i.skipBackward()
1126 : }
1127 :
1128 : // Next implements internalIterator.Next, as documented in the pebble
1129 : // package.
1130 : // Note: compactionIterator.Next mirrors the implementation of Iterator.Next
1131 : // for performance reasons. Keep the two in sync.
1132 2 : func (i *singleLevelIterator) Next() (*InternalKey, base.LazyValue) {
1133 2 : if i.exhaustedBounds == +1 {
1134 0 : panic("Next called even though exhausted upper bound")
1135 : }
1136 2 : i.exhaustedBounds = 0
1137 2 : i.maybeFilteredKeysSingleLevel = false
1138 2 : // Seek optimization only applies until iterator is first positioned after SetBounds.
1139 2 : i.boundsCmp = 0
1140 2 :
1141 2 : if i.err != nil {
1142 1 : // TODO(jackson): Can this case be turned into a panic? Once an error is
1143 1 : // encountered, the iterator must be re-seeked.
1144 1 : return nil, base.LazyValue{}
1145 1 : }
1146 2 : if key, val := i.data.Next(); key != nil {
1147 2 : if i.blockUpper != nil {
1148 2 : cmp := i.cmp(key.UserKey, i.blockUpper)
1149 2 : if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
1150 2 : i.exhaustedBounds = +1
1151 2 : return nil, base.LazyValue{}
1152 2 : }
1153 : }
1154 2 : return key, val
1155 : }
1156 2 : return i.skipForward()
1157 : }
1158 :
1159 : // NextPrefix implements (base.InternalIterator).NextPrefix.
1160 2 : func (i *singleLevelIterator) NextPrefix(succKey []byte) (*InternalKey, base.LazyValue) {
1161 2 : if i.exhaustedBounds == +1 {
1162 0 : panic("NextPrefix called even though exhausted upper bound")
1163 : }
1164 2 : i.exhaustedBounds = 0
1165 2 : i.maybeFilteredKeysSingleLevel = false
1166 2 : // Seek optimization only applies until iterator is first positioned after SetBounds.
1167 2 : i.boundsCmp = 0
1168 2 : if i.err != nil {
1169 0 : // TODO(jackson): Can this case be turned into a panic? Once an error is
1170 0 : // encountered, the iterator must be re-seeked.
1171 0 : return nil, base.LazyValue{}
1172 0 : }
1173 2 : if key, val := i.data.NextPrefix(succKey); key != nil {
1174 2 : if i.blockUpper != nil {
1175 1 : cmp := i.cmp(key.UserKey, i.blockUpper)
1176 1 : if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
1177 1 : i.exhaustedBounds = +1
1178 1 : return nil, base.LazyValue{}
1179 1 : }
1180 : }
1181 2 : return key, val
1182 : }
1183 : // Did not find prefix in the existing data block. This is the slow-path
1184 : // where we effectively seek the iterator.
1185 2 : var ikey *InternalKey
1186 2 : // The key is likely to be in the next data block, so try one step.
1187 2 : if ikey, _ = i.index.Next(); ikey == nil {
1188 2 : // The target key is greater than any key in the index block.
1189 2 : // Invalidate the block iterator so that a subsequent call to Prev()
1190 2 : // will return the last key in the table.
1191 2 : i.data.invalidate()
1192 2 : return nil, base.LazyValue{}
1193 2 : }
1194 2 : if i.cmp(succKey, ikey.UserKey) > 0 {
1195 2 : // Not in the next data block, so seek the index.
1196 2 : if ikey, _ = i.index.SeekGE(succKey, base.SeekGEFlagsNone); ikey == nil {
1197 2 : // The target key is greater than any key in the index block.
1198 2 : // Invalidate the block iterator so that a subsequent call to Prev()
1199 2 : // will return the last key in the table.
1200 2 : i.data.invalidate()
1201 2 : return nil, base.LazyValue{}
1202 2 : }
1203 : }
1204 2 : result := i.loadBlock(+1)
1205 2 : if result == loadBlockFailed {
1206 1 : return nil, base.LazyValue{}
1207 1 : }
1208 2 : if result == loadBlockIrrelevant {
1209 1 : // Enforce the upper bound here since we don't want to bother moving
1210 1 : // to the next block if upper bound is already exceeded. Note that
1211 1 : // the next block starts with keys >= ikey.UserKey since even
1212 1 : // though this is the block separator, the same user key can span
1213 1 : // multiple blocks. If upper is exclusive we use >= below, else we use
1214 1 : // >.
1215 1 : if i.upper != nil {
1216 1 : cmp := i.cmp(ikey.UserKey, i.upper)
1217 1 : if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
1218 0 : i.exhaustedBounds = +1
1219 0 : return nil, base.LazyValue{}
1220 0 : }
1221 : }
1222 2 : } else if key, val := i.data.SeekGE(succKey, base.SeekGEFlagsNone); key != nil {
1223 2 : if i.blockUpper != nil {
1224 1 : cmp := i.cmp(key.UserKey, i.blockUpper)
1225 1 : if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
1226 1 : i.exhaustedBounds = +1
1227 1 : return nil, base.LazyValue{}
1228 1 : }
1229 : }
1230 2 : return i.maybeVerifyKey(key, val)
1231 : }
1232 :
1233 2 : return i.skipForward()
1234 : }
1235 :
1236 : // Prev implements internalIterator.Prev, as documented in the pebble
1237 : // package.
1238 2 : func (i *singleLevelIterator) Prev() (*InternalKey, base.LazyValue) {
1239 2 : if i.exhaustedBounds == -1 {
1240 0 : panic("Prev called even though exhausted lower bound")
1241 : }
1242 2 : i.exhaustedBounds = 0
1243 2 : i.maybeFilteredKeysSingleLevel = false
1244 2 : // Seek optimization only applies until iterator is first positioned after SetBounds.
1245 2 : i.boundsCmp = 0
1246 2 :
1247 2 : if i.err != nil {
1248 1 : return nil, base.LazyValue{}
1249 1 : }
1250 2 : if key, val := i.data.Prev(); key != nil {
1251 2 : if i.blockLower != nil && i.cmp(key.UserKey, i.blockLower) < 0 {
1252 2 : i.exhaustedBounds = -1
1253 2 : return nil, base.LazyValue{}
1254 2 : }
1255 2 : return key, val
1256 : }
1257 2 : return i.skipBackward()
1258 : }
1259 :
1260 2 : func (i *singleLevelIterator) skipForward() (*InternalKey, base.LazyValue) {
1261 2 : for {
1262 2 : var key *InternalKey
1263 2 : if key, _ = i.index.Next(); key == nil {
1264 2 : i.data.invalidate()
1265 2 : break
1266 : }
1267 2 : result := i.loadBlock(+1)
1268 2 : if result != loadBlockOK {
1269 2 : if i.err != nil {
1270 1 : break
1271 : }
1272 2 : if result == loadBlockFailed {
1273 0 : // We checked that i.index was at a valid entry, so
1274 0 : // loadBlockFailed could not have happened due to i.index
1275 0 : // being exhausted, and must be due to an error.
1276 0 : panic("loadBlock should not have failed with no error")
1277 : }
1278 : // result == loadBlockIrrelevant. Enforce the upper bound here
1279 : // since we don't want to bother moving to the next block if upper
1280 : // bound is already exceeded. Note that the next block starts with
1281 : // keys >= key.UserKey since even though this is the block
1282 : // separator, the same user key can span multiple blocks. If upper
1283 : // is exclusive we use >= below, else we use >.
1284 2 : if i.upper != nil {
1285 2 : cmp := i.cmp(key.UserKey, i.upper)
1286 2 : if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
1287 2 : i.exhaustedBounds = +1
1288 2 : return nil, base.LazyValue{}
1289 2 : }
1290 : }
1291 2 : continue
1292 : }
1293 2 : if key, val := i.data.First(); key != nil {
1294 2 : if i.blockUpper != nil {
1295 2 : cmp := i.cmp(key.UserKey, i.blockUpper)
1296 2 : if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
1297 2 : i.exhaustedBounds = +1
1298 2 : return nil, base.LazyValue{}
1299 2 : }
1300 : }
1301 2 : return i.maybeVerifyKey(key, val)
1302 : }
1303 : }
1304 2 : return nil, base.LazyValue{}
1305 : }
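
skipForward's loop structure, abstracted: advance the index entry by entry, skip blocks ruled irrelevant by property filters, and return the first key of the first loaded block. A minimal sketch with toy blocks (the relevant flag models loadBlockIrrelevant):

package main

import "fmt"

type block struct {
	keys     []string
	relevant bool // false models loadBlockIrrelevant (excluded by properties)
}

// firstKeySkippingIrrelevant walks forward from blocks[start], skipping
// filtered blocks, echoing skipForward's index.Next + loadBlock loop.
func firstKeySkippingIrrelevant(blocks []block, start int) (string, bool) {
	for i := start; i < len(blocks); i++ {
		if !blocks[i].relevant {
			continue // move to the next index entry
		}
		if len(blocks[i].keys) > 0 {
			return blocks[i].keys[0], true // data.First()
		}
	}
	return "", false // index exhausted
}

func main() {
	blocks := []block{
		{keys: []string{"a", "b"}, relevant: false},
		{keys: []string{"f", "g"}, relevant: true},
	}
	fmt.Println(firstKeySkippingIrrelevant(blocks, 0)) // f true
}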
1306 :
1307 2 : func (i *singleLevelIterator) skipBackward() (*InternalKey, base.LazyValue) {
1308 2 : for {
1309 2 : var key *InternalKey
1310 2 : if key, _ = i.index.Prev(); key == nil {
1311 2 : i.data.invalidate()
1312 2 : break
1313 : }
1314 2 : result := i.loadBlock(-1)
1315 2 : if result != loadBlockOK {
1316 2 : if i.err != nil {
1317 1 : break
1318 : }
1319 2 : if result == loadBlockFailed {
1320 0 : // We checked that i.index was at a valid entry, so
1321 0 : // loadBlockFailed could not have happened due to i.index
1322 0 : // being exhausted, and must be due to an error.
1323 0 : panic("loadBlock should not have failed with no error")
1324 : }
1325 : // result == loadBlockIrrelevant. Enforce the lower bound here
1326 : // since we don't want to bother moving to the previous block if lower
1327 : // bound is already exceeded. Note that the previous block starts with
1328 : // keys <= key.UserKey since even though this is the current block's
1329 : // separator, the same user key can span multiple blocks.
1330 2 : if i.lower != nil && i.cmp(key.UserKey, i.lower) < 0 {
1331 1 : i.exhaustedBounds = -1
1332 1 : return nil, base.LazyValue{}
1333 1 : }
1334 2 : continue
1335 : }
1336 2 : key, val := i.data.Last()
1337 2 : if key == nil {
1338 1 : return nil, base.LazyValue{}
1339 1 : }
1340 2 : if i.blockLower != nil && i.cmp(key.UserKey, i.blockLower) < 0 {
1341 2 : i.exhaustedBounds = -1
1342 2 : return nil, base.LazyValue{}
1343 2 : }
1344 2 : return i.maybeVerifyKey(key, val)
1345 : }
1346 2 : return nil, base.LazyValue{}
1347 : }
1348 :
1349 : // Error implements internalIterator.Error, as documented in the pebble
1350 : // package.
1351 2 : func (i *singleLevelIterator) Error() error {
1352 2 : if err := i.data.Error(); err != nil {
1353 0 : return err
1354 0 : }
1355 2 : return i.err
1356 : }
1357 :
1358 : // MaybeFilteredKeys may be called when an iterator is exhausted to indicate
1359 : // whether or not the last positioning method may have skipped any keys due to
1360 : // block-property filters.
1361 2 : func (i *singleLevelIterator) MaybeFilteredKeys() bool {
1362 2 : return i.maybeFilteredKeysSingleLevel
1363 2 : }
1364 :
1365 : // SetCloseHook sets a function that will be called when the iterator is
1366 : // closed.
1367 2 : func (i *singleLevelIterator) SetCloseHook(fn func(i Iterator) error) {
1368 2 : i.closeHook = fn
1369 2 : }
1370 :
1371 2 : func firstError(err0, err1 error) error {
1372 2 : if err0 != nil {
1373 1 : return err0
1374 1 : }
1375 2 : return err1
1376 : }
1377 :
1378 : // Close implements internalIterator.Close, as documented in the pebble
1379 : // package.
1380 2 : func (i *singleLevelIterator) Close() error {
1381 2 : i.iterStats.close()
1382 2 : var err error
1383 2 : if i.closeHook != nil {
1384 2 : err = firstError(err, i.closeHook(i))
1385 2 : }
1386 2 : err = firstError(err, i.data.Close())
1387 2 : err = firstError(err, i.index.Close())
1388 2 : if i.dataRH != nil {
1389 2 : err = firstError(err, i.dataRH.Close())
1390 2 : i.dataRH = nil
1391 2 : }
1392 2 : err = firstError(err, i.err)
1393 2 : if i.bpfs != nil {
1394 2 : releaseBlockPropertiesFilterer(i.bpfs)
1395 2 : }
1396 2 : if i.vbReader != nil {
1397 2 : i.vbReader.close()
1398 2 : }
1399 2 : if i.vbRH != nil {
1400 2 : err = firstError(err, i.vbRH.Close())
1401 2 : i.vbRH = nil
1402 2 : }
1403 2 : *i = i.resetForReuse()
1404 2 : singleLevelIterPool.Put(i)
1405 2 : return err
1406 : }
1407 :
1408 2 : func (i *singleLevelIterator) String() string {
1409 2 : if i.vState != nil {
1410 2 : return i.vState.fileNum.String()
1411 2 : }
1412 2 : return i.reader.fileNum.String()
1413 : }
|