Line data Source code
1 : // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package pebble
6 :
7 : import (
8 : "github.com/cockroachdb/pebble/internal/base"
9 : "github.com/cockroachdb/pebble/internal/invariants"
10 : "github.com/cockroachdb/pebble/internal/keyspan"
11 : "github.com/cockroachdb/pebble/internal/manifest"
12 : "github.com/cockroachdb/pebble/sstable"
13 : )
14 :
15 : // constructRangeKeyIter constructs the range-key iterator stack, populating
16 : // i.rangeKey.rangeKeyIter with the resulting iterator.
17 2 : func (i *Iterator) constructRangeKeyIter() {
18 2 : i.rangeKey.rangeKeyIter = i.rangeKey.iterConfig.Init(
19 2 : &i.comparer, i.seqNum, i.opts.LowerBound, i.opts.UpperBound,
20 2 : &i.hasPrefix, &i.prefixOrFullSeekKey, true /* onlySets */, &i.rangeKey.rangeKeyBuffers.internal)
21 2 :
22 2 : // If there's an indexed batch with range keys, include it.
23 2 : if i.batch != nil {
24 2 : if i.batch.index == nil {
25 0 : i.rangeKey.iterConfig.AddLevel(newErrorKeyspanIter(ErrNotIndexed))
26 2 : } else {
27 2 : // Only include the batch's range key iterator if it has any keys.
28 2 : // NB: This can force reconstruction of the rangekey iterator stack
29 2 : // in SetOptions if subsequently range keys are added. See
30 2 : // SetOptions.
31 2 : if i.batch.countRangeKeys > 0 {
32 1 : i.batch.initRangeKeyIter(&i.opts, &i.batchRangeKeyIter, i.batchSeqNum)
33 1 : i.rangeKey.iterConfig.AddLevel(&i.batchRangeKeyIter)
34 1 : }
35 : }
36 : }
37 :
38 : // Next are the flushables: memtables and large batches.
39 2 : if i.readState != nil {
40 2 : for j := len(i.readState.memtables) - 1; j >= 0; j-- {
41 2 : mem := i.readState.memtables[j]
42 2 : // We only need to read from memtables which contain sequence numbers older
43 2 : // than seqNum.
44 2 : if logSeqNum := mem.logSeqNum; logSeqNum >= i.seqNum {
45 2 : continue
46 : }
47 2 : if rki := mem.newRangeKeyIter(&i.opts); rki != nil {
48 2 : i.rangeKey.iterConfig.AddLevel(rki)
49 2 : }
50 : }
51 : }
52 :
53 2 : current := i.version
54 2 : if current == nil {
55 2 : current = i.readState.current
56 2 : }
57 : // Next are the file levels: L0 sub-levels followed by lower levels.
58 : //
59 : // Add file-specific iterators for L0 files containing range keys. This is less
60 : // efficient than using levelIters for sublevels of L0 files containing
61 : // range keys, but range keys are expected to be sparse anyway, reducing the
62 : // cost benefit of maintaining a separate L0Sublevels instance for range key
63 : // files and then using it here.
64 : //
65 : // NB: We iterate L0's files in reverse order. They're sorted by
66 : // LargestSeqNum ascending, and we need to add them to the merging iterator
67 : // in LargestSeqNum descending to preserve the merging iterator's invariants
68 : // around Key Trailer order.
69 2 : iter := current.RangeKeyLevels[0].Iter()
70 2 : for f := iter.Last(); f != nil; f = iter.Prev() {
71 2 : spanIter, err := i.newIterRangeKey(f, i.opts.SpanIterOptions())
72 2 : if err != nil {
73 0 : i.rangeKey.iterConfig.AddLevel(&errorKeyspanIter{err: err})
74 0 : continue
75 : }
76 2 : i.rangeKey.iterConfig.AddLevel(spanIter)
77 : }
78 :
79 : // Add level iterators for the non-empty non-L0 levels.
80 2 : for level := 1; level < len(current.RangeKeyLevels); level++ {
81 2 : if current.RangeKeyLevels[level].Empty() {
82 2 : continue
83 : }
84 2 : li := i.rangeKey.iterConfig.NewLevelIter()
85 2 : spanIterOpts := i.opts.SpanIterOptions()
86 2 : li.Init(spanIterOpts, i.cmp, i.newIterRangeKey, current.RangeKeyLevels[level].Iter(),
87 2 : manifest.Level(level), manifest.KeyTypeRange)
88 2 : i.rangeKey.iterConfig.AddLevel(li)
89 : }
90 : }
91 :
92 : // Range key masking
93 : //
94 : // Pebble iterators may be configured such that range keys with suffixes mask
95 : // point keys with lower suffixes. The intended use is implementing a MVCC
96 : // delete range operation using range keys, when suffixes are MVCC timestamps.
97 : //
98 : // To enable masking, the user populates the IterOptions's RangeKeyMasking
99 : // field. The Suffix field configures which range keys act as masks. The
100 : // intended use is to hold a MVCC read timestamp. When implementing a MVCC
101 : // delete range operation, only range keys that are visible at the read
102 : // timestamp should be visible. If a range key has a suffix ≤
103 : // RangeKeyMasking.Suffix, it acts as a mask.
104 : //
105 : // Range key masking is facilitated by the keyspan.InterleavingIter. The
106 : // interleaving iterator interleaves range keys and point keys during combined
107 : // iteration. During user iteration, the interleaving iterator is configured
108 : // with a keyspan.SpanMask, implemented by the rangeKeyMasking struct below.
109 : // The SpanMask interface defines two methods: SpanChanged and SkipPoint.
110 : //
111 : // SpanChanged is used to keep the current mask up-to-date. Whenever the point
112 : // iterator has stepped into or out of the bounds of a range key, the
113 : // interleaving iterator invokes SpanChanged passing the current covering range
114 : // key. The below rangeKeyMasking implementation scans the range keys looking
115 : // for the range key with the largest suffix that's still ≤ the suffix supplied
116 : // to IterOptions.RangeKeyMasking.Suffix (the "read timestamp"). If it finds a
117 : // range key that meets the condition, the range key should act as a mask. The
118 : // span and the relevant range key's suffix are saved.
119 : //
120 : // The above ensures that `rangeKeyMasking.maskActiveSuffix` always contains the
121 : // current masking suffix such that any point keys with lower suffixes should be
122 : // skipped.
123 : //
124 : // There are two ways in which masked point keys are skipped.
125 : //
126 : // 1. Interleaving iterator SkipPoint
127 : //
128 : // Whenever the interleaving iterator encounters a point key that falls within
129 : // the bounds of a range key, it invokes SkipPoint. The interleaving iterator
130 : // guarantees that the SpanChanged method described above has already been
131 : // invoked with the covering range key. The below rangeKeyMasking implementation
132 : // of SkipPoint splits the key into prefix and suffix, compares the suffix to
133 : // the `maskActiveSuffix` updated by SpanChanged and returns true if
134 : // suffix(point) < maskActiveSuffix.
135 : //
136 : // The SkipPoint logic is sufficient to ensure that the Pebble iterator filters
137 : // out all masked point keys. However, it requires the iterator read each masked
138 : // point key. For broad range keys that mask many points, this may be expensive.
139 : //
140 : // 2. Block property filter
141 : //
142 : // For more efficient handling of braad range keys that mask many points, the
143 : // IterOptions.RangeKeyMasking field has an optional Filter option. This Filter
144 : // field takes a superset of the block-property filter interface, adding a
145 : // method to dynamically configure the filter's filtering criteria.
146 : //
147 : // To make use of the Filter option, the user is required to define and
148 : // configure a block-property collector that collects a property containing at
149 : // least the maximum suffix of a key within a block.
150 : //
151 : // When the SpanChanged method described above is invoked, rangeKeyMasking also
152 : // reconfigures the user-provided filter. It invokes a SetSuffix method,
153 : // providing the `maskActiveSuffix`, requesting that from now on the
154 : // block-property filter return Intersects()=false for any properties indicating
155 : // that a block contains exclusively keys with suffixes greater than the
156 : // provided suffix.
157 : //
158 : // Note that unlike other block-property filters, the filter used for masking
159 : // must not apply across the entire keyspace. It must only filter blocks that
160 : // lie within the bounds of the range key that set the mask suffix. To
161 : // accommodate this, rangeKeyMasking implements a special interface:
162 : // sstable.BoundLimitedBlockPropertyFilter. This interface extends the block
163 : // property filter interface with two new methods: KeyIsWithinLowerBound and
164 : // KeyIsWithinUpperBound. The rangeKeyMasking type wraps the user-provided block
165 : // property filter, implementing these two methods and overriding Intersects to
166 : // always return true if there is no active mask.
167 : //
168 : // The logic to ensure that a mask block-property filter is only applied within
169 : // the bounds of the masking range key is subtle. The interleaving iterator
170 : // guarantees that it never invokes SpanChanged until the point iterator is
171 : // positioned within the range key. During forward iteration, this guarantees
172 : // that any block that a sstable reader might attempt to load contains only keys
173 : // greater than or equal to the range key's lower bound. During backward
174 : // iteration, it provides the analagous guarantee on the range key's upper
175 : // bound.
176 : //
177 : // The above ensures that an sstable reader only needs to verify that a block
178 : // that it skips meets the opposite bound. This is where the
179 : // KeyIsWithinLowerBound and KeyIsWithinUpperBound methods are used. When an
180 : // sstable iterator is configured with a BoundLimitedBlockPropertyFilter, it
181 : // checks for intersection with the block-property filter before every block
182 : // load, like ordinary block-property filters. However, if the bound-limited
183 : // block property filter indicates that it does NOT intersect, the filter's
184 : // relevant KeyIsWithin{Lower,Upper}Bound method is queried, using a block
185 : // index separator as the bound. If the method indicates that the provided index
186 : // separator does not fall within the range key bounds, the no-intersection
187 : // result is ignored, and the block is read.
188 :
189 : type rangeKeyMasking struct {
190 : cmp base.Compare
191 : split base.Split
192 : filter BlockPropertyFilterMask
193 : // maskActiveSuffix holds the suffix of a range key currently acting as a
194 : // mask, hiding point keys with suffixes greater than it. maskActiveSuffix
195 : // is only ever non-nil if IterOptions.RangeKeyMasking.Suffix is non-nil.
196 : // maskActiveSuffix is updated whenever the iterator passes over a new range
197 : // key. The maskActiveSuffix should only be used if maskSpan is non-nil.
198 : //
199 : // See SpanChanged.
200 : maskActiveSuffix []byte
201 : // maskSpan holds the span from which the active mask suffix was extracted.
202 : // The span is used for bounds comparisons, to ensure that a range-key mask
203 : // is not applied beyond the bounds of the range key.
204 : maskSpan *keyspan.Span
205 : parent *Iterator
206 : }
207 :
208 2 : func (m *rangeKeyMasking) init(parent *Iterator, cmp base.Compare, split base.Split) {
209 2 : m.cmp = cmp
210 2 : m.split = split
211 2 : if parent.opts.RangeKeyMasking.Filter != nil {
212 2 : m.filter = parent.opts.RangeKeyMasking.Filter()
213 2 : }
214 2 : m.parent = parent
215 : }
216 :
217 : // SpanChanged implements the keyspan.SpanMask interface, used during range key
218 : // iteration.
219 2 : func (m *rangeKeyMasking) SpanChanged(s *keyspan.Span) {
220 2 : if s == nil && m.maskSpan == nil {
221 2 : return
222 2 : }
223 2 : m.maskSpan = nil
224 2 : m.maskActiveSuffix = m.maskActiveSuffix[:0]
225 2 :
226 2 : // Find the smallest suffix of a range key contained within the Span,
227 2 : // excluding suffixes less than m.opts.RangeKeyMasking.Suffix.
228 2 : if s != nil {
229 2 : m.parent.rangeKey.stale = true
230 2 : if m.parent.opts.RangeKeyMasking.Suffix != nil {
231 2 : for j := range s.Keys {
232 2 : if s.Keys[j].Suffix == nil {
233 0 : continue
234 : }
235 2 : if m.cmp(s.Keys[j].Suffix, m.parent.opts.RangeKeyMasking.Suffix) < 0 {
236 2 : continue
237 : }
238 2 : if len(m.maskActiveSuffix) == 0 || m.cmp(m.maskActiveSuffix, s.Keys[j].Suffix) > 0 {
239 2 : m.maskSpan = s
240 2 : m.maskActiveSuffix = append(m.maskActiveSuffix[:0], s.Keys[j].Suffix...)
241 2 : }
242 : }
243 : }
244 : }
245 :
246 2 : if m.maskSpan != nil && m.parent.opts.RangeKeyMasking.Filter != nil {
247 2 : // Update the block-property filter to filter point keys with suffixes
248 2 : // greater than m.maskActiveSuffix.
249 2 : err := m.filter.SetSuffix(m.maskActiveSuffix)
250 2 : if err != nil {
251 0 : m.parent.err = err
252 0 : }
253 : }
254 : // If no span is active, we leave the inner block-property filter configured
255 : // with its existing suffix. That's okay, because Intersects calls are first
256 : // evaluated by iteratorRangeKeyState.Intersects, which considers all blocks
257 : // as intersecting if there's no active mask.
258 : }
259 :
260 : // SkipPoint implements the keyspan.SpanMask interface, used during range key
261 : // iteration. Whenever a point key is covered by a non-empty Span, the
262 : // interleaving iterator invokes SkipPoint. This function is responsible for
263 : // performing range key masking.
264 : //
265 : // If a non-nil IterOptions.RangeKeyMasking.Suffix is set, range key masking is
266 : // enabled. Masking hides point keys, transparently skipping over the keys.
267 : // Whether or not a point key is masked is determined by comparing the point
268 : // key's suffix, the overlapping span's keys' suffixes, and the user-configured
269 : // IterOption's RangeKeyMasking.Suffix. When configured with a masking threshold
270 : // _t_, and there exists a span with suffix _r_ covering a point key with suffix
271 : // _p_, and
272 : //
273 : // _t_ ≤ _r_ < _p_
274 : //
275 : // then the point key is elided. Consider the following rendering, where using
276 : // integer suffixes with higher integers sort before suffixes with lower
277 : // integers, (for example @7 ≤ @6 < @5):
278 : //
279 : // ^
280 : // @9 | •―――――――――――――――○ [e,m)@9
281 : // s 8 | • l@8
282 : // u 7 |------------------------------------ @7 RangeKeyMasking.Suffix
283 : // f 6 | [h,q)@6 •―――――――――――――――――○ (threshold)
284 : // f 5 | • h@5
285 : // f 4 | • n@4
286 : // i 3 | •―――――――――――○ [f,l)@3
287 : // x 2 | • b@2
288 : // 1 |
289 : // 0 |___________________________________
290 : // a b c d e f g h i j k l m n o p q
291 : //
292 : // An iterator scanning the entire keyspace with the masking threshold set to @7
293 : // will observe point keys b@2 and l@8. The span keys [h,q)@6 and [f,l)@3 serve
294 : // as masks, because cmp(@6,@7) ≥ 0 and cmp(@3,@7) ≥ 0. The span key [e,m)@9
295 : // does not serve as a mask, because cmp(@9,@7) < 0.
296 : //
297 : // Although point l@8 falls within the user key bounds of [e,m)@9, [e,m)@9 is
298 : // non-masking due to its suffix. The point key l@8 also falls within the user
299 : // key bounds of [h,q)@6, but since cmp(@6,@8) ≥ 0, l@8 is unmasked.
300 : //
301 : // Invariant: The userKey is within the user key bounds of the span most
302 : // recently provided to `SpanChanged`.
303 2 : func (m *rangeKeyMasking) SkipPoint(userKey []byte) bool {
304 2 : m.parent.stats.RangeKeyStats.ContainedPoints++
305 2 : if m.maskSpan == nil {
306 2 : // No range key is currently acting as a mask, so don't skip.
307 2 : return false
308 2 : }
309 : // Range key masking is enabled and the current span includes a range key
310 : // that is being used as a mask. (NB: SpanChanged already verified that the
311 : // range key's suffix is ≥ RangeKeyMasking.Suffix).
312 : //
313 : // This point key falls within the bounds of the range key (guaranteed by
314 : // the InterleavingIter). Skip the point key if the range key's suffix is
315 : // greater than the point key's suffix.
316 2 : pointSuffix := userKey[m.split(userKey):]
317 2 : if len(pointSuffix) > 0 && m.cmp(m.maskActiveSuffix, pointSuffix) < 0 {
318 2 : m.parent.stats.RangeKeyStats.SkippedPoints++
319 2 : return true
320 2 : }
321 2 : return false
322 : }
323 :
324 : // The iteratorRangeKeyState type implements the sstable package's
325 : // BoundLimitedBlockPropertyFilter interface in order to use block property
326 : // filters for range key masking. The iteratorRangeKeyState implementation wraps
327 : // the block-property filter provided in Options.RangeKeyMasking.Filter.
328 : //
329 : // Using a block-property filter for range-key masking requires limiting the
330 : // filter's effect to the bounds of the range key currently acting as a mask.
331 : // Consider the range key [a,m)@10, and an iterator positioned just before the
332 : // below block, bounded by index separators `c` and `z`:
333 : //
334 : // c z
335 : // x | c@9 c@5 c@1 d@7 e@4 y@4 | ...
336 : // iter pos
337 : //
338 : // The next block cannot be skipped, despite the range key suffix @10 is greater
339 : // than all the block's keys' suffixes, because it contains a key (y@4) outside
340 : // the bounds of the range key.
341 : //
342 : // This extended BoundLimitedBlockPropertyFilter interface adds two new methods,
343 : // KeyIsWithinLowerBound and KeyIsWithinUpperBound, for testing whether a
344 : // particular block is within bounds.
345 : //
346 : // The iteratorRangeKeyState implements these new methods by first checking if
347 : // the iterator is currently positioned within a range key. If not, the provided
348 : // key is considered out-of-bounds. If the iterator is positioned within a range
349 : // key, it compares the corresponding range key bound.
350 : var _ sstable.BoundLimitedBlockPropertyFilter = (*rangeKeyMasking)(nil)
351 :
352 : // Name implements the limitedBlockPropertyFilter interface defined in the
353 : // sstable package by passing through to the user-defined block property filter.
354 2 : func (m *rangeKeyMasking) Name() string {
355 2 : return m.filter.Name()
356 2 : }
357 :
358 : // Intersects implements the limitedBlockPropertyFilter interface defined in the
359 : // sstable package by passing the intersection decision to the user-provided
360 : // block property filter only if a range key is covering the current iterator
361 : // position.
362 1 : func (m *rangeKeyMasking) Intersects(prop []byte) (bool, error) {
363 1 : if m.maskSpan == nil {
364 1 : // No span is actively masking.
365 1 : return true, nil
366 1 : }
367 1 : return m.filter.Intersects(prop)
368 : }
369 :
370 : // KeyIsWithinLowerBound implements the limitedBlockPropertyFilter interface
371 : // defined in the sstable package. It's used to restrict the masking block
372 : // property filter to only applying within the bounds of the active range key.
373 1 : func (m *rangeKeyMasking) KeyIsWithinLowerBound(key []byte) bool {
374 1 : // Invariant: m.maskSpan != nil
375 1 : //
376 1 : // The provided `key` is an inclusive lower bound of the block we're
377 1 : // considering skipping.
378 1 : return m.cmp(m.maskSpan.Start, key) <= 0
379 1 : }
380 :
381 : // KeyIsWithinUpperBound implements the limitedBlockPropertyFilter interface
382 : // defined in the sstable package. It's used to restrict the masking block
383 : // property filter to only applying within the bounds of the active range key.
384 1 : func (m *rangeKeyMasking) KeyIsWithinUpperBound(key []byte) bool {
385 1 : // Invariant: m.maskSpan != nil
386 1 : //
387 1 : // The provided `key` is an *inclusive* upper bound of the block we're
388 1 : // considering skipping, so the range key's end must be strictly greater
389 1 : // than the block bound for the block to be within bounds.
390 1 : return m.cmp(m.maskSpan.End, key) > 0
391 1 : }
392 :
393 : // lazyCombinedIter implements the internalIterator interface, wrapping a
394 : // pointIter. It requires the pointIter's the levelIters be configured with
395 : // pointers to its combinedIterState. When the levelIter observes a file
396 : // containing a range key, the lazyCombinedIter constructs the combined
397 : // range+point key iterator stack and switches to it.
398 : type lazyCombinedIter struct {
399 : // parent holds a pointer to the root *pebble.Iterator containing this
400 : // iterator. It's used to mutate the internalIterator in use when switching
401 : // to combined iteration.
402 : parent *Iterator
403 : pointIter internalIterator
404 : combinedIterState combinedIterState
405 : }
406 :
407 : // combinedIterState encapsulates the current state of combined iteration.
408 : // Various low-level iterators (mergingIter, leveliter) hold pointers to the
409 : // *pebble.Iterator's combinedIterState. This allows them to check whether or
410 : // not they must monitor for files containing range keys (!initialized), or not.
411 : //
412 : // When !initialized, low-level iterators watch for files containing range keys.
413 : // When one is discovered, they set triggered=true and key to the smallest
414 : // (forward direction) or largest (reverse direction) range key that's been
415 : // observed.
416 : type combinedIterState struct {
417 : // key holds the smallest (forward direction) or largest (backward
418 : // direction) user key from a range key bound discovered during the iterator
419 : // operation that triggered the switch to combined iteration.
420 : //
421 : // Slices stored here must be stable. This is possible because callers pass
422 : // a Smallest/Largest bound from a fileMetadata, which are immutable. A key
423 : // slice's bytes must not be overwritten.
424 : key []byte
425 : triggered bool
426 : initialized bool
427 : }
428 :
429 : // Assert that *lazyCombinedIter implements internalIterator.
430 : var _ internalIterator = (*lazyCombinedIter)(nil)
431 :
432 : // initCombinedIteration is invoked after a pointIter positioning operation
433 : // resulted in i.combinedIterState.triggered=true.
434 : //
435 : // The `dir` parameter is `+1` or `-1` indicating forward iteration or backward
436 : // iteration respectively.
437 : //
438 : // The `pointKey` and `pointValue` parameters provide the new point key-value
439 : // pair that the iterator was just positioned to. The combined iterator should
440 : // be seeded with this point key-value pair and return the smaller (forward
441 : // iteration) or largest (backward iteration) of the two.
442 : //
443 : // The `seekKey` parameter is non-nil only if the iterator operation that
444 : // triggered the switch to combined iteration was a SeekGE, SeekPrefixGE or
445 : // SeekLT. It provides the seek key supplied and is used to seek the range-key
446 : // iterator using the same key. This is necessary for SeekGE/SeekPrefixGE
447 : // operations that land in the middle of a range key and must truncate to the
448 : // user-provided seek key.
449 : func (i *lazyCombinedIter) initCombinedIteration(
450 : dir int8, pointKey *InternalKey, pointValue base.LazyValue, seekKey []byte,
451 2 : ) (*InternalKey, base.LazyValue) {
452 2 : // Invariant: i.parent.rangeKey is nil.
453 2 : // Invariant: !i.combinedIterState.initialized.
454 2 : if invariants.Enabled {
455 2 : if i.combinedIterState.initialized {
456 0 : panic("pebble: combined iterator already initialized")
457 : }
458 2 : if i.parent.rangeKey != nil {
459 0 : panic("pebble: iterator already has a range-key iterator stack")
460 : }
461 : }
462 :
463 : // We need to determine the key to seek the range key iterator to. If
464 : // seekKey is not nil, the user-initiated operation that triggered the
465 : // switch to combined iteration was itself a seek, and we can use that key.
466 : // Otherwise, a First/Last or relative positioning operation triggered the
467 : // switch to combined iteration.
468 : //
469 : // The levelIter that observed a file containing range keys populated
470 : // combinedIterState.key with the smallest (forward) or largest (backward)
471 : // range key it observed. If multiple levelIters observed files with range
472 : // keys during the same operation on the mergingIter, combinedIterState.key
473 : // is the smallest [during forward iteration; largest in reverse iteration]
474 : // such key.
475 2 : if seekKey == nil {
476 2 : // Use the levelIter-populated key.
477 2 : seekKey = i.combinedIterState.key
478 2 :
479 2 : // We may need to adjust the levelIter-populated seek key to the
480 2 : // surfaced point key. If the key observed is beyond [in the iteration
481 2 : // direction] the current point key, there may still exist a range key
482 2 : // at an earlier key. Consider the following example:
483 2 : //
484 2 : // L5: 000003:[bar.DEL.5, foo.RANGEKEYSET.9]
485 2 : // L6: 000001:[bar.SET.2] 000002:[bax.RANGEKEYSET.8]
486 2 : //
487 2 : // A call to First() seeks the levels to files L5.000003 and L6.000001.
488 2 : // The L5 levelIter observes that L5.000003 contains the range key with
489 2 : // start key `foo`, and triggers a switch to combined iteration, setting
490 2 : // `combinedIterState.key` = `foo`.
491 2 : //
492 2 : // The L6 levelIter did not observe the true first range key
493 2 : // (bax.RANGEKEYSET.8), because it appears in a later sstable. When the
494 2 : // combined iterator is initialized, the range key iterator must be
495 2 : // seeked to a key that will find `bax`. To accomplish this, we seek the
496 2 : // key instead to `bar`. It is guaranteed that no range key exists
497 2 : // earlier than `bar`, otherwise a levelIter would've observed it and
498 2 : // set `combinedIterState.key` to its start key.
499 2 : if pointKey != nil {
500 2 : if dir == +1 && i.parent.cmp(i.combinedIterState.key, pointKey.UserKey) > 0 {
501 2 : seekKey = pointKey.UserKey
502 2 : } else if dir == -1 && i.parent.cmp(seekKey, pointKey.UserKey) < 0 {
503 1 : seekKey = pointKey.UserKey
504 1 : }
505 : }
506 : }
507 :
508 : // An operation on the point iterator observed a file containing range keys,
509 : // so we must switch to combined interleaving iteration. First, construct
510 : // the range key iterator stack. It must not exist, otherwise we'd already
511 : // be performing combined iteration.
512 2 : i.parent.rangeKey = iterRangeKeyStateAllocPool.Get().(*iteratorRangeKeyState)
513 2 : i.parent.rangeKey.init(i.parent.comparer.Compare, i.parent.comparer.Split, &i.parent.opts)
514 2 : i.parent.constructRangeKeyIter()
515 2 :
516 2 : // Initialize the Iterator's interleaving iterator.
517 2 : i.parent.rangeKey.iiter.Init(
518 2 : &i.parent.comparer, i.parent.pointIter, i.parent.rangeKey.rangeKeyIter,
519 2 : keyspan.InterleavingIterOpts{
520 2 : Mask: &i.parent.rangeKeyMasking,
521 2 : LowerBound: i.parent.opts.LowerBound,
522 2 : UpperBound: i.parent.opts.UpperBound,
523 2 : })
524 2 :
525 2 : // Set the parent's primary iterator to point to the combined, interleaving
526 2 : // iterator that's now initialized with our current state.
527 2 : i.parent.iter = &i.parent.rangeKey.iiter
528 2 : i.combinedIterState.initialized = true
529 2 : i.combinedIterState.key = nil
530 2 :
531 2 : // All future iterator operations will go directly through the combined
532 2 : // iterator.
533 2 : //
534 2 : // Initialize the interleaving iterator. We pass the point key-value pair so
535 2 : // that the interleaving iterator knows where the point iterator is
536 2 : // positioned. Additionally, we pass the seek key to which the range-key
537 2 : // iterator should be seeked in order to initialize its position.
538 2 : //
539 2 : // In the forward direction (invert for backwards), the seek key is a key
540 2 : // guaranteed to find the smallest range key that's greater than the last
541 2 : // key the iterator returned. The range key may be less than pointKey, in
542 2 : // which case the range key will be interleaved next instead of the point
543 2 : // key.
544 2 : if dir == +1 {
545 2 : var prefix []byte
546 2 : if i.parent.hasPrefix {
547 2 : prefix = i.parent.prefixOrFullSeekKey
548 2 : }
549 2 : return i.parent.rangeKey.iiter.InitSeekGE(prefix, seekKey, pointKey, pointValue)
550 : }
551 2 : return i.parent.rangeKey.iiter.InitSeekLT(seekKey, pointKey, pointValue)
552 : }
553 :
554 : func (i *lazyCombinedIter) SeekGE(
555 : key []byte, flags base.SeekGEFlags,
556 2 : ) (*InternalKey, base.LazyValue) {
557 2 : if i.combinedIterState.initialized {
558 0 : return i.parent.rangeKey.iiter.SeekGE(key, flags)
559 0 : }
560 2 : k, v := i.pointIter.SeekGE(key, flags)
561 2 : if i.combinedIterState.triggered {
562 2 : return i.initCombinedIteration(+1, k, v, key)
563 2 : }
564 2 : return k, v
565 : }
566 :
567 : func (i *lazyCombinedIter) SeekPrefixGE(
568 : prefix, key []byte, flags base.SeekGEFlags,
569 2 : ) (*InternalKey, base.LazyValue) {
570 2 : if i.combinedIterState.initialized {
571 0 : return i.parent.rangeKey.iiter.SeekPrefixGE(prefix, key, flags)
572 0 : }
573 2 : k, v := i.pointIter.SeekPrefixGE(prefix, key, flags)
574 2 : if i.combinedIterState.triggered {
575 2 : return i.initCombinedIteration(+1, k, v, key)
576 2 : }
577 2 : return k, v
578 : }
579 :
580 : func (i *lazyCombinedIter) SeekLT(
581 : key []byte, flags base.SeekLTFlags,
582 2 : ) (*InternalKey, base.LazyValue) {
583 2 : if i.combinedIterState.initialized {
584 0 : return i.parent.rangeKey.iiter.SeekLT(key, flags)
585 0 : }
586 2 : k, v := i.pointIter.SeekLT(key, flags)
587 2 : if i.combinedIterState.triggered {
588 2 : return i.initCombinedIteration(-1, k, v, key)
589 2 : }
590 2 : return k, v
591 : }
592 :
593 2 : func (i *lazyCombinedIter) First() (*InternalKey, base.LazyValue) {
594 2 : if i.combinedIterState.initialized {
595 0 : return i.parent.rangeKey.iiter.First()
596 0 : }
597 2 : k, v := i.pointIter.First()
598 2 : if i.combinedIterState.triggered {
599 2 : return i.initCombinedIteration(+1, k, v, nil)
600 2 : }
601 2 : return k, v
602 : }
603 :
604 2 : func (i *lazyCombinedIter) Last() (*InternalKey, base.LazyValue) {
605 2 : if i.combinedIterState.initialized {
606 0 : return i.parent.rangeKey.iiter.Last()
607 0 : }
608 2 : k, v := i.pointIter.Last()
609 2 : if i.combinedIterState.triggered {
610 2 : return i.initCombinedIteration(-1, k, v, nil)
611 2 : }
612 2 : return k, v
613 : }
614 :
615 2 : func (i *lazyCombinedIter) Next() (*InternalKey, base.LazyValue) {
616 2 : if i.combinedIterState.initialized {
617 0 : return i.parent.rangeKey.iiter.Next()
618 0 : }
619 2 : k, v := i.pointIter.Next()
620 2 : if i.combinedIterState.triggered {
621 2 : return i.initCombinedIteration(+1, k, v, nil)
622 2 : }
623 2 : return k, v
624 : }
625 :
626 2 : func (i *lazyCombinedIter) NextPrefix(succKey []byte) (*InternalKey, base.LazyValue) {
627 2 : if i.combinedIterState.initialized {
628 0 : return i.parent.rangeKey.iiter.NextPrefix(succKey)
629 0 : }
630 2 : k, v := i.pointIter.NextPrefix(succKey)
631 2 : if i.combinedIterState.triggered {
632 0 : return i.initCombinedIteration(+1, k, v, nil)
633 0 : }
634 2 : return k, v
635 : }
636 :
637 2 : func (i *lazyCombinedIter) Prev() (*InternalKey, base.LazyValue) {
638 2 : if i.combinedIterState.initialized {
639 0 : return i.parent.rangeKey.iiter.Prev()
640 0 : }
641 2 : k, v := i.pointIter.Prev()
642 2 : if i.combinedIterState.triggered {
643 2 : return i.initCombinedIteration(-1, k, v, nil)
644 2 : }
645 2 : return k, v
646 : }
647 :
648 2 : func (i *lazyCombinedIter) Error() error {
649 2 : if i.combinedIterState.initialized {
650 0 : return i.parent.rangeKey.iiter.Error()
651 0 : }
652 2 : return i.pointIter.Error()
653 : }
654 :
655 2 : func (i *lazyCombinedIter) Close() error {
656 2 : if i.combinedIterState.initialized {
657 0 : return i.parent.rangeKey.iiter.Close()
658 0 : }
659 2 : return i.pointIter.Close()
660 : }
661 :
662 2 : func (i *lazyCombinedIter) SetBounds(lower, upper []byte) {
663 2 : if i.combinedIterState.initialized {
664 0 : i.parent.rangeKey.iiter.SetBounds(lower, upper)
665 0 : return
666 0 : }
667 2 : i.pointIter.SetBounds(lower, upper)
668 : }
669 :
670 0 : func (i *lazyCombinedIter) String() string {
671 0 : if i.combinedIterState.initialized {
672 0 : return i.parent.rangeKey.iiter.String()
673 0 : }
674 0 : return i.pointIter.String()
675 : }
|