Line data Source code
1 : // Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package keyspanimpl
6 :
7 : import (
8 : "bytes"
9 : "cmp"
10 : "context"
11 : "fmt"
12 : "slices"
13 :
14 : "github.com/cockroachdb/pebble/internal/base"
15 : "github.com/cockroachdb/pebble/internal/invariants"
16 : "github.com/cockroachdb/pebble/internal/keyspan"
17 : "github.com/cockroachdb/pebble/internal/manifest"
18 : "github.com/cockroachdb/pebble/internal/treeprinter"
19 : )
20 :
21 : // TODO(jackson): Consider implementing an optimization to seek lower levels
22 : // past higher levels' RANGEKEYDELs. This would be analaogous to the
23 : // optimization pebble.mergingIter performs for RANGEDELs during point key
24 : // seeks. It may not be worth it, because range keys are rare and cascading
25 : // seeks would require introducing key comparisons to switchTo{Min,Max}Heap
26 : // where there currently are none.
27 :
28 : // MergingIter merges spans across levels of the LSM, exposing an iterator over
29 : // spans that yields sets of spans fragmented at unique user key boundaries.
30 : //
31 : // A MergingIter is initialized with an arbitrary number of child iterators over
32 : // fragmented spans. Each child iterator exposes fragmented key spans, such that
33 : // overlapping keys are surfaced in a single Span. Key spans from one child
34 : // iterator may overlap key spans from another child iterator arbitrarily.
35 : //
36 : // The spans combined by MergingIter will return spans with keys sorted by
37 : // trailer descending. If the MergingIter is configured with a Transformer, it's
38 : // permitted to modify the ordering of the spans' keys returned by MergingIter.
39 : //
40 : // # Algorithm
41 : //
42 : // The merging iterator wraps child iterators, merging and fragmenting spans
43 : // across levels. The high-level algorithm is:
44 : //
45 : // 1. Initialize the heap with bound keys from child iterators' spans.
46 : // 2. Find the next [or previous] two unique user keys' from bounds.
47 : // 3. Consider the span formed between the two unique user keys a candidate
48 : // span.
49 : // 4. Determine if any of the child iterators' spans overlap the candidate
50 : // span.
51 : // 4a. If any of the child iterator's current bounds are end keys
52 : // (during forward iteration) or start keys (during reverse
53 : // iteration), then all the spans with that bound overlap the
54 : // candidate span.
55 : // 4b. Apply the configured transform, which may remove keys.
56 : // 4c. If no spans overlap, forget the smallest (forward iteration)
57 : // or largest (reverse iteration) unique user key and advance
58 : // the iterators to the next unique user key. Start again from 3.
59 : //
60 : // # Detailed algorithm
61 : //
62 : // Each level (i0, i1, ...) has a user-provided input FragmentIterator. The
63 : // merging iterator steps through individual boundaries of the underlying
64 : // spans separately. If the underlying FragmentIterator has fragments
65 : // [a,b){#2,#1} [b,c){#1} the mergingIterLevel.{next,prev} step through:
66 : //
67 : // (a, start), (b, end), (b, start), (c, end)
68 : //
69 : // Note that (a, start) and (b, end) are observed ONCE each, despite two keys
70 : // sharing those bounds. Also note that (b, end) and (b, start) are two distinct
71 : // iterator positions of a mergingIterLevel.
72 : //
73 : // The merging iterator maintains a heap (min during forward iteration, max
74 : // during reverse iteration) containing the boundKeys. Each boundKey is a
75 : // 3-tuple holding the bound user key, whether the bound is a start or end key
76 : // and the set of keys from that level that have that bound. The heap orders
77 : // based on the boundKey's user key only.
78 : //
79 : // The merging iterator is responsible for merging spans across levels to
80 : // determine which span is next, but it's also responsible for fragmenting
81 : // overlapping spans. Consider the example:
82 : //
83 : // i0: b---d e-----h
84 : // i1: a---c h-----k
85 : // i2: a------------------------------p
86 : //
87 : // fragments: a-b-c-d-e-----h-----k----------p
88 : //
89 : // None of the individual child iterators contain a span with the exact bounds
90 : // [c,d), but the merging iterator must produce a span [c,d). To accomplish
91 : // this, the merging iterator visits every span between unique boundary user
92 : // keys. In the above example, this is:
93 : //
94 : // [a,b), [b,c), [c,d), [d,e), [e, h), [h, k), [k, p)
95 : //
96 : // The merging iterator first initializes the heap to prepare for iteration.
97 : // The description below discusses the mechanics of forward iteration after a
98 : // call to First, but the mechanics are similar for reverse iteration and
99 : // other positioning methods.
100 : //
101 : // During a call to First, the heap is initialized by seeking every
102 : // mergingIterLevel to the first bound of the first fragment. In the above
103 : // example, this seeks the child iterators to:
104 : //
105 : // i0: (b, boundKindFragmentStart, [ [b,d) ])
106 : // i1: (a, boundKindFragmentStart, [ [a,c) ])
107 : // i2: (a, boundKindFragmentStart, [ [a,p) ])
108 : //
109 : // After fixing up the heap, the root of the heap is a boundKey with the
110 : // smallest user key ('a' in the example). Once the heap is setup for iteration
111 : // in the appropriate direction and location, the merging iterator uses
112 : // find{Next,Prev}FragmentSet to find the next/previous span bounds.
113 : //
114 : // During forward iteration, the root of the heap's user key is the start key
115 : // key of next merged span. findNextFragmentSet sets m.start to this user
116 : // key. The heap may contain other boundKeys with the same user key if another
117 : // level has a fragment starting or ending at the same key, so the
118 : // findNextFragmentSet method pulls from the heap until it finds the first key
119 : // greater than m.start. This key is used as the end key.
120 : //
121 : // In the above example, this results in m.start = 'a', m.end = 'b' and child
122 : // iterators in the following positions:
123 : //
124 : // i0: (b, boundKindFragmentStart, [ [b,d) ])
125 : // i1: (c, boundKindFragmentEnd, [ [a,c) ])
126 : // i2: (p, boundKindFragmentEnd, [ [a,p) ])
127 : //
128 : // With the user key bounds of the next merged span established,
129 : // findNextFragmentSet must determine which, if any, fragments overlap the span.
130 : // During forward iteration any child iterator that is now positioned at an end
131 : // boundary has an overlapping span. (Justification: The child iterator's end
132 : // boundary is ≥ m.end. The corresponding start boundary must be ≤ m.start since
133 : // there were no other user keys between m.start and m.end. So the fragments
134 : // associated with the iterator's current end boundary have start and end bounds
135 : // such that start ≤ m.start < m.end ≤ end).
136 : //
137 : // findNextFragmentSet iterates over the levels, collecting keys from any child
138 : // iterators positioned at end boundaries. In the above example, i1 and i2 are
139 : // positioned at end boundaries, so findNextFragmentSet collects the keys of
140 : // [a,c) and [a,p). These spans contain the merging iterator's [m.start, m.end)
141 : // span, but they may also extend beyond the m.start and m.end. The merging
142 : // iterator returns the keys with the merging iter's m.start and m.end bounds,
143 : // preserving the underlying keys' sequence numbers, key kinds and values.
144 : //
145 : // A MergingIter is configured with a Transform that's applied to the span
146 : // before surfacing it to the iterator user. A Transform may remove keys
147 : // arbitrarily, but it may not modify the values themselves.
148 : //
149 : // It may be the case that findNextFragmentSet finds no levels positioned at end
150 : // boundaries, or that there are no spans remaining after applying a transform,
151 : // in which case the span [m.start, m.end) overlaps with nothing. In this case
152 : // findNextFragmentSet loops, repeating the above process again until it finds a
153 : // span that does contain keys.
154 : //
155 : // # Memory safety
156 : //
157 : // The FragmentIterator interface only guarantees stability of a Span and its
158 : // associated slices until the next positioning method is called. Adjacent Spans
159 : // may be contained in different sstables, requring the FragmentIterator
160 : // implementation to close one sstable, releasing its memory, before opening the
161 : // next. Most of the state used by the MergingIter is derived from spans at
162 : // current child iterator positions only, ensuring state is stable. The one
163 : // exception is the start bound during forward iteration and the end bound
164 : // during reverse iteration.
165 : //
166 : // If the heap root originates from an end boundary when findNextFragmentSet
167 : // begins, a Next on the heap root level may invalidate the end boundary. To
168 : // accommodate this, find{Next,Prev}FragmentSet copy the initial boundary if the
169 : // subsequent Next/Prev would move to the next span.
170 : type MergingIter struct {
171 : comparer *base.Comparer
172 : *MergingBuffers
173 : // start and end hold the bounds for the span currently under the
174 : // iterator position.
175 : //
176 : // Invariant: None of the levels' iterators contain spans with a bound
177 : // between start and end. For all bounds b, b ≤ start || b ≥ end.
178 : start, end []byte
179 :
180 : // transformer defines a transformation to be applied to a span before it's
181 : // yielded to the user. Transforming may filter individual keys contained
182 : // within the span.
183 : transformer keyspan.Transformer
184 : // span holds the iterator's current span. This span is used as the
185 : // destination for transforms. Every tranformed span overwrites the
186 : // previous.
187 : span keyspan.Span
188 : dir int8
189 :
190 : // alloc preallocates mergingIterLevel and mergingIterItems for use by the
191 : // merging iterator. As long as the merging iterator is used with
192 : // manifest.NumLevels+3 and fewer fragment iterators, the merging iterator
193 : // will not need to allocate upon initialization. The value NumLevels+3
194 : // mirrors the preallocated levels in iterAlloc used for point iterators.
195 : // Invariant: cap(levels) == cap(items)
196 : alloc struct {
197 : levels [manifest.NumLevels + 3]mergingIterLevel
198 : items [manifest.NumLevels + 3]mergingIterItem
199 : }
200 : }
201 :
202 : // MergingBuffers holds buffers used while merging keyspans.
203 : type MergingBuffers struct {
204 : // keys holds all of the keys across all levels that overlap the key span
205 : // [start, end), sorted by InternalKeyTrailer descending. This slice is reconstituted
206 : // in synthesizeKeys from each mergingIterLevel's keys every time the
207 : // [start, end) bounds change.
208 : //
209 : // Each element points into a child iterator's memory, so the keys may not
210 : // be directly modified.
211 : keys []keyspan.Key
212 : // levels holds levels allocated by MergingIter.init. The MergingIter will
213 : // prefer use of its `manifest.NumLevels+3` array, so this slice will be
214 : // longer if set.
215 : levels []mergingIterLevel
216 : wrapFn keyspan.WrapFn
217 : // heap holds a slice for the merging iterator heap allocated by
218 : // MergingIter.init. The MergingIter will prefer use of its
219 : // `manifest.NumLevels+3` items array, so this slice will be longer if set.
220 : heap mergingIterHeap
221 : // buf is a buffer used to save [start, end) boundary keys.
222 : buf []byte
223 : }
224 :
225 : // PrepareForReuse discards any excessively large buffers.
226 2 : func (bufs *MergingBuffers) PrepareForReuse() {
227 2 : if cap(bufs.buf) > keyspan.BufferReuseMaxCapacity {
228 0 : bufs.buf = nil
229 0 : }
230 : }
231 :
232 : // MergingIter implements the FragmentIterator interface.
233 : var _ keyspan.FragmentIterator = (*MergingIter)(nil)
234 :
235 : type mergingIterLevel struct {
236 : iter keyspan.FragmentIterator
237 :
238 : // heapKey holds the current key at this level for use within the heap.
239 : heapKey boundKey
240 : }
241 :
242 2 : func (l *mergingIterLevel) next() error {
243 2 : if l.heapKey.kind == boundKindFragmentStart {
244 2 : l.heapKey = boundKey{
245 2 : kind: boundKindFragmentEnd,
246 2 : key: l.heapKey.span.End,
247 2 : span: l.heapKey.span,
248 2 : }
249 2 : return nil
250 2 : }
251 2 : s, err := l.iter.Next()
252 2 : switch {
253 1 : case err != nil:
254 1 : return err
255 2 : case s == nil:
256 2 : l.heapKey = boundKey{kind: boundKindInvalid}
257 2 : return nil
258 2 : default:
259 2 : l.heapKey = boundKey{
260 2 : kind: boundKindFragmentStart,
261 2 : key: s.Start,
262 2 : span: s,
263 2 : }
264 2 : return nil
265 : }
266 : }
267 :
268 2 : func (l *mergingIterLevel) prev() error {
269 2 : if l.heapKey.kind == boundKindFragmentEnd {
270 2 : l.heapKey = boundKey{
271 2 : kind: boundKindFragmentStart,
272 2 : key: l.heapKey.span.Start,
273 2 : span: l.heapKey.span,
274 2 : }
275 2 : return nil
276 2 : }
277 2 : s, err := l.iter.Prev()
278 2 : switch {
279 1 : case err != nil:
280 1 : return err
281 2 : case s == nil:
282 2 : l.heapKey = boundKey{kind: boundKindInvalid}
283 2 : return nil
284 2 : default:
285 2 : l.heapKey = boundKey{
286 2 : kind: boundKindFragmentEnd,
287 2 : key: s.End,
288 2 : span: s,
289 2 : }
290 2 : return nil
291 : }
292 : }
293 :
294 : // Init initializes the merging iterator with the provided fragment iterators.
295 : func (m *MergingIter) Init(
296 : comparer *base.Comparer,
297 : transformer keyspan.Transformer,
298 : bufs *MergingBuffers,
299 : iters ...keyspan.FragmentIterator,
300 2 : ) {
301 2 : *m = MergingIter{
302 2 : comparer: comparer,
303 2 : MergingBuffers: bufs,
304 2 : transformer: transformer,
305 2 : }
306 2 : m.heap.cmp = comparer.Compare
307 2 : levels, items := m.levels, m.heap.items
308 2 :
309 2 : // Invariant: cap(levels) >= cap(items)
310 2 : // Invariant: cap(alloc.levels) == cap(alloc.items)
311 2 : if len(iters) <= len(m.alloc.levels) {
312 2 : // The slices allocated on the MergingIter struct are large enough.
313 2 : m.levels = m.alloc.levels[:len(iters)]
314 2 : m.heap.items = m.alloc.items[:0]
315 2 : } else if len(iters) <= cap(levels) {
316 0 : // The existing heap-allocated slices are large enough, so reuse them.
317 0 : m.levels = levels[:len(iters)]
318 0 : m.heap.items = items[:0]
319 2 : } else {
320 2 : // Heap allocate new slices.
321 2 : m.levels = make([]mergingIterLevel, len(iters))
322 2 : m.heap.items = make([]mergingIterItem, 0, len(iters))
323 2 : }
324 2 : for i := range m.levels {
325 2 : m.levels[i] = mergingIterLevel{iter: iters[i]}
326 2 : if m.wrapFn != nil {
327 0 : m.levels[i].iter = m.wrapFn(m.levels[i].iter)
328 0 : }
329 : }
330 : }
331 :
332 : // AddLevel adds a new level to the bottom of the merging iterator. AddLevel
333 : // must be called after Init and before any other method.
334 2 : func (m *MergingIter) AddLevel(iter keyspan.FragmentIterator) {
335 2 : if m.wrapFn != nil {
336 0 : iter = m.wrapFn(iter)
337 0 : }
338 2 : m.levels = append(m.levels, mergingIterLevel{iter: iter})
339 : }
340 :
341 : // SeekGE moves the iterator to the first span covering a key greater than
342 : // or equal to the given key. This is equivalent to seeking to the first
343 : // span with an end key greater than the given key.
344 2 : func (m *MergingIter) SeekGE(key []byte) (*keyspan.Span, error) {
345 2 : // SeekGE(k) seeks to the first span with an end key greater than the given
346 2 : // key. The merged span M that we're searching for might straddle the seek
347 2 : // `key`. In this case, the M.Start may be a key ≤ the seek key.
348 2 : //
349 2 : // Consider a SeekGE(dog) in the following example.
350 2 : //
351 2 : // i0: b---d e-----h
352 2 : // i1: a---c h-----k
353 2 : // i2: a------------------------------p
354 2 : // merged: a-b-c-d-e-----h-----k----------p
355 2 : //
356 2 : // The merged span M containing 'dog' is [d,e). The 'd' of the merged span
357 2 : // comes from i0's [b,d)'s end boundary. The [b,d) span does not cover any
358 2 : // key >= dog, so we cannot find the span by positioning the child iterators
359 2 : // using a SeekGE(dog).
360 2 : //
361 2 : // Instead, if we take all the child iterators' spans bounds:
362 2 : // a b c d e h k p
363 2 : // We want to partition them into keys ≤ `key` and keys > `key`.
364 2 : // dog
365 2 : // │
366 2 : // a b c d│e h k p
367 2 : // │
368 2 : // The largest key on the left of the partition forms the merged span's
369 2 : // start key, and the smallest key on the right of the partition forms the
370 2 : // merged span's end key. Recharacterized:
371 2 : //
372 2 : // M.Start: the largest boundary ≤ k of any child span
373 2 : // M.End: the smallest boundary > k of any child span
374 2 : //
375 2 : // The FragmentIterator interface doesn't implement seeking by all bounds,
376 2 : // it implements seeking by containment. A SeekGE(k) will ensure we observe
377 2 : // all start boundaries ≥ k and all end boundaries > k but does not ensure
378 2 : // we observe end boundaries = k or any boundaries < k. A SeekLT(k) will
379 2 : // ensure we observe all start boundaries < k and all end boundaries ≤ k but
380 2 : // does not ensure we observe any start boundaries = k or any boundaries >
381 2 : // k. This forces us to seek in one direction and step in the other.
382 2 : //
383 2 : // In a SeekGE, we want to end up oriented in the forward direction when
384 2 : // complete, so we begin with searching for M.Start by SeekLT-ing every
385 2 : // child iterator to `k`. For every child span found, we determine the
386 2 : // largest bound ≤ `k` and use it to initialize our max heap. The resulting
387 2 : // root of the max heap is a preliminary value for `M.Start`.
388 2 : for i := range m.levels {
389 2 : l := &m.levels[i]
390 2 : s, err := l.iter.SeekLT(key)
391 2 : switch {
392 1 : case err != nil:
393 1 : return nil, err
394 2 : case s == nil:
395 2 : l.heapKey = boundKey{kind: boundKindInvalid}
396 2 : case m.comparer.Compare(s.End, key) <= 0:
397 2 : l.heapKey = boundKey{
398 2 : kind: boundKindFragmentEnd,
399 2 : key: s.End,
400 2 : span: s,
401 2 : }
402 2 : default:
403 2 : // s.End > key && s.Start < key
404 2 : // We need to use this span's start bound, since that's the largest
405 2 : // bound ≤ key.
406 2 : l.heapKey = boundKey{
407 2 : kind: boundKindFragmentStart,
408 2 : key: s.Start,
409 2 : span: s,
410 2 : }
411 : }
412 : }
413 2 : m.initMaxHeap()
414 2 : if len(m.heap.items) == 0 {
415 2 : // There are no spans covering any key < `key`. There is no span that
416 2 : // straddles the seek key. Reorient the heap into a min heap and return
417 2 : // the first span we find in the forward direction.
418 2 : if err := m.switchToMinHeap(); err != nil {
419 1 : return nil, err
420 1 : }
421 2 : return m.findNextFragmentSet()
422 : }
423 :
424 : // The heap root is now the largest boundary key b such that:
425 : // 1. b < k
426 : // 2. b = k, and b is an end boundary
427 : // There's a third case that we will need to consider later, after we've
428 : // switched to a min heap:
429 : // 3. there exists a start boundary key b such that b = k.
430 : // A start boundary key equal to k would not be surfaced when we seeked all
431 : // the levels using SeekLT(k), since no key <k would be covered within a
432 : // span within an inclusive `k` start boundary.
433 : //
434 : // Assume that the tightest boundary ≤ k is the current heap root (cases 1 &
435 : // 2). After we switch to a min heap, we'll check for the third case and
436 : // adjust the start boundary if necessary.
437 2 : m.start = m.heap.items[0].boundKey.key
438 2 :
439 2 : // Before switching the direction of the heap, save a copy of the start
440 2 : // boundary if it's the end boundary of some child span. Next-ing the child
441 2 : // iterator might switch files and invalidate the memory of the bound.
442 2 : if m.heap.items[0].boundKey.kind == boundKindFragmentEnd {
443 2 : m.buf = append(m.buf[:0], m.start...)
444 2 : m.start = m.buf
445 2 : }
446 :
447 : // Switch to a min heap. This will move each level to the next bound in
448 : // every level, and then establish a min heap. This allows us to obtain the
449 : // smallest boundary key > `key`, which will serve as our candidate end
450 : // bound.
451 2 : if err := m.switchToMinHeap(); err != nil {
452 0 : return nil, err
453 2 : } else if len(m.heap.items) == 0 {
454 2 : return nil, nil
455 2 : }
456 :
457 : // Check for the case 3 described above. It's possible that when we switch
458 : // heap directions, we discover a start boundary of some child span that is
459 : // equal to the seek key `key`. In this case, we want this key to be our
460 : // start boundary.
461 2 : if m.heap.items[0].boundKey.kind == boundKindFragmentStart &&
462 2 : m.comparer.Equal(m.heap.items[0].boundKey.key, key) {
463 2 : // Call findNextFragmentSet, which will set m.start to the heap root and
464 2 : // proceed forward.
465 2 : return m.findNextFragmentSet()
466 2 : }
467 :
468 2 : m.end = m.heap.items[0].boundKey.key
469 2 : if found, s, err := m.synthesizeKeys(+1); err != nil {
470 0 : return nil, err
471 2 : } else if found && s != nil {
472 2 : return s, nil
473 2 : }
474 2 : return m.findNextFragmentSet()
475 : }
476 :
477 : // SeekLT moves the iterator to the last span covering a key less than the
478 : // given key. This is equivalent to seeking to the last span with a start
479 : // key less than the given key.
480 2 : func (m *MergingIter) SeekLT(key []byte) (*keyspan.Span, error) {
481 2 : // SeekLT(k) seeks to the last span with a start key less than the given
482 2 : // key. The merged span M that we're searching for might straddle the seek
483 2 : // `key`. In this case, the M.End may be a key ≥ the seek key.
484 2 : //
485 2 : // Consider a SeekLT(dog) in the following example.
486 2 : //
487 2 : // i0: b---d e-----h
488 2 : // i1: a---c h-----k
489 2 : // i2: a------------------------------p
490 2 : // merged: a-b-c-d-e-----h-----k----------p
491 2 : //
492 2 : // The merged span M containing the largest key <'dog' is [d,e). The 'e' of
493 2 : // the merged span comes from i0's [e,h)'s start boundary. The [e,h) span
494 2 : // does not cover any key < dog, so we cannot find the span by positioning
495 2 : // the child iterators using a SeekLT(dog).
496 2 : //
497 2 : // Instead, if we take all the child iterators' spans bounds:
498 2 : // a b c d e h k p
499 2 : // We want to partition them into keys < `key` and keys ≥ `key`.
500 2 : // dog
501 2 : // │
502 2 : // a b c d│e h k p
503 2 : // │
504 2 : // The largest key on the left of the partition forms the merged span's
505 2 : // start key, and the smallest key on the right of the partition forms the
506 2 : // merged span's end key. Recharacterized:
507 2 : //
508 2 : // M.Start: the largest boundary < k of any child span
509 2 : // M.End: the smallest boundary ≥ k of any child span
510 2 : //
511 2 : // The FragmentIterator interface doesn't implement seeking by all bounds,
512 2 : // it implements seeking by containment. A SeekGE(k) will ensure we observe
513 2 : // all start boundaries ≥ k and all end boundaries > k but does not ensure
514 2 : // we observe end boundaries = k or any boundaries < k. A SeekLT(k) will
515 2 : // ensure we observe all start boundaries < k and all end boundaries ≤ k but
516 2 : // does not ensure we observe any start boundaries = k or any boundaries >
517 2 : // k. This forces us to seek in one direction and step in the other.
518 2 : //
519 2 : // In a SeekLT, we want to end up oriented in the backward direction when
520 2 : // complete, so we begin with searching for M.End by SeekGE-ing every
521 2 : // child iterator to `k`. For every child span found, we determine the
522 2 : // smallest bound ≥ `k` and use it to initialize our min heap. The resulting
523 2 : // root of the min heap is a preliminary value for `M.End`.
524 2 : for i := range m.levels {
525 2 : l := &m.levels[i]
526 2 : s, err := l.iter.SeekGE(key)
527 2 : switch {
528 1 : case err != nil:
529 1 : return nil, err
530 2 : case s == nil:
531 2 : l.heapKey = boundKey{kind: boundKindInvalid}
532 2 : case m.comparer.Compare(s.Start, key) >= 0:
533 2 : l.heapKey = boundKey{
534 2 : kind: boundKindFragmentStart,
535 2 : key: s.Start,
536 2 : span: s,
537 2 : }
538 2 : default:
539 2 : // s.Start < key
540 2 : // We need to use this span's end bound, since that's the smallest
541 2 : // bound > key.
542 2 : l.heapKey = boundKey{
543 2 : kind: boundKindFragmentEnd,
544 2 : key: s.End,
545 2 : span: s,
546 2 : }
547 : }
548 : }
549 2 : m.initMinHeap()
550 2 : if len(m.heap.items) == 0 {
551 2 : // There are no spans covering any key ≥ `key`. There is no span that
552 2 : // straddles the seek key. Reorient the heap into a max heap and return
553 2 : // the first span we find in the reverse direction.
554 2 : if err := m.switchToMaxHeap(); err != nil {
555 0 : return nil, err
556 0 : }
557 2 : return m.findPrevFragmentSet()
558 : }
559 :
560 : // The heap root is now the smallest boundary key b such that:
561 : // 1. b > k
562 : // 2. b = k, and b is a start boundary
563 : // There's a third case that we will need to consider later, after we've
564 : // switched to a max heap:
565 : // 3. there exists an end boundary key b such that b = k.
566 : // An end boundary key equal to k would not be surfaced when we seeked all
567 : // the levels using SeekGE(k), since k would not be contained within the
568 : // exclusive end boundary.
569 : //
570 : // Assume that the tightest boundary ≥ k is the current heap root (cases 1 &
571 : // 2). After we switch to a max heap, we'll check for the third case and
572 : // adjust the end boundary if necessary.
573 2 : m.end = m.heap.items[0].boundKey.key
574 2 :
575 2 : // Before switching the direction of the heap, save a copy of the end
576 2 : // boundary if it's the start boundary of some child span. Prev-ing the
577 2 : // child iterator might switch files and invalidate the memory of the bound.
578 2 : if m.heap.items[0].boundKey.kind == boundKindFragmentStart {
579 2 : m.buf = append(m.buf[:0], m.end...)
580 2 : m.end = m.buf
581 2 : }
582 :
583 : // Switch to a max heap. This will move each level to the previous bound in
584 : // every level, and then establish a max heap. This allows us to obtain the
585 : // largest boundary key < `key`, which will serve as our candidate start
586 : // bound.
587 2 : if err := m.switchToMaxHeap(); err != nil {
588 0 : return nil, err
589 2 : } else if len(m.heap.items) == 0 {
590 2 : return nil, nil
591 2 : }
592 : // Check for the case 3 described above. It's possible that when we switch
593 : // heap directions, we discover an end boundary of some child span that is
594 : // equal to the seek key `key`. In this case, we want this key to be our end
595 : // boundary.
596 2 : if m.heap.items[0].boundKey.kind == boundKindFragmentEnd &&
597 2 : m.comparer.Equal(m.heap.items[0].boundKey.key, key) {
598 2 : // Call findPrevFragmentSet, which will set m.end to the heap root and
599 2 : // proceed backwards.
600 2 : return m.findPrevFragmentSet()
601 2 : }
602 :
603 2 : m.start = m.heap.items[0].boundKey.key
604 2 : if found, s, err := m.synthesizeKeys(-1); err != nil {
605 0 : return nil, err
606 2 : } else if found && s != nil {
607 2 : return s, nil
608 2 : }
609 2 : return m.findPrevFragmentSet()
610 : }
611 :
612 : // First seeks the iterator to the first span.
613 2 : func (m *MergingIter) First() (*keyspan.Span, error) {
614 2 : for i := range m.levels {
615 2 : s, err := m.levels[i].iter.First()
616 2 : switch {
617 1 : case err != nil:
618 1 : return nil, err
619 2 : case s == nil:
620 2 : m.levels[i].heapKey = boundKey{kind: boundKindInvalid}
621 2 : default:
622 2 : m.levels[i].heapKey = boundKey{
623 2 : kind: boundKindFragmentStart,
624 2 : key: s.Start,
625 2 : span: s,
626 2 : }
627 : }
628 : }
629 2 : m.initMinHeap()
630 2 : return m.findNextFragmentSet()
631 : }
632 :
633 : // Last seeks the iterator to the last span.
634 2 : func (m *MergingIter) Last() (*keyspan.Span, error) {
635 2 : for i := range m.levels {
636 2 : s, err := m.levels[i].iter.Last()
637 2 : switch {
638 1 : case err != nil:
639 1 : return nil, err
640 1 : case s == nil:
641 1 : m.levels[i].heapKey = boundKey{kind: boundKindInvalid}
642 2 : default:
643 2 : m.levels[i].heapKey = boundKey{
644 2 : kind: boundKindFragmentEnd,
645 2 : key: s.End,
646 2 : span: s,
647 2 : }
648 : }
649 : }
650 2 : m.initMaxHeap()
651 2 : return m.findPrevFragmentSet()
652 : }
653 :
654 : // Next advances the iterator to the next span.
655 2 : func (m *MergingIter) Next() (*keyspan.Span, error) {
656 2 : if m.dir == +1 && (m.end == nil || m.start == nil) {
657 0 : return nil, nil
658 0 : }
659 2 : if m.dir != +1 {
660 2 : if err := m.switchToMinHeap(); err != nil {
661 0 : return nil, err
662 0 : }
663 : }
664 2 : return m.findNextFragmentSet()
665 : }
666 :
667 : // Prev advances the iterator to the previous span.
668 2 : func (m *MergingIter) Prev() (*keyspan.Span, error) {
669 2 : if m.dir == -1 && (m.end == nil || m.start == nil) {
670 0 : return nil, nil
671 0 : }
672 2 : if m.dir != -1 {
673 2 : if err := m.switchToMaxHeap(); err != nil {
674 0 : return nil, err
675 0 : }
676 : }
677 2 : return m.findPrevFragmentSet()
678 : }
679 :
680 : // SetContext is part of the FragmentIterator interface.
681 0 : func (m *MergingIter) SetContext(ctx context.Context) {
682 0 : for i := range m.levels {
683 0 : m.levels[i].iter.SetContext(ctx)
684 0 : }
685 : }
686 :
687 : // Close closes the iterator, releasing all acquired resources.
688 2 : func (m *MergingIter) Close() {
689 2 : for i := range m.levels {
690 2 : m.levels[i].iter.Close()
691 2 : }
692 2 : m.levels = nil
693 2 : m.heap.items = m.heap.items[:0]
694 : }
695 :
696 : // String implements fmt.Stringer.
697 0 : func (m *MergingIter) String() string {
698 0 : return "merging-keyspan"
699 0 : }
700 :
701 2 : func (m *MergingIter) initMinHeap() {
702 2 : m.dir = +1
703 2 : m.heap.reverse = false
704 2 : m.initHeap()
705 2 : }
706 :
707 2 : func (m *MergingIter) initMaxHeap() {
708 2 : m.dir = -1
709 2 : m.heap.reverse = true
710 2 : m.initHeap()
711 2 : }
712 :
713 2 : func (m *MergingIter) initHeap() {
714 2 : m.heap.items = m.heap.items[:0]
715 2 : for i := range m.levels {
716 2 : if l := &m.levels[i]; l.heapKey.kind != boundKindInvalid {
717 2 : m.heap.items = append(m.heap.items, mergingIterItem{
718 2 : index: i,
719 2 : boundKey: &l.heapKey,
720 2 : })
721 2 : }
722 : }
723 2 : m.heap.init()
724 : }
725 :
726 2 : func (m *MergingIter) switchToMinHeap() error {
727 2 : // switchToMinHeap reorients the heap for forward iteration, without moving
728 2 : // the current MergingIter position.
729 2 :
730 2 : // The iterator is currently positioned at the span [m.start, m.end),
731 2 : // oriented in the reverse direction, so each level's iterator is positioned
732 2 : // to the largest key ≤ m.start. To reorient in the forward direction, we
733 2 : // must advance each level's iterator to the smallest key ≥ m.end. Consider
734 2 : // this three-level example.
735 2 : //
736 2 : // i0: b---d e-----h
737 2 : // i1: a---c h-----k
738 2 : // i2: a------------------------------p
739 2 : //
740 2 : // merged: a-b-c-d-e-----h-----k----------p
741 2 : //
742 2 : // If currently positioned at the merged span [c,d), then the level
743 2 : // iterators' heap keys are:
744 2 : //
745 2 : // i0: (b, [b, d)) i1: (c, [a,c)) i2: (a, [a,p))
746 2 : //
747 2 : // Reversing the heap should not move the merging iterator and should not
748 2 : // change the current [m.start, m.end) bounds. It should only prepare for
749 2 : // forward iteration by updating the child iterators' heap keys to:
750 2 : //
751 2 : // i0: (d, [b, d)) i1: (h, [h,k)) i2: (p, [a,p))
752 2 : //
753 2 : // In every level the first key ≥ m.end is the next in the iterator.
754 2 : // Justification: Suppose not and a level iterator's next key was some key k
755 2 : // such that k < m.end. The max-heap invariant dictates that the current
756 2 : // iterator position is the largest entry with a user key ≥ m.start. This
757 2 : // means k > m.start. We started with the assumption that k < m.end, so
758 2 : // m.start < k < m.end. But then k is between our current span bounds,
759 2 : // and reverse iteration would have constructed the current interval to be
760 2 : // [k, m.end) not [m.start, m.end).
761 2 :
762 2 : if invariants.Enabled {
763 2 : for i := range m.levels {
764 2 : l := &m.levels[i]
765 2 : if l.heapKey.kind != boundKindInvalid && m.comparer.Compare(l.heapKey.key, m.start) > 0 {
766 0 : panic("pebble: invariant violation: max-heap key > m.start")
767 : }
768 : }
769 : }
770 :
771 2 : for i := range m.levels {
772 2 : if err := m.levels[i].next(); err != nil {
773 1 : return err
774 1 : }
775 : }
776 2 : m.initMinHeap()
777 2 : return nil
778 : }
779 :
780 2 : func (m *MergingIter) switchToMaxHeap() error {
781 2 : // switchToMaxHeap reorients the heap for reverse iteration, without moving
782 2 : // the current MergingIter position.
783 2 :
784 2 : // The iterator is currently positioned at the span [m.start, m.end),
785 2 : // oriented in the forward direction. Each level's iterator is positioned at
786 2 : // the smallest bound ≥ m.end. To reorient in the reverse direction, we must
787 2 : // move each level's iterator to the largest key ≤ m.start. Consider this
788 2 : // three-level example.
789 2 : //
790 2 : // i0: b---d e-----h
791 2 : // i1: a---c h-----k
792 2 : // i2: a------------------------------p
793 2 : //
794 2 : // merged: a-b-c-d-e-----h-----k----------p
795 2 : //
796 2 : // If currently positioned at the merged span [c,d), then the level
797 2 : // iterators' heap keys are:
798 2 : //
799 2 : // i0: (d, [b, d)) i1: (h, [h,k)) i2: (p, [a,p))
800 2 : //
801 2 : // Reversing the heap should not move the merging iterator and should not
802 2 : // change the current [m.start, m.end) bounds. It should only prepare for
803 2 : // reverse iteration by updating the child iterators' heap keys to:
804 2 : //
805 2 : // i0: (b, [b, d)) i1: (c, [a,c)) i2: (a, [a,p))
806 2 : //
807 2 : // In every level the largest key ≤ m.start is the prev in the iterator.
808 2 : // Justification: Suppose not and a level iterator's prev key was some key k
809 2 : // such that k > m.start. The min-heap invariant dictates that the current
810 2 : // iterator position is the smallest entry with a user key ≥ m.end. This
811 2 : // means k < m.end, otherwise the iterator would be positioned at k. We
812 2 : // started with the assumption that k > m.start, so m.start < k < m.end. But
813 2 : // then k is between our current span bounds, and reverse iteration
814 2 : // would have constructed the current interval to be [m.start, k) not
815 2 : // [m.start, m.end).
816 2 :
817 2 : if invariants.Enabled {
818 2 : for i := range m.levels {
819 2 : l := &m.levels[i]
820 2 : if l.heapKey.kind != boundKindInvalid && m.comparer.Compare(l.heapKey.key, m.end) < 0 {
821 0 : panic("pebble: invariant violation: min-heap key < m.end")
822 : }
823 : }
824 : }
825 :
826 2 : for i := range m.levels {
827 2 : if err := m.levels[i].prev(); err != nil {
828 0 : return err
829 0 : }
830 : }
831 2 : m.initMaxHeap()
832 2 : return nil
833 : }
834 :
835 2 : func (m *MergingIter) findNextFragmentSet() (*keyspan.Span, error) {
836 2 : // Each iteration of this loop considers a new merged span between unique
837 2 : // user keys. An iteration may find that there exists no overlap for a given
838 2 : // span, (eg, if the spans [a,b), [d, e) exist within level iterators, the
839 2 : // below loop will still consider [b,d) before continuing to [d, e)). It
840 2 : // returns when it finds a span that is covered by at least one key.
841 2 :
842 2 : for m.heap.len() > 0 {
843 2 : // Initialize the next span's start bound. SeekGE and First prepare the
844 2 : // heap without advancing. Next leaves the heap in a state such that the
845 2 : // root is the smallest bound key equal to the returned span's end key,
846 2 : // so the heap is already positioned at the next merged span's start key.
847 2 :
848 2 : // NB: m.heapRoot() might be either an end boundary OR a start boundary
849 2 : // of a level's span. Both end and start boundaries may still be a start
850 2 : // key of a span in the set of fragmented spans returned by MergingIter.
851 2 : // Consider the scenario:
852 2 : // a----------l #1
853 2 : // b-----------m #2
854 2 : //
855 2 : // The merged, fully-fragmented spans that MergingIter exposes to the caller
856 2 : // have bounds:
857 2 : // a-b #1
858 2 : // b--------l #1
859 2 : // b--------l #2
860 2 : // l-m #2
861 2 : //
862 2 : // When advancing to l-m#2, we must set m.start to 'l', which originated
863 2 : // from [a,l)#1's end boundary.
864 2 : m.start = m.heap.items[0].boundKey.key
865 2 :
866 2 : // Before calling nextEntry, consider whether it might invalidate our
867 2 : // start boundary. If the start boundary key originated from an end
868 2 : // boundary, then we need to copy the start key before advancing the
869 2 : // underlying iterator to the next Span.
870 2 : if m.heap.items[0].boundKey.kind == boundKindFragmentEnd {
871 2 : m.buf = append(m.buf[:0], m.start...)
872 2 : m.start = m.buf
873 2 : }
874 :
875 : // There may be many entries all with the same user key. Spans in other
876 : // levels may also start or end at this same user key. For eg:
877 : // L1: [a, c) [c, d)
878 : // L2: [c, e)
879 : // If we're positioned at L1's end(c) end boundary, we want to advance
880 : // to the first bound > c.
881 2 : if err := m.nextEntry(); err != nil {
882 1 : return nil, err
883 1 : }
884 2 : for len(m.heap.items) > 0 && m.comparer.Equal(m.heapRoot(), m.start) {
885 2 : if err := m.nextEntry(); err != nil {
886 0 : return nil, err
887 0 : }
888 : }
889 2 : if len(m.heap.items) == 0 {
890 2 : break
891 : }
892 :
893 : // The current entry at the top of the heap is the first key > m.start.
894 : // It must become the end bound for the span we will return to the user.
895 : // In the above example, the root of the heap is L1's end(d).
896 2 : m.end = m.heap.items[0].boundKey.key
897 2 :
898 2 : // Each level within m.levels may have a span that overlaps the
899 2 : // fragmented key span [m.start, m.end). Update m.keys to point to them
900 2 : // and sort them by kind, sequence number. There may not be any keys
901 2 : // defined over [m.start, m.end) if we're between the end of one span
902 2 : // and the start of the next, OR if the configured transform filters any
903 2 : // keys out. We allow empty spans that were emitted by child iterators, but
904 2 : // we elide empty spans created by the mergingIter itself that don't overlap
905 2 : // with any child iterator returned spans (i.e. empty spans that bridge two
906 2 : // distinct child-iterator-defined spans).
907 2 : if found, s, err := m.synthesizeKeys(+1); err != nil {
908 0 : return nil, err
909 2 : } else if found && s != nil {
910 2 : return s, nil
911 2 : }
912 : }
913 : // Exhausted.
914 2 : m.clear()
915 2 : return nil, nil
916 : }
917 :
918 2 : func (m *MergingIter) findPrevFragmentSet() (*keyspan.Span, error) {
919 2 : // Each iteration of this loop considers a new merged span between unique
920 2 : // user keys. An iteration may find that there exists no overlap for a given
921 2 : // span, (eg, if the spans [a,b), [d, e) exist within level iterators, the
922 2 : // below loop will still consider [b,d) before continuing to [a, b)). It
923 2 : // returns when it finds a span that is covered by at least one key.
924 2 :
925 2 : for m.heap.len() > 0 {
926 2 : // Initialize the next span's end bound. SeekLT and Last prepare the
927 2 : // heap without advancing. Prev leaves the heap in a state such that the
928 2 : // root is the largest bound key equal to the returned span's start key,
929 2 : // so the heap is already positioned at the next merged span's end key.
930 2 :
931 2 : // NB: m.heapRoot() might be either an end boundary OR a start boundary
932 2 : // of a level's span. Both end and start boundaries may still be a start
933 2 : // key of a span returned by MergingIter. Consider the scenario:
934 2 : // a----------l #2
935 2 : // b-----------m #1
936 2 : //
937 2 : // The merged, fully-fragmented spans that MergingIter exposes to the caller
938 2 : // have bounds:
939 2 : // a-b #2
940 2 : // b--------l #2
941 2 : // b--------l #1
942 2 : // l-m #1
943 2 : //
944 2 : // When Preving to a-b#2, we must set m.end to 'b', which originated
945 2 : // from [b,m)#1's start boundary.
946 2 : m.end = m.heap.items[0].boundKey.key
947 2 :
948 2 : // Before calling prevEntry, consider whether it might invalidate our
949 2 : // end boundary. If the end boundary key originated from a start
950 2 : // boundary, then we need to copy the end key before advancing the
951 2 : // underlying iterator to the previous Span.
952 2 : if m.heap.items[0].boundKey.kind == boundKindFragmentStart {
953 2 : m.buf = append(m.buf[:0], m.end...)
954 2 : m.end = m.buf
955 2 : }
956 :
957 : // There may be many entries all with the same user key. Spans in other
958 : // levels may also start or end at this same user key. For eg:
959 : // L1: [a, c) [c, d)
960 : // L2: [c, e)
961 : // If we're positioned at L1's start(c) start boundary, we want to prev
962 : // to move to the first bound < c.
963 2 : if err := m.prevEntry(); err != nil {
964 1 : return nil, err
965 1 : }
966 2 : for len(m.heap.items) > 0 && m.comparer.Equal(m.heapRoot(), m.end) {
967 2 : if err := m.prevEntry(); err != nil {
968 0 : return nil, err
969 0 : }
970 : }
971 2 : if len(m.heap.items) == 0 {
972 2 : break
973 : }
974 :
975 : // The current entry at the top of the heap is the first key < m.end.
976 : // It must become the start bound for the span we will return to the
977 : // user. In the above example, the root of the heap is L1's start(a).
978 2 : m.start = m.heap.items[0].boundKey.key
979 2 :
980 2 : // Each level within m.levels may have a set of keys that overlap the
981 2 : // fragmented key span [m.start, m.end). Update m.keys to point to them
982 2 : // and sort them by kind, sequence number. There may not be any keys
983 2 : // spanning [m.start, m.end) if we're between the end of one span and
984 2 : // the start of the next, OR if the configured transform filters any
985 2 : // keys out. We allow empty spans that were emitted by child iterators, but
986 2 : // we elide empty spans created by the mergingIter itself that don't overlap
987 2 : // with any child iterator returned spans (i.e. empty spans that bridge two
988 2 : // distinct child-iterator-defined spans).
989 2 : if found, s, err := m.synthesizeKeys(-1); err != nil {
990 0 : return nil, err
991 2 : } else if found && s != nil {
992 2 : return s, nil
993 2 : }
994 : }
995 : // Exhausted.
996 2 : m.clear()
997 2 : return nil, nil
998 : }
999 :
1000 2 : func (m *MergingIter) heapRoot() []byte {
1001 2 : return m.heap.items[0].boundKey.key
1002 2 : }
1003 :
1004 : // synthesizeKeys is called by find{Next,Prev}FragmentSet to populate and
1005 : // sort the set of keys overlapping [m.start, m.end).
1006 : //
1007 : // During forward iteration, if the current heap item is a fragment end,
1008 : // then the fragment's start must be ≤ m.start and the fragment overlaps the
1009 : // current iterator position of [m.start, m.end).
1010 : //
1011 : // During reverse iteration, if the current heap item is a fragment start,
1012 : // then the fragment's end must be ≥ m.end and the fragment overlaps the
1013 : // current iteration position of [m.start, m.end).
1014 : //
1015 : // The boolean return value, `found`, is true if the returned span overlaps
1016 : // with a span returned by a child iterator.
1017 2 : func (m *MergingIter) synthesizeKeys(dir int8) (bool, *keyspan.Span, error) {
1018 2 : if invariants.Enabled {
1019 2 : if m.comparer.Compare(m.start, m.end) >= 0 {
1020 0 : panic(fmt.Sprintf("pebble: invariant violation: span start ≥ end: %s >= %s", m.start, m.end))
1021 : }
1022 : }
1023 :
1024 2 : m.keys = m.keys[:0]
1025 2 : found := false
1026 2 : for i := range m.levels {
1027 2 : if dir == +1 && m.levels[i].heapKey.kind == boundKindFragmentEnd ||
1028 2 : dir == -1 && m.levels[i].heapKey.kind == boundKindFragmentStart {
1029 2 : m.keys = append(m.keys, m.levels[i].heapKey.span.Keys...)
1030 2 : found = true
1031 2 : }
1032 : }
1033 : // Sort the keys by sequence number in descending order.
1034 : //
1035 : // TODO(jackson): We should be able to remove this sort and instead
1036 : // guarantee that we'll return keys in the order of the levels they're from.
1037 : // With careful iterator construction, this would guarantee that they're
1038 : // sorted by trailer descending for the range key iteration use case.
1039 2 : slices.SortFunc(m.keys, func(a, b keyspan.Key) int {
1040 2 : return cmp.Compare(b.Trailer, a.Trailer)
1041 2 : })
1042 :
1043 : // Apply the configured transform. See VisibleTransform.
1044 2 : m.span = keyspan.Span{
1045 2 : Start: m.start,
1046 2 : End: m.end,
1047 2 : Keys: m.keys,
1048 2 : KeysOrder: keyspan.ByTrailerDesc,
1049 2 : }
1050 2 : if err := m.transformer.Transform(m.comparer.CompareSuffixes, m.span, &m.span); err != nil {
1051 0 : return false, nil, err
1052 0 : }
1053 2 : return found, &m.span, nil
1054 : }
1055 :
1056 2 : func (m *MergingIter) clear() {
1057 2 : for fi := range m.keys {
1058 2 : m.keys[fi] = keyspan.Key{}
1059 2 : }
1060 2 : m.keys = m.keys[:0]
1061 : }
1062 :
1063 : // nextEntry steps to the next entry.
1064 2 : func (m *MergingIter) nextEntry() error {
1065 2 : l := &m.levels[m.heap.items[0].index]
1066 2 : if err := l.next(); err != nil {
1067 1 : return err
1068 1 : }
1069 2 : if !l.heapKey.valid() {
1070 2 : // l.iter is exhausted.
1071 2 : m.heap.pop()
1072 2 : return nil
1073 2 : }
1074 :
1075 2 : if m.heap.len() > 1 {
1076 2 : m.heap.fix(0)
1077 2 : }
1078 2 : return nil
1079 : }
1080 :
1081 : // prevEntry steps to the previous entry.
1082 2 : func (m *MergingIter) prevEntry() error {
1083 2 : l := &m.levels[m.heap.items[0].index]
1084 2 : if err := l.prev(); err != nil {
1085 1 : return err
1086 1 : }
1087 2 : if !l.heapKey.valid() {
1088 2 : // l.iter is exhausted.
1089 2 : m.heap.pop()
1090 2 : return nil
1091 2 : }
1092 :
1093 2 : if m.heap.len() > 1 {
1094 2 : m.heap.fix(0)
1095 2 : }
1096 2 : return nil
1097 : }
1098 :
1099 : // DebugString returns a string representing the current internal state of the
1100 : // merging iterator and its heap for debugging purposes.
1101 0 : func (m *MergingIter) DebugString() string {
1102 0 : var buf bytes.Buffer
1103 0 : fmt.Fprintf(&buf, "Current bounds: [%q, %q)\n", m.start, m.end)
1104 0 : for i := range m.levels {
1105 0 : fmt.Fprintf(&buf, "%d: heap key %s\n", i, m.levels[i].heapKey)
1106 0 : }
1107 0 : return buf.String()
1108 : }
1109 :
1110 : // WrapChildren implements FragmentIterator.
1111 0 : func (m *MergingIter) WrapChildren(wrap keyspan.WrapFn) {
1112 0 : for i := range m.levels {
1113 0 : m.levels[i].iter = wrap(m.levels[i].iter)
1114 0 : }
1115 0 : m.wrapFn = wrap
1116 : }
1117 :
1118 : // DebugTree is part of the FragmentIterator interface.
1119 0 : func (m *MergingIter) DebugTree(tp treeprinter.Node) {
1120 0 : n := tp.Childf("%T(%p)", m, m)
1121 0 : for i := range m.levels {
1122 0 : if iter := m.levels[i].iter; iter != nil {
1123 0 : m.levels[i].iter.DebugTree(n)
1124 0 : }
1125 : }
1126 : }
1127 :
1128 : type mergingIterItem struct {
1129 : // boundKey points to the corresponding mergingIterLevel's `iterKey`.
1130 : *boundKey
1131 : // index is the index of this level within the MergingIter's levels field.
1132 : index int
1133 : }
1134 :
1135 : // mergingIterHeap is copied from mergingIterHeap defined in the root pebble
1136 : // package for use with point keys.
1137 :
1138 : type mergingIterHeap struct {
1139 : cmp base.Compare
1140 : reverse bool
1141 : items []mergingIterItem
1142 : }
1143 :
1144 2 : func (h *mergingIterHeap) len() int {
1145 2 : return len(h.items)
1146 2 : }
1147 :
1148 2 : func (h *mergingIterHeap) less(i, j int) bool {
1149 2 : // This key comparison only uses the user key and not the boundKind. Bound
1150 2 : // kind doesn't matter because when stepping over a user key,
1151 2 : // findNextFragmentSet and findPrevFragmentSet skip past all heap items with
1152 2 : // that user key, and makes no assumptions on ordering. All other heap
1153 2 : // examinations only consider the user key.
1154 2 : ik, jk := h.items[i].key, h.items[j].key
1155 2 : c := h.cmp(ik, jk)
1156 2 : if h.reverse {
1157 2 : return c > 0
1158 2 : }
1159 2 : return c < 0
1160 : }
1161 :
1162 2 : func (h *mergingIterHeap) swap(i, j int) {
1163 2 : h.items[i], h.items[j] = h.items[j], h.items[i]
1164 2 : }
1165 :
1166 : // init, fix, up and down are copied from the go stdlib.
1167 2 : func (h *mergingIterHeap) init() {
1168 2 : // heapify
1169 2 : n := h.len()
1170 2 : for i := n/2 - 1; i >= 0; i-- {
1171 2 : h.down(i, n)
1172 2 : }
1173 : }
1174 :
1175 2 : func (h *mergingIterHeap) fix(i int) {
1176 2 : if !h.down(i, h.len()) {
1177 2 : h.up(i)
1178 2 : }
1179 : }
1180 :
1181 2 : func (h *mergingIterHeap) pop() *mergingIterItem {
1182 2 : n := h.len() - 1
1183 2 : h.swap(0, n)
1184 2 : h.down(0, n)
1185 2 : item := &h.items[n]
1186 2 : h.items = h.items[:n]
1187 2 : return item
1188 2 : }
1189 :
1190 2 : func (h *mergingIterHeap) up(j int) {
1191 2 : for {
1192 2 : i := (j - 1) / 2 // parent
1193 2 : if i == j || !h.less(j, i) {
1194 2 : break
1195 : }
1196 0 : h.swap(i, j)
1197 0 : j = i
1198 : }
1199 : }
1200 :
1201 2 : func (h *mergingIterHeap) down(i0, n int) bool {
1202 2 : i := i0
1203 2 : for {
1204 2 : j1 := 2*i + 1
1205 2 : if j1 >= n || j1 < 0 { // j1 < 0 after int overflow
1206 2 : break
1207 : }
1208 2 : j := j1 // left child
1209 2 : if j2 := j1 + 1; j2 < n && h.less(j2, j1) {
1210 2 : j = j2 // = 2*i + 2 // right child
1211 2 : }
1212 2 : if !h.less(j, i) {
1213 2 : break
1214 : }
1215 2 : h.swap(i, j)
1216 2 : i = j
1217 : }
1218 2 : return i > i0
1219 : }
1220 :
1221 : type boundKind int8
1222 :
1223 : const (
1224 : boundKindInvalid boundKind = iota
1225 : boundKindFragmentStart
1226 : boundKindFragmentEnd
1227 : )
1228 :
1229 : type boundKey struct {
1230 : kind boundKind
1231 : key []byte
1232 : // span holds the span the bound key comes from.
1233 : //
1234 : // If kind is boundKindFragmentStart, then key is span.Start. If kind is
1235 : // boundKindFragmentEnd, then key is span.End.
1236 : span *keyspan.Span
1237 : }
1238 :
1239 2 : func (k boundKey) valid() bool {
1240 2 : return k.kind != boundKindInvalid
1241 2 : }
1242 :
1243 0 : func (k boundKey) String() string {
1244 0 : var buf bytes.Buffer
1245 0 : switch k.kind {
1246 0 : case boundKindInvalid:
1247 0 : fmt.Fprint(&buf, "invalid")
1248 0 : case boundKindFragmentStart:
1249 0 : fmt.Fprint(&buf, "fragment-start")
1250 0 : case boundKindFragmentEnd:
1251 0 : fmt.Fprint(&buf, "fragment-end ")
1252 0 : default:
1253 0 : fmt.Fprintf(&buf, "unknown-kind(%d)", k.kind)
1254 : }
1255 0 : fmt.Fprintf(&buf, " %s [", k.key)
1256 0 : fmt.Fprintf(&buf, "%s", k.span)
1257 0 : fmt.Fprint(&buf, "]")
1258 0 : return buf.String()
1259 : }
|