Line data Source code
1 : // Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package keyspan
6 :
7 : import (
8 : "bytes"
9 : "fmt"
10 : "sort"
11 :
12 : "github.com/cockroachdb/pebble/internal/base"
13 : "github.com/cockroachdb/pebble/internal/invariants"
14 : "github.com/cockroachdb/pebble/internal/manifest"
15 : )
16 :
17 : // TODO(jackson): Consider implementing an optimization to seek lower levels
18 : // past higher levels' RANGEKEYDELs. This would be analaogous to the
19 : // optimization pebble.mergingIter performs for RANGEDELs during point key
20 : // seeks. It may not be worth it, because range keys are rare and cascading
21 : // seeks would require introducing key comparisons to switchTo{Min,Max}Heap
22 : // where there currently are none.
23 :
24 : // TODO(jackson): There are several opportunities to use base.Equal in the
25 : // MergingIter implementation, but will require a bit of plumbing to thread the
26 : // Equal function.
27 :
28 : // MergingIter merges spans across levels of the LSM, exposing an iterator over
29 : // spans that yields sets of spans fragmented at unique user key boundaries.
30 : //
31 : // A MergingIter is initialized with an arbitrary number of child iterators over
32 : // fragmented spans. Each child iterator exposes fragmented key spans, such that
33 : // overlapping keys are surfaced in a single Span. Key spans from one child
34 : // iterator may overlap key spans from another child iterator arbitrarily.
35 : //
36 : // The spans combined by MergingIter will return spans with keys sorted by
37 : // trailer descending. If the MergingIter is configured with a Transformer, it's
38 : // permitted to modify the ordering of the spans' keys returned by MergingIter.
39 : //
40 : // # Algorithm
41 : //
42 : // The merging iterator wraps child iterators, merging and fragmenting spans
43 : // across levels. The high-level algorithm is:
44 : //
45 : // 1. Initialize the heap with bound keys from child iterators' spans.
46 : // 2. Find the next [or previous] two unique user keys' from bounds.
47 : // 3. Consider the span formed between the two unique user keys a candidate
48 : // span.
49 : // 4. Determine if any of the child iterators' spans overlap the candidate
50 : // span.
51 : // 4a. If any of the child iterator's current bounds are end keys
52 : // (during forward iteration) or start keys (during reverse
53 : // iteration), then all the spans with that bound overlap the
54 : // candidate span.
55 : // 4b. Apply the configured transform, which may remove keys.
56 : // 4c. If no spans overlap, forget the smallest (forward iteration)
57 : // or largest (reverse iteration) unique user key and advance
58 : // the iterators to the next unique user key. Start again from 3.
59 : //
60 : // # Detailed algorithm
61 : //
62 : // Each level (i0, i1, ...) has a user-provided input FragmentIterator. The
63 : // merging iterator steps through individual boundaries of the underlying
64 : // spans separately. If the underlying FragmentIterator has fragments
65 : // [a,b){#2,#1} [b,c){#1} the mergingIterLevel.{next,prev} step through:
66 : //
67 : // (a, start), (b, end), (b, start), (c, end)
68 : //
69 : // Note that (a, start) and (b, end) are observed ONCE each, despite two keys
70 : // sharing those bounds. Also note that (b, end) and (b, start) are two distinct
71 : // iterator positions of a mergingIterLevel.
72 : //
73 : // The merging iterator maintains a heap (min during forward iteration, max
74 : // during reverse iteration) containing the boundKeys. Each boundKey is a
75 : // 3-tuple holding the bound user key, whether the bound is a start or end key
76 : // and the set of keys from that level that have that bound. The heap orders
77 : // based on the boundKey's user key only.
78 : //
79 : // The merging iterator is responsible for merging spans across levels to
80 : // determine which span is next, but it's also responsible for fragmenting
81 : // overlapping spans. Consider the example:
82 : //
83 : // i0: b---d e-----h
84 : // i1: a---c h-----k
85 : // i2: a------------------------------p
86 : //
87 : // fragments: a-b-c-d-e-----h-----k----------p
88 : //
89 : // None of the individual child iterators contain a span with the exact bounds
90 : // [c,d), but the merging iterator must produce a span [c,d). To accomplish
91 : // this, the merging iterator visits every span between unique boundary user
92 : // keys. In the above example, this is:
93 : //
94 : // [a,b), [b,c), [c,d), [d,e), [e, h), [h, k), [k, p)
95 : //
96 : // The merging iterator first initializes the heap to prepare for iteration.
97 : // The description below discusses the mechanics of forward iteration after a
98 : // call to First, but the mechanics are similar for reverse iteration and
99 : // other positioning methods.
100 : //
101 : // During a call to First, the heap is initialized by seeking every
102 : // mergingIterLevel to the first bound of the first fragment. In the above
103 : // example, this seeks the child iterators to:
104 : //
105 : // i0: (b, boundKindFragmentStart, [ [b,d) ])
106 : // i1: (a, boundKindFragmentStart, [ [a,c) ])
107 : // i2: (a, boundKindFragmentStart, [ [a,p) ])
108 : //
109 : // After fixing up the heap, the root of the heap is a boundKey with the
110 : // smallest user key ('a' in the example). Once the heap is setup for iteration
111 : // in the appropriate direction and location, the merging iterator uses
112 : // find{Next,Prev}FragmentSet to find the next/previous span bounds.
113 : //
114 : // During forward iteration, the root of the heap's user key is the start key
115 : // key of next merged span. findNextFragmentSet sets m.start to this user
116 : // key. The heap may contain other boundKeys with the same user key if another
117 : // level has a fragment starting or ending at the same key, so the
118 : // findNextFragmentSet method pulls from the heap until it finds the first key
119 : // greater than m.start. This key is used as the end key.
120 : //
121 : // In the above example, this results in m.start = 'a', m.end = 'b' and child
122 : // iterators in the following positions:
123 : //
124 : // i0: (b, boundKindFragmentStart, [ [b,d) ])
125 : // i1: (c, boundKindFragmentEnd, [ [a,c) ])
126 : // i2: (p, boundKindFragmentEnd, [ [a,p) ])
127 : //
128 : // With the user key bounds of the next merged span established,
129 : // findNextFragmentSet must determine which, if any, fragments overlap the span.
130 : // During forward iteration any child iterator that is now positioned at an end
131 : // boundary has an overlapping span. (Justification: The child iterator's end
132 : // boundary is ≥ m.end. The corresponding start boundary must be ≤ m.start since
133 : // there were no other user keys between m.start and m.end. So the fragments
134 : // associated with the iterator's current end boundary have start and end bounds
135 : // such that start ≤ m.start < m.end ≤ end).
136 : //
137 : // findNextFragmentSet iterates over the levels, collecting keys from any child
138 : // iterators positioned at end boundaries. In the above example, i1 and i2 are
139 : // positioned at end boundaries, so findNextFragmentSet collects the keys of
140 : // [a,c) and [a,p). These spans contain the merging iterator's [m.start, m.end)
141 : // span, but they may also extend beyond the m.start and m.end. The merging
142 : // iterator returns the keys with the merging iter's m.start and m.end bounds,
143 : // preserving the underlying keys' sequence numbers, key kinds and values.
144 : //
145 : // A MergingIter is configured with a Transform that's applied to the span
146 : // before surfacing it to the iterator user. A Transform may remove keys
147 : // arbitrarily, but it may not modify the values themselves.
148 : //
149 : // It may be the case that findNextFragmentSet finds no levels positioned at end
150 : // boundaries, or that there are no spans remaining after applying a transform,
151 : // in which case the span [m.start, m.end) overlaps with nothing. In this case
152 : // findNextFragmentSet loops, repeating the above process again until it finds a
153 : // span that does contain keys.
154 : //
155 : // # Memory safety
156 : //
157 : // The FragmentIterator interface only guarantees stability of a Span and its
158 : // associated slices until the next positioning method is called. Adjacent Spans
159 : // may be contained in different sstables, requring the FragmentIterator
160 : // implementation to close one sstable, releasing its memory, before opening the
161 : // next. Most of the state used by the MergingIter is derived from spans at
162 : // current child iterator positions only, ensuring state is stable. The one
163 : // exception is the start bound during forward iteration and the end bound
164 : // during reverse iteration.
165 : //
166 : // If the heap root originates from an end boundary when findNextFragmentSet
167 : // begins, a Next on the heap root level may invalidate the end boundary. To
168 : // accommodate this, find{Next,Prev}FragmentSet copy the initial boundary if the
169 : // subsequent Next/Prev would move to the next span.
170 : type MergingIter struct {
171 : *MergingBuffers
172 : // start and end hold the bounds for the span currently under the
173 : // iterator position.
174 : //
175 : // Invariant: None of the levels' iterators contain spans with a bound
176 : // between start and end. For all bounds b, b ≤ start || b ≥ end.
177 : start, end []byte
178 :
179 : // transformer defines a transformation to be applied to a span before it's
180 : // yielded to the user. Transforming may filter individual keys contained
181 : // within the span.
182 : transformer Transformer
183 : // span holds the iterator's current span. This span is used as the
184 : // destination for transforms. Every tranformed span overwrites the
185 : // previous.
186 : span Span
187 : err error
188 : dir int8
189 :
190 : // alloc preallocates mergingIterLevel and mergingIterItems for use by the
191 : // merging iterator. As long as the merging iterator is used with
192 : // manifest.NumLevels+3 and fewer fragment iterators, the merging iterator
193 : // will not need to allocate upon initialization. The value NumLevels+3
194 : // mirrors the preallocated levels in iterAlloc used for point iterators.
195 : // Invariant: cap(levels) == cap(items)
196 : alloc struct {
197 : levels [manifest.NumLevels + 3]mergingIterLevel
198 : items [manifest.NumLevels + 3]mergingIterItem
199 : }
200 : }
201 :
202 : // MergingBuffers holds buffers used while merging keyspans.
203 : type MergingBuffers struct {
204 : // keys holds all of the keys across all levels that overlap the key span
205 : // [start, end), sorted by Trailer descending. This slice is reconstituted
206 : // in synthesizeKeys from each mergingIterLevel's keys every time the
207 : // [start, end) bounds change.
208 : //
209 : // Each element points into a child iterator's memory, so the keys may not
210 : // be directly modified.
211 : keys keysBySeqNumKind
212 : // levels holds levels allocated by MergingIter.init. The MergingIter will
213 : // prefer use of its `manifest.NumLevels+3` array, so this slice will be
214 : // longer if set.
215 : levels []mergingIterLevel
216 : wrapFn WrapFn
217 : // heap holds a slice for the merging iterator heap allocated by
218 : // MergingIter.init. The MergingIter will prefer use of its
219 : // `manifest.NumLevels+3` items array, so this slice will be longer if set.
220 : heap mergingIterHeap
221 : // buf is a buffer used to save [start, end) boundary keys.
222 : buf []byte
223 : }
224 :
225 : // PrepareForReuse discards any excessively large buffers.
226 1 : func (bufs *MergingBuffers) PrepareForReuse() {
227 1 : if cap(bufs.buf) > bufferReuseMaxCapacity {
228 0 : bufs.buf = nil
229 0 : }
230 : }
231 :
232 : // MergingIter implements the FragmentIterator interface.
233 : var _ FragmentIterator = (*MergingIter)(nil)
234 :
235 : type mergingIterLevel struct {
236 : iter FragmentIterator
237 :
238 : // heapKey holds the current key at this level for use within the heap.
239 : heapKey boundKey
240 : }
241 :
242 1 : func (l *mergingIterLevel) next() {
243 1 : if l.heapKey.kind == boundKindFragmentStart {
244 1 : l.heapKey = boundKey{
245 1 : kind: boundKindFragmentEnd,
246 1 : key: l.heapKey.span.End,
247 1 : span: l.heapKey.span,
248 1 : }
249 1 : return
250 1 : }
251 1 : if s := l.iter.Next(); s == nil {
252 1 : l.heapKey = boundKey{kind: boundKindInvalid}
253 1 : } else {
254 1 : l.heapKey = boundKey{
255 1 : kind: boundKindFragmentStart,
256 1 : key: s.Start,
257 1 : span: s,
258 1 : }
259 1 : }
260 : }
261 :
262 1 : func (l *mergingIterLevel) prev() {
263 1 : if l.heapKey.kind == boundKindFragmentEnd {
264 1 : l.heapKey = boundKey{
265 1 : kind: boundKindFragmentStart,
266 1 : key: l.heapKey.span.Start,
267 1 : span: l.heapKey.span,
268 1 : }
269 1 : return
270 1 : }
271 1 : if s := l.iter.Prev(); s == nil {
272 1 : l.heapKey = boundKey{kind: boundKindInvalid}
273 1 : } else {
274 1 : l.heapKey = boundKey{
275 1 : kind: boundKindFragmentEnd,
276 1 : key: s.End,
277 1 : span: s,
278 1 : }
279 1 : }
280 : }
281 :
282 : // Init initializes the merging iterator with the provided fragment iterators.
283 : func (m *MergingIter) Init(
284 : cmp base.Compare, transformer Transformer, bufs *MergingBuffers, iters ...FragmentIterator,
285 1 : ) {
286 1 : *m = MergingIter{
287 1 : MergingBuffers: bufs,
288 1 : transformer: transformer,
289 1 : }
290 1 : m.heap.cmp = cmp
291 1 : levels, items := m.levels, m.heap.items
292 1 :
293 1 : // Invariant: cap(levels) >= cap(items)
294 1 : // Invariant: cap(alloc.levels) == cap(alloc.items)
295 1 : if len(iters) <= len(m.alloc.levels) {
296 1 : // The slices allocated on the MergingIter struct are large enough.
297 1 : m.levels = m.alloc.levels[:len(iters)]
298 1 : m.heap.items = m.alloc.items[:0]
299 1 : } else if len(iters) <= cap(levels) {
300 0 : // The existing heap-allocated slices are large enough, so reuse them.
301 0 : m.levels = levels[:len(iters)]
302 0 : m.heap.items = items[:0]
303 1 : } else {
304 1 : // Heap allocate new slices.
305 1 : m.levels = make([]mergingIterLevel, len(iters))
306 1 : m.heap.items = make([]mergingIterItem, 0, len(iters))
307 1 : }
308 1 : for i := range m.levels {
309 1 : m.levels[i] = mergingIterLevel{iter: iters[i]}
310 1 : if m.wrapFn != nil {
311 0 : m.levels[i].iter = m.wrapFn(m.levels[i].iter)
312 0 : }
313 : }
314 : }
315 :
316 : // AddLevel adds a new level to the bottom of the merging iterator. AddLevel
317 : // must be called after Init and before any other method.
318 1 : func (m *MergingIter) AddLevel(iter FragmentIterator) {
319 1 : if m.wrapFn != nil {
320 0 : iter = m.wrapFn(iter)
321 0 : }
322 1 : m.levels = append(m.levels, mergingIterLevel{iter: iter})
323 : }
324 :
325 : // SeekGE moves the iterator to the first span covering a key greater than
326 : // or equal to the given key. This is equivalent to seeking to the first
327 : // span with an end key greater than the given key.
328 1 : func (m *MergingIter) SeekGE(key []byte) *Span {
329 1 : m.invalidate() // clear state about current position
330 1 :
331 1 : // SeekGE(k) seeks to the first span with an end key greater than the given
332 1 : // key. The merged span M that we're searching for might straddle the seek
333 1 : // `key`. In this case, the M.Start may be a key ≤ the seek key.
334 1 : //
335 1 : // Consider a SeekGE(dog) in the following example.
336 1 : //
337 1 : // i0: b---d e-----h
338 1 : // i1: a---c h-----k
339 1 : // i2: a------------------------------p
340 1 : // merged: a-b-c-d-e-----h-----k----------p
341 1 : //
342 1 : // The merged span M containing 'dog' is [d,e). The 'd' of the merged span
343 1 : // comes from i0's [b,d)'s end boundary. The [b,d) span does not cover any
344 1 : // key >= dog, so we cannot find the span by positioning the child iterators
345 1 : // using a SeekGE(dog).
346 1 : //
347 1 : // Instead, if we take all the child iterators' spans bounds:
348 1 : // a b c d e h k p
349 1 : // We want to partition them into keys ≤ `key` and keys > `key`.
350 1 : // dog
351 1 : // │
352 1 : // a b c d│e h k p
353 1 : // │
354 1 : // The largest key on the left of the partition forms the merged span's
355 1 : // start key, and the smallest key on the right of the partition forms the
356 1 : // merged span's end key. Recharacterized:
357 1 : //
358 1 : // M.Start: the largest boundary ≤ k of any child span
359 1 : // M.End: the smallest boundary > k of any child span
360 1 : //
361 1 : // The FragmentIterator interface doesn't implement seeking by all bounds,
362 1 : // it implements seeking by containment. A SeekGE(k) will ensure we observe
363 1 : // all start boundaries ≥ k and all end boundaries > k but does not ensure
364 1 : // we observe end boundaries = k or any boundaries < k. A SeekLT(k) will
365 1 : // ensure we observe all start boundaries < k and all end boundaries ≤ k but
366 1 : // does not ensure we observe any start boundaries = k or any boundaries >
367 1 : // k. This forces us to seek in one direction and step in the other.
368 1 : //
369 1 : // In a SeekGE, we want to end up oriented in the forward direction when
370 1 : // complete, so we begin with searching for M.Start by SeekLT-ing every
371 1 : // child iterator to `k`. For every child span found, we determine the
372 1 : // largest bound ≤ `k` and use it to initialize our max heap. The resulting
373 1 : // root of the max heap is a preliminary value for `M.Start`.
374 1 : for i := range m.levels {
375 1 : l := &m.levels[i]
376 1 : s := l.iter.SeekLT(key)
377 1 : if s == nil {
378 1 : l.heapKey = boundKey{kind: boundKindInvalid}
379 1 : } else if m.cmp(s.End, key) <= 0 {
380 1 : l.heapKey = boundKey{
381 1 : kind: boundKindFragmentEnd,
382 1 : key: s.End,
383 1 : span: s,
384 1 : }
385 1 : } else {
386 1 : // s.End > key && s.Start < key
387 1 : // We need to use this span's start bound, since that's the largest
388 1 : // bound ≤ key.
389 1 : l.heapKey = boundKey{
390 1 : kind: boundKindFragmentStart,
391 1 : key: s.Start,
392 1 : span: s,
393 1 : }
394 1 : }
395 : }
396 1 : m.initMaxHeap()
397 1 : if m.err != nil {
398 1 : return nil
399 1 : } else if len(m.heap.items) == 0 {
400 1 : // There are no spans covering any key < `key`. There is no span that
401 1 : // straddles the seek key. Reorient the heap into a min heap and return
402 1 : // the first span we find in the forward direction.
403 1 : m.switchToMinHeap()
404 1 : return m.findNextFragmentSet()
405 1 : }
406 :
407 : // The heap root is now the largest boundary key b such that:
408 : // 1. b < k
409 : // 2. b = k, and b is an end boundary
410 : // There's a third case that we will need to consider later, after we've
411 : // switched to a min heap:
412 : // 3. there exists a start boundary key b such that b = k.
413 : // A start boundary key equal to k would not be surfaced when we seeked all
414 : // the levels using SeekLT(k), since no key <k would be covered within a
415 : // span within an inclusive `k` start boundary.
416 : //
417 : // Assume that the tightest boundary ≤ k is the current heap root (cases 1 &
418 : // 2). After we switch to a min heap, we'll check for the third case and
419 : // adjust the start boundary if necessary.
420 1 : m.start = m.heap.items[0].boundKey.key
421 1 :
422 1 : // Before switching the direction of the heap, save a copy of the start
423 1 : // boundary if it's the end boundary of some child span. Next-ing the child
424 1 : // iterator might switch files and invalidate the memory of the bound.
425 1 : if m.heap.items[0].boundKey.kind == boundKindFragmentEnd {
426 1 : m.buf = append(m.buf[:0], m.start...)
427 1 : m.start = m.buf
428 1 : }
429 :
430 : // Switch to a min heap. This will move each level to the next bound in
431 : // every level, and then establish a min heap. This allows us to obtain the
432 : // smallest boundary key > `key`, which will serve as our candidate end
433 : // bound.
434 1 : m.switchToMinHeap()
435 1 : if m.err != nil {
436 1 : return nil
437 1 : } else if len(m.heap.items) == 0 {
438 1 : return nil
439 1 : }
440 :
441 : // Check for the case 3 described above. It's possible that when we switch
442 : // heap directions, we discover a start boundary of some child span that is
443 : // equal to the seek key `key`. In this case, we want this key to be our
444 : // start boundary.
445 1 : if m.heap.items[0].boundKey.kind == boundKindFragmentStart &&
446 1 : m.cmp(m.heap.items[0].boundKey.key, key) == 0 {
447 1 : // Call findNextFragmentSet, which will set m.start to the heap root and
448 1 : // proceed forward.
449 1 : return m.findNextFragmentSet()
450 1 : }
451 :
452 1 : m.end = m.heap.items[0].boundKey.key
453 1 : if found, s := m.synthesizeKeys(+1); found && s != nil {
454 1 : return s
455 1 : }
456 1 : return m.findNextFragmentSet()
457 :
458 : }
459 :
460 : // SeekLT moves the iterator to the last span covering a key less than the
461 : // given key. This is equivalent to seeking to the last span with a start
462 : // key less than the given key.
463 1 : func (m *MergingIter) SeekLT(key []byte) *Span {
464 1 : m.invalidate() // clear state about current position
465 1 :
466 1 : // SeekLT(k) seeks to the last span with a start key less than the given
467 1 : // key. The merged span M that we're searching for might straddle the seek
468 1 : // `key`. In this case, the M.End may be a key ≥ the seek key.
469 1 : //
470 1 : // Consider a SeekLT(dog) in the following example.
471 1 : //
472 1 : // i0: b---d e-----h
473 1 : // i1: a---c h-----k
474 1 : // i2: a------------------------------p
475 1 : // merged: a-b-c-d-e-----h-----k----------p
476 1 : //
477 1 : // The merged span M containing the largest key <'dog' is [d,e). The 'e' of
478 1 : // the merged span comes from i0's [e,h)'s start boundary. The [e,h) span
479 1 : // does not cover any key < dog, so we cannot find the span by positioning
480 1 : // the child iterators using a SeekLT(dog).
481 1 : //
482 1 : // Instead, if we take all the child iterators' spans bounds:
483 1 : // a b c d e h k p
484 1 : // We want to partition them into keys < `key` and keys ≥ `key`.
485 1 : // dog
486 1 : // │
487 1 : // a b c d│e h k p
488 1 : // │
489 1 : // The largest key on the left of the partition forms the merged span's
490 1 : // start key, and the smallest key on the right of the partition forms the
491 1 : // merged span's end key. Recharacterized:
492 1 : //
493 1 : // M.Start: the largest boundary < k of any child span
494 1 : // M.End: the smallest boundary ≥ k of any child span
495 1 : //
496 1 : // The FragmentIterator interface doesn't implement seeking by all bounds,
497 1 : // it implements seeking by containment. A SeekGE(k) will ensure we observe
498 1 : // all start boundaries ≥ k and all end boundaries > k but does not ensure
499 1 : // we observe end boundaries = k or any boundaries < k. A SeekLT(k) will
500 1 : // ensure we observe all start boundaries < k and all end boundaries ≤ k but
501 1 : // does not ensure we observe any start boundaries = k or any boundaries >
502 1 : // k. This forces us to seek in one direction and step in the other.
503 1 : //
504 1 : // In a SeekLT, we want to end up oriented in the backward direction when
505 1 : // complete, so we begin with searching for M.End by SeekGE-ing every
506 1 : // child iterator to `k`. For every child span found, we determine the
507 1 : // smallest bound ≥ `k` and use it to initialize our min heap. The resulting
508 1 : // root of the min heap is a preliminary value for `M.End`.
509 1 : for i := range m.levels {
510 1 : l := &m.levels[i]
511 1 : s := l.iter.SeekGE(key)
512 1 : if s == nil {
513 1 : l.heapKey = boundKey{kind: boundKindInvalid}
514 1 : } else if m.cmp(s.Start, key) >= 0 {
515 1 : l.heapKey = boundKey{
516 1 : kind: boundKindFragmentStart,
517 1 : key: s.Start,
518 1 : span: s,
519 1 : }
520 1 : } else {
521 1 : // s.Start < key
522 1 : // We need to use this span's end bound, since that's the smallest
523 1 : // bound > key.
524 1 : l.heapKey = boundKey{
525 1 : kind: boundKindFragmentEnd,
526 1 : key: s.End,
527 1 : span: s,
528 1 : }
529 1 : }
530 : }
531 1 : m.initMinHeap()
532 1 : if m.err != nil {
533 1 : return nil
534 1 : } else if len(m.heap.items) == 0 {
535 1 : // There are no spans covering any key ≥ `key`. There is no span that
536 1 : // straddles the seek key. Reorient the heap into a max heap and return
537 1 : // the first span we find in the reverse direction.
538 1 : m.switchToMaxHeap()
539 1 : return m.findPrevFragmentSet()
540 1 : }
541 :
542 : // The heap root is now the smallest boundary key b such that:
543 : // 1. b > k
544 : // 2. b = k, and b is a start boundary
545 : // There's a third case that we will need to consider later, after we've
546 : // switched to a max heap:
547 : // 3. there exists an end boundary key b such that b = k.
548 : // An end boundary key equal to k would not be surfaced when we seeked all
549 : // the levels using SeekGE(k), since k would not be contained within the
550 : // exclusive end boundary.
551 : //
552 : // Assume that the tightest boundary ≥ k is the current heap root (cases 1 &
553 : // 2). After we switch to a max heap, we'll check for the third case and
554 : // adjust the end boundary if necessary.
555 1 : m.end = m.heap.items[0].boundKey.key
556 1 :
557 1 : // Before switching the direction of the heap, save a copy of the end
558 1 : // boundary if it's the start boundary of some child span. Prev-ing the
559 1 : // child iterator might switch files and invalidate the memory of the bound.
560 1 : if m.heap.items[0].boundKey.kind == boundKindFragmentStart {
561 1 : m.buf = append(m.buf[:0], m.end...)
562 1 : m.end = m.buf
563 1 : }
564 :
565 : // Switch to a max heap. This will move each level to the previous bound in
566 : // every level, and then establish a max heap. This allows us to obtain the
567 : // largest boundary key < `key`, which will serve as our candidate start
568 : // bound.
569 1 : m.switchToMaxHeap()
570 1 : if m.err != nil {
571 1 : return nil
572 1 : } else if len(m.heap.items) == 0 {
573 1 : return nil
574 1 : }
575 : // Check for the case 3 described above. It's possible that when we switch
576 : // heap directions, we discover an end boundary of some child span that is
577 : // equal to the seek key `key`. In this case, we want this key to be our end
578 : // boundary.
579 1 : if m.heap.items[0].boundKey.kind == boundKindFragmentEnd &&
580 1 : m.cmp(m.heap.items[0].boundKey.key, key) == 0 {
581 1 : // Call findPrevFragmentSet, which will set m.end to the heap root and
582 1 : // proceed backwards.
583 1 : return m.findPrevFragmentSet()
584 1 : }
585 :
586 1 : m.start = m.heap.items[0].boundKey.key
587 1 : if found, s := m.synthesizeKeys(-1); found && s != nil {
588 1 : return s
589 1 : }
590 1 : return m.findPrevFragmentSet()
591 : }
592 :
593 : // First seeks the iterator to the first span.
594 1 : func (m *MergingIter) First() *Span {
595 1 : m.invalidate() // clear state about current position
596 1 : for i := range m.levels {
597 1 : if s := m.levels[i].iter.First(); s == nil {
598 1 : m.levels[i].heapKey = boundKey{kind: boundKindInvalid}
599 1 : } else {
600 1 : m.levels[i].heapKey = boundKey{
601 1 : kind: boundKindFragmentStart,
602 1 : key: s.Start,
603 1 : span: s,
604 1 : }
605 1 : }
606 : }
607 1 : m.initMinHeap()
608 1 : return m.findNextFragmentSet()
609 : }
610 :
611 : // Last seeks the iterator to the last span.
612 1 : func (m *MergingIter) Last() *Span {
613 1 : m.invalidate() // clear state about current position
614 1 : for i := range m.levels {
615 1 : if s := m.levels[i].iter.Last(); s == nil {
616 1 : m.levels[i].heapKey = boundKey{kind: boundKindInvalid}
617 1 : } else {
618 1 : m.levels[i].heapKey = boundKey{
619 1 : kind: boundKindFragmentEnd,
620 1 : key: s.End,
621 1 : span: s,
622 1 : }
623 1 : }
624 : }
625 1 : m.initMaxHeap()
626 1 : return m.findPrevFragmentSet()
627 : }
628 :
629 : // Next advances the iterator to the next span.
630 1 : func (m *MergingIter) Next() *Span {
631 1 : if m.err != nil {
632 0 : return nil
633 0 : }
634 1 : if m.dir == +1 && (m.end == nil || m.start == nil) {
635 0 : return nil
636 0 : }
637 1 : if m.dir != +1 {
638 1 : m.switchToMinHeap()
639 1 : }
640 1 : return m.findNextFragmentSet()
641 : }
642 :
643 : // Prev advances the iterator to the previous span.
644 1 : func (m *MergingIter) Prev() *Span {
645 1 : if m.err != nil {
646 0 : return nil
647 0 : }
648 1 : if m.dir == -1 && (m.end == nil || m.start == nil) {
649 0 : return nil
650 0 : }
651 1 : if m.dir != -1 {
652 1 : m.switchToMaxHeap()
653 1 : }
654 1 : return m.findPrevFragmentSet()
655 : }
656 :
657 : // Error returns any accumulated error.
658 1 : func (m *MergingIter) Error() error {
659 1 : if m.heap.len() == 0 || m.err != nil {
660 1 : return m.err
661 1 : }
662 1 : return m.levels[m.heap.items[0].index].iter.Error()
663 : }
664 :
665 : // Close closes the iterator, releasing all acquired resources.
666 1 : func (m *MergingIter) Close() error {
667 1 : for i := range m.levels {
668 1 : if err := m.levels[i].iter.Close(); err != nil && m.err == nil {
669 0 : m.err = err
670 0 : }
671 : }
672 1 : m.levels = nil
673 1 : m.heap.items = m.heap.items[:0]
674 1 : return m.err
675 : }
676 :
677 : // String implements fmt.Stringer.
678 0 : func (m *MergingIter) String() string {
679 0 : return "merging-keyspan"
680 0 : }
681 :
682 1 : func (m *MergingIter) initMinHeap() {
683 1 : m.dir = +1
684 1 : m.heap.reverse = false
685 1 : m.initHeap()
686 1 : }
687 :
688 1 : func (m *MergingIter) initMaxHeap() {
689 1 : m.dir = -1
690 1 : m.heap.reverse = true
691 1 : m.initHeap()
692 1 : }
693 :
694 1 : func (m *MergingIter) initHeap() {
695 1 : m.heap.items = m.heap.items[:0]
696 1 : for i := range m.levels {
697 1 : if l := &m.levels[i]; l.heapKey.kind != boundKindInvalid {
698 1 : m.heap.items = append(m.heap.items, mergingIterItem{
699 1 : index: i,
700 1 : boundKey: &l.heapKey,
701 1 : })
702 1 : } else {
703 1 : m.err = firstError(m.err, l.iter.Error())
704 1 : if m.err != nil {
705 1 : return
706 1 : }
707 : }
708 : }
709 1 : m.heap.init()
710 : }
711 :
712 1 : func (m *MergingIter) switchToMinHeap() {
713 1 : // switchToMinHeap reorients the heap for forward iteration, without moving
714 1 : // the current MergingIter position.
715 1 :
716 1 : // The iterator is currently positioned at the span [m.start, m.end),
717 1 : // oriented in the reverse direction, so each level's iterator is positioned
718 1 : // to the largest key ≤ m.start. To reorient in the forward direction, we
719 1 : // must advance each level's iterator to the smallest key ≥ m.end. Consider
720 1 : // this three-level example.
721 1 : //
722 1 : // i0: b---d e-----h
723 1 : // i1: a---c h-----k
724 1 : // i2: a------------------------------p
725 1 : //
726 1 : // merged: a-b-c-d-e-----h-----k----------p
727 1 : //
728 1 : // If currently positioned at the merged span [c,d), then the level
729 1 : // iterators' heap keys are:
730 1 : //
731 1 : // i0: (b, [b, d)) i1: (c, [a,c)) i2: (a, [a,p))
732 1 : //
733 1 : // Reversing the heap should not move the merging iterator and should not
734 1 : // change the current [m.start, m.end) bounds. It should only prepare for
735 1 : // forward iteration by updating the child iterators' heap keys to:
736 1 : //
737 1 : // i0: (d, [b, d)) i1: (h, [h,k)) i2: (p, [a,p))
738 1 : //
739 1 : // In every level the first key ≥ m.end is the next in the iterator.
740 1 : // Justification: Suppose not and a level iterator's next key was some key k
741 1 : // such that k < m.end. The max-heap invariant dictates that the current
742 1 : // iterator position is the largest entry with a user key ≥ m.start. This
743 1 : // means k > m.start. We started with the assumption that k < m.end, so
744 1 : // m.start < k < m.end. But then k is between our current span bounds,
745 1 : // and reverse iteration would have constructed the current interval to be
746 1 : // [k, m.end) not [m.start, m.end).
747 1 :
748 1 : if invariants.Enabled {
749 1 : for i := range m.levels {
750 1 : l := &m.levels[i]
751 1 : if l.heapKey.kind != boundKindInvalid && m.cmp(l.heapKey.key, m.start) > 0 {
752 0 : panic("pebble: invariant violation: max-heap key > m.start")
753 : }
754 : }
755 : }
756 :
757 1 : for i := range m.levels {
758 1 : m.levels[i].next()
759 1 : }
760 1 : m.initMinHeap()
761 : }
762 :
763 1 : func (m *MergingIter) switchToMaxHeap() {
764 1 : // switchToMaxHeap reorients the heap for reverse iteration, without moving
765 1 : // the current MergingIter position.
766 1 :
767 1 : // The iterator is currently positioned at the span [m.start, m.end),
768 1 : // oriented in the forward direction. Each level's iterator is positioned at
769 1 : // the smallest bound ≥ m.end. To reorient in the reverse direction, we must
770 1 : // move each level's iterator to the largest key ≤ m.start. Consider this
771 1 : // three-level example.
772 1 : //
773 1 : // i0: b---d e-----h
774 1 : // i1: a---c h-----k
775 1 : // i2: a------------------------------p
776 1 : //
777 1 : // merged: a-b-c-d-e-----h-----k----------p
778 1 : //
779 1 : // If currently positioned at the merged span [c,d), then the level
780 1 : // iterators' heap keys are:
781 1 : //
782 1 : // i0: (d, [b, d)) i1: (h, [h,k)) i2: (p, [a,p))
783 1 : //
784 1 : // Reversing the heap should not move the merging iterator and should not
785 1 : // change the current [m.start, m.end) bounds. It should only prepare for
786 1 : // reverse iteration by updating the child iterators' heap keys to:
787 1 : //
788 1 : // i0: (b, [b, d)) i1: (c, [a,c)) i2: (a, [a,p))
789 1 : //
790 1 : // In every level the largest key ≤ m.start is the prev in the iterator.
791 1 : // Justification: Suppose not and a level iterator's prev key was some key k
792 1 : // such that k > m.start. The min-heap invariant dictates that the current
793 1 : // iterator position is the smallest entry with a user key ≥ m.end. This
794 1 : // means k < m.end, otherwise the iterator would be positioned at k. We
795 1 : // started with the assumption that k > m.start, so m.start < k < m.end. But
796 1 : // then k is between our current span bounds, and reverse iteration
797 1 : // would have constructed the current interval to be [m.start, k) not
798 1 : // [m.start, m.end).
799 1 :
800 1 : if invariants.Enabled {
801 1 : for i := range m.levels {
802 1 : l := &m.levels[i]
803 1 : if l.heapKey.kind != boundKindInvalid && m.cmp(l.heapKey.key, m.end) < 0 {
804 0 : panic("pebble: invariant violation: min-heap key < m.end")
805 : }
806 : }
807 : }
808 :
809 1 : for i := range m.levels {
810 1 : m.levels[i].prev()
811 1 : }
812 1 : m.initMaxHeap()
813 : }
814 :
815 1 : func (m *MergingIter) cmp(a, b []byte) int {
816 1 : return m.heap.cmp(a, b)
817 1 : }
818 :
819 1 : func (m *MergingIter) findNextFragmentSet() *Span {
820 1 : // Each iteration of this loop considers a new merged span between unique
821 1 : // user keys. An iteration may find that there exists no overlap for a given
822 1 : // span, (eg, if the spans [a,b), [d, e) exist within level iterators, the
823 1 : // below loop will still consider [b,d) before continuing to [d, e)). It
824 1 : // returns when it finds a span that is covered by at least one key.
825 1 :
826 1 : for m.heap.len() > 0 && m.err == nil {
827 1 : // Initialize the next span's start bound. SeekGE and First prepare the
828 1 : // heap without advancing. Next leaves the heap in a state such that the
829 1 : // root is the smallest bound key equal to the returned span's end key,
830 1 : // so the heap is already positioned at the next merged span's start key.
831 1 :
832 1 : // NB: m.heapRoot() might be either an end boundary OR a start boundary
833 1 : // of a level's span. Both end and start boundaries may still be a start
834 1 : // key of a span in the set of fragmented spans returned by MergingIter.
835 1 : // Consider the scenario:
836 1 : // a----------l #1
837 1 : // b-----------m #2
838 1 : //
839 1 : // The merged, fully-fragmented spans that MergingIter exposes to the caller
840 1 : // have bounds:
841 1 : // a-b #1
842 1 : // b--------l #1
843 1 : // b--------l #2
844 1 : // l-m #2
845 1 : //
846 1 : // When advancing to l-m#2, we must set m.start to 'l', which originated
847 1 : // from [a,l)#1's end boundary.
848 1 : m.start = m.heap.items[0].boundKey.key
849 1 :
850 1 : // Before calling nextEntry, consider whether it might invalidate our
851 1 : // start boundary. If the start boundary key originated from an end
852 1 : // boundary, then we need to copy the start key before advancing the
853 1 : // underlying iterator to the next Span.
854 1 : if m.heap.items[0].boundKey.kind == boundKindFragmentEnd {
855 1 : m.buf = append(m.buf[:0], m.start...)
856 1 : m.start = m.buf
857 1 : }
858 :
859 : // There may be many entries all with the same user key. Spans in other
860 : // levels may also start or end at this same user key. For eg:
861 : // L1: [a, c) [c, d)
862 : // L2: [c, e)
863 : // If we're positioned at L1's end(c) end boundary, we want to advance
864 : // to the first bound > c.
865 1 : m.nextEntry()
866 1 : for len(m.heap.items) > 0 && m.err == nil && m.cmp(m.heapRoot(), m.start) == 0 {
867 1 : m.nextEntry()
868 1 : }
869 1 : if len(m.heap.items) == 0 || m.err != nil {
870 1 : break
871 : }
872 :
873 : // The current entry at the top of the heap is the first key > m.start.
874 : // It must become the end bound for the span we will return to the user.
875 : // In the above example, the root of the heap is L1's end(d).
876 1 : m.end = m.heap.items[0].boundKey.key
877 1 :
878 1 : // Each level within m.levels may have a span that overlaps the
879 1 : // fragmented key span [m.start, m.end). Update m.keys to point to them
880 1 : // and sort them by kind, sequence number. There may not be any keys
881 1 : // defined over [m.start, m.end) if we're between the end of one span
882 1 : // and the start of the next, OR if the configured transform filters any
883 1 : // keys out. We allow empty spans that were emitted by child iterators, but
884 1 : // we elide empty spans created by the mergingIter itself that don't overlap
885 1 : // with any child iterator returned spans (i.e. empty spans that bridge two
886 1 : // distinct child-iterator-defined spans).
887 1 : if found, s := m.synthesizeKeys(+1); found && s != nil {
888 1 : return s
889 1 : }
890 : }
891 : // Exhausted.
892 1 : m.clear()
893 1 : return nil
894 : }
895 :
896 1 : func (m *MergingIter) findPrevFragmentSet() *Span {
897 1 : // Each iteration of this loop considers a new merged span between unique
898 1 : // user keys. An iteration may find that there exists no overlap for a given
899 1 : // span, (eg, if the spans [a,b), [d, e) exist within level iterators, the
900 1 : // below loop will still consider [b,d) before continuing to [a, b)). It
901 1 : // returns when it finds a span that is covered by at least one key.
902 1 :
903 1 : for m.heap.len() > 0 && m.err == nil {
904 1 : // Initialize the next span's end bound. SeekLT and Last prepare the
905 1 : // heap without advancing. Prev leaves the heap in a state such that the
906 1 : // root is the largest bound key equal to the returned span's start key,
907 1 : // so the heap is already positioned at the next merged span's end key.
908 1 :
909 1 : // NB: m.heapRoot() might be either an end boundary OR a start boundary
910 1 : // of a level's span. Both end and start boundaries may still be a start
911 1 : // key of a span returned by MergingIter. Consider the scenario:
912 1 : // a----------l #2
913 1 : // b-----------m #1
914 1 : //
915 1 : // The merged, fully-fragmented spans that MergingIter exposes to the caller
916 1 : // have bounds:
917 1 : // a-b #2
918 1 : // b--------l #2
919 1 : // b--------l #1
920 1 : // l-m #1
921 1 : //
922 1 : // When Preving to a-b#2, we must set m.end to 'b', which originated
923 1 : // from [b,m)#1's start boundary.
924 1 : m.end = m.heap.items[0].boundKey.key
925 1 :
926 1 : // Before calling prevEntry, consider whether it might invalidate our
927 1 : // end boundary. If the end boundary key originated from a start
928 1 : // boundary, then we need to copy the end key before advancing the
929 1 : // underlying iterator to the previous Span.
930 1 : if m.heap.items[0].boundKey.kind == boundKindFragmentStart {
931 1 : m.buf = append(m.buf[:0], m.end...)
932 1 : m.end = m.buf
933 1 : }
934 :
935 : // There may be many entries all with the same user key. Spans in other
936 : // levels may also start or end at this same user key. For eg:
937 : // L1: [a, c) [c, d)
938 : // L2: [c, e)
939 : // If we're positioned at L1's start(c) start boundary, we want to prev
940 : // to move to the first bound < c.
941 1 : m.prevEntry()
942 1 : for len(m.heap.items) > 0 && m.err == nil && m.cmp(m.heapRoot(), m.end) == 0 {
943 1 : m.prevEntry()
944 1 : }
945 1 : if len(m.heap.items) == 0 || m.err != nil {
946 1 : break
947 : }
948 :
949 : // The current entry at the top of the heap is the first key < m.end.
950 : // It must become the start bound for the span we will return to the
951 : // user. In the above example, the root of the heap is L1's start(a).
952 1 : m.start = m.heap.items[0].boundKey.key
953 1 :
954 1 : // Each level within m.levels may have a set of keys that overlap the
955 1 : // fragmented key span [m.start, m.end). Update m.keys to point to them
956 1 : // and sort them by kind, sequence number. There may not be any keys
957 1 : // spanning [m.start, m.end) if we're between the end of one span and
958 1 : // the start of the next, OR if the configured transform filters any
959 1 : // keys out. We allow empty spans that were emitted by child iterators, but
960 1 : // we elide empty spans created by the mergingIter itself that don't overlap
961 1 : // with any child iterator returned spans (i.e. empty spans that bridge two
962 1 : // distinct child-iterator-defined spans).
963 1 : if found, s := m.synthesizeKeys(-1); found && s != nil {
964 1 : return s
965 1 : }
966 : }
967 : // Exhausted.
968 1 : m.clear()
969 1 : return nil
970 : }
971 :
972 1 : func (m *MergingIter) heapRoot() []byte {
973 1 : return m.heap.items[0].boundKey.key
974 1 : }
975 :
976 : // synthesizeKeys is called by find{Next,Prev}FragmentSet to populate and
977 : // sort the set of keys overlapping [m.start, m.end).
978 : //
979 : // During forward iteration, if the current heap item is a fragment end,
980 : // then the fragment's start must be ≤ m.start and the fragment overlaps the
981 : // current iterator position of [m.start, m.end).
982 : //
983 : // During reverse iteration, if the current heap item is a fragment start,
984 : // then the fragment's end must be ≥ m.end and the fragment overlaps the
985 : // current iteration position of [m.start, m.end).
986 : //
987 : // The boolean return value, `found`, is true if the returned span overlaps
988 : // with a span returned by a child iterator.
989 1 : func (m *MergingIter) synthesizeKeys(dir int8) (bool, *Span) {
990 1 : if invariants.Enabled {
991 1 : if m.cmp(m.start, m.end) >= 0 {
992 0 : panic(fmt.Sprintf("pebble: invariant violation: span start ≥ end: %s >= %s", m.start, m.end))
993 : }
994 : }
995 :
996 1 : m.keys = m.keys[:0]
997 1 : found := false
998 1 : for i := range m.levels {
999 1 : if dir == +1 && m.levels[i].heapKey.kind == boundKindFragmentEnd ||
1000 1 : dir == -1 && m.levels[i].heapKey.kind == boundKindFragmentStart {
1001 1 : m.keys = append(m.keys, m.levels[i].heapKey.span.Keys...)
1002 1 : found = true
1003 1 : }
1004 : }
1005 : // TODO(jackson): We should be able to remove this sort and instead
1006 : // guarantee that we'll return keys in the order of the levels they're from.
1007 : // With careful iterator construction, this would guarantee that they're
1008 : // sorted by trailer descending for the range key iteration use case.
1009 1 : sort.Sort(&m.keys)
1010 1 :
1011 1 : // Apply the configured transform. See VisibleTransform.
1012 1 : m.span = Span{
1013 1 : Start: m.start,
1014 1 : End: m.end,
1015 1 : Keys: m.keys,
1016 1 : KeysOrder: ByTrailerDesc,
1017 1 : }
1018 1 : // NB: m.heap.cmp is a base.Compare, whereas m.cmp is a method on
1019 1 : // MergingIter.
1020 1 : if err := m.transformer.Transform(m.heap.cmp, m.span, &m.span); err != nil {
1021 0 : m.err = err
1022 0 : return false, nil
1023 0 : }
1024 1 : return found, &m.span
1025 : }
1026 :
1027 1 : func (m *MergingIter) invalidate() {
1028 1 : m.err = nil
1029 1 : }
1030 :
1031 1 : func (m *MergingIter) clear() {
1032 1 : for fi := range m.keys {
1033 1 : m.keys[fi] = Key{}
1034 1 : }
1035 1 : m.keys = m.keys[:0]
1036 : }
1037 :
1038 : // nextEntry steps to the next entry.
1039 1 : func (m *MergingIter) nextEntry() {
1040 1 : l := &m.levels[m.heap.items[0].index]
1041 1 : l.next()
1042 1 : if !l.heapKey.valid() {
1043 1 : // l.iter is exhausted.
1044 1 : m.err = l.iter.Error()
1045 1 : if m.err == nil {
1046 1 : m.heap.pop()
1047 1 : }
1048 1 : return
1049 : }
1050 :
1051 1 : if m.heap.len() > 1 {
1052 1 : m.heap.fix(0)
1053 1 : }
1054 : }
1055 :
1056 : // prevEntry steps to the previous entry.
1057 1 : func (m *MergingIter) prevEntry() {
1058 1 : l := &m.levels[m.heap.items[0].index]
1059 1 : l.prev()
1060 1 : if !l.heapKey.valid() {
1061 1 : // l.iter is exhausted.
1062 1 : m.err = l.iter.Error()
1063 1 : if m.err == nil {
1064 1 : m.heap.pop()
1065 1 : }
1066 1 : return
1067 : }
1068 :
1069 1 : if m.heap.len() > 1 {
1070 1 : m.heap.fix(0)
1071 1 : }
1072 : }
1073 :
1074 : // DebugString returns a string representing the current internal state of the
1075 : // merging iterator and its heap for debugging purposes.
1076 0 : func (m *MergingIter) DebugString() string {
1077 0 : var buf bytes.Buffer
1078 0 : fmt.Fprintf(&buf, "Current bounds: [%q, %q)\n", m.start, m.end)
1079 0 : for i := range m.levels {
1080 0 : fmt.Fprintf(&buf, "%d: heap key %s\n", i, m.levels[i].heapKey)
1081 0 : }
1082 0 : return buf.String()
1083 : }
1084 :
1085 : // WrapChildren implements FragmentIterator.
1086 0 : func (m *MergingIter) WrapChildren(wrap WrapFn) {
1087 0 : for i := range m.levels {
1088 0 : m.levels[i].iter = wrap(m.levels[i].iter)
1089 0 : }
1090 0 : m.wrapFn = wrap
1091 : }
1092 :
1093 : type mergingIterItem struct {
1094 : // boundKey points to the corresponding mergingIterLevel's `iterKey`.
1095 : *boundKey
1096 : // index is the index of this level within the MergingIter's levels field.
1097 : index int
1098 : }
1099 :
1100 : // mergingIterHeap is copied from mergingIterHeap defined in the root pebble
1101 : // package for use with point keys.
1102 :
1103 : type mergingIterHeap struct {
1104 : cmp base.Compare
1105 : reverse bool
1106 : items []mergingIterItem
1107 : }
1108 :
1109 1 : func (h *mergingIterHeap) len() int {
1110 1 : return len(h.items)
1111 1 : }
1112 :
1113 1 : func (h *mergingIterHeap) less(i, j int) bool {
1114 1 : // This key comparison only uses the user key and not the boundKind. Bound
1115 1 : // kind doesn't matter because when stepping over a user key,
1116 1 : // findNextFragmentSet and findPrevFragmentSet skip past all heap items with
1117 1 : // that user key, and makes no assumptions on ordering. All other heap
1118 1 : // examinations only consider the user key.
1119 1 : ik, jk := h.items[i].key, h.items[j].key
1120 1 : c := h.cmp(ik, jk)
1121 1 : if h.reverse {
1122 1 : return c > 0
1123 1 : }
1124 1 : return c < 0
1125 : }
1126 :
1127 1 : func (h *mergingIterHeap) swap(i, j int) {
1128 1 : h.items[i], h.items[j] = h.items[j], h.items[i]
1129 1 : }
1130 :
1131 : // init, fix, up and down are copied from the go stdlib.
1132 1 : func (h *mergingIterHeap) init() {
1133 1 : // heapify
1134 1 : n := h.len()
1135 1 : for i := n/2 - 1; i >= 0; i-- {
1136 1 : h.down(i, n)
1137 1 : }
1138 : }
1139 :
1140 1 : func (h *mergingIterHeap) fix(i int) {
1141 1 : if !h.down(i, h.len()) {
1142 1 : h.up(i)
1143 1 : }
1144 : }
1145 :
1146 1 : func (h *mergingIterHeap) pop() *mergingIterItem {
1147 1 : n := h.len() - 1
1148 1 : h.swap(0, n)
1149 1 : h.down(0, n)
1150 1 : item := &h.items[n]
1151 1 : h.items = h.items[:n]
1152 1 : return item
1153 1 : }
1154 :
1155 1 : func (h *mergingIterHeap) up(j int) {
1156 1 : for {
1157 1 : i := (j - 1) / 2 // parent
1158 1 : if i == j || !h.less(j, i) {
1159 1 : break
1160 : }
1161 0 : h.swap(i, j)
1162 0 : j = i
1163 : }
1164 : }
1165 :
1166 1 : func (h *mergingIterHeap) down(i0, n int) bool {
1167 1 : i := i0
1168 1 : for {
1169 1 : j1 := 2*i + 1
1170 1 : if j1 >= n || j1 < 0 { // j1 < 0 after int overflow
1171 1 : break
1172 : }
1173 1 : j := j1 // left child
1174 1 : if j2 := j1 + 1; j2 < n && h.less(j2, j1) {
1175 1 : j = j2 // = 2*i + 2 // right child
1176 1 : }
1177 1 : if !h.less(j, i) {
1178 1 : break
1179 : }
1180 1 : h.swap(i, j)
1181 1 : i = j
1182 : }
1183 1 : return i > i0
1184 : }
1185 :
1186 : type boundKind int8
1187 :
1188 : const (
1189 : boundKindInvalid boundKind = iota
1190 : boundKindFragmentStart
1191 : boundKindFragmentEnd
1192 : )
1193 :
1194 : type boundKey struct {
1195 : kind boundKind
1196 : key []byte
1197 : // span holds the span the bound key comes from.
1198 : //
1199 : // If kind is boundKindFragmentStart, then key is span.Start. If kind is
1200 : // boundKindFragmentEnd, then key is span.End.
1201 : span *Span
1202 : }
1203 :
1204 1 : func (k boundKey) valid() bool {
1205 1 : return k.kind != boundKindInvalid
1206 1 : }
1207 :
1208 0 : func (k boundKey) String() string {
1209 0 : var buf bytes.Buffer
1210 0 : switch k.kind {
1211 0 : case boundKindInvalid:
1212 0 : fmt.Fprint(&buf, "invalid")
1213 0 : case boundKindFragmentStart:
1214 0 : fmt.Fprint(&buf, "fragment-start")
1215 0 : case boundKindFragmentEnd:
1216 0 : fmt.Fprint(&buf, "fragment-end ")
1217 0 : default:
1218 0 : fmt.Fprintf(&buf, "unknown-kind(%d)", k.kind)
1219 : }
1220 0 : fmt.Fprintf(&buf, " %s [", k.key)
1221 0 : fmt.Fprintf(&buf, "%s", k.span)
1222 0 : fmt.Fprint(&buf, "]")
1223 0 : return buf.String()
1224 : }
|