Line data Source code
1 : // Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package rangekeystack
6 :
7 : import (
8 : "bytes"
9 :
10 : "github.com/cockroachdb/pebble/internal/base"
11 : "github.com/cockroachdb/pebble/internal/invariants"
12 : "github.com/cockroachdb/pebble/internal/keyspan"
13 : "github.com/cockroachdb/pebble/internal/keyspan/keyspanimpl"
14 : "github.com/cockroachdb/pebble/internal/manifest"
15 : "github.com/cockroachdb/pebble/internal/rangekey"
16 : )
17 :
18 : // UserIteratorConfig holds state for constructing the range key iterator stack
19 : // for user iteration. The range key iterator must merge range key spans across
20 : // the levels of the LSM. This merging is performed by a keyspanimpl.MergingIter
21 : // on-the-fly. The UserIteratorConfig implements keyspan.Transformer, evaluating
22 : // range-key semantics and shadowing, so the spans returned by a MergingIter are
23 : // fully resolved.
24 : //
25 : // The MergingIter is wrapped by a BoundedIter, which elides spans that are
26 : // outside the iterator bounds (or the current prefix's bounds, during prefix
27 : // iteration mode).
28 : //
29 : // To provide determinisim during iteration, the BoundedIter is wrapped by a
30 : // DefragmentingIter that defragments abutting spans with identical
31 : // user-observable state.
32 : //
33 : // At the top-level an InterleavingIter interleaves range keys with point keys
34 : // and performs truncation to iterator bounds.
35 : //
36 : // Below is an abbreviated diagram illustrating the mechanics of a SeekGE.
37 : //
38 : // InterleavingIter.SeekGE
39 : // │
40 : // DefragmentingIter.SeekGE
41 : // │
42 : // BoundedIter.SeekGE
43 : // │
44 : // ╭────────────────┴───────────────╮
45 : // │ ├── defragmentBwd*
46 : // MergingIter.SeekGE │
47 : // │ ╰── defragmentFwd
48 : // ╰─╶╶ per level╶╶ ─╮
49 : // │
50 : // │
51 : // ├── <?>.SeekLT
52 : // │
53 : // ╰── <?>.Next
54 : type UserIteratorConfig struct {
55 : snapshot uint64
56 : comparer *base.Comparer
57 : miter keyspanimpl.MergingIter
58 : biter keyspan.BoundedIter
59 : diter keyspan.DefragmentingIter
60 : liters [manifest.NumLevels]keyspanimpl.LevelIter
61 : litersUsed int
62 : internalKeys bool
63 : bufs *Buffers
64 : }
65 :
66 : // Buffers holds various buffers used for range key iteration. They're exposed
67 : // so that they may be pooled and reused between iterators.
68 : type Buffers struct {
69 : merging keyspanimpl.MergingBuffers
70 : defragmenting keyspan.DefragmentingBuffers
71 : sortBuf keyspan.KeysBySuffix
72 : }
73 :
74 : // PrepareForReuse discards any excessively large buffers.
75 2 : func (bufs *Buffers) PrepareForReuse() {
76 2 : bufs.merging.PrepareForReuse()
77 2 : bufs.defragmenting.PrepareForReuse()
78 2 : }
79 :
80 : // Init initializes the range key iterator stack for user iteration. The
81 : // resulting fragment iterator applies range key semantics, defragments spans
82 : // according to their user-observable state and, if !internalKeys, removes all
83 : // Keys other than RangeKeySets describing the current state of range keys. The
84 : // resulting spans contain Keys sorted by suffix (unless internalKeys is true,
85 : // in which case they remain sorted by trailer descending).
86 : //
87 : // The snapshot sequence number parameter determines which keys are visible. Any
88 : // keys not visible at the provided snapshot are ignored.
89 : func (ui *UserIteratorConfig) Init(
90 : comparer *base.Comparer,
91 : snapshot uint64,
92 : lower, upper []byte,
93 : hasPrefix *bool,
94 : prefix *[]byte,
95 : internalKeys bool,
96 : bufs *Buffers,
97 : iters ...keyspan.FragmentIterator,
98 2 : ) keyspan.FragmentIterator {
99 2 : ui.snapshot = snapshot
100 2 : ui.comparer = comparer
101 2 : ui.internalKeys = internalKeys
102 2 : ui.miter.Init(comparer.Compare, ui, &bufs.merging, iters...)
103 2 : ui.biter.Init(comparer.Compare, comparer.Split, &ui.miter, lower, upper, hasPrefix, prefix)
104 2 : if internalKeys {
105 2 : ui.diter.Init(comparer, &ui.biter, keyspan.DefragmentInternal, keyspan.StaticDefragmentReducer, &bufs.defragmenting)
106 2 : } else {
107 2 : ui.diter.Init(comparer, &ui.biter, ui, keyspan.StaticDefragmentReducer, &bufs.defragmenting)
108 2 : }
109 2 : ui.litersUsed = 0
110 2 : ui.bufs = bufs
111 2 : return &ui.diter
112 : }
113 :
114 : // AddLevel adds a new level to the bottom of the iterator stack. AddLevel
115 : // must be called after Init and before any other method on the iterator.
116 2 : func (ui *UserIteratorConfig) AddLevel(iter keyspan.FragmentIterator) {
117 2 : ui.miter.AddLevel(iter)
118 2 : }
119 :
120 : // NewLevelIter returns a pointer to a newly allocated or reused
121 : // keyspanimpl.LevelIter. The caller is responsible for calling Init() on this
122 : // instance.
123 2 : func (ui *UserIteratorConfig) NewLevelIter() *keyspanimpl.LevelIter {
124 2 : if ui.litersUsed >= len(ui.liters) {
125 2 : return &keyspanimpl.LevelIter{}
126 2 : }
127 2 : ui.litersUsed++
128 2 : return &ui.liters[ui.litersUsed-1]
129 : }
130 :
131 : // SetBounds propagates bounds to the iterator stack. The fragment iterator
132 : // interface ordinarily doesn't enforce bounds, so this is exposed as an
133 : // explicit method on the user iterator config.
134 2 : func (ui *UserIteratorConfig) SetBounds(lower, upper []byte) {
135 2 : ui.biter.SetBounds(lower, upper)
136 2 : }
137 :
138 : // Transform implements the keyspan.Transformer interface for use with a
139 : // keyspanimpl.MergingIter. It transforms spans by resolving range keys at the
140 : // provided snapshot sequence number. Shadowing of keys is resolved (eg, removal
141 : // of unset keys, removal of keys overwritten by a set at the same suffix, etc)
142 : // and then non-RangeKeySet keys are removed. The resulting transformed spans
143 : // only contain RangeKeySets describing the state visible at the provided
144 : // sequence number, and hold their Keys sorted by Suffix (except if internalKeys
145 : // is true, then keys remain sorted by trailer.
146 2 : func (ui *UserIteratorConfig) Transform(cmp base.Compare, s keyspan.Span, dst *keyspan.Span) error {
147 2 : // Apply shadowing of keys.
148 2 : dst.Start = s.Start
149 2 : dst.End = s.End
150 2 : ui.bufs.sortBuf = keyspan.KeysBySuffix{
151 2 : Cmp: cmp,
152 2 : Keys: ui.bufs.sortBuf.Keys[:0],
153 2 : }
154 2 : rangekey.CoalesceIntoKeysBySuffix(ui.comparer.Equal, &ui.bufs.sortBuf, ui.snapshot, s.Keys)
155 2 : if ui.internalKeys {
156 2 : if s.KeysOrder != keyspan.ByTrailerDesc {
157 0 : panic("unexpected key ordering in UserIteratorTransform with internalKeys = true")
158 : }
159 2 : dst.Keys = ui.bufs.sortBuf.Keys
160 2 : keyspan.SortKeysByTrailer(&dst.Keys)
161 2 : return nil
162 : }
163 : // During user iteration over range keys, unsets and deletes don't matter. This
164 : // step helps logical defragmentation during iteration.
165 2 : keys := ui.bufs.sortBuf.Keys
166 2 : dst.Keys = dst.Keys[:0]
167 2 : for i := range keys {
168 2 : switch keys[i].Kind() {
169 2 : case base.InternalKeyKindRangeKeySet:
170 2 : if invariants.Enabled && len(dst.Keys) > 0 && cmp(dst.Keys[len(dst.Keys)-1].Suffix, keys[i].Suffix) > 0 {
171 0 : panic("pebble: keys unexpectedly not in ascending suffix order")
172 : }
173 2 : dst.Keys = append(dst.Keys, keys[i])
174 2 : case base.InternalKeyKindRangeKeyUnset:
175 2 : if invariants.Enabled && len(dst.Keys) > 0 && cmp(dst.Keys[len(dst.Keys)-1].Suffix, keys[i].Suffix) > 0 {
176 0 : panic("pebble: keys unexpectedly not in ascending suffix order")
177 : }
178 : // Skip.
179 2 : continue
180 2 : case base.InternalKeyKindRangeKeyDelete:
181 2 : // Skip.
182 2 : continue
183 0 : default:
184 0 : return base.CorruptionErrorf("pebble: unrecognized range key kind %s", keys[i].Kind())
185 : }
186 : }
187 : // coalesce results in dst.Keys being sorted by Suffix.
188 2 : dst.KeysOrder = keyspan.BySuffixAsc
189 2 : return nil
190 : }
191 :
192 : // ShouldDefragment implements the DefragmentMethod interface and configures a
193 : // DefragmentingIter to defragment spans of range keys if their user-visible
194 : // state is identical. This defragmenting method assumes the provided spans have
195 : // already been transformed through (UserIterationConfig).Transform, so all
196 : // RangeKeySets are user-visible sets and are already in Suffix order. This
197 : // defragmenter checks for equality between set suffixes and values (ignoring
198 : // sequence numbers). It's intended for use during user iteration, when the
199 : // wrapped keyspan iterator is merging spans across all levels of the LSM.
200 2 : func (ui *UserIteratorConfig) ShouldDefragment(equal base.Equal, a, b *keyspan.Span) bool {
201 2 : // This method is not called with internalKeys = true.
202 2 : if ui.internalKeys {
203 0 : panic("unexpected call to ShouldDefragment with internalKeys = true")
204 : }
205 : // This implementation must only be used on spans that have transformed by
206 : // ui.Transform. The transform applies shadowing, removes all keys besides
207 : // the resulting Sets and sorts the keys by suffix. Since shadowing has been
208 : // applied, each Set must set a unique suffix. If the two spans are
209 : // equivalent, they must have the same number of range key sets.
210 2 : if len(a.Keys) != len(b.Keys) || len(a.Keys) == 0 {
211 2 : return false
212 2 : }
213 2 : if a.KeysOrder != keyspan.BySuffixAsc || b.KeysOrder != keyspan.BySuffixAsc {
214 0 : panic("pebble: range key span's keys unexpectedly not in ascending suffix order")
215 : }
216 :
217 2 : ret := true
218 2 : for i := range a.Keys {
219 2 : if invariants.Enabled {
220 2 : if a.Keys[i].Kind() != base.InternalKeyKindRangeKeySet ||
221 2 : b.Keys[i].Kind() != base.InternalKeyKindRangeKeySet {
222 0 : panic("pebble: unexpected non-RangeKeySet during defragmentation")
223 : }
224 2 : if i > 0 && (ui.comparer.Compare(a.Keys[i].Suffix, a.Keys[i-1].Suffix) < 0 ||
225 2 : ui.comparer.Compare(b.Keys[i].Suffix, b.Keys[i-1].Suffix) < 0) {
226 0 : panic("pebble: range keys not ordered by suffix during defragmentation")
227 : }
228 : }
229 2 : if !equal(a.Keys[i].Suffix, b.Keys[i].Suffix) {
230 2 : ret = false
231 2 : break
232 : }
233 2 : if !bytes.Equal(a.Keys[i].Value, b.Keys[i].Value) {
234 2 : ret = false
235 2 : break
236 : }
237 : }
238 2 : return ret
239 : }
|