Line data Source code
1 : // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package rowblk
6 :
7 : import (
8 : "bytes"
9 : "context"
10 : "fmt"
11 : "os"
12 : "sync"
13 :
14 : "github.com/cockroachdb/pebble/internal/base"
15 : "github.com/cockroachdb/pebble/internal/invariants"
16 : "github.com/cockroachdb/pebble/internal/keyspan"
17 : "github.com/cockroachdb/pebble/internal/rangedel"
18 : "github.com/cockroachdb/pebble/internal/rangekey"
19 : "github.com/cockroachdb/pebble/internal/treeprinter"
20 : "github.com/cockroachdb/pebble/sstable/block"
21 : )
22 :
23 : // fragmentIter wraps an Iter, implementing the keyspan.FragmentIterator
24 : // interface. It's used for reading range deletion and range key blocks.
25 : //
26 : // Range deletions and range keys are fragmented before they're persisted to the
27 : // block. Overlapping fragments have identical bounds. The fragmentIter gathers
28 : // all the fragments with identical bounds within a block and returns a single
29 : // keyspan.Span describing all the keys defined over the span.
30 : //
31 : // # Memory lifetime
32 : //
33 : // A Span returned by fragmentIter is only guaranteed to be stable until the
34 : // next fragmentIter iteration positioning method. A Span's Keys slice may be
35 : // reused, so the user must not assume it's stable.
36 : //
37 : // Blocks holding range deletions and range keys are configured to use a restart
38 : // interval of 1. This provides key stability. The caller may treat the various
39 : // byte slices (start, end, suffix, value) as stable for the lifetime of the
40 : // iterator.
41 : type fragmentIter struct {
42 : suffixCmp base.CompareRangeSuffixes
43 : blockIter Iter
44 : keyBuf [2]keyspan.Key
45 : span keyspan.Span
46 : dir int8
47 :
48 : // fileNum is used for logging/debugging.
49 : fileNum base.DiskFileNum
50 :
51 : syntheticSuffix block.SyntheticSuffix
52 : syntheticPrefix block.SyntheticPrefix
53 : // startKeyBuf is a buffer that is reused to store the start key of the span
54 : // when a synthetic prefix is used.
55 : startKeyBuf []byte
56 : // endKeyBuf is a buffer that is reused to generate the end key of the span
57 : // when a synthetic prefix is set. It always starts with syntheticPrefix.
58 : endKeyBuf []byte
59 :
60 : closeCheck invariants.CloseChecker
61 : }
62 :
63 : var _ keyspan.FragmentIterator = (*fragmentIter)(nil)
64 :
65 : var fragmentBlockIterPool = sync.Pool{
66 1 : New: func() interface{} {
67 1 : i := &fragmentIter{}
68 1 : // Note: this is a no-op if invariants are disabled or race is enabled.
69 1 : invariants.SetFinalizer(i, checkFragmentBlockIterator)
70 1 : return i
71 1 : },
72 : }
73 :
74 : // NewFragmentIter returns a new keyspan iterator that iterates over a block's
75 : // spans.
76 : func NewFragmentIter(
77 : fileNum base.DiskFileNum,
78 : cmp base.Compare,
79 : suffixCmp base.CompareRangeSuffixes,
80 : split base.Split,
81 : blockHandle block.BufferHandle,
82 : transforms block.FragmentIterTransforms,
83 1 : ) (keyspan.FragmentIterator, error) {
84 1 : i := fragmentBlockIterPool.Get().(*fragmentIter)
85 1 :
86 1 : i.suffixCmp = suffixCmp
87 1 : // Use the i.keyBuf array to back the Keys slice to prevent an allocation
88 1 : // when the spans contain few keys.
89 1 : i.span.Keys = i.keyBuf[:0]
90 1 : i.fileNum = fileNum
91 1 : i.syntheticSuffix = transforms.SyntheticSuffix
92 1 : i.syntheticPrefix = transforms.SyntheticPrefix
93 1 : if transforms.SyntheticPrefix.IsSet() {
94 1 : i.endKeyBuf = append(i.endKeyBuf[:0], transforms.SyntheticPrefix...)
95 1 : }
96 1 : i.closeCheck = invariants.CloseChecker{}
97 1 :
98 1 : if err := i.blockIter.InitHandle(cmp, split, blockHandle, block.IterTransforms{
99 1 : SyntheticSeqNum: transforms.SyntheticSeqNum,
100 1 : // We let the blockIter prepend the prefix to span start keys; the fragment
101 1 : // iterator will prepend it for end keys. We could do everything in the
102 1 : // fragment iterator, but we'd have to duplicate the logic for adjusting the
103 1 : // seek key for SeekGE/SeekLT.
104 1 : SyntheticPrefix: transforms.SyntheticPrefix,
105 1 : // It's okay for HideObsoletePoints to be false here, even for shared
106 1 : // ingested sstables. This is because rangedels do not apply to points in
107 1 : // the same sstable at the same sequence number anyway, so exposing obsolete
108 1 : // rangedels is harmless.
109 1 : HideObsoletePoints: false,
110 1 : }); err != nil {
111 0 : i.Close()
112 0 : return nil, err
113 0 : }
114 1 : return i, nil
115 : }
116 :
117 : // initSpan initializes the span with a single fragment.
118 : //
119 : // Note that the span start and end keys and range key contents are aliased to
120 : // the key or value when we don't have a synthetic prefix. This is ok because
121 : // the range del/key block doesn't use prefix compression, so the key/value will
122 : // be pointing directly into the buffer data.
123 1 : func (i *fragmentIter) initSpan(ik base.InternalKey, internalValue []byte) error {
124 1 : if ik.Kind() == base.InternalKeyKindRangeDelete {
125 1 : i.span = rangedel.Decode(ik, internalValue, i.span.Keys[:0])
126 1 : } else {
127 1 : var err error
128 1 : i.span, err = rangekey.Decode(ik, internalValue, i.span.Keys[:0])
129 1 : if err != nil {
130 0 : return err
131 0 : }
132 : }
133 : // When synthetic prefix is used in the blockIter, the keys cannot be used
134 : // across multiple blockIter operations; we have to make a copy in this case.
135 1 : if i.syntheticPrefix.IsSet() || invariants.Sometimes(10) {
136 1 : i.startKeyBuf = append(i.startKeyBuf[:0], i.span.Start...)
137 1 : i.span.Start = i.startKeyBuf
138 1 : }
139 1 : return nil
140 : }
141 :
142 : // addToSpan adds a fragment to the existing span. The fragment must be for the
143 : // same start/end keys.
144 : func (i *fragmentIter) addToSpan(
145 : cmp base.Compare, ik base.InternalKey, internalValue []byte,
146 1 : ) error {
147 1 : var err error
148 1 : if ik.Kind() == base.InternalKeyKindRangeDelete {
149 1 : err = rangedel.DecodeIntoSpan(cmp, ik, internalValue, &i.span)
150 1 : } else {
151 1 : err = rangekey.DecodeIntoSpan(cmp, ik, internalValue, &i.span)
152 1 : }
153 1 : return err
154 : }
155 :
156 : // applySpanTransforms applies changes to the span that we decoded, if
157 : // appropriate.
158 1 : func (i *fragmentIter) applySpanTransforms() error {
159 1 : if i.syntheticPrefix.IsSet() || invariants.Sometimes(10) {
160 1 : // We have to make a copy of the start key because it will not stay valid
161 1 : // across multiple blockIter operations.
162 1 : i.startKeyBuf = append(i.startKeyBuf[:0], i.span.Start...)
163 1 : i.span.Start = i.startKeyBuf
164 1 : if invariants.Enabled && !bytes.Equal(i.syntheticPrefix, i.endKeyBuf[:len(i.syntheticPrefix)]) {
165 0 : panic("pebble: invariant violation: synthetic prefix mismatch")
166 : }
167 1 : i.endKeyBuf = append(i.endKeyBuf[:len(i.syntheticPrefix)], i.span.End...)
168 1 : i.span.End = i.endKeyBuf
169 : }
170 :
171 1 : if i.syntheticSuffix.IsSet() {
172 1 : for keyIdx := range i.span.Keys {
173 1 : k := &i.span.Keys[keyIdx]
174 1 :
175 1 : switch k.Kind() {
176 0 : case base.InternalKeyKindRangeKeySet:
177 0 : if len(k.Suffix) > 0 {
178 0 : if invariants.Enabled && i.suffixCmp(i.syntheticSuffix, k.Suffix) >= 0 {
179 0 : return base.AssertionFailedf("synthetic suffix %q >= RangeKeySet suffix %q",
180 0 : i.syntheticSuffix, k.Suffix)
181 0 : }
182 0 : k.Suffix = i.syntheticSuffix
183 : }
184 1 : case base.InternalKeyKindRangeKeyDelete:
185 : // Nothing to do.
186 0 : default:
187 0 : return base.AssertionFailedf("synthetic suffix not supported with key kind %s", k.Kind())
188 : }
189 : }
190 : }
191 1 : return nil
192 : }
193 :
194 : // gatherForward gathers internal keys with identical bounds. Keys defined over
195 : // spans of the keyspace are fragmented such that any overlapping key spans have
196 : // identical bounds. When these spans are persisted to a range deletion or range
197 : // key block, they may be persisted as multiple internal keys in order to encode
198 : // multiple sequence numbers or key kinds.
199 : //
200 : // gatherForward iterates forward, re-combining the fragmented internal keys to
201 : // reconstruct a keyspan.Span that holds all the keys defined over the span.
202 1 : func (i *fragmentIter) gatherForward(kv *base.InternalKV) (*keyspan.Span, error) {
203 1 : i.span = keyspan.Span{}
204 1 : if kv == nil || !i.blockIter.Valid() {
205 1 : return nil, nil
206 1 : }
207 : // Use the i.keyBuf array to back the Keys slice to prevent an allocation
208 : // when a span contains few keys.
209 1 : i.span.Keys = i.keyBuf[:0]
210 1 :
211 1 : // Decode the span's end key and individual keys from the value.
212 1 : if err := i.initSpan(kv.K, kv.InPlaceValue()); err != nil {
213 0 : return nil, err
214 0 : }
215 :
216 : // There might exist additional internal keys with identical bounds encoded
217 : // within the block. Iterate forward, accumulating all the keys with
218 : // identical bounds to s.
219 :
220 : // Overlapping fragments are required to have exactly equal start and
221 : // end bounds.
222 1 : for kv = i.blockIter.Next(); kv != nil && i.blockIter.cmp(kv.K.UserKey, i.span.Start) == 0; kv = i.blockIter.Next() {
223 1 : if err := i.addToSpan(i.blockIter.cmp, kv.K, kv.InPlaceValue()); err != nil {
224 0 : return nil, err
225 0 : }
226 : }
227 1 : if err := i.applySpanTransforms(); err != nil {
228 0 : return nil, err
229 0 : }
230 : // i.blockIter is positioned over the first internal key for the next span.
231 1 : return &i.span, nil
232 : }
233 :
234 : // gatherBackward gathers internal keys with identical bounds. Keys defined over
235 : // spans of the keyspace are fragmented such that any overlapping key spans have
236 : // identical bounds. When these spans are persisted to a range deletion or range
237 : // key block, they may be persisted as multiple internal keys in order to encode
238 : // multiple sequence numbers or key kinds.
239 : //
240 : // gatherBackward iterates backwards, re-combining the fragmented internal keys
241 : // to reconstruct a keyspan.Span that holds all the keys defined over the span.
242 1 : func (i *fragmentIter) gatherBackward(kv *base.InternalKV) (*keyspan.Span, error) {
243 1 : i.span = keyspan.Span{}
244 1 : if kv == nil || !i.blockIter.Valid() {
245 1 : return nil, nil
246 1 : }
247 :
248 : // Decode the span's end key and individual keys from the value.
249 1 : if err := i.initSpan(kv.K, kv.InPlaceValue()); err != nil {
250 0 : return nil, err
251 0 : }
252 :
253 : // There might exist additional internal keys with identical bounds encoded
254 : // within the block. Iterate backward, accumulating all the keys with
255 : // identical bounds to s.
256 : //
257 : // Overlapping fragments are required to have exactly equal start and
258 : // end bounds.
259 1 : for kv = i.blockIter.Prev(); kv != nil && i.blockIter.cmp(kv.K.UserKey, i.span.Start) == 0; kv = i.blockIter.Prev() {
260 1 : if err := i.addToSpan(i.blockIter.cmp, kv.K, kv.InPlaceValue()); err != nil {
261 0 : return nil, err
262 0 : }
263 : }
264 : // i.blockIter is positioned over the last internal key for the previous
265 : // span.
266 :
267 : // Backwards iteration encounters internal keys in the wrong order.
268 1 : keyspan.SortKeysByTrailer(i.span.Keys)
269 1 :
270 1 : i.applySpanTransforms()
271 1 : return &i.span, nil
272 : }
273 :
274 : // SetContext is part of the FragmentIterator interface.
275 0 : func (i *fragmentIter) SetContext(ctx context.Context) {}
276 :
277 : // Close implements (keyspan.FragmentIterator).Close.
278 1 : func (i *fragmentIter) Close() {
279 1 : i.blockIter.Close()
280 1 : i.closeCheck.Close()
281 1 :
282 1 : if invariants.Sometimes(25) {
283 1 : // In invariants mode, sometimes don't add the object to the pool so that we
284 1 : // can check for double closes that take longer than the object stays in the
285 1 : // pool.
286 1 : return
287 1 : }
288 :
289 1 : i.blockIter.ResetForReuse()
290 1 : *i = fragmentIter{
291 1 : blockIter: i.blockIter,
292 1 : closeCheck: i.closeCheck,
293 1 : startKeyBuf: i.startKeyBuf[:0],
294 1 : endKeyBuf: i.endKeyBuf[:0],
295 1 : }
296 1 : fragmentBlockIterPool.Put(i)
297 : }
298 :
299 : // First implements (keyspan.FragmentIterator).First
300 1 : func (i *fragmentIter) First() (*keyspan.Span, error) {
301 1 : i.dir = +1
302 1 : return i.gatherForward(i.blockIter.First())
303 1 : }
304 :
305 : // Last implements (keyspan.FragmentIterator).Last.
306 1 : func (i *fragmentIter) Last() (*keyspan.Span, error) {
307 1 : i.dir = -1
308 1 : return i.gatherBackward(i.blockIter.Last())
309 1 : }
310 :
311 : // Next implements (keyspan.FragmentIterator).Next.
312 1 : func (i *fragmentIter) Next() (*keyspan.Span, error) {
313 1 : switch {
314 1 : case i.dir == -1 && !i.span.Valid():
315 1 : // Switching directions.
316 1 : //
317 1 : // i.blockIter is exhausted, before the first key. Move onto the first.
318 1 : i.blockIter.First()
319 1 : i.dir = +1
320 1 : case i.dir == -1 && i.span.Valid():
321 1 : // Switching directions.
322 1 : //
323 1 : // i.blockIter is currently positioned over the last internal key for
324 1 : // the previous span. Next it once to move to the first internal key
325 1 : // that makes up the current span, and gatherForwaad to land on the
326 1 : // first internal key making up the next span.
327 1 : //
328 1 : // In the diagram below, if the last span returned to the user during
329 1 : // reverse iteration was [b,c), i.blockIter is currently positioned at
330 1 : // [a,b). The block iter must be positioned over [d,e) to gather the
331 1 : // next span's fragments.
332 1 : //
333 1 : // ... [a,b) [b,c) [b,c) [b,c) [d,e) ...
334 1 : // ^ ^
335 1 : // i.blockIter want
336 1 : if x, err := i.gatherForward(i.blockIter.Next()); err != nil {
337 0 : return nil, err
338 1 : } else if invariants.Enabled && !x.Valid() {
339 0 : panic("pebble: invariant violation: next entry unexpectedly invalid")
340 : }
341 1 : i.dir = +1
342 : }
343 : // We know that this blockIter has in-place values.
344 1 : return i.gatherForward(i.blockIter.KV())
345 : }
346 :
347 : // Prev implements (keyspan.FragmentIterator).Prev.
348 1 : func (i *fragmentIter) Prev() (*keyspan.Span, error) {
349 1 : switch {
350 1 : case i.dir == +1 && !i.span.Valid():
351 1 : // Switching directions.
352 1 : //
353 1 : // i.blockIter is exhausted, after the last key. Move onto the last.
354 1 : i.blockIter.Last()
355 1 : i.dir = -1
356 1 : case i.dir == +1 && i.span.Valid():
357 1 : // Switching directions.
358 1 : //
359 1 : // i.blockIter is currently positioned over the first internal key for
360 1 : // the next span. Prev it once to move to the last internal key that
361 1 : // makes up the current span, and gatherBackward to land on the last
362 1 : // internal key making up the previous span.
363 1 : //
364 1 : // In the diagram below, if the last span returned to the user during
365 1 : // forward iteration was [b,c), i.blockIter is currently positioned at
366 1 : // [d,e). The block iter must be positioned over [a,b) to gather the
367 1 : // previous span's fragments.
368 1 : //
369 1 : // ... [a,b) [b,c) [b,c) [b,c) [d,e) ...
370 1 : // ^ ^
371 1 : // want i.blockIter
372 1 : if x, err := i.gatherBackward(i.blockIter.Prev()); err != nil {
373 0 : return nil, err
374 1 : } else if invariants.Enabled && !x.Valid() {
375 0 : panic("pebble: invariant violation: previous entry unexpectedly invalid")
376 : }
377 1 : i.dir = -1
378 : }
379 : // We know that this blockIter has in-place values.
380 1 : return i.gatherBackward(i.blockIter.KV())
381 : }
382 :
383 : // SeekGE implements (keyspan.FragmentIterator).SeekGE.
384 1 : func (i *fragmentIter) SeekGE(k []byte) (*keyspan.Span, error) {
385 1 : if s, err := i.SeekLT(k); err != nil {
386 0 : return nil, err
387 1 : } else if s != nil && i.blockIter.cmp(k, s.End) < 0 {
388 1 : return s, nil
389 1 : }
390 : // TODO(jackson): If the above i.SeekLT(k) discovers a span but the span
391 : // doesn't meet the k < s.End comparison, then there's no need for the
392 : // SeekLT to gatherBackward.
393 1 : return i.Next()
394 : }
395 :
396 : // SeekLT implements (keyspan.FragmentIterator).SeekLT.
397 1 : func (i *fragmentIter) SeekLT(k []byte) (*keyspan.Span, error) {
398 1 : i.dir = -1
399 1 : return i.gatherBackward(i.blockIter.SeekLT(k, base.SeekLTFlagsNone))
400 1 : }
401 :
402 : // String implements fmt.Stringer.
403 0 : func (i *fragmentIter) String() string {
404 0 : return "fragment-block-iter"
405 0 : }
406 :
407 : // WrapChildren implements FragmentIterator.
408 0 : func (i *fragmentIter) WrapChildren(wrap keyspan.WrapFn) {}
409 :
410 : // DebugTree is part of the FragmentIterator interface.
411 0 : func (i *fragmentIter) DebugTree(tp treeprinter.Node) {
412 0 : tp.Childf("%T(%p) fileNum=%s", i, i, i.fileNum)
413 0 : }
414 :
415 1 : func checkFragmentBlockIterator(obj interface{}) {
416 1 : i := obj.(*fragmentIter)
417 1 : if h := i.blockIter.Handle(); h.Valid() {
418 0 : fmt.Fprintf(os.Stderr, "fragmentBlockIter.blockIter.handle is not nil: %#v\n", h)
419 0 : os.Exit(1)
420 0 : }
421 : }
|