Line data Source code
1 : // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package rowblk
6 :
7 : import (
8 : "bytes"
9 : "context"
10 : "fmt"
11 : "os"
12 : "sync"
13 :
14 : "github.com/cockroachdb/pebble/internal/base"
15 : "github.com/cockroachdb/pebble/internal/invariants"
16 : "github.com/cockroachdb/pebble/internal/keyspan"
17 : "github.com/cockroachdb/pebble/internal/rangedel"
18 : "github.com/cockroachdb/pebble/internal/rangekey"
19 : "github.com/cockroachdb/pebble/internal/treeprinter"
20 : "github.com/cockroachdb/pebble/sstable/block"
21 : )
22 :
23 : // fragmentIter wraps an Iter, implementing the keyspan.FragmentIterator
24 : // interface. It's used for reading range deletion and range key blocks.
25 : //
26 : // Range deletions and range keys are fragmented before they're persisted to the
27 : // block. Overlapping fragments have identical bounds. The fragmentIter gathers
28 : // all the fragments with identical bounds within a block and returns a single
29 : // keyspan.Span describing all the keys defined over the span.
30 : //
31 : // # Memory lifetime
32 : //
33 : // A Span returned by fragmentIter is only guaranteed to be stable until the
34 : // next fragmentIter iteration positioning method. A Span's Keys slice may be
35 : // reused, so the user must not assume it's stable.
36 : //
37 : // Blocks holding range deletions and range keys are configured to use a restart
38 : // interval of 1. This provides key stability. The caller may treat the various
39 : // byte slices (start, end, suffix, value) as stable for the lifetime of the
40 : // iterator.
41 : type fragmentIter struct {
42 : suffixCmp base.CompareSuffixes
43 : blockIter Iter
44 : keyBuf [2]keyspan.Key
45 : span keyspan.Span
46 : dir int8
47 :
48 : // fileNum is used for logging/debugging.
49 : fileNum base.DiskFileNum
50 :
51 : // elideSameSeqnum, if true, returns only the first-occurring (in forward
52 : // order) Key for each sequence number.
53 : elideSameSeqnum bool
54 :
55 : syntheticSuffix block.SyntheticSuffix
56 : syntheticPrefix block.SyntheticPrefix
57 : // startKeyBuf is a buffer that is reused to store the start key of the span
58 : // when a synthetic prefix is used.
59 : startKeyBuf []byte
60 : // endKeyBuf is a buffer that is reused to generate the end key of the span
61 : // when a synthetic prefix is set. It always starts with syntheticPrefix.
62 : endKeyBuf []byte
63 :
64 : closeCheck invariants.CloseChecker
65 : }
66 :
67 : var _ keyspan.FragmentIterator = (*fragmentIter)(nil)
68 :
69 : var fragmentBlockIterPool = sync.Pool{
70 2 : New: func() interface{} {
71 2 : i := &fragmentIter{}
72 2 : // Note: this is a no-op if invariants are disabled or race is enabled.
73 2 : invariants.SetFinalizer(i, checkFragmentBlockIterator)
74 2 : return i
75 2 : },
76 : }
77 :
78 : // NewFragmentIter returns a new keyspan iterator that iterates over a block's
79 : // spans.
80 : func NewFragmentIter(
81 : fileNum base.DiskFileNum,
82 : cmp base.Compare,
83 : suffixCmp base.CompareSuffixes,
84 : split base.Split,
85 : blockHandle block.BufferHandle,
86 : transforms block.FragmentIterTransforms,
87 2 : ) (keyspan.FragmentIterator, error) {
88 2 : i := fragmentBlockIterPool.Get().(*fragmentIter)
89 2 :
90 2 : i.suffixCmp = suffixCmp
91 2 : // Use the i.keyBuf array to back the Keys slice to prevent an allocation
92 2 : // when the spans contain few keys.
93 2 : i.span.Keys = i.keyBuf[:0]
94 2 : i.fileNum = fileNum
95 2 : i.elideSameSeqnum = transforms.ElideSameSeqNum
96 2 : i.syntheticSuffix = transforms.SyntheticSuffix
97 2 : i.syntheticPrefix = transforms.SyntheticPrefix
98 2 : if transforms.SyntheticPrefix.IsSet() {
99 2 : i.endKeyBuf = append(i.endKeyBuf[:0], transforms.SyntheticPrefix...)
100 2 : }
101 2 : i.closeCheck = invariants.CloseChecker{}
102 2 :
103 2 : if err := i.blockIter.InitHandle(cmp, split, blockHandle, block.IterTransforms{
104 2 : SyntheticSeqNum: transforms.SyntheticSeqNum,
105 2 : // We let the blockIter prepend the prefix to span start keys; the fragment
106 2 : // iterator will prepend it for end keys. We could do everything in the
107 2 : // fragment iterator, but we'd have to duplicate the logic for adjusting the
108 2 : // seek key for SeekGE/SeekLT.
109 2 : SyntheticPrefix: transforms.SyntheticPrefix,
110 2 : // It's okay for HideObsoletePoints to be false here, even for shared
111 2 : // ingested sstables. This is because rangedels do not apply to points in
112 2 : // the same sstable at the same sequence number anyway, so exposing obsolete
113 2 : // rangedels is harmless.
114 2 : HideObsoletePoints: false,
115 2 : }); err != nil {
116 0 : i.Close()
117 0 : return nil, err
118 0 : }
119 2 : return i, nil
120 : }
121 :
122 : // initSpan initializes the span with a single fragment.
123 : //
124 : // Note that the span start and end keys and range key contents are aliased to
125 : // the key or value when we don't have a synthetic prefix. This is ok because
126 : // the range del/key block doesn't use prefix compression, so the key/value will
127 : // be pointing directly into the buffer data.
128 2 : func (i *fragmentIter) initSpan(ik base.InternalKey, internalValue []byte) error {
129 2 : if ik.Kind() == base.InternalKeyKindRangeDelete {
130 2 : i.span = rangedel.Decode(ik, internalValue, i.span.Keys[:0])
131 2 : } else {
132 2 : var err error
133 2 : i.span, err = rangekey.Decode(ik, internalValue, i.span.Keys[:0])
134 2 : if err != nil {
135 0 : return err
136 0 : }
137 : }
138 : // When synthetic prefix is used in the blockIter, the keys cannot be used
139 : // across multiple blockIter operations; we have to make a copy in this case.
140 2 : if i.syntheticPrefix.IsSet() || invariants.Sometimes(10) {
141 2 : i.startKeyBuf = append(i.startKeyBuf[:0], i.span.Start...)
142 2 : i.span.Start = i.startKeyBuf
143 2 : }
144 2 : return nil
145 : }
146 :
147 : // addToSpan adds a fragment to the existing span. The fragment must be for the
148 : // same start/end keys.
149 : func (i *fragmentIter) addToSpan(
150 : cmp base.Compare, ik base.InternalKey, internalValue []byte,
151 2 : ) error {
152 2 : var err error
153 2 : if ik.Kind() == base.InternalKeyKindRangeDelete {
154 2 : err = rangedel.DecodeIntoSpan(cmp, ik, internalValue, &i.span)
155 2 : } else {
156 2 : err = rangekey.DecodeIntoSpan(cmp, ik, internalValue, &i.span)
157 2 : }
158 2 : return err
159 : }
160 :
161 : // applySpanTransforms applies changes to the span that we decoded, if
162 : // appropriate.
163 2 : func (i *fragmentIter) applySpanTransforms() error {
164 2 : if i.elideSameSeqnum && len(i.span.Keys) > 0 {
165 2 : lastSeqNum := i.span.Keys[0].SeqNum()
166 2 : k := 1
167 2 : for j := 1; j < len(i.span.Keys); j++ {
168 2 : if lastSeqNum != i.span.Keys[j].SeqNum() {
169 2 : lastSeqNum = i.span.Keys[j].SeqNum()
170 2 : i.span.Keys[k] = i.span.Keys[j]
171 2 : k++
172 2 : }
173 : }
174 2 : i.span.Keys = i.span.Keys[:k]
175 : }
176 :
177 2 : if i.syntheticPrefix.IsSet() || invariants.Sometimes(10) {
178 2 : // We have to make a copy of the start key because it will not stay valid
179 2 : // across multiple blockIter operations.
180 2 : i.startKeyBuf = append(i.startKeyBuf[:0], i.span.Start...)
181 2 : i.span.Start = i.startKeyBuf
182 2 : if invariants.Enabled && !bytes.Equal(i.syntheticPrefix, i.endKeyBuf[:len(i.syntheticPrefix)]) {
183 0 : panic("pebble: invariant violation: synthetic prefix mismatch")
184 : }
185 2 : i.endKeyBuf = append(i.endKeyBuf[:len(i.syntheticPrefix)], i.span.End...)
186 2 : i.span.End = i.endKeyBuf
187 : }
188 :
189 2 : if i.syntheticSuffix.IsSet() {
190 2 : for keyIdx := range i.span.Keys {
191 2 : k := &i.span.Keys[keyIdx]
192 2 :
193 2 : switch k.Kind() {
194 2 : case base.InternalKeyKindRangeKeySet:
195 2 : if len(k.Suffix) > 0 {
196 2 : if invariants.Enabled && i.suffixCmp(i.syntheticSuffix, k.Suffix) >= 0 {
197 1 : return base.AssertionFailedf("synthetic suffix %q >= RangeKeySet suffix %q",
198 1 : i.syntheticSuffix, k.Suffix)
199 1 : }
200 2 : k.Suffix = i.syntheticSuffix
201 : }
202 2 : case base.InternalKeyKindRangeKeyDelete:
203 : // Nothing to do.
204 1 : default:
205 1 : return base.AssertionFailedf("synthetic suffix not supported with key kind %s", k.Kind())
206 : }
207 : }
208 : }
209 2 : return nil
210 : }
211 :
212 : // gatherForward gathers internal keys with identical bounds. Keys defined over
213 : // spans of the keyspace are fragmented such that any overlapping key spans have
214 : // identical bounds. When these spans are persisted to a range deletion or range
215 : // key block, they may be persisted as multiple internal keys in order to encode
216 : // multiple sequence numbers or key kinds.
217 : //
218 : // gatherForward iterates forward, re-combining the fragmented internal keys to
219 : // reconstruct a keyspan.Span that holds all the keys defined over the span.
220 2 : func (i *fragmentIter) gatherForward(kv *base.InternalKV) (*keyspan.Span, error) {
221 2 : i.span = keyspan.Span{}
222 2 : if kv == nil || !i.blockIter.Valid() {
223 2 : return nil, nil
224 2 : }
225 : // Use the i.keyBuf array to back the Keys slice to prevent an allocation
226 : // when a span contains few keys.
227 2 : i.span.Keys = i.keyBuf[:0]
228 2 :
229 2 : // Decode the span's end key and individual keys from the value.
230 2 : if err := i.initSpan(kv.K, kv.InPlaceValue()); err != nil {
231 0 : return nil, err
232 0 : }
233 :
234 : // There might exist additional internal keys with identical bounds encoded
235 : // within the block. Iterate forward, accumulating all the keys with
236 : // identical bounds to s.
237 :
238 : // Overlapping fragments are required to have exactly equal start and
239 : // end bounds.
240 2 : for kv = i.blockIter.Next(); kv != nil && i.blockIter.cmp(kv.K.UserKey, i.span.Start) == 0; kv = i.blockIter.Next() {
241 2 : if err := i.addToSpan(i.blockIter.cmp, kv.K, kv.InPlaceValue()); err != nil {
242 0 : return nil, err
243 0 : }
244 : }
245 2 : if err := i.applySpanTransforms(); err != nil {
246 0 : return nil, err
247 0 : }
248 : // i.blockIter is positioned over the first internal key for the next span.
249 2 : return &i.span, nil
250 : }
251 :
252 : // gatherBackward gathers internal keys with identical bounds. Keys defined over
253 : // spans of the keyspace are fragmented such that any overlapping key spans have
254 : // identical bounds. When these spans are persisted to a range deletion or range
255 : // key block, they may be persisted as multiple internal keys in order to encode
256 : // multiple sequence numbers or key kinds.
257 : //
258 : // gatherBackward iterates backwards, re-combining the fragmented internal keys
259 : // to reconstruct a keyspan.Span that holds all the keys defined over the span.
260 2 : func (i *fragmentIter) gatherBackward(kv *base.InternalKV) (*keyspan.Span, error) {
261 2 : i.span = keyspan.Span{}
262 2 : if kv == nil || !i.blockIter.Valid() {
263 2 : return nil, nil
264 2 : }
265 :
266 : // Decode the span's end key and individual keys from the value.
267 2 : if err := i.initSpan(kv.K, kv.InPlaceValue()); err != nil {
268 0 : return nil, err
269 0 : }
270 :
271 : // There might exist additional internal keys with identical bounds encoded
272 : // within the block. Iterate backward, accumulating all the keys with
273 : // identical bounds to s.
274 : //
275 : // Overlapping fragments are required to have exactly equal start and
276 : // end bounds.
277 2 : for kv = i.blockIter.Prev(); kv != nil && i.blockIter.cmp(kv.K.UserKey, i.span.Start) == 0; kv = i.blockIter.Prev() {
278 2 : if err := i.addToSpan(i.blockIter.cmp, kv.K, kv.InPlaceValue()); err != nil {
279 0 : return nil, err
280 0 : }
281 : }
282 : // i.blockIter is positioned over the last internal key for the previous
283 : // span.
284 :
285 : // Backwards iteration encounters internal keys in the wrong order.
286 2 : keyspan.SortKeysByTrailer(i.span.Keys)
287 2 :
288 2 : i.applySpanTransforms()
289 2 : return &i.span, nil
290 : }
291 :
292 : // SetContext is part of the FragmentIterator interface.
293 0 : func (i *fragmentIter) SetContext(ctx context.Context) {}
294 :
295 : // Close implements (keyspan.FragmentIterator).Close.
296 2 : func (i *fragmentIter) Close() {
297 2 : i.blockIter.Close()
298 2 : i.closeCheck.Close()
299 2 :
300 2 : if invariants.Sometimes(25) {
301 2 : // In invariants mode, sometimes don't add the object to the pool so that we
302 2 : // can check for double closes that take longer than the object stays in the
303 2 : // pool.
304 2 : return
305 2 : }
306 :
307 2 : *i = fragmentIter{
308 2 : blockIter: i.blockIter.ResetForReuse(),
309 2 : closeCheck: i.closeCheck,
310 2 : startKeyBuf: i.startKeyBuf[:0],
311 2 : endKeyBuf: i.endKeyBuf[:0],
312 2 : }
313 2 : fragmentBlockIterPool.Put(i)
314 : }
315 :
316 : // First implements (keyspan.FragmentIterator).First
317 2 : func (i *fragmentIter) First() (*keyspan.Span, error) {
318 2 : i.dir = +1
319 2 : return i.gatherForward(i.blockIter.First())
320 2 : }
321 :
322 : // Last implements (keyspan.FragmentIterator).Last.
323 2 : func (i *fragmentIter) Last() (*keyspan.Span, error) {
324 2 : i.dir = -1
325 2 : return i.gatherBackward(i.blockIter.Last())
326 2 : }
327 :
328 : // Next implements (keyspan.FragmentIterator).Next.
329 2 : func (i *fragmentIter) Next() (*keyspan.Span, error) {
330 2 : switch {
331 2 : case i.dir == -1 && !i.span.Valid():
332 2 : // Switching directions.
333 2 : //
334 2 : // i.blockIter is exhausted, before the first key. Move onto the first.
335 2 : i.blockIter.First()
336 2 : i.dir = +1
337 2 : case i.dir == -1 && i.span.Valid():
338 2 : // Switching directions.
339 2 : //
340 2 : // i.blockIter is currently positioned over the last internal key for
341 2 : // the previous span. Next it once to move to the first internal key
342 2 : // that makes up the current span, and gatherForwaad to land on the
343 2 : // first internal key making up the next span.
344 2 : //
345 2 : // In the diagram below, if the last span returned to the user during
346 2 : // reverse iteration was [b,c), i.blockIter is currently positioned at
347 2 : // [a,b). The block iter must be positioned over [d,e) to gather the
348 2 : // next span's fragments.
349 2 : //
350 2 : // ... [a,b) [b,c) [b,c) [b,c) [d,e) ...
351 2 : // ^ ^
352 2 : // i.blockIter want
353 2 : if x, err := i.gatherForward(i.blockIter.Next()); err != nil {
354 0 : return nil, err
355 2 : } else if invariants.Enabled && !x.Valid() {
356 0 : panic("pebble: invariant violation: next entry unexpectedly invalid")
357 : }
358 2 : i.dir = +1
359 : }
360 : // We know that this blockIter has in-place values.
361 2 : return i.gatherForward(i.blockIter.KV())
362 : }
363 :
364 : // Prev implements (keyspan.FragmentIterator).Prev.
365 2 : func (i *fragmentIter) Prev() (*keyspan.Span, error) {
366 2 : switch {
367 2 : case i.dir == +1 && !i.span.Valid():
368 2 : // Switching directions.
369 2 : //
370 2 : // i.blockIter is exhausted, after the last key. Move onto the last.
371 2 : i.blockIter.Last()
372 2 : i.dir = -1
373 2 : case i.dir == +1 && i.span.Valid():
374 2 : // Switching directions.
375 2 : //
376 2 : // i.blockIter is currently positioned over the first internal key for
377 2 : // the next span. Prev it once to move to the last internal key that
378 2 : // makes up the current span, and gatherBackward to land on the last
379 2 : // internal key making up the previous span.
380 2 : //
381 2 : // In the diagram below, if the last span returned to the user during
382 2 : // forward iteration was [b,c), i.blockIter is currently positioned at
383 2 : // [d,e). The block iter must be positioned over [a,b) to gather the
384 2 : // previous span's fragments.
385 2 : //
386 2 : // ... [a,b) [b,c) [b,c) [b,c) [d,e) ...
387 2 : // ^ ^
388 2 : // want i.blockIter
389 2 : if x, err := i.gatherBackward(i.blockIter.Prev()); err != nil {
390 0 : return nil, err
391 2 : } else if invariants.Enabled && !x.Valid() {
392 0 : panic("pebble: invariant violation: previous entry unexpectedly invalid")
393 : }
394 2 : i.dir = -1
395 : }
396 : // We know that this blockIter has in-place values.
397 2 : return i.gatherBackward(i.blockIter.KV())
398 : }
399 :
400 : // SeekGE implements (keyspan.FragmentIterator).SeekGE.
401 2 : func (i *fragmentIter) SeekGE(k []byte) (*keyspan.Span, error) {
402 2 : if s, err := i.SeekLT(k); err != nil {
403 0 : return nil, err
404 2 : } else if s != nil && i.blockIter.cmp(k, s.End) < 0 {
405 2 : return s, nil
406 2 : }
407 : // TODO(jackson): If the above i.SeekLT(k) discovers a span but the span
408 : // doesn't meet the k < s.End comparison, then there's no need for the
409 : // SeekLT to gatherBackward.
410 2 : return i.Next()
411 : }
412 :
413 : // SeekLT implements (keyspan.FragmentIterator).SeekLT.
414 2 : func (i *fragmentIter) SeekLT(k []byte) (*keyspan.Span, error) {
415 2 : i.dir = -1
416 2 : return i.gatherBackward(i.blockIter.SeekLT(k, base.SeekLTFlagsNone))
417 2 : }
418 :
419 : // String implements fmt.Stringer.
420 0 : func (i *fragmentIter) String() string {
421 0 : return "fragment-block-iter"
422 0 : }
423 :
424 : // WrapChildren implements FragmentIterator.
425 0 : func (i *fragmentIter) WrapChildren(wrap keyspan.WrapFn) {}
426 :
427 : // DebugTree is part of the FragmentIterator interface.
428 0 : func (i *fragmentIter) DebugTree(tp treeprinter.Node) {
429 0 : tp.Childf("%T(%p) fileNum=%s", i, i, i.fileNum)
430 0 : }
431 :
432 2 : func checkFragmentBlockIterator(obj interface{}) {
433 2 : i := obj.(*fragmentIter)
434 2 : if p := i.blockIter.Handle().Get(); p != nil {
435 0 : fmt.Fprintf(os.Stderr, "fragmentBlockIter.blockIter.handle is not nil: %p\n", p)
436 0 : os.Exit(1)
437 0 : }
438 : }
|