Line data Source code
1 : // Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package keyspanimpl
6 :
7 : import (
8 : "fmt"
9 :
10 : "github.com/cockroachdb/errors"
11 : "github.com/cockroachdb/pebble/internal/base"
12 : "github.com/cockroachdb/pebble/internal/invariants"
13 : "github.com/cockroachdb/pebble/internal/keyspan"
14 : "github.com/cockroachdb/pebble/internal/manifest"
15 : )
16 :
17 : // TableNewSpanIter creates a new iterator for range key spans for the given
18 : // file.
19 : type TableNewSpanIter func(
20 : file *manifest.FileMetadata, iterOptions keyspan.SpanIterOptions,
21 : ) (keyspan.FragmentIterator, error)
22 :
23 : // LevelIter provides a merged view of spans from sstables in a level.
24 : // It takes advantage of level invariants to only have one sstable span block
25 : // open at one time, opened using the newIter function passed in.
26 : type LevelIter struct {
27 : cmp base.Compare
28 : // Denotes the kind of key the level iterator should read. If the key type
29 : // is KeyTypePoint, the level iterator will read range tombstones (which
30 : // only affect point keys). If the key type is KeyTypeRange, the level
31 : // iterator will read range keys. It is invalid to configure an iterator
32 : // with the KeyTypePointAndRange key type.
33 : //
34 : // If key type is KeyTypePoint, no straddle spans are emitted between files,
35 : // and point key bounds are used to find files instead of range key bounds.
36 : //
37 : // TODO(bilal): Straddle spans can safely be produced in rangedel mode once
38 : // we can guarantee that we will never read sstables in a level that split
39 : // user keys across them. This might be guaranteed in a future release, but
40 : // as of CockroachDB 22.2 it is not guaranteed, so to be safe disable it when
41 : // keyType == KeyTypePoint
42 : keyType manifest.KeyType
43 : // The LSM level this LevelIter is initialized for. Used in logging.
44 : level manifest.Level
45 : // The below fields are used to fill in gaps between adjacent files' range
46 : // key spaces. This is an optimization to avoid unnecessarily loading files
47 : // in cases where range keys are sparse and rare. dir is set by every
48 : // positioning operation, straddleDir is set to dir whenever a straddling
49 : // Span is synthesized and the last positioning operation returned a
50 : // synthesized straddle span.
51 : //
52 : // Note that when a straddle span is initialized, iterFile is modified to
53 : // point to the next file in the straddleDir direction. A change of direction
54 : // on a straddle key therefore necessitates the value of iterFile to be
55 : // reverted.
56 : dir int
57 : straddle keyspan.Span
58 : straddleDir int
59 : // The iter for the current file (iterFile). It is nil under any of the
60 : // following conditions:
61 : // - files.Current() == nil
62 : // - err != nil
63 : // - straddleDir != 0, in which case iterFile is not nil and points to the
64 : // next file (in the straddleDir direction).
65 : // - some other constraint, like the bounds in opts, caused the file at index to not
66 : // be relevant to the iteration.
67 : iter keyspan.FragmentIterator
68 : wrapFn keyspan.WrapFn
69 : // iterFile holds the current file.
70 : // INVARIANT: iterFile = files.Current()
71 : iterFile *manifest.FileMetadata
72 : newIter TableNewSpanIter
73 : files manifest.LevelIterator
74 : err error
75 :
76 : // The options that were passed in.
77 : tableOpts keyspan.SpanIterOptions
78 :
79 : // TODO(bilal): Add InternalIteratorStats.
80 : }
81 :
82 : // LevelIter implements the keyspan.FragmentIterator interface.
83 : var _ keyspan.FragmentIterator = (*LevelIter)(nil)
84 :
85 : // NewLevelIter returns a LevelIter.
86 : func NewLevelIter(
87 : opts keyspan.SpanIterOptions,
88 : cmp base.Compare,
89 : newIter TableNewSpanIter,
90 : files manifest.LevelIterator,
91 : level manifest.Level,
92 : keyType manifest.KeyType,
93 1 : ) *LevelIter {
94 1 : l := &LevelIter{}
95 1 : l.Init(opts, cmp, newIter, files, level, keyType)
96 1 : return l
97 1 : }
98 :
99 : // Init initializes a LevelIter.
100 : func (l *LevelIter) Init(
101 : opts keyspan.SpanIterOptions,
102 : cmp base.Compare,
103 : newIter TableNewSpanIter,
104 : files manifest.LevelIterator,
105 : level manifest.Level,
106 : keyType manifest.KeyType,
107 1 : ) {
108 1 : l.err = nil
109 1 : l.level = level
110 1 : l.tableOpts = opts
111 1 : l.cmp = cmp
112 1 : l.iterFile = nil
113 1 : l.newIter = newIter
114 1 : switch keyType {
115 1 : case manifest.KeyTypePoint:
116 1 : l.keyType = keyType
117 1 : l.files = files.Filter(keyType)
118 1 : case manifest.KeyTypeRange:
119 1 : l.keyType = keyType
120 1 : l.files = files.Filter(keyType)
121 0 : default:
122 0 : panic(fmt.Sprintf("unsupported key type: %v", keyType))
123 : }
124 : }
125 :
126 1 : func (l *LevelIter) findFileGE(key []byte) *manifest.FileMetadata {
127 1 : // Find the earliest file whose largest key is >= key.
128 1 : //
129 1 : // If the earliest file has its largest key == key and that largest key is a
130 1 : // range deletion sentinel, we know that we manufactured this sentinel to convert
131 1 : // the exclusive range deletion end key into an inclusive key (reminder: [start, end)#seqnum
132 1 : // is the form of a range deletion sentinel which can contribute a largest key = end#sentinel).
133 1 : // In this case we don't return this as the earliest file since there is nothing actually
134 1 : // equal to key in it.
135 1 :
136 1 : m := l.files.SeekGE(l.cmp, key)
137 1 : for m != nil {
138 1 : largestKey := m.LargestRangeKey
139 1 : if l.keyType == manifest.KeyTypePoint {
140 1 : largestKey = m.LargestPointKey
141 1 : }
142 1 : if !largestKey.IsExclusiveSentinel() || l.cmp(largestKey.UserKey, key) != 0 {
143 1 : break
144 : }
145 0 : m = l.files.Next()
146 : }
147 1 : return m
148 : }
149 :
150 1 : func (l *LevelIter) findFileLT(key []byte) *manifest.FileMetadata {
151 1 : // Find the last file whose smallest key is < key.
152 1 : return l.files.SeekLT(l.cmp, key)
153 1 : }
154 :
155 : type loadFileReturnIndicator int8
156 :
157 : const (
158 : noFileLoaded loadFileReturnIndicator = iota
159 : fileAlreadyLoaded
160 : newFileLoaded
161 : )
162 :
163 1 : func (l *LevelIter) loadFile(file *manifest.FileMetadata, dir int) loadFileReturnIndicator {
164 1 : indicator := noFileLoaded
165 1 : if l.iterFile == file {
166 1 : if l.err != nil {
167 0 : return noFileLoaded
168 0 : }
169 1 : if l.iter != nil {
170 1 : // We are already at the file, but we would need to check for bounds.
171 1 : // Set indicator accordingly.
172 1 : indicator = fileAlreadyLoaded
173 1 : }
174 : // We were already at file, but don't have an iterator, probably because the file was
175 : // beyond the iteration bounds. It may still be, but it is also possible that the bounds
176 : // have changed. We handle that below.
177 : }
178 :
179 : // Note that LevelIter.Close() can be called multiple times.
180 1 : if indicator != fileAlreadyLoaded {
181 1 : if err := l.Close(); err != nil {
182 0 : return noFileLoaded
183 0 : }
184 : }
185 :
186 1 : l.iterFile = file
187 1 : if file == nil {
188 1 : return noFileLoaded
189 1 : }
190 1 : if indicator != fileAlreadyLoaded {
191 1 : l.iter, l.err = l.newIter(file, l.tableOpts)
192 1 : if l.wrapFn != nil {
193 0 : l.iter = l.wrapFn(l.iter)
194 0 : }
195 1 : l.iter = keyspan.MaybeAssert(l.iter, l.cmp)
196 1 : indicator = newFileLoaded
197 : }
198 1 : if l.err != nil {
199 1 : return noFileLoaded
200 1 : }
201 1 : return indicator
202 : }
203 :
204 : // SeekGE implements keyspan.FragmentIterator.
205 1 : func (l *LevelIter) SeekGE(key []byte) (*keyspan.Span, error) {
206 1 : l.dir = +1
207 1 : l.straddle = keyspan.Span{}
208 1 : l.straddleDir = 0
209 1 : l.err = nil // clear cached iteration error
210 1 :
211 1 : f := l.findFileGE(key)
212 1 : if f != nil && l.keyType == manifest.KeyTypeRange && l.cmp(key, f.SmallestRangeKey.UserKey) < 0 {
213 1 : // Peek at the previous file.
214 1 : prevFile := l.files.Prev()
215 1 : l.files.Next()
216 1 : if prevFile != nil {
217 1 : // We could unconditionally return an empty span between the seek key and
218 1 : // f.SmallestRangeKey, however if this span is to the left of all range
219 1 : // keys on this level, it could lead to inconsistent behaviour in relative
220 1 : // positioning operations. Consider this example, with a b-c range key:
221 1 : //
222 1 : // SeekGE(a) -> a-b:{}
223 1 : // Next() -> b-c{(#5,RANGEKEYSET,@4,foo)}
224 1 : // Prev() -> nil
225 1 : //
226 1 : // Iterators higher up in the iterator stack rely on this sort of relative
227 1 : // positioning consistency.
228 1 : //
229 1 : // TODO(bilal): Investigate ways to be able to return straddle spans in
230 1 : // cases similar to the above, while still retaining correctness.
231 1 : // Return a straddling key instead of loading the file.
232 1 : l.iterFile = f
233 1 : if l.err = l.Close(); l.err != nil {
234 0 : return l.verify(nil, l.err)
235 0 : }
236 1 : l.straddleDir = +1
237 1 : l.straddle = keyspan.Span{
238 1 : Start: prevFile.LargestRangeKey.UserKey,
239 1 : End: f.SmallestRangeKey.UserKey,
240 1 : Keys: nil,
241 1 : }
242 1 : return l.verify(&l.straddle, nil)
243 : }
244 : }
245 1 : loadFileIndicator := l.loadFile(f, +1)
246 1 : if loadFileIndicator == noFileLoaded {
247 1 : return l.verify(nil, l.err)
248 1 : }
249 1 : if span, err := l.iter.SeekGE(key); err != nil {
250 0 : return l.verify(nil, err)
251 1 : } else if span != nil {
252 1 : return l.verify(span, nil)
253 1 : }
254 1 : return l.skipEmptyFileForward()
255 : }
256 :
257 : // SeekLT implements keyspan.FragmentIterator.
258 1 : func (l *LevelIter) SeekLT(key []byte) (*keyspan.Span, error) {
259 1 : l.dir = -1
260 1 : l.straddle = keyspan.Span{}
261 1 : l.straddleDir = 0
262 1 : l.err = nil // clear cached iteration error
263 1 :
264 1 : f := l.findFileLT(key)
265 1 : if f != nil && l.keyType == manifest.KeyTypeRange && l.cmp(f.LargestRangeKey.UserKey, key) < 0 {
266 1 : // Peek at the next file.
267 1 : nextFile := l.files.Next()
268 1 : l.files.Prev()
269 1 : if nextFile != nil {
270 1 : // We could unconditionally return an empty span between f.LargestRangeKey
271 1 : // and the seek key, however if this span is to the right of all range keys
272 1 : // on this level, it could lead to inconsistent behaviour in relative
273 1 : // positioning operations. Consider this example, with a b-c range key:
274 1 : //
275 1 : // SeekLT(d) -> c-d:{}
276 1 : // Prev() -> b-c{(#5,RANGEKEYSET,@4,foo)}
277 1 : // Next() -> nil
278 1 : //
279 1 : // Iterators higher up in the iterator stack rely on this sort of relative
280 1 : // positioning consistency.
281 1 : //
282 1 : // TODO(bilal): Investigate ways to be able to return straddle spans in
283 1 : // cases similar to the above, while still retaining correctness.
284 1 : // Return a straddling key instead of loading the file.
285 1 : l.iterFile = f
286 1 : if l.err = l.Close(); l.err != nil {
287 0 : return l.verify(nil, l.err)
288 0 : }
289 1 : l.straddleDir = -1
290 1 : l.straddle = keyspan.Span{
291 1 : Start: f.LargestRangeKey.UserKey,
292 1 : End: nextFile.SmallestRangeKey.UserKey,
293 1 : Keys: nil,
294 1 : }
295 1 : return l.verify(&l.straddle, nil)
296 : }
297 : }
298 1 : if l.loadFile(f, -1) == noFileLoaded {
299 1 : return l.verify(nil, l.err)
300 1 : }
301 1 : if span, err := l.iter.SeekLT(key); err != nil {
302 0 : return l.verify(nil, err)
303 1 : } else if span != nil {
304 1 : return l.verify(span, nil)
305 1 : }
306 1 : return l.skipEmptyFileBackward()
307 : }
308 :
309 : // First implements keyspan.FragmentIterator.
310 1 : func (l *LevelIter) First() (*keyspan.Span, error) {
311 1 : l.dir = +1
312 1 : l.straddle = keyspan.Span{}
313 1 : l.straddleDir = 0
314 1 : l.err = nil // clear cached iteration error
315 1 :
316 1 : if l.loadFile(l.files.First(), +1) == noFileLoaded {
317 1 : return l.verify(nil, l.err)
318 1 : }
319 1 : if span, err := l.iter.First(); err != nil {
320 0 : return l.verify(nil, err)
321 1 : } else if span != nil {
322 1 : return l.verify(span, nil)
323 1 : }
324 1 : return l.skipEmptyFileForward()
325 : }
326 :
327 : // Last implements keyspan.FragmentIterator.
328 1 : func (l *LevelIter) Last() (*keyspan.Span, error) {
329 1 : l.dir = -1
330 1 : l.straddle = keyspan.Span{}
331 1 : l.straddleDir = 0
332 1 : l.err = nil // clear cached iteration error
333 1 :
334 1 : if l.loadFile(l.files.Last(), -1) == noFileLoaded {
335 1 : return l.verify(nil, l.err)
336 1 : }
337 1 : if span, err := l.iter.Last(); err != nil {
338 0 : return l.verify(nil, err)
339 1 : } else if span != nil {
340 1 : return l.verify(span, nil)
341 1 : }
342 1 : return l.skipEmptyFileBackward()
343 : }
344 :
345 : // Next implements keyspan.FragmentIterator.
346 1 : func (l *LevelIter) Next() (*keyspan.Span, error) {
347 1 : if l.err != nil || (l.iter == nil && l.iterFile == nil && l.dir > 0) {
348 1 : return l.verify(nil, l.err)
349 1 : }
350 1 : if l.iter == nil && l.iterFile == nil {
351 1 : // l.dir <= 0
352 1 : return l.First()
353 1 : }
354 1 : l.dir = +1
355 1 :
356 1 : if l.iter != nil {
357 1 : if span, err := l.iter.Next(); err != nil {
358 0 : return l.verify(nil, err)
359 1 : } else if span != nil {
360 1 : return l.verify(span, nil)
361 1 : }
362 : }
363 1 : return l.skipEmptyFileForward()
364 : }
365 :
366 : // Prev implements keyspan.FragmentIterator.
367 1 : func (l *LevelIter) Prev() (*keyspan.Span, error) {
368 1 : if l.err != nil || (l.iter == nil && l.iterFile == nil && l.dir < 0) {
369 1 : return l.verify(nil, l.err)
370 1 : }
371 1 : if l.iter == nil && l.iterFile == nil {
372 1 : // l.dir >= 0
373 1 : return l.Last()
374 1 : }
375 1 : l.dir = -1
376 1 :
377 1 : if l.iter != nil {
378 1 : if span, err := l.iter.Prev(); err != nil {
379 0 : return nil, err
380 1 : } else if span != nil {
381 1 : return l.verify(span, nil)
382 1 : }
383 : }
384 1 : return l.skipEmptyFileBackward()
385 : }
386 :
387 1 : func (l *LevelIter) skipEmptyFileForward() (*keyspan.Span, error) {
388 1 : if l.straddleDir == 0 && l.keyType == manifest.KeyTypeRange &&
389 1 : l.iterFile != nil && l.iter != nil {
390 1 : // We were at a file that had spans. Check if the next file that has
391 1 : // spans is not directly adjacent to the current file i.e. there is a
392 1 : // gap in the span keyspace between the two files. In that case, synthesize
393 1 : // a "straddle span" in l.straddle and return that.
394 1 : //
395 1 : // Straddle spans are not created in rangedel mode.
396 1 : if l.err = l.Close(); l.err != nil {
397 0 : return l.verify(nil, l.err)
398 0 : }
399 1 : startKey := l.iterFile.LargestRangeKey.UserKey
400 1 : // Resetting l.iterFile without loading the file into l.iter is okay and
401 1 : // does not change the logic in loadFile() as long as l.iter is also nil;
402 1 : // which it should be due to the Close() call above.
403 1 : l.iterFile = l.files.Next()
404 1 : if l.iterFile == nil {
405 1 : return l.verify(nil, nil)
406 1 : }
407 1 : endKey := l.iterFile.SmallestRangeKey.UserKey
408 1 : if l.cmp(startKey, endKey) < 0 {
409 1 : // There is a gap between the two files. Synthesize a straddling span
410 1 : // to avoid unnecessarily loading the next file.
411 1 : l.straddle = keyspan.Span{
412 1 : Start: startKey,
413 1 : End: endKey,
414 1 : }
415 1 : l.straddleDir = +1
416 1 : return l.verify(&l.straddle, nil)
417 1 : }
418 1 : } else if l.straddleDir < 0 {
419 1 : // We were at a straddle key, but are now changing directions. l.iterFile
420 1 : // was already moved backward by skipEmptyFileBackward, so advance it
421 1 : // forward.
422 1 : l.iterFile = l.files.Next()
423 1 : }
424 1 : l.straddle = keyspan.Span{}
425 1 : l.straddleDir = 0
426 1 : var span *keyspan.Span
427 1 : for span.Empty() {
428 1 : fileToLoad := l.iterFile
429 1 : if l.keyType == manifest.KeyTypePoint {
430 1 : // We haven't iterated to the next file yet if we're in point key
431 1 : // (rangedel) mode.
432 1 : fileToLoad = l.files.Next()
433 1 : }
434 1 : if l.loadFile(fileToLoad, +1) == noFileLoaded {
435 1 : return l.verify(nil, l.err)
436 1 : }
437 1 : span, l.err = l.iter.First()
438 1 : if l.err != nil {
439 0 : return l.verify(nil, l.err)
440 0 : }
441 : // In rangedel mode, we can expect to get empty files that we'd need to
442 : // skip over, but not in range key mode.
443 1 : if l.keyType == manifest.KeyTypeRange {
444 1 : break
445 : }
446 : }
447 1 : return l.verify(span, l.err)
448 : }
449 :
450 1 : func (l *LevelIter) skipEmptyFileBackward() (*keyspan.Span, error) {
451 1 : // We were at a file that had spans. Check if the previous file that has
452 1 : // spans is not directly adjacent to the current file i.e. there is a
453 1 : // gap in the span keyspace between the two files. In that case, synthesize
454 1 : // a "straddle span" in l.straddle and return that.
455 1 : //
456 1 : // Straddle spans are not created in rangedel mode.
457 1 : if l.straddleDir == 0 && l.keyType == manifest.KeyTypeRange &&
458 1 : l.iterFile != nil && l.iter != nil {
459 1 : if l.err = l.Close(); l.err != nil {
460 0 : return l.verify(nil, l.err)
461 0 : }
462 1 : endKey := l.iterFile.SmallestRangeKey.UserKey
463 1 : // Resetting l.iterFile without loading the file into l.iter is okay and
464 1 : // does not change the logic in loadFile() as long as l.iter is also nil;
465 1 : // which it should be due to the Close() call above.
466 1 : l.iterFile = l.files.Prev()
467 1 : if l.iterFile == nil {
468 1 : return l.verify(nil, nil)
469 1 : }
470 1 : startKey := l.iterFile.LargestRangeKey.UserKey
471 1 : if l.cmp(startKey, endKey) < 0 {
472 1 : // There is a gap between the two files. Synthesize a straddling span
473 1 : // to avoid unnecessarily loading the next file.
474 1 : l.straddle = keyspan.Span{
475 1 : Start: startKey,
476 1 : End: endKey,
477 1 : }
478 1 : l.straddleDir = -1
479 1 : return l.verify(&l.straddle, nil)
480 1 : }
481 1 : } else if l.straddleDir > 0 {
482 1 : // We were at a straddle key, but are now changing directions. l.iterFile
483 1 : // was already advanced forward by skipEmptyFileForward, so move it
484 1 : // backward.
485 1 : l.iterFile = l.files.Prev()
486 1 : }
487 1 : l.straddle = keyspan.Span{}
488 1 : l.straddleDir = 0
489 1 : var span *keyspan.Span
490 1 : for span.Empty() {
491 1 : fileToLoad := l.iterFile
492 1 : if l.keyType == manifest.KeyTypePoint {
493 1 : fileToLoad = l.files.Prev()
494 1 : }
495 1 : if l.loadFile(fileToLoad, -1) == noFileLoaded {
496 1 : return l.verify(nil, l.err)
497 1 : }
498 1 : span, l.err = l.iter.Last()
499 1 : if l.err != nil {
500 0 : return l.verify(span, l.err)
501 0 : }
502 : // In rangedel mode, we can expect to get empty files that we'd need to
503 : // skip over, but not in range key mode as the filter on the FileMetadata
504 : // should guarantee we always get a non-empty file.
505 1 : if l.keyType == manifest.KeyTypeRange {
506 1 : break
507 : }
508 : }
509 1 : return l.verify(span, l.err)
510 : }
511 :
512 : // verify is invoked whenever a span is returned from an iterator positioning
513 : // method to a caller. During invariant builds, it asserts invariants to the
514 : // caller.
515 1 : func (l *LevelIter) verify(s *keyspan.Span, err error) (*keyspan.Span, error) {
516 1 : // NB: Do not add any logic outside the invariants.Enabled conditional to
517 1 : // ensure that verify is always compiled away in production builds.
518 1 : if invariants.Enabled {
519 1 : if err != l.err {
520 0 : panic(errors.AssertionFailedf("LevelIter.err (%v) != returned error (%v)", l.err, err))
521 : }
522 1 : if err != nil && s != nil {
523 0 : panic(errors.AssertionFailedf("non-nil error returned alongside non-nil span"))
524 : }
525 1 : if f := l.files.Current(); f != l.iterFile {
526 0 : panic(fmt.Sprintf("LevelIter.files.Current (%s) and l.iterFile (%s) diverged",
527 0 : f, l.iterFile))
528 : }
529 : }
530 1 : return s, err
531 : }
532 :
533 : // Error implements keyspan.FragmentIterator.
534 0 : func (l *LevelIter) Error() error {
535 0 : return l.err
536 0 : }
537 :
538 : // Close implements keyspan.FragmentIterator.
539 1 : func (l *LevelIter) Close() error {
540 1 : if l.iter != nil {
541 1 : l.err = l.iter.Close()
542 1 : l.iter = nil
543 1 : }
544 1 : return l.err
545 : }
546 :
547 : // String implements keyspan.FragmentIterator.
548 0 : func (l *LevelIter) String() string {
549 0 : if l.iterFile != nil {
550 0 : return fmt.Sprintf("%s: fileNum=%s", l.level, l.iterFile.FileNum)
551 0 : }
552 0 : return fmt.Sprintf("%s: fileNum=<nil>", l.level)
553 : }
554 :
555 : // WrapChildren implements FragmentIterator.
556 0 : func (l *LevelIter) WrapChildren(wrap keyspan.WrapFn) {
557 0 : l.iter = wrap(l.iter)
558 0 : l.wrapFn = wrap
559 0 : }
|