Line data Source code
1 : // Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package keyspan
6 :
7 : import (
8 : "fmt"
9 :
10 : "github.com/cockroachdb/errors"
11 : "github.com/cockroachdb/pebble/internal/base"
12 : "github.com/cockroachdb/pebble/internal/invariants"
13 : "github.com/cockroachdb/pebble/internal/manifest"
14 : )
15 :
16 : // LevelIter provides a merged view of spans from sstables in a level.
17 : // It takes advantage of level invariants to only have one sstable span block
18 : // open at one time, opened using the newIter function passed in.
19 : type LevelIter struct {
20 : cmp base.Compare
21 : // Denotes the kind of key the level iterator should read. If the key type
22 : // is KeyTypePoint, the level iterator will read range tombstones (which
23 : // only affect point keys). If the key type is KeyTypeRange, the level
24 : // iterator will read range keys. It is invalid to configure an iterator
25 : // with the KeyTypePointAndRange key type.
26 : //
27 : // If key type is KeyTypePoint, no straddle spans are emitted between files,
28 : // and point key bounds are used to find files instead of range key bounds.
29 : //
30 : // TODO(bilal): Straddle spans can safely be produced in rangedel mode once
31 : // we can guarantee that we will never read sstables in a level that split
32 : // user keys across them. This might be guaranteed in a future release, but
33 : // as of CockroachDB 22.2 it is not guaranteed, so to be safe disable it when
34 : // keyType == KeyTypePoint
35 : keyType manifest.KeyType
36 : // The LSM level this LevelIter is initialized for. Used in logging.
37 : level manifest.Level
38 : // The below fields are used to fill in gaps between adjacent files' range
39 : // key spaces. This is an optimization to avoid unnecessarily loading files
40 : // in cases where range keys are sparse and rare. dir is set by every
41 : // positioning operation, straddleDir is set to dir whenever a straddling
42 : // Span is synthesized and the last positioning operation returned a
43 : // synthesized straddle span.
44 : //
45 : // Note that when a straddle span is initialized, iterFile is modified to
46 : // point to the next file in the straddleDir direction. A change of direction
47 : // on a straddle key therefore necessitates the value of iterFile to be
48 : // reverted.
49 : dir int
50 : straddle Span
51 : straddleDir int
52 : // The iter for the current file (iterFile). It is nil under any of the
53 : // following conditions:
54 : // - files.Current() == nil
55 : // - err != nil
56 : // - straddleDir != 0, in which case iterFile is not nil and points to the
57 : // next file (in the straddleDir direction).
58 : // - some other constraint, like the bounds in opts, caused the file at index to not
59 : // be relevant to the iteration.
60 : iter FragmentIterator
61 : wrapFn WrapFn
62 : // iterFile holds the current file.
63 : // INVARIANT: iterFile = files.Current()
64 : iterFile *manifest.FileMetadata
65 : newIter TableNewSpanIter
66 : files manifest.LevelIterator
67 : err error
68 :
69 : // The options that were passed in.
70 : tableOpts SpanIterOptions
71 :
72 : // TODO(bilal): Add InternalIteratorStats.
73 : }
74 :
75 : // LevelIter implements the keyspan.FragmentIterator interface.
76 : var _ FragmentIterator = (*LevelIter)(nil)
77 :
78 : // NewLevelIter returns a LevelIter.
79 : func NewLevelIter(
80 : opts SpanIterOptions,
81 : cmp base.Compare,
82 : newIter TableNewSpanIter,
83 : files manifest.LevelIterator,
84 : level manifest.Level,
85 : keyType manifest.KeyType,
86 1 : ) *LevelIter {
87 1 : l := &LevelIter{}
88 1 : l.Init(opts, cmp, newIter, files, level, keyType)
89 1 : return l
90 1 : }
91 :
92 : // Init initializes a LevelIter.
93 : func (l *LevelIter) Init(
94 : opts SpanIterOptions,
95 : cmp base.Compare,
96 : newIter TableNewSpanIter,
97 : files manifest.LevelIterator,
98 : level manifest.Level,
99 : keyType manifest.KeyType,
100 1 : ) {
101 1 : l.err = nil
102 1 : l.level = level
103 1 : l.tableOpts = opts
104 1 : l.cmp = cmp
105 1 : l.iterFile = nil
106 1 : l.newIter = newIter
107 1 : switch keyType {
108 1 : case manifest.KeyTypePoint:
109 1 : l.keyType = keyType
110 1 : l.files = files.Filter(keyType)
111 1 : case manifest.KeyTypeRange:
112 1 : l.keyType = keyType
113 1 : l.files = files.Filter(keyType)
114 0 : default:
115 0 : panic(fmt.Sprintf("unsupported key type: %v", keyType))
116 : }
117 : }
118 :
119 1 : func (l *LevelIter) findFileGE(key []byte) *manifest.FileMetadata {
120 1 : // Find the earliest file whose largest key is >= key.
121 1 : //
122 1 : // If the earliest file has its largest key == key and that largest key is a
123 1 : // range deletion sentinel, we know that we manufactured this sentinel to convert
124 1 : // the exclusive range deletion end key into an inclusive key (reminder: [start, end)#seqnum
125 1 : // is the form of a range deletion sentinel which can contribute a largest key = end#sentinel).
126 1 : // In this case we don't return this as the earliest file since there is nothing actually
127 1 : // equal to key in it.
128 1 :
129 1 : m := l.files.SeekGE(l.cmp, key)
130 1 : for m != nil {
131 1 : largestKey := m.LargestRangeKey
132 1 : if l.keyType == manifest.KeyTypePoint {
133 1 : largestKey = m.LargestPointKey
134 1 : }
135 1 : if !largestKey.IsExclusiveSentinel() || l.cmp(largestKey.UserKey, key) != 0 {
136 1 : break
137 : }
138 0 : m = l.files.Next()
139 : }
140 1 : return m
141 : }
142 :
143 1 : func (l *LevelIter) findFileLT(key []byte) *manifest.FileMetadata {
144 1 : // Find the last file whose smallest key is < key.
145 1 : return l.files.SeekLT(l.cmp, key)
146 1 : }
147 :
148 : type loadFileReturnIndicator int8
149 :
150 : const (
151 : noFileLoaded loadFileReturnIndicator = iota
152 : fileAlreadyLoaded
153 : newFileLoaded
154 : )
155 :
156 1 : func (l *LevelIter) loadFile(file *manifest.FileMetadata, dir int) loadFileReturnIndicator {
157 1 : indicator := noFileLoaded
158 1 : if l.iterFile == file {
159 1 : if l.err != nil {
160 0 : return noFileLoaded
161 0 : }
162 1 : if l.iter != nil {
163 1 : // We are already at the file, but we would need to check for bounds.
164 1 : // Set indicator accordingly.
165 1 : indicator = fileAlreadyLoaded
166 1 : }
167 : // We were already at file, but don't have an iterator, probably because the file was
168 : // beyond the iteration bounds. It may still be, but it is also possible that the bounds
169 : // have changed. We handle that below.
170 : }
171 :
172 : // Note that LevelIter.Close() can be called multiple times.
173 1 : if indicator != fileAlreadyLoaded {
174 1 : if err := l.Close(); err != nil {
175 0 : return noFileLoaded
176 0 : }
177 : }
178 :
179 1 : l.iterFile = file
180 1 : if file == nil {
181 1 : return noFileLoaded
182 1 : }
183 1 : if indicator != fileAlreadyLoaded {
184 1 : l.iter, l.err = l.newIter(file, l.tableOpts)
185 1 : if l.wrapFn != nil {
186 0 : l.iter = l.wrapFn(l.iter)
187 0 : }
188 1 : l.iter = MaybeAssert(l.iter, l.cmp)
189 1 : indicator = newFileLoaded
190 : }
191 1 : if l.err != nil {
192 1 : return noFileLoaded
193 1 : }
194 1 : return indicator
195 : }
196 :
197 : // SeekGE implements keyspan.FragmentIterator.
198 1 : func (l *LevelIter) SeekGE(key []byte) (*Span, error) {
199 1 : l.dir = +1
200 1 : l.straddle = Span{}
201 1 : l.straddleDir = 0
202 1 : l.err = nil // clear cached iteration error
203 1 :
204 1 : f := l.findFileGE(key)
205 1 : if f != nil && l.keyType == manifest.KeyTypeRange && l.cmp(key, f.SmallestRangeKey.UserKey) < 0 {
206 1 : // Peek at the previous file.
207 1 : prevFile := l.files.Prev()
208 1 : l.files.Next()
209 1 : if prevFile != nil {
210 1 : // We could unconditionally return an empty span between the seek key and
211 1 : // f.SmallestRangeKey, however if this span is to the left of all range
212 1 : // keys on this level, it could lead to inconsistent behaviour in relative
213 1 : // positioning operations. Consider this example, with a b-c range key:
214 1 : //
215 1 : // SeekGE(a) -> a-b:{}
216 1 : // Next() -> b-c{(#5,RANGEKEYSET,@4,foo)}
217 1 : // Prev() -> nil
218 1 : //
219 1 : // Iterators higher up in the iterator stack rely on this sort of relative
220 1 : // positioning consistency.
221 1 : //
222 1 : // TODO(bilal): Investigate ways to be able to return straddle spans in
223 1 : // cases similar to the above, while still retaining correctness.
224 1 : // Return a straddling key instead of loading the file.
225 1 : l.iterFile = f
226 1 : if l.err = l.Close(); l.err != nil {
227 0 : return l.verify(nil, l.err)
228 0 : }
229 1 : l.straddleDir = +1
230 1 : l.straddle = Span{
231 1 : Start: prevFile.LargestRangeKey.UserKey,
232 1 : End: f.SmallestRangeKey.UserKey,
233 1 : Keys: nil,
234 1 : }
235 1 : return l.verify(&l.straddle, nil)
236 : }
237 : }
238 1 : loadFileIndicator := l.loadFile(f, +1)
239 1 : if loadFileIndicator == noFileLoaded {
240 1 : return l.verify(nil, l.err)
241 1 : }
242 1 : if span, err := l.iter.SeekGE(key); err != nil {
243 0 : return l.verify(nil, err)
244 1 : } else if span != nil {
245 1 : return l.verify(span, nil)
246 1 : }
247 1 : return l.skipEmptyFileForward()
248 : }
249 :
250 : // SeekLT implements keyspan.FragmentIterator.
251 1 : func (l *LevelIter) SeekLT(key []byte) (*Span, error) {
252 1 : l.dir = -1
253 1 : l.straddle = Span{}
254 1 : l.straddleDir = 0
255 1 : l.err = nil // clear cached iteration error
256 1 :
257 1 : f := l.findFileLT(key)
258 1 : if f != nil && l.keyType == manifest.KeyTypeRange && l.cmp(f.LargestRangeKey.UserKey, key) < 0 {
259 1 : // Peek at the next file.
260 1 : nextFile := l.files.Next()
261 1 : l.files.Prev()
262 1 : if nextFile != nil {
263 1 : // We could unconditionally return an empty span between f.LargestRangeKey
264 1 : // and the seek key, however if this span is to the right of all range keys
265 1 : // on this level, it could lead to inconsistent behaviour in relative
266 1 : // positioning operations. Consider this example, with a b-c range key:
267 1 : //
268 1 : // SeekLT(d) -> c-d:{}
269 1 : // Prev() -> b-c{(#5,RANGEKEYSET,@4,foo)}
270 1 : // Next() -> nil
271 1 : //
272 1 : // Iterators higher up in the iterator stack rely on this sort of relative
273 1 : // positioning consistency.
274 1 : //
275 1 : // TODO(bilal): Investigate ways to be able to return straddle spans in
276 1 : // cases similar to the above, while still retaining correctness.
277 1 : // Return a straddling key instead of loading the file.
278 1 : l.iterFile = f
279 1 : if l.err = l.Close(); l.err != nil {
280 0 : return l.verify(nil, l.err)
281 0 : }
282 1 : l.straddleDir = -1
283 1 : l.straddle = Span{
284 1 : Start: f.LargestRangeKey.UserKey,
285 1 : End: nextFile.SmallestRangeKey.UserKey,
286 1 : Keys: nil,
287 1 : }
288 1 : return l.verify(&l.straddle, nil)
289 : }
290 : }
291 1 : if l.loadFile(f, -1) == noFileLoaded {
292 1 : return l.verify(nil, l.err)
293 1 : }
294 1 : if span, err := l.iter.SeekLT(key); err != nil {
295 0 : return l.verify(nil, err)
296 1 : } else if span != nil {
297 1 : return l.verify(span, nil)
298 1 : }
299 1 : return l.skipEmptyFileBackward()
300 : }
301 :
302 : // First implements keyspan.FragmentIterator.
303 1 : func (l *LevelIter) First() (*Span, error) {
304 1 : l.dir = +1
305 1 : l.straddle = Span{}
306 1 : l.straddleDir = 0
307 1 : l.err = nil // clear cached iteration error
308 1 :
309 1 : if l.loadFile(l.files.First(), +1) == noFileLoaded {
310 1 : return l.verify(nil, l.err)
311 1 : }
312 1 : if span, err := l.iter.First(); err != nil {
313 0 : return l.verify(nil, err)
314 1 : } else if span != nil {
315 1 : return l.verify(span, nil)
316 1 : }
317 1 : return l.skipEmptyFileForward()
318 : }
319 :
320 : // Last implements keyspan.FragmentIterator.
321 1 : func (l *LevelIter) Last() (*Span, error) {
322 1 : l.dir = -1
323 1 : l.straddle = Span{}
324 1 : l.straddleDir = 0
325 1 : l.err = nil // clear cached iteration error
326 1 :
327 1 : if l.loadFile(l.files.Last(), -1) == noFileLoaded {
328 1 : return l.verify(nil, l.err)
329 1 : }
330 1 : if span, err := l.iter.Last(); err != nil {
331 0 : return l.verify(nil, err)
332 1 : } else if span != nil {
333 1 : return l.verify(span, nil)
334 1 : }
335 1 : return l.skipEmptyFileBackward()
336 : }
337 :
338 : // Next implements keyspan.FragmentIterator.
339 1 : func (l *LevelIter) Next() (*Span, error) {
340 1 : if l.err != nil || (l.iter == nil && l.iterFile == nil && l.dir > 0) {
341 1 : return l.verify(nil, l.err)
342 1 : }
343 1 : if l.iter == nil && l.iterFile == nil {
344 1 : // l.dir <= 0
345 1 : return l.First()
346 1 : }
347 1 : l.dir = +1
348 1 :
349 1 : if l.iter != nil {
350 1 : if span, err := l.iter.Next(); err != nil {
351 0 : return l.verify(nil, err)
352 1 : } else if span != nil {
353 1 : return l.verify(span, nil)
354 1 : }
355 : }
356 1 : return l.skipEmptyFileForward()
357 : }
358 :
359 : // Prev implements keyspan.FragmentIterator.
360 1 : func (l *LevelIter) Prev() (*Span, error) {
361 1 : if l.err != nil || (l.iter == nil && l.iterFile == nil && l.dir < 0) {
362 1 : return l.verify(nil, l.err)
363 1 : }
364 1 : if l.iter == nil && l.iterFile == nil {
365 1 : // l.dir >= 0
366 1 : return l.Last()
367 1 : }
368 1 : l.dir = -1
369 1 :
370 1 : if l.iter != nil {
371 1 : if span, err := l.iter.Prev(); err != nil {
372 0 : return nil, err
373 1 : } else if span != nil {
374 1 : return l.verify(span, nil)
375 1 : }
376 : }
377 1 : return l.skipEmptyFileBackward()
378 : }
379 :
380 1 : func (l *LevelIter) skipEmptyFileForward() (*Span, error) {
381 1 : if l.straddleDir == 0 && l.keyType == manifest.KeyTypeRange &&
382 1 : l.iterFile != nil && l.iter != nil {
383 1 : // We were at a file that had spans. Check if the next file that has
384 1 : // spans is not directly adjacent to the current file i.e. there is a
385 1 : // gap in the span keyspace between the two files. In that case, synthesize
386 1 : // a "straddle span" in l.straddle and return that.
387 1 : //
388 1 : // Straddle spans are not created in rangedel mode.
389 1 : if l.err = l.Close(); l.err != nil {
390 0 : return l.verify(nil, l.err)
391 0 : }
392 1 : startKey := l.iterFile.LargestRangeKey.UserKey
393 1 : // Resetting l.iterFile without loading the file into l.iter is okay and
394 1 : // does not change the logic in loadFile() as long as l.iter is also nil;
395 1 : // which it should be due to the Close() call above.
396 1 : l.iterFile = l.files.Next()
397 1 : if l.iterFile == nil {
398 1 : return l.verify(nil, nil)
399 1 : }
400 1 : endKey := l.iterFile.SmallestRangeKey.UserKey
401 1 : if l.cmp(startKey, endKey) < 0 {
402 1 : // There is a gap between the two files. Synthesize a straddling span
403 1 : // to avoid unnecessarily loading the next file.
404 1 : l.straddle = Span{
405 1 : Start: startKey,
406 1 : End: endKey,
407 1 : }
408 1 : l.straddleDir = +1
409 1 : return l.verify(&l.straddle, nil)
410 1 : }
411 1 : } else if l.straddleDir < 0 {
412 1 : // We were at a straddle key, but are now changing directions. l.iterFile
413 1 : // was already moved backward by skipEmptyFileBackward, so advance it
414 1 : // forward.
415 1 : l.iterFile = l.files.Next()
416 1 : }
417 1 : l.straddle = Span{}
418 1 : l.straddleDir = 0
419 1 : var span *Span
420 1 : for span.Empty() {
421 1 : fileToLoad := l.iterFile
422 1 : if l.keyType == manifest.KeyTypePoint {
423 1 : // We haven't iterated to the next file yet if we're in point key
424 1 : // (rangedel) mode.
425 1 : fileToLoad = l.files.Next()
426 1 : }
427 1 : if l.loadFile(fileToLoad, +1) == noFileLoaded {
428 1 : return l.verify(nil, l.err)
429 1 : }
430 1 : span, l.err = l.iter.First()
431 1 : if l.err != nil {
432 0 : return l.verify(nil, l.err)
433 0 : }
434 : // In rangedel mode, we can expect to get empty files that we'd need to
435 : // skip over, but not in range key mode.
436 1 : if l.keyType == manifest.KeyTypeRange {
437 1 : break
438 : }
439 : }
440 1 : return l.verify(span, l.err)
441 : }
442 :
443 1 : func (l *LevelIter) skipEmptyFileBackward() (*Span, error) {
444 1 : // We were at a file that had spans. Check if the previous file that has
445 1 : // spans is not directly adjacent to the current file i.e. there is a
446 1 : // gap in the span keyspace between the two files. In that case, synthesize
447 1 : // a "straddle span" in l.straddle and return that.
448 1 : //
449 1 : // Straddle spans are not created in rangedel mode.
450 1 : if l.straddleDir == 0 && l.keyType == manifest.KeyTypeRange &&
451 1 : l.iterFile != nil && l.iter != nil {
452 1 : if l.err = l.Close(); l.err != nil {
453 0 : return l.verify(nil, l.err)
454 0 : }
455 1 : endKey := l.iterFile.SmallestRangeKey.UserKey
456 1 : // Resetting l.iterFile without loading the file into l.iter is okay and
457 1 : // does not change the logic in loadFile() as long as l.iter is also nil;
458 1 : // which it should be due to the Close() call above.
459 1 : l.iterFile = l.files.Prev()
460 1 : if l.iterFile == nil {
461 1 : return l.verify(nil, nil)
462 1 : }
463 1 : startKey := l.iterFile.LargestRangeKey.UserKey
464 1 : if l.cmp(startKey, endKey) < 0 {
465 1 : // There is a gap between the two files. Synthesize a straddling span
466 1 : // to avoid unnecessarily loading the next file.
467 1 : l.straddle = Span{
468 1 : Start: startKey,
469 1 : End: endKey,
470 1 : }
471 1 : l.straddleDir = -1
472 1 : return l.verify(&l.straddle, nil)
473 1 : }
474 1 : } else if l.straddleDir > 0 {
475 1 : // We were at a straddle key, but are now changing directions. l.iterFile
476 1 : // was already advanced forward by skipEmptyFileForward, so move it
477 1 : // backward.
478 1 : l.iterFile = l.files.Prev()
479 1 : }
480 1 : l.straddle = Span{}
481 1 : l.straddleDir = 0
482 1 : var span *Span
483 1 : for span.Empty() {
484 1 : fileToLoad := l.iterFile
485 1 : if l.keyType == manifest.KeyTypePoint {
486 1 : fileToLoad = l.files.Prev()
487 1 : }
488 1 : if l.loadFile(fileToLoad, -1) == noFileLoaded {
489 1 : return l.verify(nil, l.err)
490 1 : }
491 1 : span, l.err = l.iter.Last()
492 1 : if l.err != nil {
493 0 : return l.verify(span, l.err)
494 0 : }
495 : // In rangedel mode, we can expect to get empty files that we'd need to
496 : // skip over, but not in range key mode as the filter on the FileMetadata
497 : // should guarantee we always get a non-empty file.
498 1 : if l.keyType == manifest.KeyTypeRange {
499 1 : break
500 : }
501 : }
502 1 : return l.verify(span, l.err)
503 : }
504 :
505 : // verify is invoked whenever a span is returned from an iterator positioning
506 : // method to a caller. During invariant builds, it asserts invariants to the
507 : // caller.
508 1 : func (l *LevelIter) verify(s *Span, err error) (*Span, error) {
509 1 : // NB: Do not add any logic outside the invariants.Enabled conditional to
510 1 : // ensure that verify is always compiled away in production builds.
511 1 : if invariants.Enabled {
512 1 : if err != l.err {
513 0 : panic(errors.AssertionFailedf("LevelIter.err (%v) != returned error (%v)", l.err, err))
514 : }
515 1 : if err != nil && s != nil {
516 0 : panic(errors.AssertionFailedf("non-nil error returned alongside non-nil span"))
517 : }
518 1 : if f := l.files.Current(); f != l.iterFile {
519 0 : panic(fmt.Sprintf("LevelIter.files.Current (%s) and l.iterFile (%s) diverged",
520 0 : f, l.iterFile))
521 : }
522 : }
523 1 : return s, err
524 : }
525 :
526 : // Error implements keyspan.FragmentIterator.
527 0 : func (l *LevelIter) Error() error {
528 0 : return l.err
529 0 : }
530 :
531 : // Close implements keyspan.FragmentIterator.
532 1 : func (l *LevelIter) Close() error {
533 1 : if l.iter != nil {
534 1 : l.err = l.iter.Close()
535 1 : l.iter = nil
536 1 : }
537 1 : return l.err
538 : }
539 :
540 : // String implements keyspan.FragmentIterator.
541 0 : func (l *LevelIter) String() string {
542 0 : if l.iterFile != nil {
543 0 : return fmt.Sprintf("%s: fileNum=%s", l.level, l.iterFile.FileNum)
544 0 : }
545 0 : return fmt.Sprintf("%s: fileNum=<nil>", l.level)
546 : }
547 :
548 : // WrapChildren implements FragmentIterator.
549 0 : func (l *LevelIter) WrapChildren(wrap WrapFn) {
550 0 : l.iter = wrap(l.iter)
551 0 : l.wrapFn = wrap
552 0 : }
|