Line data Source code
1 : // Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package keyspanimpl
6 :
7 : import (
8 : "fmt"
9 :
10 : "github.com/cockroachdb/errors"
11 : "github.com/cockroachdb/pebble/internal/base"
12 : "github.com/cockroachdb/pebble/internal/invariants"
13 : "github.com/cockroachdb/pebble/internal/keyspan"
14 : "github.com/cockroachdb/pebble/internal/manifest"
15 : )
16 :
17 : // TableNewSpanIter creates a new iterator for range key spans for the given
18 : // file.
19 : type TableNewSpanIter func(
20 : file *manifest.FileMetadata, iterOptions keyspan.SpanIterOptions,
21 : ) (keyspan.FragmentIterator, error)
22 :
23 : // LevelIter provides a merged view of spans from sstables in a level.
24 : // It takes advantage of level invariants to only have one sstable span block
25 : // open at one time, opened using the newIter function passed in.
26 : type LevelIter struct {
27 : cmp base.Compare
28 : // Denotes the kind of key the level iterator should read. If the key type
29 : // is KeyTypePoint, the level iterator will read range tombstones (which
30 : // only affect point keys). If the key type is KeyTypeRange, the level
31 : // iterator will read range keys. It is invalid to configure an iterator
32 : // with the KeyTypePointAndRange key type.
33 : //
34 : // If key type is KeyTypePoint, no straddle spans are emitted between files,
35 : // and point key bounds are used to find files instead of range key bounds.
36 : //
37 : // TODO(bilal): Straddle spans can safely be produced in rangedel mode once
38 : // we can guarantee that we will never read sstables in a level that split
39 : // user keys across them. This might be guaranteed in a future release, but
40 : // as of CockroachDB 22.2 it is not guaranteed, so to be safe disable it when
41 : // keyType == KeyTypePoint
42 : keyType manifest.KeyType
43 : // The LSM level this LevelIter is initialized for. Used in logging.
44 : level manifest.Level
45 : // The below fields are used to fill in gaps between adjacent files' range
46 : // key spaces. This is an optimization to avoid unnecessarily loading files
47 : // in cases where range keys are sparse and rare. dir is set by every
48 : // positioning operation, straddleDir is set to dir whenever a straddling
49 : // Span is synthesized and the last positioning operation returned a
50 : // synthesized straddle span.
51 : //
52 : // Note that when a straddle span is initialized, iterFile is modified to
53 : // point to the next file in the straddleDir direction. A change of direction
54 : // on a straddle key therefore necessitates the value of iterFile to be
55 : // reverted.
56 : dir int
57 : straddle keyspan.Span
58 : straddleDir int
59 : // The iter for the current file (iterFile). It is nil under any of the
60 : // following conditions:
61 : // - files.Current() == nil
62 : // - err != nil
63 : // - straddleDir != 0, in which case iterFile is not nil and points to the
64 : // next file (in the straddleDir direction).
65 : // - some other constraint, like the bounds in opts, caused the file at index to not
66 : // be relevant to the iteration.
67 : iter keyspan.FragmentIterator
68 : wrapFn keyspan.WrapFn
69 : // iterFile holds the current file.
70 : // INVARIANT: iterFile = files.Current()
71 : iterFile *manifest.FileMetadata
72 : newIter TableNewSpanIter
73 : files manifest.LevelIterator
74 : err error
75 :
76 : // The options that were passed in.
77 : tableOpts keyspan.SpanIterOptions
78 :
79 : // TODO(bilal): Add InternalIteratorStats.
80 : }
81 :
82 : // LevelIter implements the keyspan.FragmentIterator interface.
83 : var _ keyspan.FragmentIterator = (*LevelIter)(nil)
84 :
85 : // NewLevelIter returns a LevelIter.
86 : func NewLevelIter(
87 : opts keyspan.SpanIterOptions,
88 : cmp base.Compare,
89 : newIter TableNewSpanIter,
90 : files manifest.LevelIterator,
91 : level manifest.Level,
92 : keyType manifest.KeyType,
93 2 : ) *LevelIter {
94 2 : l := &LevelIter{}
95 2 : l.Init(opts, cmp, newIter, files, level, keyType)
96 2 : return l
97 2 : }
98 :
99 : // Init initializes a LevelIter.
100 : func (l *LevelIter) Init(
101 : opts keyspan.SpanIterOptions,
102 : cmp base.Compare,
103 : newIter TableNewSpanIter,
104 : files manifest.LevelIterator,
105 : level manifest.Level,
106 : keyType manifest.KeyType,
107 2 : ) {
108 2 : l.err = nil
109 2 : l.level = level
110 2 : l.tableOpts = opts
111 2 : l.cmp = cmp
112 2 : l.iterFile = nil
113 2 : l.newIter = newIter
114 2 : switch keyType {
115 2 : case manifest.KeyTypePoint:
116 2 : l.keyType = keyType
117 2 : l.files = files.Filter(keyType)
118 2 : case manifest.KeyTypeRange:
119 2 : l.keyType = keyType
120 2 : l.files = files.Filter(keyType)
121 0 : default:
122 0 : panic(fmt.Sprintf("unsupported key type: %v", keyType))
123 : }
124 : }
125 :
126 : type loadFileReturnIndicator int8
127 :
128 : const (
129 : noFileLoaded loadFileReturnIndicator = iota
130 : fileAlreadyLoaded
131 : newFileLoaded
132 : )
133 :
134 2 : func (l *LevelIter) loadFile(file *manifest.FileMetadata, dir int) loadFileReturnIndicator {
135 2 : indicator := noFileLoaded
136 2 : if l.iterFile == file {
137 2 : if l.err != nil {
138 0 : return noFileLoaded
139 0 : }
140 2 : if l.iter != nil {
141 2 : // We are already at the file, but we would need to check for bounds.
142 2 : // Set indicator accordingly.
143 2 : indicator = fileAlreadyLoaded
144 2 : }
145 : // We were already at file, but don't have an iterator, probably because the file was
146 : // beyond the iteration bounds. It may still be, but it is also possible that the bounds
147 : // have changed. We handle that below.
148 : }
149 :
150 : // Note that LevelIter.Close() can be called multiple times.
151 2 : if indicator != fileAlreadyLoaded {
152 2 : if err := l.Close(); err != nil {
153 0 : return noFileLoaded
154 0 : }
155 : }
156 :
157 2 : l.iterFile = file
158 2 : if file == nil {
159 2 : return noFileLoaded
160 2 : }
161 2 : if indicator != fileAlreadyLoaded {
162 2 : l.iter, l.err = l.newIter(file, l.tableOpts)
163 2 : if l.wrapFn != nil {
164 0 : l.iter = l.wrapFn(l.iter)
165 0 : }
166 2 : l.iter = keyspan.MaybeAssert(l.iter, l.cmp)
167 2 : indicator = newFileLoaded
168 : }
169 2 : if l.err != nil {
170 1 : return noFileLoaded
171 1 : }
172 2 : return indicator
173 : }
174 :
175 : // SeekGE implements keyspan.FragmentIterator.
176 2 : func (l *LevelIter) SeekGE(key []byte) (*keyspan.Span, error) {
177 2 : l.dir = +1
178 2 : l.straddle = keyspan.Span{}
179 2 : l.straddleDir = 0
180 2 : l.err = nil // clear cached iteration error
181 2 :
182 2 : f := l.files.SeekGE(l.cmp, key)
183 2 : if f != nil && l.keyType == manifest.KeyTypeRange && l.cmp(key, f.SmallestRangeKey.UserKey) < 0 {
184 2 : // Peek at the previous file.
185 2 : prevFile := l.files.Prev()
186 2 : l.files.Next()
187 2 : if prevFile != nil {
188 2 : // We could unconditionally return an empty span between the seek key and
189 2 : // f.SmallestRangeKey, however if this span is to the left of all range
190 2 : // keys on this level, it could lead to inconsistent behaviour in relative
191 2 : // positioning operations. Consider this example, with a b-c range key:
192 2 : //
193 2 : // SeekGE(a) -> a-b:{}
194 2 : // Next() -> b-c{(#5,RANGEKEYSET,@4,foo)}
195 2 : // Prev() -> nil
196 2 : //
197 2 : // Iterators higher up in the iterator stack rely on this sort of relative
198 2 : // positioning consistency.
199 2 : //
200 2 : // TODO(bilal): Investigate ways to be able to return straddle spans in
201 2 : // cases similar to the above, while still retaining correctness.
202 2 : // Return a straddling key instead of loading the file.
203 2 : l.iterFile = f
204 2 : if l.err = l.Close(); l.err != nil {
205 0 : return l.verify(nil, l.err)
206 0 : }
207 2 : l.straddleDir = +1
208 2 : l.straddle = keyspan.Span{
209 2 : Start: prevFile.LargestRangeKey.UserKey,
210 2 : End: f.SmallestRangeKey.UserKey,
211 2 : Keys: nil,
212 2 : }
213 2 : return l.verify(&l.straddle, nil)
214 : }
215 : }
216 2 : loadFileIndicator := l.loadFile(f, +1)
217 2 : if loadFileIndicator == noFileLoaded {
218 2 : return l.verify(nil, l.err)
219 2 : }
220 2 : if span, err := l.iter.SeekGE(key); err != nil {
221 0 : return l.verify(nil, err)
222 2 : } else if span != nil {
223 2 : return l.verify(span, nil)
224 2 : }
225 2 : return l.skipEmptyFileForward()
226 : }
227 :
228 : // SeekLT implements keyspan.FragmentIterator.
229 2 : func (l *LevelIter) SeekLT(key []byte) (*keyspan.Span, error) {
230 2 : l.dir = -1
231 2 : l.straddle = keyspan.Span{}
232 2 : l.straddleDir = 0
233 2 : l.err = nil // clear cached iteration error
234 2 :
235 2 : f := l.files.SeekLT(l.cmp, key)
236 2 : if f != nil && l.keyType == manifest.KeyTypeRange && l.cmp(f.LargestRangeKey.UserKey, key) < 0 {
237 2 : // Peek at the next file.
238 2 : nextFile := l.files.Next()
239 2 : l.files.Prev()
240 2 : if nextFile != nil {
241 2 : // We could unconditionally return an empty span between f.LargestRangeKey
242 2 : // and the seek key, however if this span is to the right of all range keys
243 2 : // on this level, it could lead to inconsistent behaviour in relative
244 2 : // positioning operations. Consider this example, with a b-c range key:
245 2 : //
246 2 : // SeekLT(d) -> c-d:{}
247 2 : // Prev() -> b-c{(#5,RANGEKEYSET,@4,foo)}
248 2 : // Next() -> nil
249 2 : //
250 2 : // Iterators higher up in the iterator stack rely on this sort of relative
251 2 : // positioning consistency.
252 2 : //
253 2 : // TODO(bilal): Investigate ways to be able to return straddle spans in
254 2 : // cases similar to the above, while still retaining correctness.
255 2 : // Return a straddling key instead of loading the file.
256 2 : l.iterFile = f
257 2 : if l.err = l.Close(); l.err != nil {
258 0 : return l.verify(nil, l.err)
259 0 : }
260 2 : l.straddleDir = -1
261 2 : l.straddle = keyspan.Span{
262 2 : Start: f.LargestRangeKey.UserKey,
263 2 : End: nextFile.SmallestRangeKey.UserKey,
264 2 : Keys: nil,
265 2 : }
266 2 : return l.verify(&l.straddle, nil)
267 : }
268 : }
269 2 : if l.loadFile(f, -1) == noFileLoaded {
270 2 : return l.verify(nil, l.err)
271 2 : }
272 2 : if span, err := l.iter.SeekLT(key); err != nil {
273 0 : return l.verify(nil, err)
274 2 : } else if span != nil {
275 2 : return l.verify(span, nil)
276 2 : }
277 2 : return l.skipEmptyFileBackward()
278 : }
279 :
280 : // First implements keyspan.FragmentIterator.
281 2 : func (l *LevelIter) First() (*keyspan.Span, error) {
282 2 : l.dir = +1
283 2 : l.straddle = keyspan.Span{}
284 2 : l.straddleDir = 0
285 2 : l.err = nil // clear cached iteration error
286 2 :
287 2 : if l.loadFile(l.files.First(), +1) == noFileLoaded {
288 2 : return l.verify(nil, l.err)
289 2 : }
290 2 : if span, err := l.iter.First(); err != nil {
291 0 : return l.verify(nil, err)
292 2 : } else if span != nil {
293 2 : return l.verify(span, nil)
294 2 : }
295 2 : return l.skipEmptyFileForward()
296 : }
297 :
298 : // Last implements keyspan.FragmentIterator.
299 2 : func (l *LevelIter) Last() (*keyspan.Span, error) {
300 2 : l.dir = -1
301 2 : l.straddle = keyspan.Span{}
302 2 : l.straddleDir = 0
303 2 : l.err = nil // clear cached iteration error
304 2 :
305 2 : if l.loadFile(l.files.Last(), -1) == noFileLoaded {
306 2 : return l.verify(nil, l.err)
307 2 : }
308 2 : if span, err := l.iter.Last(); err != nil {
309 0 : return l.verify(nil, err)
310 2 : } else if span != nil {
311 2 : return l.verify(span, nil)
312 2 : }
313 2 : return l.skipEmptyFileBackward()
314 : }
315 :
316 : // Next implements keyspan.FragmentIterator.
317 2 : func (l *LevelIter) Next() (*keyspan.Span, error) {
318 2 : if l.err != nil || (l.iter == nil && l.iterFile == nil && l.dir > 0) {
319 1 : return l.verify(nil, l.err)
320 1 : }
321 2 : if l.iter == nil && l.iterFile == nil {
322 2 : // l.dir <= 0
323 2 : return l.First()
324 2 : }
325 2 : l.dir = +1
326 2 :
327 2 : if l.iter != nil {
328 2 : if span, err := l.iter.Next(); err != nil {
329 0 : return l.verify(nil, err)
330 2 : } else if span != nil {
331 2 : return l.verify(span, nil)
332 2 : }
333 : }
334 2 : return l.skipEmptyFileForward()
335 : }
336 :
337 : // Prev implements keyspan.FragmentIterator.
338 2 : func (l *LevelIter) Prev() (*keyspan.Span, error) {
339 2 : if l.err != nil || (l.iter == nil && l.iterFile == nil && l.dir < 0) {
340 1 : return l.verify(nil, l.err)
341 1 : }
342 2 : if l.iter == nil && l.iterFile == nil {
343 2 : // l.dir >= 0
344 2 : return l.Last()
345 2 : }
346 2 : l.dir = -1
347 2 :
348 2 : if l.iter != nil {
349 2 : if span, err := l.iter.Prev(); err != nil {
350 0 : return nil, err
351 2 : } else if span != nil {
352 2 : return l.verify(span, nil)
353 2 : }
354 : }
355 2 : return l.skipEmptyFileBackward()
356 : }
357 :
358 2 : func (l *LevelIter) skipEmptyFileForward() (*keyspan.Span, error) {
359 2 : if l.straddleDir == 0 && l.keyType == manifest.KeyTypeRange &&
360 2 : l.iterFile != nil && l.iter != nil {
361 2 : // We were at a file that had spans. Check if the next file that has
362 2 : // spans is not directly adjacent to the current file i.e. there is a
363 2 : // gap in the span keyspace between the two files. In that case, synthesize
364 2 : // a "straddle span" in l.straddle and return that.
365 2 : //
366 2 : // Straddle spans are not created in rangedel mode.
367 2 : if l.err = l.Close(); l.err != nil {
368 0 : return l.verify(nil, l.err)
369 0 : }
370 2 : startKey := l.iterFile.LargestRangeKey.UserKey
371 2 : // Resetting l.iterFile without loading the file into l.iter is okay and
372 2 : // does not change the logic in loadFile() as long as l.iter is also nil;
373 2 : // which it should be due to the Close() call above.
374 2 : l.iterFile = l.files.Next()
375 2 : if l.iterFile == nil {
376 2 : return l.verify(nil, nil)
377 2 : }
378 2 : endKey := l.iterFile.SmallestRangeKey.UserKey
379 2 : if l.cmp(startKey, endKey) < 0 {
380 2 : // There is a gap between the two files. Synthesize a straddling span
381 2 : // to avoid unnecessarily loading the next file.
382 2 : l.straddle = keyspan.Span{
383 2 : Start: startKey,
384 2 : End: endKey,
385 2 : }
386 2 : l.straddleDir = +1
387 2 : return l.verify(&l.straddle, nil)
388 2 : }
389 2 : } else if l.straddleDir < 0 {
390 2 : // We were at a straddle key, but are now changing directions. l.iterFile
391 2 : // was already moved backward by skipEmptyFileBackward, so advance it
392 2 : // forward.
393 2 : l.iterFile = l.files.Next()
394 2 : }
395 2 : l.straddle = keyspan.Span{}
396 2 : l.straddleDir = 0
397 2 : var span *keyspan.Span
398 2 : for span.Empty() {
399 2 : fileToLoad := l.iterFile
400 2 : if l.keyType == manifest.KeyTypePoint {
401 2 : // We haven't iterated to the next file yet if we're in point key
402 2 : // (rangedel) mode.
403 2 : fileToLoad = l.files.Next()
404 2 : }
405 2 : if l.loadFile(fileToLoad, +1) == noFileLoaded {
406 2 : return l.verify(nil, l.err)
407 2 : }
408 2 : span, l.err = l.iter.First()
409 2 : if l.err != nil {
410 0 : return l.verify(nil, l.err)
411 0 : }
412 : // In rangedel mode, we can expect to get empty files that we'd need to
413 : // skip over, but not in range key mode.
414 2 : if l.keyType == manifest.KeyTypeRange {
415 2 : break
416 : }
417 : }
418 2 : return l.verify(span, l.err)
419 : }
420 :
421 2 : func (l *LevelIter) skipEmptyFileBackward() (*keyspan.Span, error) {
422 2 : // We were at a file that had spans. Check if the previous file that has
423 2 : // spans is not directly adjacent to the current file i.e. there is a
424 2 : // gap in the span keyspace between the two files. In that case, synthesize
425 2 : // a "straddle span" in l.straddle and return that.
426 2 : //
427 2 : // Straddle spans are not created in rangedel mode.
428 2 : if l.straddleDir == 0 && l.keyType == manifest.KeyTypeRange &&
429 2 : l.iterFile != nil && l.iter != nil {
430 2 : if l.err = l.Close(); l.err != nil {
431 0 : return l.verify(nil, l.err)
432 0 : }
433 2 : endKey := l.iterFile.SmallestRangeKey.UserKey
434 2 : // Resetting l.iterFile without loading the file into l.iter is okay and
435 2 : // does not change the logic in loadFile() as long as l.iter is also nil;
436 2 : // which it should be due to the Close() call above.
437 2 : l.iterFile = l.files.Prev()
438 2 : if l.iterFile == nil {
439 2 : return l.verify(nil, nil)
440 2 : }
441 2 : startKey := l.iterFile.LargestRangeKey.UserKey
442 2 : if l.cmp(startKey, endKey) < 0 {
443 2 : // There is a gap between the two files. Synthesize a straddling span
444 2 : // to avoid unnecessarily loading the next file.
445 2 : l.straddle = keyspan.Span{
446 2 : Start: startKey,
447 2 : End: endKey,
448 2 : }
449 2 : l.straddleDir = -1
450 2 : return l.verify(&l.straddle, nil)
451 2 : }
452 2 : } else if l.straddleDir > 0 {
453 2 : // We were at a straddle key, but are now changing directions. l.iterFile
454 2 : // was already advanced forward by skipEmptyFileForward, so move it
455 2 : // backward.
456 2 : l.iterFile = l.files.Prev()
457 2 : }
458 2 : l.straddle = keyspan.Span{}
459 2 : l.straddleDir = 0
460 2 : var span *keyspan.Span
461 2 : for span.Empty() {
462 2 : fileToLoad := l.iterFile
463 2 : if l.keyType == manifest.KeyTypePoint {
464 2 : fileToLoad = l.files.Prev()
465 2 : }
466 2 : if l.loadFile(fileToLoad, -1) == noFileLoaded {
467 2 : return l.verify(nil, l.err)
468 2 : }
469 2 : span, l.err = l.iter.Last()
470 2 : if l.err != nil {
471 0 : return l.verify(span, l.err)
472 0 : }
473 : // In rangedel mode, we can expect to get empty files that we'd need to
474 : // skip over, but not in range key mode as the filter on the FileMetadata
475 : // should guarantee we always get a non-empty file.
476 2 : if l.keyType == manifest.KeyTypeRange {
477 2 : break
478 : }
479 : }
480 2 : return l.verify(span, l.err)
481 : }
482 :
483 : // verify is invoked whenever a span is returned from an iterator positioning
484 : // method to a caller. During invariant builds, it asserts invariants to the
485 : // caller.
486 2 : func (l *LevelIter) verify(s *keyspan.Span, err error) (*keyspan.Span, error) {
487 2 : // NB: Do not add any logic outside the invariants.Enabled conditional to
488 2 : // ensure that verify is always compiled away in production builds.
489 2 : if invariants.Enabled {
490 2 : if err != l.err {
491 0 : panic(errors.AssertionFailedf("LevelIter.err (%v) != returned error (%v)", l.err, err))
492 : }
493 2 : if err != nil && s != nil {
494 0 : panic(errors.AssertionFailedf("non-nil error returned alongside non-nil span"))
495 : }
496 2 : if f := l.files.Current(); f != l.iterFile {
497 0 : panic(fmt.Sprintf("LevelIter.files.Current (%s) and l.iterFile (%s) diverged",
498 0 : f, l.iterFile))
499 : }
500 : }
501 2 : return s, err
502 : }
503 :
504 : // Error implements keyspan.FragmentIterator.
505 0 : func (l *LevelIter) Error() error {
506 0 : return l.err
507 0 : }
508 :
509 : // Close implements keyspan.FragmentIterator.
510 2 : func (l *LevelIter) Close() error {
511 2 : if l.iter != nil {
512 2 : l.err = l.iter.Close()
513 2 : l.iter = nil
514 2 : }
515 2 : return l.err
516 : }
517 :
518 : // String implements keyspan.FragmentIterator.
519 0 : func (l *LevelIter) String() string {
520 0 : if l.iterFile != nil {
521 0 : return fmt.Sprintf("%s: fileNum=%s", l.level, l.iterFile.FileNum)
522 0 : }
523 0 : return fmt.Sprintf("%s: fileNum=<nil>", l.level)
524 : }
525 :
526 : // WrapChildren implements FragmentIterator.
527 0 : func (l *LevelIter) WrapChildren(wrap keyspan.WrapFn) {
528 0 : l.iter = wrap(l.iter)
529 0 : l.wrapFn = wrap
530 0 : }
|