Line data Source code
1 : // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 : // of this source code is governed by a BSD-style license that can be found in 3 : // the LICENSE file. 4 : 5 : package sstable 6 : 7 : import ( 8 : "fmt" 9 : "os" 10 : "sync" 11 : 12 : "github.com/cockroachdb/pebble/internal/base" 13 : "github.com/cockroachdb/pebble/internal/invariants" 14 : "github.com/cockroachdb/pebble/sstable/block" 15 : "github.com/cockroachdb/pebble/sstable/rowblk" 16 : ) 17 : 18 : // Iterator iterates over an entire table of data. 19 : type Iterator interface { 20 : base.InternalIterator 21 : 22 : // NextPrefix implements (base.InternalIterator).NextPrefix. 23 : NextPrefix(succKey []byte) *base.InternalKV 24 : 25 : SetCloseHook(fn func(i Iterator) error) 26 : } 27 : 28 : // Iterator positioning optimizations and singleLevelIterator and 29 : // twoLevelIterator: 30 : // 31 : // An iterator is absolute positioned using one of the Seek or First or Last 32 : // calls. After absolute positioning, there can be relative positioning done 33 : // by stepping using Prev or Next. 34 : // 35 : // We implement optimizations below where an absolute positioning call can in 36 : // some cases use the current position to do less work. To understand these, 37 : // we first define some terms. An iterator is bounds-exhausted if the bounds 38 : // (upper of lower) have been reached. An iterator is data-exhausted if it has 39 : // the reached the end of the data (forward or reverse) in the sstable. A 40 : // singleLevelIterator only knows a local-data-exhausted property since when 41 : // it is used as part of a twoLevelIterator, the twoLevelIterator can step to 42 : // the next lower-level index block. 43 : // 44 : // The bounds-exhausted property is tracked by 45 : // singleLevelIterator.exhaustedBounds being +1 (upper bound reached) or -1 46 : // (lower bound reached). The same field is reused by twoLevelIterator. Either 47 : // may notice the exhaustion of the bound and set it. Note that if 48 : // singleLevelIterator sets this property, it is not a local property (since 49 : // the bound has been reached regardless of whether this is in the context of 50 : // the twoLevelIterator or not). 51 : // 52 : // The data-exhausted property is tracked in a more subtle manner. We define 53 : // two predicates: 54 : // - partial-local-data-exhausted (PLDE): 55 : // i.data.IsDataInvalidated() || !i.data.Valid() 56 : // - partial-global-data-exhausted (PGDE): 57 : // i.index.IsDataInvalidated() || !i.index.Valid() || i.data.IsDataInvalidated() || 58 : // !i.data.Valid() 59 : // 60 : // PLDE is defined for a singleLevelIterator. PGDE is defined for a 61 : // twoLevelIterator. Oddly, in our code below the singleLevelIterator does not 62 : // know when it is part of a twoLevelIterator so it does not know when its 63 : // property is local or global. 64 : // 65 : // Now to define data-exhausted: 66 : // - Prerequisite: we must know that the iterator has been positioned and 67 : // i.err is nil. 68 : // - bounds-exhausted must not be true: 69 : // If bounds-exhausted is true, we have incomplete knowledge of 70 : // data-exhausted since PLDE or PGDE could be true because we could have 71 : // chosen not to load index block or data block and figured out that the 72 : // bound is exhausted (due to block property filters filtering out index and 73 : // data blocks and going past the bound on the top level index block). Note 74 : // that if we tried to separate out the BPF case from others we could 75 : // develop more knowledge here. 76 : // - PGDE is true for twoLevelIterator. PLDE is true if it is a standalone 77 : // singleLevelIterator. !PLDE or !PGDE of course imply that data-exhausted 78 : // is not true. 79 : // 80 : // An implication of the above is that if we are going to somehow utilize 81 : // knowledge of data-exhausted in an optimization, we must not forget the 82 : // existing value of bounds-exhausted since by forgetting the latter we can 83 : // erroneously think that data-exhausted is true. Bug #2036 was due to this 84 : // forgetting. 85 : // 86 : // Now to the two categories of optimizations we currently have: 87 : // - Monotonic bounds optimization that reuse prior iterator position when 88 : // doing seek: These only work with !data-exhausted. We could choose to make 89 : // these work with data-exhausted but have not bothered because in the 90 : // context of a DB if data-exhausted were true, the DB would move to the 91 : // next file in the level. Note that this behavior of moving to the next 92 : // file is not necessarily true for L0 files, so there could be some benefit 93 : // in the future in this optimization. See the WARNING-data-exhausted 94 : // comments if trying to optimize this in the future. 95 : // - TrySeekUsingNext optimizations: these work regardless of exhaustion 96 : // state. 97 : // 98 : // Implementation detail: In the code PLDE only checks that 99 : // i.data.IsDataInvalidated(). This narrower check is safe, since this is a 100 : // subset of the set expressed by the OR expression. Also, it is not a 101 : // de-optimization since whenever we exhaust the iterator we explicitly call 102 : // i.data.Invalidate(). PGDE checks i.index.IsDataInvalidated() && 103 : // i.data.IsDataInvalidated(). Again, this narrower check is safe, and not a 104 : // de-optimization since whenever we exhaust the iterator we explicitly call 105 : // i.index.Invalidate() and i.data.Invalidate(). The && is questionable -- for 106 : // now this is a bit of defensive code. We should seriously consider removing 107 : // it, since defensive code suggests we are not confident about our invariants 108 : // (and if we are not confident, we need more invariant assertions, not 109 : // defensive code). 110 : // 111 : // TODO(sumeer): remove the aforementioned defensive code. 112 : 113 : var ( 114 : singleLevelIterRowBlockPool sync.Pool // *singleLevelIterator[rowblk.IndexIter, *rowblk.IndexIter, rowblk.Iter, *rowblk.Iter] 115 : twoLevelIterRowBlockPool sync.Pool // *twoLevelIterator[rowblk.IndexIter, *rowblk.IndexIter, rowblk.Iter, *rowblk.Iter] 116 : ) 117 : 118 2 : func init() { 119 2 : singleLevelIterRowBlockPool = sync.Pool{ 120 2 : New: func() interface{} { 121 2 : i := &singleLevelIterator[rowblk.IndexIter, *rowblk.IndexIter, rowblk.Iter, *rowblk.Iter]{pool: &singleLevelIterRowBlockPool} 122 2 : // Note: this is a no-op if invariants are disabled or race is enabled. 123 2 : invariants.SetFinalizer(i, checkSingleLevelIterator[rowblk.IndexIter, *rowblk.IndexIter, rowblk.Iter, *rowblk.Iter]) 124 2 : return i 125 2 : }, 126 : } 127 2 : twoLevelIterRowBlockPool = sync.Pool{ 128 2 : New: func() interface{} { 129 2 : i := &twoLevelIterator[rowblk.IndexIter, *rowblk.IndexIter, rowblk.Iter, *rowblk.Iter]{pool: &twoLevelIterRowBlockPool} 130 2 : // Note: this is a no-op if invariants are disabled or race is enabled. 131 2 : invariants.SetFinalizer(i, checkTwoLevelIterator[rowblk.IndexIter, *rowblk.IndexIter, rowblk.Iter, *rowblk.Iter]) 132 2 : return i 133 2 : }, 134 : } 135 : } 136 : 137 : func checkSingleLevelIterator[I any, PI block.IndexBlockIterator[I], D any, PD block.DataBlockIterator[D]]( 138 : obj interface{}, 139 2 : ) { 140 2 : i := obj.(*singleLevelIterator[I, PI, D, PD]) 141 2 : if p := PD(&i.data).Handle().Get(); p != nil { 142 0 : fmt.Fprintf(os.Stderr, "singleLevelIterator.data.handle is not nil: %p\n", p) 143 0 : os.Exit(1) 144 0 : } 145 2 : if p := PI(&i.index).Handle().Get(); p != nil { 146 0 : fmt.Fprintf(os.Stderr, "singleLevelIterator.index.handle is not nil: %p\n", p) 147 0 : os.Exit(1) 148 0 : } 149 : } 150 : 151 : func checkTwoLevelIterator[I any, PI block.IndexBlockIterator[I], D any, PD block.DataBlockIterator[D]]( 152 : obj interface{}, 153 2 : ) { 154 2 : i := obj.(*twoLevelIterator[I, PI, D, PD]) 155 2 : if p := PD(&i.secondLevel.data).Handle().Get(); p != nil { 156 0 : fmt.Fprintf(os.Stderr, "singleLevelIterator.data.handle is not nil: %p\n", p) 157 0 : os.Exit(1) 158 0 : } 159 2 : if p := PI(&i.secondLevel.index).Handle().Get(); p != nil { 160 0 : fmt.Fprintf(os.Stderr, "singleLevelIterator.index.handle is not nil: %p\n", p) 161 0 : os.Exit(1) 162 0 : } 163 : }