Line data Source code
1 : // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 : // of this source code is governed by a BSD-style license that can be found in 3 : // the LICENSE file. 4 : 5 : package sstable 6 : 7 : import ( 8 : "fmt" 9 : "os" 10 : "sync" 11 : 12 : "github.com/cockroachdb/pebble/internal/base" 13 : "github.com/cockroachdb/pebble/internal/invariants" 14 : ) 15 : 16 : // Iterator iterates over an entire table of data. 17 : type Iterator interface { 18 : base.InternalIterator 19 : 20 : // NextPrefix implements (base.InternalIterator).NextPrefix. 21 : NextPrefix(succKey []byte) *base.InternalKV 22 : 23 : SetCloseHook(fn func(i Iterator) error) 24 : } 25 : 26 : // Iterator positioning optimizations and singleLevelIterator and 27 : // twoLevelIterator: 28 : // 29 : // An iterator is absolute positioned using one of the Seek or First or Last 30 : // calls. After absolute positioning, there can be relative positioning done 31 : // by stepping using Prev or Next. 32 : // 33 : // We implement optimizations below where an absolute positioning call can in 34 : // some cases use the current position to do less work. To understand these, 35 : // we first define some terms. An iterator is bounds-exhausted if the bounds 36 : // (upper of lower) have been reached. An iterator is data-exhausted if it has 37 : // the reached the end of the data (forward or reverse) in the sstable. A 38 : // singleLevelIterator only knows a local-data-exhausted property since when 39 : // it is used as part of a twoLevelIterator, the twoLevelIterator can step to 40 : // the next lower-level index block. 41 : // 42 : // The bounds-exhausted property is tracked by 43 : // singleLevelIterator.exhaustedBounds being +1 (upper bound reached) or -1 44 : // (lower bound reached). The same field is reused by twoLevelIterator. Either 45 : // may notice the exhaustion of the bound and set it. Note that if 46 : // singleLevelIterator sets this property, it is not a local property (since 47 : // the bound has been reached regardless of whether this is in the context of 48 : // the twoLevelIterator or not). 49 : // 50 : // The data-exhausted property is tracked in a more subtle manner. We define 51 : // two predicates: 52 : // - partial-local-data-exhausted (PLDE): 53 : // i.data.isDataInvalidated() || !i.data.valid() 54 : // - partial-global-data-exhausted (PGDE): 55 : // i.index.isDataInvalidated() || !i.index.valid() || i.data.isDataInvalidated() || 56 : // !i.data.valid() 57 : // 58 : // PLDE is defined for a singleLevelIterator. PGDE is defined for a 59 : // twoLevelIterator. Oddly, in our code below the singleLevelIterator does not 60 : // know when it is part of a twoLevelIterator so it does not know when its 61 : // property is local or global. 62 : // 63 : // Now to define data-exhausted: 64 : // - Prerequisite: we must know that the iterator has been positioned and 65 : // i.err is nil. 66 : // - bounds-exhausted must not be true: 67 : // If bounds-exhausted is true, we have incomplete knowledge of 68 : // data-exhausted since PLDE or PGDE could be true because we could have 69 : // chosen not to load index block or data block and figured out that the 70 : // bound is exhausted (due to block property filters filtering out index and 71 : // data blocks and going past the bound on the top level index block). Note 72 : // that if we tried to separate out the BPF case from others we could 73 : // develop more knowledge here. 74 : // - PGDE is true for twoLevelIterator. PLDE is true if it is a standalone 75 : // singleLevelIterator. !PLDE or !PGDE of course imply that data-exhausted 76 : // is not true. 77 : // 78 : // An implication of the above is that if we are going to somehow utilize 79 : // knowledge of data-exhausted in an optimization, we must not forget the 80 : // existing value of bounds-exhausted since by forgetting the latter we can 81 : // erroneously think that data-exhausted is true. Bug #2036 was due to this 82 : // forgetting. 83 : // 84 : // Now to the two categories of optimizations we currently have: 85 : // - Monotonic bounds optimization that reuse prior iterator position when 86 : // doing seek: These only work with !data-exhausted. We could choose to make 87 : // these work with data-exhausted but have not bothered because in the 88 : // context of a DB if data-exhausted were true, the DB would move to the 89 : // next file in the level. Note that this behavior of moving to the next 90 : // file is not necessarily true for L0 files, so there could be some benefit 91 : // in the future in this optimization. See the WARNING-data-exhausted 92 : // comments if trying to optimize this in the future. 93 : // - TrySeekUsingNext optimizations: these work regardless of exhaustion 94 : // state. 95 : // 96 : // Implementation detail: In the code PLDE only checks that 97 : // i.data.isDataInvalidated(). This narrower check is safe, since this is a 98 : // subset of the set expressed by the OR expression. Also, it is not a 99 : // de-optimization since whenever we exhaust the iterator we explicitly call 100 : // i.data.invalidate(). PGDE checks i.index.isDataInvalidated() && 101 : // i.data.isDataInvalidated(). Again, this narrower check is safe, and not a 102 : // de-optimization since whenever we exhaust the iterator we explicitly call 103 : // i.index.invalidate() and i.data.invalidate(). The && is questionable -- for 104 : // now this is a bit of defensive code. We should seriously consider removing 105 : // it, since defensive code suggests we are not confident about our invariants 106 : // (and if we are not confident, we need more invariant assertions, not 107 : // defensive code). 108 : // 109 : // TODO(sumeer): remove the aforementioned defensive code. 110 : 111 : var singleLevelIterPool = sync.Pool{ 112 2 : New: func() interface{} { 113 2 : i := &singleLevelIterator{} 114 2 : // Note: this is a no-op if invariants are disabled or race is enabled. 115 2 : invariants.SetFinalizer(i, checkSingleLevelIterator) 116 2 : return i 117 2 : }, 118 : } 119 : 120 : var twoLevelIterPool = sync.Pool{ 121 2 : New: func() interface{} { 122 2 : i := &twoLevelIterator{} 123 2 : // Note: this is a no-op if invariants are disabled or race is enabled. 124 2 : invariants.SetFinalizer(i, checkTwoLevelIterator) 125 2 : return i 126 2 : }, 127 : } 128 : 129 2 : func checkSingleLevelIterator(obj interface{}) { 130 2 : i := obj.(*singleLevelIterator) 131 2 : if p := i.data.Handle().Get(); p != nil { 132 0 : fmt.Fprintf(os.Stderr, "singleLevelIterator.data.handle is not nil: %p\n", p) 133 0 : os.Exit(1) 134 0 : } 135 2 : if p := i.index.Handle().Get(); p != nil { 136 0 : fmt.Fprintf(os.Stderr, "singleLevelIterator.index.handle is not nil: %p\n", p) 137 0 : os.Exit(1) 138 0 : } 139 : } 140 : 141 2 : func checkTwoLevelIterator(obj interface{}) { 142 2 : i := obj.(*twoLevelIterator) 143 2 : if p := i.data.Handle().Get(); p != nil { 144 0 : fmt.Fprintf(os.Stderr, "singleLevelIterator.data.handle is not nil: %p\n", p) 145 0 : os.Exit(1) 146 0 : } 147 2 : if p := i.index.Handle().Get(); p != nil { 148 0 : fmt.Fprintf(os.Stderr, "singleLevelIterator.index.handle is not nil: %p\n", p) 149 0 : os.Exit(1) 150 0 : } 151 : } 152 : 153 : // compactionIterator is similar to Iterator but it increments the number of 154 : // bytes that have been iterated through. 155 : type compactionIterator struct { 156 : *singleLevelIterator 157 : } 158 : 159 : // compactionIterator implements the base.InternalIterator interface. 160 : var _ base.InternalIterator = (*compactionIterator)(nil) 161 : 162 0 : func (i *compactionIterator) String() string { 163 0 : if i.vState != nil { 164 0 : return i.vState.fileNum.String() 165 0 : } 166 0 : return i.reader.fileNum.String() 167 : } 168 : 169 0 : func (i *compactionIterator) SeekGE(key []byte, flags base.SeekGEFlags) *base.InternalKV { 170 0 : panic("pebble: SeekGE unimplemented") 171 : } 172 : 173 : func (i *compactionIterator) SeekPrefixGE( 174 : prefix, key []byte, flags base.SeekGEFlags, 175 0 : ) *base.InternalKV { 176 0 : panic("pebble: SeekPrefixGE unimplemented") 177 : } 178 : 179 0 : func (i *compactionIterator) SeekLT(key []byte, flags base.SeekLTFlags) *base.InternalKV { 180 0 : panic("pebble: SeekLT unimplemented") 181 : } 182 : 183 2 : func (i *compactionIterator) First() *base.InternalKV { 184 2 : i.err = nil // clear cached iteration error 185 2 : return i.skipForward(i.singleLevelIterator.First()) 186 2 : } 187 : 188 0 : func (i *compactionIterator) Last() *base.InternalKV { 189 0 : panic("pebble: Last unimplemented") 190 : } 191 : 192 : // Note: compactionIterator.Next mirrors the implementation of Iterator.Next 193 : // due to performance. Keep the two in sync. 194 2 : func (i *compactionIterator) Next() *base.InternalKV { 195 2 : if i.err != nil { 196 0 : return nil 197 0 : } 198 2 : return i.skipForward(i.data.Next()) 199 : } 200 : 201 0 : func (i *compactionIterator) NextPrefix(succKey []byte) *base.InternalKV { 202 0 : panic("pebble: NextPrefix unimplemented") 203 : } 204 : 205 0 : func (i *compactionIterator) Prev() *base.InternalKV { 206 0 : panic("pebble: Prev unimplemented") 207 : } 208 : 209 2 : func (i *compactionIterator) skipForward(kv *base.InternalKV) *base.InternalKV { 210 2 : if kv == nil { 211 2 : for { 212 2 : if kv := i.index.Next(); kv == nil { 213 2 : break 214 : } 215 2 : result := i.loadBlock(+1) 216 2 : if result != loadBlockOK { 217 0 : if i.err != nil { 218 0 : break 219 : } 220 0 : switch result { 221 0 : case loadBlockFailed: 222 0 : // We checked that i.index was at a valid entry, so 223 0 : // loadBlockFailed could not have happened due to to i.index 224 0 : // being exhausted, and must be due to an error. 225 0 : panic("loadBlock should not have failed with no error") 226 0 : case loadBlockIrrelevant: 227 0 : panic("compactionIter should not be using block intervals for skipping") 228 0 : default: 229 0 : panic(fmt.Sprintf("unexpected case %d", result)) 230 : } 231 : } 232 : // result == loadBlockOK 233 2 : if kv = i.data.First(); kv != nil { 234 2 : break 235 : } 236 : } 237 : } 238 : 239 : // We have an upper bound when the table is virtual. 240 2 : if i.upper != nil && kv != nil { 241 2 : cmp := i.cmp(kv.K.UserKey, i.upper) 242 2 : if cmp > 0 || (!i.endKeyInclusive && cmp == 0) { 243 2 : return nil 244 2 : } 245 : } 246 : 247 2 : return kv 248 : }