Line data Source code
1 : // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package pebble
6 :
7 : import (
8 : "context"
9 : "fmt"
10 :
11 : "github.com/cockroachdb/pebble/internal/base"
12 : "github.com/cockroachdb/pebble/internal/keyspan"
13 : "github.com/cockroachdb/pebble/internal/manifest"
14 : "github.com/cockroachdb/pebble/internal/treeprinter"
15 : )
16 :
17 : // getIter is an internal iterator used to perform gets. It iterates through
18 : // the values for a particular key, level by level. It is not a general purpose
19 : // internalIterator, but specialized for Get operations so that it loads data
20 : // lazily.
21 : type getIter struct {
22 : comparer *Comparer
23 : newIters tableNewIters
24 : snapshot base.SeqNum
25 : iterOpts IterOptions
26 : iiopts internalIterOpts
27 : key []byte
28 : prefix []byte
29 : iter internalIterator
30 : level int
31 : batch *Batch
32 : mem flushableList
33 : l0 []manifest.LevelSlice
34 : version *manifest.Version
35 : iterKV *base.InternalKV
36 : // tombstoned and tombstonedSeqNum track whether the key has been deleted by
37 : // a range delete tombstone. The first visible (at getIter.snapshot) range
38 : // deletion encounterd transitions tombstoned to true. The tombstonedSeqNum
39 : // field is updated to hold the sequence number of the tombstone.
40 : tombstoned bool
41 : tombstonedSeqNum base.SeqNum
42 : err error
43 : }
44 :
45 : // TODO(sumeer): CockroachDB code doesn't use getIter, but, for completeness,
46 : // make this implement InternalIteratorWithStats.
47 :
48 : // getIter implements the base.InternalIterator interface.
49 : var _ base.InternalIterator = (*getIter)(nil)
50 :
51 0 : func (g *getIter) String() string {
52 0 : return fmt.Sprintf("len(l0)=%d, len(mem)=%d, level=%d", len(g.l0), len(g.mem), g.level)
53 0 : }
54 :
55 0 : func (g *getIter) SeekGE(key []byte, flags base.SeekGEFlags) *base.InternalKV {
56 0 : panic("pebble: SeekGE unimplemented")
57 : }
58 :
59 0 : func (g *getIter) SeekPrefixGE(prefix, key []byte, flags base.SeekGEFlags) *base.InternalKV {
60 0 : return g.SeekPrefixGEStrict(prefix, key, flags)
61 0 : }
62 :
63 0 : func (g *getIter) SeekPrefixGEStrict(prefix, key []byte, flags base.SeekGEFlags) *base.InternalKV {
64 0 : panic("pebble: SeekPrefixGE unimplemented")
65 : }
66 :
67 0 : func (g *getIter) SeekLT(key []byte, flags base.SeekLTFlags) *base.InternalKV {
68 0 : panic("pebble: SeekLT unimplemented")
69 : }
70 :
71 1 : func (g *getIter) First() *base.InternalKV {
72 1 : return g.Next()
73 1 : }
74 :
75 0 : func (g *getIter) Last() *base.InternalKV {
76 0 : panic("pebble: Last unimplemented")
77 : }
78 :
79 1 : func (g *getIter) Next() *base.InternalKV {
80 1 : // If g.iter != nil, we're already iterating through a level. Next. Note
81 1 : // that it's possible the next key within the level is still relevant (eg,
82 1 : // MERGE keys written in the presence of an LSM snapshot).
83 1 : //
84 1 : // NB: We can't perform this Next below, in the for loop, because when we
85 1 : // open an iterator into the next level, we need to seek to the key.
86 1 : if g.iter != nil {
87 1 : g.iterKV = g.iter.Next()
88 1 : if err := g.iter.Error(); err != nil {
89 0 : g.err = err
90 0 : return nil
91 0 : }
92 : }
93 :
94 : // This for loop finds the next internal key in the LSM that is equal to
95 : // g.key, visible at g.snapshot and not shadowed by a range deletion. If it
96 : // exhausts a level, it initializes iterators for the next level.
97 1 : for {
98 1 : if g.iter != nil {
99 1 : if g.iterKV != nil {
100 1 : // Check if the current KV pair is deleted by a range deletion.
101 1 : if g.tombstoned && g.tombstonedSeqNum > g.iterKV.SeqNum() {
102 1 : // We have a range tombstone covering this key. Rather than
103 1 : // return a point or range deletion here, we return nil and
104 1 : // close our internal iterator stopping iteration.
105 1 : g.err = g.iter.Close()
106 1 : g.iter = nil
107 1 : return nil
108 1 : }
109 :
110 : // Is this the correct user key?
111 1 : if g.comparer.Equal(g.key, g.iterKV.K.UserKey) {
112 1 : // If the KV pair is not visible at the get's snapshot,
113 1 : // Next. The level may still contain older keys with the
114 1 : // same user key that are visible.
115 1 : if !g.iterKV.Visible(g.snapshot, base.SeqNumMax) {
116 1 : g.iterKV = g.iter.Next()
117 1 : continue
118 : }
119 1 : return g.iterKV
120 : }
121 : }
122 : // We've advanced the iterator passed the desired key. Move on to the
123 : // next memtable / level.
124 1 : g.err = g.iter.Close()
125 1 : g.iter = nil
126 1 : if g.err != nil {
127 1 : return nil
128 1 : }
129 : }
130 : // g.iter == nil; we need to initialize the next iterator.
131 1 : if !g.initializeNextIterator() {
132 1 : return nil
133 1 : }
134 1 : g.iterKV = g.iter.SeekPrefixGE(g.prefix, g.key, base.SeekGEFlagsNone)
135 : }
136 : }
137 :
138 0 : func (g *getIter) Prev() *base.InternalKV {
139 0 : panic("pebble: Prev unimplemented")
140 : }
141 :
142 0 : func (g *getIter) NextPrefix([]byte) *base.InternalKV {
143 0 : panic("pebble: NextPrefix unimplemented")
144 : }
145 :
146 1 : func (g *getIter) Error() error {
147 1 : return g.err
148 1 : }
149 :
150 1 : func (g *getIter) Close() error {
151 1 : if g.iter != nil {
152 1 : if err := g.iter.Close(); err != nil && g.err == nil {
153 0 : g.err = err
154 0 : }
155 1 : g.iter = nil
156 : }
157 1 : return g.err
158 : }
159 :
160 0 : func (g *getIter) SetBounds(lower, upper []byte) {
161 0 : panic("pebble: SetBounds unimplemented")
162 : }
163 :
164 0 : func (g *getIter) SetContext(_ context.Context) {}
165 :
166 : // DebugTree is part of the InternalIterator interface.
167 0 : func (g *getIter) DebugTree(tp treeprinter.Node) {
168 0 : n := tp.Childf("%T(%p)", g, g)
169 0 : if g.iter != nil {
170 0 : g.iter.DebugTree(n)
171 0 : }
172 : }
173 :
174 1 : func (g *getIter) initializeNextIterator() (ok bool) {
175 1 : // A batch's keys shadow all other keys, so we visit the batch first.
176 1 : if g.batch != nil {
177 1 : if g.batch.index == nil {
178 0 : g.err = ErrNotIndexed
179 0 : g.iterKV = nil
180 0 : return false
181 0 : }
182 1 : g.iter = g.batch.newInternalIter(nil)
183 1 : if !g.maybeSetTombstone(g.batch.newRangeDelIter(nil,
184 1 : // Get always reads the entirety of the batch's history, so no
185 1 : // batch keys should be filtered.
186 1 : base.SeqNumMax,
187 1 : )) {
188 0 : return false
189 0 : }
190 1 : g.batch = nil
191 1 : return true
192 : }
193 :
194 : // If we're trying to initialize the next level of the iterator stack but
195 : // have a tombstone from a previous level, it is guaranteed to delete keys
196 : // in lower levels. This key is deleted.
197 1 : if g.tombstoned {
198 1 : return false
199 1 : }
200 :
201 : // Create iterators from memtables from newest to oldest.
202 1 : if n := len(g.mem); n > 0 {
203 1 : m := g.mem[n-1]
204 1 : g.iter = m.newIter(nil)
205 1 : if !g.maybeSetTombstone(m.newRangeDelIter(nil)) {
206 0 : return false
207 0 : }
208 1 : g.mem = g.mem[:n-1]
209 1 : return true
210 : }
211 :
212 : // Visit each sublevel of L0 individually, so that we only need to read
213 : // at most one file per sublevel.
214 1 : if g.level == 0 {
215 1 : // Create iterators from L0 from newest to oldest.
216 1 : if n := len(g.l0); n > 0 {
217 1 : files := g.l0[n-1].Iter()
218 1 : g.l0 = g.l0[:n-1]
219 1 :
220 1 : iter, rangeDelIter, err := g.getSSTableIterators(files, manifest.L0Sublevel(n))
221 1 : if err != nil {
222 1 : g.err = firstError(g.err, err)
223 1 : return false
224 1 : }
225 1 : if !g.maybeSetTombstone(rangeDelIter) {
226 0 : return false
227 0 : }
228 1 : g.iter = iter
229 1 : return true
230 : }
231 : // We've exhausted all the sublevels of L0. Progress to L1.
232 1 : g.level++
233 : }
234 1 : for g.level < numLevels {
235 1 : if g.version.Levels[g.level].Empty() {
236 1 : g.level++
237 1 : continue
238 : }
239 : // Open the next level of the LSM.
240 1 : iter, rangeDelIter, err := g.getSSTableIterators(g.version.Levels[g.level].Iter(), manifest.Level(g.level))
241 1 : if err != nil {
242 1 : g.err = firstError(g.err, err)
243 1 : return false
244 1 : }
245 1 : if !g.maybeSetTombstone(rangeDelIter) {
246 0 : return false
247 0 : }
248 1 : g.level++
249 1 : g.iter = iter
250 1 : return true
251 : }
252 : // We've exhausted all levels of the LSM.
253 1 : return false
254 : }
255 :
256 : // getSSTableIterators returns a point iterator and a range deletion iterator
257 : // for the sstable in files that overlaps with the key g.key. Pebble does not
258 : // split user keys across adjacent sstables within a level, ensuring that at
259 : // most one sstable overlaps g.key.
260 : func (g *getIter) getSSTableIterators(
261 : files manifest.LevelIterator, level manifest.Layer,
262 1 : ) (internalIterator, keyspan.FragmentIterator, error) {
263 1 : files = files.Filter(manifest.KeyTypePoint)
264 1 : m := files.SeekGE(g.comparer.Compare, g.key)
265 1 : if m == nil {
266 1 : return emptyIter, nil, nil
267 1 : }
268 : // m is now positioned at the file containing the first point key ≥ `g.key`.
269 : // Does it exist and possibly contain point keys with the user key 'g.key'?
270 1 : if m == nil || !m.HasPointKeys || g.comparer.Compare(m.PointKeyBounds.SmallestUserKey(), g.key) > 0 {
271 1 : return emptyIter, nil, nil
272 1 : }
273 : // m may possibly contain point (or range deletion) keys relevant to g.key.
274 1 : g.iterOpts.layer = level
275 1 : iters, err := g.newIters(context.Background(), m, &g.iterOpts, g.iiopts, iterPointKeys|iterRangeDeletions)
276 1 : if err != nil {
277 1 : return emptyIter, nil, err
278 1 : }
279 1 : return iters.Point(), iters.RangeDeletion(), nil
280 : }
281 :
282 : // maybeSetTombstone updates g.tombstoned[SeqNum] to reflect the presence of a
283 : // range deletion covering g.key, if there are any. It returns true if
284 : // successful, or false if an error occurred and the caller should abort
285 : // iteration.
286 1 : func (g *getIter) maybeSetTombstone(rangeDelIter keyspan.FragmentIterator) (ok bool) {
287 1 : if rangeDelIter == nil {
288 1 : // Nothing to do.
289 1 : return true
290 1 : }
291 : // Find the range deletion that covers the sought key, if any.
292 1 : t, err := keyspan.Get(g.comparer.Compare, rangeDelIter, g.key)
293 1 : if err != nil {
294 0 : g.err = firstError(g.err, err)
295 0 : return false
296 0 : }
297 : // Find the most recent visible range deletion's sequence number. We only
298 : // care about the most recent range deletion that's visible because it's the
299 : // "most powerful."
300 1 : g.tombstonedSeqNum, g.tombstoned = t.LargestVisibleSeqNum(g.snapshot)
301 1 : rangeDelIter.Close()
302 1 : return true
303 : }
|