Line data Source code
1 : // Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : // Package overlap provides facilities for checking whether tables have data
6 : // overlap.
7 : package overlap
8 :
9 : import (
10 : "context"
11 :
12 : "github.com/cockroachdb/pebble/internal/base"
13 : "github.com/cockroachdb/pebble/internal/keyspan"
14 : "github.com/cockroachdb/pebble/internal/manifest"
15 : )
16 :
17 : // WithLSM stores the result of checking for boundary and data overlap between a
18 : // region of key space and the LSM levels, starting from the top (L0) and
19 : // stopping at the highest level with data overlap.
20 : type WithLSM [manifest.NumLevels]WithLevel
21 :
22 : // WithLevel is the result of checking overlap against an LSM level.
23 : type WithLevel struct {
24 : Result Kind
25 : // SplitFile can be set only when result is OnlyBoundary. If it is set, this
26 : // file can be split to free up the range of interest.
27 : SplitFile *manifest.FileMetadata
28 : }
29 :
30 : // Kind indicates the kind of overlap detected between a key range and a level.
31 : // We check two types of overlap:
32 : //
33 : // - file boundary overlap: whether the key range overlaps any of the level's
34 : // user key boundaries;
35 : //
36 : // - data overlap: whether the key range overlaps any keys or ranges in the
37 : // level. Data overlap implies file boundary overlap.
38 : type Kind uint8
39 :
40 : const (
41 : // None indicates that the key range of interest doesn't overlap any tables on
42 : // the level.
43 : None Kind = iota + 1
44 : // OnlyBoundary indicates that there is boundary overlap but no data overlap.
45 : OnlyBoundary
46 : // Data indicates that at least a key or range in the level overlaps with the
47 : // key range of interest. Note that the data overlap check is best-effort and
48 : // there could be false positives.
49 : Data
50 : )
51 :
52 : // Checker is used to check for data overlap between tables in the LSM and a
53 : // user key region of interest.
54 : type Checker struct {
55 : cmp base.Compare
56 : iteratorFactory IteratorFactory
57 : }
58 :
59 : // IteratorFactory is an interface that is used by the Checker to create
60 : // iterators for a given table. All methods can return nil as an empty iterator.
61 : type IteratorFactory interface {
62 : Points(ctx context.Context, m *manifest.FileMetadata) (base.InternalIterator, error)
63 : RangeDels(ctx context.Context, m *manifest.FileMetadata) (keyspan.FragmentIterator, error)
64 : RangeKeys(ctx context.Context, m *manifest.FileMetadata) (keyspan.FragmentIterator, error)
65 : }
66 :
67 : // MakeChecker initializes a new Checker.
68 1 : func MakeChecker(cmp base.Compare, iteratorFactory IteratorFactory) Checker {
69 1 : return Checker{
70 1 : cmp: cmp,
71 1 : iteratorFactory: iteratorFactory,
72 1 : }
73 1 : }
74 :
75 : // LSMOverlap calculates the LSM overlap for the given region.
76 : func (c *Checker) LSMOverlap(
77 : ctx context.Context, region base.UserKeyBounds, v *manifest.Version,
78 1 : ) (WithLSM, error) {
79 1 : var result WithLSM
80 1 : result[0].Result = None
81 1 : for sublevel := 0; sublevel < len(v.L0Sublevels.Levels); sublevel++ {
82 1 : res, err := c.LevelOverlap(ctx, region, v.L0Sublevels.Levels[sublevel])
83 1 : if err != nil {
84 0 : return WithLSM{}, err
85 0 : }
86 1 : if res.Result == Data {
87 1 : result[0].Result = Data
88 1 : return result, nil
89 1 : }
90 1 : if res.Result == OnlyBoundary {
91 1 : result[0].Result = OnlyBoundary
92 1 : }
93 : }
94 1 : for level := 1; level < manifest.NumLevels; level++ {
95 1 : var err error
96 1 : result[level], err = c.LevelOverlap(ctx, region, v.Levels[level].Slice())
97 1 : if err != nil {
98 0 : return WithLSM{}, err
99 0 : }
100 1 : if result[level].Result == Data {
101 1 : return result, err
102 1 : }
103 : }
104 1 : return result, nil
105 : }
106 :
107 : // LevelOverlap returns true if there is possible data overlap between a user
108 : // key region and an L0 sublevel or L1+ level.
109 : func (c *Checker) LevelOverlap(
110 : ctx context.Context, region base.UserKeyBounds, ls manifest.LevelSlice,
111 1 : ) (WithLevel, error) {
112 1 : // Quick check: if the target region contains any file boundaries, we assume
113 1 : // data overlap. This is a correct assumption in most cases; it is pessimistic
114 1 : // only for external ingestions which could have "loose" boundaries. External
115 1 : // ingestions are also the most expensive to look at, so we don't want to do
116 1 : // that just in the off chance that we'll find a significant empty region at
117 1 : // the boundary.
118 1 : //
119 1 : // This check is important because the region can be very large in the key
120 1 : // space and encompass many files, and we don't want to open any of them in
121 1 : // that case.
122 1 : startIter := ls.Iter()
123 1 : file := startIter.SeekGE(c.cmp, region.Start)
124 1 : if file == nil {
125 1 : // No overlapping files.
126 1 : return WithLevel{Result: None}, nil
127 1 : }
128 1 : fileBounds := file.UserKeyBounds()
129 1 : if !region.End.IsUpperBoundFor(c.cmp, fileBounds.Start) {
130 1 : // No overlapping files.
131 1 : return WithLevel{Result: None}, nil
132 1 : }
133 1 : if c.cmp(fileBounds.Start, region.Start) >= 0 || region.End.CompareUpperBounds(c.cmp, fileBounds.End) >= 0 {
134 1 : // The file ends or starts inside our region; we assume data overlap.
135 1 : return WithLevel{Result: Data}, nil
136 1 : }
137 : // We have a single file to look at; its boundaries enclose our region.
138 1 : empty, err := c.EmptyRegion(ctx, region, file)
139 1 : if err != nil {
140 0 : return WithLevel{}, err
141 0 : }
142 1 : if !empty {
143 1 : return WithLevel{Result: Data}, nil
144 1 : }
145 1 : return WithLevel{
146 1 : Result: OnlyBoundary,
147 1 : SplitFile: file,
148 1 : }, nil
149 : }
150 :
151 : // EmptyRegion returns true if the given region doesn't overlap with any keys or
152 : // ranges in the given table.
153 : func (c *Checker) EmptyRegion(
154 : ctx context.Context, region base.UserKeyBounds, m *manifest.FileMetadata,
155 1 : ) (bool, error) {
156 1 : empty, err := c.emptyRegionPointsAndRangeDels(ctx, region, m)
157 1 : if err != nil || !empty {
158 1 : return empty, err
159 1 : }
160 1 : return c.emptyRegionRangeKeys(ctx, region, m)
161 : }
162 :
163 : // emptyRegionPointsAndRangeDels returns true if the file doesn't contain any
164 : // point keys or range del spans that overlap with region.
165 : func (c *Checker) emptyRegionPointsAndRangeDels(
166 : ctx context.Context, region base.UserKeyBounds, m *manifest.FileMetadata,
167 1 : ) (bool, error) {
168 1 : if !m.HasPointKeys {
169 1 : return true, nil
170 1 : }
171 1 : pointBounds := m.UserKeyBoundsByType(manifest.KeyTypePoint)
172 1 : if !pointBounds.Overlaps(c.cmp, ®ion) {
173 1 : return true, nil
174 1 : }
175 1 : points, err := c.iteratorFactory.Points(ctx, m)
176 1 : if err != nil {
177 0 : return false, err
178 0 : }
179 1 : if points != nil {
180 1 : defer points.Close()
181 1 : var kv *base.InternalKV
182 1 : if c.cmp(region.Start, pointBounds.Start) <= 0 {
183 1 : kv = points.First()
184 1 : } else {
185 1 : kv = points.SeekGE(region.Start, base.SeekGEFlagsNone)
186 1 : }
187 1 : if kv == nil && points.Error() != nil {
188 0 : return false, points.Error()
189 0 : }
190 1 : if kv != nil && region.End.IsUpperBoundForInternalKey(c.cmp, kv.K) {
191 1 : // Found overlap.
192 1 : return false, nil
193 1 : }
194 : }
195 1 : rangeDels, err := c.iteratorFactory.RangeDels(ctx, m)
196 1 : if err != nil {
197 0 : return false, err
198 0 : }
199 1 : if rangeDels != nil {
200 1 : defer rangeDels.Close()
201 1 : empty, err := c.emptyFragmentRegion(region, pointBounds.Start, rangeDels)
202 1 : if err != nil || !empty {
203 1 : return empty, err
204 1 : }
205 : }
206 : // Found no overlap.
207 1 : return true, nil
208 : }
209 :
210 : // emptyRegionRangeKeys returns true if the file doesn't contain any range key
211 : // spans that overlap with region.
212 : func (c *Checker) emptyRegionRangeKeys(
213 : ctx context.Context, region base.UserKeyBounds, m *manifest.FileMetadata,
214 1 : ) (bool, error) {
215 1 : if !m.HasRangeKeys {
216 1 : return true, nil
217 1 : }
218 1 : rangeKeyBounds := m.UserKeyBoundsByType(manifest.KeyTypeRange)
219 1 : if !rangeKeyBounds.Overlaps(c.cmp, ®ion) {
220 1 : return true, nil
221 1 : }
222 1 : rangeKeys, err := c.iteratorFactory.RangeKeys(ctx, m)
223 1 : if err != nil {
224 0 : return false, err
225 0 : }
226 1 : if rangeKeys != nil {
227 1 : defer rangeKeys.Close()
228 1 : empty, err := c.emptyFragmentRegion(region, rangeKeyBounds.Start, rangeKeys)
229 1 : if err != nil || !empty {
230 1 : return empty, err
231 1 : }
232 : }
233 : // Found no overlap.
234 1 : return true, nil
235 : }
236 :
237 : // emptyFragmentRegion returns true if the given iterator doesn't contain any
238 : // spans that overlap with region. The fragmentLowerBounds is a known lower
239 : // bound for all the spans.
240 : func (c *Checker) emptyFragmentRegion(
241 : region base.UserKeyBounds, fragmentLowerBound []byte, fragments keyspan.FragmentIterator,
242 1 : ) (bool, error) {
243 1 : var span *keyspan.Span
244 1 : var err error
245 1 : if c.cmp(region.Start, fragmentLowerBound) <= 0 {
246 1 : // This is an optimization: we know there are no spans before region.Start,
247 1 : // so we can use First.
248 1 : span, err = fragments.First()
249 1 : } else {
250 1 : span, err = fragments.SeekGE(region.Start)
251 1 : }
252 1 : if err != nil {
253 0 : return false, err
254 0 : }
255 1 : if span != nil && span.Empty() {
256 0 : return false, base.AssertionFailedf("fragment iterator produced empty span")
257 0 : }
258 1 : if span != nil && region.End.IsUpperBoundFor(c.cmp, span.Start) {
259 1 : // Found overlap.
260 1 : return false, nil
261 1 : }
262 1 : return true, nil
263 : }
|