LCOV - code coverage report
Current view: top level - pebble/internal/overlap - checker.go (source / functions) Hit Total Coverage
Test: 2024-12-29 08:16Z 87a5141c - tests only.lcov Lines: 130 152 85.5 %
Date: 2024-12-29 08:17:52 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : // Package overlap provides facilities for checking whether tables have data
       6             : // overlap.
       7             : package overlap
       8             : 
       9             : import (
      10             :         "context"
      11             : 
      12             :         "github.com/cockroachdb/pebble/internal/base"
      13             :         "github.com/cockroachdb/pebble/internal/keyspan"
      14             :         "github.com/cockroachdb/pebble/internal/manifest"
      15             : )
      16             : 
      17             : // WithLSM stores the result of checking for boundary and data overlap between a
      18             : // region of key space and the LSM levels, starting from the top (L0) and
      19             : // stopping at the highest level with data overlap.
      20             : type WithLSM [manifest.NumLevels]WithLevel
      21             : 
      22             : // WithLevel is the result of checking overlap against an LSM level.
      23             : type WithLevel struct {
      24             :         Result Kind
      25             :         // SplitFile can be set only when result is OnlyBoundary. If it is set, this
      26             :         // file can be split to free up the range of interest.
      27             :         SplitFile *manifest.FileMetadata
      28             : }
      29             : 
      30             : // Kind indicates the kind of overlap detected between a key range and a level.
      31             : // We check two types of overlap:
      32             : //
      33             : //   - file boundary overlap: whether the key range overlaps any of the level's
      34             : //     user key boundaries;
      35             : //
      36             : //   - data overlap: whether the key range overlaps any keys or ranges in the
      37             : //     level. Data overlap implies file boundary overlap.
      38             : type Kind uint8
      39             : 
      40             : const (
      41             :         // None indicates that the key range of interest doesn't overlap any tables on
      42             :         // the level.
      43             :         None Kind = iota + 1
      44             :         // OnlyBoundary indicates that there is boundary overlap but no data overlap.
      45             :         OnlyBoundary
      46             :         // Data indicates that at least a key or range in the level overlaps with the
      47             :         // key range of interest. Note that the data overlap check is best-effort and
      48             :         // there could be false positives.
      49             :         Data
      50             : )
      51             : 
      52             : // Checker is used to check for data overlap between tables in the LSM and a
      53             : // user key region of interest.
      54             : type Checker struct {
      55             :         cmp             base.Compare
      56             :         iteratorFactory IteratorFactory
      57             : }
      58             : 
      59             : // IteratorFactory is an interface that is used by the Checker to create
      60             : // iterators for a given table. All methods can return nil as an empty iterator.
      61             : type IteratorFactory interface {
      62             :         Points(ctx context.Context, m *manifest.FileMetadata) (base.InternalIterator, error)
      63             :         RangeDels(ctx context.Context, m *manifest.FileMetadata) (keyspan.FragmentIterator, error)
      64             :         RangeKeys(ctx context.Context, m *manifest.FileMetadata) (keyspan.FragmentIterator, error)
      65             : }
      66             : 
      67             : // MakeChecker initializes a new Checker.
      68           1 : func MakeChecker(cmp base.Compare, iteratorFactory IteratorFactory) Checker {
      69           1 :         return Checker{
      70           1 :                 cmp:             cmp,
      71           1 :                 iteratorFactory: iteratorFactory,
      72           1 :         }
      73           1 : }
      74             : 
      75             : // LSMOverlap calculates the LSM overlap for the given region.
      76             : func (c *Checker) LSMOverlap(
      77             :         ctx context.Context, region base.UserKeyBounds, v *manifest.Version,
      78           1 : ) (WithLSM, error) {
      79           1 :         var result WithLSM
      80           1 :         result[0].Result = None
      81           1 :         for sublevel := 0; sublevel < len(v.L0Sublevels.Levels); sublevel++ {
      82           1 :                 res, err := c.LevelOverlap(ctx, region, v.L0Sublevels.Levels[sublevel])
      83           1 :                 if err != nil {
      84           0 :                         return WithLSM{}, err
      85           0 :                 }
      86           1 :                 if res.Result == Data {
      87           1 :                         result[0].Result = Data
      88           1 :                         return result, nil
      89           1 :                 }
      90           1 :                 if res.Result == OnlyBoundary {
      91           1 :                         result[0].Result = OnlyBoundary
      92           1 :                 }
      93             :         }
      94           1 :         for level := 1; level < manifest.NumLevels; level++ {
      95           1 :                 var err error
      96           1 :                 result[level], err = c.LevelOverlap(ctx, region, v.Levels[level].Slice())
      97           1 :                 if err != nil {
      98           0 :                         return WithLSM{}, err
      99           0 :                 }
     100           1 :                 if result[level].Result == Data {
     101           1 :                         return result, err
     102           1 :                 }
     103             :         }
     104           1 :         return result, nil
     105             : }
     106             : 
     107             : // LevelOverlap returns true if there is possible data overlap between a user
     108             : // key region and an L0 sublevel or L1+ level.
     109             : func (c *Checker) LevelOverlap(
     110             :         ctx context.Context, region base.UserKeyBounds, ls manifest.LevelSlice,
     111           1 : ) (WithLevel, error) {
     112           1 :         // Quick check: if the target region contains any file boundaries, we assume
     113           1 :         // data overlap. This is a correct assumption in most cases; it is pessimistic
     114           1 :         // only for external ingestions which could have "loose" boundaries. External
     115           1 :         // ingestions are also the most expensive to look at, so we don't want to do
     116           1 :         // that just in the off chance that we'll find a significant empty region at
     117           1 :         // the boundary.
     118           1 :         //
     119           1 :         // This check is important because the region can be very large in the key
     120           1 :         // space and encompass many files, and we don't want to open any of them in
     121           1 :         // that case.
     122           1 :         startIter := ls.Iter()
     123           1 :         file := startIter.SeekGE(c.cmp, region.Start)
     124           1 :         if file == nil {
     125           1 :                 // No overlapping files.
     126           1 :                 return WithLevel{Result: None}, nil
     127           1 :         }
     128           1 :         fileBounds := file.UserKeyBounds()
     129           1 :         if !region.End.IsUpperBoundFor(c.cmp, fileBounds.Start) {
     130           1 :                 // No overlapping files.
     131           1 :                 return WithLevel{Result: None}, nil
     132           1 :         }
     133           1 :         if c.cmp(fileBounds.Start, region.Start) >= 0 || region.End.CompareUpperBounds(c.cmp, fileBounds.End) >= 0 {
     134           1 :                 // The file ends or starts inside our region; we assume data overlap.
     135           1 :                 return WithLevel{Result: Data}, nil
     136           1 :         }
     137             :         // We have a single file to look at; its boundaries enclose our region.
     138           1 :         empty, err := c.EmptyRegion(ctx, region, file)
     139           1 :         if err != nil {
     140           0 :                 return WithLevel{}, err
     141           0 :         }
     142           1 :         if !empty {
     143           1 :                 return WithLevel{Result: Data}, nil
     144           1 :         }
     145           1 :         return WithLevel{
     146           1 :                 Result:    OnlyBoundary,
     147           1 :                 SplitFile: file,
     148           1 :         }, nil
     149             : }
     150             : 
     151             : // EmptyRegion returns true if the given region doesn't overlap with any keys or
     152             : // ranges in the given table.
     153             : func (c *Checker) EmptyRegion(
     154             :         ctx context.Context, region base.UserKeyBounds, m *manifest.FileMetadata,
     155           1 : ) (bool, error) {
     156           1 :         empty, err := c.emptyRegionPointsAndRangeDels(ctx, region, m)
     157           1 :         if err != nil || !empty {
     158           1 :                 return empty, err
     159           1 :         }
     160           1 :         return c.emptyRegionRangeKeys(ctx, region, m)
     161             : }
     162             : 
     163             : // emptyRegionPointsAndRangeDels returns true if the file doesn't contain any
     164             : // point keys or range del spans that overlap with region.
     165             : func (c *Checker) emptyRegionPointsAndRangeDels(
     166             :         ctx context.Context, region base.UserKeyBounds, m *manifest.FileMetadata,
     167           1 : ) (bool, error) {
     168           1 :         if !m.HasPointKeys {
     169           1 :                 return true, nil
     170           1 :         }
     171           1 :         pointBounds := m.UserKeyBoundsByType(manifest.KeyTypePoint)
     172           1 :         if !pointBounds.Overlaps(c.cmp, &region) {
     173           1 :                 return true, nil
     174           1 :         }
     175           1 :         points, err := c.iteratorFactory.Points(ctx, m)
     176           1 :         if err != nil {
     177           0 :                 return false, err
     178           0 :         }
     179           1 :         if points != nil {
     180           1 :                 defer points.Close()
     181           1 :                 var kv *base.InternalKV
     182           1 :                 if c.cmp(region.Start, pointBounds.Start) <= 0 {
     183           0 :                         kv = points.First()
     184           1 :                 } else {
     185           1 :                         kv = points.SeekGE(region.Start, base.SeekGEFlagsNone)
     186           1 :                 }
     187           1 :                 if kv == nil && points.Error() != nil {
     188           0 :                         return false, points.Error()
     189           0 :                 }
     190           1 :                 if kv != nil && region.End.IsUpperBoundForInternalKey(c.cmp, kv.K) {
     191           1 :                         // Found overlap.
     192           1 :                         return false, nil
     193           1 :                 }
     194             :         }
     195           1 :         rangeDels, err := c.iteratorFactory.RangeDels(ctx, m)
     196           1 :         if err != nil {
     197           0 :                 return false, err
     198           0 :         }
     199           1 :         if rangeDels != nil {
     200           1 :                 defer rangeDels.Close()
     201           1 :                 empty, err := c.emptyFragmentRegion(region, pointBounds.Start, rangeDels)
     202           1 :                 if err != nil || !empty {
     203           1 :                         return empty, err
     204           1 :                 }
     205             :         }
     206             :         // Found no overlap.
     207           1 :         return true, nil
     208             : }
     209             : 
     210             : // emptyRegionRangeKeys returns true if the file doesn't contain any range key
     211             : // spans that overlap with region.
     212             : func (c *Checker) emptyRegionRangeKeys(
     213             :         ctx context.Context, region base.UserKeyBounds, m *manifest.FileMetadata,
     214           1 : ) (bool, error) {
     215           1 :         if !m.HasRangeKeys {
     216           1 :                 return true, nil
     217           1 :         }
     218           1 :         rangeKeyBounds := m.UserKeyBoundsByType(manifest.KeyTypeRange)
     219           1 :         if !rangeKeyBounds.Overlaps(c.cmp, &region) {
     220           1 :                 return true, nil
     221           1 :         }
     222           1 :         rangeKeys, err := c.iteratorFactory.RangeKeys(ctx, m)
     223           1 :         if err != nil {
     224           0 :                 return false, err
     225           0 :         }
     226           1 :         if rangeKeys != nil {
     227           1 :                 defer rangeKeys.Close()
     228           1 :                 empty, err := c.emptyFragmentRegion(region, rangeKeyBounds.Start, rangeKeys)
     229           1 :                 if err != nil || !empty {
     230           1 :                         return empty, err
     231           1 :                 }
     232             :         }
     233             :         // Found no overlap.
     234           1 :         return true, nil
     235             : }
     236             : 
     237             : // emptyFragmentRegion returns true if the given iterator doesn't contain any
     238             : // spans that overlap with region. The fragmentLowerBounds is a known lower
     239             : // bound for all the spans.
     240             : func (c *Checker) emptyFragmentRegion(
     241             :         region base.UserKeyBounds, fragmentLowerBound []byte, fragments keyspan.FragmentIterator,
     242           1 : ) (bool, error) {
     243           1 :         var span *keyspan.Span
     244           1 :         var err error
     245           1 :         if c.cmp(region.Start, fragmentLowerBound) <= 0 {
     246           0 :                 // This is an optimization: we know there are no spans before region.Start,
     247           0 :                 // so we can use First.
     248           0 :                 span, err = fragments.First()
     249           1 :         } else {
     250           1 :                 span, err = fragments.SeekGE(region.Start)
     251           1 :         }
     252           1 :         if err != nil {
     253           0 :                 return false, err
     254           0 :         }
     255           1 :         if span != nil && span.Empty() {
     256           0 :                 return false, base.AssertionFailedf("fragment iterator produced empty span")
     257           0 :         }
     258           1 :         if span != nil && region.End.IsUpperBoundFor(c.cmp, span.Start) {
     259           1 :                 // Found overlap.
     260           1 :                 return false, nil
     261           1 :         }
     262           1 :         return true, nil
     263             : }

Generated by: LCOV version 1.14