LCOV - code coverage report
Current view: top level - pebble/internal/overlap - checker.go (source / functions) Hit Total Coverage
Test: 2024-09-03 08:16Z c2b6801c - tests + meta.lcov Lines: 134 152 88.2 %
Date: 2024-09-03 08:17:15 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : // Package overlap provides facilities for checking whether tables have data
       6             : // overlap.
       7             : package overlap
       8             : 
       9             : import (
      10             :         "context"
      11             : 
      12             :         "github.com/cockroachdb/pebble/internal/base"
      13             :         "github.com/cockroachdb/pebble/internal/keyspan"
      14             :         "github.com/cockroachdb/pebble/internal/manifest"
      15             : )
      16             : 
      17             : // WithLSM stores the result of checking for boundary and data overlap between a
      18             : // region of key space and the LSM levels, starting from the top (L0) and
      19             : // stopping at the highest level with data overlap.
      20             : type WithLSM [manifest.NumLevels]WithLevel
      21             : 
      22             : // WithLevel is the result of checking overlap against an LSM level.
      23             : type WithLevel struct {
      24             :         Result Kind
      25             :         // SplitFile can be set only when result is OnlyBoundary. If it is set, this
      26             :         // file can be split to free up the range of interest.
      27             :         SplitFile *manifest.FileMetadata
      28             : }
      29             : 
      30             : // Kind indicates the kind of overlap detected between a key range and a level.
      31             : // We check two types of overlap:
      32             : //
      33             : //   - file boundary overlap: whether the key range overlaps any of the level's
      34             : //     user key boundaries;
      35             : //
      36             : //   - data overlap: whether the key range overlaps any keys or ranges in the
      37             : //     level. Data overlap implies file boundary overlap.
      38             : type Kind uint8
      39             : 
      40             : const (
      41             :         // None indicates that the key range of interest doesn't overlap any tables on
      42             :         // the level.
      43             :         None Kind = iota + 1
      44             :         // OnlyBoundary indicates that there is boundary overlap but no data overlap.
      45             :         OnlyBoundary
      46             :         // Data indicates that at least a key or range in the level overlaps with the
      47             :         // key range of interest. Note that the data overlap check is best-effort and
      48             :         // there could be false positives.
      49             :         Data
      50             : )
      51             : 
      52             : // Checker is used to check for data overlap between tables in the LSM and a
      53             : // user key region of interest.
      54             : type Checker struct {
      55             :         cmp             base.Compare
      56             :         iteratorFactory IteratorFactory
      57             : }
      58             : 
      59             : // IteratorFactory is an interface that is used by the Checker to create
      60             : // iterators for a given table. All methods can return nil as an empty iterator.
      61             : type IteratorFactory interface {
      62             :         Points(ctx context.Context, m *manifest.FileMetadata) (base.InternalIterator, error)
      63             :         RangeDels(ctx context.Context, m *manifest.FileMetadata) (keyspan.FragmentIterator, error)
      64             :         RangeKeys(ctx context.Context, m *manifest.FileMetadata) (keyspan.FragmentIterator, error)
      65             : }
      66             : 
      67             : // MakeChecker initializes a new Checker.
      68           2 : func MakeChecker(cmp base.Compare, iteratorFactory IteratorFactory) Checker {
      69           2 :         return Checker{
      70           2 :                 cmp:             cmp,
      71           2 :                 iteratorFactory: iteratorFactory,
      72           2 :         }
      73           2 : }
      74             : 
      75             : // LSMOverlap calculates the LSM overlap for the given region.
      76             : func (c *Checker) LSMOverlap(
      77             :         ctx context.Context, region base.UserKeyBounds, v *manifest.Version,
      78           2 : ) (WithLSM, error) {
      79           2 :         var result WithLSM
      80           2 :         result[0].Result = None
      81           2 :         for sublevel := 0; sublevel < len(v.L0Sublevels.Levels); sublevel++ {
      82           2 :                 res, err := c.LevelOverlap(ctx, region, v.L0Sublevels.Levels[sublevel])
      83           2 :                 if err != nil {
      84           0 :                         return WithLSM{}, err
      85           0 :                 }
      86           2 :                 if res.Result == Data {
      87           2 :                         result[0].Result = Data
      88           2 :                         return result, nil
      89           2 :                 }
      90           2 :                 if res.Result == OnlyBoundary {
      91           2 :                         result[0].Result = OnlyBoundary
      92           2 :                 }
      93             :         }
      94           2 :         for level := 1; level < manifest.NumLevels; level++ {
      95           2 :                 var err error
      96           2 :                 result[level], err = c.LevelOverlap(ctx, region, v.Levels[level].Slice())
      97           2 :                 if err != nil {
      98           0 :                         return WithLSM{}, err
      99           0 :                 }
     100           2 :                 if result[level].Result == Data {
     101           2 :                         return result, err
     102           2 :                 }
     103             :         }
     104           2 :         return result, nil
     105             : }
     106             : 
     107             : // LevelOverlap returns true if there is possible data overlap between a user
     108             : // key region and an L0 sublevel or L1+ level.
     109             : func (c *Checker) LevelOverlap(
     110             :         ctx context.Context, region base.UserKeyBounds, ls manifest.LevelSlice,
     111           2 : ) (WithLevel, error) {
     112           2 :         // Quick check: if the target region contains any file boundaries, we assume
     113           2 :         // data overlap. This is a correct assumption in most cases; it is pessimistic
     114           2 :         // only for external ingestions which could have "loose" boundaries. External
     115           2 :         // ingestions are also the most expensive to look at, so we don't want to do
     116           2 :         // that just in the off chance that we'll find a significant empty region at
     117           2 :         // the boundary.
     118           2 :         //
     119           2 :         // This check is important because the region can be very large in the key
     120           2 :         // space and encompass many files, and we don't want to open any of them in
     121           2 :         // that case.
     122           2 :         startIter := ls.Iter()
     123           2 :         file := startIter.SeekGE(c.cmp, region.Start)
     124           2 :         if file == nil {
     125           2 :                 // No overlapping files.
     126           2 :                 return WithLevel{Result: None}, nil
     127           2 :         }
     128           2 :         fileBounds := file.UserKeyBounds()
     129           2 :         if !region.End.IsUpperBoundFor(c.cmp, fileBounds.Start) {
     130           2 :                 // No overlapping files.
     131           2 :                 return WithLevel{Result: None}, nil
     132           2 :         }
     133           2 :         if c.cmp(fileBounds.Start, region.Start) >= 0 || region.End.CompareUpperBounds(c.cmp, fileBounds.End) >= 0 {
     134           2 :                 // The file ends or starts inside our region; we assume data overlap.
     135           2 :                 return WithLevel{Result: Data}, nil
     136           2 :         }
     137             :         // We have a single file to look at; its boundaries enclose our region.
     138           2 :         empty, err := c.EmptyRegion(ctx, region, file)
     139           2 :         if err != nil {
     140           0 :                 return WithLevel{}, err
     141           0 :         }
     142           2 :         if !empty {
     143           2 :                 return WithLevel{Result: Data}, nil
     144           2 :         }
     145           2 :         return WithLevel{
     146           2 :                 Result:    OnlyBoundary,
     147           2 :                 SplitFile: file,
     148           2 :         }, nil
     149             : }
     150             : 
     151             : // EmptyRegion returns true if the given region doesn't overlap with any keys or
     152             : // ranges in the given table.
     153             : func (c *Checker) EmptyRegion(
     154             :         ctx context.Context, region base.UserKeyBounds, m *manifest.FileMetadata,
     155           2 : ) (bool, error) {
     156           2 :         empty, err := c.emptyRegionPointsAndRangeDels(ctx, region, m)
     157           2 :         if err != nil || !empty {
     158           2 :                 return empty, err
     159           2 :         }
     160           2 :         return c.emptyRegionRangeKeys(ctx, region, m)
     161             : }
     162             : 
     163             : // emptyRegionPointsAndRangeDels returns true if the file doesn't contain any
     164             : // point keys or range del spans that overlap with region.
     165             : func (c *Checker) emptyRegionPointsAndRangeDels(
     166             :         ctx context.Context, region base.UserKeyBounds, m *manifest.FileMetadata,
     167           2 : ) (bool, error) {
     168           2 :         if !m.HasPointKeys {
     169           2 :                 return true, nil
     170           2 :         }
     171           2 :         pointBounds := m.UserKeyBoundsByType(manifest.KeyTypePoint)
     172           2 :         if !pointBounds.Overlaps(c.cmp, &region) {
     173           2 :                 return true, nil
     174           2 :         }
     175           2 :         points, err := c.iteratorFactory.Points(ctx, m)
     176           2 :         if err != nil {
     177           0 :                 return false, err
     178           0 :         }
     179           2 :         if points != nil {
     180           2 :                 defer points.Close()
     181           2 :                 var kv *base.InternalKV
     182           2 :                 if c.cmp(region.Start, pointBounds.Start) <= 0 {
     183           1 :                         kv = points.First()
     184           2 :                 } else {
     185           2 :                         kv = points.SeekGE(region.Start, base.SeekGEFlagsNone)
     186           2 :                 }
     187           2 :                 if kv == nil && points.Error() != nil {
     188           0 :                         return false, points.Error()
     189           0 :                 }
     190           2 :                 if kv != nil && region.End.IsUpperBoundForInternalKey(c.cmp, kv.K) {
     191           2 :                         // Found overlap.
     192           2 :                         return false, nil
     193           2 :                 }
     194             :         }
     195           2 :         rangeDels, err := c.iteratorFactory.RangeDels(ctx, m)
     196           2 :         if err != nil {
     197           0 :                 return false, err
     198           0 :         }
     199           2 :         if rangeDels != nil {
     200           2 :                 defer rangeDels.Close()
     201           2 :                 empty, err := c.emptyFragmentRegion(region, pointBounds.Start, rangeDels)
     202           2 :                 if err != nil || !empty {
     203           2 :                         return empty, err
     204           2 :                 }
     205             :         }
     206             :         // Found no overlap.
     207           2 :         return true, nil
     208             : }
     209             : 
     210             : // emptyRegionRangeKeys returns true if the file doesn't contain any range key
     211             : // spans that overlap with region.
     212             : func (c *Checker) emptyRegionRangeKeys(
     213             :         ctx context.Context, region base.UserKeyBounds, m *manifest.FileMetadata,
     214           2 : ) (bool, error) {
     215           2 :         if !m.HasRangeKeys {
     216           2 :                 return true, nil
     217           2 :         }
     218           2 :         rangeKeyBounds := m.UserKeyBoundsByType(manifest.KeyTypeRange)
     219           2 :         if !rangeKeyBounds.Overlaps(c.cmp, &region) {
     220           2 :                 return true, nil
     221           2 :         }
     222           2 :         rangeKeys, err := c.iteratorFactory.RangeKeys(ctx, m)
     223           2 :         if err != nil {
     224           0 :                 return false, err
     225           0 :         }
     226           2 :         if rangeKeys != nil {
     227           2 :                 defer rangeKeys.Close()
     228           2 :                 empty, err := c.emptyFragmentRegion(region, rangeKeyBounds.Start, rangeKeys)
     229           2 :                 if err != nil || !empty {
     230           2 :                         return empty, err
     231           2 :                 }
     232             :         }
     233             :         // Found no overlap.
     234           2 :         return true, nil
     235             : }
     236             : 
     237             : // emptyFragmentRegion returns true if the given iterator doesn't contain any
     238             : // spans that overlap with region. The fragmentLowerBounds is a known lower
     239             : // bound for all the spans.
     240             : func (c *Checker) emptyFragmentRegion(
     241             :         region base.UserKeyBounds, fragmentLowerBound []byte, fragments keyspan.FragmentIterator,
     242           2 : ) (bool, error) {
     243           2 :         var span *keyspan.Span
     244           2 :         var err error
     245           2 :         if c.cmp(region.Start, fragmentLowerBound) <= 0 {
     246           2 :                 // This is an optimization: we know there are no spans before region.Start,
     247           2 :                 // so we can use First.
     248           2 :                 span, err = fragments.First()
     249           2 :         } else {
     250           2 :                 span, err = fragments.SeekGE(region.Start)
     251           2 :         }
     252           2 :         if err != nil {
     253           0 :                 return false, err
     254           0 :         }
     255           2 :         if span != nil && span.Empty() {
     256           0 :                 return false, base.AssertionFailedf("fragment iterator produced empty span")
     257           0 :         }
     258           2 :         if span != nil && region.End.IsUpperBoundFor(c.cmp, span.Start) {
     259           2 :                 // Found overlap.
     260           2 :                 return false, nil
     261           2 :         }
     262           2 :         return true, nil
     263             : }

Generated by: LCOV version 1.14