LCOV - code coverage report
Current view: top level - pebble/internal/rangekey - coalesce.go (source / functions) Hit Total Coverage
Test: 2024-10-06 08:16Z 649e50ad - tests only.lcov Lines: 89 96 92.7 %
Date: 2024-10-06 08:16:44 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : package rangekey
       6             : 
       7             : import (
       8             :         "math"
       9             :         "slices"
      10             : 
      11             :         "github.com/cockroachdb/pebble/internal/base"
      12             :         "github.com/cockroachdb/pebble/internal/invariants"
      13             :         "github.com/cockroachdb/pebble/internal/keyspan"
      14             : )
      15             : 
      16             : // Coalesce imposes range key semantics and coalesces range keys with the same
      17             : // bounds. Coalesce drops any keys shadowed by more recent sets, unsets or
      18             : // deletes. Coalesce modifies the provided span's Keys slice, reslicing the
      19             : // slice to remove dropped keys.
      20             : //
      21             : // Coalescence has subtle behavior with respect to sequence numbers. Coalesce
      22             : // depends on a keyspan.Span's Keys being sorted in sequence number descending
      23             : // order. The first key has the largest sequence number. The returned coalesced
      24             : // span includes only the largest sequence number. All other sequence numbers
      25             : // are forgotten. When a compaction constructs output range keys from a
      26             : // coalesced span, it produces at most one RANGEKEYSET, one RANGEKEYUNSET and
      27             : // one RANGEKEYDEL. Each one of these keys adopt the largest sequence number.
      28             : //
      29             : // This has the potentially surprising effect of 'promoting' a key to a higher
      30             : // sequence number. This is okay, because:
      31             : //   - There are no other overlapping keys within the coalesced span of
      32             : //     sequence numbers (otherwise they would be in the compaction, due to
      33             : //     the LSM invariant).
      34             : //   - Range key sequence numbers are never compared to point key sequence
      35             : //     numbers. Range keys and point keys have parallel existences.
      36             : //   - Compactions only coalesce within snapshot stripes.
      37             : //
      38             : // Additionally, internal range keys at the same sequence number have subtle
      39             : // mechanics:
      40             : //   - RANGEKEYSETs shadow RANGEKEYUNSETs of the same suffix.
      41             : //   - RANGEKEYDELs only apply to keys at lower sequence numbers.
      42             : //
      43             : // This is required for ingestion. Ingested sstables are assigned a single
      44             : // sequence number for the file, at which all of the file's keys are visible.
      45             : // The RANGEKEYSET, RANGEKEYUNSET and RANGEKEYDEL key kinds are ordered such
      46             : // that among keys with equal sequence numbers (thus ordered by their kinds) the
      47             : // keys do not affect one another. Ingested sstables are expected to be
      48             : // consistent with respect to the set/unset suffixes: A given suffix should be
      49             : // set or unset but not both.
      50             : //
      51             : // The resulting dst Keys slice is sorted by InternalKeyTrailer.
      52           1 : func Coalesce(suffixCmp base.CompareSuffixes, keys []keyspan.Key, dst *[]keyspan.Key) {
      53           1 :         // TODO(jackson): Currently, Coalesce doesn't actually perform the sequence
      54           1 :         // number promotion described in the comment above.
      55           1 :         *dst = CoalesceInto(suffixCmp, (*dst)[:0], math.MaxUint64, keys)
      56           1 :         // Update the span with the (potentially reduced) keys slice. coalesce left
      57           1 :         // the keys in *dst sorted by suffix. Re-sort them by trailer.
      58           1 :         keyspan.SortKeysByTrailer(*dst)
      59           1 : }
      60             : 
      61             : // CoalesceInto is a variant of Coalesce which outputs the results into dst
      62             : // without sorting them.
      63             : func CoalesceInto(
      64             :         suffixCmp base.CompareSuffixes, dst []keyspan.Key, snapshot base.SeqNum, keys []keyspan.Key,
      65           1 : ) []keyspan.Key {
      66           1 :         dst = dst[:0]
      67           1 :         // First, enforce visibility and RangeKeyDelete mechanics. We only need to
      68           1 :         // consider the prefix of keys before and including the first
      69           1 :         // RangeKeyDelete. We also must skip any keys that aren't visible at the
      70           1 :         // provided snapshot sequence number.
      71           1 :         //
      72           1 :         // NB: Within a given sequence number, keys are ordered as:
      73           1 :         //   RangeKeySet > RangeKeyUnset > RangeKeyDelete
      74           1 :         // This is significant, because this ensures that a Set or Unset sharing a
      75           1 :         // sequence number with a Delete do not shadow each other.
      76           1 :         deleteIdx := -1
      77           1 :         for i := range keys {
      78           1 :                 if invariants.Enabled && i > 0 && keys[i].Trailer > keys[i-1].Trailer {
      79           0 :                         panic("pebble: invariant violation: span keys unordered")
      80             :                 }
      81           1 :                 if !keys[i].VisibleAt(snapshot) {
      82           1 :                         continue
      83             :                 }
      84             :                 // Once a RangeKeyDelete is observed, we know it shadows all subsequent
      85             :                 // keys and we can break early. We don't add the RangeKeyDelete key to
      86             :                 // keysBySuffix.keys yet, because we don't want a suffix-less key
      87             :                 // that appeared earlier in the slice to elide it. It'll be added back
      88             :                 // in at the end.
      89           1 :                 if keys[i].Kind() == base.InternalKeyKindRangeKeyDelete {
      90           1 :                         deleteIdx = i
      91           1 :                         break
      92             :                 }
      93           1 :                 dst = append(dst, keys[i])
      94             :         }
      95             : 
      96             :         // Sort the accumulated keys by suffix. There may be duplicates within a
      97             :         // suffix, in which case the one with a larger trailer survives.
      98             :         //
      99             :         // We use a stable sort so that the first key with a given suffix is the one
     100             :         // that with the highest InternalKeyTrailer (because the input `keys` was sorted by
     101             :         // trailer descending).
     102           1 :         slices.SortStableFunc(dst, func(a, b keyspan.Key) int {
     103           1 :                 return suffixCmp(a.Suffix, b.Suffix)
     104           1 :         })
     105             : 
     106             :         // Grab a handle of the full sorted slice, before reslicing
     107             :         // dst to accumulate the final coalesced keys.
     108           1 :         sorted := dst
     109           1 :         dst = dst[:0]
     110           1 : 
     111           1 :         var (
     112           1 :                 // prevSuffix is updated on each iteration of the below loop, and
     113           1 :                 // compared by the subsequent iteration to determine whether adjacent
     114           1 :                 // keys are defined at the same suffix.
     115           1 :                 prevSuffix []byte
     116           1 :                 // shadowing is set to true once any Key is shadowed by another key.
     117           1 :                 // When it's set to true—or after the loop if no keys are shadowed—the
     118           1 :                 // keysBySuffix.keys slice is resliced to contain the prefix of
     119           1 :                 // unshadowed keys. This avoids copying them incrementally in the common
     120           1 :                 // case of no shadowing.
     121           1 :                 shadowing bool
     122           1 :         )
     123           1 :         for i := range sorted {
     124           1 :                 if i > 0 && suffixCmp(prevSuffix, sorted[i].Suffix) == 0 {
     125           1 :                         // Skip; this key is shadowed by the predecessor that had a larger
     126           1 :                         // InternalKeyTrailer. If this is the first shadowed key, set shadowing=true
     127           1 :                         // and reslice keysBySuffix.keys to hold the entire unshadowed
     128           1 :                         // prefix.
     129           1 :                         if !shadowing {
     130           1 :                                 dst = dst[:i]
     131           1 :                                 shadowing = true
     132           1 :                         }
     133           1 :                         continue
     134             :                 }
     135           1 :                 prevSuffix = sorted[i].Suffix
     136           1 :                 if shadowing {
     137           1 :                         dst = append(dst, sorted[i])
     138           1 :                 }
     139             :         }
     140             :         // If there was no shadowing, dst.keys is untouched. We can simply set it to
     141             :         // the existing `sorted` slice (also backed by dst).
     142           1 :         if !shadowing {
     143           1 :                 dst = sorted
     144           1 :         }
     145             :         // If the original input `keys` slice contained a RangeKeyDelete, add it.
     146           1 :         if deleteIdx >= 0 {
     147           1 :                 dst = append(dst, keys[deleteIdx])
     148           1 :         }
     149           1 :         return dst
     150             : }
     151             : 
     152             : // ForeignSSTTransformer implements a keyspan.Transformer for range keys in
     153             : // shared ingested sstables. It is largely similar to the Transform function
     154             : // implemented in UserIteratorConfig in that it calls coalesce to remove range
     155             : // keys shadowed by other range keys, but also retains the range key that does
     156             : // the shadowing. In addition, it elides RangeKey unsets/dels in L6 as they are
     157             : // inapplicable when reading from a different Pebble instance. Finally, it
     158             : // returns keys sorted in trailer order, not suffix order, as that's what the
     159             : // rest of the iterator stack expects.
     160             : type ForeignSSTTransformer struct {
     161             :         Equal   base.Equal
     162             :         SeqNum  base.SeqNum
     163             :         sortBuf []keyspan.Key
     164             : }
     165             : 
     166             : // Transform implements the Transformer interface.
     167             : func (f *ForeignSSTTransformer) Transform(
     168             :         suffixCmp base.CompareSuffixes, s keyspan.Span, dst *keyspan.Span,
     169           1 : ) error {
     170           1 :         // Apply shadowing of keys.
     171           1 :         dst.Start = s.Start
     172           1 :         dst.End = s.End
     173           1 :         f.sortBuf = f.sortBuf[:0]
     174           1 :         f.sortBuf = CoalesceInto(suffixCmp, f.sortBuf, math.MaxUint64, s.Keys)
     175           1 :         keys := f.sortBuf
     176           1 :         dst.Keys = dst.Keys[:0]
     177           1 :         for i := range keys {
     178           1 :                 switch keys[i].Kind() {
     179           1 :                 case base.InternalKeyKindRangeKeySet:
     180           1 :                         if invariants.Enabled && len(dst.Keys) > 0 && suffixCmp(dst.Keys[len(dst.Keys)-1].Suffix, keys[i].Suffix) > 0 {
     181           0 :                                 panic("pebble: keys unexpectedly not in ascending suffix order")
     182             :                         }
     183           0 :                 case base.InternalKeyKindRangeKeyUnset:
     184           0 :                         if invariants.Enabled && len(dst.Keys) > 0 && suffixCmp(dst.Keys[len(dst.Keys)-1].Suffix, keys[i].Suffix) > 0 {
     185           0 :                                 panic("pebble: keys unexpectedly not in ascending suffix order")
     186             :                         }
     187           1 :                 case base.InternalKeyKindRangeKeyDelete:
     188             :                         // Nothing to do.
     189           0 :                 default:
     190           0 :                         return base.CorruptionErrorf("pebble: unrecognized range key kind %s", keys[i].Kind())
     191             :                 }
     192           1 :                 dst.Keys = append(dst.Keys, keyspan.Key{
     193           1 :                         Trailer: base.MakeTrailer(f.SeqNum, keys[i].Kind()),
     194           1 :                         Suffix:  keys[i].Suffix,
     195           1 :                         Value:   keys[i].Value,
     196           1 :                 })
     197             :         }
     198             :         // coalesce results in dst.Keys being sorted by Suffix. Change it back to
     199             :         // ByTrailerDesc, as that's what the iterator stack will expect.
     200           1 :         keyspan.SortKeysByTrailer(dst.Keys)
     201           1 :         dst.KeysOrder = keyspan.ByTrailerDesc
     202           1 :         return nil
     203             : }

Generated by: LCOV version 1.14