Line data Source code
1 : // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 : // of this source code is governed by a BSD-style license that can be found in 3 : // the LICENSE file. 4 : 5 : package rangekey 6 : 7 : import ( 8 : "math" 9 : "sort" 10 : 11 : "github.com/cockroachdb/pebble/internal/base" 12 : "github.com/cockroachdb/pebble/internal/invariants" 13 : "github.com/cockroachdb/pebble/internal/keyspan" 14 : ) 15 : 16 : // Coalesce imposes range key semantics and coalesces range keys with the same 17 : // bounds. Coalesce drops any keys shadowed by more recent sets, unsets or 18 : // deletes. Coalesce modifies the provided span's Keys slice, reslicing the 19 : // slice to remove dropped keys. 20 : // 21 : // Coalescence has subtle behavior with respect to sequence numbers. Coalesce 22 : // depends on a keyspan.Span's Keys being sorted in sequence number descending 23 : // order. The first key has the largest sequence number. The returned coalesced 24 : // span includes only the largest sequence number. All other sequence numbers 25 : // are forgotten. When a compaction constructs output range keys from a 26 : // coalesced span, it produces at most one RANGEKEYSET, one RANGEKEYUNSET and 27 : // one RANGEKEYDEL. Each one of these keys adopt the largest sequence number. 28 : // 29 : // This has the potentially surprising effect of 'promoting' a key to a higher 30 : // sequence number. This is okay, because: 31 : // - There are no other overlapping keys within the coalesced span of 32 : // sequence numbers (otherwise they would be in the compaction, due to 33 : // the LSM invariant). 34 : // - Range key sequence numbers are never compared to point key sequence 35 : // numbers. Range keys and point keys have parallel existences. 36 : // - Compactions only coalesce within snapshot stripes. 37 : // 38 : // Additionally, internal range keys at the same sequence number have subtle 39 : // mechanics: 40 : // - RANGEKEYSETs shadow RANGEKEYUNSETs of the same suffix. 41 : // - RANGEKEYDELs only apply to keys at lower sequence numbers. 42 : // 43 : // This is required for ingestion. Ingested sstables are assigned a single 44 : // sequence number for the file, at which all of the file's keys are visible. 45 : // The RANGEKEYSET, RANGEKEYUNSET and RANGEKEYDEL key kinds are ordered such 46 : // that among keys with equal sequence numbers (thus ordered by their kinds) the 47 : // keys do not affect one another. Ingested sstables are expected to be 48 : // consistent with respect to the set/unset suffixes: A given suffix should be 49 : // set or unset but not both. 50 : // 51 : // The resulting dst Keys slice is sorted by Trailer. 52 1 : func Coalesce(cmp base.Compare, eq base.Equal, keys []keyspan.Key, dst *[]keyspan.Key) { 53 1 : // TODO(jackson): Currently, Coalesce doesn't actually perform the sequence 54 1 : // number promotion described in the comment above. 55 1 : keysBySuffix := keyspan.KeysBySuffix{ 56 1 : Cmp: cmp, 57 1 : Keys: (*dst)[:0], 58 1 : } 59 1 : CoalesceIntoKeysBySuffix(eq, &keysBySuffix, math.MaxUint64, keys) 60 1 : // Update the span with the (potentially reduced) keys slice. coalesce left 61 1 : // the keys in *dst sorted by suffix. Re-sort them by trailer. 62 1 : *dst = keysBySuffix.Keys 63 1 : keyspan.SortKeysByTrailer(dst) 64 1 : } 65 : 66 : // CoalesceIntoKeysBySuffix is a variant of Coalesce which outputs the results into 67 : // keyspan.KeysBySuffix without sorting them. 68 : func CoalesceIntoKeysBySuffix( 69 : equal base.Equal, keysBySuffix *keyspan.KeysBySuffix, snapshot uint64, keys []keyspan.Key, 70 1 : ) { 71 1 : // First, enforce visibility and RangeKeyDelete mechanics. We only need to 72 1 : // consider the prefix of keys before and including the first 73 1 : // RangeKeyDelete. We also must skip any keys that aren't visible at the 74 1 : // provided snapshot sequence number. 75 1 : // 76 1 : // NB: Within a given sequence number, keys are ordered as: 77 1 : // RangeKeySet > RangeKeyUnset > RangeKeyDelete 78 1 : // This is significant, because this ensures that a Set or Unset sharing a 79 1 : // sequence number with a Delete do not shadow each other. 80 1 : deleteIdx := -1 81 1 : for i := range keys { 82 1 : if invariants.Enabled && i > 0 && keys[i].Trailer > keys[i-1].Trailer { 83 0 : panic("pebble: invariant violation: span keys unordered") 84 : } 85 1 : if !keys[i].VisibleAt(snapshot) { 86 1 : continue 87 : } 88 : // Once a RangeKeyDelete is observed, we know it shadows all subsequent 89 : // keys and we can break early. We don't add the RangeKeyDelete key to 90 : // keysBySuffix.keys yet, because we don't want a suffix-less key 91 : // that appeared earlier in the slice to elide it. It'll be added back 92 : // in at the end. 93 1 : if keys[i].Kind() == base.InternalKeyKindRangeKeyDelete { 94 1 : deleteIdx = i 95 1 : break 96 : } 97 1 : keysBySuffix.Keys = append(keysBySuffix.Keys, keys[i]) 98 : } 99 : 100 : // Sort the accumulated keys by suffix. There may be duplicates within a 101 : // suffix, in which case the one with a larger trailer survives. 102 : // 103 : // We use a stable sort so that the first key with a given suffix is the one 104 : // that with the highest Trailer (because the input `keys` was sorted by 105 : // trailer descending). 106 1 : sort.Stable(keysBySuffix) 107 1 : 108 1 : // Grab a handle of the full sorted slice, before reslicing 109 1 : // keysBySuffix.keys to accumulate the final coalesced keys. 110 1 : sorted := keysBySuffix.Keys 111 1 : keysBySuffix.Keys = keysBySuffix.Keys[:0] 112 1 : 113 1 : var ( 114 1 : // prevSuffix is updated on each iteration of the below loop, and 115 1 : // compared by the subsequent iteration to determine whether adjacent 116 1 : // keys are defined at the same suffix. 117 1 : prevSuffix []byte 118 1 : // shadowing is set to true once any Key is shadowed by another key. 119 1 : // When it's set to true—or after the loop if no keys are shadowed—the 120 1 : // keysBySuffix.keys slice is resliced to contain the prefix of 121 1 : // unshadowed keys. This avoids copying them incrementally in the common 122 1 : // case of no shadowing. 123 1 : shadowing bool 124 1 : ) 125 1 : for i := range sorted { 126 1 : if i > 0 && equal(prevSuffix, sorted[i].Suffix) { 127 1 : // Skip; this key is shadowed by the predecessor that had a larger 128 1 : // Trailer. If this is the first shadowed key, set shadowing=true 129 1 : // and reslice keysBySuffix.keys to hold the entire unshadowed 130 1 : // prefix. 131 1 : if !shadowing { 132 1 : keysBySuffix.Keys = keysBySuffix.Keys[:i] 133 1 : shadowing = true 134 1 : } 135 1 : continue 136 : } 137 1 : prevSuffix = sorted[i].Suffix 138 1 : if shadowing { 139 1 : keysBySuffix.Keys = append(keysBySuffix.Keys, sorted[i]) 140 1 : } 141 : } 142 : // If there was no shadowing, keysBySuffix.keys is untouched. We can simply 143 : // set it to the existing `sorted` slice (also backed by keysBySuffix.keys). 144 1 : if !shadowing { 145 1 : keysBySuffix.Keys = sorted 146 1 : } 147 : // If the original input `keys` slice contained a RangeKeyDelete, add it. 148 1 : if deleteIdx >= 0 { 149 1 : keysBySuffix.Keys = append(keysBySuffix.Keys, keys[deleteIdx]) 150 1 : } 151 : } 152 : 153 : // ForeignSSTTransformer implements a keyspan.Transformer for range keys in 154 : // shared ingested sstables. It is largely similar to the Transform function 155 : // implemented in UserIteratorConfig in that it calls coalesce to remove range 156 : // keys shadowed by other range keys, but also retains the range key that does 157 : // the shadowing. In addition, it elides RangeKey unsets/dels in L6 as they are 158 : // inapplicable when reading from a different Pebble instance. Finally, it 159 : // returns keys sorted in trailer order, not suffix order, as that's what the 160 : // rest of the iterator stack expects. 161 : type ForeignSSTTransformer struct { 162 : Equal base.Equal 163 : SeqNum uint64 164 : sortBuf keyspan.KeysBySuffix 165 : } 166 : 167 : // Transform implements the Transformer interface. 168 : func (f *ForeignSSTTransformer) Transform( 169 : cmp base.Compare, s keyspan.Span, dst *keyspan.Span, 170 1 : ) error { 171 1 : // Apply shadowing of keys. 172 1 : dst.Start = s.Start 173 1 : dst.End = s.End 174 1 : f.sortBuf = keyspan.KeysBySuffix{ 175 1 : Cmp: cmp, 176 1 : Keys: f.sortBuf.Keys[:0], 177 1 : } 178 1 : CoalesceIntoKeysBySuffix(f.Equal, &f.sortBuf, math.MaxUint64, s.Keys) 179 1 : keys := f.sortBuf.Keys 180 1 : dst.Keys = dst.Keys[:0] 181 1 : for i := range keys { 182 1 : switch keys[i].Kind() { 183 1 : case base.InternalKeyKindRangeKeySet: 184 1 : if invariants.Enabled && len(dst.Keys) > 0 && cmp(dst.Keys[len(dst.Keys)-1].Suffix, keys[i].Suffix) > 0 { 185 0 : panic("pebble: keys unexpectedly not in ascending suffix order") 186 : } 187 1 : case base.InternalKeyKindRangeKeyUnset: 188 1 : if invariants.Enabled && len(dst.Keys) > 0 && cmp(dst.Keys[len(dst.Keys)-1].Suffix, keys[i].Suffix) > 0 { 189 0 : panic("pebble: keys unexpectedly not in ascending suffix order") 190 : } 191 1 : case base.InternalKeyKindRangeKeyDelete: 192 : // Nothing to do. 193 0 : default: 194 0 : return base.CorruptionErrorf("pebble: unrecognized range key kind %s", keys[i].Kind()) 195 : } 196 1 : dst.Keys = append(dst.Keys, keyspan.Key{ 197 1 : Trailer: base.MakeTrailer(f.SeqNum, keys[i].Kind()), 198 1 : Suffix: keys[i].Suffix, 199 1 : Value: keys[i].Value, 200 1 : }) 201 : } 202 : // coalesce results in dst.Keys being sorted by Suffix. Change it back to 203 : // ByTrailerDesc, as that's what the iterator stack will expect. 204 1 : keyspan.SortKeysByTrailer(&dst.Keys) 205 1 : dst.KeysOrder = keyspan.ByTrailerDesc 206 1 : return nil 207 : }