Line data Source code
1 : // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package keyspan // import "github.com/cockroachdb/pebble/internal/keyspan"
6 :
7 : import (
8 : "bytes"
9 : "fmt"
10 : "sort"
11 : "strconv"
12 : "strings"
13 : "unicode"
14 :
15 : "github.com/cockroachdb/pebble/internal/base"
16 : )
17 :
// Span represents a set of keys over a span of user key space. All of the keys
// within a Span are applied across the span's key span indicated by Start and
// End. Each internal key applied over the user key span appears as a separate
// Key, with its own kind and sequence number. Optionally, each Key may also
// have a Suffix and/or Value.
//
// Note that the start user key is inclusive and the end user key is exclusive.
//
// Currently the only supported key kinds are:
//
//	RANGEDEL, RANGEKEYSET, RANGEKEYUNSET, RANGEKEYDEL.
type Span struct {
	// Start and End encode the user key range of all the contained items, with
	// an inclusive start key and exclusive end key. Both Start and End must be
	// non-nil, or both nil if representing an invalid Span.
	Start, End []byte
	// Keys holds the set of keys applied over the [Start, End) user key range.
	// Keys is sorted by (SeqNum, Kind) descending, unless otherwise specified
	// by the context. If SeqNum and Kind are equal, the order of Keys is
	// undefined. Keys may be empty, even if Start and End are non-nil.
	//
	// Keys are a decoded representation of the internal keys stored in batches
	// or sstable blocks. A single internal key in a range key block may produce
	// several decoded Keys.
	Keys []Key
	// KeysOrder records how Keys is sorted. The zero value is ByTrailerDesc.
	// Methods that rely on trailer ordering (SmallestKey, LargestKey,
	// SmallestSeqNum, LargestSeqNum, Visible, VisibleAt, Covers, CoversAt)
	// panic when KeysOrder holds any other value.
	KeysOrder KeysOrder
}
45 :
// KeysOrder describes the ordering of Keys within a Span.
type KeysOrder int8

const (
	// ByTrailerDesc indicates a Span's keys are sorted by Trailer descending.
	// This is the default ordering, and the ordering used during physical
	// storage. It is the zero value of KeysOrder.
	ByTrailerDesc KeysOrder = iota
	// BySuffixAsc indicates a Span's keys are sorted by Suffix ascending. This
	// ordering is used during user iteration of range keys.
	BySuffixAsc
)
58 :
// Key represents a single key applied over a span of user keys. A Key is
// contained by a Span which specifies the span of user keys over which the Key
// is applied.
type Key struct {
	// Trailer contains the key kind and sequence number. The kind occupies the
	// low 8 bits and the sequence number the remaining upper bits (see the
	// Kind and SeqNum accessors).
	Trailer uint64
	// Suffix holds an optional suffix associated with the key. This is only
	// non-nil for RANGEKEYSET and RANGEKEYUNSET keys.
	Suffix []byte
	// Value holds a logical value associated with the Key. It is NOT the
	// internal value stored in a range key or range deletion block. This is
	// only non-nil for RANGEKEYSET keys.
	Value []byte
}
73 :
74 : // SeqNum returns the sequence number component of the key.
75 1 : func (k Key) SeqNum() uint64 {
76 1 : return k.Trailer >> 8
77 1 : }
78 :
79 : // VisibleAt returns true if the provided key is visible at the provided
80 : // snapshot sequence number. It interprets batch sequence numbers as always
81 : // visible, because non-visible batch span keys are filtered when they're
82 : // fragmented.
83 1 : func (k Key) VisibleAt(snapshot uint64) bool {
84 1 : seq := k.SeqNum()
85 1 : return seq < snapshot || seq&base.InternalKeySeqNumBatch != 0
86 1 : }
87 :
88 : // Kind returns the kind component of the key.
89 1 : func (k Key) Kind() base.InternalKeyKind {
90 1 : return base.InternalKeyKind(k.Trailer & 0xff)
91 1 : }
92 :
93 : // Equal returns true if this Key is equal to the given key. Two keys are said
94 : // to be equal if the two Keys have equal trailers, suffix and value. Suffix
95 : // comparison uses the provided base.Compare func. Value comparison is bytewise.
96 1 : func (k Key) Equal(equal base.Equal, b Key) bool {
97 1 : return k.Trailer == b.Trailer &&
98 1 : equal(k.Suffix, b.Suffix) &&
99 1 : bytes.Equal(k.Value, b.Value)
100 1 : }
101 :
102 : // Valid returns true if the span is defined.
103 1 : func (s *Span) Valid() bool {
104 1 : return s.Start != nil && s.End != nil
105 1 : }
106 :
107 : // Empty returns true if the span does not contain any keys. An empty span may
108 : // still be Valid. A non-empty span must be Valid.
109 : //
110 : // An Empty span may be produced by Visible, or be produced by iterators in
111 : // order to surface the gaps between keys.
112 1 : func (s *Span) Empty() bool {
113 1 : return s == nil || len(s.Keys) == 0
114 1 : }
115 :
116 : // SmallestKey returns the smallest internal key defined by the span's keys.
117 : // It requires the Span's keys be in ByTrailerDesc order. It panics if the span
118 : // contains no keys or its keys are sorted in a different order.
119 1 : func (s *Span) SmallestKey() base.InternalKey {
120 1 : if len(s.Keys) == 0 {
121 0 : panic("pebble: Span contains no keys")
122 1 : } else if s.KeysOrder != ByTrailerDesc {
123 0 : panic("pebble: span's keys unexpectedly not in trailer order")
124 : }
125 : // The first key has the highest (sequence number,kind) tuple.
126 1 : return base.InternalKey{
127 1 : UserKey: s.Start,
128 1 : Trailer: s.Keys[0].Trailer,
129 1 : }
130 : }
131 :
132 : // LargestKey returns the largest internal key defined by the span's keys. The
133 : // returned key will always be a "sentinel key" at the end boundary. The
134 : // "sentinel key" models the exclusive end boundary by returning an InternalKey
135 : // with the maximal sequence number, ensuring all InternalKeys with the same
136 : // user key sort after the sentinel key.
137 : //
138 : // It requires the Span's keys be in ByTrailerDesc order. It panics if the span
139 : // contains no keys or its keys are sorted in a different order.
140 1 : func (s *Span) LargestKey() base.InternalKey {
141 1 : if len(s.Keys) == 0 {
142 0 : panic("pebble: Span contains no keys")
143 1 : } else if s.KeysOrder != ByTrailerDesc {
144 0 : panic("pebble: span's keys unexpectedly not in trailer order")
145 : }
146 : // The last key has the lowest (sequence number,kind) tuple.
147 1 : kind := s.Keys[len(s.Keys)-1].Kind()
148 1 : return base.MakeExclusiveSentinelKey(kind, s.End)
149 : }
150 :
151 : // SmallestSeqNum returns the smallest sequence number of a key contained within
152 : // the span. It requires the Span's keys be in ByTrailerDesc order. It panics if
153 : // the span contains no keys or its keys are sorted in a different order.
154 1 : func (s *Span) SmallestSeqNum() uint64 {
155 1 : if len(s.Keys) == 0 {
156 0 : panic("pebble: Span contains no keys")
157 1 : } else if s.KeysOrder != ByTrailerDesc {
158 0 : panic("pebble: span's keys unexpectedly not in trailer order")
159 : }
160 :
161 1 : return s.Keys[len(s.Keys)-1].SeqNum()
162 : }
163 :
164 : // LargestSeqNum returns the largest sequence number of a key contained within
165 : // the span. It requires the Span's keys be in ByTrailerDesc order. It panics if
166 : // the span contains no keys or its keys are sorted in a different order.
167 1 : func (s *Span) LargestSeqNum() uint64 {
168 1 : if len(s.Keys) == 0 {
169 0 : panic("pebble: Span contains no keys")
170 1 : } else if s.KeysOrder != ByTrailerDesc {
171 0 : panic("pebble: span's keys unexpectedly not in trailer order")
172 : }
173 1 : return s.Keys[0].SeqNum()
174 : }
175 :
176 : // TODO(jackson): Replace most of the calls to Visible with more targeted calls
177 : // that avoid the need to construct a new Span.
178 :
179 : // Visible returns a span with the subset of keys visible at the provided
180 : // sequence number. It requires the Span's keys be in ByTrailerDesc order. It
181 : // panics if the span's keys are sorted in a different order.
182 : //
183 : // Visible may incur an allocation, so callers should prefer targeted,
184 : // non-allocating methods when possible.
185 1 : func (s Span) Visible(snapshot uint64) Span {
186 1 : if s.KeysOrder != ByTrailerDesc {
187 0 : panic("pebble: span's keys unexpectedly not in trailer order")
188 : }
189 :
190 1 : ret := Span{Start: s.Start, End: s.End}
191 1 : if len(s.Keys) == 0 {
192 0 : return ret
193 0 : }
194 :
195 : // Keys from indexed batches may force an allocation. The Keys slice is
196 : // ordered by sequence number, so ordinarily we can return the trailing
197 : // subslice containing keys with sequence numbers less than `seqNum`.
198 : //
199 : // However, batch keys are special. Only visible batch keys are included
200 : // when an Iterator's batch spans are fragmented. They must always be
201 : // visible.
202 : //
203 : // Batch keys can create a sandwich of visible batch keys at the beginning
204 : // of the slice and visible committed keys at the end of the slice, forcing
205 : // us to allocate a new slice and copy the contents.
206 : //
207 : // Care is taking to only incur an allocation only when batch keys and
208 : // visible keys actually sandwich non-visible keys.
209 :
210 : // lastBatchIdx and lastNonVisibleIdx are set to the last index of a batch
211 : // key and a non-visible key respectively.
212 1 : lastBatchIdx := -1
213 1 : lastNonVisibleIdx := -1
214 1 : for i := range s.Keys {
215 1 : if seqNum := s.Keys[i].SeqNum(); seqNum&base.InternalKeySeqNumBatch != 0 {
216 0 : // Batch key. Always visible.
217 0 : lastBatchIdx = i
218 1 : } else if seqNum >= snapshot {
219 1 : // This key is not visible.
220 1 : lastNonVisibleIdx = i
221 1 : }
222 : }
223 :
224 : // In the following comments: b = batch, h = hidden, v = visible (committed).
225 1 : switch {
226 1 : case lastNonVisibleIdx == -1:
227 1 : // All keys are visible.
228 1 : //
229 1 : // [b b b], [v v v] and [b b b v v v]
230 1 : ret.Keys = s.Keys
231 1 : case lastBatchIdx == -1:
232 1 : // There are no batch keys, so we can return the continuous subslice
233 1 : // starting after the last non-visible Key.
234 1 : //
235 1 : // h h h [v v v]
236 1 : ret.Keys = s.Keys[lastNonVisibleIdx+1:]
237 0 : case lastNonVisibleIdx == len(s.Keys)-1:
238 0 : // While we have a batch key and non-visible keys, there are no
239 0 : // committed visible keys. The 'sandwich' is missing the bottom layer,
240 0 : // so we can return the continuous sublice at the beginning.
241 0 : //
242 0 : // [b b b] h h h
243 0 : ret.Keys = s.Keys[0 : lastBatchIdx+1]
244 0 : default:
245 0 : // This is the problematic sandwich case. Allocate a new slice, copying
246 0 : // the batch keys and the visible keys into it.
247 0 : //
248 0 : // [b b b] h h h [v v v]
249 0 : ret.Keys = make([]Key, (lastBatchIdx+1)+(len(s.Keys)-lastNonVisibleIdx-1))
250 0 : copy(ret.Keys, s.Keys[:lastBatchIdx+1])
251 0 : copy(ret.Keys[lastBatchIdx+1:], s.Keys[lastNonVisibleIdx+1:])
252 : }
253 1 : return ret
254 : }
255 :
256 : // VisibleAt returns true if the span contains a key visible at the provided
257 : // snapshot. Keys with sequence numbers with the batch bit set are treated as
258 : // always visible.
259 : //
260 : // VisibleAt requires the Span's keys be in ByTrailerDesc order. It panics if
261 : // the span's keys are sorted in a different order.
262 1 : func (s *Span) VisibleAt(snapshot uint64) bool {
263 1 : if s.KeysOrder != ByTrailerDesc {
264 0 : panic("pebble: span's keys unexpectedly not in trailer order")
265 : }
266 1 : if len(s.Keys) == 0 {
267 0 : return false
268 1 : } else if first := s.Keys[0].SeqNum(); first&base.InternalKeySeqNumBatch != 0 {
269 1 : // Only visible batch keys are included when an Iterator's batch spans
270 1 : // are fragmented. They must always be visible.
271 1 : return true
272 1 : } else {
273 1 : // Otherwise we check the last key. Since keys are ordered decreasing in
274 1 : // sequence number, the last key has the lowest sequence number of any
275 1 : // of the span's keys. If any of the keys are visible, the last key must
276 1 : // be visible. Or put differently: if the last key is not visible, then
277 1 : // no key is visible.
278 1 : return s.Keys[len(s.Keys)-1].SeqNum() < snapshot
279 1 : }
280 : }
281 :
282 : // ShallowClone returns the span with a Keys slice owned by the span itself.
283 : // None of the key byte slices are cloned (see Span.DeepClone).
284 0 : func (s *Span) ShallowClone() Span {
285 0 : c := Span{
286 0 : Start: s.Start,
287 0 : End: s.End,
288 0 : Keys: make([]Key, len(s.Keys)),
289 0 : KeysOrder: s.KeysOrder,
290 0 : }
291 0 : copy(c.Keys, s.Keys)
292 0 : return c
293 0 : }
294 :
295 : // DeepClone clones the span, creating copies of all contained slices. DeepClone
296 : // is intended for non-production code paths like tests, the level checker, etc
297 : // because it is allocation heavy.
298 1 : func (s *Span) DeepClone() Span {
299 1 : c := Span{
300 1 : Start: make([]byte, len(s.Start)),
301 1 : End: make([]byte, len(s.End)),
302 1 : Keys: make([]Key, len(s.Keys)),
303 1 : KeysOrder: s.KeysOrder,
304 1 : }
305 1 : copy(c.Start, s.Start)
306 1 : copy(c.End, s.End)
307 1 : for i := range s.Keys {
308 1 : c.Keys[i].Trailer = s.Keys[i].Trailer
309 1 : if len(s.Keys[i].Suffix) > 0 {
310 0 : c.Keys[i].Suffix = make([]byte, len(s.Keys[i].Suffix))
311 0 : copy(c.Keys[i].Suffix, s.Keys[i].Suffix)
312 0 : }
313 1 : if len(s.Keys[i].Value) > 0 {
314 0 : c.Keys[i].Value = make([]byte, len(s.Keys[i].Value))
315 0 : copy(c.Keys[i].Value, s.Keys[i].Value)
316 0 : }
317 : }
318 1 : return c
319 : }
320 :
321 : // Contains returns true if the specified key resides within the span's bounds.
322 1 : func (s *Span) Contains(cmp base.Compare, key []byte) bool {
323 1 : return cmp(s.Start, key) <= 0 && cmp(key, s.End) < 0
324 1 : }
325 :
326 : // Covers returns true if the span covers keys at seqNum.
327 : //
328 : // Covers requires the Span's keys be in ByTrailerDesc order. It panics if the
329 : // span's keys are sorted in a different order.
330 0 : func (s Span) Covers(seqNum uint64) bool {
331 0 : if s.KeysOrder != ByTrailerDesc {
332 0 : panic("pebble: span's keys unexpectedly not in trailer order")
333 : }
334 0 : return !s.Empty() && s.Keys[0].SeqNum() > seqNum
335 : }
336 :
337 : // CoversAt returns true if the span contains a key that is visible at the
338 : // provided snapshot sequence number, and that key's sequence number is higher
339 : // than seqNum.
340 : //
341 : // Keys with sequence numbers with the batch bit set are treated as always
342 : // visible.
343 : //
344 : // CoversAt requires the Span's keys be in ByTrailerDesc order. It panics if the
345 : // span's keys are sorted in a different order.
346 1 : func (s *Span) CoversAt(snapshot, seqNum uint64) bool {
347 1 : if s.KeysOrder != ByTrailerDesc {
348 0 : panic("pebble: span's keys unexpectedly not in trailer order")
349 : }
350 : // NB: A key is visible at `snapshot` if its sequence number is strictly
351 : // less than `snapshot`. See base.Visible.
352 1 : for i := range s.Keys {
353 1 : if kseq := s.Keys[i].SeqNum(); kseq&base.InternalKeySeqNumBatch != 0 {
354 1 : // Only visible batch keys are included when an Iterator's batch spans
355 1 : // are fragmented. They must always be visible.
356 1 : return kseq > seqNum
357 1 : } else if kseq < snapshot {
358 1 : return kseq > seqNum
359 1 : }
360 : }
361 1 : return false
362 : }
363 :
364 : // String returns a string representation of the span.
365 0 : func (s Span) String() string {
366 0 : return fmt.Sprint(prettySpan{Span: s, formatKey: base.DefaultFormatter})
367 0 : }
368 :
369 : // Pretty returns a formatter for the span.
370 0 : func (s Span) Pretty(f base.FormatKey) fmt.Formatter {
371 0 : // TODO(jackson): Take a base.FormatValue to format Key.Value too.
372 0 : return prettySpan{s, f}
373 0 : }
374 :
// prettySpan pairs a Span with the key formatter used to render its Start and
// End bounds. It implements fmt.Formatter via its Format method.
type prettySpan struct {
	Span
	// formatKey formats the span's Start and End user keys.
	formatKey base.FormatKey
}
379 :
380 0 : func (s prettySpan) Format(fs fmt.State, c rune) {
381 0 : if !s.Valid() {
382 0 : fmt.Fprintf(fs, "<invalid>")
383 0 : return
384 0 : }
385 0 : fmt.Fprintf(fs, "%s-%s:{", s.formatKey(s.Start), s.formatKey(s.End))
386 0 : for i, k := range s.Keys {
387 0 : if i > 0 {
388 0 : fmt.Fprint(fs, " ")
389 0 : }
390 0 : fmt.Fprintf(fs, "(#%d,%s", k.SeqNum(), k.Kind())
391 0 : if len(k.Suffix) > 0 || len(k.Value) > 0 {
392 0 : fmt.Fprintf(fs, ",%s", k.Suffix)
393 0 : }
394 0 : if len(k.Value) > 0 {
395 0 : fmt.Fprintf(fs, ",%s", k.Value)
396 0 : }
397 0 : fmt.Fprint(fs, ")")
398 : }
399 0 : fmt.Fprintf(fs, "}")
400 : }
401 :
402 : // SortKeysByTrailer sorts a keys slice by trailer.
403 1 : func SortKeysByTrailer(keys *[]Key) {
404 1 : // NB: keys is a pointer to a slice instead of a slice to avoid `sorted`
405 1 : // escaping to the heap.
406 1 : sorted := (*keysBySeqNumKind)(keys)
407 1 : sort.Sort(sorted)
408 1 : }
409 :
// KeysBySuffix implements sort.Interface, sorting its member Keys slice by
// Suffix in the order dictated by Cmp.
type KeysBySuffix struct {
	// Cmp compares two suffixes; Less reports true when it returns a negative
	// value.
	Cmp base.Compare
	// Keys is the slice that is sorted in place.
	Keys []Key
}
416 :
417 1 : func (s *KeysBySuffix) Len() int { return len(s.Keys) }
418 1 : func (s *KeysBySuffix) Less(i, j int) bool { return s.Cmp(s.Keys[i].Suffix, s.Keys[j].Suffix) < 0 }
419 1 : func (s *KeysBySuffix) Swap(i, j int) { s.Keys[i], s.Keys[j] = s.Keys[j], s.Keys[i] }
420 :
421 : // ParseSpan parses the string representation of a Span. It's intended for
422 : // tests. ParseSpan panics if passed a malformed span representation.
423 0 : func ParseSpan(input string) Span {
424 0 : var s Span
425 0 : parts := strings.FieldsFunc(input, func(r rune) bool {
426 0 : switch r {
427 0 : case '-', ':', '{', '}':
428 0 : return true
429 0 : default:
430 0 : return unicode.IsSpace(r)
431 : }
432 : })
433 0 : s.Start, s.End = []byte(parts[0]), []byte(parts[1])
434 0 :
435 0 : // Each of the remaining parts represents a single Key.
436 0 : s.Keys = make([]Key, 0, len(parts)-2)
437 0 : for _, p := range parts[2:] {
438 0 : keyFields := strings.FieldsFunc(p, func(r rune) bool {
439 0 : switch r {
440 0 : case '#', ',', '(', ')':
441 0 : return true
442 0 : default:
443 0 : return unicode.IsSpace(r)
444 : }
445 : })
446 :
447 0 : var k Key
448 0 : // Parse the sequence number.
449 0 : seqNum, err := strconv.ParseUint(keyFields[0], 10, 64)
450 0 : if err != nil {
451 0 : panic(fmt.Sprintf("invalid sequence number: %q: %s", keyFields[0], err))
452 : }
453 : // Parse the key kind.
454 0 : kind := base.ParseKind(keyFields[1])
455 0 : k.Trailer = base.MakeTrailer(seqNum, kind)
456 0 : // Parse the optional suffix.
457 0 : if len(keyFields) >= 3 {
458 0 : k.Suffix = []byte(keyFields[2])
459 0 : }
460 : // Parse the optional value.
461 0 : if len(keyFields) >= 4 {
462 0 : k.Value = []byte(keyFields[3])
463 0 : }
464 0 : s.Keys = append(s.Keys, k)
465 : }
466 0 : return s
467 : }
|