Line data Source code
1 : // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package keyspan // import "github.com/cockroachdb/pebble/internal/keyspan"
6 :
7 : import (
8 : "bytes"
9 : "fmt"
10 : "sort"
11 : "strconv"
12 : "strings"
13 : "unicode"
14 :
15 : "github.com/cockroachdb/pebble/internal/base"
16 : )
17 :
18 : // Span represents a set of keys over a span of user key space. All of the keys
19 : // within a Span are applied across the span's key span indicated by Start and
20 : // End. Each internal key applied over the user key span appears as a separate
21 : // Key, with its own kind and sequence number. Optionally, each Key may also
22 : // have a Suffix and/or Value.
23 : //
24 : // Note that the start user key is inclusive and the end user key is exclusive.
25 : //
26 : // Currently the only supported key kinds are:
27 : //
28 : // RANGEDEL, RANGEKEYSET, RANGEKEYUNSET, RANGEKEYDEL.
29 : type Span struct {
30 : // Start and End encode the user key range of all the contained items, with
31 : // an inclusive start key and exclusive end key. Both Start and End must be
32 : // non-nil, or both nil if representing an invalid Span.
33 : Start, End []byte
34 : // Keys holds the set of keys applied over the [Start, End) user key range.
35 : // Keys is sorted by (SeqNum, Kind) descending, unless otherwise specified
36 : // by the context. If SeqNum and Kind are equal, the order of Keys is
37 : // undefined. Keys may be empty, even if Start and End are non-nil.
38 : //
39 : // Keys are a decoded representation of the internal keys stored in batches
40 : // or sstable blocks. A single internal key in a range key block may produce
41 : // several decoded Keys.
42 : Keys []Key
43 : KeysOrder KeysOrder
44 : }
45 :
46 : // KeysOrder describes the ordering of Keys within a Span.
47 : type KeysOrder int8
48 :
49 : const (
50 : // ByTrailerDesc indicates a Span's keys are sorted by Trailer descending.
51 : // This is the default ordering, and the ordering used during physical
52 : // storage.
53 : ByTrailerDesc KeysOrder = iota
54 : // BySuffixAsc indicates a Span's keys are sorted by Suffix ascending. This
55 : // ordering is used during user iteration of range keys.
56 : BySuffixAsc
57 : )
58 :
59 : // Key represents a single key applied over a span of user keys. A Key is
60 : // contained by a Span which specifies the span of user keys over which the Key
61 : // is applied.
62 : type Key struct {
63 : // Trailer contains the key kind and sequence number.
64 : Trailer uint64
65 : // Suffix holds an optional suffix associated with the key. This is only
66 : // non-nil for RANGEKEYSET and RANGEKEYUNSET keys.
67 : Suffix []byte
68 : // Value holds a logical value associated with the Key. It is NOT the
69 : // internal value stored in a range key or range deletion block. This is
70 : // only non-nil for RANGEKEYSET keys.
71 : Value []byte
72 : }
73 :
74 : // SeqNum returns the sequence number component of the key.
75 1 : func (k Key) SeqNum() uint64 {
76 1 : return k.Trailer >> 8
77 1 : }
78 :
79 : // VisibleAt returns true if the provided key is visible at the provided
80 : // snapshot sequence number. It interprets batch sequence numbers as always
81 : // visible, because non-visible batch span keys are filtered when they're
82 : // fragmented.
83 1 : func (k Key) VisibleAt(snapshot uint64) bool {
84 1 : seq := k.SeqNum()
85 1 : return seq < snapshot || seq&base.InternalKeySeqNumBatch != 0
86 1 : }
87 :
88 : // Kind returns the kind component of the key.
89 1 : func (k Key) Kind() base.InternalKeyKind {
90 1 : return base.InternalKeyKind(k.Trailer & 0xff)
91 1 : }
92 :
93 : // Equal returns true if this Key is equal to the given key. Two keys are said
94 : // to be equal if the two Keys have equal trailers, suffix and value. Suffix
95 : // comparison uses the provided base.Compare func. Value comparison is bytewise.
96 1 : func (k Key) Equal(equal base.Equal, b Key) bool {
97 1 : return k.Trailer == b.Trailer &&
98 1 : equal(k.Suffix, b.Suffix) &&
99 1 : bytes.Equal(k.Value, b.Value)
100 1 : }
101 :
102 0 : func (k Key) String() string {
103 0 : var b strings.Builder
104 0 : fmt.Fprintf(&b, "(#%d,%s", k.SeqNum(), k.Kind())
105 0 : if len(k.Suffix) > 0 || len(k.Value) > 0 {
106 0 : fmt.Fprintf(&b, ",%s", k.Suffix)
107 0 : }
108 0 : if len(k.Value) > 0 {
109 0 : fmt.Fprintf(&b, ",%s", k.Value)
110 0 : }
111 0 : b.WriteString(")")
112 0 : return b.String()
113 : }
114 :
115 : // Valid returns true if the span is defined.
116 1 : func (s *Span) Valid() bool {
117 1 : return s.Start != nil && s.End != nil
118 1 : }
119 :
120 : // Empty returns true if the span does not contain any keys. An empty span may
121 : // still be Valid. A non-empty span must be Valid.
122 : //
123 : // An Empty span may be produced by Visible, or be produced by iterators in
124 : // order to surface the gaps between keys.
125 1 : func (s *Span) Empty() bool {
126 1 : return s == nil || len(s.Keys) == 0
127 1 : }
128 :
129 : // Bounds returns Start and End as UserKeyBounds.
130 0 : func (s *Span) Bounds() base.UserKeyBounds {
131 0 : return base.UserKeyBoundsEndExclusive(s.Start, s.End)
132 0 : }
133 :
134 : // SmallestKey returns the smallest internal key defined by the span's keys.
135 : // It requires the Span's keys be in ByTrailerDesc order. It panics if the span
136 : // contains no keys or its keys are sorted in a different order.
137 1 : func (s *Span) SmallestKey() base.InternalKey {
138 1 : if len(s.Keys) == 0 {
139 0 : panic("pebble: Span contains no keys")
140 1 : } else if s.KeysOrder != ByTrailerDesc {
141 0 : panic("pebble: span's keys unexpectedly not in trailer order")
142 : }
143 : // The first key has the highest (sequence number,kind) tuple.
144 1 : return base.InternalKey{
145 1 : UserKey: s.Start,
146 1 : Trailer: s.Keys[0].Trailer,
147 1 : }
148 : }
149 :
150 : // LargestKey returns the largest internal key defined by the span's keys. The
151 : // returned key will always be a "sentinel key" at the end boundary. The
152 : // "sentinel key" models the exclusive end boundary by returning an InternalKey
153 : // with the maximal sequence number, ensuring all InternalKeys with the same
154 : // user key sort after the sentinel key.
155 : //
156 : // It requires the Span's keys be in ByTrailerDesc order. It panics if the span
157 : // contains no keys or its keys are sorted in a different order.
158 1 : func (s *Span) LargestKey() base.InternalKey {
159 1 : if len(s.Keys) == 0 {
160 0 : panic("pebble: Span contains no keys")
161 1 : } else if s.KeysOrder != ByTrailerDesc {
162 0 : panic("pebble: span's keys unexpectedly not in trailer order")
163 : }
164 : // The last key has the lowest (sequence number,kind) tuple.
165 1 : kind := s.Keys[len(s.Keys)-1].Kind()
166 1 : return base.MakeExclusiveSentinelKey(kind, s.End)
167 : }
168 :
169 : // SmallestSeqNum returns the smallest sequence number of a key contained within
170 : // the span. It requires the Span's keys be in ByTrailerDesc order. It panics if
171 : // the span contains no keys or its keys are sorted in a different order.
172 1 : func (s *Span) SmallestSeqNum() uint64 {
173 1 : if len(s.Keys) == 0 {
174 0 : panic("pebble: Span contains no keys")
175 1 : } else if s.KeysOrder != ByTrailerDesc {
176 0 : panic("pebble: span's keys unexpectedly not in trailer order")
177 : }
178 :
179 1 : return s.Keys[len(s.Keys)-1].SeqNum()
180 : }
181 :
182 : // LargestSeqNum returns the largest sequence number of a key contained within
183 : // the span. It requires the Span's keys be in ByTrailerDesc order. It panics if
184 : // the span contains no keys or its keys are sorted in a different order.
185 1 : func (s *Span) LargestSeqNum() uint64 {
186 1 : if len(s.Keys) == 0 {
187 0 : panic("pebble: Span contains no keys")
188 1 : } else if s.KeysOrder != ByTrailerDesc {
189 0 : panic("pebble: span's keys unexpectedly not in trailer order")
190 : }
191 1 : return s.Keys[0].SeqNum()
192 : }
193 :
194 : // LargestVisibleSeqNum returns the largest sequence number of a key contained
195 : // within the span that's also visible at the provided snapshot sequence number.
196 : // It requires the Span's keys be in ByTrailerDesc order. It panics if the span
197 : // contains no keys or its keys are sorted in a different order.
198 1 : func (s *Span) LargestVisibleSeqNum(snapshot uint64) (largest uint64, ok bool) {
199 1 : if s == nil {
200 1 : return 0, false
201 1 : } else if len(s.Keys) == 0 {
202 0 : panic("pebble: Span contains no keys")
203 1 : } else if s.KeysOrder != ByTrailerDesc {
204 0 : panic("pebble: span's keys unexpectedly not in trailer order")
205 : }
206 1 : for i := range s.Keys {
207 1 : if s.Keys[i].VisibleAt(snapshot) {
208 1 : return s.Keys[i].SeqNum(), true
209 1 : }
210 : }
211 1 : return 0, false
212 : }
213 :
214 : // TODO(jackson): Replace most of the calls to Visible with more targeted calls
215 : // that avoid the need to construct a new Span.
216 :
217 : // Visible returns a span with the subset of keys visible at the provided
218 : // sequence number. It requires the Span's keys be in ByTrailerDesc order. It
219 : // panics if the span's keys are sorted in a different order.
220 : //
221 : // Visible may incur an allocation, so callers should prefer targeted,
222 : // non-allocating methods when possible.
223 1 : func (s Span) Visible(snapshot uint64) Span {
224 1 : if s.KeysOrder != ByTrailerDesc {
225 0 : panic("pebble: span's keys unexpectedly not in trailer order")
226 : }
227 :
228 1 : ret := Span{Start: s.Start, End: s.End}
229 1 : if len(s.Keys) == 0 {
230 0 : return ret
231 0 : }
232 :
233 : // Keys from indexed batches may force an allocation. The Keys slice is
234 : // ordered by sequence number, so ordinarily we can return the trailing
235 : // subslice containing keys with sequence numbers less than `seqNum`.
236 : //
237 : // However, batch keys are special. Only visible batch keys are included
238 : // when an Iterator's batch spans are fragmented. They must always be
239 : // visible.
240 : //
241 : // Batch keys can create a sandwich of visible batch keys at the beginning
242 : // of the slice and visible committed keys at the end of the slice, forcing
243 : // us to allocate a new slice and copy the contents.
244 : //
245 : // Care is taking to only incur an allocation only when batch keys and
246 : // visible keys actually sandwich non-visible keys.
247 :
248 : // lastBatchIdx and lastNonVisibleIdx are set to the last index of a batch
249 : // key and a non-visible key respectively.
250 1 : lastBatchIdx := -1
251 1 : lastNonVisibleIdx := -1
252 1 : for i := range s.Keys {
253 1 : if seqNum := s.Keys[i].SeqNum(); seqNum&base.InternalKeySeqNumBatch != 0 {
254 0 : // Batch key. Always visible.
255 0 : lastBatchIdx = i
256 1 : } else if seqNum >= snapshot {
257 1 : // This key is not visible.
258 1 : lastNonVisibleIdx = i
259 1 : }
260 : }
261 :
262 : // In the following comments: b = batch, h = hidden, v = visible (committed).
263 1 : switch {
264 1 : case lastNonVisibleIdx == -1:
265 1 : // All keys are visible.
266 1 : //
267 1 : // [b b b], [v v v] and [b b b v v v]
268 1 : ret.Keys = s.Keys
269 1 : case lastBatchIdx == -1:
270 1 : // There are no batch keys, so we can return the continuous subslice
271 1 : // starting after the last non-visible Key.
272 1 : //
273 1 : // h h h [v v v]
274 1 : ret.Keys = s.Keys[lastNonVisibleIdx+1:]
275 0 : case lastNonVisibleIdx == len(s.Keys)-1:
276 0 : // While we have a batch key and non-visible keys, there are no
277 0 : // committed visible keys. The 'sandwich' is missing the bottom layer,
278 0 : // so we can return the continuous sublice at the beginning.
279 0 : //
280 0 : // [b b b] h h h
281 0 : ret.Keys = s.Keys[0 : lastBatchIdx+1]
282 0 : default:
283 0 : // This is the problematic sandwich case. Allocate a new slice, copying
284 0 : // the batch keys and the visible keys into it.
285 0 : //
286 0 : // [b b b] h h h [v v v]
287 0 : ret.Keys = make([]Key, (lastBatchIdx+1)+(len(s.Keys)-lastNonVisibleIdx-1))
288 0 : copy(ret.Keys, s.Keys[:lastBatchIdx+1])
289 0 : copy(ret.Keys[lastBatchIdx+1:], s.Keys[lastNonVisibleIdx+1:])
290 : }
291 1 : return ret
292 : }
293 :
294 : // VisibleAt returns true if the span contains a key visible at the provided
295 : // snapshot. Keys with sequence numbers with the batch bit set are treated as
296 : // always visible.
297 : //
298 : // VisibleAt requires the Span's keys be in ByTrailerDesc order. It panics if
299 : // the span's keys are sorted in a different order.
300 1 : func (s *Span) VisibleAt(snapshot uint64) bool {
301 1 : if s.KeysOrder != ByTrailerDesc {
302 0 : panic("pebble: span's keys unexpectedly not in trailer order")
303 : }
304 1 : if len(s.Keys) == 0 {
305 0 : return false
306 1 : } else if first := s.Keys[0].SeqNum(); first&base.InternalKeySeqNumBatch != 0 {
307 0 : // Only visible batch keys are included when an Iterator's batch spans
308 0 : // are fragmented. They must always be visible.
309 0 : return true
310 1 : } else {
311 1 : // Otherwise we check the last key. Since keys are ordered decreasing in
312 1 : // sequence number, the last key has the lowest sequence number of any
313 1 : // of the span's keys. If any of the keys are visible, the last key must
314 1 : // be visible. Or put differently: if the last key is not visible, then
315 1 : // no key is visible.
316 1 : return s.Keys[len(s.Keys)-1].SeqNum() < snapshot
317 1 : }
318 : }
319 :
320 : // ShallowClone returns the span with a Keys slice owned by the span itself.
321 : // None of the key byte slices are cloned (see Span.DeepClone).
322 0 : func (s *Span) ShallowClone() Span {
323 0 : c := Span{
324 0 : Start: s.Start,
325 0 : End: s.End,
326 0 : Keys: make([]Key, len(s.Keys)),
327 0 : KeysOrder: s.KeysOrder,
328 0 : }
329 0 : copy(c.Keys, s.Keys)
330 0 : return c
331 0 : }
332 :
333 : // DeepClone clones the span, creating copies of all contained slices. DeepClone
334 : // is intended for non-production code paths like tests, the level checker, etc
335 : // because it is allocation heavy.
336 1 : func (s *Span) DeepClone() Span {
337 1 : c := Span{
338 1 : Start: make([]byte, len(s.Start)),
339 1 : End: make([]byte, len(s.End)),
340 1 : Keys: make([]Key, len(s.Keys)),
341 1 : KeysOrder: s.KeysOrder,
342 1 : }
343 1 : copy(c.Start, s.Start)
344 1 : copy(c.End, s.End)
345 1 : for i := range s.Keys {
346 1 : c.Keys[i].Trailer = s.Keys[i].Trailer
347 1 : if len(s.Keys[i].Suffix) > 0 {
348 0 : c.Keys[i].Suffix = make([]byte, len(s.Keys[i].Suffix))
349 0 : copy(c.Keys[i].Suffix, s.Keys[i].Suffix)
350 0 : }
351 1 : if len(s.Keys[i].Value) > 0 {
352 0 : c.Keys[i].Value = make([]byte, len(s.Keys[i].Value))
353 0 : copy(c.Keys[i].Value, s.Keys[i].Value)
354 0 : }
355 : }
356 1 : return c
357 : }
358 :
359 : // Contains returns true if the specified key resides within the span's bounds.
360 1 : func (s *Span) Contains(cmp base.Compare, key []byte) bool {
361 1 : return cmp(s.Start, key) <= 0 && cmp(key, s.End) < 0
362 1 : }
363 :
364 : // Covers returns true if the span covers keys at seqNum.
365 : //
366 : // Covers requires the Span's keys be in ByTrailerDesc order. It panics if the
367 : // span's keys are sorted in a different order.
368 1 : func (s Span) Covers(seqNum uint64) bool {
369 1 : if s.KeysOrder != ByTrailerDesc {
370 0 : panic("pebble: span's keys unexpectedly not in trailer order")
371 : }
372 1 : return !s.Empty() && s.Keys[0].SeqNum() > seqNum
373 : }
374 :
375 : // CoversAt returns true if the span contains a key that is visible at the
376 : // provided snapshot sequence number, and that key's sequence number is higher
377 : // than seqNum.
378 : //
379 : // Keys with sequence numbers with the batch bit set are treated as always
380 : // visible.
381 : //
382 : // CoversAt requires the Span's keys be in ByTrailerDesc order. It panics if the
383 : // span's keys are sorted in a different order.
384 1 : func (s *Span) CoversAt(snapshot, seqNum uint64) bool {
385 1 : if s.KeysOrder != ByTrailerDesc {
386 0 : panic("pebble: span's keys unexpectedly not in trailer order")
387 : }
388 : // NB: A key is visible at `snapshot` if its sequence number is strictly
389 : // less than `snapshot`. See base.Visible.
390 1 : for i := range s.Keys {
391 1 : if kseq := s.Keys[i].SeqNum(); kseq&base.InternalKeySeqNumBatch != 0 {
392 0 : // Only visible batch keys are included when an Iterator's batch spans
393 0 : // are fragmented. They must always be visible.
394 0 : return kseq > seqNum
395 1 : } else if kseq < snapshot {
396 1 : return kseq > seqNum
397 1 : }
398 : }
399 1 : return false
400 : }
401 :
402 : // Reset clears the span's Start, End, and Keys fields, retaining the slices for
403 : // reuse.
404 1 : func (s *Span) Reset() {
405 1 : s.Start = s.Start[:0]
406 1 : s.End = s.End[:0]
407 1 : s.Keys = s.Keys[:0]
408 1 : }
409 :
410 : // CopyFrom copies the contents of the other span, retaining the slices
411 : // allocated in this span. Key.Suffix and Key.Value slices are not cloned.
412 1 : func (s *Span) CopyFrom(other *Span) {
413 1 : s.Start = append(s.Start[:0], other.Start...)
414 1 : s.End = append(s.End[:0], other.End...)
415 1 : s.Keys = append(s.Keys[:0], other.Keys...)
416 1 : s.KeysOrder = other.KeysOrder
417 1 : }
418 :
419 : // String returns a string representation of the span.
420 0 : func (s Span) String() string {
421 0 : return fmt.Sprint(prettySpan{Span: s, formatKey: base.DefaultFormatter})
422 0 : }
423 :
424 : // Pretty returns a formatter for the span.
425 0 : func (s Span) Pretty(f base.FormatKey) fmt.Formatter {
426 0 : // TODO(jackson): Take a base.FormatValue to format Key.Value too.
427 0 : return prettySpan{s, f}
428 0 : }
429 :
430 : type prettySpan struct {
431 : Span
432 : formatKey base.FormatKey
433 : }
434 :
435 0 : func (s prettySpan) Format(fs fmt.State, c rune) {
436 0 : if !s.Valid() {
437 0 : fmt.Fprintf(fs, "<invalid>")
438 0 : return
439 0 : }
440 0 : fmt.Fprintf(fs, "%s-%s:{", s.formatKey(s.Start), s.formatKey(s.End))
441 0 : for i, k := range s.Keys {
442 0 : if i > 0 {
443 0 : fmt.Fprint(fs, " ")
444 0 : }
445 0 : fmt.Fprint(fs, k.String())
446 : }
447 0 : fmt.Fprintf(fs, "}")
448 : }
449 :
450 : // SortKeysByTrailer sorts a keys slice by trailer.
451 1 : func SortKeysByTrailer(keys *[]Key) {
452 1 : // NB: keys is a pointer to a slice instead of a slice to avoid `sorted`
453 1 : // escaping to the heap.
454 1 : sorted := (*keysBySeqNumKind)(keys)
455 1 : sort.Sort(sorted)
456 1 : }
457 :
458 : // KeysBySuffix implements sort.Interface, sorting its member Keys slice to by
459 : // Suffix in the order dictated by Cmp.
460 : type KeysBySuffix struct {
461 : Cmp base.Compare
462 : Keys []Key
463 : }
464 :
465 1 : func (s *KeysBySuffix) Len() int { return len(s.Keys) }
466 1 : func (s *KeysBySuffix) Less(i, j int) bool { return s.Cmp(s.Keys[i].Suffix, s.Keys[j].Suffix) < 0 }
467 1 : func (s *KeysBySuffix) Swap(i, j int) { s.Keys[i], s.Keys[j] = s.Keys[j], s.Keys[i] }
468 :
469 : // ParseSpan parses the string representation of a Span. It's intended for
470 : // tests. ParseSpan panics if passed a malformed span representation.
471 0 : func ParseSpan(input string) Span {
472 0 : var s Span
473 0 : parts := strings.FieldsFunc(input, func(r rune) bool {
474 0 : switch r {
475 0 : case '-', ':', '{', '}':
476 0 : return true
477 0 : default:
478 0 : return unicode.IsSpace(r)
479 : }
480 : })
481 0 : s.Start, s.End = []byte(parts[0]), []byte(parts[1])
482 0 :
483 0 : // Each of the remaining parts represents a single Key.
484 0 : s.Keys = make([]Key, 0, len(parts)-2)
485 0 : for _, p := range parts[2:] {
486 0 : keyFields := strings.FieldsFunc(p, func(r rune) bool {
487 0 : switch r {
488 0 : case '#', ',', '(', ')':
489 0 : return true
490 0 : default:
491 0 : return unicode.IsSpace(r)
492 : }
493 : })
494 :
495 0 : var k Key
496 0 : // Parse the sequence number.
497 0 : seqNum, err := strconv.ParseUint(keyFields[0], 10, 64)
498 0 : if err != nil {
499 0 : panic(fmt.Sprintf("invalid sequence number: %q: %s", keyFields[0], err))
500 : }
501 : // Parse the key kind.
502 0 : kind := base.ParseKind(keyFields[1])
503 0 : k.Trailer = base.MakeTrailer(seqNum, kind)
504 0 : // Parse the optional suffix.
505 0 : if len(keyFields) >= 3 {
506 0 : k.Suffix = []byte(keyFields[2])
507 0 : }
508 : // Parse the optional value.
509 0 : if len(keyFields) >= 4 {
510 0 : k.Value = []byte(keyFields[3])
511 0 : }
512 0 : s.Keys = append(s.Keys, k)
513 : }
514 0 : for i := 1; i < len(s.Keys); i++ {
515 0 : if s.Keys[i-1].Trailer < s.Keys[i].Trailer {
516 0 : panic(fmt.Sprintf("span keys not sorted: %s %s", s.Keys[i-1], s.Keys[i]))
517 : }
518 : }
519 0 : s.KeysOrder = ByTrailerDesc
520 0 : return s
521 : }
|