Line data Source code
1 : // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package base
6 :
7 : import (
8 : "bytes"
9 : "encoding/binary"
10 : "fmt"
11 : "slices"
12 : "strconv"
13 : "unicode/utf8"
14 :
15 : "github.com/cockroachdb/errors"
16 : )
17 :
18 : // Compare returns -1, 0, or +1 depending on whether a is 'less than', 'equal
19 : // to' or 'greater than' b.
20 : //
21 : // Both a and b must be valid keys. Note that because of synthetic prefix
22 : // functionality, the Compare function can be called on a key (either from the
23 : // database or passed as an argument for an iterator operation) after the
24 : // synthetic prefix has been removed. In general, this implies that removing any
25 : // leading bytes from a prefix must yield another valid prefix.
26 : //
27 : // A key a is less than b if a's prefix is byte-wise less than b's prefix, or if
28 : // the prefixes are equal and a's suffix is less than b's suffix (according to
29 : // ComparePointSuffixes).
30 : //
31 : // In other words, if prefix(a) = a[:Split(a)] and suffix(a) = a[Split(a):]:
32 : //
33 : // Compare(a, b) = bytes.Compare(prefix(a), prefix(b)) if not 0,
34 : // otherwise ComparePointSuffixes(suffix(a), suffix(b))
35 : //
36 : // Compare defaults to using the formula above but it can be customized if there
37 : // is a (potentially faster) specialization or it has to compare suffixes
38 : // differently.
39 : type Compare func(a, b []byte) int
40 :
41 : // CompareRangeSuffixes compares two suffixes where either or both suffix
42 : // originates from a range key and returns -1, 0, or +1.
43 : //
44 : // For historical reasons (see
45 : // https://github.com/cockroachdb/cockroach/issues/130533 for a summary), we
46 : // allow this function to be more strict than Compare. Specifically, Compare may
47 : // treat two suffixes as equal whereas CompareRangeSuffixes might not.
48 : //
49 : // CompareRangeSuffixes is allowed to be more strict than see
50 : // ComparePointSuffixes, meaning it may return -1 or +1 when
51 : // ComparePointSuffixes would return 0.
52 : //
53 : // The empty slice suffix must be 'less than' any non-empty suffix.
54 : type CompareRangeSuffixes func(a, b []byte) int
55 :
56 : // ComparePointSuffixes compares two point key suffixes and returns -1, 0, or +1.
57 : //
58 : // For historical reasons (see
59 : // https://github.com/cockroachdb/cockroach/issues/130533 for a summary), this
60 : // function is distinct from CompareRangeSuffixes. Specifically,
61 : // ComparePointSuffixes may treat two suffixes as equal whereas
62 : // CompareRangeSuffixes might not. Unlike CompareRangeSuffixes, this function
63 : // must agree with Compare.
64 : //
65 : // The empty slice suffix must be 'less than' any non-empty suffix.
66 : //
67 : // A full key k is composed of a prefix k[:Split(k)] and suffix k[Split(k):].
68 : // Suffixes are compared to break ties between equal prefixes.
69 : type ComparePointSuffixes func(a, b []byte) int
70 :
71 : // defaultCompare implements Compare in terms of Split and ComparePointSuffixes, as
72 : // mentioned above.
73 1 : func defaultCompare(split Split, compareSuffixes ComparePointSuffixes, a, b []byte) int {
74 1 : an := split(a)
75 1 : bn := split(b)
76 1 : if prefixCmp := bytes.Compare(a[:an], b[:bn]); prefixCmp != 0 {
77 1 : return prefixCmp
78 1 : }
79 1 : return compareSuffixes(a[an:], b[bn:])
80 : }
81 :
82 : // Equal returns true if a and b are equivalent.
83 : //
84 : // For a given Compare, Equal(a,b)=true iff Compare(a,b)=0; that is, Equal is a
85 : // (potentially faster) specialization of Compare.
86 : type Equal func(a, b []byte) bool
87 :
88 : // AbbreviatedKey returns a fixed length prefix of a user key such that
89 : //
90 : // AbbreviatedKey(a) < AbbreviatedKey(b) implies a < b, and
91 : // AbbreviatedKey(a) > AbbreviatedKey(b) implies a > b.
92 : //
93 : // If AbbreviatedKey(a) == AbbreviatedKey(b), an additional comparison is
94 : // required to determine if the two keys are actually equal.
95 : //
96 : // This helps optimize indexed batch comparisons for cache locality. If a Split
97 : // function is specified, AbbreviatedKey usually returns the first eight bytes
98 : // of the user key prefix in the order that gives the correct ordering.
99 : type AbbreviatedKey func(key []byte) uint64
100 :
101 : // FormatKey returns a formatter for the user key.
102 : type FormatKey func(key []byte) fmt.Formatter
103 :
104 : // DefaultFormatter is the default implementation of user key formatting:
105 : // non-ASCII data is formatted as escaped hexadecimal values.
106 1 : var DefaultFormatter FormatKey = func(key []byte) fmt.Formatter {
107 1 : return FormatBytes(key)
108 1 : }
109 :
110 : // FormatValue returns a formatter for the user value. The key is also specified
111 : // for the value formatter in order to support value formatting that is
112 : // dependent on the key.
113 : type FormatValue func(key, value []byte) fmt.Formatter
114 :
115 : // Separator is used to construct SSTable index blocks. A trivial implementation
116 : // is `return append(dst, a...)`, but appending fewer bytes leads to smaller
117 : // SSTables.
118 : //
119 : // Given keys a, b for which Compare(a, b) < 0, Separator produces a key k such
120 : // that:
121 : //
122 : // 1. Compare(a, k) <= 0, and
123 : // 2. Compare(k, b) < 0.
124 : //
125 : // For example, if a and b are the []byte equivalents of the strings "black" and
126 : // "blue", then the function may append "blb" to dst.
127 : type Separator func(dst, a, b []byte) []byte
128 :
129 : // Successor appends to dst a shortened key k given a key a such that
130 : // Compare(a, k) <= 0. A simple implementation may return a unchanged.
131 : // The appended key k must be valid to pass to Compare.
132 : type Successor func(dst, a []byte) []byte
133 :
134 : // ImmediateSuccessor is invoked with a prefix key ([Split(a) == len(a)]) and
135 : // appends to dst the smallest prefix key that is larger than the given prefix a.
136 : //
137 : // ImmediateSuccessor must generate a prefix key k such that:
138 : //
139 : // Split(k) == len(k) and Compare(a, k) < 0
140 : //
141 : // and there exists no representable prefix key k2 such that:
142 : //
143 : // Split(k2) == len(k2) and Compare(a, k2) < 0 and Compare(k2, k) < 0
144 : //
145 : // As an example, an implementation built on the natural byte ordering using
146 : // bytes.Compare could append a `\0` to `a`.
147 : //
148 : // The appended key must be valid to pass to Compare.
149 : type ImmediateSuccessor func(dst, a []byte) []byte
150 :
151 : // Split returns the length of the prefix of the user key that corresponds to
152 : // the key portion of an MVCC encoding scheme to enable the use of prefix bloom
153 : // filters.
154 : //
155 : // The method will only ever be called with valid MVCC keys, that is, keys that
156 : // the user could potentially store in the database. Pebble does not know which
157 : // keys are MVCC keys and which are not, and may call Split on both MVCC keys
158 : // and non-MVCC keys.
159 : //
160 : // A trivial MVCC scheme is one in which Split() returns len(a). This
161 : // corresponds to assigning a constant version to each key in the database. For
162 : // performance reasons, it is preferable to use a `nil` split in this case.
163 : //
164 : // Let prefix(a) = a[:Split(a)] and suffix(a) = a[Split(a):]. The following
165 : // properties must hold:
166 : //
167 : // 1. A key consisting of just a prefix must sort before all other keys with
168 : // that prefix:
169 : //
170 : // If len(suffix(a)) > 0, then Compare(prefix(a), a) < 0.
171 : //
172 : // 2. Prefixes must be used to order keys before suffixes:
173 : //
174 : // If Compare(a, b) <= 0, then Compare(prefix(a), prefix(b)) <= 0.
175 : // If Compare(prefix(a), prefix(b)) < 0, then Compare(a, b) < 0
176 : //
177 : // 3. Suffixes themselves must be valid keys and comparable, respecting the same
178 : // ordering as within a key:
179 : //
180 : // If Compare(prefix(a), prefix(b)) = 0, then
181 : // Compare(a, b) = ComparePointSuffixes(suffix(a), suffix(b))
182 : type Split func(a []byte) int
183 :
184 : // Prefix returns the prefix of the key k, using s to split the key.
185 1 : func (s Split) Prefix(k []byte) []byte {
186 1 : i := s(k)
187 1 : return k[:i:i]
188 1 : }
189 :
190 : // HasSuffix returns true if the key k has a suffix remaining after
191 : // Split is called on it. For keys where the entirety of the key is
192 : // returned by Split, HasSuffix will return false.
193 1 : func (s Split) HasSuffix(k []byte) bool {
194 1 : return s(k) < len(k)
195 1 : }
196 :
197 : // DefaultSplit is a trivial implementation of Split which always returns the
198 : // full key.
199 0 : var DefaultSplit Split = func(key []byte) int { return len(key) }
200 :
201 : // Comparer defines a total ordering over the space of []byte keys: a 'less
202 : // than' relationship.
203 : type Comparer struct {
204 : // The following must always be specified.
205 : AbbreviatedKey AbbreviatedKey
206 : Separator Separator
207 : Successor Successor
208 :
209 : // ImmediateSuccessor must be specified if range keys are used.
210 : ImmediateSuccessor ImmediateSuccessor
211 :
212 : // Split defaults to a trivial implementation that returns the full key length
213 : // if it is not specified.
214 : Split Split
215 :
216 : // CompareRangeSuffixes defaults to bytes.Compare if it is not specified.
217 : CompareRangeSuffixes CompareRangeSuffixes
218 : // ComparePointSuffixes defaults to bytes.Compare if it is not specified.
219 : ComparePointSuffixes ComparePointSuffixes
220 :
221 : // Compare defaults to a generic implementation that uses Split,
222 : // bytes.Compare, and ComparePointSuffixes if it is not specified.
223 : Compare Compare
224 : // Equal defaults to using Compare() == 0 if it is not specified.
225 : Equal Equal
226 : // FormatKey defaults to the DefaultFormatter if it is not specified.
227 : FormatKey FormatKey
228 :
229 : // FormatValue is optional.
230 : FormatValue FormatValue
231 :
232 : // Name is the name of the comparer.
233 : //
234 : // The on-disk format stores the comparer name, and opening a database with a
235 : // different comparer from the one it was created with will result in an
236 : // error.
237 : Name string
238 : }
239 :
240 : // EnsureDefaults ensures that all non-optional fields are set.
241 : //
242 : // If c is nil, returns DefaultComparer.
243 : //
244 : // If any fields need to be set, returns a modified copy of c.
245 1 : func (c *Comparer) EnsureDefaults() *Comparer {
246 1 : if c == nil {
247 0 : return DefaultComparer
248 0 : }
249 1 : if c.AbbreviatedKey == nil || c.Separator == nil || c.Successor == nil || c.Name == "" {
250 0 : panic("invalid Comparer: mandatory field not set")
251 : }
252 1 : if c.CompareRangeSuffixes != nil && c.Compare != nil && c.Equal != nil && c.Split != nil && c.FormatKey != nil {
253 1 : return c
254 1 : }
255 0 : n := &Comparer{}
256 0 : *n = *c
257 0 :
258 0 : if n.Split == nil {
259 0 : n.Split = DefaultSplit
260 0 : }
261 0 : if n.CompareRangeSuffixes == nil && n.Compare == nil && n.Equal == nil {
262 0 : n.CompareRangeSuffixes = bytes.Compare
263 0 : n.Compare = bytes.Compare
264 0 : n.Equal = bytes.Equal
265 0 : } else {
266 0 : if n.CompareRangeSuffixes == nil {
267 0 : n.CompareRangeSuffixes = bytes.Compare
268 0 : }
269 0 : if n.Compare == nil {
270 0 : n.Compare = func(a, b []byte) int {
271 0 : return defaultCompare(n.Split, n.ComparePointSuffixes, a, b)
272 0 : }
273 : }
274 0 : if n.Equal == nil {
275 0 : n.Equal = func(a, b []byte) bool {
276 0 : return n.Compare(a, b) == 0
277 0 : }
278 : }
279 : }
280 0 : if n.FormatKey == nil {
281 0 : n.FormatKey = DefaultFormatter
282 0 : }
283 0 : return n
284 : }
285 :
286 : // DefaultComparer is the default implementation of the Comparer interface.
287 : // It uses the natural ordering, consistent with bytes.Compare.
288 : var DefaultComparer = &Comparer{
289 : ComparePointSuffixes: bytes.Compare,
290 : CompareRangeSuffixes: bytes.Compare,
291 : Compare: bytes.Compare,
292 : Equal: bytes.Equal,
293 :
294 1 : AbbreviatedKey: func(key []byte) uint64 {
295 1 : if len(key) >= 8 {
296 1 : return binary.BigEndian.Uint64(key)
297 1 : }
298 1 : var v uint64
299 1 : for _, b := range key {
300 1 : v <<= 8
301 1 : v |= uint64(b)
302 1 : }
303 1 : return v << uint(8*(8-len(key)))
304 : },
305 :
306 : Split: DefaultSplit,
307 :
308 : FormatKey: DefaultFormatter,
309 :
310 1 : Separator: func(dst, a, b []byte) []byte {
311 1 : i, n := SharedPrefixLen(a, b), len(dst)
312 1 : dst = append(dst, a...)
313 1 :
314 1 : min := len(a)
315 1 : if min > len(b) {
316 1 : min = len(b)
317 1 : }
318 1 : if i >= min {
319 1 : // Do not shorten if one string is a prefix of the other.
320 1 : return dst
321 1 : }
322 :
323 1 : if a[i] >= b[i] {
324 0 : // b is smaller than a or a is already the shortest possible.
325 0 : return dst
326 0 : }
327 :
328 1 : if i < len(b)-1 || a[i]+1 < b[i] {
329 1 : i += n
330 1 : dst[i]++
331 1 : return dst[:i+1]
332 1 : }
333 :
334 1 : i += n + 1
335 1 : for ; i < len(dst); i++ {
336 1 : if dst[i] != 0xff {
337 1 : dst[i]++
338 1 : return dst[:i+1]
339 1 : }
340 : }
341 0 : return dst
342 : },
343 :
344 1 : Successor: func(dst, a []byte) (ret []byte) {
345 1 : for i := 0; i < len(a); i++ {
346 1 : if a[i] != 0xff {
347 1 : dst = append(dst, a[:i+1]...)
348 1 : dst[len(dst)-1]++
349 1 : return dst
350 1 : }
351 : }
352 : // a is a run of 0xffs, leave it alone.
353 0 : return append(dst, a...)
354 : },
355 :
356 0 : ImmediateSuccessor: func(dst, a []byte) (ret []byte) {
357 0 : return append(append(dst, a...), 0x00)
358 0 : },
359 :
360 : // This name is part of the C++ Level-DB implementation's default file
361 : // format, and should not be changed.
362 : Name: "leveldb.BytewiseComparator",
363 : }
364 :
365 : // SharedPrefixLen returns the largest i such that a[:i] equals b[:i].
366 : // This function can be useful in implementing the Comparer interface.
367 1 : func SharedPrefixLen(a, b []byte) int {
368 1 : i, n := 0, len(a)
369 1 : if n > len(b) {
370 1 : n = len(b)
371 1 : }
372 1 : asUint64 := func(c []byte, i int) uint64 {
373 1 : return binary.LittleEndian.Uint64(c[i:])
374 1 : }
375 1 : for i < n-7 && asUint64(a, i) == asUint64(b, i) {
376 1 : i += 8
377 1 : }
378 1 : for i < n && a[i] == b[i] {
379 1 : i++
380 1 : }
381 1 : return i
382 : }
383 :
384 : // MinUserKey returns the smaller of two user keys. If one of the keys is nil,
385 : // the other one is returned.
386 1 : func MinUserKey(cmp Compare, a, b []byte) []byte {
387 1 : if a != nil && (b == nil || cmp(a, b) < 0) {
388 1 : return a
389 1 : }
390 1 : return b
391 : }
392 :
393 : // FormatBytes formats a byte slice using hexadecimal escapes for non-ASCII
394 : // data.
395 : type FormatBytes []byte
396 :
397 : const lowerhex = "0123456789abcdef"
398 :
399 : // Format implements the fmt.Formatter interface.
400 1 : func (p FormatBytes) Format(s fmt.State, c rune) {
401 1 : buf := make([]byte, 0, len(p))
402 1 : for _, b := range p {
403 1 : if b < utf8.RuneSelf && strconv.IsPrint(rune(b)) {
404 1 : buf = append(buf, b)
405 1 : continue
406 : }
407 0 : buf = append(buf, `\x`...)
408 0 : buf = append(buf, lowerhex[b>>4])
409 0 : buf = append(buf, lowerhex[b&0xF])
410 : }
411 1 : s.Write(buf)
412 : }
413 :
414 : // MakeAssertComparer creates a Comparer that is the same with the given
415 : // Comparer except that it asserts that the Compare and Equal functions adhere
416 : // to their specifications.
417 1 : func MakeAssertComparer(c Comparer) Comparer {
418 1 : return Comparer{
419 1 : Compare: func(a []byte, b []byte) int {
420 1 : res := c.Compare(a, b)
421 1 : // Verify that Compare is consistent with the default implementation.
422 1 : if expected := defaultCompare(c.Split, c.ComparePointSuffixes, a, b); res != expected {
423 0 : panic(AssertionFailedf("%s: Compare(%s, %s)=%d, expected %d",
424 0 : c.Name, c.FormatKey(a), c.FormatKey(b), res, expected))
425 : }
426 1 : return res
427 : },
428 :
429 1 : Equal: func(a []byte, b []byte) bool {
430 1 : eq := c.Equal(a, b)
431 1 : // Verify that Equal is consistent with Compare.
432 1 : if expected := c.Compare(a, b); eq != (expected == 0) {
433 0 : panic("Compare and Equal are not consistent")
434 : }
435 1 : return eq
436 : },
437 :
438 : // TODO(radu): add more checks.
439 : ComparePointSuffixes: c.ComparePointSuffixes,
440 : CompareRangeSuffixes: c.CompareRangeSuffixes,
441 : AbbreviatedKey: c.AbbreviatedKey,
442 : Separator: c.Separator,
443 : Successor: c.Successor,
444 : ImmediateSuccessor: c.ImmediateSuccessor,
445 : FormatKey: c.FormatKey,
446 : Split: c.Split,
447 : FormatValue: c.FormatValue,
448 : Name: c.Name,
449 : }
450 : }
451 :
452 : // CheckComparer is a mini test suite that verifies a comparer implementation.
453 : //
454 : // It takes lists of valid prefixes and suffixes. It is recommended that both
455 : // lists have at least three elements.
456 0 : func CheckComparer(c *Comparer, prefixes [][]byte, suffixes [][]byte) error {
457 0 : // Empty slice is always a valid suffix.
458 0 : suffixes = append(suffixes, nil)
459 0 :
460 0 : // Verify the suffixes have a consistent ordering.
461 0 : slices.SortFunc(suffixes, c.CompareRangeSuffixes)
462 0 : if !slices.IsSortedFunc(suffixes, c.CompareRangeSuffixes) {
463 0 : return errors.Errorf("CompareRangeSuffixes is inconsistent")
464 0 : }
465 : // Verify the ordering imposed by CompareRangeSuffixes is considered a valid
466 : // ordering for point suffixes. CompareRangeSuffixes imposes a stricter
467 : // ordering than ComaprePointSuffixes, but a CompareRangesSuffixes ordering
468 : // must be a valid ordering for point suffixes.
469 0 : if !slices.IsSortedFunc(suffixes, c.ComparePointSuffixes) {
470 0 : return errors.Errorf("ComparePointSuffixes is inconsistent")
471 0 : }
472 :
473 0 : n := len(prefixes)
474 0 : // Removing leading bytes from prefixes must yield valid prefixes.
475 0 : for i := 0; i < n; i++ {
476 0 : for j := 1; j < len(prefixes[i]); j++ {
477 0 : prefixes = append(prefixes, prefixes[i][j:])
478 0 : }
479 : }
480 :
481 : // Check the split function.
482 0 : for _, p := range prefixes {
483 0 : for _, s := range suffixes {
484 0 : key := slices.Concat(p, s)
485 0 : if n := c.Split(key); n != len(p) {
486 0 : return errors.Errorf("incorrect Split result %d on '%x' (prefix '%x' suffix '%x')", n, key, p, s)
487 0 : }
488 : }
489 0 : for i := 1; i < len(suffixes); i++ {
490 0 : a := slices.Concat(p, suffixes[i-1])
491 0 : b := slices.Concat(p, suffixes[i])
492 0 : // Make sure the Compare function agrees with ComparePointSuffixes.
493 0 : if cmp := c.Compare(a, b); cmp > 0 {
494 0 : return errors.Errorf("Compare(%s, %s)=%d, expected <= 0", c.FormatKey(a), c.FormatKey(b), cmp)
495 0 : }
496 : }
497 : }
498 :
499 : // Check the Compare/Equals functions on all possible combinations.
500 0 : for _, ap := range prefixes {
501 0 : for _, as := range suffixes {
502 0 : a := slices.Concat(ap, as)
503 0 : for _, bp := range prefixes {
504 0 : for _, bs := range suffixes {
505 0 : b := slices.Concat(bp, bs)
506 0 : result := c.Compare(a, b)
507 0 : if (result == 0) != c.Equal(a, b) {
508 0 : return errors.Errorf("Equal(%s, %s) doesn't agree with Compare", c.FormatKey(a), c.FormatKey(b))
509 0 : }
510 :
511 0 : if prefixCmp := bytes.Compare(ap, bp); prefixCmp != 0 {
512 0 : if result != prefixCmp {
513 0 : return errors.Errorf("Compare(%s, %s)=%d, expected %d", c.FormatKey(a), c.FormatKey(b), result, prefixCmp)
514 0 : }
515 0 : } else {
516 0 : // The prefixes are equal, so Compare's result should
517 0 : // agree with ComparePointSuffixes.
518 0 : if suffixCmp := c.ComparePointSuffixes(as, bs); result != suffixCmp {
519 0 : return errors.Errorf("Compare(%s, %s)=%d but ComparePointSuffixes(%q, %q)=%d",
520 0 : c.FormatKey(a), c.FormatKey(b), result, as, bs, suffixCmp)
521 0 : }
522 : // If result == 0, CompareRangeSuffixes may not agree
523 : // with ComparePointSuffixes, but otherwise it should.
524 0 : if result != 0 {
525 0 : if suffixCmp := c.CompareRangeSuffixes(as, bs); result != suffixCmp {
526 0 : return errors.Errorf("Compare(%s, %s)=%d, expected %d",
527 0 : c.FormatKey(a), c.FormatKey(b), result, suffixCmp)
528 0 : }
529 : }
530 : }
531 : }
532 : }
533 : }
534 : }
535 :
536 : // TODO(radu): check more methods.
537 0 : return nil
538 : }
|