LCOV - code coverage report
Current view: top level - pebble/internal/base - comparer.go (source / functions) Hit Total Coverage
Test: 2024-10-05 08:16Z 649e50ad - tests + meta.lcov Lines: 168 195 86.2 %
Date: 2024-10-05 08:17:02 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : package base
       6             : 
       7             : import (
       8             :         "bytes"
       9             :         "encoding/binary"
      10             :         "fmt"
      11             :         "slices"
      12             :         "strconv"
      13             :         "unicode/utf8"
      14             : 
      15             :         "github.com/cockroachdb/errors"
      16             : )
      17             : 
      18             : // Compare returns -1, 0, or +1 depending on whether a is 'less than', 'equal
      19             : // to' or 'greater than' b.
      20             : //
      21             : // Both a and b must be valid keys. Note that because of synthetic prefix
      22             : // functionality, the Compare function can be called on a key (either from the
      23             : // database or passed as an argument for an iterator operation) after the
      24             : // synthetic prefix has been removed. In general, this implies that removing any
      25             : // leading bytes from a prefix must yield another valid prefix.
      26             : //
      27             : // A key a is less than b if a's prefix is byte-wise less than b's prefix, or if
      28             : // the prefixes are equal and a's suffix is less than b's suffix (according to
      29             : // CompareSuffixes).
      30             : //
      31             : // In other words, if prefix(a) = a[:Split(a)] and suffix(a) = a[Split(a):]:
      32             : //
      33             : //        Compare(a, b) = bytes.Compare(prefix(a), prefix(b)) if not 0,
      34             : //                              otherwise either 0 or CompareSuffixes(suffix(a), suffix(b))
      35             : //
      36             : // The "either 0" part is because CompareSuffixes is allowed to be more strict;
      37             : // see CompareSuffixes.
      38             : //
      39             : // Compare defaults to using the formula above but it can be customized if there
      40             : // is a (potentially faster) specialization or it has to compare suffixes
      41             : // differently.
      42             : type Compare func(a, b []byte) int
      43             : 
      44             : // CompareSuffixes compares two key suffixes and returns -1, 0, or +1.
      45             : //
      46             : // For historical reasons (see
      47             : // https://github.com/cockroachdb/cockroach/issues/130533 for a summary), we
      48             : // allow this function to be more strict than Compare. Specifically, Compare may
      49             : // treat two suffixes as equal whereas CompareSuffixes might not.
      50             : //
      51             : // The empty slice suffix must be 'less than' any non-empty suffix.
      52             : //
      53             : // A full key k is composed of a prefix k[:Split(k)] and suffix k[Split(k):].
      54             : // Suffixes are compared to break ties between equal prefixes.
      55             : type CompareSuffixes func(a, b []byte) int
      56             : 
      57             : // defaultCompare implements Compare in terms of Split and CompareSuffixes, as
      58             : // mentioned above.
      59           2 : func defaultCompare(split Split, compareSuffixes CompareSuffixes, a, b []byte) int {
      60           2 :         an := split(a)
      61           2 :         bn := split(b)
      62           2 :         if prefixCmp := bytes.Compare(a[:an], b[:bn]); prefixCmp != 0 {
      63           2 :                 return prefixCmp
      64           2 :         }
      65           2 :         return compareSuffixes(a[an:], b[bn:])
      66             : }
      67             : 
      68             : // Equal returns true if a and b are equivalent.
      69             : //
      70             : // For a given Compare, Equal(a,b)=true iff Compare(a,b)=0; that is, Equal is a
      71             : // (potentially faster) specialization of Compare.
      72             : type Equal func(a, b []byte) bool
      73             : 
      74             : // AbbreviatedKey returns a fixed length prefix of a user key such that
      75             : //
      76             : //      AbbreviatedKey(a) < AbbreviatedKey(b) implies a < b, and
      77             : //      AbbreviatedKey(a) > AbbreviatedKey(b) implies a > b.
      78             : //
      79             : // If AbbreviatedKey(a) == AbbreviatedKey(b), an additional comparison is
      80             : // required to determine if the two keys are actually equal.
      81             : //
      82             : // This helps optimize indexed batch comparisons for cache locality. If a Split
      83             : // function is specified, AbbreviatedKey usually returns the first eight bytes
      84             : // of the user key prefix in the order that gives the correct ordering.
      85             : type AbbreviatedKey func(key []byte) uint64
      86             : 
      87             : // FormatKey returns a formatter for the user key.
      88             : type FormatKey func(key []byte) fmt.Formatter
      89             : 
      90             : // DefaultFormatter is the default implementation of user key formatting:
      91             : // non-ASCII data is formatted as escaped hexadecimal values.
      92           2 : var DefaultFormatter FormatKey = func(key []byte) fmt.Formatter {
      93           2 :         return FormatBytes(key)
      94           2 : }
      95             : 
      96             : // FormatValue returns a formatter for the user value. The key is also specified
      97             : // for the value formatter in order to support value formatting that is
      98             : // dependent on the key.
      99             : type FormatValue func(key, value []byte) fmt.Formatter
     100             : 
     101             : // Separator is used to construct SSTable index blocks. A trivial implementation
     102             : // is `return append(dst, a...)`, but appending fewer bytes leads to smaller
     103             : // SSTables.
     104             : //
     105             : // Given keys a, b for which Compare(a, b) < 0, Separator produces a key k such
     106             : // that:
     107             : //
     108             : // 1. Compare(a, k) <= 0, and
     109             : // 2. Compare(k, b) < 0.
     110             : //
     111             : // For example, if a and b are the []byte equivalents of the strings "black" and
     112             : // "blue", then the function may append "blb" to dst.
     113             : type Separator func(dst, a, b []byte) []byte
     114             : 
     115             : // Successor appends to dst a shortened key k given a key a such that
     116             : // Compare(a, k) <= 0. A simple implementation may return a unchanged.
     117             : // The appended key k must be valid to pass to Compare.
     118             : type Successor func(dst, a []byte) []byte
     119             : 
     120             : // ImmediateSuccessor is invoked with a prefix key ([Split(a) == len(a)]) and
     121             : // appends to dst the smallest prefix key that is larger than the given prefix a.
     122             : //
     123             : // ImmediateSuccessor must generate a prefix key k such that:
     124             : //
     125             : //      Split(k) == len(k) and Compare(a, k) < 0
     126             : //
     127             : // and there exists no representable prefix key k2 such that:
     128             : //
     129             : //      Split(k2) == len(k2) and Compare(a, k2) < 0 and Compare(k2, k) < 0
     130             : //
     131             : // As an example, an implementation built on the natural byte ordering using
     132             : // bytes.Compare could append a `\0` to `a`.
     133             : //
     134             : // The appended key must be valid to pass to Compare.
     135             : type ImmediateSuccessor func(dst, a []byte) []byte
     136             : 
     137             : // Split returns the length of the prefix of the user key that corresponds to
     138             : // the key portion of an MVCC encoding scheme to enable the use of prefix bloom
     139             : // filters.
     140             : //
     141             : // The method will only ever be called with valid MVCC keys, that is, keys that
     142             : // the user could potentially store in the database. Pebble does not know which
     143             : // keys are MVCC keys and which are not, and may call Split on both MVCC keys
     144             : // and non-MVCC keys.
     145             : //
     146             : // A trivial MVCC scheme is one in which Split() returns len(a). This
     147             : // corresponds to assigning a constant version to each key in the database. For
     148             : // performance reasons, it is preferable to use a `nil` split in this case.
     149             : //
     150             : // Let prefix(a) = a[:Split(a)] and suffix(a) = a[Split(a):]. The following
     151             : // properties must hold:
     152             : //
     153             : //  1. A key consisting of just a prefix must sort before all other keys with
     154             : //     that prefix:
     155             : //
     156             : //     If len(suffix(a)) > 0, then Compare(prefix(a), a) < 0.
     157             : //
     158             : //  2. Prefixes must be used to order keys before suffixes:
     159             : //
     160             : //     If Compare(a, b) <= 0, then Compare(prefix(a), prefix(b)) <= 0.
     161             : //     If Compare(prefix(a), prefix(b)) < 0, then Compare(a, b) < 0
     162             : //
     163             : //  3. Suffixes themselves must be valid keys and comparable, respecting the same
     164             : //     ordering as within a key:
     165             : //
     166             : //     If Compare(prefix(a), prefix(b)) = 0, then Compare(a, b) = Compare(suffix(a), suffix(b)).
     167             : type Split func(a []byte) int
     168             : 
     169             : // Prefix returns the prefix of the key k, using s to split the key.
     170           2 : func (s Split) Prefix(k []byte) []byte {
     171           2 :         i := s(k)
     172           2 :         return k[:i:i]
     173           2 : }
     174             : 
     175             : // DefaultSplit is a trivial implementation of Split which always returns the
     176             : // full key.
     177           1 : var DefaultSplit Split = func(key []byte) int { return len(key) }
     178             : 
     179             : // Comparer defines a total ordering over the space of []byte keys: a 'less
     180             : // than' relationship.
     181             : type Comparer struct {
     182             :         // The following must always be specified.
     183             :         AbbreviatedKey AbbreviatedKey
     184             :         Separator      Separator
     185             :         Successor      Successor
     186             : 
     187             :         // ImmediateSuccessor must be specified if range keys are used.
     188             :         ImmediateSuccessor ImmediateSuccessor
     189             : 
     190             :         // Split defaults to a trivial implementation that returns the full key length
     191             :         // if it is not specified.
     192             :         Split Split
     193             : 
     194             :         // CompareSuffixes defaults to bytes.Compare if it is not specified.
     195             :         CompareSuffixes CompareSuffixes
     196             : 
     197             :         // Compare defaults to a generic implementation that uses Split,
     198             :         // bytes.Compare, and CompareSuffixes if it is not specified.
     199             :         Compare Compare
     200             :         // Equal defaults to using Compare() == 0 if it is not specified.
     201             :         Equal Equal
     202             :         // FormatKey defaults to the DefaultFormatter if it is not specified.
     203             :         FormatKey FormatKey
     204             : 
     205             :         // FormatValue is optional.
     206             :         FormatValue FormatValue
     207             : 
     208             :         // Name is the name of the comparer.
     209             :         //
     210             :         // The on-disk format stores the comparer name, and opening a database with a
     211             :         // different comparer from the one it was created with will result in an
     212             :         // error.
     213             :         Name string
     214             : }
     215             : 
     216             : // EnsureDefaults ensures that all non-optional fields are set.
     217             : //
     218             : // If c is nil, returns DefaultComparer.
     219             : //
     220             : // If any fields need to be set, returns a modified copy of c.
     221           2 : func (c *Comparer) EnsureDefaults() *Comparer {
     222           2 :         if c == nil {
     223           1 :                 return DefaultComparer
     224           1 :         }
     225           2 :         if c.AbbreviatedKey == nil || c.Separator == nil || c.Successor == nil || c.Name == "" {
     226           0 :                 panic("invalid Comparer: mandatory field not set")
     227             :         }
     228           2 :         if c.CompareSuffixes != nil && c.Compare != nil && c.Equal != nil && c.Split != nil && c.FormatKey != nil {
     229           2 :                 return c
     230           2 :         }
     231           1 :         n := &Comparer{}
     232           1 :         *n = *c
     233           1 : 
     234           1 :         if n.Split == nil {
     235           1 :                 n.Split = DefaultSplit
     236           1 :         }
     237           1 :         if n.CompareSuffixes == nil && n.Compare == nil && n.Equal == nil {
     238           0 :                 n.CompareSuffixes = bytes.Compare
     239           0 :                 n.Compare = bytes.Compare
     240           0 :                 n.Equal = bytes.Equal
     241           1 :         } else {
     242           1 :                 if n.CompareSuffixes == nil {
     243           1 :                         n.CompareSuffixes = bytes.Compare
     244           1 :                 }
     245           1 :                 if n.Compare == nil {
     246           0 :                         n.Compare = func(a, b []byte) int {
     247           0 :                                 return defaultCompare(n.Split, n.CompareSuffixes, a, b)
     248           0 :                         }
     249             :                 }
     250           1 :                 if n.Equal == nil {
     251           1 :                         n.Equal = func(a, b []byte) bool {
     252           0 :                                 return n.Compare(a, b) == 0
     253           0 :                         }
     254             :                 }
     255             :         }
     256           1 :         if n.FormatKey == nil {
     257           1 :                 n.FormatKey = DefaultFormatter
     258           1 :         }
     259           1 :         return n
     260             : }
     261             : 
     262             : // DefaultComparer is the default implementation of the Comparer interface.
     263             : // It uses the natural ordering, consistent with bytes.Compare.
     264             : var DefaultComparer = &Comparer{
     265             :         CompareSuffixes: bytes.Compare,
     266             :         Compare:         bytes.Compare,
     267             :         Equal:           bytes.Equal,
     268             : 
     269           2 :         AbbreviatedKey: func(key []byte) uint64 {
     270           2 :                 if len(key) >= 8 {
     271           2 :                         return binary.BigEndian.Uint64(key)
     272           2 :                 }
     273           2 :                 var v uint64
     274           2 :                 for _, b := range key {
     275           2 :                         v <<= 8
     276           2 :                         v |= uint64(b)
     277           2 :                 }
     278           2 :                 return v << uint(8*(8-len(key)))
     279             :         },
     280             : 
     281             :         Split: DefaultSplit,
     282             : 
     283             :         FormatKey: DefaultFormatter,
     284             : 
     285           2 :         Separator: func(dst, a, b []byte) []byte {
     286           2 :                 i, n := SharedPrefixLen(a, b), len(dst)
     287           2 :                 dst = append(dst, a...)
     288           2 : 
     289           2 :                 min := len(a)
     290           2 :                 if min > len(b) {
     291           2 :                         min = len(b)
     292           2 :                 }
     293           2 :                 if i >= min {
     294           2 :                         // Do not shorten if one string is a prefix of the other.
     295           2 :                         return dst
     296           2 :                 }
     297             : 
     298           2 :                 if a[i] >= b[i] {
     299           1 :                         // b is smaller than a or a is already the shortest possible.
     300           1 :                         return dst
     301           1 :                 }
     302             : 
     303           2 :                 if i < len(b)-1 || a[i]+1 < b[i] {
     304           2 :                         i += n
     305           2 :                         dst[i]++
     306           2 :                         return dst[:i+1]
     307           2 :                 }
     308             : 
     309           1 :                 i += n + 1
     310           1 :                 for ; i < len(dst); i++ {
     311           1 :                         if dst[i] != 0xff {
     312           1 :                                 dst[i]++
     313           1 :                                 return dst[:i+1]
     314           1 :                         }
     315             :                 }
     316           1 :                 return dst
     317             :         },
     318             : 
     319           2 :         Successor: func(dst, a []byte) (ret []byte) {
     320           2 :                 for i := 0; i < len(a); i++ {
     321           2 :                         if a[i] != 0xff {
     322           2 :                                 dst = append(dst, a[:i+1]...)
     323           2 :                                 dst[len(dst)-1]++
     324           2 :                                 return dst
     325           2 :                         }
     326             :                 }
     327             :                 // a is a run of 0xffs, leave it alone.
     328           1 :                 return append(dst, a...)
     329             :         },
     330             : 
     331           0 :         ImmediateSuccessor: func(dst, a []byte) (ret []byte) {
     332           0 :                 return append(append(dst, a...), 0x00)
     333           0 :         },
     334             : 
     335             :         // This name is part of the C++ Level-DB implementation's default file
     336             :         // format, and should not be changed.
     337             :         Name: "leveldb.BytewiseComparator",
     338             : }
     339             : 
     340             : // SharedPrefixLen returns the largest i such that a[:i] equals b[:i].
     341             : // This function can be useful in implementing the Comparer interface.
     342           2 : func SharedPrefixLen(a, b []byte) int {
     343           2 :         i, n := 0, len(a)
     344           2 :         if n > len(b) {
     345           2 :                 n = len(b)
     346           2 :         }
     347           2 :         asUint64 := func(c []byte, i int) uint64 {
     348           2 :                 return binary.LittleEndian.Uint64(c[i:])
     349           2 :         }
     350           2 :         for i < n-7 && asUint64(a, i) == asUint64(b, i) {
     351           2 :                 i += 8
     352           2 :         }
     353           2 :         for i < n && a[i] == b[i] {
     354           2 :                 i++
     355           2 :         }
     356           2 :         return i
     357             : }
     358             : 
     359             : // MinUserKey returns the smaller of two user keys. If one of the keys is nil,
     360             : // the other one is returned.
     361           2 : func MinUserKey(cmp Compare, a, b []byte) []byte {
     362           2 :         if a != nil && (b == nil || cmp(a, b) < 0) {
     363           2 :                 return a
     364           2 :         }
     365           2 :         return b
     366             : }
     367             : 
     368             : // FormatBytes formats a byte slice using hexadecimal escapes for non-ASCII
     369             : // data.
     370             : type FormatBytes []byte
     371             : 
     372             : const lowerhex = "0123456789abcdef"
     373             : 
     374             : // Format implements the fmt.Formatter interface.
     375           2 : func (p FormatBytes) Format(s fmt.State, c rune) {
     376           2 :         buf := make([]byte, 0, len(p))
     377           2 :         for _, b := range p {
     378           2 :                 if b < utf8.RuneSelf && strconv.IsPrint(rune(b)) {
     379           2 :                         buf = append(buf, b)
     380           2 :                         continue
     381             :                 }
     382           1 :                 buf = append(buf, `\x`...)
     383           1 :                 buf = append(buf, lowerhex[b>>4])
     384           1 :                 buf = append(buf, lowerhex[b&0xF])
     385             :         }
     386           2 :         s.Write(buf)
     387             : }
     388             : 
     389             : // MakeAssertComparer creates a Comparer that is the same with the given
     390             : // Comparer except that it asserts that the Compare and Equal functions adhere
     391             : // to their specifications.
     392           2 : func MakeAssertComparer(c Comparer) Comparer {
     393           2 :         return Comparer{
     394           2 :                 Compare: func(a []byte, b []byte) int {
     395           2 :                         res := c.Compare(a, b)
     396           2 :                         // Verify that Compare is consistent with the default implementation.
     397           2 :                         if expected := defaultCompare(c.Split, c.CompareSuffixes, a, b); res != expected {
     398           0 :                                 panic(AssertionFailedf("%s: Compare(%s, %s)=%d, expected %d",
     399           0 :                                         c.Name, c.FormatKey(a), c.FormatKey(b), res, expected))
     400             :                         }
     401           2 :                         return res
     402             :                 },
     403             : 
     404           2 :                 Equal: func(a []byte, b []byte) bool {
     405           2 :                         eq := c.Equal(a, b)
     406           2 :                         // Verify that Equal is consistent with Compare.
     407           2 :                         if expected := c.Compare(a, b); eq != (expected == 0) {
     408           0 :                                 panic("Compare and Equal are not consistent")
     409             :                         }
     410           2 :                         return eq
     411             :                 },
     412             : 
     413             :                 // TODO(radu): add more checks.
     414             :                 CompareSuffixes:    c.CompareSuffixes,
     415             :                 AbbreviatedKey:     c.AbbreviatedKey,
     416             :                 Separator:          c.Separator,
     417             :                 Successor:          c.Successor,
     418             :                 ImmediateSuccessor: c.ImmediateSuccessor,
     419             :                 FormatKey:          c.FormatKey,
     420             :                 Split:              c.Split,
     421             :                 FormatValue:        c.FormatValue,
     422             :                 Name:               c.Name,
     423             :         }
     424             : }
     425             : 
     426             : // CheckComparer is a mini test suite that verifies a comparer implementation.
     427             : //
     428             : // It takes lists of valid prefixes and suffixes. It is recommended that both
     429             : // lists have at least three elements.
     430           1 : func CheckComparer(c *Comparer, prefixes [][]byte, suffixes [][]byte) error {
     431           1 :         // Empty slice is always a valid suffix.
     432           1 :         suffixes = append(suffixes, nil)
     433           1 : 
     434           1 :         // Verify the suffixes have a consistent ordering.
     435           1 :         slices.SortFunc(suffixes, c.CompareSuffixes)
     436           1 :         if !slices.IsSortedFunc(suffixes, c.CompareSuffixes) {
     437           0 :                 return errors.Errorf("CompareSuffixes is inconsistent")
     438           0 :         }
     439             : 
     440           1 :         n := len(prefixes)
     441           1 :         // Removing leading bytes from prefixes must yield valid prefixes.
     442           1 :         for i := 0; i < n; i++ {
     443           1 :                 for j := 1; j < len(prefixes[i]); j++ {
     444           1 :                         prefixes = append(prefixes, prefixes[i][j:])
     445           1 :                 }
     446             :         }
     447             : 
     448             :         // Check the split function.
     449           1 :         for _, p := range prefixes {
     450           1 :                 for _, s := range suffixes {
     451           1 :                         key := slices.Concat(p, s)
     452           1 :                         if n := c.Split(key); n != len(p) {
     453           0 :                                 return errors.Errorf("incorrect Split result %d on '%x' (prefix '%x' suffix '%x')", n, key, p, s)
     454           0 :                         }
     455             :                 }
     456           1 :                 for i := 1; i < len(suffixes); i++ {
     457           1 :                         a := slices.Concat(p, suffixes[i-1])
     458           1 :                         b := slices.Concat(p, suffixes[i])
     459           1 :                         // Make sure the Compare function agrees with CompareSuffixes, with the
     460           1 :                         // caveat that it can consider some consecutive suffixes to be equal.
     461           1 :                         if cmp := c.Compare(a, b); cmp > 0 {
     462           0 :                                 return errors.Errorf("Compare(%s, %s)=%d, expected <= 0", c.FormatKey(a), c.FormatKey(b), cmp)
     463           0 :                         }
     464             :                 }
     465             :         }
     466             : 
     467             :         // Check the Compare/Equals functions on all possible combinations.
     468           1 :         for _, ap := range prefixes {
     469           1 :                 for _, as := range suffixes {
     470           1 :                         a := slices.Concat(ap, as)
     471           1 :                         for _, bp := range prefixes {
     472           1 :                                 for _, bs := range suffixes {
     473           1 :                                         b := slices.Concat(bp, bs)
     474           1 :                                         result := c.Compare(a, b)
     475           1 :                                         if (result == 0) != c.Equal(a, b) {
     476           0 :                                                 return errors.Errorf("Equal(%s, %s) doesn't agree with Compare", c.FormatKey(a), c.FormatKey(b))
     477           0 :                                         }
     478             : 
     479           1 :                                         if prefixCmp := bytes.Compare(ap, bp); prefixCmp != 0 {
     480           1 :                                                 if result != prefixCmp {
     481           0 :                                                         return errors.Errorf("Compare(%s, %s)=%d, expected %d", c.FormatKey(a), c.FormatKey(b), result, prefixCmp)
     482           0 :                                                 }
     483           1 :                                         } else {
     484           1 :                                                 // We allow Compare to return equality for suffixes even when
     485           1 :                                                 // CompareSuffixes does not.
     486           1 :                                                 if result != 0 {
     487           1 :                                                         if suffixCmp := c.CompareSuffixes(as, bs); result != suffixCmp {
     488           0 :                                                                 return errors.Errorf("Compare(%s, %s)=%d, expected %d", c.FormatKey(a), c.FormatKey(b), result, suffixCmp)
     489           0 :                                                         }
     490             :                                                 }
     491             :                                         }
     492             :                                 }
     493             :                         }
     494             :                 }
     495             :         }
     496             : 
     497             :         // TODO(radu): check more methods.
     498           1 :         return nil
     499             : }

Generated by: LCOV version 1.14