LCOV - code coverage report
Current view: top level - pebble/internal/base - comparer.go (source / functions) Hit Total Coverage
Test: 2024-05-01 08:17Z 481df348 - meta test only.lcov Lines: 72 101 71.3 %
Date: 2024-05-01 08:17:59 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : package base
       6             : 
       7             : import (
       8             :         "bytes"
       9             :         "encoding/binary"
      10             :         "fmt"
      11             :         "strconv"
      12             :         "unicode/utf8"
      13             : )
      14             : 
      15             : // Compare returns -1, 0, or +1 depending on whether a is 'less than', 'equal
      16             : // to' or 'greater than' b. The empty slice must be 'less than' any non-empty
      17             : // slice.
      18             : //
      19             : // Compare is used to compare user keys, such as those passed as arguments to
      20             : // the various DB methods, as well as those returned from Separator, Successor,
      21             : // and Split. It is also used to compare key suffixes, i.e. the remainder of the
      22             : // key after Split.
      23             : type Compare func(a, b []byte) int
      24             : 
      25             : // Equal returns true if a and b are equivalent.
      26             : //
      27             : // For a given Compare, Equal(a,b)=true iff Compare(a,b)=0; that is, Equal is a
      28             : // (potentially faster) specialization of Compare.
      29             : type Equal func(a, b []byte) bool
      30             : 
      31             : // AbbreviatedKey returns a fixed length prefix of a user key such that
      32             : //
      33             : //      AbbreviatedKey(a) < AbbreviatedKey(b) implies a < b, and
      34             : //      AbbreviatedKey(a) > AbbreviatedKey(b) implies a > b.
      35             : //
      36             : // If AbbreviatedKey(a) == AbbreviatedKey(b), an additional comparison is
      37             : // required to determine if the two keys are actually equal.
      38             : //
      39             : // This helps optimize indexed batch comparisons for cache locality. If a Split
      40             : // function is specified, AbbreviatedKey usually returns the first eight bytes
      41             : // of the user key prefix in the order that gives the correct ordering.
      42             : type AbbreviatedKey func(key []byte) uint64
      43             : 
      44             : // FormatKey returns a formatter for the user key.
      45             : type FormatKey func(key []byte) fmt.Formatter
      46             : 
      47             : // DefaultFormatter is the default implementation of user key formatting:
      48             : // non-ASCII data is formatted as escaped hexadecimal values.
      49           1 : var DefaultFormatter FormatKey = func(key []byte) fmt.Formatter {
      50           1 :         return FormatBytes(key)
      51           1 : }
      52             : 
      53             : // FormatValue returns a formatter for the user value. The key is also specified
      54             : // for the value formatter in order to support value formatting that is
      55             : // dependent on the key.
      56             : type FormatValue func(key, value []byte) fmt.Formatter
      57             : 
      58             : // Separator is used to construct SSTable index blocks. A trivial implementation
      59             : // is `return append(dst, a...)`, but appending fewer bytes leads to smaller
      60             : // SSTables.
      61             : //
      62             : // Given keys a, b for which Compare(a, b) < 0, Separator produces a key k such
      63             : // that:
      64             : //
      65             : // 1. Compare(a, k) <= 0, and
      66             : // 2. Compare(k, b) < 0.
      67             : //
      68             : // For example, if a and b are the []byte equivalents of the strings "black" and
      69             : // "blue", then the function may append "blb" to dst.
      70             : type Separator func(dst, a, b []byte) []byte
      71             : 
      72             : // Successor appends to dst a shortened key k given a key a such that
      73             : // Compare(a, k) <= 0. A simple implementation may return a unchanged.
      74             : // The appended key k must be valid to pass to Compare.
      75             : type Successor func(dst, a []byte) []byte
      76             : 
      77             : // ImmediateSuccessor is invoked with a prefix key ([Split(a) == len(a)]) and
      78             : // appends to dst the smallest prefix key that is larger than the given prefix a.
      79             : //
      80             : // ImmediateSuccessor must generate a prefix key k such that:
      81             : //
      82             : //      Split(k) == len(k) and Compare(a, k) < 0
      83             : //
      84             : // and there exists no representable prefix key k2 such that:
      85             : //
      86             : //      Split(k2) == len(k2) and Compare(a, k2) < 0 and Compare(k2, k) < 0
      87             : //
      88             : // As an example, an implementation built on the natural byte ordering using
      89             : // bytes.Compare could append a `\0` to `a`.
      90             : //
      91             : // The appended key must be valid to pass to Compare.
      92             : type ImmediateSuccessor func(dst, a []byte) []byte
      93             : 
      94             : // Split returns the length of the prefix of the user key that corresponds to
      95             : // the key portion of an MVCC encoding scheme to enable the use of prefix bloom
      96             : // filters.
      97             : //
      98             : // The method will only ever be called with valid MVCC keys, that is, keys that
      99             : // the user could potentially store in the database. Pebble does not know which
     100             : // keys are MVCC keys and which are not, and may call Split on both MVCC keys
     101             : // and non-MVCC keys.
     102             : //
     103             : // A trivial MVCC scheme is one in which Split() returns len(a). This
     104             : // corresponds to assigning a constant version to each key in the database. For
     105             : // performance reasons, it is preferable to use a `nil` split in this case.
     106             : //
     107             : // Let prefix(a) = a[:Split(a)] and suffix(a) = a[Split(a):]. The following
     108             : // properties must hold:
     109             : //
     110             : //  1. A key consisting of just a prefix must sort before all other keys with
     111             : //     that prefix:
     112             : //
     113             : //     If len(suffix(a)) > 0, then Compare(prefix(a), a) < 0.
     114             : //
     115             : //  2. Prefixes must be used to order keys before suffixes:
     116             : //
     117             : //     If Compare(a, b) <= 0, then Compare(prefix(a), prefix(b)) <= 0.
     118             : //     If Compare(prefix(a), prefix(b)) < 0, then Compare(a, b) < 0
     119             : //
     120             : //  3. Suffixes themselves must be valid keys and comparable, respecting the same
     121             : //     ordering as within a key:
     122             : //
     123             : //     If Compare(prefix(a), prefix(b)) = 0, then Compare(a, b) = Compare(suffix(a), suffix(b)).
     124             : type Split func(a []byte) int
     125             : 
     126             : // DefaultSplit is a trivial implementation of Split which always returns the
     127             : // full key.
     128           0 : var DefaultSplit Split = func(key []byte) int { return len(key) }
     129             : 
     130             : // Comparer defines a total ordering over the space of []byte keys: a 'less
     131             : // than' relationship.
     132             : type Comparer struct {
     133             :         // These fields must always be specified.
     134             :         Compare        Compare
     135             :         AbbreviatedKey AbbreviatedKey
     136             :         Separator      Separator
     137             :         Successor      Successor
     138             : 
     139             :         // ImmediateSuccessor must be specified if range keys are used.
     140             :         ImmediateSuccessor ImmediateSuccessor
     141             : 
     142             :         // Equal defaults to using Compare() == 0 if it is not specified.
     143             :         Equal Equal
     144             :         // FormatKey defaults to the DefaultFormatter if it is not specified.
     145             :         FormatKey FormatKey
     146             :         // Split defaults to a trivial implementation that returns the full key length
     147             :         // if it is not specified.
     148             :         Split Split
     149             : 
     150             :         // FormatValue is optional.
     151             :         FormatValue FormatValue
     152             : 
     153             :         // Name is the name of the comparer.
     154             :         //
     155             :         // The on-disk format stores the comparer name, and opening a database with a
     156             :         // different comparer from the one it was created with will result in an
     157             :         // error.
     158             :         Name string
     159             : }
     160             : 
     161             : // EnsureDefaults ensures that all non-optional fields are set.
     162             : //
     163             : // If c is nil, returns DefaultComparer.
     164             : //
     165             : // If any fields need to be set, returns a modified copy of c.
     166           1 : func (c *Comparer) EnsureDefaults() *Comparer {
     167           1 :         if c == nil {
     168           0 :                 return DefaultComparer
     169           0 :         }
     170           1 :         if c.Compare == nil || c.AbbreviatedKey == nil || c.Separator == nil || c.Successor == nil || c.Name == "" {
     171           0 :                 panic("invalid Comparer: mandatory field not set")
     172             :         }
     173           1 :         if c.Equal != nil && c.Split != nil && c.FormatKey != nil {
     174           1 :                 return c
     175           1 :         }
     176           0 :         n := &Comparer{}
     177           0 :         *n = *c
     178           0 :         if n.Equal == nil {
     179           0 :                 cmp := n.Compare
     180           0 :                 n.Equal = func(a, b []byte) bool {
     181           0 :                         return cmp(a, b) == 0
     182           0 :                 }
     183             :         }
     184           0 :         if n.Split == nil {
     185           0 :                 n.Split = DefaultSplit
     186           0 :         }
     187           0 :         if n.FormatKey == nil {
     188           0 :                 n.FormatKey = DefaultFormatter
     189           0 :         }
     190           0 :         return n
     191             : }
     192             : 
     193             : // DefaultComparer is the default implementation of the Comparer interface.
     194             : // It uses the natural ordering, consistent with bytes.Compare.
     195             : var DefaultComparer = &Comparer{
     196             :         Compare: bytes.Compare,
     197             :         Equal:   bytes.Equal,
     198             : 
     199           1 :         AbbreviatedKey: func(key []byte) uint64 {
     200           1 :                 if len(key) >= 8 {
     201           1 :                         return binary.BigEndian.Uint64(key)
     202           1 :                 }
     203           1 :                 var v uint64
     204           1 :                 for _, b := range key {
     205           1 :                         v <<= 8
     206           1 :                         v |= uint64(b)
     207           1 :                 }
     208           1 :                 return v << uint(8*(8-len(key)))
     209             :         },
     210             : 
     211             :         Split: DefaultSplit,
     212             : 
     213             :         FormatKey: DefaultFormatter,
     214             : 
     215           1 :         Separator: func(dst, a, b []byte) []byte {
     216           1 :                 i, n := SharedPrefixLen(a, b), len(dst)
     217           1 :                 dst = append(dst, a...)
     218           1 : 
     219           1 :                 min := len(a)
     220           1 :                 if min > len(b) {
     221           1 :                         min = len(b)
     222           1 :                 }
     223           1 :                 if i >= min {
     224           1 :                         // Do not shorten if one string is a prefix of the other.
     225           1 :                         return dst
     226           1 :                 }
     227             : 
     228           1 :                 if a[i] >= b[i] {
     229           0 :                         // b is smaller than a or a is already the shortest possible.
     230           0 :                         return dst
     231           0 :                 }
     232             : 
     233           1 :                 if i < len(b)-1 || a[i]+1 < b[i] {
     234           1 :                         i += n
     235           1 :                         dst[i]++
     236           1 :                         return dst[:i+1]
     237           1 :                 }
     238             : 
     239           1 :                 i += n + 1
     240           1 :                 for ; i < len(dst); i++ {
     241           1 :                         if dst[i] != 0xff {
     242           1 :                                 dst[i]++
     243           1 :                                 return dst[:i+1]
     244           1 :                         }
     245             :                 }
     246           0 :                 return dst
     247             :         },
     248             : 
     249           1 :         Successor: func(dst, a []byte) (ret []byte) {
     250           1 :                 for i := 0; i < len(a); i++ {
     251           1 :                         if a[i] != 0xff {
     252           1 :                                 dst = append(dst, a[:i+1]...)
     253           1 :                                 dst[len(dst)-1]++
     254           1 :                                 return dst
     255           1 :                         }
     256             :                 }
     257             :                 // a is a run of 0xffs, leave it alone.
     258           0 :                 return append(dst, a...)
     259             :         },
     260             : 
     261           0 :         ImmediateSuccessor: func(dst, a []byte) (ret []byte) {
     262           0 :                 return append(append(dst, a...), 0x00)
     263           0 :         },
     264             : 
     265             :         // This name is part of the C++ Level-DB implementation's default file
     266             :         // format, and should not be changed.
     267             :         Name: "leveldb.BytewiseComparator",
     268             : }
     269             : 
     270             : // SharedPrefixLen returns the largest i such that a[:i] equals b[:i].
     271             : // This function can be useful in implementing the Comparer interface.
     272           1 : func SharedPrefixLen(a, b []byte) int {
     273           1 :         i, n := 0, len(a)
     274           1 :         if n > len(b) {
     275           1 :                 n = len(b)
     276           1 :         }
     277           1 :         asUint64 := func(c []byte, i int) uint64 {
     278           1 :                 return binary.LittleEndian.Uint64(c[i:])
     279           1 :         }
     280           1 :         for i < n-7 && asUint64(a, i) == asUint64(b, i) {
     281           1 :                 i += 8
     282           1 :         }
     283           1 :         for i < n && a[i] == b[i] {
     284           1 :                 i++
     285           1 :         }
     286           1 :         return i
     287             : }
     288             : 
     289             : // FormatBytes formats a byte slice using hexadecimal escapes for non-ASCII
     290             : // data.
     291             : type FormatBytes []byte
     292             : 
     293             : const lowerhex = "0123456789abcdef"
     294             : 
     295             : // Format implements the fmt.Formatter interface.
     296           1 : func (p FormatBytes) Format(s fmt.State, c rune) {
     297           1 :         buf := make([]byte, 0, len(p))
     298           1 :         for _, b := range p {
     299           1 :                 if b < utf8.RuneSelf && strconv.IsPrint(rune(b)) {
     300           1 :                         buf = append(buf, b)
     301           1 :                         continue
     302             :                 }
     303           0 :                 buf = append(buf, `\x`...)
     304           0 :                 buf = append(buf, lowerhex[b>>4])
     305           0 :                 buf = append(buf, lowerhex[b&0xF])
     306             :         }
     307           1 :         s.Write(buf)
     308             : }

Generated by: LCOV version 1.14