Line data Source code
1 : // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package base
6 :
7 : import (
8 : "bytes"
9 : "encoding/binary"
10 : "fmt"
11 : "strconv"
12 : "unicode/utf8"
13 : )
14 :
15 : // Compare returns -1, 0, or +1 depending on whether a is 'less than', 'equal
16 : // to' or 'greater than' b. The two arguments can only be 'equal' if their
17 : // contents are exactly equal. Furthermore, the empty slice must be 'less than'
18 : // any non-empty slice. Compare is used to compare user keys, such as those
19 : // passed as arguments to the various DB methods, as well as those returned
20 : // from Separator, Successor, and Split.
21 : type Compare func(a, b []byte) int
22 :
23 : // Equal returns true if a and b are equivalent. For a given Compare,
24 : // Equal(a,b) must return true iff Compare(a,b) returns zero, that is,
25 : // Equal is a (potentially faster) specialization of Compare.
26 : type Equal func(a, b []byte) bool
27 :
28 : // AbbreviatedKey returns a fixed length prefix of a user key such that AbbreviatedKey(a)
29 : // < AbbreviatedKey(b) iff a < b and AbbreviatedKey(a) > AbbreviatedKey(b) iff a > b. If
30 : // AbbreviatedKey(a) == AbbreviatedKey(b) an additional comparison is required to
31 : // determine if the two keys are actually equal.
32 : //
33 : // This helps optimize indexed batch comparisons for cache locality. If a Split
34 : // function is specified, AbbreviatedKey usually returns the first eight bytes
35 : // of the user key prefix in the order that gives the correct ordering.
36 : type AbbreviatedKey func(key []byte) uint64
37 :
38 : // FormatKey returns a formatter for the user key.
39 : type FormatKey func(key []byte) fmt.Formatter
40 :
41 : // FormatValue returns a formatter for the user value. The key is also
42 : // specified for the value formatter in order to support value formatting that
43 : // is dependent on the key.
44 : type FormatValue func(key, value []byte) fmt.Formatter
45 :
46 : // Separator is used to construct SSTable index blocks. A trivial implementation
47 : // is `return a`, but appending fewer bytes leads to smaller SSTables.
48 : //
49 : // Given keys a, b for which Compare(a, b) < 0, Separator returns a key k such
50 : // that:
51 : //
52 : // 1. Compare(a, k) <= 0, and
53 : // 2. Compare(k, b) < 0.
54 : //
55 : // As a special case, b may be nil in which case the second condition is dropped.
56 : //
57 : // For example, if dst, a and b are the []byte equivalents of the strings
58 : // "aqua", "black" and "blue", then the result may be "aquablb".
59 : // Similarly, if the arguments were "aqua", "green" and "", then the result
60 : // may be "aquah".
61 : type Separator func(dst, a, b []byte) []byte
62 :
63 : // Successor returns a shortened key given a key a, such that Compare(k, a) >=
64 : // 0. A simple implementation may return a unchanged. The dst parameter may be
65 : // used to store the returned key, though it is valid to pass nil. The returned
66 : // key must be valid to pass to Compare.
67 : type Successor func(dst, a []byte) []byte
68 :
69 : // ImmediateSuccessor is invoked with a prefix key ([Split(a) == len(a)]) and
70 : // returns the smallest key that is larger than the given prefix a.
71 : // ImmediateSuccessor must return a prefix key k such that:
72 : //
73 : // Split(k) == len(k) and Compare(k, a) > 0
74 : //
75 : // and there exists no representable k2 such that:
76 : //
77 : // Split(k2) == len(k2) and Compare(k2, a) > 0 and Compare(k2, k) < 0
78 : //
79 : // As an example, an implementation built on the natural byte ordering using
80 : // bytes.Compare could append a `\0` to `a`.
81 : //
82 : // The dst parameter may be used to store the returned key, though it is valid
83 : // to pass nil. The returned key must be valid to pass to Compare.
84 : type ImmediateSuccessor func(dst, a []byte) []byte
85 :
86 : // Split returns the length of the prefix of the user key that corresponds to
87 : // the key portion of an MVCC encoding scheme to enable the use of prefix bloom
88 : // filters.
89 : //
90 : // The method will only ever be called with valid MVCC keys, that is, keys that
91 : // the user could potentially store in the database. Pebble does not know which
92 : // keys are MVCC keys and which are not, and may call Split on both MVCC keys
93 : // and non-MVCC keys.
94 : //
95 : // A trivial MVCC scheme is one in which Split() returns len(a). This
96 : // corresponds to assigning a constant version to each key in the database. For
97 : // performance reasons, it is preferable to use a `nil` split in this case.
98 : //
99 : // The returned prefix must have the following properties:
100 : //
101 : // 1. The prefix must be a byte prefix:
102 : //
103 : // bytes.HasPrefix(a, prefix(a))
104 : //
105 : // 2. A key consisting of just a prefix must sort before all other keys with
106 : // that prefix:
107 : //
108 : // Compare(prefix(a), a) < 0 if len(suffix(a)) > 0
109 : //
110 : // 3. Prefixes must be used to order keys before suffixes:
111 : //
112 : // If Compare(a, b) <= 0, then Compare(prefix(a), prefix(b)) <= 0
113 : //
114 : // 4. Suffixes themselves must be valid keys and comparable, respecting the same
115 : // ordering as within a key.
116 : //
117 : // If Compare(prefix(a), prefix(b)) == 0, then Compare(suffix(a), suffix(b)) == Compare(a, b)
118 : type Split func(a []byte) int
119 :
120 : // Comparer defines a total ordering over the space of []byte keys: a 'less
121 : // than' relationship.
122 : type Comparer struct {
123 : Compare Compare
124 : Equal Equal
125 : AbbreviatedKey AbbreviatedKey
126 : FormatKey FormatKey
127 : FormatValue FormatValue
128 : Separator Separator
129 : Split Split
130 : Successor Successor
131 : ImmediateSuccessor ImmediateSuccessor
132 :
133 : // Name is the name of the comparer.
134 : //
135 : // The Level-DB on-disk format stores the comparer name, and opening a
136 : // database with a different comparer from the one it was created with
137 : // will result in an error.
138 : Name string
139 : }
140 :
141 : // DefaultFormatter is the default implementation of user key formatting:
142 : // non-ASCII data is formatted as escaped hexadecimal values.
143 1 : var DefaultFormatter = func(key []byte) fmt.Formatter {
144 1 : return FormatBytes(key)
145 1 : }
146 :
147 : // DefaultComparer is the default implementation of the Comparer interface.
148 : // It uses the natural ordering, consistent with bytes.Compare.
149 : var DefaultComparer = &Comparer{
150 : Compare: bytes.Compare,
151 : Equal: bytes.Equal,
152 :
153 1 : AbbreviatedKey: func(key []byte) uint64 {
154 1 : if len(key) >= 8 {
155 1 : return binary.BigEndian.Uint64(key)
156 1 : }
157 1 : var v uint64
158 1 : for _, b := range key {
159 1 : v <<= 8
160 1 : v |= uint64(b)
161 1 : }
162 1 : return v << uint(8*(8-len(key)))
163 : },
164 :
165 : FormatKey: DefaultFormatter,
166 :
167 1 : Separator: func(dst, a, b []byte) []byte {
168 1 : i, n := SharedPrefixLen(a, b), len(dst)
169 1 : dst = append(dst, a...)
170 1 :
171 1 : min := len(a)
172 1 : if min > len(b) {
173 1 : min = len(b)
174 1 : }
175 1 : if i >= min {
176 1 : // Do not shorten if one string is a prefix of the other.
177 1 : return dst
178 1 : }
179 :
180 1 : if a[i] >= b[i] {
181 0 : // b is smaller than a or a is already the shortest possible.
182 0 : return dst
183 0 : }
184 :
185 1 : if i < len(b)-1 || a[i]+1 < b[i] {
186 1 : i += n
187 1 : dst[i]++
188 1 : return dst[:i+1]
189 1 : }
190 :
191 0 : i += n + 1
192 0 : for ; i < len(dst); i++ {
193 0 : if dst[i] != 0xff {
194 0 : dst[i]++
195 0 : return dst[:i+1]
196 0 : }
197 : }
198 0 : return dst
199 : },
200 :
201 1 : Successor: func(dst, a []byte) (ret []byte) {
202 1 : for i := 0; i < len(a); i++ {
203 1 : if a[i] != 0xff {
204 1 : dst = append(dst, a[:i+1]...)
205 1 : dst[len(dst)-1]++
206 1 : return dst
207 1 : }
208 : }
209 : // a is a run of 0xffs, leave it alone.
210 0 : return append(dst, a...)
211 : },
212 :
213 0 : ImmediateSuccessor: func(dst, a []byte) (ret []byte) {
214 0 : return append(append(dst, a...), 0x00)
215 0 : },
216 :
217 : // This name is part of the C++ Level-DB implementation's default file
218 : // format, and should not be changed.
219 : Name: "leveldb.BytewiseComparator",
220 : }
221 :
222 : // SharedPrefixLen returns the largest i such that a[:i] equals b[:i].
223 : // This function can be useful in implementing the Comparer interface.
224 1 : func SharedPrefixLen(a, b []byte) int {
225 1 : i, n := 0, len(a)
226 1 : if n > len(b) {
227 1 : n = len(b)
228 1 : }
229 1 : asUint64 := func(c []byte, i int) uint64 {
230 1 : return binary.LittleEndian.Uint64(c[i:])
231 1 : }
232 1 : for i < n-7 && asUint64(a, i) == asUint64(b, i) {
233 0 : i += 8
234 0 : }
235 1 : for i < n && a[i] == b[i] {
236 1 : i++
237 1 : }
238 1 : return i
239 : }
240 :
241 : // FormatBytes formats a byte slice using hexadecimal escapes for non-ASCII
242 : // data.
243 : type FormatBytes []byte
244 :
245 : const lowerhex = "0123456789abcdef"
246 :
247 : // Format implements the fmt.Formatter interface.
248 1 : func (p FormatBytes) Format(s fmt.State, c rune) {
249 1 : buf := make([]byte, 0, len(p))
250 1 : for _, b := range p {
251 1 : if b < utf8.RuneSelf && strconv.IsPrint(rune(b)) {
252 1 : buf = append(buf, b)
253 1 : continue
254 : }
255 0 : buf = append(buf, `\x`...)
256 0 : buf = append(buf, lowerhex[b>>4])
257 0 : buf = append(buf, lowerhex[b&0xF])
258 : }
259 1 : s.Write(buf)
260 : }
|