Line data Source code
1 : // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package base
6 :
7 : import (
8 : "bytes"
9 : "encoding/binary"
10 : "fmt"
11 : "strconv"
12 : "unicode/utf8"
13 : )
14 :
15 : // Compare returns -1, 0, or +1 depending on whether a is 'less than', 'equal
16 : // to' or 'greater than' b. The empty slice must be 'less than' any non-empty
17 : // slice.
18 : //
19 : // Compare is used to compare user keys, such as those passed as arguments to
20 : // the various DB methods, as well as those returned from Separator, Successor,
21 : // and Split.
22 : type Compare func(a, b []byte) int
23 :
24 : // Equal returns true if a and b are equivalent.
25 : //
26 : // For a given Compare, Equal(a,b)=true iff Compare(a,b)=0; that is, Equal is a
27 : // (potentially faster) specialization of Compare.
28 : type Equal func(a, b []byte) bool
29 :
30 : // AbbreviatedKey returns a fixed length prefix of a user key such that
31 : //
32 : // AbbreviatedKey(a) < AbbreviatedKey(b) implies a < b, and
33 : // AbbreviatedKey(a) > AbbreviatedKey(b) implies a > b.
34 : //
35 : // If AbbreviatedKey(a) == AbbreviatedKey(b), an additional comparison is
36 : // required to determine if the two keys are actually equal.
37 : //
38 : // This helps optimize indexed batch comparisons for cache locality. If a Split
39 : // function is specified, AbbreviatedKey usually returns the first eight bytes
40 : // of the user key prefix in the order that gives the correct ordering.
41 : type AbbreviatedKey func(key []byte) uint64
42 :
43 : // FormatKey returns a formatter for the user key.
44 : type FormatKey func(key []byte) fmt.Formatter
45 :
46 : // DefaultFormatter is the default implementation of user key formatting:
47 : // non-ASCII data is formatted as escaped hexadecimal values.
48 1 : var DefaultFormatter FormatKey = func(key []byte) fmt.Formatter {
49 1 : return FormatBytes(key)
50 1 : }
51 :
52 : // FormatValue returns a formatter for the user value. The key is also specified
53 : // for the value formatter in order to support value formatting that is
54 : // dependent on the key.
55 : type FormatValue func(key, value []byte) fmt.Formatter
56 :
57 : // Separator is used to construct SSTable index blocks. A trivial implementation
58 : // is `return a`, but appending fewer bytes leads to smaller SSTables.
59 : //
60 : // Given keys a, b for which Compare(a, b) < 0, Separator returns a key k such
61 : // that:
62 : //
63 : // 1. Compare(a, k) <= 0, and
64 : // 2. Compare(k, b) < 0.
65 : //
66 : // As a special case, b may be nil in which case the second condition is dropped.
67 : //
68 : // For example, if dst, a and b are the []byte equivalents of the strings
69 : // "aqua", "black" and "blue", then the result may be "aquablb".
70 : // Similarly, if the arguments were "aqua", "green" and "", then the result
71 : // may be "aquah".
72 : type Separator func(dst, a, b []byte) []byte
73 :
74 : // Successor returns a shortened key given a key a, such that Compare(k, a) >=
75 : // 0. A simple implementation may return a unchanged. The dst parameter may be
76 : // used to store the returned key, though it is valid to pass nil. The returned
77 : // key must be valid to pass to Compare.
78 : type Successor func(dst, a []byte) []byte
79 :
80 : // ImmediateSuccessor is invoked with a prefix key ([Split(a) == len(a)]) and
81 : // returns the smallest prefix key that is larger than the given prefix a.
82 : //
83 : // ImmediateSuccessor must return a prefix key k such that:
84 : //
85 : // Split(k) == len(k) and Compare(k, a) > 0
86 : //
87 : // and there exists no representable k2 such that:
88 : //
89 : // Split(k2) == len(k2) and Compare(k2, a) > 0 and Compare(k2, k) < 0
90 : //
91 : // As an example, an implementation built on the natural byte ordering using
92 : // bytes.Compare could append a `\0` to `a`.
93 : //
94 : // The dst parameter may be used to store the returned key, though it is valid
95 : // to pass nil. The returned key must be valid to pass to Compare.
96 : type ImmediateSuccessor func(dst, a []byte) []byte
97 :
98 : // Split returns the length of the prefix of the user key that corresponds to
99 : // the key portion of an MVCC encoding scheme to enable the use of prefix bloom
100 : // filters.
101 : //
102 : // The method will only ever be called with valid MVCC keys, that is, keys that
103 : // the user could potentially store in the database. Pebble does not know which
104 : // keys are MVCC keys and which are not, and may call Split on both MVCC keys
105 : // and non-MVCC keys.
106 : //
107 : // A trivial MVCC scheme is one in which Split() returns len(a). This
108 : // corresponds to assigning a constant version to each key in the database. For
109 : // performance reasons, it is preferable to use a `nil` split in this case.
110 : //
111 : // The returned prefix must have the following properties:
112 : //
113 : // 1. The prefix must be a byte prefix:
114 : //
115 : // bytes.HasPrefix(a, prefix(a))
116 : //
117 : // 2. A key consisting of just a prefix must sort before all other keys with
118 : // that prefix:
119 : //
120 : // Compare(prefix(a), a) < 0 if len(suffix(a)) > 0
121 : //
122 : // 3. Prefixes must be used to order keys before suffixes:
123 : //
124 : // If Compare(a, b) <= 0, then Compare(prefix(a), prefix(b)) <= 0
125 : //
126 : // 4. Suffixes themselves must be valid keys and comparable, respecting the same
127 : // ordering as within a key.
128 : //
129 : // If Compare(prefix(a), prefix(b)) == 0, then Compare(suffix(a), suffix(b)) == Compare(a, b)
130 : type Split func(a []byte) int
131 :
132 : // DefaultSplit is a trivial implementation of Split which always returns the
133 : // full key.
134 1 : var DefaultSplit Split = func(key []byte) int { return len(key) }
135 :
136 : // Comparer defines a total ordering over the space of []byte keys: a 'less
137 : // than' relationship.
138 : type Comparer struct {
139 : // These fields must always be specified.
140 : Compare Compare
141 : AbbreviatedKey AbbreviatedKey
142 : Separator Separator
143 : Successor Successor
144 :
145 : // ImmediateSuccessor must be specified if range keys are used.
146 : ImmediateSuccessor ImmediateSuccessor
147 :
148 : // Equal defaults to using Compare() == 0 if it is not specified.
149 : Equal Equal
150 : // FormatKey defaults to the DefaultFormatter if it is not specified.
151 : FormatKey FormatKey
152 : // Split defaults to a trivial implementation that returns the full key length
153 : // if it is not specified.
154 : Split Split
155 :
156 : // FormatValue is optional.
157 : FormatValue FormatValue
158 :
159 : // Name is the name of the comparer.
160 : //
161 : // The on-disk format stores the comparer name, and opening a database with a
162 : // different comparer from the one it was created with will result in an
163 : // error.
164 : Name string
165 : }
166 :
167 : // EnsureDefaults ensures that all non-optional fields are set.
168 : //
169 : // If c is nil, returns DefaultComparer.
170 : //
171 : // If any fields need to be set, returns a modified copy of c.
172 1 : func (c *Comparer) EnsureDefaults() *Comparer {
173 1 : if c == nil {
174 1 : return DefaultComparer
175 1 : }
176 1 : if c.Compare == nil || c.AbbreviatedKey == nil || c.Separator == nil || c.Successor == nil || c.Name == "" {
177 0 : panic("invalid Comparer: mandatory field not set")
178 : }
179 1 : if c.Equal != nil && c.Split != nil && c.FormatKey != nil {
180 1 : return c
181 1 : }
182 1 : n := &Comparer{}
183 1 : *n = *c
184 1 : if n.Equal == nil {
185 1 : cmp := n.Compare
186 1 : n.Equal = func(a, b []byte) bool {
187 0 : return cmp(a, b) == 0
188 0 : }
189 : }
190 1 : if n.Split == nil {
191 1 : n.Split = DefaultSplit
192 1 : }
193 1 : if n.FormatKey == nil {
194 1 : n.FormatKey = DefaultFormatter
195 1 : }
196 1 : return n
197 : }
198 :
199 : // DefaultComparer is the default implementation of the Comparer interface.
200 : // It uses the natural ordering, consistent with bytes.Compare.
201 : var DefaultComparer = &Comparer{
202 : Compare: bytes.Compare,
203 : Equal: bytes.Equal,
204 :
205 1 : AbbreviatedKey: func(key []byte) uint64 {
206 1 : if len(key) >= 8 {
207 1 : return binary.BigEndian.Uint64(key)
208 1 : }
209 1 : var v uint64
210 1 : for _, b := range key {
211 1 : v <<= 8
212 1 : v |= uint64(b)
213 1 : }
214 1 : return v << uint(8*(8-len(key)))
215 : },
216 :
217 : Split: DefaultSplit,
218 :
219 : FormatKey: DefaultFormatter,
220 :
221 1 : Separator: func(dst, a, b []byte) []byte {
222 1 : i, n := SharedPrefixLen(a, b), len(dst)
223 1 : dst = append(dst, a...)
224 1 :
225 1 : min := len(a)
226 1 : if min > len(b) {
227 1 : min = len(b)
228 1 : }
229 1 : if i >= min {
230 1 : // Do not shorten if one string is a prefix of the other.
231 1 : return dst
232 1 : }
233 :
234 1 : if a[i] >= b[i] {
235 1 : // b is smaller than a or a is already the shortest possible.
236 1 : return dst
237 1 : }
238 :
239 1 : if i < len(b)-1 || a[i]+1 < b[i] {
240 1 : i += n
241 1 : dst[i]++
242 1 : return dst[:i+1]
243 1 : }
244 :
245 1 : i += n + 1
246 1 : for ; i < len(dst); i++ {
247 1 : if dst[i] != 0xff {
248 1 : dst[i]++
249 1 : return dst[:i+1]
250 1 : }
251 : }
252 1 : return dst
253 : },
254 :
255 1 : Successor: func(dst, a []byte) (ret []byte) {
256 1 : for i := 0; i < len(a); i++ {
257 1 : if a[i] != 0xff {
258 1 : dst = append(dst, a[:i+1]...)
259 1 : dst[len(dst)-1]++
260 1 : return dst
261 1 : }
262 : }
263 : // a is a run of 0xffs, leave it alone.
264 1 : return append(dst, a...)
265 : },
266 :
267 0 : ImmediateSuccessor: func(dst, a []byte) (ret []byte) {
268 0 : return append(append(dst, a...), 0x00)
269 0 : },
270 :
271 : // This name is part of the C++ Level-DB implementation's default file
272 : // format, and should not be changed.
273 : Name: "leveldb.BytewiseComparator",
274 : }
275 :
276 : // SharedPrefixLen returns the largest i such that a[:i] equals b[:i].
277 : // This function can be useful in implementing the Comparer interface.
278 1 : func SharedPrefixLen(a, b []byte) int {
279 1 : i, n := 0, len(a)
280 1 : if n > len(b) {
281 1 : n = len(b)
282 1 : }
283 1 : asUint64 := func(c []byte, i int) uint64 {
284 1 : return binary.LittleEndian.Uint64(c[i:])
285 1 : }
286 1 : for i < n-7 && asUint64(a, i) == asUint64(b, i) {
287 1 : i += 8
288 1 : }
289 1 : for i < n && a[i] == b[i] {
290 1 : i++
291 1 : }
292 1 : return i
293 : }
294 :
295 : // FormatBytes formats a byte slice using hexadecimal escapes for non-ASCII
296 : // data.
297 : type FormatBytes []byte
298 :
299 : const lowerhex = "0123456789abcdef"
300 :
301 : // Format implements the fmt.Formatter interface.
302 1 : func (p FormatBytes) Format(s fmt.State, c rune) {
303 1 : buf := make([]byte, 0, len(p))
304 1 : for _, b := range p {
305 1 : if b < utf8.RuneSelf && strconv.IsPrint(rune(b)) {
306 1 : buf = append(buf, b)
307 1 : continue
308 : }
309 1 : buf = append(buf, `\x`...)
310 1 : buf = append(buf, lowerhex[b>>4])
311 1 : buf = append(buf, lowerhex[b&0xF])
312 : }
313 1 : s.Write(buf)
314 : }
|