Line data Source code
1 : // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package base
6 :
7 : import (
8 : "bytes"
9 : "encoding/binary"
10 : "fmt"
11 : "slices"
12 : "strconv"
13 : "unicode/utf8"
14 :
15 : "github.com/cockroachdb/errors"
16 : )
17 :
18 : // CompareSuffixes compares two key suffixes and returns -1, 0, or +1.
19 : //
20 : // The empty slice suffix must be 'less than' any non-empty suffix.
21 : //
22 : // A full key k is composed of a prefix k[:Split(k)] and suffix k[Split(k):].
23 : // Suffixes are compared to break ties between equal prefixes.
24 : type CompareSuffixes func(a, b []byte) int
25 :
26 : // Compare returns -1, 0, or +1 depending on whether a is 'less than', 'equal
27 : // to' or 'greater than' b.
28 : //
29 : // Both a and b must be valid keys.
30 : //
31 : // A key a is less than b if a's prefix is byte-wise less than b's prefix, or if
32 : // the prefixes are equal and a's suffix is less than b's suffix (according to
33 : // CompareSuffixes).
34 : //
35 : // In other words, if prefix(a) = a[:Split(a)] and suffix(a) = a[Split(a):]:
36 : //
37 : // Compare(a, b) = bytes.Compare(prefix(a), prefix(b)) if not 0, or
38 : // CompareSuffixes(suffix(a), suffix(b)) otherwise.
39 : //
40 : // Compare defaults to using the formula above but it can be customized if there
41 : // is a (potentially faster) specialization.
42 : type Compare func(a, b []byte) int
43 :
44 : // defaultCompare implements Compare in terms of Split and CompareSuffixes, as
45 : // mentioned above.
46 1 : func defaultCompare(split Split, compareSuffixes CompareSuffixes, a, b []byte) int {
47 1 : an := split(a)
48 1 : bn := split(b)
49 1 : if prefixCmp := bytes.Compare(a[:an], b[:bn]); prefixCmp != 0 {
50 1 : return prefixCmp
51 1 : }
52 1 : return compareSuffixes(a[an:], b[bn:])
53 : }
54 :
55 : // Equal returns true if a and b are equivalent.
56 : //
57 : // For a given Compare, Equal(a,b)=true iff Compare(a,b)=0; that is, Equal is a
58 : // (potentially faster) specialization of Compare.
59 : type Equal func(a, b []byte) bool
60 :
61 : // AbbreviatedKey returns a fixed length prefix of a user key such that
62 : //
63 : // AbbreviatedKey(a) < AbbreviatedKey(b) implies a < b, and
64 : // AbbreviatedKey(a) > AbbreviatedKey(b) implies a > b.
65 : //
66 : // If AbbreviatedKey(a) == AbbreviatedKey(b), an additional comparison is
67 : // required to determine if the two keys are actually equal.
68 : //
69 : // This helps optimize indexed batch comparisons for cache locality. If a Split
70 : // function is specified, AbbreviatedKey usually returns the first eight bytes
71 : // of the user key prefix in the order that gives the correct ordering.
72 : type AbbreviatedKey func(key []byte) uint64
73 :
74 : // FormatKey returns a formatter for the user key.
75 : type FormatKey func(key []byte) fmt.Formatter
76 :
77 : // DefaultFormatter is the default implementation of user key formatting:
78 : // non-ASCII data is formatted as escaped hexadecimal values.
79 1 : var DefaultFormatter FormatKey = func(key []byte) fmt.Formatter {
80 1 : return FormatBytes(key)
81 1 : }
82 :
83 : // FormatValue returns a formatter for the user value. The key is also specified
84 : // for the value formatter in order to support value formatting that is
85 : // dependent on the key.
86 : type FormatValue func(key, value []byte) fmt.Formatter
87 :
88 : // Separator is used to construct SSTable index blocks. A trivial implementation
89 : // is `return append(dst, a...)`, but appending fewer bytes leads to smaller
90 : // SSTables.
91 : //
92 : // Given keys a, b for which Compare(a, b) < 0, Separator produces a key k such
93 : // that:
94 : //
95 : // 1. Compare(a, k) <= 0, and
96 : // 2. Compare(k, b) < 0.
97 : //
98 : // For example, if a and b are the []byte equivalents of the strings "black" and
99 : // "blue", then the function may append "blb" to dst.
100 : type Separator func(dst, a, b []byte) []byte
101 :
102 : // Successor appends to dst a shortened key k given a key a such that
103 : // Compare(a, k) <= 0. A simple implementation may return a unchanged.
104 : // The appended key k must be valid to pass to Compare.
105 : type Successor func(dst, a []byte) []byte
106 :
107 : // ImmediateSuccessor is invoked with a prefix key ([Split(a) == len(a)]) and
108 : // appends to dst the smallest prefix key that is larger than the given prefix a.
109 : //
110 : // ImmediateSuccessor must generate a prefix key k such that:
111 : //
112 : // Split(k) == len(k) and Compare(a, k) < 0
113 : //
114 : // and there exists no representable prefix key k2 such that:
115 : //
116 : // Split(k2) == len(k2) and Compare(a, k2) < 0 and Compare(k2, k) < 0
117 : //
118 : // As an example, an implementation built on the natural byte ordering using
119 : // bytes.Compare could append a `\0` to `a`.
120 : //
121 : // The appended key must be valid to pass to Compare.
122 : type ImmediateSuccessor func(dst, a []byte) []byte
123 :
124 : // Split returns the length of the prefix of the user key that corresponds to
125 : // the key portion of an MVCC encoding scheme to enable the use of prefix bloom
126 : // filters.
127 : //
128 : // The method will only ever be called with valid MVCC keys, that is, keys that
129 : // the user could potentially store in the database. Pebble does not know which
130 : // keys are MVCC keys and which are not, and may call Split on both MVCC keys
131 : // and non-MVCC keys.
132 : //
133 : // A trivial MVCC scheme is one in which Split() returns len(a). This
134 : // corresponds to assigning a constant version to each key in the database. For
135 : // performance reasons, it is preferable to use a `nil` split in this case.
136 : //
137 : // Let prefix(a) = a[:Split(a)] and suffix(a) = a[Split(a):]. The following
138 : // properties must hold:
139 : //
140 : // 1. A key consisting of just a prefix must sort before all other keys with
141 : // that prefix:
142 : //
143 : // If len(suffix(a)) > 0, then Compare(prefix(a), a) < 0.
144 : //
145 : // 2. Prefixes must be used to order keys before suffixes:
146 : //
147 : // If Compare(a, b) <= 0, then Compare(prefix(a), prefix(b)) <= 0.
148 : // If Compare(prefix(a), prefix(b)) < 0, then Compare(a, b) < 0
149 : //
150 : // 3. Suffixes themselves must be valid keys and comparable, respecting the same
151 : // ordering as within a key:
152 : //
153 : // If Compare(prefix(a), prefix(b)) = 0, then Compare(a, b) = Compare(suffix(a), suffix(b)).
154 : type Split func(a []byte) int
155 :
156 : // Prefix returns the prefix of the key k, using s to split the key.
157 1 : func (s Split) Prefix(k []byte) []byte {
158 1 : i := s(k)
159 1 : return k[:i:i]
160 1 : }
161 :
162 : // DefaultSplit is a trivial implementation of Split which always returns the
163 : // full key.
164 0 : var DefaultSplit Split = func(key []byte) int { return len(key) }
165 :
166 : // Comparer defines a total ordering over the space of []byte keys: a 'less
167 : // than' relationship.
168 : type Comparer struct {
169 : // The following must always be specified.
170 : AbbreviatedKey AbbreviatedKey
171 : Separator Separator
172 : Successor Successor
173 :
174 : // ImmediateSuccessor must be specified if range keys are used.
175 : ImmediateSuccessor ImmediateSuccessor
176 :
177 : // Split defaults to a trivial implementation that returns the full key length
178 : // if it is not specified.
179 : Split Split
180 :
181 : // CompareSuffixes defaults to bytes.Compare if it is not specified.
182 : CompareSuffixes CompareSuffixes
183 :
184 : // Compare defaults to a generic implementation that uses Split,
185 : // bytes.Compare, and CompareSuffixes if it is not specified.
186 : Compare Compare
187 : // Equal defaults to using Compare() == 0 if it is not specified.
188 : Equal Equal
189 : // FormatKey defaults to the DefaultFormatter if it is not specified.
190 : FormatKey FormatKey
191 :
192 : // FormatValue is optional.
193 : FormatValue FormatValue
194 :
195 : // Name is the name of the comparer.
196 : //
197 : // The on-disk format stores the comparer name, and opening a database with a
198 : // different comparer from the one it was created with will result in an
199 : // error.
200 : Name string
201 : }
202 :
203 : // EnsureDefaults ensures that all non-optional fields are set.
204 : //
205 : // If c is nil, returns DefaultComparer.
206 : //
207 : // If any fields need to be set, returns a modified copy of c.
208 1 : func (c *Comparer) EnsureDefaults() *Comparer {
209 1 : if c == nil {
210 0 : return DefaultComparer
211 0 : }
212 1 : if c.AbbreviatedKey == nil || c.Separator == nil || c.Successor == nil || c.Name == "" {
213 0 : panic("invalid Comparer: mandatory field not set")
214 : }
215 1 : if c.CompareSuffixes != nil && c.Compare != nil && c.Equal != nil && c.Split != nil && c.FormatKey != nil {
216 1 : return c
217 1 : }
218 0 : n := &Comparer{}
219 0 : *n = *c
220 0 :
221 0 : if n.Split == nil {
222 0 : n.Split = DefaultSplit
223 0 : }
224 0 : if n.CompareSuffixes == nil && n.Compare == nil && n.Equal == nil {
225 0 : n.CompareSuffixes = bytes.Compare
226 0 : n.Compare = bytes.Compare
227 0 : n.Equal = bytes.Equal
228 0 : } else {
229 0 : if n.CompareSuffixes == nil {
230 0 : n.CompareSuffixes = bytes.Compare
231 0 : }
232 0 : if n.Compare == nil {
233 0 : n.Compare = func(a, b []byte) int {
234 0 : return defaultCompare(n.Split, n.CompareSuffixes, a, b)
235 0 : }
236 : }
237 0 : if n.Equal == nil {
238 0 : n.Equal = func(a, b []byte) bool {
239 0 : return n.Compare(a, b) == 0
240 0 : }
241 : }
242 : }
243 0 : if n.FormatKey == nil {
244 0 : n.FormatKey = DefaultFormatter
245 0 : }
246 0 : return n
247 : }
248 :
249 : // DefaultComparer is the default implementation of the Comparer interface.
250 : // It uses the natural ordering, consistent with bytes.Compare.
251 : var DefaultComparer = &Comparer{
252 : CompareSuffixes: bytes.Compare,
253 : Compare: bytes.Compare,
254 : Equal: bytes.Equal,
255 :
256 1 : AbbreviatedKey: func(key []byte) uint64 {
257 1 : if len(key) >= 8 {
258 1 : return binary.BigEndian.Uint64(key)
259 1 : }
260 1 : var v uint64
261 1 : for _, b := range key {
262 1 : v <<= 8
263 1 : v |= uint64(b)
264 1 : }
265 1 : return v << uint(8*(8-len(key)))
266 : },
267 :
268 : Split: DefaultSplit,
269 :
270 : FormatKey: DefaultFormatter,
271 :
272 1 : Separator: func(dst, a, b []byte) []byte {
273 1 : i, n := SharedPrefixLen(a, b), len(dst)
274 1 : dst = append(dst, a...)
275 1 :
276 1 : min := len(a)
277 1 : if min > len(b) {
278 1 : min = len(b)
279 1 : }
280 1 : if i >= min {
281 1 : // Do not shorten if one string is a prefix of the other.
282 1 : return dst
283 1 : }
284 :
285 1 : if a[i] >= b[i] {
286 0 : // b is smaller than a or a is already the shortest possible.
287 0 : return dst
288 0 : }
289 :
290 1 : if i < len(b)-1 || a[i]+1 < b[i] {
291 1 : i += n
292 1 : dst[i]++
293 1 : return dst[:i+1]
294 1 : }
295 :
296 0 : i += n + 1
297 0 : for ; i < len(dst); i++ {
298 0 : if dst[i] != 0xff {
299 0 : dst[i]++
300 0 : return dst[:i+1]
301 0 : }
302 : }
303 0 : return dst
304 : },
305 :
306 1 : Successor: func(dst, a []byte) (ret []byte) {
307 1 : for i := 0; i < len(a); i++ {
308 1 : if a[i] != 0xff {
309 1 : dst = append(dst, a[:i+1]...)
310 1 : dst[len(dst)-1]++
311 1 : return dst
312 1 : }
313 : }
314 : // a is a run of 0xffs, leave it alone.
315 0 : return append(dst, a...)
316 : },
317 :
318 0 : ImmediateSuccessor: func(dst, a []byte) (ret []byte) {
319 0 : return append(append(dst, a...), 0x00)
320 0 : },
321 :
322 : // This name is part of the C++ Level-DB implementation's default file
323 : // format, and should not be changed.
324 : Name: "leveldb.BytewiseComparator",
325 : }
326 :
327 : // SharedPrefixLen returns the largest i such that a[:i] equals b[:i].
328 : // This function can be useful in implementing the Comparer interface.
329 1 : func SharedPrefixLen(a, b []byte) int {
330 1 : i, n := 0, len(a)
331 1 : if n > len(b) {
332 1 : n = len(b)
333 1 : }
334 1 : asUint64 := func(c []byte, i int) uint64 {
335 1 : return binary.LittleEndian.Uint64(c[i:])
336 1 : }
337 1 : for i < n-7 && asUint64(a, i) == asUint64(b, i) {
338 1 : i += 8
339 1 : }
340 1 : for i < n && a[i] == b[i] {
341 1 : i++
342 1 : }
343 1 : return i
344 : }
345 :
346 : // MinUserKey returns the smaller of two user keys. If one of the keys is nil,
347 : // the other one is returned.
348 1 : func MinUserKey(cmp Compare, a, b []byte) []byte {
349 1 : if a != nil && (b == nil || cmp(a, b) < 0) {
350 1 : return a
351 1 : }
352 1 : return b
353 : }
354 :
355 : // FormatBytes formats a byte slice using hexadecimal escapes for non-ASCII
356 : // data.
357 : type FormatBytes []byte
358 :
359 : const lowerhex = "0123456789abcdef"
360 :
361 : // Format implements the fmt.Formatter interface.
362 1 : func (p FormatBytes) Format(s fmt.State, c rune) {
363 1 : buf := make([]byte, 0, len(p))
364 1 : for _, b := range p {
365 1 : if b < utf8.RuneSelf && strconv.IsPrint(rune(b)) {
366 1 : buf = append(buf, b)
367 1 : continue
368 : }
369 0 : buf = append(buf, `\x`...)
370 0 : buf = append(buf, lowerhex[b>>4])
371 0 : buf = append(buf, lowerhex[b&0xF])
372 : }
373 1 : s.Write(buf)
374 : }
375 :
376 : // MakeAssertComparer creates a Comparer that is the same with the given
377 : // Comparer except that it asserts that the Compare and Equal functions adhere
378 : // to their specifications.
379 1 : func MakeAssertComparer(c Comparer) Comparer {
380 1 : return Comparer{
381 1 : Compare: func(a []byte, b []byte) int {
382 1 : res := c.Compare(a, b)
383 1 : // Verify that Compare is consistent with the default implementation.
384 1 : if expected := defaultCompare(c.Split, c.CompareSuffixes, a, b); res != expected {
385 0 : panic(AssertionFailedf("%s: Compare(%s, %s)=%d, expected %d",
386 0 : c.Name, c.FormatKey(a), c.FormatKey(b), res, expected))
387 : }
388 1 : return res
389 : },
390 :
391 1 : Equal: func(a []byte, b []byte) bool {
392 1 : eq := c.Equal(a, b)
393 1 : // Verify that Equal is consistent with Compare.
394 1 : if expected := c.Compare(a, b); eq != (expected == 0) {
395 0 : panic("Compare and Equal are not consistent")
396 : }
397 1 : return eq
398 : },
399 :
400 : // TODO(radu): add more checks.
401 : CompareSuffixes: c.CompareSuffixes,
402 : AbbreviatedKey: c.AbbreviatedKey,
403 : Separator: c.Separator,
404 : Successor: c.Successor,
405 : ImmediateSuccessor: c.ImmediateSuccessor,
406 : FormatKey: c.FormatKey,
407 : Split: c.Split,
408 : FormatValue: c.FormatValue,
409 : Name: c.Name,
410 : }
411 : }
412 :
413 : // CheckComparer is a mini test suite that verifies a comparer implementation.
414 : //
415 : // It takes lists of valid prefixes and suffixes. It is recommended that both
416 : // lists have at least three elements.
417 0 : func CheckComparer(c *Comparer, prefixes [][]byte, suffixes [][]byte) error {
418 0 : // Empty slice is always a valid suffix.
419 0 : suffixes = append(suffixes, nil)
420 0 :
421 0 : // Verify the suffixes have a consistent ordering.
422 0 : slices.SortFunc(suffixes, c.CompareSuffixes)
423 0 : if !slices.IsSortedFunc(suffixes, c.CompareSuffixes) {
424 0 : return errors.Errorf("CompareSuffixes is inconsistent")
425 0 : }
426 :
427 : // Check the split function.
428 0 : for _, p := range prefixes {
429 0 : for _, s := range suffixes {
430 0 : key := slices.Concat(p, s)
431 0 : if n := c.Split(key); n != len(p) {
432 0 : return errors.Errorf("incorrect Split result %d on '%x' (prefix '%x' suffix '%x')", n, key, p, s)
433 0 : }
434 : }
435 : }
436 :
437 : // Check the Compare/Equals functions on all possible combinations.
438 0 : for _, ap := range prefixes {
439 0 : for _, as := range suffixes {
440 0 : a := slices.Concat(ap, as)
441 0 : for _, bp := range prefixes {
442 0 : for _, bs := range suffixes {
443 0 : b := slices.Concat(bp, bs)
444 0 : result := c.Compare(a, b)
445 0 :
446 0 : expected := bytes.Compare(ap, bp)
447 0 : if expected == 0 {
448 0 : expected = c.CompareSuffixes(as, bs)
449 0 : }
450 :
451 0 : if (result == 0) != c.Equal(a, b) {
452 0 : return errors.Errorf("Equal(%s, %s) doesn't agree with Compare", c.FormatKey(a), c.FormatKey(b))
453 0 : }
454 :
455 0 : if result != expected {
456 0 : return errors.Errorf("Compare(%s, %s)=%d, expected %d", c.FormatKey(a), c.FormatKey(b), result, expected)
457 0 : }
458 : }
459 : }
460 : }
461 : }
462 :
463 : // TODO(radu): check more methods.
464 0 : return nil
465 : }
|