Line data Source code
1 : // Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package block
6 :
7 : import (
8 : "bytes"
9 : "encoding/binary"
10 : "fmt"
11 :
12 : "github.com/cespare/xxhash/v2"
13 : "github.com/cockroachdb/errors"
14 : "github.com/cockroachdb/pebble/internal/base"
15 : "github.com/cockroachdb/pebble/internal/crc"
16 : )
17 :
18 : // Handle is the file offset and length of a block.
19 : type Handle struct {
20 : Offset, Length uint64
21 : }
22 :
23 : // EncodeVarints encodes the block handle into dst using a variable-width
24 : // encoding and returns the number of bytes written.
25 2 : func (h Handle) EncodeVarints(dst []byte) int {
26 2 : n := binary.PutUvarint(dst, h.Offset)
27 2 : m := binary.PutUvarint(dst[n:], h.Length)
28 2 : return n + m
29 2 : }
30 :
31 : // HandleWithProperties is used for data blocks and first/lower level index
32 : // blocks, since they can be annotated using BlockPropertyCollectors.
33 : type HandleWithProperties struct {
34 : Handle
35 : Props []byte
36 : }
37 :
38 : // EncodeVarints encodes the block handle and properties into dst using a
39 : // variable-width encoding and returns the number of bytes written.
40 2 : func (h HandleWithProperties) EncodeVarints(dst []byte) []byte {
41 2 : n := h.Handle.EncodeVarints(dst)
42 2 : dst = append(dst[:n], h.Props...)
43 2 : return dst
44 2 : }
45 :
46 : // DecodeHandle returns the block handle encoded in a variable-width encoding at
47 : // the start of src, as well as the number of bytes it occupies. It returns zero
48 : // if given invalid input. A block handle for a data block or a first/lower
49 : // level index block should not be decoded using DecodeHandle since the caller
50 : // may validate that the number of bytes decoded is equal to the length of src,
51 : // which will be false if the properties are not decoded. In those cases the
52 : // caller should use DecodeHandleWithProperties.
53 2 : func DecodeHandle(src []byte) (Handle, int) {
54 2 : offset, n := binary.Uvarint(src)
55 2 : length, m := binary.Uvarint(src[n:])
56 2 : if n == 0 || m == 0 {
57 0 : return Handle{}, 0
58 0 : }
59 2 : return Handle{Offset: offset, Length: length}, n + m
60 : }
61 :
62 : // DecodeHandleWithProperties returns the block handle and properties encoded in
63 : // a variable-width encoding at the start of src. src needs to be exactly the
64 : // length that was encoded. This method must be used for data block and
65 : // first/lower level index blocks. The properties in the block handle point to
66 : // the bytes in src.
67 2 : func DecodeHandleWithProperties(src []byte) (HandleWithProperties, error) {
68 2 : bh, n := DecodeHandle(src)
69 2 : if n == 0 {
70 0 : return HandleWithProperties{}, errors.Errorf("invalid block.Handle")
71 0 : }
72 2 : return HandleWithProperties{
73 2 : Handle: bh,
74 2 : Props: src[n:],
75 2 : }, nil
76 : }
77 :
78 : // TrailerLen is the length of the trailer at the end of a block.
79 : const TrailerLen = 5
80 :
81 : // Trailer is the trailer at the end of a block, encoding the block type
82 : // (compression) and a checksum.
83 : type Trailer = [TrailerLen]byte
84 :
85 : // MakeTrailer constructs a trailer from a block type and a checksum.
86 2 : func MakeTrailer(blockType byte, checksum uint32) (t Trailer) {
87 2 : t[0] = blockType
88 2 : binary.LittleEndian.PutUint32(t[1:5], checksum)
89 2 : return t
90 2 : }
91 :
92 : // ChecksumType specifies the checksum used for blocks.
93 : type ChecksumType byte
94 :
95 : // The available checksum types. These values are part of the durable format and
96 : // should not be changed.
97 : const (
98 : ChecksumTypeNone ChecksumType = 0
99 : ChecksumTypeCRC32c ChecksumType = 1
100 : ChecksumTypeXXHash ChecksumType = 2
101 : ChecksumTypeXXHash64 ChecksumType = 3
102 : )
103 :
104 : // String implements fmt.Stringer.
105 1 : func (t ChecksumType) String() string {
106 1 : switch t {
107 1 : case ChecksumTypeCRC32c:
108 1 : return "crc32c"
109 0 : case ChecksumTypeNone:
110 0 : return "none"
111 0 : case ChecksumTypeXXHash:
112 0 : return "xxhash"
113 0 : case ChecksumTypeXXHash64:
114 0 : return "xxhash64"
115 0 : default:
116 0 : panic(errors.Newf("sstable: unknown checksum type: %d", t))
117 : }
118 : }
119 :
120 : // A Checksummer calculates checksums for blocks.
121 : type Checksummer struct {
122 : Type ChecksumType
123 : xxHasher *xxhash.Digest
124 : }
125 :
126 : // Checksum computes a checksum over the provided block and block type.
127 2 : func (c *Checksummer) Checksum(block []byte, blockType []byte) (checksum uint32) {
128 2 : // Calculate the checksum.
129 2 : switch c.Type {
130 2 : case ChecksumTypeCRC32c:
131 2 : checksum = crc.New(block).Update(blockType).Value()
132 1 : case ChecksumTypeXXHash64:
133 1 : if c.xxHasher == nil {
134 1 : c.xxHasher = xxhash.New()
135 1 : } else {
136 1 : c.xxHasher.Reset()
137 1 : }
138 1 : c.xxHasher.Write(block)
139 1 : c.xxHasher.Write(blockType)
140 1 : checksum = uint32(c.xxHasher.Sum64())
141 0 : default:
142 0 : panic(errors.Newf("unsupported checksum type: %d", c.Type))
143 : }
144 2 : return checksum
145 : }
146 :
147 : // DataBlockIterator is a type constraint for implementations of block iterators
148 : // over data blocks. It's currently satisifed by the *rowblk.Iter type.
149 : //
150 : // DataBlockIterator requires that the type be a pointer to its type parameter,
151 : // D, to allow sstable iterators embed the block iterator within its struct. See
152 : // this example from the Go generics proposal:
153 : // https://go.googlesource.com/proposal/+/refs/heads/master/design/43651-type-parameters.md#pointer-method-example
154 : type DataBlockIterator[D any] interface {
155 : base.InternalIterator
156 :
157 : // Handle returns the handle to the block.
158 : Handle() BufferHandle
159 : // InitHandle initializes the block from the provided buffer handle.
160 : InitHandle(base.Compare, base.Split, BufferHandle, IterTransforms) error
161 : // Valid returns true if the iterator is currently positioned at a valid KV.
162 : Valid() bool
163 : // KV returns the key-value pair at the current iterator position. The
164 : // iterator must be Valid().
165 : KV() *base.InternalKV
166 : // ResetForReuse resets the iterator so that it may be used for iteration
167 : // over a new block. It returns the non-pointer D type to allow resetting
168 : // while initializing the containing struct, eg::
169 : // iter = sstableIter{dataBlockIter: iter.dataBlockIter.ResetForReuse()}
170 : ResetForReuse() D
171 : // FirstUserKey returns the first user key contained within the data block.
172 : FirstUserKey() []byte
173 : // Invalidate invalidates the block iterator, removing references to the block
174 : // it was initialized with.
175 : Invalidate()
176 : // IsDataInvalidated returns true when the iterator has been invalidated
177 : // using an Invalidate call.
178 : //
179 : // NB: this is different from Valid which indicates whether the current *KV*
180 : // is valid.
181 : IsDataInvalidated() bool
182 :
183 : *D // non-interface type constraint element
184 : }
185 :
186 : // IndexBlockIterator is a type constraint for implementations of block
187 : // iterators over index blocks. It's currently satisifed by the
188 : // *rowblk.IndexIter type.
189 : //
190 : // IndexBlockIterator requires that the type be a pointer to its type parameter,
191 : // I, to allow sstable iterators embed the block iterator within its struct. See
192 : // this example from the Go generics proposal:
193 : // https://go.googlesource.com/proposal/+/refs/heads/master/design/43651-type-parameters.md#pointer-method-example
194 : type IndexBlockIterator[I any] interface {
195 : // InitHandle initializes an iterator from the provided block handle.
196 : InitHandle(base.Compare, base.Split, BufferHandle, IterTransforms) error
197 : // ResetForReuse resets the index iterator for reuse, retaining buffers to
198 : // avoid future allocations.
199 : ResetForReuse() I
200 : // Valid returns true if the iterator is currently positioned at a valid
201 : // block handle.
202 : Valid() bool
203 : // IsDataInvalidated returns true when the iterator has been invalidated
204 : // using an Invalidate call.
205 : //
206 : // NB: this is different from Valid which indicates whether the iterator is
207 : // currently positioned over a valid block entry.
208 : IsDataInvalidated() bool
209 : // Invalidate invalidates the block iterator, removing references to the
210 : // block it was initialized with.
211 : Invalidate()
212 : // Handle returns the underlying block buffer handle, if the iterator was
213 : // initialized with one.
214 : Handle() BufferHandle
215 : // Separator returns the separator at the iterator's current position. The
216 : // iterator must be positioned at a valid row. A Separator is a user key
217 : // guaranteed to be greater than or equal to every key contained within the
218 : // referenced block(s).
219 : Separator() []byte
220 : // BlockHandleWithProperties decodes the block handle with any encoded
221 : // properties at the iterator's current position.
222 : BlockHandleWithProperties() (HandleWithProperties, error)
223 : // SeekGE seeks the index iterator to the first block entry with a separator
224 : // key greater or equal to the given key. If it returns true, the iterator
225 : // is positioned over the first block that might contain the key [key], and
226 : // following blocks have keys ≥ Separator(). It returns false if the seek
227 : // key is greater than all index block separators.
228 : SeekGE(key []byte) bool
229 : // First seeks index iterator to the first block entry. It returns false if
230 : // the index block is empty.
231 : First() bool
232 : // Last seeks index iterator to the last block entry. It returns false if
233 : // the index block is empty.
234 : Last() bool
235 : // Next steps the index iterator to the next block entry. It returns false
236 : // if the index block is exhausted.
237 : Next() bool
238 : // Prev steps the index iterator to the previous block entry. It returns
239 : // false if the index block is exhausted.
240 : Prev() bool
241 : // Close closes the iterator, releasing any resources it holds.
242 : Close() error
243 :
244 : *I // non-interface type constraint element
245 : }
246 :
247 : // IterTransforms allow on-the-fly transformation of data at iteration time.
248 : //
249 : // These transformations could in principle be implemented as block transforms
250 : // (at least for non-virtual sstables), but applying them during iteration is
251 : // preferable.
252 : type IterTransforms struct {
253 : SyntheticSeqNum SyntheticSeqNum
254 : HideObsoletePoints bool
255 : SyntheticPrefix SyntheticPrefix
256 : SyntheticSuffix SyntheticSuffix
257 : }
258 :
259 : // NoTransforms is the default value for IterTransforms.
260 : var NoTransforms = IterTransforms{}
261 :
262 : // FragmentIterTransforms allow on-the-fly transformation of range deletion or
263 : // range key data at iteration time.
264 : type FragmentIterTransforms struct {
265 : SyntheticSeqNum SyntheticSeqNum
266 : // ElideSameSeqNum, if true, returns only the first-occurring (in forward
267 : // order) keyspan.Key for each sequence number.
268 : ElideSameSeqNum bool
269 : SyntheticPrefix SyntheticPrefix
270 : SyntheticSuffix SyntheticSuffix
271 : }
272 :
273 : // NoFragmentTransforms is the default value for IterTransforms.
274 : var NoFragmentTransforms = FragmentIterTransforms{}
275 :
276 : // SyntheticSeqNum is used to override all sequence numbers in a table. It is
277 : // set to a non-zero value when the table was created externally and ingested
278 : // whole.
279 : type SyntheticSeqNum base.SeqNum
280 :
281 : // NoSyntheticSeqNum is the default zero value for SyntheticSeqNum, which
282 : // disables overriding the sequence number.
283 : const NoSyntheticSeqNum SyntheticSeqNum = 0
284 :
285 : // SyntheticSuffix will replace every suffix of every point key surfaced during
286 : // block iteration. A synthetic suffix can be used if:
287 : // 1. no two keys in the sst share the same prefix; and
288 : // 2. pebble.Compare(prefix + replacementSuffix, prefix + originalSuffix) < 0,
289 : // for all keys in the backing sst which have a suffix (i.e. originalSuffix
290 : // is not empty).
291 : //
292 : // Range dels are not supported when synthetic suffix is used.
293 : //
294 : // For range keys, the synthetic suffix applies to the suffix that is part of
295 : // RangeKeySet - if it is non-empty, it is replaced with the SyntheticSuffix.
296 : // RangeKeyUnset keys are not supported when a synthetic suffix is used.
297 : type SyntheticSuffix []byte
298 :
299 : // IsSet returns true if the synthetic suffix is not enpty.
300 2 : func (ss SyntheticSuffix) IsSet() bool {
301 2 : return len(ss) > 0
302 2 : }
303 :
304 : // SyntheticPrefix represents a byte slice that is implicitly prepended to every
305 : // key in a file being read or accessed by a reader. Note that the table is
306 : // assumed to contain "prefix-less" keys that become full keys when prepended
307 : // with the synthetic prefix. The table's bloom filters are constructed only on
308 : // the "prefix-less" keys in the table, but interactions with the file including
309 : // seeks and reads, will all behave as if the file had been constructed from
310 : // keys that did include the prefix. Note that all Compare operations may act on
311 : // a prefix-less key as the synthetic prefix will never modify key metadata
312 : // stored in the key suffix.
313 : //
314 : // NB: Since this transformation currently only applies to point keys, a block
315 : // with range keys cannot be iterated over with a synthetic prefix.
316 : type SyntheticPrefix []byte
317 :
318 : // IsSet returns true if the synthetic prefix is not enpty.
319 2 : func (sp SyntheticPrefix) IsSet() bool {
320 2 : return len(sp) > 0
321 2 : }
322 :
323 : // Apply prepends the synthetic prefix to a key.
324 2 : func (sp SyntheticPrefix) Apply(key []byte) []byte {
325 2 : res := make([]byte, 0, len(sp)+len(key))
326 2 : res = append(res, sp...)
327 2 : res = append(res, key...)
328 2 : return res
329 2 : }
330 :
331 : // Invert removes the synthetic prefix from a key.
332 2 : func (sp SyntheticPrefix) Invert(key []byte) []byte {
333 2 : res, ok := bytes.CutPrefix(key, sp)
334 2 : if !ok {
335 0 : panic(fmt.Sprintf("unexpected prefix: %s", key))
336 : }
337 2 : return res
338 : }
|