Line data Source code
1 : // Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package block
6 :
7 : import (
8 : "bytes"
9 : "encoding/binary"
10 : "fmt"
11 :
12 : "github.com/cespare/xxhash/v2"
13 : "github.com/cockroachdb/errors"
14 : "github.com/cockroachdb/pebble/internal/base"
15 : "github.com/cockroachdb/pebble/internal/crc"
16 : )
17 :
18 : // Handle is the file offset and length of a block.
19 : type Handle struct {
20 : Offset, Length uint64
21 : }
22 :
23 : // EncodeVarints encodes the block handle into dst using a variable-width
24 : // encoding and returns the number of bytes written.
25 2 : func (h Handle) EncodeVarints(dst []byte) int {
26 2 : n := binary.PutUvarint(dst, h.Offset)
27 2 : m := binary.PutUvarint(dst[n:], h.Length)
28 2 : return n + m
29 2 : }
30 :
31 : // HandleWithProperties is used for data blocks and first/lower level index
32 : // blocks, since they can be annotated using BlockPropertyCollectors.
33 : type HandleWithProperties struct {
34 : Handle
35 : Props []byte
36 : }
37 :
38 : // EncodeVarints encodes the block handle and properties into dst using a
39 : // variable-width encoding and returns the number of bytes written.
40 2 : func (h HandleWithProperties) EncodeVarints(dst []byte) []byte {
41 2 : n := h.Handle.EncodeVarints(dst)
42 2 : dst = append(dst[:n], h.Props...)
43 2 : return dst
44 2 : }
45 :
46 : // DecodeHandle returns the block handle encoded in a variable-width encoding at
47 : // the start of src, as well as the number of bytes it occupies. It returns zero
48 : // if given invalid input. A block handle for a data block or a first/lower
49 : // level index block should not be decoded using DecodeHandle since the caller
50 : // may validate that the number of bytes decoded is equal to the length of src,
51 : // which will be false if the properties are not decoded. In those cases the
52 : // caller should use DecodeHandleWithProperties.
53 2 : func DecodeHandle(src []byte) (Handle, int) {
54 2 : offset, n := binary.Uvarint(src)
55 2 : length, m := binary.Uvarint(src[n:])
56 2 : if n == 0 || m == 0 {
57 0 : return Handle{}, 0
58 0 : }
59 2 : return Handle{Offset: offset, Length: length}, n + m
60 : }
61 :
62 : // DecodeHandleWithProperties returns the block handle and properties encoded in
63 : // a variable-width encoding at the start of src. src needs to be exactly the
64 : // length that was encoded. This method must be used for data block and
65 : // first/lower level index blocks. The properties in the block handle point to
66 : // the bytes in src.
67 2 : func DecodeHandleWithProperties(src []byte) (HandleWithProperties, error) {
68 2 : bh, n := DecodeHandle(src)
69 2 : if n == 0 {
70 0 : return HandleWithProperties{}, errors.Errorf("invalid block.Handle")
71 0 : }
72 2 : return HandleWithProperties{
73 2 : Handle: bh,
74 2 : Props: src[n:],
75 2 : }, nil
76 : }
77 :
78 : // TrailerLen is the length of the trailer at the end of a block.
79 : const TrailerLen = 5
80 :
81 : // Trailer is the trailer at the end of a block, encoding the block type
82 : // (compression) and a checksum.
83 : type Trailer = [TrailerLen]byte
84 :
85 : // MakeTrailer constructs a trailer from a block type and a checksum.
86 2 : func MakeTrailer(blockType byte, checksum uint32) (t Trailer) {
87 2 : t[0] = blockType
88 2 : binary.LittleEndian.PutUint32(t[1:5], checksum)
89 2 : return t
90 2 : }
91 :
92 : // ChecksumType specifies the checksum used for blocks.
93 : type ChecksumType byte
94 :
95 : // The available checksum types. These values are part of the durable format and
96 : // should not be changed.
97 : const (
98 : ChecksumTypeNone ChecksumType = 0
99 : ChecksumTypeCRC32c ChecksumType = 1
100 : ChecksumTypeXXHash ChecksumType = 2
101 : ChecksumTypeXXHash64 ChecksumType = 3
102 : )
103 :
104 : // String implements fmt.Stringer.
105 1 : func (t ChecksumType) String() string {
106 1 : switch t {
107 1 : case ChecksumTypeCRC32c:
108 1 : return "crc32c"
109 0 : case ChecksumTypeNone:
110 0 : return "none"
111 0 : case ChecksumTypeXXHash:
112 0 : return "xxhash"
113 0 : case ChecksumTypeXXHash64:
114 0 : return "xxhash64"
115 0 : default:
116 0 : panic(errors.Newf("sstable: unknown checksum type: %d", t))
117 : }
118 : }
119 :
120 : // A Checksummer calculates checksums for blocks.
121 : type Checksummer struct {
122 : Type ChecksumType
123 : xxHasher *xxhash.Digest
124 : }
125 :
126 : // Checksum computes a checksum over the provided block and block type.
127 2 : func (c *Checksummer) Checksum(block []byte, blockType []byte) (checksum uint32) {
128 2 : // Calculate the checksum.
129 2 : switch c.Type {
130 2 : case ChecksumTypeCRC32c:
131 2 : checksum = crc.New(block).Update(blockType).Value()
132 1 : case ChecksumTypeXXHash64:
133 1 : if c.xxHasher == nil {
134 1 : c.xxHasher = xxhash.New()
135 1 : } else {
136 1 : c.xxHasher.Reset()
137 1 : }
138 1 : c.xxHasher.Write(block)
139 1 : c.xxHasher.Write(blockType)
140 1 : checksum = uint32(c.xxHasher.Sum64())
141 0 : default:
142 0 : panic(errors.Newf("unsupported checksum type: %d", c.Type))
143 : }
144 2 : return checksum
145 : }
146 :
147 : // DataBlockIterator is a type constraint for implementations of block iterators
148 : // over data blocks. It's currently satisifed by the *rowblk.Iter type.
149 : type DataBlockIterator interface {
150 : base.InternalIterator
151 :
152 : // Handle returns the handle to the block.
153 : Handle() BufferHandle
154 : // InitHandle initializes the block from the provided buffer handle.
155 : InitHandle(base.Compare, base.Split, BufferHandle, IterTransforms) error
156 : // Valid returns true if the iterator is currently positioned at a valid KV.
157 : Valid() bool
158 : // KV returns the key-value pair at the current iterator position. The
159 : // iterator must be Valid().
160 : KV() *base.InternalKV
161 : // FirstUserKey returns the first user key contained within the data block.
162 : FirstUserKey() []byte
163 : // Invalidate invalidates the block iterator, removing references to the block
164 : // it was initialized with.
165 : Invalidate()
166 : // IsDataInvalidated returns true when the iterator has been invalidated
167 : // using an Invalidate call.
168 : //
169 : // NB: this is different from Valid which indicates whether the current *KV*
170 : // is valid.
171 : IsDataInvalidated() bool
172 : }
173 :
174 : // IndexBlockIterator is an interface for implementations of block
175 : // iterators over index blocks. It's currently satisifed by the
176 : // *rowblk.IndexIter type.
177 : type IndexBlockIterator interface {
178 : // Init initializes the block iterator from the provided block.
179 : Init(base.Compare, base.Split, []byte, IterTransforms) error
180 : // InitHandle initializes an iterator from the provided block handle.
181 : InitHandle(base.Compare, base.Split, BufferHandle, IterTransforms) error
182 : // Valid returns true if the iterator is currently positioned at a valid
183 : // block handle.
184 : Valid() bool
185 : // IsDataInvalidated returns true when the iterator has been invalidated
186 : // using an Invalidate call.
187 : //
188 : // NB: this is different from Valid which indicates whether the iterator is
189 : // currently positioned over a valid block entry.
190 : IsDataInvalidated() bool
191 : // Invalidate invalidates the block iterator, removing references to the
192 : // block it was initialized with.
193 : Invalidate()
194 : // Handle returns the underlying block buffer handle, if the iterator was
195 : // initialized with one.
196 : Handle() BufferHandle
197 : // Separator returns the separator at the iterator's current position. The
198 : // iterator must be positioned at a valid row. A Separator is a user key
199 : // guaranteed to be greater than or equal to every key contained within the
200 : // referenced block(s).
201 : Separator() []byte
202 : // BlockHandleWithProperties decodes the block handle with any encoded
203 : // properties at the iterator's current position.
204 : BlockHandleWithProperties() (HandleWithProperties, error)
205 : // SeekGE seeks the index iterator to the first block entry with a separator
206 : // key greater or equal to the given key. If it returns true, the iterator
207 : // is positioned over the first block that might contain the key [key], and
208 : // following blocks have keys ≥ Separator(). It returns false if the seek
209 : // key is greater than all index block separators.
210 : SeekGE(key []byte) bool
211 : // First seeks index iterator to the first block entry. It returns false if
212 : // the index block is empty.
213 : First() bool
214 : // Last seeks index iterator to the last block entry. It returns false if
215 : // the index block is empty.
216 : Last() bool
217 : // Next steps the index iterator to the next block entry. It returns false
218 : // if the index block is exhausted.
219 : Next() bool
220 : // Prev steps the index iterator to the previous block entry. It returns
221 : // false if the index block is exhausted.
222 : Prev() bool
223 : // Close closes the iterator, releasing any resources it holds.
224 : Close() error
225 : }
226 :
227 : // IterTransforms allow on-the-fly transformation of data at iteration time.
228 : //
229 : // These transformations could in principle be implemented as block transforms
230 : // (at least for non-virtual sstables), but applying them during iteration is
231 : // preferable.
232 : type IterTransforms struct {
233 : SyntheticSeqNum SyntheticSeqNum
234 : HideObsoletePoints bool
235 : SyntheticPrefix SyntheticPrefix
236 : SyntheticSuffix SyntheticSuffix
237 : }
238 :
239 : // NoTransforms is the default value for IterTransforms.
240 : var NoTransforms = IterTransforms{}
241 :
242 : // FragmentIterTransforms allow on-the-fly transformation of range deletion or
243 : // range key data at iteration time.
244 : type FragmentIterTransforms struct {
245 : SyntheticSeqNum SyntheticSeqNum
246 : // ElideSameSeqNum, if true, returns only the first-occurring (in forward
247 : // order) keyspan.Key for each sequence number.
248 : ElideSameSeqNum bool
249 : SyntheticPrefix SyntheticPrefix
250 : SyntheticSuffix SyntheticSuffix
251 : }
252 :
253 : // NoFragmentTransforms is the default value for IterTransforms.
254 : var NoFragmentTransforms = FragmentIterTransforms{}
255 :
256 : // SyntheticSeqNum is used to override all sequence numbers in a table. It is
257 : // set to a non-zero value when the table was created externally and ingested
258 : // whole.
259 : type SyntheticSeqNum base.SeqNum
260 :
261 : // NoSyntheticSeqNum is the default zero value for SyntheticSeqNum, which
262 : // disables overriding the sequence number.
263 : const NoSyntheticSeqNum SyntheticSeqNum = 0
264 :
265 : // SyntheticSuffix will replace every suffix of every point key surfaced during
266 : // block iteration. A synthetic suffix can be used if:
267 : // 1. no two keys in the sst share the same prefix; and
268 : // 2. pebble.Compare(prefix + replacementSuffix, prefix + originalSuffix) < 0,
269 : // for all keys in the backing sst which have a suffix (i.e. originalSuffix
270 : // is not empty).
271 : //
272 : // Range dels are not supported when synthetic suffix is used.
273 : //
274 : // For range keys, the synthetic suffix applies to the suffix that is part of
275 : // RangeKeySet - if it is non-empty, it is replaced with the SyntheticSuffix.
276 : // RangeKeyUnset keys are not supported when a synthetic suffix is used.
277 : type SyntheticSuffix []byte
278 :
279 : // IsSet returns true if the synthetic suffix is not enpty.
280 2 : func (ss SyntheticSuffix) IsSet() bool {
281 2 : return len(ss) > 0
282 2 : }
283 :
284 : // SyntheticPrefix represents a byte slice that is implicitly prepended to every
285 : // key in a file being read or accessed by a reader. Note that the table is
286 : // assumed to contain "prefix-less" keys that become full keys when prepended
287 : // with the synthetic prefix. The table's bloom filters are constructed only on
288 : // the "prefix-less" keys in the table, but interactions with the file including
289 : // seeks and reads, will all behave as if the file had been constructed from
290 : // keys that did include the prefix. Note that all Compare operations may act on
291 : // a prefix-less key as the synthetic prefix will never modify key metadata
292 : // stored in the key suffix.
293 : //
294 : // NB: Since this transformation currently only applies to point keys, a block
295 : // with range keys cannot be iterated over with a synthetic prefix.
296 : type SyntheticPrefix []byte
297 :
298 : // IsSet returns true if the synthetic prefix is not enpty.
299 2 : func (sp SyntheticPrefix) IsSet() bool {
300 2 : return len(sp) > 0
301 2 : }
302 :
303 : // Apply prepends the synthetic prefix to a key.
304 2 : func (sp SyntheticPrefix) Apply(key []byte) []byte {
305 2 : res := make([]byte, 0, len(sp)+len(key))
306 2 : res = append(res, sp...)
307 2 : res = append(res, key...)
308 2 : return res
309 2 : }
310 :
311 : // Invert removes the synthetic prefix from a key.
312 2 : func (sp SyntheticPrefix) Invert(key []byte) []byte {
313 2 : res, ok := bytes.CutPrefix(key, sp)
314 2 : if !ok {
315 0 : panic(fmt.Sprintf("unexpected prefix: %s", key))
316 : }
317 2 : return res
318 : }
|