Line data Source code
1 : // Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package colblk
6 :
7 : import (
8 : "bytes"
9 : "context"
10 : "encoding/binary"
11 : "fmt"
12 :
13 : "github.com/cockroachdb/errors"
14 : "github.com/cockroachdb/pebble/internal/base"
15 : "github.com/cockroachdb/pebble/internal/binfmt"
16 : "github.com/cockroachdb/pebble/internal/invariants"
17 : "github.com/cockroachdb/pebble/internal/keyspan"
18 : "github.com/cockroachdb/pebble/internal/treeprinter"
19 : )
20 :
21 : // the keyspan header encodes a 32-bit count of the number of unique boundary
22 : // user keys in the block.
23 : const keyspanHeaderSize = 4
24 :
25 : // keyspan block column indexes
26 : const (
27 : // Columns with 1 row per unique boundary user key contained within the
28 : // block (with the count indicated via the keyspan custom block header).
29 : keyspanColBoundaryUserKeys = 0
30 : keyspanColBoundaryKeyIndices = 1
31 : // Columns with 1 row per keyspan.Key (with the count indicated via the
32 : // columnar header's row count).
33 : keyspanColTrailers = 2
34 : keyspanColSuffixes = 3
35 : keyspanColValues = 4
36 : keyspanColumnCount = 5
37 : )
38 :
39 : // A KeyspanBlockWriter writes keyspan blocks. See the colblk package
40 : // documentation for more details on the schema.
41 : type KeyspanBlockWriter struct {
42 : equal base.Equal
43 :
44 : // boundary columns
45 : boundaryUserKeys RawBytesBuilder
46 : boundaryKeyIndexes UintBuilder
47 :
48 : // keyspan.Key columns
49 : trailers UintBuilder
50 : suffixes RawBytesBuilder
51 : values RawBytesBuilder
52 :
53 : enc blockEncoder
54 : keyCount int
55 : unsafeLastUserKey []byte
56 : }
57 :
58 : // Init initializes a keyspan block writer.
59 0 : func (w *KeyspanBlockWriter) Init(equal base.Equal) {
60 0 : w.equal = equal
61 0 : w.boundaryUserKeys.Init()
62 0 : w.boundaryKeyIndexes.Init()
63 0 : w.trailers.Init()
64 0 : w.suffixes.Init()
65 0 : w.values.Init()
66 0 : w.keyCount = 0
67 0 : w.unsafeLastUserKey = nil
68 0 : }
69 :
70 : // Reset resets the keyspan block writer to an empty state, retaining memory for
71 : // reuse.
72 0 : func (w *KeyspanBlockWriter) Reset() {
73 0 : w.boundaryUserKeys.Reset()
74 0 : w.boundaryKeyIndexes.Reset()
75 0 : w.trailers.Reset()
76 0 : w.suffixes.Reset()
77 0 : w.values.Reset()
78 0 : w.enc.reset()
79 0 : w.keyCount = 0
80 0 : w.unsafeLastUserKey = nil
81 0 : }
82 :
83 : // AddSpan appends a new Span to the pending block. Spans must already be
84 : // fragmented (non-overlapping) and added in sorted order.
85 0 : func (w *KeyspanBlockWriter) AddSpan(s keyspan.Span) {
86 0 : // When keyspans are fragmented, abutting spans share a user key. One span's
87 0 : // end key is the next span's start key. Check if the previous user key
88 0 : // equals this span's start key, and avoid encoding it again if so.
89 0 : if w.unsafeLastUserKey == nil || !w.equal(w.unsafeLastUserKey, s.Start) {
90 0 : w.boundaryKeyIndexes.Set(w.boundaryUserKeys.rows, uint64(w.keyCount))
91 0 : w.boundaryUserKeys.Put(s.Start)
92 0 : }
93 : // The end key must be strictly greater than the start key and spans are
94 : // already sorted, so the end key is guaranteed to not be present in the
95 : // column yet. We need to encode it.
96 0 : w.boundaryKeyIndexes.Set(w.boundaryUserKeys.rows, uint64(w.keyCount+len(s.Keys)))
97 0 : w.boundaryUserKeys.Put(s.End)
98 0 :
99 0 : // Hold on to a slice of the copy of s.End we just added to the bytes
100 0 : // builder so that we can compare it to the next span's start key.
101 0 : w.unsafeLastUserKey = w.boundaryUserKeys.data[len(w.boundaryUserKeys.data)-len(s.End):]
102 0 :
103 0 : // Encode each keyspan.Key in the span.
104 0 : for i := range s.Keys {
105 0 : w.trailers.Set(w.keyCount, uint64(s.Keys[i].Trailer))
106 0 : w.suffixes.Put(s.Keys[i].Suffix)
107 0 : w.values.Put(s.Keys[i].Value)
108 0 : w.keyCount++
109 0 : }
110 : }
111 :
112 : // KeyCount returns the count of keyspan.Keys written to the writer.
113 0 : func (w *KeyspanBlockWriter) KeyCount() int {
114 0 : return w.keyCount
115 0 : }
116 :
117 : // UnsafeBoundaryKeys returns the smallest and largest keys written to the
118 : // keyspan block so far. The returned internal keys have user keys that point
119 : // directly into the block writer's memory and must not be mutated.
120 0 : func (w *KeyspanBlockWriter) UnsafeBoundaryKeys() (smallest, largest base.InternalKey) {
121 0 : if w.keyCount == 0 {
122 0 : return smallest, largest
123 0 : }
124 0 : smallest.UserKey = w.boundaryUserKeys.UnsafeGet(0)
125 0 : smallest.Trailer = base.InternalKeyTrailer(w.trailers.Get(0))
126 0 : largest.UserKey = w.boundaryUserKeys.UnsafeGet(w.boundaryUserKeys.rows - 1)
127 0 : largest.Trailer = base.MakeTrailer(base.SeqNumMax,
128 0 : base.InternalKeyTrailer(w.trailers.Get(w.keyCount-1)).Kind())
129 0 : return smallest, largest
130 : }
131 :
132 : // Size returns the size of the pending block.
133 0 : func (w *KeyspanBlockWriter) Size() int {
134 0 : off := blockHeaderSize(keyspanColumnCount, keyspanHeaderSize)
135 0 : // Span boundary columns (with userKeyCount elements).
136 0 : off = w.boundaryUserKeys.Size(w.boundaryUserKeys.rows, off)
137 0 : off = w.boundaryKeyIndexes.Size(w.boundaryUserKeys.rows, off)
138 0 :
139 0 : // keyspan.Key columns (with keyCount elements).
140 0 : off = w.trailers.Size(w.keyCount, off)
141 0 : off = w.suffixes.Size(w.keyCount, off)
142 0 : off = w.values.Size(w.keyCount, off)
143 0 : off++ // trailing padding
144 0 : return int(off)
145 0 : }
146 :
147 : // Finish finalizes the pending block and returns the encoded block.
148 0 : func (w *KeyspanBlockWriter) Finish() []byte {
149 0 : w.enc.init(w.Size(), Header{
150 0 : Version: Version1,
151 0 : Columns: keyspanColumnCount,
152 0 : Rows: uint32(w.keyCount),
153 0 : }, keyspanHeaderSize)
154 0 :
155 0 : // The keyspan block has a 4-byte custom header used to encode the number of
156 0 : // user keys encoded within the user key and start indices columns. All
157 0 : // other columns have the number of rows indicated by the shared columnar
158 0 : // block header.
159 0 : binary.LittleEndian.PutUint32(w.enc.data()[:keyspanHeaderSize], uint32(w.boundaryUserKeys.rows))
160 0 :
161 0 : // Columns with userKeyCount elements.
162 0 : w.enc.encode(w.boundaryUserKeys.rows, &w.boundaryUserKeys)
163 0 : w.enc.encode(w.boundaryUserKeys.rows, &w.boundaryKeyIndexes)
164 0 : // Columns with keyCount elements.
165 0 : w.enc.encode(w.keyCount, &w.trailers)
166 0 : w.enc.encode(w.keyCount, &w.suffixes)
167 0 : w.enc.encode(w.keyCount, &w.values)
168 0 : return w.enc.finish()
169 0 : }
170 :
171 : // String returns a string representation of the pending block's state.
172 0 : func (w *KeyspanBlockWriter) String() string {
173 0 : var buf bytes.Buffer
174 0 : size := uint32(w.Size())
175 0 : fmt.Fprintf(&buf, "size=%d:\n", size)
176 0 :
177 0 : fmt.Fprint(&buf, "0: user keys: ")
178 0 : w.boundaryUserKeys.WriteDebug(&buf, w.boundaryUserKeys.rows)
179 0 : fmt.Fprintln(&buf)
180 0 : fmt.Fprint(&buf, "1: start indices: ")
181 0 : w.boundaryKeyIndexes.WriteDebug(&buf, w.boundaryUserKeys.rows)
182 0 : fmt.Fprintln(&buf)
183 0 :
184 0 : fmt.Fprint(&buf, "2: trailers: ")
185 0 : w.trailers.WriteDebug(&buf, w.keyCount)
186 0 : fmt.Fprintln(&buf)
187 0 : fmt.Fprint(&buf, "3: suffixes: ")
188 0 : w.suffixes.WriteDebug(&buf, w.keyCount)
189 0 : fmt.Fprintln(&buf)
190 0 : fmt.Fprint(&buf, "4: values: ")
191 0 : w.values.WriteDebug(&buf, w.keyCount)
192 0 : fmt.Fprintln(&buf)
193 0 :
194 0 : return buf.String()
195 0 : }
196 :
197 : // A KeyspanReader exposes facilities for reading a keyspan block. A
198 : // KeyspanReader is safe for concurrent use and may be used by multiple
199 : // KeyspanIters concurrently.
200 : type KeyspanReader struct {
201 : blockReader BlockReader
202 : // Span boundary columns with boundaryKeysCount elements.
203 : boundaryKeysCount uint32
204 : boundaryKeys RawBytes
205 : boundaryKeyIndices UnsafeUints
206 :
207 : // keyspan.Key columns with blockReader.header.Rows elements.
208 : trailers UnsafeUints
209 : suffixes RawBytes
210 : values RawBytes
211 : }
212 :
213 : // Init initializes the keyspan reader with the given block data.
214 0 : func (r *KeyspanReader) Init(data []byte) {
215 0 : r.boundaryKeysCount = binary.LittleEndian.Uint32(data[:4])
216 0 : r.blockReader.Init(data, keyspanHeaderSize)
217 0 : // The boundary key columns have a different number of rows than the other
218 0 : // columns, so we call DecodeColumn directly, taking care to pass in
219 0 : // rows=r.boundaryKeysCount.
220 0 : r.boundaryKeys = DecodeColumn(&r.blockReader, keyspanColBoundaryUserKeys,
221 0 : int(r.boundaryKeysCount), DataTypeBytes, DecodeRawBytes)
222 0 : r.boundaryKeyIndices = DecodeColumn(&r.blockReader, keyspanColBoundaryKeyIndices,
223 0 : int(r.boundaryKeysCount), DataTypeUint, DecodeUnsafeUints)
224 0 :
225 0 : r.trailers = r.blockReader.Uints(keyspanColTrailers)
226 0 : r.suffixes = r.blockReader.RawBytes(keyspanColSuffixes)
227 0 : r.values = r.blockReader.RawBytes(keyspanColValues)
228 0 : }
229 :
230 : // DebugString prints a human-readable explanation of the keyspan block's binary
231 : // representation.
232 0 : func (r *KeyspanReader) DebugString() string {
233 0 : f := binfmt.New(r.blockReader.data).LineWidth(20)
234 0 : r.Describe(f)
235 0 : return f.String()
236 0 : }
237 :
238 : // Describe describes the binary format of the keyspan block, assuming
239 : // f.Offset() is positioned at the beginning of the same keyspan block described
240 : // by r.
241 0 : func (r *KeyspanReader) Describe(f *binfmt.Formatter) {
242 0 : // Set the relative offset. When loaded into memory, the beginning of blocks
243 0 : // are aligned. Padding that ensures alignment is done relative to the
244 0 : // current offset. Setting the relative offset ensures that if we're
245 0 : // describing this block within a larger structure (eg, f.Offset()>0), we
246 0 : // compute padding appropriately assuming the current byte f.Offset() is
247 0 : // aligned.
248 0 : f.SetAnchorOffset()
249 0 :
250 0 : f.CommentLine("keyspan block header")
251 0 : f.HexBytesln(4, "user key count: %d", r.boundaryKeysCount)
252 0 : r.blockReader.headerToBinFormatter(f)
253 0 :
254 0 : for i := 0; i < keyspanColumnCount; i++ {
255 0 : // Not all columns in a keyspan block have the same number of rows; the
256 0 : // boundary columns columns are different (and their lengths are held in
257 0 : // the keyspan block header that precedes the ordinary columnar block
258 0 : // header).
259 0 : rows := int(r.blockReader.header.Rows)
260 0 : if i == keyspanColBoundaryUserKeys || i == keyspanColBoundaryKeyIndices {
261 0 : rows = int(r.boundaryKeysCount)
262 0 : }
263 0 : r.blockReader.columnToBinFormatter(f, i, rows)
264 : }
265 0 : f.HexBytesln(1, "block padding byte")
266 : }
267 :
268 : // searchBoundaryKeys returns the index of the first boundary key greater than
269 : // or equal to key and whether or not the key was found exactly.
270 0 : func (r *KeyspanReader) searchBoundaryKeys(cmp base.Compare, key []byte) (index int, equal bool) {
271 0 : i, j := 0, int(r.boundaryKeysCount)
272 0 : for i < j {
273 0 : h := int(uint(i+j) >> 1) // avoid overflow when computing h
274 0 : // i ≤ h < j
275 0 : switch cmp(key, r.boundaryKeys.At(h)) {
276 0 : case +1:
277 0 : i = h + 1
278 0 : case 0:
279 0 : return h, true
280 0 : default:
281 0 : // -1
282 0 : j = h
283 : }
284 : }
285 0 : return i, false
286 : }
287 :
288 : // A KeyspanIter is an iterator over a keyspan block. It implements the
289 : // keyspan.FragmentIterator interface.
290 : type KeyspanIter struct {
291 : r *KeyspanReader
292 : cmp base.Compare
293 : span keyspan.Span
294 : // When positioned, the current span's start key is the user key at
295 : // i.r.userKeys.At(i.startBoundIndex)
296 : // and the current span's end key is the user key at
297 : // i.r.userKeys.At(i.startBoundIndex+1)
298 : startBoundIndex int
299 : keyBuf [2]keyspan.Key
300 : }
301 :
302 : // Assert that KeyspanIter implements the FragmentIterator interface.
303 : var _ keyspan.FragmentIterator = (*KeyspanIter)(nil)
304 :
305 : // Init initializes the iterator with the given comparison function and keyspan
306 : // reader.
307 0 : func (i *KeyspanIter) Init(cmp base.Compare, r *KeyspanReader) {
308 0 : i.r = r
309 0 : i.cmp = cmp
310 0 : i.span.Start, i.span.End = nil, nil
311 0 : i.startBoundIndex = -1
312 0 : if i.span.Keys == nil {
313 0 : i.span.Keys = i.keyBuf[:0]
314 0 : }
315 : }
316 :
317 : // SeekGE moves the iterator to the first span covering a key greater than
318 : // or equal to the given key. This is equivalent to seeking to the first
319 : // span with an end key greater than the given key.
320 0 : func (i *KeyspanIter) SeekGE(key []byte) (*keyspan.Span, error) {
321 0 : // Seek among the boundary keys.
322 0 : j, eq := i.r.searchBoundaryKeys(i.cmp, key)
323 0 : // If the found boundary key does not exactly equal the given key, it's
324 0 : // strictly greater than key. We need to back up one to consider the span
325 0 : // that ends at the this boundary key.
326 0 : if !eq {
327 0 : j = max(j-1, 0)
328 0 : }
329 0 : return i.gatherKeysForward(j), nil
330 : }
331 :
332 : // SeekLT moves the iterator to the last span covering a key less than the
333 : // given key. This is equivalent to seeking to the last span with a start
334 : // key less than the given key.
335 0 : func (i *KeyspanIter) SeekLT(key []byte) (*keyspan.Span, error) {
336 0 : // Seek among the boundary keys.
337 0 : j, eq := i.r.searchBoundaryKeys(i.cmp, key)
338 0 : // If eq is true, the found boundary key exactly equals the given key. A
339 0 : // span that starts at exactly [key] does not contain any keys strictly less
340 0 : // than [key], so back up one index.
341 0 : if eq {
342 0 : j--
343 0 : }
344 : // If all boundaries are less than [key], or only the last boundary is
345 : // greater than the key, then we want the last span so we clamp the index to
346 : // the second to last boundary.
347 0 : return i.gatherKeysBackward(min(j, int(i.r.boundaryKeysCount)-2)), nil
348 : }
349 :
350 : // First moves the iterator to the first span.
351 0 : func (i *KeyspanIter) First() (*keyspan.Span, error) {
352 0 : return i.gatherKeysForward(0), nil
353 0 : }
354 :
355 : // Last moves the iterator to the last span.
356 0 : func (i *KeyspanIter) Last() (*keyspan.Span, error) {
357 0 : return i.gatherKeysBackward(int(i.r.boundaryKeysCount) - 2), nil
358 0 : }
359 :
360 : // Next moves the iterator to the next span.
361 0 : func (i *KeyspanIter) Next() (*keyspan.Span, error) {
362 0 : return i.gatherKeysForward(i.startBoundIndex + 1), nil
363 0 : }
364 :
365 : // Prev moves the iterator to the previous span.
366 0 : func (i *KeyspanIter) Prev() (*keyspan.Span, error) {
367 0 : return i.gatherKeysBackward(max(i.startBoundIndex-1, -1)), nil
368 0 : }
369 :
370 : // gatherKeysForward returns the first non-empty Span in the forward direction,
371 : // starting with the span formed by using the boundary key at index
372 : // [startBoundIndex] as the span's start boundary.
373 0 : func (i *KeyspanIter) gatherKeysForward(startBoundIndex int) *keyspan.Span {
374 0 : if invariants.Enabled && startBoundIndex < 0 {
375 0 : panic(errors.AssertionFailedf("out of bounds: i.startBoundIndex=%d", startBoundIndex))
376 : }
377 0 : i.startBoundIndex = startBoundIndex
378 0 : if i.startBoundIndex >= int(i.r.boundaryKeysCount)-1 {
379 0 : return nil
380 0 : }
381 0 : if !i.isNonemptySpan(i.startBoundIndex) {
382 0 : if i.startBoundIndex == int(i.r.boundaryKeysCount)-2 {
383 0 : // Corruption error
384 0 : panic(base.CorruptionErrorf("keyspan block has empty span at end"))
385 : }
386 0 : i.startBoundIndex++
387 0 : if !i.isNonemptySpan(i.startBoundIndex) {
388 0 : panic(base.CorruptionErrorf("keyspan block has consecutive empty spans"))
389 : }
390 : }
391 0 : return i.materializeSpan()
392 : }
393 :
394 : // gatherKeysBackward returns the first non-empty Span in the backward direction,
395 : // starting with the span formed by using the boundary key at index
396 : // [startBoundIndex] as the span's start boundary.
397 0 : func (i *KeyspanIter) gatherKeysBackward(startBoundIndex int) *keyspan.Span {
398 0 : i.startBoundIndex = startBoundIndex
399 0 : if i.startBoundIndex < 0 {
400 0 : return nil
401 0 : }
402 0 : if invariants.Enabled && i.startBoundIndex >= int(i.r.boundaryKeysCount)-1 {
403 0 : panic(errors.AssertionFailedf("out of bounds: i.startBoundIndex=%d, i.r.boundaryKeysCount=%d",
404 0 : i.startBoundIndex, i.r.boundaryKeysCount))
405 : }
406 0 : if !i.isNonemptySpan(i.startBoundIndex) {
407 0 : if i.startBoundIndex == 0 {
408 0 : // Corruption error
409 0 : panic(base.CorruptionErrorf("keyspan block has empty span at beginning"))
410 : }
411 0 : i.startBoundIndex--
412 0 : if !i.isNonemptySpan(i.startBoundIndex) {
413 0 : panic(base.CorruptionErrorf("keyspan block has consecutive empty spans"))
414 : }
415 : }
416 0 : return i.materializeSpan()
417 : }
418 :
419 : // isNonemptySpan returns true if the span starting at i.startBoundIndex
420 : // contains keys.
421 0 : func (i *KeyspanIter) isNonemptySpan(startBoundIndex int) bool {
422 0 : return i.r.boundaryKeyIndices.At(startBoundIndex) < i.r.boundaryKeyIndices.At(startBoundIndex+1)
423 0 : }
424 :
425 : // materializeSpan constructs the current span from i.startBoundIndex and
426 : // i.{start,end}KeyIndex.
427 0 : func (i *KeyspanIter) materializeSpan() *keyspan.Span {
428 0 : i.span = keyspan.Span{
429 0 : Start: i.r.boundaryKeys.At(i.startBoundIndex),
430 0 : End: i.r.boundaryKeys.At(i.startBoundIndex + 1),
431 0 : Keys: i.span.Keys[:0],
432 0 : }
433 0 : startIndex := i.r.boundaryKeyIndices.At(i.startBoundIndex)
434 0 : endIndex := i.r.boundaryKeyIndices.At(i.startBoundIndex + 1)
435 0 : if cap(i.span.Keys) < int(endIndex-startIndex) {
436 0 : i.span.Keys = make([]keyspan.Key, 0, int(endIndex-startIndex))
437 0 : }
438 0 : for j := startIndex; j < endIndex; j++ {
439 0 : i.span.Keys = append(i.span.Keys, keyspan.Key{
440 0 : Trailer: base.InternalKeyTrailer(i.r.trailers.At(int(j))),
441 0 : Suffix: i.r.suffixes.At(int(j)),
442 0 : Value: i.r.values.At(int(j)),
443 0 : })
444 0 : }
445 0 : return &i.span
446 : }
447 :
448 : // Close closes the iterator.
449 0 : func (i *KeyspanIter) Close() {}
450 :
451 : // SetContext implements keyspan.FragmentIterator.
452 0 : func (i *KeyspanIter) SetContext(context.Context) {}
453 :
454 : // WrapChildren implements keyspan.FragmentIterator.
455 0 : func (i *KeyspanIter) WrapChildren(keyspan.WrapFn) {}
456 :
457 : // DebugTree is part of the FragmentIterator interface.
458 0 : func (i *KeyspanIter) DebugTree(tp treeprinter.Node) {
459 0 : tp.Childf("%T(%p)", i, i)
460 0 : }
|