Line data Source code
1 : // Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package colblk
6 :
7 : import (
8 : "bytes"
9 : "context"
10 : "encoding/binary"
11 : "fmt"
12 :
13 : "github.com/cockroachdb/errors"
14 : "github.com/cockroachdb/pebble/internal/base"
15 : "github.com/cockroachdb/pebble/internal/binfmt"
16 : "github.com/cockroachdb/pebble/internal/invariants"
17 : "github.com/cockroachdb/pebble/internal/keyspan"
18 : "github.com/cockroachdb/pebble/internal/treeprinter"
19 : )
20 :
21 : // the keyspan header encodes a 32-bit count of the number of unique boundary
22 : // user keys in the block.
23 : const keyspanHeaderSize = 4
24 :
25 : // keyspan block column indexes
26 : const (
27 : // Columns with 1 row per unique boundary user key contained within the
28 : // block (with the count indicated via the keyspan custom block header).
29 : keyspanColBoundaryUserKeys = 0
30 : keyspanColBoundaryKeyIndices = 1
31 : // Columns with 1 row per keyspan.Key (with the count indicated via the
32 : // columnar header's row count).
33 : keyspanColTrailers = 2
34 : keyspanColSuffixes = 3
35 : keyspanColValues = 4
36 : keyspanColumnCount = 5
37 : )
38 :
39 : // A KeyspanBlockWriter writes keyspan blocks. See the colblk package
40 : // documentation for more details on the schema.
41 : type KeyspanBlockWriter struct {
42 : equal base.Equal
43 :
44 : // boundary columns
45 : boundaryUserKeys RawBytesBuilder
46 : boundaryKeyIndexes UintBuilder
47 :
48 : // keyspan.Key columns
49 : trailers UintBuilder
50 : suffixes RawBytesBuilder
51 : values RawBytesBuilder
52 :
53 : enc blockEncoder
54 : keyCount int
55 : unsafeLastUserKey []byte
56 : }
57 :
58 : // Init initializes a keyspan block writer.
59 1 : func (w *KeyspanBlockWriter) Init(equal base.Equal) {
60 1 : w.equal = equal
61 1 : w.boundaryUserKeys.Init()
62 1 : w.boundaryKeyIndexes.Init()
63 1 : w.trailers.Init()
64 1 : w.suffixes.Init()
65 1 : w.values.Init()
66 1 : w.keyCount = 0
67 1 : w.unsafeLastUserKey = nil
68 1 : }
69 :
70 : // Reset resets the keyspan block writer to an empty state, retaining memory for
71 : // reuse.
72 1 : func (w *KeyspanBlockWriter) Reset() {
73 1 : w.boundaryUserKeys.Reset()
74 1 : w.boundaryKeyIndexes.Reset()
75 1 : w.trailers.Reset()
76 1 : w.suffixes.Reset()
77 1 : w.values.Reset()
78 1 : w.enc.reset()
79 1 : w.keyCount = 0
80 1 : w.unsafeLastUserKey = nil
81 1 : }
82 :
83 : // AddSpan appends a new Span to the pending block. Spans must already be
84 : // fragmented (non-overlapping) and added in sorted order.
85 1 : func (w *KeyspanBlockWriter) AddSpan(s keyspan.Span) {
86 1 : // When keyspans are fragmented, abutting spans share a user key. One span's
87 1 : // end key is the next span's start key. Check if the previous user key
88 1 : // equals this span's start key, and avoid encoding it again if so.
89 1 : if w.unsafeLastUserKey == nil || !w.equal(w.unsafeLastUserKey, s.Start) {
90 1 : w.boundaryKeyIndexes.Set(w.boundaryUserKeys.rows, uint64(w.keyCount))
91 1 : w.boundaryUserKeys.Put(s.Start)
92 1 : }
93 : // The end key must be strictly greater than the start key and spans are
94 : // already sorted, so the end key is guaranteed to not be present in the
95 : // column yet. We need to encode it.
96 1 : w.boundaryKeyIndexes.Set(w.boundaryUserKeys.rows, uint64(w.keyCount+len(s.Keys)))
97 1 : w.boundaryUserKeys.Put(s.End)
98 1 :
99 1 : // Hold on to a slice of the copy of s.End we just added to the bytes
100 1 : // builder so that we can compare it to the next span's start key.
101 1 : w.unsafeLastUserKey = w.boundaryUserKeys.data[len(w.boundaryUserKeys.data)-len(s.End):]
102 1 :
103 1 : // Encode each keyspan.Key in the span.
104 1 : for i := range s.Keys {
105 1 : w.trailers.Set(w.keyCount, uint64(s.Keys[i].Trailer))
106 1 : w.suffixes.Put(s.Keys[i].Suffix)
107 1 : w.values.Put(s.Keys[i].Value)
108 1 : w.keyCount++
109 1 : }
110 : }
111 :
112 : // KeyCount returns the count of keyspan.Keys written to the writer.
113 1 : func (w *KeyspanBlockWriter) KeyCount() int {
114 1 : return w.keyCount
115 1 : }
116 :
117 : // UnsafeBoundaryKeys returns the smallest and largest keys written to the
118 : // keyspan block so far. The returned internal keys have user keys that point
119 : // directly into the block writer's memory and must not be mutated.
120 1 : func (w *KeyspanBlockWriter) UnsafeBoundaryKeys() (smallest, largest base.InternalKey) {
121 1 : if w.keyCount == 0 {
122 0 : return smallest, largest
123 0 : }
124 1 : smallest.UserKey = w.boundaryUserKeys.UnsafeGet(0)
125 1 : smallest.Trailer = base.InternalKeyTrailer(w.trailers.Get(0))
126 1 : largest.UserKey = w.boundaryUserKeys.UnsafeGet(w.boundaryUserKeys.rows - 1)
127 1 : largest.Trailer = base.MakeTrailer(base.SeqNumMax,
128 1 : base.InternalKeyTrailer(w.trailers.Get(w.keyCount-1)).Kind())
129 1 : return smallest, largest
130 : }
131 :
132 : // Size returns the size of the pending block.
133 1 : func (w *KeyspanBlockWriter) Size() int {
134 1 : off := blockHeaderSize(keyspanColumnCount, keyspanHeaderSize)
135 1 : // Span boundary columns (with userKeyCount elements).
136 1 : off = w.boundaryUserKeys.Size(w.boundaryUserKeys.rows, off)
137 1 : off = w.boundaryKeyIndexes.Size(w.boundaryUserKeys.rows, off)
138 1 :
139 1 : // keyspan.Key columns (with keyCount elements).
140 1 : off = w.trailers.Size(w.keyCount, off)
141 1 : off = w.suffixes.Size(w.keyCount, off)
142 1 : off = w.values.Size(w.keyCount, off)
143 1 : off++ // trailing padding
144 1 : return int(off)
145 1 : }
146 :
147 : // Finish finalizes the pending block and returns the encoded block.
148 1 : func (w *KeyspanBlockWriter) Finish() []byte {
149 1 : w.enc.init(w.Size(), Header{
150 1 : Version: Version1,
151 1 : Columns: keyspanColumnCount,
152 1 : Rows: uint32(w.keyCount),
153 1 : }, keyspanHeaderSize)
154 1 :
155 1 : // The keyspan block has a 4-byte custom header used to encode the number of
156 1 : // user keys encoded within the user key and start indices columns. All
157 1 : // other columns have the number of rows indicated by the shared columnar
158 1 : // block header.
159 1 : binary.LittleEndian.PutUint32(w.enc.data()[:keyspanHeaderSize], uint32(w.boundaryUserKeys.rows))
160 1 :
161 1 : // Columns with userKeyCount elements.
162 1 : w.enc.encode(w.boundaryUserKeys.rows, &w.boundaryUserKeys)
163 1 : w.enc.encode(w.boundaryUserKeys.rows, &w.boundaryKeyIndexes)
164 1 : // Columns with keyCount elements.
165 1 : w.enc.encode(w.keyCount, &w.trailers)
166 1 : w.enc.encode(w.keyCount, &w.suffixes)
167 1 : w.enc.encode(w.keyCount, &w.values)
168 1 : return w.enc.finish()
169 1 : }
170 :
171 : // String returns a string representation of the pending block's state.
172 1 : func (w *KeyspanBlockWriter) String() string {
173 1 : var buf bytes.Buffer
174 1 : size := uint32(w.Size())
175 1 : fmt.Fprintf(&buf, "size=%d:\n", size)
176 1 :
177 1 : fmt.Fprint(&buf, "0: user keys: ")
178 1 : w.boundaryUserKeys.WriteDebug(&buf, w.boundaryUserKeys.rows)
179 1 : fmt.Fprintln(&buf)
180 1 : fmt.Fprint(&buf, "1: start indices: ")
181 1 : w.boundaryKeyIndexes.WriteDebug(&buf, w.boundaryUserKeys.rows)
182 1 : fmt.Fprintln(&buf)
183 1 :
184 1 : fmt.Fprint(&buf, "2: trailers: ")
185 1 : w.trailers.WriteDebug(&buf, w.keyCount)
186 1 : fmt.Fprintln(&buf)
187 1 : fmt.Fprint(&buf, "3: suffixes: ")
188 1 : w.suffixes.WriteDebug(&buf, w.keyCount)
189 1 : fmt.Fprintln(&buf)
190 1 : fmt.Fprint(&buf, "4: values: ")
191 1 : w.values.WriteDebug(&buf, w.keyCount)
192 1 : fmt.Fprintln(&buf)
193 1 :
194 1 : return buf.String()
195 1 : }
196 :
197 : // A KeyspanReader exposes facilities for reading a keyspan block. A
198 : // KeyspanReader is safe for concurrent use and may be used by multiple
199 : // KeyspanIters concurrently.
200 : type KeyspanReader struct {
201 : blockReader BlockReader
202 : // Span boundary columns with boundaryKeysCount elements.
203 : boundaryKeysCount uint32
204 : boundaryKeys RawBytes
205 : boundaryKeyIndices UnsafeUints
206 :
207 : // keyspan.Key columns with blockReader.header.Rows elements.
208 : trailers UnsafeUints
209 : suffixes RawBytes
210 : values RawBytes
211 : }
212 :
213 : // Init initializes the keyspan reader with the given block data.
214 1 : func (r *KeyspanReader) Init(data []byte) {
215 1 : r.boundaryKeysCount = binary.LittleEndian.Uint32(data[:4])
216 1 : r.blockReader.Init(data, keyspanHeaderSize)
217 1 : // The boundary key columns have a different number of rows than the other
218 1 : // columns, so we call DecodeColumn directly, taking care to pass in
219 1 : // rows=r.boundaryKeysCount.
220 1 : r.boundaryKeys = DecodeColumn(&r.blockReader, keyspanColBoundaryUserKeys,
221 1 : int(r.boundaryKeysCount), DataTypeBytes, DecodeRawBytes)
222 1 : r.boundaryKeyIndices = DecodeColumn(&r.blockReader, keyspanColBoundaryKeyIndices,
223 1 : int(r.boundaryKeysCount), DataTypeUint, DecodeUnsafeUints)
224 1 :
225 1 : r.trailers = r.blockReader.Uints(keyspanColTrailers)
226 1 : r.suffixes = r.blockReader.RawBytes(keyspanColSuffixes)
227 1 : r.values = r.blockReader.RawBytes(keyspanColValues)
228 1 : }
229 :
230 : // DebugString prints a human-readable explanation of the keyspan block's binary
231 : // representation.
232 1 : func (r *KeyspanReader) DebugString() string {
233 1 : f := binfmt.New(r.blockReader.data).LineWidth(20)
234 1 : r.Describe(f)
235 1 : return f.String()
236 1 : }
237 :
238 : // Describe describes the binary format of the keyspan block, assuming
239 : // f.Offset() is positioned at the beginning of the same keyspan block described
240 : // by r.
241 1 : func (r *KeyspanReader) Describe(f *binfmt.Formatter) {
242 1 : // Set the relative offset. When loaded into memory, the beginning of blocks
243 1 : // are aligned. Padding that ensures alignment is done relative to the
244 1 : // current offset. Setting the relative offset ensures that if we're
245 1 : // describing this block within a larger structure (eg, f.Offset()>0), we
246 1 : // compute padding appropriately assuming the current byte f.Offset() is
247 1 : // aligned.
248 1 : f.SetAnchorOffset()
249 1 :
250 1 : f.CommentLine("keyspan block header")
251 1 : f.HexBytesln(4, "user key count: %d", r.boundaryKeysCount)
252 1 : r.blockReader.headerToBinFormatter(f)
253 1 :
254 1 : for i := 0; i < keyspanColumnCount; i++ {
255 1 : // Not all columns in a keyspan block have the same number of rows; the
256 1 : // boundary columns columns are different (and their lengths are held in
257 1 : // the keyspan block header that precedes the ordinary columnar block
258 1 : // header).
259 1 : rows := int(r.blockReader.header.Rows)
260 1 : if i == keyspanColBoundaryUserKeys || i == keyspanColBoundaryKeyIndices {
261 1 : rows = int(r.boundaryKeysCount)
262 1 : }
263 1 : r.blockReader.columnToBinFormatter(f, i, rows)
264 : }
265 1 : f.HexBytesln(1, "block padding byte")
266 : }
267 :
268 : // searchBoundaryKeys returns the index of the first boundary key greater than
269 : // or equal to key and whether or not the key was found exactly.
270 1 : func (r *KeyspanReader) searchBoundaryKeys(cmp base.Compare, key []byte) (index int, equal bool) {
271 1 : i, j := 0, int(r.boundaryKeysCount)
272 1 : for i < j {
273 1 : h := int(uint(i+j) >> 1) // avoid overflow when computing h
274 1 : // i ≤ h < j
275 1 : switch cmp(key, r.boundaryKeys.At(h)) {
276 1 : case +1:
277 1 : i = h + 1
278 1 : case 0:
279 1 : return h, true
280 1 : default:
281 1 : // -1
282 1 : j = h
283 : }
284 : }
285 1 : return i, false
286 : }
287 :
288 : // A KeyspanIter is an iterator over a keyspan block. It implements the
289 : // keyspan.FragmentIterator interface.
290 : type KeyspanIter struct {
291 : r *KeyspanReader
292 : cmp base.Compare
293 : span keyspan.Span
294 : // When positioned, the current span's start key is the user key at
295 : // i.r.userKeys.At(i.startBoundIndex)
296 : // and the current span's end key is the user key at
297 : // i.r.userKeys.At(i.startBoundIndex+1)
298 : startBoundIndex int
299 : keyBuf [2]keyspan.Key
300 : }
301 :
302 : // Assert that KeyspanIter implements the FragmentIterator interface.
303 : var _ keyspan.FragmentIterator = (*KeyspanIter)(nil)
304 :
305 : // Init initializes the iterator with the given comparison function and keyspan
306 : // reader.
307 1 : func (i *KeyspanIter) Init(cmp base.Compare, r *KeyspanReader) {
308 1 : i.r = r
309 1 : i.cmp = cmp
310 1 : i.span.Start, i.span.End = nil, nil
311 1 : i.startBoundIndex = -1
312 1 : if i.span.Keys == nil {
313 1 : i.span.Keys = i.keyBuf[:0]
314 1 : }
315 : }
316 :
317 : // SeekGE moves the iterator to the first span covering a key greater than
318 : // or equal to the given key. This is equivalent to seeking to the first
319 : // span with an end key greater than the given key.
320 1 : func (i *KeyspanIter) SeekGE(key []byte) (*keyspan.Span, error) {
321 1 : // Seek among the boundary keys.
322 1 : j, eq := i.r.searchBoundaryKeys(i.cmp, key)
323 1 : // If the found boundary key does not exactly equal the given key, it's
324 1 : // strictly greater than key. We need to back up one to consider the span
325 1 : // that ends at the this boundary key.
326 1 : if !eq {
327 1 : j = max(j-1, 0)
328 1 : }
329 1 : return i.gatherKeysForward(j), nil
330 : }
331 :
332 : // SeekLT moves the iterator to the last span covering a key less than the
333 : // given key. This is equivalent to seeking to the last span with a start
334 : // key less than the given key.
335 1 : func (i *KeyspanIter) SeekLT(key []byte) (*keyspan.Span, error) {
336 1 : // Seek among the boundary keys.
337 1 : j, eq := i.r.searchBoundaryKeys(i.cmp, key)
338 1 : // If eq is true, the found boundary key exactly equals the given key. A
339 1 : // span that starts at exactly [key] does not contain any keys strictly less
340 1 : // than [key], so back up one index.
341 1 : if eq {
342 1 : j--
343 1 : }
344 : // If all boundaries are less than [key], or only the last boundary is
345 : // greater than the key, then we want the last span so we clamp the index to
346 : // the second to last boundary.
347 1 : return i.gatherKeysBackward(min(j, int(i.r.boundaryKeysCount)-2)), nil
348 : }
349 :
350 : // First moves the iterator to the first span.
351 1 : func (i *KeyspanIter) First() (*keyspan.Span, error) {
352 1 : return i.gatherKeysForward(0), nil
353 1 : }
354 :
355 : // Last moves the iterator to the last span.
356 1 : func (i *KeyspanIter) Last() (*keyspan.Span, error) {
357 1 : return i.gatherKeysBackward(int(i.r.boundaryKeysCount) - 2), nil
358 1 : }
359 :
360 : // Next moves the iterator to the next span.
361 1 : func (i *KeyspanIter) Next() (*keyspan.Span, error) {
362 1 : return i.gatherKeysForward(i.startBoundIndex + 1), nil
363 1 : }
364 :
365 : // Prev moves the iterator to the previous span.
366 1 : func (i *KeyspanIter) Prev() (*keyspan.Span, error) {
367 1 : return i.gatherKeysBackward(max(i.startBoundIndex-1, -1)), nil
368 1 : }
369 :
370 : // gatherKeysForward returns the first non-empty Span in the forward direction,
371 : // starting with the span formed by using the boundary key at index
372 : // [startBoundIndex] as the span's start boundary.
373 1 : func (i *KeyspanIter) gatherKeysForward(startBoundIndex int) *keyspan.Span {
374 1 : if invariants.Enabled && startBoundIndex < 0 {
375 0 : panic(errors.AssertionFailedf("out of bounds: i.startBoundIndex=%d", startBoundIndex))
376 : }
377 1 : i.startBoundIndex = startBoundIndex
378 1 : if i.startBoundIndex >= int(i.r.boundaryKeysCount)-1 {
379 1 : return nil
380 1 : }
381 1 : if !i.isNonemptySpan(i.startBoundIndex) {
382 1 : if i.startBoundIndex == int(i.r.boundaryKeysCount)-2 {
383 0 : // Corruption error
384 0 : panic(base.CorruptionErrorf("keyspan block has empty span at end"))
385 : }
386 1 : i.startBoundIndex++
387 1 : if !i.isNonemptySpan(i.startBoundIndex) {
388 0 : panic(base.CorruptionErrorf("keyspan block has consecutive empty spans"))
389 : }
390 : }
391 1 : return i.materializeSpan()
392 : }
393 :
394 : // gatherKeysBackward returns the first non-empty Span in the backward direction,
395 : // starting with the span formed by using the boundary key at index
396 : // [startBoundIndex] as the span's start boundary.
397 1 : func (i *KeyspanIter) gatherKeysBackward(startBoundIndex int) *keyspan.Span {
398 1 : i.startBoundIndex = startBoundIndex
399 1 : if i.startBoundIndex < 0 {
400 1 : return nil
401 1 : }
402 1 : if invariants.Enabled && i.startBoundIndex >= int(i.r.boundaryKeysCount)-1 {
403 0 : panic(errors.AssertionFailedf("out of bounds: i.startBoundIndex=%d, i.r.boundaryKeysCount=%d",
404 0 : i.startBoundIndex, i.r.boundaryKeysCount))
405 : }
406 1 : if !i.isNonemptySpan(i.startBoundIndex) {
407 1 : if i.startBoundIndex == 0 {
408 0 : // Corruption error
409 0 : panic(base.CorruptionErrorf("keyspan block has empty span at beginning"))
410 : }
411 1 : i.startBoundIndex--
412 1 : if !i.isNonemptySpan(i.startBoundIndex) {
413 0 : panic(base.CorruptionErrorf("keyspan block has consecutive empty spans"))
414 : }
415 : }
416 1 : return i.materializeSpan()
417 : }
418 :
419 : // isNonemptySpan returns true if the span starting at i.startBoundIndex
420 : // contains keys.
421 1 : func (i *KeyspanIter) isNonemptySpan(startBoundIndex int) bool {
422 1 : return i.r.boundaryKeyIndices.At(startBoundIndex) < i.r.boundaryKeyIndices.At(startBoundIndex+1)
423 1 : }
424 :
425 : // materializeSpan constructs the current span from i.startBoundIndex and
426 : // i.{start,end}KeyIndex.
427 1 : func (i *KeyspanIter) materializeSpan() *keyspan.Span {
428 1 : i.span = keyspan.Span{
429 1 : Start: i.r.boundaryKeys.At(i.startBoundIndex),
430 1 : End: i.r.boundaryKeys.At(i.startBoundIndex + 1),
431 1 : Keys: i.span.Keys[:0],
432 1 : }
433 1 : startIndex := i.r.boundaryKeyIndices.At(i.startBoundIndex)
434 1 : endIndex := i.r.boundaryKeyIndices.At(i.startBoundIndex + 1)
435 1 : if cap(i.span.Keys) < int(endIndex-startIndex) {
436 1 : i.span.Keys = make([]keyspan.Key, 0, int(endIndex-startIndex))
437 1 : }
438 1 : for j := startIndex; j < endIndex; j++ {
439 1 : i.span.Keys = append(i.span.Keys, keyspan.Key{
440 1 : Trailer: base.InternalKeyTrailer(i.r.trailers.At(int(j))),
441 1 : Suffix: i.r.suffixes.At(int(j)),
442 1 : Value: i.r.values.At(int(j)),
443 1 : })
444 1 : }
445 1 : return &i.span
446 : }
447 :
448 : // Close closes the iterator.
449 0 : func (i *KeyspanIter) Close() {}
450 :
451 : // SetContext implements keyspan.FragmentIterator.
452 0 : func (i *KeyspanIter) SetContext(context.Context) {}
453 :
454 : // WrapChildren implements keyspan.FragmentIterator.
455 0 : func (i *KeyspanIter) WrapChildren(keyspan.WrapFn) {}
456 :
457 : // DebugTree is part of the FragmentIterator interface.
458 0 : func (i *KeyspanIter) DebugTree(tp treeprinter.Node) {
459 0 : tp.Childf("%T(%p)", i, i)
460 0 : }
|