Line data Source code
1 : // Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package colblk
6 :
7 : import (
8 : "bytes"
9 :
10 : "github.com/cockroachdb/errors"
11 : "github.com/cockroachdb/pebble/internal/base"
12 : "github.com/cockroachdb/pebble/internal/binfmt"
13 : "github.com/cockroachdb/pebble/internal/invariants"
14 : "github.com/cockroachdb/pebble/sstable/block"
15 : )
16 :
17 : const indexBlockCustomHeaderSize = 0
18 :
19 : // IndexBlockWriter writes columnar index blocks. The writer is used for both
20 : // first-level and second-level index blocks. The index block schema consists of
21 : // three primary columns:
22 : // - Separators: a user key that is ≥ the largest user key in the
23 : // corresponding entry, and ≤ the smallest user key in the next entry.
24 : // Note that this allows consecutive separators to be equal. This is
25 : // possible when snapshots required we preserve duplicate user keys at
26 : // different sequence numbers.
27 : // - Offsets: the offset of the end of the corresponding block.
28 : // - Lengths: the length in bytes of the corresponding block.
29 : // - Block properties: a slice encoding arbitrary user-defined block
30 : // properties.
31 : //
32 : // TODO(jackson): Consider splitting separators into prefixes and suffixes (even
33 : // without user-defined columns). This would allow us to use prefix compression
34 : // for the prefix. Separators should typically be suffixless unless two KVs with
35 : // the same prefix straddle a block boundary. We would need to use a buffer to
36 : // materialize the separator key when we need to use it outside the context of
37 : // seeking within the block.
38 : type IndexBlockWriter struct {
39 : separators RawBytesBuilder
40 : offsets UintBuilder
41 : lengths UintBuilder
42 : blockProperties RawBytesBuilder
43 : rows int
44 : enc blockEncoder
45 : }
46 :
47 : const (
48 : indexBlockColumnSeparator = iota
49 : indexBlockColumnOffsets
50 : indexBlockColumnLengths
51 : indexBlockColumnBlockProperties
52 : indexBlockColumnCount
53 : )
54 :
55 : // Init initializes the index block writer.
56 1 : func (w *IndexBlockWriter) Init(blockProperties ...ColumnWriter) {
57 1 : w.separators.Init()
58 1 : w.offsets.Init()
59 1 : w.lengths.InitWithDefault()
60 1 : w.blockProperties.Init()
61 1 : w.rows = 0
62 1 : }
63 :
64 : // Reset resets the index block writer to its initial state, retaining buffers.
65 1 : func (w *IndexBlockWriter) Reset() {
66 1 : w.separators.Reset()
67 1 : w.offsets.Reset()
68 1 : w.lengths.Reset()
69 1 : w.blockProperties.Reset()
70 1 : w.rows = 0
71 1 : w.enc.reset()
72 1 : }
73 :
74 : // Rows returns the number of entries in the index block so far.
75 1 : func (w *IndexBlockWriter) Rows() int {
76 1 : return w.rows
77 1 : }
78 :
79 : // AddBlockHandle adds a new separator and end offset of a data block to the
80 : // index block. Add returns the index of the row.
81 : //
82 : // AddBlockHandle should only be used for first-level index blocks.
83 : func (w *IndexBlockWriter) AddBlockHandle(
84 : separator []byte, handle block.Handle, blockProperties []byte,
85 1 : ) int {
86 1 : idx := w.rows
87 1 : w.separators.Put(separator)
88 1 : w.offsets.Set(w.rows, handle.Offset)
89 1 : w.lengths.Set(w.rows, handle.Length)
90 1 : w.blockProperties.Put(blockProperties)
91 1 : w.rows++
92 1 : return idx
93 1 : }
94 :
95 : // UnsafeSeparator returns the separator of the i'th entry.
96 1 : func (w *IndexBlockWriter) UnsafeSeparator(i int) []byte {
97 1 : return w.separators.UnsafeGet(i)
98 1 : }
99 :
100 : // Size returns the size of the pending index block.
101 1 : func (w *IndexBlockWriter) Size() int {
102 1 : return w.size(w.rows)
103 1 : }
104 :
105 1 : func (w *IndexBlockWriter) size(rows int) int {
106 1 : off := blockHeaderSize(indexBlockColumnCount, indexBlockCustomHeaderSize)
107 1 : off = w.separators.Size(rows, off)
108 1 : off = w.offsets.Size(rows, off)
109 1 : off = w.lengths.Size(rows, off)
110 1 : off = w.blockProperties.Size(rows, off)
111 1 : off++
112 1 : return int(off)
113 1 : }
114 :
115 : // Finish serializes the pending index block, including the first [rows] rows.
116 : // The value of [rows] must be Rows() or Rows()-1.
117 1 : func (w *IndexBlockWriter) Finish(rows int) []byte {
118 1 : if invariants.Enabled && rows != w.rows && rows != w.rows-1 {
119 0 : panic(errors.AssertionFailedf("index block has %d rows; asked to finish %d", w.rows, rows))
120 : }
121 :
122 1 : w.enc.init(w.size(rows), Header{
123 1 : Version: Version1,
124 1 : Columns: indexBlockColumnCount,
125 1 : Rows: uint32(rows),
126 1 : }, indexBlockCustomHeaderSize)
127 1 : w.enc.encode(rows, &w.separators)
128 1 : w.enc.encode(rows, &w.offsets)
129 1 : w.enc.encode(rows, &w.lengths)
130 1 : w.enc.encode(rows, &w.blockProperties)
131 1 : return w.enc.finish()
132 : }
133 :
134 : // An IndexReader reads columnar index blocks.
135 : type IndexReader struct {
136 : separators RawBytes
137 : offsets UnsafeUints
138 : lengths UnsafeUints // only used for second-level index blocks
139 : blockProps RawBytes
140 : br BlockReader
141 : }
142 :
143 : // Init initializes the index reader with the given serialized index block.
144 1 : func (r *IndexReader) Init(data []byte) {
145 1 : r.br.Init(data, indexBlockCustomHeaderSize)
146 1 : r.separators = r.br.RawBytes(indexBlockColumnSeparator)
147 1 : r.offsets = r.br.Uints(indexBlockColumnOffsets)
148 1 : r.lengths = r.br.Uints(indexBlockColumnLengths)
149 1 : r.blockProps = r.br.RawBytes(indexBlockColumnBlockProperties)
150 1 : }
151 :
152 : // DebugString prints a human-readable explanation of the keyspan block's binary
153 : // representation.
154 1 : func (r *IndexReader) DebugString() string {
155 1 : f := binfmt.New(r.br.data).LineWidth(20)
156 1 : r.Describe(f)
157 1 : return f.String()
158 1 : }
159 :
160 : // Describe describes the binary format of the index block, assuming f.Offset()
161 : // is positioned at the beginning of the same index block described by r.
162 1 : func (r *IndexReader) Describe(f *binfmt.Formatter) {
163 1 : // Set the relative offset. When loaded into memory, the beginning of blocks
164 1 : // are aligned. Padding that ensures alignment is done relative to the
165 1 : // current offset. Setting the relative offset ensures that if we're
166 1 : // describing this block within a larger structure (eg, f.Offset()>0), we
167 1 : // compute padding appropriately assuming the current byte f.Offset() is
168 1 : // aligned.
169 1 : f.SetAnchorOffset()
170 1 :
171 1 : f.CommentLine("index block header")
172 1 : r.br.headerToBinFormatter(f)
173 1 : for i := 0; i < indexBlockColumnCount; i++ {
174 1 : r.br.columnToBinFormatter(f, i, int(r.br.header.Rows))
175 1 : }
176 1 : f.HexBytesln(1, "block padding byte")
177 : }
178 :
179 : // IndexIter is an iterator over the block entries in an index block.
180 : type IndexIter struct {
181 : r *IndexReader
182 : n int
183 : row int
184 :
185 : h block.BufferHandle
186 : allocReader IndexReader
187 : }
188 :
189 : // Assert that IndexIter satisfies the block.IndexBlockIterator interface.
190 : var _ block.IndexBlockIterator = (*IndexIter)(nil)
191 :
192 : // InitReader initializes an index iterator from the provided reader.
193 1 : func (i *IndexIter) InitReader(r *IndexReader) {
194 1 : *i = IndexIter{r: r, n: int(r.br.header.Rows), allocReader: i.allocReader}
195 1 : }
196 :
197 : // Init initializes an iterator from the provided block data slice.
198 : func (i *IndexIter) Init(
199 : cmp base.Compare, split base.Split, blk []byte, transforms block.IterTransforms,
200 1 : ) error {
201 1 : // TODO(jackson): Handle the transforms.
202 1 : i.allocReader.Init(blk)
203 1 : i.InitReader(&i.allocReader)
204 1 : return nil
205 1 : }
206 :
207 : // InitHandle initializes an iterator from the provided block handle.
208 : func (i *IndexIter) InitHandle(
209 : cmp base.Compare, split base.Split, block block.BufferHandle, transforms block.IterTransforms,
210 0 : ) error {
211 0 : // TODO(jackson): Handle the transforms.
212 0 :
213 0 : // TODO(jackson): If block.h != nil, use a *IndexReader that's allocated
214 0 : // when the block is loaded into the block cache. On cache hits, this will
215 0 : // reduce the amount of setup necessary to use an iterator. (It's relatively
216 0 : // common to open an iterator and perform just a few seeks, so avoiding the
217 0 : // overhead can be material.)
218 0 : i.h.Release()
219 0 : i.h = block
220 0 : return i.Init(cmp, split, i.h.Get(), transforms)
221 0 : }
222 :
223 : // RowIndex returns the index of the block entry at the iterator's current
224 : // position.
225 0 : func (i *IndexIter) RowIndex() int {
226 0 : return i.row
227 0 : }
228 :
229 : // ResetForReuse resets the IndexIter for reuse, retaining buffers to avoid
230 : // future allocations.
231 0 : func (i *IndexIter) ResetForReuse() IndexIter {
232 0 : return IndexIter{ /* nothing to retain */ }
233 0 : }
234 :
235 : // Valid returns true if the iterator is currently positioned at a valid block
236 : // handle.
237 0 : func (i *IndexIter) Valid() bool {
238 0 : return 0 <= i.row && i.row < int(i.r.br.header.Rows)
239 0 : }
240 :
241 : // Invalidate invalidates the block iterator, removing references to the block
242 : // it was initialized with.
243 0 : func (i *IndexIter) Invalidate() {
244 0 : i.r = nil
245 0 : i.h = block.BufferHandle{}
246 0 : }
247 :
248 : // IsDataInvalidated returns true when the iterator has been invalidated
249 : // using an Invalidate call. NB: this is different from Valid.
250 0 : func (i *IndexIter) IsDataInvalidated() bool {
251 0 : return i.r == nil
252 0 : }
253 :
254 : // Handle returns the underlying block buffer handle, if the iterator was
255 : // initialized with one.
256 0 : func (i *IndexIter) Handle() block.BufferHandle {
257 0 : return i.h
258 0 : }
259 :
260 : // Separator returns the separator at the iterator's current position. The
261 : // iterator must be positioned at a valid row.
262 0 : func (i *IndexIter) Separator() []byte {
263 0 : return i.r.separators.At(i.row)
264 0 : }
265 :
266 : // BlockHandleWithProperties decodes the block handle with any encoded
267 : // properties at the iterator's current position.
268 1 : func (i *IndexIter) BlockHandleWithProperties() (block.HandleWithProperties, error) {
269 1 : return block.HandleWithProperties{
270 1 : Handle: block.Handle{
271 1 : Offset: i.r.offsets.At(i.row),
272 1 : Length: i.r.lengths.At(i.row),
273 1 : },
274 1 : Props: i.r.blockProps.At(i.row),
275 1 : }, nil
276 1 : }
277 :
278 : // SeekGE seeks the index iterator to the first block entry with a separator key
279 : // greater or equal to the given key. It returns false if the seek key is
280 : // greater than all index block separators.
281 1 : func (i *IndexIter) SeekGE(key []byte) bool {
282 1 : // Define f(-1) == false and f(upper) == true.
283 1 : // Invariant: f(index-1) == false, f(upper) == true.
284 1 : index, upper := 0, i.n
285 1 : for index < upper {
286 1 : h := int(uint(index+upper) >> 1) // avoid overflow when computing h
287 1 : // index ≤ h < upper
288 1 :
289 1 : // TODO(jackson): Is Bytes.At or Bytes.Slice(Bytes.Offset(h),
290 1 : // Bytes.Offset(h+1)) faster in this code?
291 1 : c := bytes.Compare(key, i.r.separators.At(h))
292 1 : if c > 0 {
293 1 : index = h + 1 // preserves f(index-1) == false
294 1 : } else {
295 1 : upper = h // preserves f(upper) == true
296 1 : }
297 : }
298 : // index == upper, f(index-1) == false, and f(upper) (= f(index)) == true => answer is index.
299 1 : i.row = index
300 1 : return index < i.n
301 : }
302 :
303 : // First seeks index iterator to the first block entry. It returns false if the
304 : // index block is empty.
305 1 : func (i *IndexIter) First() bool {
306 1 : i.row = 0
307 1 : return i.n > 0
308 1 : }
309 :
310 : // Last seeks index iterator to the last block entry. It returns false if the
311 : // index block is empty.
312 1 : func (i *IndexIter) Last() bool {
313 1 : i.row = i.n - 1
314 1 : return i.n > 0
315 1 : }
316 :
317 : // Next steps the index iterator to the next block entry. It returns false if
318 : // the index block is exhausted.
319 1 : func (i *IndexIter) Next() bool {
320 1 : i.row++
321 1 : return i.row < i.n
322 1 : }
323 :
324 : // Prev steps the index iterator to the previous block entry. It returns false
325 : // if the index block is exhausted.
326 1 : func (i *IndexIter) Prev() bool {
327 1 : i.row--
328 1 : return i.row >= 0
329 1 : }
330 :
331 : // Close closes the iterator, releasing any resources it holds.
332 1 : func (i *IndexIter) Close() error {
333 1 : i.h.Release()
334 1 : *i = IndexIter{}
335 1 : return nil
336 1 : }
|