Line data Source code
1 : // Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package colblk
6 :
7 : import (
8 : "github.com/cockroachdb/errors"
9 : "github.com/cockroachdb/pebble/internal/base"
10 : "github.com/cockroachdb/pebble/internal/binfmt"
11 : "github.com/cockroachdb/pebble/internal/invariants"
12 : "github.com/cockroachdb/pebble/sstable/block"
13 : )
14 :
15 : const indexBlockCustomHeaderSize = 0
16 :
17 : // IndexBlockWriter writes columnar index blocks. The writer is used for both
18 : // first-level and second-level index blocks. The index block schema consists of
19 : // three primary columns:
20 : // - Separators: a user key that is ≥ the largest user key in the
21 : // corresponding entry, and ≤ the smallest user key in the next entry.
22 : // Note that this allows consecutive separators to be equal. This is
23 : // possible when snapshots required we preserve duplicate user keys at
24 : // different sequence numbers.
25 : // - Offsets: the offset of the end of the corresponding block.
26 : // - Lengths: the length in bytes of the corresponding block.
27 : // - Block properties: a slice encoding arbitrary user-defined block
28 : // properties.
29 : //
30 : // TODO(jackson): Consider splitting separators into prefixes and suffixes (even
31 : // without user-defined columns). This would allow us to use prefix compression
32 : // for the prefix. Separators should typically be suffixless unless two KVs with
33 : // the same prefix straddle a block boundary. We would need to use a buffer to
34 : // materialize the separator key when we need to use it outside the context of
35 : // seeking within the block.
36 : type IndexBlockWriter struct {
37 : separators RawBytesBuilder
38 : offsets UintBuilder
39 : lengths UintBuilder
40 : blockProperties RawBytesBuilder
41 : rows int
42 : enc blockEncoder
43 : }
44 :
45 : const (
46 : indexBlockColumnSeparator = iota
47 : indexBlockColumnOffsets
48 : indexBlockColumnLengths
49 : indexBlockColumnBlockProperties
50 : indexBlockColumnCount
51 : )
52 :
53 : // Init initializes the index block writer.
54 0 : func (w *IndexBlockWriter) Init() {
55 0 : w.separators.Init()
56 0 : w.offsets.Init()
57 0 : w.lengths.InitWithDefault()
58 0 : w.blockProperties.Init()
59 0 : w.rows = 0
60 0 : }
61 :
62 : // Reset resets the index block writer to its initial state, retaining buffers.
63 0 : func (w *IndexBlockWriter) Reset() {
64 0 : w.separators.Reset()
65 0 : w.offsets.Reset()
66 0 : w.lengths.Reset()
67 0 : w.blockProperties.Reset()
68 0 : w.rows = 0
69 0 : w.enc.reset()
70 0 : }
71 :
72 : // Rows returns the number of entries in the index block so far.
73 0 : func (w *IndexBlockWriter) Rows() int {
74 0 : return w.rows
75 0 : }
76 :
77 : // AddBlockHandle adds a new separator and end offset of a data block to the
78 : // index block. Add returns the index of the row.
79 : //
80 : // AddBlockHandle should only be used for first-level index blocks.
81 : func (w *IndexBlockWriter) AddBlockHandle(
82 : separator []byte, handle block.Handle, blockProperties []byte,
83 0 : ) int {
84 0 : idx := w.rows
85 0 : w.separators.Put(separator)
86 0 : w.offsets.Set(w.rows, handle.Offset)
87 0 : w.lengths.Set(w.rows, handle.Length)
88 0 : w.blockProperties.Put(blockProperties)
89 0 : w.rows++
90 0 : return idx
91 0 : }
92 :
93 : // UnsafeSeparator returns the separator of the i'th entry.
94 0 : func (w *IndexBlockWriter) UnsafeSeparator(i int) []byte {
95 0 : return w.separators.UnsafeGet(i)
96 0 : }
97 :
98 : // Size returns the size of the pending index block.
99 0 : func (w *IndexBlockWriter) Size() int {
100 0 : return w.size(w.rows)
101 0 : }
102 :
103 0 : func (w *IndexBlockWriter) size(rows int) int {
104 0 : off := blockHeaderSize(indexBlockColumnCount, indexBlockCustomHeaderSize)
105 0 : off = w.separators.Size(rows, off)
106 0 : off = w.offsets.Size(rows, off)
107 0 : off = w.lengths.Size(rows, off)
108 0 : off = w.blockProperties.Size(rows, off)
109 0 : off++
110 0 : return int(off)
111 0 : }
112 :
113 : // Finish serializes the pending index block, including the first [rows] rows.
114 : // The value of [rows] must be Rows() or Rows()-1.
115 0 : func (w *IndexBlockWriter) Finish(rows int) []byte {
116 0 : if invariants.Enabled && rows != w.rows && rows != w.rows-1 {
117 0 : panic(errors.AssertionFailedf("index block has %d rows; asked to finish %d", w.rows, rows))
118 : }
119 :
120 0 : w.enc.init(w.size(rows), Header{
121 0 : Version: Version1,
122 0 : Columns: indexBlockColumnCount,
123 0 : Rows: uint32(rows),
124 0 : }, indexBlockCustomHeaderSize)
125 0 : w.enc.encode(rows, &w.separators)
126 0 : w.enc.encode(rows, &w.offsets)
127 0 : w.enc.encode(rows, &w.lengths)
128 0 : w.enc.encode(rows, &w.blockProperties)
129 0 : return w.enc.finish()
130 : }
131 :
132 : // An IndexReader reads columnar index blocks.
133 : type IndexReader struct {
134 : separators RawBytes
135 : offsets UnsafeUints
136 : lengths UnsafeUints // only used for second-level index blocks
137 : blockProps RawBytes
138 : br BlockReader
139 : }
140 :
141 : // Init initializes the index reader with the given serialized index block.
142 0 : func (r *IndexReader) Init(data []byte) {
143 0 : r.br.Init(data, indexBlockCustomHeaderSize)
144 0 : r.separators = r.br.RawBytes(indexBlockColumnSeparator)
145 0 : r.offsets = r.br.Uints(indexBlockColumnOffsets)
146 0 : r.lengths = r.br.Uints(indexBlockColumnLengths)
147 0 : r.blockProps = r.br.RawBytes(indexBlockColumnBlockProperties)
148 0 : }
149 :
150 : // DebugString prints a human-readable explanation of the keyspan block's binary
151 : // representation.
152 0 : func (r *IndexReader) DebugString() string {
153 0 : f := binfmt.New(r.br.data).LineWidth(20)
154 0 : r.Describe(f)
155 0 : return f.String()
156 0 : }
157 :
158 : // Describe describes the binary format of the index block, assuming f.Offset()
159 : // is positioned at the beginning of the same index block described by r.
160 0 : func (r *IndexReader) Describe(f *binfmt.Formatter) {
161 0 : // Set the relative offset. When loaded into memory, the beginning of blocks
162 0 : // are aligned. Padding that ensures alignment is done relative to the
163 0 : // current offset. Setting the relative offset ensures that if we're
164 0 : // describing this block within a larger structure (eg, f.Offset()>0), we
165 0 : // compute padding appropriately assuming the current byte f.Offset() is
166 0 : // aligned.
167 0 : f.SetAnchorOffset()
168 0 :
169 0 : f.CommentLine("index block header")
170 0 : r.br.headerToBinFormatter(f)
171 0 : for i := 0; i < indexBlockColumnCount; i++ {
172 0 : r.br.columnToBinFormatter(f, i, int(r.br.header.Rows))
173 0 : }
174 0 : f.HexBytesln(1, "block padding byte")
175 : }
176 :
177 : // IndexIter is an iterator over the block entries in an index block.
178 : type IndexIter struct {
179 : compare base.Compare
180 : r *IndexReader
181 : n int
182 : row int
183 :
184 : h block.BufferHandle
185 : allocReader IndexReader
186 : }
187 :
188 : // Assert that IndexIter satisfies the block.IndexBlockIterator interface.
189 : var _ block.IndexBlockIterator = (*IndexIter)(nil)
190 :
191 : // InitReader initializes an index iterator from the provided reader.
192 0 : func (i *IndexIter) InitReader(compare base.Compare, r *IndexReader) {
193 0 : *i = IndexIter{
194 0 : compare: compare,
195 0 : r: r,
196 0 : n: int(r.br.header.Rows),
197 0 : h: i.h,
198 0 : allocReader: i.allocReader,
199 0 : }
200 0 : }
201 :
202 : // Init initializes an iterator from the provided block data slice.
203 : func (i *IndexIter) Init(
204 : cmp base.Compare, split base.Split, blk []byte, transforms block.IterTransforms,
205 0 : ) error {
206 0 : i.h.Release()
207 0 : i.h = block.BufferHandle{}
208 0 : // TODO(jackson): Handle the transforms.
209 0 : i.allocReader.Init(blk)
210 0 : i.InitReader(cmp, &i.allocReader)
211 0 : return nil
212 0 : }
213 :
214 : // InitHandle initializes an iterator from the provided block handle.
215 : func (i *IndexIter) InitHandle(
216 : cmp base.Compare, split base.Split, blk block.BufferHandle, transforms block.IterTransforms,
217 0 : ) error {
218 0 : // TODO(jackson): Handle the transforms.
219 0 :
220 0 : // TODO(jackson): If block.h != nil, use a *IndexReader that's allocated
221 0 : // when the block is loaded into the block cache. On cache hits, this will
222 0 : // reduce the amount of setup necessary to use an iterator. (It's relatively
223 0 : // common to open an iterator and perform just a few seeks, so avoiding the
224 0 : // overhead can be material.)
225 0 : i.h.Release()
226 0 : i.h = blk
227 0 : i.allocReader.Init(i.h.Get())
228 0 : i.InitReader(cmp, &i.allocReader)
229 0 : return nil
230 0 : }
231 :
232 : // RowIndex returns the index of the block entry at the iterator's current
233 : // position.
234 0 : func (i *IndexIter) RowIndex() int {
235 0 : return i.row
236 0 : }
237 :
238 : // ResetForReuse resets the IndexIter for reuse, retaining buffers to avoid
239 : // future allocations.
240 0 : func (i *IndexIter) ResetForReuse() IndexIter {
241 0 : if invariants.Enabled && i.h != (block.BufferHandle{}) {
242 0 : panic(errors.AssertionFailedf("IndexIter reset for reuse with non-empty handle"))
243 : }
244 0 : return IndexIter{ /* nothing to retain */ }
245 : }
246 :
247 : // Valid returns true if the iterator is currently positioned at a valid block
248 : // handle.
249 0 : func (i *IndexIter) Valid() bool {
250 0 : return 0 <= i.row && i.row < i.n
251 0 : }
252 :
253 : // Invalidate invalidates the block iterator, removing references to the block
254 : // it was initialized with.
255 0 : func (i *IndexIter) Invalidate() {
256 0 : i.r = nil
257 0 : i.n = 0
258 0 : }
259 :
260 : // IsDataInvalidated returns true when the iterator has been invalidated
261 : // using an Invalidate call. NB: this is different from Valid.
262 0 : func (i *IndexIter) IsDataInvalidated() bool {
263 0 : return i.r == nil
264 0 : }
265 :
266 : // Handle returns the underlying block buffer handle, if the iterator was
267 : // initialized with one.
268 0 : func (i *IndexIter) Handle() block.BufferHandle {
269 0 : return i.h
270 0 : }
271 :
272 : // Separator returns the separator at the iterator's current position. The
273 : // iterator must be positioned at a valid row.
274 0 : func (i *IndexIter) Separator() []byte {
275 0 : return i.r.separators.At(i.row)
276 0 : }
277 :
278 : // BlockHandleWithProperties decodes the block handle with any encoded
279 : // properties at the iterator's current position.
280 0 : func (i *IndexIter) BlockHandleWithProperties() (block.HandleWithProperties, error) {
281 0 : return block.HandleWithProperties{
282 0 : Handle: block.Handle{
283 0 : Offset: i.r.offsets.At(i.row),
284 0 : Length: i.r.lengths.At(i.row),
285 0 : },
286 0 : Props: i.r.blockProps.At(i.row),
287 0 : }, nil
288 0 : }
289 :
290 : // SeekGE seeks the index iterator to the first block entry with a separator key
291 : // greater or equal to the given key. It returns false if the seek key is
292 : // greater than all index block separators.
293 0 : func (i *IndexIter) SeekGE(key []byte) bool {
294 0 : // Define f(-1) == false and f(upper) == true.
295 0 : // Invariant: f(index-1) == false, f(upper) == true.
296 0 : index, upper := 0, i.n
297 0 : for index < upper {
298 0 : h := int(uint(index+upper) >> 1) // avoid overflow when computing h
299 0 : // index ≤ h < upper
300 0 :
301 0 : // TODO(jackson): Is Bytes.At or Bytes.Slice(Bytes.Offset(h),
302 0 : // Bytes.Offset(h+1)) faster in this code?
303 0 : c := i.compare(key, i.r.separators.At(h))
304 0 : if c > 0 {
305 0 : index = h + 1 // preserves f(index-1) == false
306 0 : } else {
307 0 : upper = h // preserves f(upper) == true
308 0 : }
309 : }
310 : // index == upper, f(index-1) == false, and f(upper) (= f(index)) == true => answer is index.
311 0 : i.row = index
312 0 : return index < i.n
313 : }
314 :
315 : // First seeks index iterator to the first block entry. It returns false if the
316 : // index block is empty.
317 0 : func (i *IndexIter) First() bool {
318 0 : i.row = 0
319 0 : return i.n > 0
320 0 : }
321 :
322 : // Last seeks index iterator to the last block entry. It returns false if the
323 : // index block is empty.
324 0 : func (i *IndexIter) Last() bool {
325 0 : i.row = i.n - 1
326 0 : return i.n > 0
327 0 : }
328 :
329 : // Next steps the index iterator to the next block entry. It returns false if
330 : // the index block is exhausted in the forward direction. A call to Next while
331 : // already exhausted in the forward direction is a no-op.
332 0 : func (i *IndexIter) Next() bool {
333 0 : i.row = min(i.n, i.row+1)
334 0 : return i.row < i.n
335 0 : }
336 :
337 : // Prev steps the index iterator to the previous block entry. It returns false
338 : // if the index block is exhausted in the reverse direction. A call to Prev
339 : // while already exhausted in the reverse direction is a no-op.
340 0 : func (i *IndexIter) Prev() bool {
341 0 : i.row = max(-1, i.row-1)
342 0 : return i.row >= 0 && i.row < i.n
343 0 : }
344 :
345 : // Close closes the iterator, releasing any resources it holds.
346 0 : func (i *IndexIter) Close() error {
347 0 : i.h.Release()
348 0 : *i = IndexIter{}
349 0 : return nil
350 0 : }
|