Line data Source code
1 : // Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package colblk
6 :
7 : import (
8 : "bytes"
9 :
10 : "github.com/cockroachdb/pebble/internal/base"
11 : "github.com/cockroachdb/pebble/internal/binfmt"
12 : "github.com/cockroachdb/pebble/sstable/block"
13 : )
14 :
15 : const indexBlockCustomHeaderSize = 0
16 :
17 : // IndexBlockWriter writes columnar index blocks. The writer is used for both
18 : // first-level and second-level index blocks. The index block schema consists of
19 : // three primary columns:
20 : // - Separators: a user key that is ≥ the largest user key in the
21 : // corresponding entry, and ≤ the smallest user key in the next entry.
22 : // Note that this allows consecutive separators to be equal. This is
23 : // possible when snapshots required we preserve duplicate user keys at
24 : // different sequence numbers.
25 : // - Offsets: the offset of the end of the corresponding block.
26 : // - Lengths: the length in bytes of the corresponding block.
27 : // - Block properties: a slice encoding arbitrary user-defined block
28 : // properties.
29 : //
30 : // TODO(jackson): Consider splitting separators into prefixes and suffixes (even
31 : // without user-defined columns). This would allow us to use prefix compression
32 : // for the prefix. Separators should typically be suffixless unless two KVs with
33 : // the same prefix straddle a block boundary. We would need to use a buffer to
34 : // materialize the separator key when we need to use it outside the context of
35 : // seeking within the block.
36 : type IndexBlockWriter struct {
37 : separators RawBytesBuilder
38 : offsets UintBuilder
39 : lengths UintBuilder
40 : blockProperties RawBytesBuilder
41 : rows int
42 : enc blockEncoder
43 : }
44 :
45 : const (
46 : indexBlockColumnSeparator = iota
47 : indexBlockColumnOffsets
48 : indexBlockColumnLengths
49 : indexBlockColumnBlockProperties
50 : indexBlockColumnCount
51 : )
52 :
53 : // Init initializes the index block writer.
54 1 : func (w *IndexBlockWriter) Init(blockProperties ...ColumnWriter) {
55 1 : w.separators.Init()
56 1 : w.offsets.Init()
57 1 : w.lengths.InitWithDefault()
58 1 : w.blockProperties.Init()
59 1 : w.rows = 0
60 1 : }
61 :
62 : // Reset resets the index block writer to its initial state, retaining buffers.
63 0 : func (w *IndexBlockWriter) Reset() {
64 0 : w.separators.Reset()
65 0 : w.offsets.Reset()
66 0 : w.lengths.Reset()
67 0 : w.blockProperties.Reset()
68 0 : w.rows = 0
69 0 : w.enc.reset()
70 0 : }
71 :
72 : // Rows returns the number of entries in the index block so far.
73 1 : func (w *IndexBlockWriter) Rows() int {
74 1 : return w.rows
75 1 : }
76 :
77 : // AddBlockHandle adds a new separator and end offset of a data block to the
78 : // index block. Add returns the index of the row.
79 : //
80 : // AddBlockHandle should only be used for first-level index blocks.
81 : func (w *IndexBlockWriter) AddBlockHandle(
82 : separator []byte, handle block.Handle, blockProperties []byte,
83 1 : ) int {
84 1 : idx := w.rows
85 1 : w.separators.Put(separator)
86 1 : w.offsets.Set(w.rows, handle.Offset)
87 1 : w.lengths.Set(w.rows, handle.Length)
88 1 : w.blockProperties.Put(blockProperties)
89 1 : w.rows++
90 1 : return idx
91 1 : }
92 :
93 : // UnsafeSeparator returns the separator of the i'th entry.
94 1 : func (w *IndexBlockWriter) UnsafeSeparator(i int) []byte {
95 1 : return w.separators.UnsafeGet(i)
96 1 : }
97 :
98 : // Size returns the size of the pending index block.
99 1 : func (w *IndexBlockWriter) Size() int {
100 1 : off := blockHeaderSize(indexBlockColumnCount, indexBlockCustomHeaderSize)
101 1 : off = w.separators.Size(w.rows, off)
102 1 : off = w.offsets.Size(w.rows, off)
103 1 : off = w.lengths.Size(w.rows, off)
104 1 : off = w.blockProperties.Size(w.rows, off)
105 1 : off++
106 1 : return int(off)
107 1 : }
108 :
109 : // Finish serializes the pending index block.
110 1 : func (w *IndexBlockWriter) Finish() []byte {
111 1 : w.enc.init(w.Size(), Header{
112 1 : Version: Version1,
113 1 : Columns: indexBlockColumnCount,
114 1 : Rows: uint32(w.rows),
115 1 : }, indexBlockCustomHeaderSize)
116 1 : w.enc.encode(w.rows, &w.separators)
117 1 : w.enc.encode(w.rows, &w.offsets)
118 1 : w.enc.encode(w.rows, &w.lengths)
119 1 : w.enc.encode(w.rows, &w.blockProperties)
120 1 : return w.enc.finish()
121 1 : }
122 :
123 : // An IndexReader reads columnar index blocks.
124 : type IndexReader struct {
125 : separators RawBytes
126 : offsets UnsafeUints
127 : lengths UnsafeUints // only used for second-level index blocks
128 : blockProps RawBytes
129 : br BlockReader
130 : }
131 :
132 : // Init initializes the index reader with the given serialized index block.
133 1 : func (r *IndexReader) Init(data []byte) {
134 1 : r.br.Init(data, indexBlockCustomHeaderSize)
135 1 : r.separators = r.br.RawBytes(indexBlockColumnSeparator)
136 1 : r.offsets = r.br.Uints(indexBlockColumnOffsets)
137 1 : r.lengths = r.br.Uints(indexBlockColumnLengths)
138 1 : r.blockProps = r.br.RawBytes(indexBlockColumnBlockProperties)
139 1 : }
140 :
141 : // DebugString prints a human-readable explanation of the keyspan block's binary
142 : // representation.
143 1 : func (r *IndexReader) DebugString() string {
144 1 : f := binfmt.New(r.br.data).LineWidth(20)
145 1 : r.Describe(f)
146 1 : return f.String()
147 1 : }
148 :
149 : // Describe describes the binary format of the index block, assuming f.Offset()
150 : // is positioned at the beginning of the same index block described by r.
151 1 : func (r *IndexReader) Describe(f *binfmt.Formatter) {
152 1 : // Set the relative offset. When loaded into memory, the beginning of blocks
153 1 : // are aligned. Padding that ensures alignment is done relative to the
154 1 : // current offset. Setting the relative offset ensures that if we're
155 1 : // describing this block within a larger structure (eg, f.Offset()>0), we
156 1 : // compute padding appropriately assuming the current byte f.Offset() is
157 1 : // aligned.
158 1 : f.SetAnchorOffset()
159 1 :
160 1 : f.CommentLine("index block header")
161 1 : r.br.headerToBinFormatter(f)
162 1 : for i := 0; i < indexBlockColumnCount; i++ {
163 1 : r.br.columnToBinFormatter(f, i, int(r.br.header.Rows))
164 1 : }
165 1 : f.HexBytesln(1, "block padding byte")
166 : }
167 :
168 : // IndexIter is an iterator over the block entries in an index block.
169 : type IndexIter struct {
170 : r *IndexReader
171 : n int
172 : row int
173 :
174 : h block.BufferHandle
175 : allocReader IndexReader
176 : }
177 :
178 : // Assert that IndexIter satisfies the block.IndexBlockIterator interface.
179 : var _ block.IndexBlockIterator = (*IndexIter)(nil)
180 :
181 : // InitReader initializes an index iterator from the provided reader.
182 1 : func (i *IndexIter) InitReader(r *IndexReader) {
183 1 : *i = IndexIter{r: r, n: int(r.br.header.Rows), allocReader: i.allocReader}
184 1 : }
185 :
186 : // Init initializes an iterator from the provided block data slice.
187 : func (i *IndexIter) Init(
188 : cmp base.Compare, split base.Split, blk []byte, transforms block.IterTransforms,
189 0 : ) error {
190 0 : // TODO(jackson): Handle the transforms.
191 0 : i.allocReader.Init(blk)
192 0 : i.InitReader(&i.allocReader)
193 0 : return nil
194 0 : }
195 :
196 : // InitHandle initializes an iterator from the provided block handle.
197 : func (i *IndexIter) InitHandle(
198 : cmp base.Compare, split base.Split, block block.BufferHandle, transforms block.IterTransforms,
199 0 : ) error {
200 0 : // TODO(jackson): Handle the transforms.
201 0 :
202 0 : // TODO(jackson): If block.h != nil, use a *IndexReader that's allocated
203 0 : // when the block is loaded into the block cache. On cache hits, this will
204 0 : // reduce the amount of setup necessary to use an iterator. (It's relatively
205 0 : // common to open an iterator and perform just a few seeks, so avoiding the
206 0 : // overhead can be material.)
207 0 : i.h.Release()
208 0 : i.h = block
209 0 : return i.Init(cmp, split, i.h.Get(), transforms)
210 0 : }
211 :
212 : // RowIndex returns the index of the block entry at the iterator's current
213 : // position.
214 0 : func (i *IndexIter) RowIndex() int {
215 0 : return i.row
216 0 : }
217 :
218 : // ResetForReuse resets the IndexIter for reuse, retaining buffers to avoid
219 : // future allocations.
220 0 : func (i *IndexIter) ResetForReuse() IndexIter {
221 0 : return IndexIter{ /* nothing to retain */ }
222 0 : }
223 :
224 : // Valid returns true if the iterator is currently positioned at a valid block
225 : // handle.
226 0 : func (i *IndexIter) Valid() bool {
227 0 : return 0 <= i.row && i.row < int(i.r.br.header.Rows)
228 0 : }
229 :
230 : // Invalidate invalidates the block iterator, removing references to the block
231 : // it was initialized with.
232 0 : func (i *IndexIter) Invalidate() {
233 0 : i.r = nil
234 0 : i.h = block.BufferHandle{}
235 0 : }
236 :
237 : // IsDataInvalidated returns true when the iterator has been invalidated
238 : // using an Invalidate call. NB: this is different from Valid.
239 0 : func (i *IndexIter) IsDataInvalidated() bool {
240 0 : return i.r == nil
241 0 : }
242 :
243 : // Handle returns the underlying block buffer handle, if the iterator was
244 : // initialized with one.
245 0 : func (i *IndexIter) Handle() block.BufferHandle {
246 0 : return i.h
247 0 : }
248 :
249 : // Separator returns the separator at the iterator's current position. The
250 : // iterator must be positioned at a valid row.
251 0 : func (i *IndexIter) Separator() []byte {
252 0 : return i.r.separators.At(i.row)
253 0 : }
254 :
255 : // BlockHandleWithProperties decodes the block handle with any encoded
256 : // properties at the iterator's current position.
257 1 : func (i *IndexIter) BlockHandleWithProperties() (block.HandleWithProperties, error) {
258 1 : return block.HandleWithProperties{
259 1 : Handle: block.Handle{
260 1 : Offset: i.r.offsets.At(i.row),
261 1 : Length: i.r.lengths.At(i.row),
262 1 : },
263 1 : Props: i.r.blockProps.At(i.row),
264 1 : }, nil
265 1 : }
266 :
267 : // SeekGE seeks the index iterator to the first block entry with a separator key
268 : // greater or equal to the given key. It returns false if the seek key is
269 : // greater than all index block separators.
270 1 : func (i *IndexIter) SeekGE(key []byte) bool {
271 1 : // Define f(-1) == false and f(upper) == true.
272 1 : // Invariant: f(index-1) == false, f(upper) == true.
273 1 : index, upper := 0, i.n
274 1 : for index < upper {
275 1 : h := int(uint(index+upper) >> 1) // avoid overflow when computing h
276 1 : // index ≤ h < upper
277 1 :
278 1 : // TODO(jackson): Is Bytes.At or Bytes.Slice(Bytes.Offset(h),
279 1 : // Bytes.Offset(h+1)) faster in this code?
280 1 : c := bytes.Compare(key, i.r.separators.At(h))
281 1 : if c > 0 {
282 1 : index = h + 1 // preserves f(index-1) == false
283 1 : } else {
284 1 : upper = h // preserves f(upper) == true
285 1 : }
286 : }
287 : // index == upper, f(index-1) == false, and f(upper) (= f(index)) == true => answer is index.
288 1 : i.row = index
289 1 : return index < i.n
290 : }
291 :
292 : // First seeks index iterator to the first block entry. It returns false if the
293 : // index block is empty.
294 1 : func (i *IndexIter) First() bool {
295 1 : i.row = 0
296 1 : return i.n > 0
297 1 : }
298 :
299 : // Last seeks index iterator to the last block entry. It returns false if the
300 : // index block is empty.
301 1 : func (i *IndexIter) Last() bool {
302 1 : i.row = i.n - 1
303 1 : return i.n > 0
304 1 : }
305 :
306 : // Next steps the index iterator to the next block entry. It returns false if
307 : // the index block is exhausted.
308 1 : func (i *IndexIter) Next() bool {
309 1 : i.row++
310 1 : return i.row < i.n
311 1 : }
312 :
313 : // Prev steps the index iterator to the previous block entry. It returns false
314 : // if the index block is exhausted.
315 1 : func (i *IndexIter) Prev() bool {
316 1 : i.row--
317 1 : return i.row >= 0
318 1 : }
319 :
320 : // Close closes the iterator, releasing any resources it holds.
321 0 : func (i *IndexIter) Close() error {
322 0 : i.h.Release()
323 0 : *i = IndexIter{}
324 0 : return nil
325 0 : }
|