Line data Source code
1 : // Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package colblk
6 :
7 : import (
8 : "fmt"
9 : "io"
10 : "math/bits"
11 : "slices"
12 : "strings"
13 : "unsafe"
14 :
15 : "github.com/cockroachdb/errors"
16 : "github.com/cockroachdb/pebble/internal/binfmt"
17 : )
18 :
19 : // Bitmap is a bitmap structure built on a []uint64. A bitmap utilizes ~1
20 : // physical bit/logical bit (~0.125 bytes/row). The bitmap is encoded into an
21 : // 8-byte aligned array of 64-bit words which is (nRows+63)/64 words in length.
22 : //
23 : // A summary bitmap is stored after the primary bitmap in which each bit in the
24 : // summary bitmap corresponds to 1 word in the primary bitmap. A bit is set in
25 : // the summary bitmap if the corresponding word in the primary bitmap is
26 : // non-zero. The summary bitmap accelerates predecessor and successor
27 : // operations.
28 : type Bitmap struct {
29 : data UnsafeRawSlice[uint64]
30 : bitCount int
31 : }
32 :
33 : // Assert that Bitmap implements Array[bool].
34 : var _ Array[bool] = Bitmap{}
35 :
36 : // DecodeBitmap decodes the structure of a Bitmap and returns a Bitmap that
37 : // reads from b supporting bitCount logical bits. No bounds checking is
38 : // performed, so the caller must guarantee the bitmap is appropriately sized and
39 : // the provided bitCount correctly identifies the number of bits in the bitmap.
40 1 : func DecodeBitmap(b []byte, off uint32, bitCount int) (bitmap Bitmap, endOffset uint32) {
41 1 : sz := bitmapRequiredSize(bitCount)
42 1 : off = align(off, align64)
43 1 : if len(b) < int(off)+sz {
44 0 : panic(errors.AssertionFailedf("bitmap of %d bits requires at least %d bytes; provided with %d-byte slice",
45 0 : bitCount, bitmapRequiredSize(bitCount), len(b[off:])))
46 : }
47 1 : return Bitmap{
48 1 : data: makeUnsafeRawSlice[uint64](unsafe.Pointer(&b[off])),
49 1 : bitCount: bitCount,
50 1 : }, off + uint32(sz)
51 : }
52 :
53 : // Assert that DecodeBitmap implements DecodeFunc.
54 : var _ DecodeFunc[Bitmap] = DecodeBitmap
55 :
56 : // At returns true if the bit at position i is set and false otherwise.
57 1 : func (b Bitmap) At(i int) bool {
58 1 : return (b.data.At(i>>6 /* i/64 */) & (1 << uint(i%64))) != 0
59 1 : }
60 :
61 : // Successor returns the next bit greater than or equal to i set in the bitmap.
62 : // The i parameter must be in [0, bitCount). Returns the number of bits
63 : // represented by the bitmap if no next bit is set.
64 1 : func (b Bitmap) Successor(i int) int {
65 1 : // nextInWord returns the index of the smallest set bit with an index >= bit
66 1 : // within the provided word. The returned index is an index local to the
67 1 : // word.
68 1 : nextInWord := func(word uint64, bit uint) int {
69 1 : // We want to find the index of the next set bit. We can accomplish this
70 1 : // by clearing the trailing `bit` bits from the word and counting the
71 1 : // number of trailing zeros. For example, consider the word and bit=37:
72 1 : //
73 1 : // word: 1010101010111111111110000001110101010101011111111111000000111011
74 1 : //
75 1 : // 1<<bit: 0000000000000000000000000010000000000000000000000000000000000000
76 1 : // 1<<bit-1: 0000000000000000000000000001111111111111111111111111111111111111
77 1 : // ^1<<bit-1: 1111111111111111111111111110000000000000000000000000000000000000
78 1 : // word&^1<<bit-1: 1010101010111111111110000000000000000000000000000000000000000000
79 1 : //
80 1 : // Counting the trailing zeroes of this last value gives us 43. For
81 1 : // visualizing, 1<<43 is:
82 1 : //
83 1 : // 0000000000000000000010000000000000000000000000000000000000000000
84 1 : //
85 1 : return bits.TrailingZeros64(word &^ ((1 << bit) - 1))
86 1 : }
87 :
88 1 : wordIdx := i >> 6 // i/64
89 1 : // Fast path for common case of reasonably dense bitmaps; if the there's a
90 1 : // bit > i set in the same word, return it.
91 1 : if next := nextInWord(b.data.At(wordIdx), uint(i%64)); next < 64 {
92 1 : return wordIdx<<6 + next
93 1 : }
94 :
95 : // Consult summary structure to find the next word with a set bit. The word
96 : // we just checked (wordIdx) is represented by the wordIdx%64'th bit in the
97 : // wordIdx/64'th summary word. We want to know if any of the other later
98 : // words that are summarized together have a set bit. We call [nextInWord]
99 : // on the summary word to get the index of which word has a set bit, if any.
100 1 : summaryTableOffset, summaryTableEnd := b.summaryTableBounds()
101 1 : summaryWordIdx := summaryTableOffset + wordIdx>>6
102 1 : summaryNext := nextInWord(b.data.At(summaryWordIdx), uint(wordIdx%64)+1)
103 1 : // If [summaryNext] == 64, then there are no set bits in any of the earlier
104 1 : // words represented by the summary word at [summaryWordIdx]. In that case,
105 1 : // we need to keep scanning the summary table forwards.
106 1 : if summaryNext == 64 {
107 1 : for summaryWordIdx++; ; summaryWordIdx++ {
108 1 : // When we fall off the end of the summary table, we've determined
109 1 : // there are no set bits after i across the entirety of the bitmap.
110 1 : if summaryWordIdx >= summaryTableEnd {
111 1 : return b.bitCount
112 1 : }
113 0 : if summaryWord := b.data.At(summaryWordIdx); summaryWord != 0 {
114 0 : summaryNext = bits.TrailingZeros64(summaryWord)
115 0 : break
116 : }
117 : }
118 : }
119 : // The summary word index and the summaryNext together tell us which word
120 : // has a set bit. The number of leading zeros in the word itself tell us
121 : // which bit is set.
122 0 : wordIdx = ((summaryWordIdx - summaryTableOffset) << 6) + summaryNext
123 0 : return (wordIdx << 6) + bits.TrailingZeros64(b.data.At(wordIdx))
124 : }
125 :
126 : // Predecessor returns the previous bit less than or equal to i set in the
127 : // bitmap. The i parameter must be in [0, bitCount). Returns -1 if no previous
128 : // bit is set.
129 1 : func (b Bitmap) Predecessor(i int) int {
130 1 : // prevInWord returns the index of the largest set bit ≤ bit within the
131 1 : // provided word. The returned index is an index local to the word. Returns
132 1 : // -1 if no set bit is found.
133 1 : prevInWord := func(word uint64, bit uint) int {
134 1 : // We want to find the index of the previous set bit. We can accomplish
135 1 : // this by clearing the leading `bit` bits from the word and counting
136 1 : // the number of leading zeros. For example, consider the word and
137 1 : // bit=42:
138 1 : //
139 1 : // word: 1010101010111111111110000001110101010101011111111111000000111011
140 1 : //
141 1 : // 1<<(bit+1): 0000000000000000000010000000000000000000000000000000000000000000
142 1 : // 1<<(bit+1)-1: 0000000000000000000001111111111111111111111111111111111111111111
143 1 : // word&1<<(bit+1)-1: 0000000000000000000000000001110101010101011111111111000000111011
144 1 : //
145 1 : // Counting the leading zeroes of this last value gives us 27 leading
146 1 : // zeros. 63-27 gives index 36. For visualizing, 1<<36 is:
147 1 : //
148 1 : // 0000000000000000000000000001000000000000000000000000000000000000
149 1 : //
150 1 : return 63 - bits.LeadingZeros64(word&((1<<(bit+1))-1))
151 1 : }
152 :
153 1 : wordIdx := i >> 6 // i/64
154 1 : // Fast path for common case of reasonably dense bitmaps; if the there's a
155 1 : // bit < i set in the same word, return it.
156 1 : if prev := prevInWord(b.data.At(wordIdx), uint(i%64)); prev >= 0 {
157 1 : return (wordIdx << 6) + prev
158 1 : }
159 :
160 : // Consult summary structure to find the next word with a set bit. The word
161 : // we just checked (wordIdx) is represented by the wordIdx%64'th bit in the
162 : // wordIdx/64'th summary word. We want to know if any of other earlier words
163 : // that are summarized together have a set bit. We call [prevInWord] on the
164 : // summary word to get the index of which word has a set bit, if any.
165 1 : summaryTableOffset, _ := b.summaryTableBounds()
166 1 : summaryWordIdx := summaryTableOffset + wordIdx>>6
167 1 : summaryPrev := prevInWord(b.data.At(summaryWordIdx), uint(wordIdx%64)-1)
168 1 : // If [summaryPrev] is negative, then there are no set bits in any of the
169 1 : // earlier words represented by the summary word at [summaryWordIdx]. In
170 1 : // that case, we need to keep scanning the summary table backwards.
171 1 : if summaryPrev < 0 {
172 1 : for summaryWordIdx--; ; summaryWordIdx-- {
173 1 : // When we fall below the beginning of the summary table, we've
174 1 : // determined there are no set bits before i across the entirety of
175 1 : // the bitmap.
176 1 : if summaryWordIdx < summaryTableOffset {
177 1 : return -1
178 1 : }
179 0 : if summaryWord := b.data.At(summaryWordIdx); summaryWord != 0 {
180 0 : summaryPrev = 63 - bits.LeadingZeros64(summaryWord)
181 0 : break
182 : }
183 : }
184 : }
185 : // The summary word index and the summary prev together tell us which word
186 : // has a set bit. The number of trailing zeros in the word itself tell us
187 : // which bit is set.
188 0 : wordIdx = ((summaryWordIdx - summaryTableOffset) << 6) + summaryPrev
189 0 : return (wordIdx << 6) + 63 - bits.LeadingZeros64(b.data.At(wordIdx))
190 : }
191 :
192 1 : func (b Bitmap) summaryTableBounds() (startOffset, endOffset int) {
193 1 : startOffset = (b.bitCount + 63) >> 6
194 1 : endOffset = startOffset + startOffset>>6
195 1 : return startOffset, endOffset
196 1 : }
197 :
198 : // String returns a string representation of the entire bitmap.
199 0 : func (b Bitmap) String() string {
200 0 : var sb strings.Builder
201 0 : for w := 0; w < (b.bitCount+63)/64; w++ {
202 0 : fmt.Fprintf(&sb, "%064b", b.data.At(w))
203 0 : }
204 0 : return sb.String()
205 : }
206 :
207 : // BitmapBuilder constructs a Bitmap. Bits are default false.
208 : type BitmapBuilder struct {
209 : words []uint64
210 : }
211 :
212 : // Assert that BitmapBuilder implements ColumnWriter.
213 : var _ ColumnWriter = (*BitmapBuilder)(nil)
214 :
215 1 : func bitmapRequiredSize(total int) int {
216 1 : nWords := (total + 63) >> 6 // divide by 64
217 1 : nSummaryWords := (nWords + 63) >> 6 // divide by 64
218 1 : return (nWords + nSummaryWords) << 3 // multiply by 8
219 1 : }
220 :
221 : // Set sets the bit at position i if v is true and clears the bit at position i
222 : // otherwise. Callers need not call Set if v is false and Set(i, true) has not
223 : // been called yet.
224 1 : func (b *BitmapBuilder) Set(i int, v bool) {
225 1 : w := i >> 6 // divide by 64
226 1 : for len(b.words) <= w {
227 1 : b.words = append(b.words, 0)
228 1 : }
229 1 : if v {
230 1 : b.words[w] |= 1 << uint(i%64)
231 1 : } else {
232 1 : b.words[w] &^= 1 << uint(i%64)
233 1 : }
234 : }
235 :
236 : // Reset resets the bitmap to the empty state.
237 1 : func (b *BitmapBuilder) Reset() {
238 1 : clear(b.words)
239 1 : b.words = b.words[:0]
240 1 : }
241 :
242 : // NumColumns implements the ColumnWriter interface.
243 1 : func (b *BitmapBuilder) NumColumns() int { return 1 }
244 :
245 : // DataType implements the ColumnWriter interface.
246 1 : func (b *BitmapBuilder) DataType(int) DataType { return DataTypeBool }
247 :
248 : // Size implements the ColumnWriter interface.
249 1 : func (b *BitmapBuilder) Size(rows int, offset uint32) uint32 {
250 1 : offset = align(offset, align64)
251 1 : return offset + uint32(bitmapRequiredSize(rows))
252 1 : }
253 :
254 : // Invert inverts the bitmap, setting all bits that are not set and clearing all
255 : // bits that are set. If the bitmap's tail is sparse and is not large enough to
256 : // represent nRows rows, it's first materialized.
257 1 : func (b *BitmapBuilder) Invert(nRows int) {
258 1 : // If the tail of b is sparse, fill in zeroes before inverting.
259 1 : nBitmapWords := (nRows + 63) >> 6
260 1 : b.words = slices.Grow(b.words, nBitmapWords-len(b.words))[:nBitmapWords]
261 1 : for i := range b.words {
262 1 : b.words[i] = ^b.words[i]
263 1 : }
264 : }
265 :
266 : // Finish finalizes the bitmap, computing the per-word summary bitmap and
267 : // writing the resulting data to buf at offset.
268 1 : func (b *BitmapBuilder) Finish(col, nRows int, offset uint32, buf []byte) uint32 {
269 1 : offset = alignWithZeroes(buf, offset, align64)
270 1 : dest := makeUnsafeRawSlice[uint64](unsafe.Pointer(&buf[offset]))
271 1 :
272 1 : nBitmapWords := (nRows + 63) >> 6
273 1 : // Truncate the bitmap to the number of words required to represent nRows.
274 1 : // The caller may have written more bits than nRows and no longer cares to
275 1 : // write them out.
276 1 : if len(b.words) > nBitmapWords {
277 1 : b.words = b.words[:nBitmapWords]
278 1 : }
279 : // Ensure the last word of the bitmap does not contain any set bits beyond
280 : // the last row. This is not just for determinism but also to ensure that
281 : // the summary bitmap is correct (which is necessary for Bitmap.Successor
282 : // correctness).
283 1 : if i := nRows % 64; len(b.words) >= nBitmapWords && i != 0 {
284 1 : b.words[nBitmapWords-1] &= (1 << i) - 1
285 1 : }
286 :
287 : // Copy all the words of the bitmap into the destination buffer.
288 1 : offset += uint32(copy(dest.Slice(len(b.words)), b.words)) << align64Shift
289 1 :
290 1 : // The caller may have written fewer than nRows rows if the tail is all
291 1 : // zeroes, relying on these bits being implicitly zero. If the tail of b is
292 1 : // sparse, fill in zeroes.
293 1 : for i := len(b.words); i < nBitmapWords; i++ {
294 1 : dest.set(i, 0)
295 1 : offset += align64
296 1 : }
297 :
298 : // Add the summary bitmap.
299 1 : nSummaryWords := (nBitmapWords + 63) >> 6
300 1 : for i := 0; i < nSummaryWords; i++ {
301 1 : wordsOff := (i << 6) // i*64
302 1 : nWords := min(64, len(b.words)-wordsOff)
303 1 : var summaryWord uint64
304 1 : for j := 0; j < nWords; j++ {
305 1 : if (b.words)[wordsOff+j] != 0 {
306 1 : summaryWord |= 1 << j
307 1 : }
308 : }
309 1 : dest.set(nBitmapWords+i, summaryWord)
310 : }
311 1 : return offset + uint32(nSummaryWords)<<align64Shift
312 : }
313 :
314 : // WriteDebug implements the ColumnWriter interface.
315 0 : func (b *BitmapBuilder) WriteDebug(w io.Writer, rows int) {
316 0 : // TODO(jackson): Add more detailed debugging information.
317 0 : fmt.Fprint(w, "bitmap")
318 0 : }
319 :
320 1 : func bitmapToBinFormatter(f *binfmt.Formatter, rows int) {
321 1 : if aligned := align(f.Offset(), 8); aligned-f.Offset() != 0 {
322 1 : f.HexBytesln(aligned-f.Offset(), "padding to align to 64-bit boundary")
323 1 : }
324 1 : bitmapWords := (rows + 63) / 64
325 1 : for i := 0; i < bitmapWords; i++ {
326 1 : f.Line(8).Append("b ").Binary(8).Done("bitmap word %d", i)
327 1 : }
328 1 : summaryWords := (bitmapWords + 63) / 64
329 1 : for i := 0; i < summaryWords; i++ {
330 1 : f.Line(8).Append("b ").Binary(8).Done("bitmap summary word %d-%d", i*64, i*64+63)
331 1 : }
332 : }
|