LCOV - code coverage report
Current view: top level - pebble/sstable/colblk - bitmap.go (source / functions) Hit Total Coverage
Test: 2024-08-15 08:17Z 25ac6781 - tests + meta.lcov Lines: 179 201 89.1 %
Date: 2024-08-15 08:18:12 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : package colblk
       6             : 
       7             : import (
       8             :         "fmt"
       9             :         "io"
      10             :         "math/bits"
      11             :         "slices"
      12             :         "strings"
      13             :         "unsafe"
      14             : 
      15             :         "github.com/cockroachdb/errors"
      16             :         "github.com/cockroachdb/pebble/internal/binfmt"
      17             : )
      18             : 
      19             : // Bitmap is a bitmap structure built on a []uint64. A bitmap utilizes ~1
      20             : // physical bit/logical bit (~0.125 bytes/row). The bitmap is encoded into an
      21             : // 8-byte aligned array of 64-bit words which is (nRows+63)/64 words in length.
      22             : //
      23             : // A summary bitmap is stored after the primary bitmap in which each bit in the
      24             : // summary bitmap corresponds to 1 word in the primary bitmap. A bit is set in
      25             : // the summary bitmap if the corresponding word in the primary bitmap is
      26             : // non-zero. The summary bitmap accelerates predecessor and successor
      27             : // operations.
      28             : type Bitmap struct {
      29             :         data     UnsafeRawSlice[uint64]
      30             :         bitCount int
      31             : }
      32             : 
      33             : // Assert that Bitmap implements Array[bool].
      34             : var _ Array[bool] = Bitmap{}
      35             : 
      36             : // DecodeBitmap decodes the structure of a Bitmap and returns a Bitmap that
      37             : // reads from b supporting bitCount logical bits. No bounds checking is
      38             : // performed, so the caller must guarantee the bitmap is appropriately sized and
      39             : // the provided bitCount correctly identifies the number of bits in the bitmap.
      40           1 : func DecodeBitmap(b []byte, off uint32, bitCount int) (bitmap Bitmap, endOffset uint32) {
      41           1 :         sz := bitmapRequiredSize(bitCount)
      42           1 :         off = align(off, align64)
      43           1 :         if len(b) < int(off)+sz {
      44           0 :                 panic(errors.AssertionFailedf("bitmap of %d bits requires at least %d bytes; provided with %d-byte slice",
      45           0 :                         bitCount, bitmapRequiredSize(bitCount), len(b[off:])))
      46             :         }
      47           1 :         return Bitmap{
      48           1 :                 data:     makeUnsafeRawSlice[uint64](unsafe.Pointer(&b[off])),
      49           1 :                 bitCount: bitCount,
      50           1 :         }, off + uint32(sz)
      51             : }
      52             : 
      53             : // Assert that DecodeBitmap implements DecodeFunc.
      54             : var _ DecodeFunc[Bitmap] = DecodeBitmap
      55             : 
      56             : // At returns true if the bit at position i is set and false otherwise.
      57           1 : func (b Bitmap) At(i int) bool {
      58           1 :         return (b.data.At(i>>6 /* i/64 */) & (1 << uint(i%64))) != 0
      59           1 : }
      60             : 
      61             : // Successor returns the next bit greater than or equal to i set in the bitmap.
      62             : // The i parameter must be in [0, bitCount). Returns the number of bits
      63             : // represented by the bitmap if no next bit is set.
      64           1 : func (b Bitmap) Successor(i int) int {
      65           1 :         // nextInWord returns the index of the smallest set bit with an index >= bit
      66           1 :         // within the provided word.  The returned index is an index local to the
      67           1 :         // word.
      68           1 :         nextInWord := func(word uint64, bit uint) int {
      69           1 :                 // We want to find the index of the next set bit. We can accomplish this
      70           1 :                 // by clearing the trailing `bit` bits from the word and counting the
      71           1 :                 // number of trailing zeros. For example, consider the word and bit=37:
      72           1 :                 //
      73           1 :                 //           word: 1010101010111111111110000001110101010101011111111111000000111011
      74           1 :                 //
      75           1 :                 //         1<<bit: 0000000000000000000000000010000000000000000000000000000000000000
      76           1 :                 //       1<<bit-1: 0000000000000000000000000001111111111111111111111111111111111111
      77           1 :                 //      ^1<<bit-1: 1111111111111111111111111110000000000000000000000000000000000000
      78           1 :                 // word&^1<<bit-1: 1010101010111111111110000000000000000000000000000000000000000000
      79           1 :                 //
      80           1 :                 // Counting the trailing zeroes of this last value gives us 43. For
      81           1 :                 // visualizing, 1<<43 is:
      82           1 :                 //
      83           1 :                 //                 0000000000000000000010000000000000000000000000000000000000000000
      84           1 :                 //
      85           1 :                 return bits.TrailingZeros64(word &^ ((1 << bit) - 1))
      86           1 :         }
      87             : 
      88           1 :         wordIdx := i >> 6 // i/64
      89           1 :         // Fast path for common case of reasonably dense bitmaps; if the there's a
      90           1 :         // bit > i set in the same word, return it.
      91           1 :         if next := nextInWord(b.data.At(wordIdx), uint(i%64)); next < 64 {
      92           1 :                 return wordIdx<<6 + next
      93           1 :         }
      94             : 
      95             :         // Consult summary structure to find the next word with a set bit. The word
      96             :         // we just checked (wordIdx) is represented by the wordIdx%64'th bit in the
      97             :         // wordIdx/64'th summary word. We want to know if any of the other later
      98             :         // words that are summarized together have a set bit. We call [nextInWord]
      99             :         // on the summary word to get the index of which word has a set bit, if any.
     100           1 :         summaryTableOffset, summaryTableEnd := b.summaryTableBounds()
     101           1 :         summaryWordIdx := summaryTableOffset + wordIdx>>6
     102           1 :         summaryNext := nextInWord(b.data.At(summaryWordIdx), uint(wordIdx%64)+1)
     103           1 :         // If [summaryNext] == 64, then there are no set bits in any of the earlier
     104           1 :         // words represented by the summary word at [summaryWordIdx]. In that case,
     105           1 :         // we need to keep scanning the summary table forwards.
     106           1 :         if summaryNext == 64 {
     107           1 :                 for summaryWordIdx++; ; summaryWordIdx++ {
     108           1 :                         // When we fall off the end of the summary table, we've determined
     109           1 :                         // there are no set bits after i across the entirety of the bitmap.
     110           1 :                         if summaryWordIdx >= summaryTableEnd {
     111           1 :                                 return b.bitCount
     112           1 :                         }
     113           0 :                         if summaryWord := b.data.At(summaryWordIdx); summaryWord != 0 {
     114           0 :                                 summaryNext = bits.TrailingZeros64(summaryWord)
     115           0 :                                 break
     116             :                         }
     117             :                 }
     118             :         }
     119             :         // The summary word index and the summaryNext together tell us which word
     120             :         // has a set bit. The number of leading zeros in the word itself tell us
     121             :         // which bit is set.
     122           0 :         wordIdx = ((summaryWordIdx - summaryTableOffset) << 6) + summaryNext
     123           0 :         return (wordIdx << 6) + bits.TrailingZeros64(b.data.At(wordIdx))
     124             : }
     125             : 
     126             : // Predecessor returns the previous bit less than or equal to i set in the
     127             : // bitmap. The i parameter must be in [0, bitCount). Returns -1 if no previous
     128             : // bit is set.
     129           1 : func (b Bitmap) Predecessor(i int) int {
     130           1 :         // prevInWord returns the index of the largest set bit ≤ bit within the
     131           1 :         // provided word. The returned index is an index local to the word. Returns
     132           1 :         // -1 if no set bit is found.
     133           1 :         prevInWord := func(word uint64, bit uint) int {
     134           1 :                 // We want to find the index of the previous set bit. We can accomplish
     135           1 :                 // this by clearing the leading `bit` bits from the word and counting
     136           1 :                 // the number of leading zeros. For example, consider the word and
     137           1 :                 // bit=42:
     138           1 :                 //
     139           1 :                 //              word: 1010101010111111111110000001110101010101011111111111000000111011
     140           1 :                 //
     141           1 :                 //        1<<(bit+1): 0000000000000000000010000000000000000000000000000000000000000000
     142           1 :                 //      1<<(bit+1)-1: 0000000000000000000001111111111111111111111111111111111111111111
     143           1 :                 // word&1<<(bit+1)-1: 0000000000000000000000000001110101010101011111111111000000111011
     144           1 :                 //
     145           1 :                 // Counting the leading zeroes of this last value gives us 27 leading
     146           1 :                 // zeros. 63-27 gives index 36. For visualizing, 1<<36 is:
     147           1 :                 //
     148           1 :                 //                    0000000000000000000000000001000000000000000000000000000000000000
     149           1 :                 //
     150           1 :                 return 63 - bits.LeadingZeros64(word&((1<<(bit+1))-1))
     151           1 :         }
     152             : 
     153           1 :         wordIdx := i >> 6 // i/64
     154           1 :         // Fast path for common case of reasonably dense bitmaps; if the there's a
     155           1 :         // bit < i set in the same word, return it.
     156           1 :         if prev := prevInWord(b.data.At(wordIdx), uint(i%64)); prev >= 0 {
     157           1 :                 return (wordIdx << 6) + prev
     158           1 :         }
     159             : 
     160             :         // Consult summary structure to find the next word with a set bit. The word
     161             :         // we just checked (wordIdx) is represented by the wordIdx%64'th bit in the
     162             :         // wordIdx/64'th summary word. We want to know if any of other earlier words
     163             :         // that are summarized together have a set bit. We call [prevInWord] on the
     164             :         // summary word to get the index of which word has a set bit, if any.
     165           1 :         summaryTableOffset, _ := b.summaryTableBounds()
     166           1 :         summaryWordIdx := summaryTableOffset + wordIdx>>6
     167           1 :         summaryPrev := prevInWord(b.data.At(summaryWordIdx), uint(wordIdx%64)-1)
     168           1 :         // If [summaryPrev] is negative, then there are no set bits in any of the
     169           1 :         // earlier words represented by the summary word at [summaryWordIdx]. In
     170           1 :         // that case, we need to keep scanning the summary table backwards.
     171           1 :         if summaryPrev < 0 {
     172           1 :                 for summaryWordIdx--; ; summaryWordIdx-- {
     173           1 :                         // When we fall below the beginning of the summary table, we've
     174           1 :                         // determined there are no set bits before i across the entirety of
     175           1 :                         // the bitmap.
     176           1 :                         if summaryWordIdx < summaryTableOffset {
     177           1 :                                 return -1
     178           1 :                         }
     179           0 :                         if summaryWord := b.data.At(summaryWordIdx); summaryWord != 0 {
     180           0 :                                 summaryPrev = 63 - bits.LeadingZeros64(summaryWord)
     181           0 :                                 break
     182             :                         }
     183             :                 }
     184             :         }
     185             :         // The summary word index and the summary prev together tell us which word
     186             :         // has a set bit. The number of trailing zeros in the word itself tell us
     187             :         // which bit is set.
     188           0 :         wordIdx = ((summaryWordIdx - summaryTableOffset) << 6) + summaryPrev
     189           0 :         return (wordIdx << 6) + 63 - bits.LeadingZeros64(b.data.At(wordIdx))
     190             : }
     191             : 
     192           1 : func (b Bitmap) summaryTableBounds() (startOffset, endOffset int) {
     193           1 :         startOffset = (b.bitCount + 63) >> 6
     194           1 :         endOffset = startOffset + startOffset>>6
     195           1 :         return startOffset, endOffset
     196           1 : }
     197             : 
     198             : // String returns a string representation of the entire bitmap.
     199           0 : func (b Bitmap) String() string {
     200           0 :         var sb strings.Builder
     201           0 :         for w := 0; w < (b.bitCount+63)/64; w++ {
     202           0 :                 fmt.Fprintf(&sb, "%064b", b.data.At(w))
     203           0 :         }
     204           0 :         return sb.String()
     205             : }
     206             : 
     207             : // BitmapBuilder constructs a Bitmap. Bits are default false.
     208             : type BitmapBuilder struct {
     209             :         words []uint64
     210             : }
     211             : 
     212             : // Assert that BitmapBuilder implements ColumnWriter.
     213             : var _ ColumnWriter = (*BitmapBuilder)(nil)
     214             : 
     215           1 : func bitmapRequiredSize(total int) int {
     216           1 :         nWords := (total + 63) >> 6          // divide by 64
     217           1 :         nSummaryWords := (nWords + 63) >> 6  // divide by 64
     218           1 :         return (nWords + nSummaryWords) << 3 // multiply by 8
     219           1 : }
     220             : 
     221             : // Set sets the bit at position i if v is true and clears the bit at position i
     222             : // otherwise. Callers need not call Set if v is false and Set(i, true) has not
     223             : // been called yet.
     224           1 : func (b *BitmapBuilder) Set(i int, v bool) {
     225           1 :         w := i >> 6 // divide by 64
     226           1 :         for len(b.words) <= w {
     227           1 :                 b.words = append(b.words, 0)
     228           1 :         }
     229           1 :         if v {
     230           1 :                 b.words[w] |= 1 << uint(i%64)
     231           1 :         } else {
     232           1 :                 b.words[w] &^= 1 << uint(i%64)
     233           1 :         }
     234             : }
     235             : 
     236             : // Reset resets the bitmap to the empty state.
     237           1 : func (b *BitmapBuilder) Reset() {
     238           1 :         clear(b.words)
     239           1 :         b.words = b.words[:0]
     240           1 : }
     241             : 
     242             : // NumColumns implements the ColumnWriter interface.
     243           1 : func (b *BitmapBuilder) NumColumns() int { return 1 }
     244             : 
     245             : // DataType implements the ColumnWriter interface.
     246           1 : func (b *BitmapBuilder) DataType(int) DataType { return DataTypeBool }
     247             : 
     248             : // Size implements the ColumnWriter interface.
     249           1 : func (b *BitmapBuilder) Size(rows int, offset uint32) uint32 {
     250           1 :         offset = align(offset, align64)
     251           1 :         return offset + uint32(bitmapRequiredSize(rows))
     252           1 : }
     253             : 
     254             : // Invert inverts the bitmap, setting all bits that are not set and clearing all
     255             : // bits that are set. If the bitmap's tail is sparse and is not large enough to
     256             : // represent nRows rows, it's first materialized.
     257           1 : func (b *BitmapBuilder) Invert(nRows int) {
     258           1 :         // If the tail of b is sparse, fill in zeroes before inverting.
     259           1 :         nBitmapWords := (nRows + 63) >> 6
     260           1 :         b.words = slices.Grow(b.words, nBitmapWords-len(b.words))[:nBitmapWords]
     261           1 :         for i := range b.words {
     262           1 :                 b.words[i] = ^b.words[i]
     263           1 :         }
     264             : }
     265             : 
     266             : // Finish finalizes the bitmap, computing the per-word summary bitmap and
     267             : // writing the resulting data to buf at offset.
     268           1 : func (b *BitmapBuilder) Finish(col, nRows int, offset uint32, buf []byte) uint32 {
     269           1 :         offset = alignWithZeroes(buf, offset, align64)
     270           1 :         dest := makeUnsafeRawSlice[uint64](unsafe.Pointer(&buf[offset]))
     271           1 : 
     272           1 :         nBitmapWords := (nRows + 63) >> 6
     273           1 :         // Truncate the bitmap to the number of words required to represent nRows.
     274           1 :         // The caller may have written more bits than nRows and no longer cares to
     275           1 :         // write them out.
     276           1 :         if len(b.words) > nBitmapWords {
     277           1 :                 b.words = b.words[:nBitmapWords]
     278           1 :         }
     279             :         // Ensure the last word of the bitmap does not contain any set bits beyond
     280             :         // the last row. This is not just for determinism but also to ensure that
     281             :         // the summary bitmap is correct (which is necessary for Bitmap.Successor
     282             :         // correctness).
     283           1 :         if i := nRows % 64; len(b.words) >= nBitmapWords && i != 0 {
     284           1 :                 b.words[nBitmapWords-1] &= (1 << i) - 1
     285           1 :         }
     286             : 
     287             :         // Copy all the words of the bitmap into the destination buffer.
     288           1 :         offset += uint32(copy(dest.Slice(len(b.words)), b.words)) << align64Shift
     289           1 : 
     290           1 :         // The caller may have written fewer than nRows rows if the tail is all
     291           1 :         // zeroes, relying on these bits being implicitly zero. If the tail of b is
     292           1 :         // sparse, fill in zeroes.
     293           1 :         for i := len(b.words); i < nBitmapWords; i++ {
     294           1 :                 dest.set(i, 0)
     295           1 :                 offset += align64
     296           1 :         }
     297             : 
     298             :         // Add the summary bitmap.
     299           1 :         nSummaryWords := (nBitmapWords + 63) >> 6
     300           1 :         for i := 0; i < nSummaryWords; i++ {
     301           1 :                 wordsOff := (i << 6) // i*64
     302           1 :                 nWords := min(64, len(b.words)-wordsOff)
     303           1 :                 var summaryWord uint64
     304           1 :                 for j := 0; j < nWords; j++ {
     305           1 :                         if (b.words)[wordsOff+j] != 0 {
     306           1 :                                 summaryWord |= 1 << j
     307           1 :                         }
     308             :                 }
     309           1 :                 dest.set(nBitmapWords+i, summaryWord)
     310             :         }
     311           1 :         return offset + uint32(nSummaryWords)<<align64Shift
     312             : }
     313             : 
     314             : // WriteDebug implements the ColumnWriter interface.
     315           0 : func (b *BitmapBuilder) WriteDebug(w io.Writer, rows int) {
     316           0 :         // TODO(jackson): Add more detailed debugging information.
     317           0 :         fmt.Fprint(w, "bitmap")
     318           0 : }
     319             : 
     320           1 : func bitmapToBinFormatter(f *binfmt.Formatter, rows int) {
     321           1 :         if aligned := align(f.Offset(), 8); aligned-f.Offset() != 0 {
     322           1 :                 f.HexBytesln(aligned-f.Offset(), "padding to align to 64-bit boundary")
     323           1 :         }
     324           1 :         bitmapWords := (rows + 63) / 64
     325           1 :         for i := 0; i < bitmapWords; i++ {
     326           1 :                 f.Line(8).Append("b ").Binary(8).Done("bitmap word %d", i)
     327           1 :         }
     328           1 :         summaryWords := (bitmapWords + 63) / 64
     329           1 :         for i := 0; i < summaryWords; i++ {
     330           1 :                 f.Line(8).Append("b ").Binary(8).Done("bitmap summary word %d-%d", i*64, i*64+63)
     331           1 :         }
     332             : }

Generated by: LCOV version 1.14