LCOV - code coverage report
Current view: top level - pebble/sstable/colblk - bitmap.go (source / functions) Hit Total Coverage
Test: 2024-08-29 08:16Z 983d98b1 - tests only.lcov Lines: 222 237 93.7 %
Date: 2024-08-29 08:16:47 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : package colblk
       6             : 
       7             : import (
       8             :         "fmt"
       9             :         "io"
      10             :         "math/bits"
      11             :         "slices"
      12             :         "strings"
      13             :         "unsafe"
      14             : 
      15             :         "github.com/cockroachdb/errors"
      16             :         "github.com/cockroachdb/pebble/internal/binfmt"
      17             : )
      18             : 
      19             : // Bitmap is a bitmap structure built on a []uint64. A bitmap utilizes ~1
      20             : // physical bit/logical bit (~0.125 bytes/row). The bitmap is encoded into an
      21             : // 8-byte aligned array of 64-bit words which is (nRows+63)/64 words in length.
      22             : //
      23             : // A summary bitmap is stored after the primary bitmap in which each bit in the
      24             : // summary bitmap corresponds to 1 word in the primary bitmap. A bit is set in
      25             : // the summary bitmap if the corresponding word in the primary bitmap is
      26             : // non-zero. The summary bitmap accelerates predecessor and successor
      27             : // operations.
      28             : type Bitmap struct {
      29             :         // data contains the bitmap data, according to defaultBitmapEncoding, or it
      30             :         // is nil if the bitmap is all zeros.
      31             :         data     UnsafeRawSlice[uint64]
      32             :         bitCount int
      33             : }
      34             : 
      35             : // Assert that Bitmap implements Array[bool].
      36             : var _ Array[bool] = Bitmap{}
      37             : 
      38             : // DecodeBitmap decodes the structure of a Bitmap and returns a Bitmap that
      39             : // reads from b supporting bitCount logical bits. No bounds checking is
      40             : // performed, so the caller must guarantee the bitmap is appropriately sized and
      41             : // the provided bitCount correctly identifies the number of bits in the bitmap.
      42           1 : func DecodeBitmap(b []byte, off uint32, bitCount int) (bitmap Bitmap, endOffset uint32) {
      43           1 :         encoding := bitmapEncoding(b[off])
      44           1 :         off++
      45           1 :         if encoding == zeroBitmapEncoding {
      46           1 :                 return Bitmap{bitCount: bitCount}, off
      47           1 :         }
      48           1 :         off = align(off, align64)
      49           1 :         sz := bitmapRequiredSize(bitCount)
      50           1 :         if len(b) < int(off)+sz {
      51           0 :                 panic(errors.AssertionFailedf("bitmap of %d bits requires at least %d bytes; provided with %d-byte slice",
      52           0 :                         bitCount, bitmapRequiredSize(bitCount), len(b[off:])))
      53             :         }
      54           1 :         return Bitmap{
      55           1 :                 data:     makeUnsafeRawSlice[uint64](unsafe.Pointer(&b[off])),
      56           1 :                 bitCount: bitCount,
      57           1 :         }, off + uint32(sz)
      58             : }
      59             : 
      60             : // Assert that DecodeBitmap implements DecodeFunc.
      61             : var _ DecodeFunc[Bitmap] = DecodeBitmap
      62             : 
      63             : // At returns true if the bit at position i is set and false otherwise.
      64           1 : func (b Bitmap) At(i int) bool {
      65           1 :         if b.data.ptr == nil {
      66           1 :                 // zero bitmap case.
      67           1 :                 return false
      68           1 :         }
      69             :         // Inline b.data.At(i/64).
      70             :         // The offset of the correct word is i / 64 * 8 = (i >> 3) &^ 0b111
      71           1 :         const mask = ^uintptr(0b111)
      72           1 :         val := *(*uint64)(unsafe.Pointer(uintptr(b.data.ptr) + (uintptr(i)>>3)&mask))
      73           1 :         return val&(1<<(uint(i)&63)) != 0
      74             : }
      75             : 
      76             : // Successor returns the next bit greater than or equal to i set in the bitmap.
      77             : // The i parameter must be in [0, bitCount). Returns the number of bits
      78             : // represented by the bitmap if no next bit is set.
      79           1 : func (b Bitmap) Successor(i int) int {
      80           1 :         if b.data.ptr == nil {
      81           1 :                 // Zero bitmap case.
      82           1 :                 return b.bitCount
      83           1 :         }
      84             :         // nextInWord returns the index of the smallest set bit with an index >= bit
      85             :         // within the provided word.  The returned index is an index local to the
      86             :         // word.
      87           1 :         nextInWord := func(word uint64, bit uint) int {
      88           1 :                 // We want to find the index of the next set bit. We can accomplish this
      89           1 :                 // by clearing the trailing `bit` bits from the word and counting the
      90           1 :                 // number of trailing zeros. For example, consider the word and bit=37:
      91           1 :                 //
      92           1 :                 //           word: 1010101010111111111110000001110101010101011111111111000000111011
      93           1 :                 //
      94           1 :                 //         1<<bit: 0000000000000000000000000010000000000000000000000000000000000000
      95           1 :                 //       1<<bit-1: 0000000000000000000000000001111111111111111111111111111111111111
      96           1 :                 //      ^1<<bit-1: 1111111111111111111111111110000000000000000000000000000000000000
      97           1 :                 // word&^1<<bit-1: 1010101010111111111110000000000000000000000000000000000000000000
      98           1 :                 //
      99           1 :                 // Counting the trailing zeroes of this last value gives us 43. For
     100           1 :                 // visualizing, 1<<43 is:
     101           1 :                 //
     102           1 :                 //                 0000000000000000000010000000000000000000000000000000000000000000
     103           1 :                 //
     104           1 :                 return bits.TrailingZeros64(word &^ ((1 << bit) - 1))
     105           1 :         }
     106             : 
     107           1 :         wordIdx := i >> 6 // i/64
     108           1 :         // Fast path for common case of reasonably dense bitmaps; if the there's a
     109           1 :         // bit > i set in the same word, return it.
     110           1 :         if next := nextInWord(b.data.At(wordIdx), uint(i%64)); next < 64 {
     111           1 :                 return wordIdx<<6 + next
     112           1 :         }
     113             : 
     114             :         // Consult summary structure to find the next word with a set bit. The word
     115             :         // we just checked (wordIdx) is represented by the wordIdx%64'th bit in the
     116             :         // wordIdx/64'th summary word. We want to know if any of the other later
     117             :         // words that are summarized together have a set bit. We call [nextInWord]
     118             :         // on the summary word to get the index of which word has a set bit, if any.
     119           1 :         summaryTableOffset, summaryTableEnd := b.summaryTableBounds()
     120           1 :         summaryWordIdx := summaryTableOffset + wordIdx>>6
     121           1 :         summaryNext := nextInWord(b.data.At(summaryWordIdx), uint(wordIdx%64)+1)
     122           1 :         // If [summaryNext] == 64, then there are no set bits in any of the earlier
     123           1 :         // words represented by the summary word at [summaryWordIdx]. In that case,
     124           1 :         // we need to keep scanning the summary table forwards.
     125           1 :         if summaryNext == 64 {
     126           1 :                 for summaryWordIdx++; ; summaryWordIdx++ {
     127           1 :                         // When we fall off the end of the summary table, we've determined
     128           1 :                         // there are no set bits after i across the entirety of the bitmap.
     129           1 :                         if summaryWordIdx >= summaryTableEnd {
     130           1 :                                 return b.bitCount
     131           1 :                         }
     132           0 :                         if summaryWord := b.data.At(summaryWordIdx); summaryWord != 0 {
     133           0 :                                 summaryNext = bits.TrailingZeros64(summaryWord)
     134           0 :                                 break
     135             :                         }
     136             :                 }
     137             :         }
     138             :         // The summary word index and the summaryNext together tell us which word
     139             :         // has a set bit. The number of leading zeros in the word itself tell us
     140             :         // which bit is set.
     141           1 :         wordIdx = ((summaryWordIdx - summaryTableOffset) << 6) + summaryNext
     142           1 :         return (wordIdx << 6) + bits.TrailingZeros64(b.data.At(wordIdx))
     143             : }
     144             : 
     145             : // Predecessor returns the previous bit less than or equal to i set in the
     146             : // bitmap. The i parameter must be in [0, bitCount). Returns -1 if no previous
     147             : // bit is set.
     148           1 : func (b Bitmap) Predecessor(i int) int {
     149           1 :         if b.data.ptr == nil {
     150           1 :                 // Zero bitmap case.
     151           1 :                 return -1
     152           1 :         }
     153             :         // prevInWord returns the index of the largest set bit ≤ bit within the
     154             :         // provided word. The returned index is an index local to the word. Returns
     155             :         // -1 if no set bit is found.
     156           1 :         prevInWord := func(word uint64, bit uint) int {
     157           1 :                 // We want to find the index of the previous set bit. We can accomplish
     158           1 :                 // this by clearing the leading `bit` bits from the word and counting
     159           1 :                 // the number of leading zeros. For example, consider the word and
     160           1 :                 // bit=42:
     161           1 :                 //
     162           1 :                 //              word: 1010101010111111111110000001110101010101011111111111000000111011
     163           1 :                 //
     164           1 :                 //        1<<(bit+1): 0000000000000000000010000000000000000000000000000000000000000000
     165           1 :                 //      1<<(bit+1)-1: 0000000000000000000001111111111111111111111111111111111111111111
     166           1 :                 // word&1<<(bit+1)-1: 0000000000000000000000000001110101010101011111111111000000111011
     167           1 :                 //
     168           1 :                 // Counting the leading zeroes of this last value gives us 27 leading
     169           1 :                 // zeros. 63-27 gives index 36. For visualizing, 1<<36 is:
     170           1 :                 //
     171           1 :                 //                    0000000000000000000000000001000000000000000000000000000000000000
     172           1 :                 //
     173           1 :                 return 63 - bits.LeadingZeros64(word&((1<<(bit+1))-1))
     174           1 :         }
     175             : 
     176           1 :         wordIdx := i >> 6 // i/64
     177           1 :         // Fast path for common case of reasonably dense bitmaps; if the there's a
     178           1 :         // bit < i set in the same word, return it.
     179           1 :         if prev := prevInWord(b.data.At(wordIdx), uint(i%64)); prev >= 0 {
     180           1 :                 return (wordIdx << 6) + prev
     181           1 :         }
     182             : 
     183             :         // Consult summary structure to find the next word with a set bit. The word
     184             :         // we just checked (wordIdx) is represented by the wordIdx%64'th bit in the
     185             :         // wordIdx/64'th summary word. We want to know if any of other earlier words
     186             :         // that are summarized together have a set bit. We call [prevInWord] on the
     187             :         // summary word to get the index of which word has a set bit, if any.
     188           1 :         summaryTableOffset, _ := b.summaryTableBounds()
     189           1 :         summaryWordIdx := summaryTableOffset + wordIdx>>6
     190           1 :         summaryPrev := prevInWord(b.data.At(summaryWordIdx), uint(wordIdx%64)-1)
     191           1 :         // If [summaryPrev] is negative, then there are no set bits in any of the
     192           1 :         // earlier words represented by the summary word at [summaryWordIdx]. In
     193           1 :         // that case, we need to keep scanning the summary table backwards.
     194           1 :         if summaryPrev < 0 {
     195           1 :                 for summaryWordIdx--; ; summaryWordIdx-- {
     196           1 :                         // When we fall below the beginning of the summary table, we've
     197           1 :                         // determined there are no set bits before i across the entirety of
     198           1 :                         // the bitmap.
     199           1 :                         if summaryWordIdx < summaryTableOffset {
     200           1 :                                 return -1
     201           1 :                         }
     202           0 :                         if summaryWord := b.data.At(summaryWordIdx); summaryWord != 0 {
     203           0 :                                 summaryPrev = 63 - bits.LeadingZeros64(summaryWord)
     204           0 :                                 break
     205             :                         }
     206             :                 }
     207             :         }
     208             :         // The summary word index and the summary prev together tell us which word
     209             :         // has a set bit. The number of trailing zeros in the word itself tell us
     210             :         // which bit is set.
     211           1 :         wordIdx = ((summaryWordIdx - summaryTableOffset) << 6) + summaryPrev
     212           1 :         return (wordIdx << 6) + 63 - bits.LeadingZeros64(b.data.At(wordIdx))
     213             : }
     214             : 
     215           1 : func (b Bitmap) summaryTableBounds() (startOffset, endOffset int) {
     216           1 :         startOffset = (b.bitCount + 63) >> 6
     217           1 :         endOffset = startOffset + startOffset>>6
     218           1 :         return startOffset, endOffset
     219           1 : }
     220             : 
     221             : // String returns a string representation of the entire bitmap.
     222           0 : func (b Bitmap) String() string {
     223           0 :         var sb strings.Builder
     224           0 :         for w := 0; w < (b.bitCount+63)/64; w++ {
     225           0 :                 fmt.Fprintf(&sb, "%064b", b.data.At(w))
     226           0 :         }
     227           0 :         return sb.String()
     228             : }
     229             : 
     230             : // BitmapBuilder constructs a Bitmap. Bits default to false.
     231             : type BitmapBuilder struct {
     232             :         words []uint64
     233             : }
     234             : 
     235             : type bitmapEncoding uint8
     236             : 
     237             : const (
     238             :         // defaultBitmapEncoding encodes the bitmap using ⌈n/64⌉ words followed by
     239             :         // ⌈⌈n/64⌉/64⌉ summary words.
     240             :         defaultBitmapEncoding bitmapEncoding = iota
     241             :         // zeroBitmapEncoding is used for the special case when the bitmap is empty.
     242             :         zeroBitmapEncoding
     243             : )
     244             : 
     245             : // Assert that BitmapBuilder implements ColumnWriter.
     246             : var _ ColumnWriter = (*BitmapBuilder)(nil)
     247             : 
     248             : // bitmapRequiredSize returns the size of an encoded bitmap in bytes, using the
     249             : // defaultBitmapEncoding.
     250           1 : func bitmapRequiredSize(total int) int {
     251           1 :         nWords := (total + 63) >> 6          // divide by 64
     252           1 :         nSummaryWords := (nWords + 63) >> 6  // divide by 64
     253           1 :         return (nWords + nSummaryWords) << 3 // multiply by 8
     254           1 : }
     255             : 
     256             : // Set sets the bit at position i to true.
     257           1 : func (b *BitmapBuilder) Set(i int) {
     258           1 :         w := i >> 6 // divide by 64
     259           1 :         for len(b.words) <= w {
     260           1 :                 b.words = append(b.words, 0)
     261           1 :         }
     262           1 :         b.words[w] |= 1 << uint(i%64)
     263             : }
     264             : 
     265             : // isZero returns true if no bits are set and Invert was not called.
     266           1 : func (b *BitmapBuilder) isZero() bool {
     267           1 :         return len(b.words) == 0
     268           1 : }
     269             : 
     270             : // Reset resets the bitmap to the empty state.
     271           1 : func (b *BitmapBuilder) Reset() {
     272           1 :         clear(b.words)
     273           1 :         b.words = b.words[:0]
     274           1 : }
     275             : 
     276             : // NumColumns implements the ColumnWriter interface.
     277           1 : func (b *BitmapBuilder) NumColumns() int { return 1 }
     278             : 
     279             : // DataType implements the ColumnWriter interface.
     280           1 : func (b *BitmapBuilder) DataType(int) DataType { return DataTypeBool }
     281             : 
     282             : // Size implements the ColumnWriter interface.
     283           1 : func (b *BitmapBuilder) Size(rows int, offset uint32) uint32 {
     284           1 :         // First byte will be the encoding type.
     285           1 :         offset++
     286           1 :         if b.isZero() {
     287           1 :                 return offset
     288           1 :         }
     289           1 :         offset = align(offset, align64)
     290           1 :         return offset + uint32(bitmapRequiredSize(rows))
     291             : }
     292             : 
     293             : // InvertedSize returns the size of the encoded bitmap, assuming Invert will be called.
     294           1 : func (b *BitmapBuilder) InvertedSize(rows int, offset uint32) uint32 {
     295           1 :         // First byte will be the encoding type.
     296           1 :         offset++
     297           1 :         // An inverted bitmap will never use all-zeros encoding (even if it happens to
     298           1 :         // be all zero).
     299           1 :         offset = align(offset, align64)
     300           1 :         return offset + uint32(bitmapRequiredSize(rows))
     301           1 : }
     302             : 
     303             : // Invert inverts the bitmap, setting all bits that are not set and clearing all
     304             : // bits that are set. If the bitmap's tail is sparse and is not large enough to
     305             : // represent nRows rows, it's first materialized.
     306             : //
     307             : // Note that Invert can affect the Size of the bitmap. Use InvertedSize() if you
     308             : // intend to invert the bitmap before finishing.
     309           1 : func (b *BitmapBuilder) Invert(nRows int) {
     310           1 :         // If the tail of b is sparse, fill in zeroes before inverting.
     311           1 :         nBitmapWords := (nRows + 63) >> 6
     312           1 :         b.words = slices.Grow(b.words, nBitmapWords-len(b.words))[:nBitmapWords]
     313           1 :         for i := range b.words {
     314           1 :                 b.words[i] = ^b.words[i]
     315           1 :         }
     316             : }
     317             : 
     318             : // Finish finalizes the bitmap, computing the per-word summary bitmap and
     319             : // writing the resulting data to buf at offset.
     320           1 : func (b *BitmapBuilder) Finish(col, nRows int, offset uint32, buf []byte) uint32 {
     321           1 :         if b.isZero() {
     322           1 :                 buf[offset] = byte(zeroBitmapEncoding)
     323           1 :                 return offset + 1
     324           1 :         }
     325           1 :         buf[offset] = byte(defaultBitmapEncoding)
     326           1 :         offset++
     327           1 :         offset = alignWithZeroes(buf, offset, align64)
     328           1 :         dest := makeUnsafeRawSlice[uint64](unsafe.Pointer(&buf[offset]))
     329           1 : 
     330           1 :         nBitmapWords := (nRows + 63) >> 6
     331           1 :         // Truncate the bitmap to the number of words required to represent nRows.
     332           1 :         // The caller may have written more bits than nRows and no longer cares to
     333           1 :         // write them out.
     334           1 :         if len(b.words) > nBitmapWords {
     335           1 :                 b.words = b.words[:nBitmapWords]
     336           1 :         }
     337             :         // Ensure the last word of the bitmap does not contain any set bits beyond
     338             :         // the last row. This is not just for determinism but also to ensure that
     339             :         // the summary bitmap is correct (which is necessary for Bitmap.Successor
     340             :         // correctness).
     341           1 :         if i := nRows % 64; len(b.words) >= nBitmapWords && i != 0 {
     342           1 :                 b.words[nBitmapWords-1] &= (1 << i) - 1
     343           1 :         }
     344             : 
     345             :         // Copy all the words of the bitmap into the destination buffer.
     346           1 :         offset += uint32(copy(dest.Slice(len(b.words)), b.words)) << align64Shift
     347           1 : 
     348           1 :         // The caller may have written fewer than nRows rows if the tail is all
     349           1 :         // zeroes, relying on these bits being implicitly zero. If the tail of b is
     350           1 :         // sparse, fill in zeroes.
     351           1 :         for i := len(b.words); i < nBitmapWords; i++ {
     352           1 :                 dest.set(i, 0)
     353           1 :                 offset += align64
     354           1 :         }
     355             : 
     356             :         // Add the summary bitmap.
     357           1 :         nSummaryWords := (nBitmapWords + 63) >> 6
     358           1 :         for i := 0; i < nSummaryWords; i++ {
     359           1 :                 wordsOff := (i << 6) // i*64
     360           1 :                 nWords := min(64, len(b.words)-wordsOff)
     361           1 :                 var summaryWord uint64
     362           1 :                 for j := 0; j < nWords; j++ {
     363           1 :                         if (b.words)[wordsOff+j] != 0 {
     364           1 :                                 summaryWord |= 1 << j
     365           1 :                         }
     366             :                 }
     367           1 :                 dest.set(nBitmapWords+i, summaryWord)
     368             :         }
     369           1 :         return offset + uint32(nSummaryWords)<<align64Shift
     370             : }
     371             : 
     372             : // WriteDebug implements the ColumnWriter interface.
     373           1 : func (b *BitmapBuilder) WriteDebug(w io.Writer, rows int) {
     374           1 :         // TODO(jackson): Add more detailed debugging information.
     375           1 :         fmt.Fprint(w, "bitmap")
     376           1 : }
     377             : 
     378           1 : func bitmapToBinFormatter(f *binfmt.Formatter, rows int) {
     379           1 :         encoding := bitmapEncoding(f.PeekUint(1))
     380           1 :         f.HexBytesln(1, "bitmap encoding")
     381           1 :         if encoding == zeroBitmapEncoding {
     382           1 :                 return
     383           1 :         }
     384           1 :         if encoding != defaultBitmapEncoding {
     385           0 :                 panic(fmt.Sprintf("unknown bitmap encoding %d", encoding))
     386             :         }
     387           1 :         if aligned := align(f.Offset(), 8); aligned-f.Offset() != 0 {
     388           1 :                 f.HexBytesln(aligned-f.Offset(), "padding to align to 64-bit boundary")
     389           1 :         }
     390           1 :         bitmapWords := (rows + 63) / 64
     391           1 :         for i := 0; i < bitmapWords; i++ {
     392           1 :                 f.Line(8).Append("b ").Binary(8).Done("bitmap word %d", i)
     393           1 :         }
     394           1 :         summaryWords := (bitmapWords + 63) / 64
     395           1 :         for i := 0; i < summaryWords; i++ {
     396           1 :                 f.Line(8).Append("b ").Binary(8).Done("bitmap summary word %d-%d", i*64, i*64+63)
     397           1 :         }
     398             : }

Generated by: LCOV version 1.14