LCOV - code coverage report
Current view: top level - pebble/sstable/colblk - bitmap.go (source / functions) Hit Total Coverage
Test: 2024-09-13 08:16Z 0665a3e1 - meta test only.lcov Lines: 0 237 0.0 %
Date: 2024-09-13 08:17:06 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : package colblk
       6             : 
       7             : import (
       8             :         "fmt"
       9             :         "io"
      10             :         "math/bits"
      11             :         "slices"
      12             :         "strings"
      13             :         "unsafe"
      14             : 
      15             :         "github.com/cockroachdb/errors"
      16             :         "github.com/cockroachdb/pebble/internal/binfmt"
      17             : )
      18             : 
      19             : // Bitmap is a bitmap structure built on a []uint64. A bitmap utilizes ~1
      20             : // physical bit/logical bit (~0.125 bytes/row). The bitmap is encoded into an
      21             : // 8-byte aligned array of 64-bit words which is (nRows+63)/64 words in length.
      22             : //
      23             : // A summary bitmap is stored after the primary bitmap in which each bit in the
      24             : // summary bitmap corresponds to 1 word in the primary bitmap. A bit is set in
      25             : // the summary bitmap if the corresponding word in the primary bitmap is
      26             : // non-zero. The summary bitmap accelerates predecessor and successor
      27             : // operations.
      28             : type Bitmap struct {
      29             :         // data contains the bitmap data, according to defaultBitmapEncoding, or it
      30             :         // is nil if the bitmap is all zeros.
      31             :         data     UnsafeRawSlice[uint64]
      32             :         bitCount int
      33             : }
      34             : 
      35             : // Assert that Bitmap implements Array[bool].
      36             : var _ Array[bool] = Bitmap{}
      37             : 
      38             : // DecodeBitmap decodes the structure of a Bitmap and returns a Bitmap that
      39             : // reads from b supporting bitCount logical bits. No bounds checking is
      40             : // performed, so the caller must guarantee the bitmap is appropriately sized and
      41             : // the provided bitCount correctly identifies the number of bits in the bitmap.
      42           0 : func DecodeBitmap(b []byte, off uint32, bitCount int) (bitmap Bitmap, endOffset uint32) {
      43           0 :         encoding := bitmapEncoding(b[off])
      44           0 :         off++
      45           0 :         if encoding == zeroBitmapEncoding {
      46           0 :                 return Bitmap{bitCount: bitCount}, off
      47           0 :         }
      48           0 :         off = align(off, align64)
      49           0 :         sz := bitmapRequiredSize(bitCount)
      50           0 :         if len(b) < int(off)+sz {
      51           0 :                 panic(errors.AssertionFailedf("bitmap of %d bits requires at least %d bytes; provided with %d-byte slice",
      52           0 :                         bitCount, bitmapRequiredSize(bitCount), len(b[off:])))
      53             :         }
      54           0 :         return Bitmap{
      55           0 :                 data:     makeUnsafeRawSlice[uint64](unsafe.Pointer(&b[off])),
      56           0 :                 bitCount: bitCount,
      57           0 :         }, off + uint32(sz)
      58             : }
      59             : 
      60             : // Assert that DecodeBitmap implements DecodeFunc.
      61             : var _ DecodeFunc[Bitmap] = DecodeBitmap
      62             : 
      63             : // At returns true if the bit at position i is set and false otherwise.
      64           0 : func (b Bitmap) At(i int) bool {
      65           0 :         if b.data.ptr == nil {
      66           0 :                 // zero bitmap case.
      67           0 :                 return false
      68           0 :         }
      69             :         // Inline b.data.At(i/64).
      70             :         // The offset of the correct word is i / 64 * 8 = (i >> 3) &^ 0b111
      71           0 :         const mask = ^uintptr(0b111)
      72           0 :         val := *(*uint64)(unsafe.Pointer(uintptr(b.data.ptr) + (uintptr(i)>>3)&mask))
      73           0 :         return val&(1<<(uint(i)&63)) != 0
      74             : }
      75             : 
      76             : // Successor returns the next bit greater than or equal to i set in the bitmap.
      77             : // The i parameter must be in [0, bitCount). Returns the number of bits
      78             : // represented by the bitmap if no next bit is set.
      79           0 : func (b Bitmap) Successor(i int) int {
      80           0 :         if b.data.ptr == nil {
      81           0 :                 // Zero bitmap case.
      82           0 :                 return b.bitCount
      83           0 :         }
      84             :         // nextInWord returns the index of the smallest set bit with an index >= bit
      85             :         // within the provided word.  The returned index is an index local to the
      86             :         // word.
      87           0 :         nextInWord := func(word uint64, bit uint) int {
      88           0 :                 // We want to find the index of the next set bit. We can accomplish this
      89           0 :                 // by clearing the trailing `bit` bits from the word and counting the
      90           0 :                 // number of trailing zeros. For example, consider the word and bit=37:
      91           0 :                 //
      92           0 :                 //           word: 1010101010111111111110000001110101010101011111111111000000111011
      93           0 :                 //
      94           0 :                 //         1<<bit: 0000000000000000000000000010000000000000000000000000000000000000
      95           0 :                 //       1<<bit-1: 0000000000000000000000000001111111111111111111111111111111111111
      96           0 :                 //      ^1<<bit-1: 1111111111111111111111111110000000000000000000000000000000000000
      97           0 :                 // word&^1<<bit-1: 1010101010111111111110000000000000000000000000000000000000000000
      98           0 :                 //
      99           0 :                 // Counting the trailing zeroes of this last value gives us 43. For
     100           0 :                 // visualizing, 1<<43 is:
     101           0 :                 //
     102           0 :                 //                 0000000000000000000010000000000000000000000000000000000000000000
     103           0 :                 //
     104           0 :                 return bits.TrailingZeros64(word &^ ((1 << bit) - 1))
     105           0 :         }
     106             : 
     107           0 :         wordIdx := i >> 6 // i/64
     108           0 :         // Fast path for common case of reasonably dense bitmaps; if the there's a
     109           0 :         // bit > i set in the same word, return it.
     110           0 :         if next := nextInWord(b.data.At(wordIdx), uint(i%64)); next < 64 {
     111           0 :                 return wordIdx<<6 + next
     112           0 :         }
     113             : 
     114             :         // Consult summary structure to find the next word with a set bit. The word
     115             :         // we just checked (wordIdx) is represented by the wordIdx%64'th bit in the
     116             :         // wordIdx/64'th summary word. We want to know if any of the other later
     117             :         // words that are summarized together have a set bit. We call [nextInWord]
     118             :         // on the summary word to get the index of which word has a set bit, if any.
     119           0 :         summaryTableOffset, summaryTableEnd := b.summaryTableBounds()
     120           0 :         summaryWordIdx := summaryTableOffset + wordIdx>>6
     121           0 :         summaryNext := nextInWord(b.data.At(summaryWordIdx), uint(wordIdx%64)+1)
     122           0 :         // If [summaryNext] == 64, then there are no set bits in any of the earlier
     123           0 :         // words represented by the summary word at [summaryWordIdx]. In that case,
     124           0 :         // we need to keep scanning the summary table forwards.
     125           0 :         if summaryNext == 64 {
     126           0 :                 for summaryWordIdx++; ; summaryWordIdx++ {
     127           0 :                         // When we fall off the end of the summary table, we've determined
     128           0 :                         // there are no set bits after i across the entirety of the bitmap.
     129           0 :                         if summaryWordIdx >= summaryTableEnd {
     130           0 :                                 return b.bitCount
     131           0 :                         }
     132           0 :                         if summaryWord := b.data.At(summaryWordIdx); summaryWord != 0 {
     133           0 :                                 summaryNext = bits.TrailingZeros64(summaryWord)
     134           0 :                                 break
     135             :                         }
     136             :                 }
     137             :         }
     138             :         // The summary word index and the summaryNext together tell us which word
     139             :         // has a set bit. The number of leading zeros in the word itself tell us
     140             :         // which bit is set.
     141           0 :         wordIdx = ((summaryWordIdx - summaryTableOffset) << 6) + summaryNext
     142           0 :         return (wordIdx << 6) + bits.TrailingZeros64(b.data.At(wordIdx))
     143             : }
     144             : 
     145             : // Predecessor returns the previous bit less than or equal to i set in the
     146             : // bitmap. The i parameter must be in [0, bitCount). Returns -1 if no previous
     147             : // bit is set.
     148           0 : func (b Bitmap) Predecessor(i int) int {
     149           0 :         if b.data.ptr == nil {
     150           0 :                 // Zero bitmap case.
     151           0 :                 return -1
     152           0 :         }
     153             :         // prevInWord returns the index of the largest set bit ≤ bit within the
     154             :         // provided word. The returned index is an index local to the word. Returns
     155             :         // -1 if no set bit is found.
     156           0 :         prevInWord := func(word uint64, bit uint) int {
     157           0 :                 // We want to find the index of the previous set bit. We can accomplish
     158           0 :                 // this by clearing the leading `bit` bits from the word and counting
     159           0 :                 // the number of leading zeros. For example, consider the word and
     160           0 :                 // bit=42:
     161           0 :                 //
     162           0 :                 //              word: 1010101010111111111110000001110101010101011111111111000000111011
     163           0 :                 //
     164           0 :                 //        1<<(bit+1): 0000000000000000000010000000000000000000000000000000000000000000
     165           0 :                 //      1<<(bit+1)-1: 0000000000000000000001111111111111111111111111111111111111111111
     166           0 :                 // word&1<<(bit+1)-1: 0000000000000000000000000001110101010101011111111111000000111011
     167           0 :                 //
     168           0 :                 // Counting the leading zeroes of this last value gives us 27 leading
     169           0 :                 // zeros. 63-27 gives index 36. For visualizing, 1<<36 is:
     170           0 :                 //
     171           0 :                 //                    0000000000000000000000000001000000000000000000000000000000000000
     172           0 :                 //
     173           0 :                 return 63 - bits.LeadingZeros64(word&((1<<(bit+1))-1))
     174           0 :         }
     175             : 
     176           0 :         wordIdx := i >> 6 // i/64
     177           0 :         // Fast path for common case of reasonably dense bitmaps; if the there's a
     178           0 :         // bit < i set in the same word, return it.
     179           0 :         if prev := prevInWord(b.data.At(wordIdx), uint(i%64)); prev >= 0 {
     180           0 :                 return (wordIdx << 6) + prev
     181           0 :         }
     182             : 
     183             :         // Consult summary structure to find the next word with a set bit. The word
     184             :         // we just checked (wordIdx) is represented by the wordIdx%64'th bit in the
     185             :         // wordIdx/64'th summary word. We want to know if any of other earlier words
     186             :         // that are summarized together have a set bit. We call [prevInWord] on the
     187             :         // summary word to get the index of which word has a set bit, if any.
     188           0 :         summaryTableOffset, _ := b.summaryTableBounds()
     189           0 :         summaryWordIdx := summaryTableOffset + wordIdx>>6
     190           0 :         summaryPrev := prevInWord(b.data.At(summaryWordIdx), uint(wordIdx%64)-1)
     191           0 :         // If [summaryPrev] is negative, then there are no set bits in any of the
     192           0 :         // earlier words represented by the summary word at [summaryWordIdx]. In
     193           0 :         // that case, we need to keep scanning the summary table backwards.
     194           0 :         if summaryPrev < 0 {
     195           0 :                 for summaryWordIdx--; ; summaryWordIdx-- {
     196           0 :                         // When we fall below the beginning of the summary table, we've
     197           0 :                         // determined there are no set bits before i across the entirety of
     198           0 :                         // the bitmap.
     199           0 :                         if summaryWordIdx < summaryTableOffset {
     200           0 :                                 return -1
     201           0 :                         }
     202           0 :                         if summaryWord := b.data.At(summaryWordIdx); summaryWord != 0 {
     203           0 :                                 summaryPrev = 63 - bits.LeadingZeros64(summaryWord)
     204           0 :                                 break
     205             :                         }
     206             :                 }
     207             :         }
     208             :         // The summary word index and the summary prev together tell us which word
     209             :         // has a set bit. The number of trailing zeros in the word itself tell us
     210             :         // which bit is set.
     211           0 :         wordIdx = ((summaryWordIdx - summaryTableOffset) << 6) + summaryPrev
     212           0 :         return (wordIdx << 6) + 63 - bits.LeadingZeros64(b.data.At(wordIdx))
     213             : }
     214             : 
     215           0 : func (b Bitmap) summaryTableBounds() (startOffset, endOffset int) {
     216           0 :         startOffset = (b.bitCount + 63) >> 6
     217           0 :         endOffset = startOffset + startOffset>>6
     218           0 :         return startOffset, endOffset
     219           0 : }
     220             : 
     221             : // String returns a string representation of the entire bitmap.
     222           0 : func (b Bitmap) String() string {
     223           0 :         var sb strings.Builder
     224           0 :         for w := 0; w < (b.bitCount+63)/64; w++ {
     225           0 :                 fmt.Fprintf(&sb, "%064b", b.data.At(w))
     226           0 :         }
     227           0 :         return sb.String()
     228             : }
     229             : 
     230             : // BitmapBuilder constructs a Bitmap. Bits default to false.
     231             : type BitmapBuilder struct {
     232             :         words []uint64
     233             : }
     234             : 
     235             : type bitmapEncoding uint8
     236             : 
     237             : const (
     238             :         // defaultBitmapEncoding encodes the bitmap using ⌈n/64⌉ words followed by
     239             :         // ⌈⌈n/64⌉/64⌉ summary words.
     240             :         defaultBitmapEncoding bitmapEncoding = iota
     241             :         // zeroBitmapEncoding is used for the special case when the bitmap is empty.
     242             :         zeroBitmapEncoding
     243             : )
     244             : 
     245             : // Assert that BitmapBuilder implements ColumnWriter.
     246             : var _ ColumnWriter = (*BitmapBuilder)(nil)
     247             : 
     248             : // bitmapRequiredSize returns the size of an encoded bitmap in bytes, using the
     249             : // defaultBitmapEncoding.
     250           0 : func bitmapRequiredSize(total int) int {
     251           0 :         nWords := (total + 63) >> 6          // divide by 64
     252           0 :         nSummaryWords := (nWords + 63) >> 6  // divide by 64
     253           0 :         return (nWords + nSummaryWords) << 3 // multiply by 8
     254           0 : }
     255             : 
     256             : // Set sets the bit at position i to true.
     257           0 : func (b *BitmapBuilder) Set(i int) {
     258           0 :         w := i >> 6 // divide by 64
     259           0 :         for len(b.words) <= w {
     260           0 :                 b.words = append(b.words, 0)
     261           0 :         }
     262           0 :         b.words[w] |= 1 << uint(i%64)
     263             : }
     264             : 
     265             : // isZero returns true if no bits are set and Invert was not called.
     266           0 : func (b *BitmapBuilder) isZero() bool {
     267           0 :         return len(b.words) == 0
     268           0 : }
     269             : 
     270             : // Reset resets the bitmap to the empty state.
     271           0 : func (b *BitmapBuilder) Reset() {
     272           0 :         clear(b.words)
     273           0 :         b.words = b.words[:0]
     274           0 : }
     275             : 
     276             : // NumColumns implements the ColumnWriter interface.
     277           0 : func (b *BitmapBuilder) NumColumns() int { return 1 }
     278             : 
     279             : // DataType implements the ColumnWriter interface.
     280           0 : func (b *BitmapBuilder) DataType(int) DataType { return DataTypeBool }
     281             : 
     282             : // Size implements the ColumnWriter interface.
     283           0 : func (b *BitmapBuilder) Size(rows int, offset uint32) uint32 {
     284           0 :         // First byte will be the encoding type.
     285           0 :         offset++
     286           0 :         if b.isZero() {
     287           0 :                 return offset
     288           0 :         }
     289           0 :         offset = align(offset, align64)
     290           0 :         return offset + uint32(bitmapRequiredSize(rows))
     291             : }
     292             : 
     293             : // InvertedSize returns the size of the encoded bitmap, assuming Invert will be called.
     294           0 : func (b *BitmapBuilder) InvertedSize(rows int, offset uint32) uint32 {
     295           0 :         // First byte will be the encoding type.
     296           0 :         offset++
     297           0 :         // An inverted bitmap will never use all-zeros encoding (even if it happens to
     298           0 :         // be all zero).
     299           0 :         offset = align(offset, align64)
     300           0 :         return offset + uint32(bitmapRequiredSize(rows))
     301           0 : }
     302             : 
     303             : // Invert inverts the bitmap, setting all bits that are not set and clearing all
     304             : // bits that are set. If the bitmap's tail is sparse and is not large enough to
     305             : // represent nRows rows, it's first materialized.
     306             : //
     307             : // Note that Invert can affect the Size of the bitmap. Use InvertedSize() if you
     308             : // intend to invert the bitmap before finishing.
     309           0 : func (b *BitmapBuilder) Invert(nRows int) {
     310           0 :         // If the tail of b is sparse, fill in zeroes before inverting.
     311           0 :         nBitmapWords := (nRows + 63) >> 6
     312           0 :         b.words = slices.Grow(b.words, nBitmapWords-len(b.words))[:nBitmapWords]
     313           0 :         for i := range b.words {
     314           0 :                 b.words[i] = ^b.words[i]
     315           0 :         }
     316             : }
     317             : 
     318             : // Finish finalizes the bitmap, computing the per-word summary bitmap and
     319             : // writing the resulting data to buf at offset.
     320           0 : func (b *BitmapBuilder) Finish(col, nRows int, offset uint32, buf []byte) uint32 {
     321           0 :         if b.isZero() {
     322           0 :                 buf[offset] = byte(zeroBitmapEncoding)
     323           0 :                 return offset + 1
     324           0 :         }
     325           0 :         buf[offset] = byte(defaultBitmapEncoding)
     326           0 :         offset++
     327           0 :         offset = alignWithZeroes(buf, offset, align64)
     328           0 :         dest := makeUnsafeRawSlice[uint64](unsafe.Pointer(&buf[offset]))
     329           0 : 
     330           0 :         nBitmapWords := (nRows + 63) >> 6
     331           0 :         // Truncate the bitmap to the number of words required to represent nRows.
     332           0 :         // The caller may have written more bits than nRows and no longer cares to
     333           0 :         // write them out.
     334           0 :         if len(b.words) > nBitmapWords {
     335           0 :                 b.words = b.words[:nBitmapWords]
     336           0 :         }
     337             :         // Ensure the last word of the bitmap does not contain any set bits beyond
     338             :         // the last row. This is not just for determinism but also to ensure that
     339             :         // the summary bitmap is correct (which is necessary for Bitmap.Successor
     340             :         // correctness).
     341           0 :         if i := nRows % 64; len(b.words) >= nBitmapWords && i != 0 {
     342           0 :                 b.words[nBitmapWords-1] &= (1 << i) - 1
     343           0 :         }
     344             : 
     345             :         // Copy all the words of the bitmap into the destination buffer.
     346           0 :         offset += uint32(copy(dest.Slice(len(b.words)), b.words)) << align64Shift
     347           0 : 
     348           0 :         // The caller may have written fewer than nRows rows if the tail is all
     349           0 :         // zeroes, relying on these bits being implicitly zero. If the tail of b is
     350           0 :         // sparse, fill in zeroes.
     351           0 :         for i := len(b.words); i < nBitmapWords; i++ {
     352           0 :                 dest.set(i, 0)
     353           0 :                 offset += align64
     354           0 :         }
     355             : 
     356             :         // Add the summary bitmap.
     357           0 :         nSummaryWords := (nBitmapWords + 63) >> 6
     358           0 :         for i := 0; i < nSummaryWords; i++ {
     359           0 :                 wordsOff := (i << 6) // i*64
     360           0 :                 nWords := min(64, len(b.words)-wordsOff)
     361           0 :                 var summaryWord uint64
     362           0 :                 for j := 0; j < nWords; j++ {
     363           0 :                         if (b.words)[wordsOff+j] != 0 {
     364           0 :                                 summaryWord |= 1 << j
     365           0 :                         }
     366             :                 }
     367           0 :                 dest.set(nBitmapWords+i, summaryWord)
     368             :         }
     369           0 :         return offset + uint32(nSummaryWords)<<align64Shift
     370             : }
     371             : 
     372             : // WriteDebug implements the ColumnWriter interface.
     373           0 : func (b *BitmapBuilder) WriteDebug(w io.Writer, rows int) {
     374           0 :         // TODO(jackson): Add more detailed debugging information.
     375           0 :         fmt.Fprint(w, "bitmap")
     376           0 : }
     377             : 
     378           0 : func bitmapToBinFormatter(f *binfmt.Formatter, rows int) {
     379           0 :         encoding := bitmapEncoding(f.PeekUint(1))
     380           0 :         f.HexBytesln(1, "bitmap encoding")
     381           0 :         if encoding == zeroBitmapEncoding {
     382           0 :                 return
     383           0 :         }
     384           0 :         if encoding != defaultBitmapEncoding {
     385           0 :                 panic(fmt.Sprintf("unknown bitmap encoding %d", encoding))
     386             :         }
     387           0 :         if aligned := align(f.RelativeOffset(), 8); aligned-f.RelativeOffset() != 0 {
     388           0 :                 f.HexBytesln(aligned-f.RelativeOffset(), "padding to align to 64-bit boundary")
     389           0 :         }
     390           0 :         bitmapWords := (rows + 63) / 64
     391           0 :         for i := 0; i < bitmapWords; i++ {
     392           0 :                 f.Line(8).Append("b ").Binary(8).Done("bitmap word %d", i)
     393           0 :         }
     394           0 :         summaryWords := (bitmapWords + 63) / 64
     395           0 :         for i := 0; i < summaryWords; i++ {
     396           0 :                 f.Line(8).Append("b ").Binary(8).Done("bitmap summary word %d-%d", i*64, i*64+63)
     397           0 :         }
     398             : }

Generated by: LCOV version 1.14