LCOV - code coverage report
Current view: top level - pebble/sstable/sstable - runlength_bitmap.go (source / functions) Coverage Total Hit
Test: 2025-09-29 08:19Z e52aa891 - meta test only.lcov Lines: 88.0 % 117 103
Test Date: 2025-09-29 08:21:14 Functions: - 0 0

            Line data    Source code
       1              : // Copyright 2025 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2              : // of this source code is governed by a BSD-style license that can be found in
       3              : // the LICENSE file.
       4              : 
       5              : package sstable
       6              : 
       7              : import (
       8              :         "encoding/binary"
       9              :         "iter"
      10              : 
      11              :         "github.com/cockroachdb/pebble/internal/invariants"
      12              : )
      13              : 
      14              : // BitmapRunLengthEncoder encodes a bitmap. It uses a run-length encoding for
      15              : // runs of all ones and all zeros. The encoding is intended for compactly
      16              : // representing bitmaps with significant spatial locality and when readers only
      17              : // read the bitmap sequentially.
      18              : //
      19              : // The BitmapRunLengthEncoder considers groups of 8-bits of the bitmap at a
      20              : // time. If one of these 8-bit groups (a byte) contains all set bits (0xFF) or
      21              : // all unset bits (0x00), the encoder uses a run-length encoding for the byte
      22              : // and any consecutive identical bytes. All other bytes are encoded as
      23              : // themselves.
      24              : //
      25              : // For example, an all-set bitmap of 1024 bits would be encoded as the 0xFF byte
      26              : // followed by the varint encoding of 128 (1024/8 = 128).
      27              : type BitmapRunLengthEncoder struct {
      28              :         // buf holds the encoded bitmap up until the beginning of the pending run of
      29              :         // all-set bits indicated by allSetRunLength.
      30              :         buf []byte
      31              :         // allSetRunLength is the count of consecutive pending bytes that are all
      32              :         // set (preceding the byte currently being buffered in currByte). When the
      33              :         // current byte is finished, if allSetRunLength is greater than zero, the
      34              :         // all-set sequence must be encoded to buf before the current byte.
      35              :         allSetRunLength int
      36              :         // currByte is the current byte being buffered. After the first call to Set,
      37              :         // currByte always has at least 1 set bit. currByteIndex is the index of the
      38              :         // buffered byte in a fully-materialized bitmap. That is, currByte
      39              :         // accumulates the bits within the bitmap in indices [currByteIndex*8,
      40              :         // (currByteIndex+1)*8).
      41              :         currByte      byte
      42              :         currByteIndex int
      43              : }
      44              : 
      45              : // Init initializes or resets the encoder to its initial state.
      46            1 : func (bb *BitmapRunLengthEncoder) Init() {
      47            1 :         invariants.MaybeMangle(bb.buf)
      48            1 :         bb.buf = bb.buf[:0]
      49            1 :         bb.allSetRunLength = 0
      50            1 :         bb.currByte = 0
      51            1 :         bb.currByteIndex = -1
      52            1 : }
      53              : 
      54              : // Set records that the i'th bit in the bitmap is Set. Set must be called with i
      55              : // in increasing order.
      56            1 : func (bb *BitmapRunLengthEncoder) Set(i int) {
      57            1 :         // bi is the index of the byte that would contain the i'th bit in a
      58            1 :         // fully-materialized bitmap
      59            1 :         bi := i / 8
      60            1 : 
      61            1 :         // If the i'th bit falls into the same byte as the previous call to set,
      62            1 :         // we can just set the bit in currByte.
      63            1 :         if bi == bb.currByteIndex {
      64            1 :                 bb.currByte |= 1 << (i % 8)
      65            1 :                 return
      66            1 :         }
      67              : 
      68              :         // The i'th bit falls into a new byte. We may need to finish encoding
      69              :         // previous bits whose state is buffered on the encoder.
      70              : 
      71              :         // Append what we accumulated within currByte.
      72            1 :         switch bb.currByte {
      73            1 :         case 0x00:
      74              :                 // If and only if this is the first call to set, currByte is all
      75              :                 // zeros and we have nothing to append.
      76            1 :         case 0xFF:
      77            1 :                 // The current byte contains all set bits. Increment the count of the
      78            1 :                 // current pending run.
      79            1 :                 bb.allSetRunLength++
      80            1 :         default:
      81            1 :                 // The current byte contains a mix of set and unset bits. If there's a
      82            1 :                 // pending all-ones run, encode it.
      83            1 :                 bb.maybeFlushAllSetRun()
      84            1 :                 // And append the mixed byte to buf.
      85            1 :                 bb.buf = append(bb.buf, bb.currByte)
      86              :         }
      87              : 
      88              :         // If there's a gap between the index of the byte we just finished and the
      89              :         // index of the new byte we're beginning, we need to append a run of zeros.
      90            1 :         if n := bi - bb.currByteIndex - 1; n > 0 {
      91            1 :                 bb.maybeFlushAllSetRun()
      92            1 :                 bb.buf = append(bb.buf, 0x00)
      93            1 :                 bb.buf = binary.AppendUvarint(bb.buf, uint64(n))
      94            1 :         }
      95              : 
      96              :         // Save the bit on currByte.
      97            1 :         bb.currByteIndex = bi
      98            1 :         bb.currByte = 1 << (i % 8)
      99              : }
     100              : 
     101            1 : func (bb *BitmapRunLengthEncoder) maybeFlushAllSetRun() {
     102            1 :         if bb.allSetRunLength > 0 {
     103            1 :                 bb.buf = append(bb.buf, 0xFF)
     104            1 :                 bb.buf = binary.AppendUvarint(bb.buf, uint64(bb.allSetRunLength))
     105            1 :                 bb.allSetRunLength = 0
     106            1 :         }
     107              : }
     108              : 
     109              : // FinishAndAppend appends the encoded bitmap to buf and returns the result.
     110            1 : func (bb *BitmapRunLengthEncoder) FinishAndAppend(buf []byte) []byte {
     111            1 :         // Finish the current pending state.
     112            1 :         switch bb.currByte {
     113            1 :         case 0xFF:
     114            1 :                 bb.allSetRunLength++
     115            1 :                 bb.maybeFlushAllSetRun()
     116            0 :         case 0x00:
     117              :                 // Only possible if Set was never called. Do nothing.
     118            1 :         default:
     119            1 :                 bb.maybeFlushAllSetRun()
     120            1 :                 bb.buf = append(bb.buf, bb.currByte)
     121              :         }
     122            1 :         return append(buf, bb.buf...)
     123              : }
     124              : 
     125              : // Size returns the current number of bytes in the buf, accounting for the buf
     126              : // state when our current pending byte has been written. This is useful for
     127              : // estimating the size of the bitmap without having to copy the result of
     128              : // FinishAndAppend.
     129            1 : func (bb *BitmapRunLengthEncoder) Size() int {
     130            1 :         switch bb.currByte {
     131            1 :         case 0xFF:
     132            1 :                 // 1 byte for 0xFF + n bytes for varint encoding of
     133            1 :                 // (allSetRunLength + 1).
     134            1 :                 v := bb.allSetRunLength + 1
     135            1 :                 varintSize := 1
     136            1 :                 for v >= 0x80 {
     137            0 :                         v >>= 7
     138            0 :                         varintSize++
     139            0 :                 }
     140            1 :                 return len(bb.buf) + 1 + varintSize
     141            0 :         case 0x00:
     142            0 :                 // Only possible if Set was never called. Do nothing.
     143            0 :                 return len(bb.buf)
     144            1 :         default:
     145            1 :                 if bb.allSetRunLength > 0 {
     146            1 :                         // 1 byte for 0xFF + n bytes for varint encoding of
     147            1 :                         // allSetRunLength + 1 byte for current byte.
     148            1 :                         v := bb.allSetRunLength
     149            1 :                         varintSize := 1
     150            1 :                         for v >= 0x80 {
     151            0 :                                 v >>= 7
     152            0 :                                 varintSize++
     153            0 :                         }
     154            1 :                         return len(bb.buf) + 1 + varintSize + 1
     155              :                 }
     156              :                 // For mixed byte case without pending all-set run: just 1 byte for
     157              :                 // current byte
     158            1 :                 return len(bb.buf) + 1
     159              :         }
     160              : }
     161              : 
     162              : // IterSetBitsInRunLengthBitmap returns an iterator over the indices of set bits
     163              : // within the provided run-length encoded bitmap.
     164            1 : func IterSetBitsInRunLengthBitmap(bitmap []byte) iter.Seq[int] {
     165            1 :         return func(yield func(int) bool) {
     166            1 :                 // i is the index of the current byte in the bitmap.
     167            1 :                 i := 0
     168            1 :                 // bit is the index of the current bit in the overall bitmap.
     169            1 :                 bit := 0
     170            1 :                 for i < len(bitmap) {
     171            1 :                         b := bitmap[i]
     172            1 :                         i++
     173            1 :                         switch b {
     174            1 :                         case 0x00:
     175            1 :                                 // Decode the varint. A decoded value of v indicates that there
     176            1 :                                 // are 8*v consecutive unset bits in the bitmap.
     177            1 :                                 v, n := binary.Uvarint(bitmap[i:])
     178            1 :                                 i += n
     179            1 :                                 bit += 8 * int(v)
     180            1 :                         case 0xFF:
     181            1 :                                 // Decode the varint. A decoded value of v indicates that there
     182            1 :                                 // are 8*v consecutive set bits in the bitmap.
     183            1 :                                 v, n := binary.Uvarint(bitmap[i:])
     184            1 :                                 i += n
     185            1 :                                 for j := range 8 * int(v) {
     186            1 :                                         if !yield(bit + j) {
     187            0 :                                                 return
     188            0 :                                         }
     189              :                                 }
     190            1 :                                 bit += 8 * int(v)
     191            1 :                         default:
     192            1 :                                 // b is a byte that is neither all ones nor all zeros. Check
     193            1 :                                 // each bit and yield the index of any set bits to the iterator.
     194            1 :                                 for j := range 8 {
     195            1 :                                         if b&(1<<j) == 0 {
     196            1 :                                                 continue
     197              :                                         }
     198            1 :                                         if !yield(bit + j) {
     199            0 :                                                 return
     200            0 :                                         }
     201              :                                 }
     202            1 :                                 bit += 8
     203              :                         }
     204              :                 }
     205              :         }
     206              : }
        

Generated by: LCOV version 2.0-1