LCOV - code coverage report
Current view: top level - pebble/sstable/colblk - raw_bytes.go (source / functions) Hit Total Coverage
Test: 2024-11-15 08:17Z 9ed54bc4 - meta test only.lcov Lines: 65 99 65.7 %
Date: 2024-11-15 08:17:58 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : package colblk
       6             : 
       7             : import (
       8             :         "bytes"
       9             :         "fmt"
      10             :         "io"
      11             :         "unsafe"
      12             : 
      13             :         "github.com/cockroachdb/pebble/internal/binfmt"
      14             :         "github.com/cockroachdb/pebble/internal/treeprinter"
      15             : )
      16             : 
      17             : // RawBytes holds an array of byte slices, stored as a concatenated data section
      18             : // and a series of offsets for each slice. Byte slices within RawBytes are
      19             : // stored in their entirety without any compression, ensuring stability without
      20             : // copying.
      21             : //
      22             : // # Representation
      23             : //
      24             : // An array of N byte slices encodes N+1 offsets. The beginning of the data
      25             : // representation holds an offsets table, in the same encoding as a
      26             : // DataTypeUint32 column. The integer offsets may be encoded using smaller width
      27             : // integers to save space if all offsets fit within an 8-bit or 16-bit uint.
      28             : // Each offset is relative to the beginning of the string data section (after
      29             : // the offsets table).
      30             : //
      31             : // The use of UintEncoding conserves space in the common case. In the context of
      32             : // CockroachDB, the vast majority of offsets will fit in 16 bits when using 32
      33             : // KiB blocks (the size in use by CockroachDB). However, a single value larger
      34             : // than 65535 bytes requires an offset too large to fit within 16 bits, in which
      35             : // case offsets will be encoded as 32-bit integers.
      36             : //
      37             : //      +-------------------------------------------------------------------+
      38             : //      |        a uint offsets table, usually encoded with 16 bits,        |
      39             : //      |                possibly padded for alignment                      |
      40             : //      |                      (see UintEncoding)                           |
      41             : //      +-------------------------------------------------------------------+
      42             : //      |                           String Data                             |
      43             : //      |  abcabcada....                                                    |
      44             : //      +-------------------------------------------------------------------+
      45             : //
      46             : // The UintEncoding bits of the ColumnEncoding for a RawBytes column describe
      47             : // the encoding of the offsets table.
      48             : type RawBytes struct {
      49             :         slices  int            // number of byte slices (the count given to DecodeRawBytes)
      50             :         offsets UnsafeOffsets  // decoded offsets table, holding slices+1 entries
      51             :         start   unsafe.Pointer // address of b[offset], where DecodeRawBytes began decoding
      52             :         data    unsafe.Pointer // address of the data section that follows the offsets table
      53             : }
      54             : 
      55             : // Assert that RawBytes (as a value, not a pointer) implements Array[[]byte].
      56             : var _ Array[[]byte] = RawBytes{}
      57             : 
      58             : // DecodeRawBytes decodes a RawBytes column encoded by RawBytesBuilder at
      59             : // b[offset:], returning an accessor for its byte slices and the offset of the
      60             : // first byte after the column. Count must be the number of encoded slices.
      61           1 : func DecodeRawBytes(b []byte, offset uint32, count int) (rawBytes RawBytes, endOffset uint32) {
      62           1 :         if count == 0 { // an empty column consumes no bytes: offset is returned unchanged
      63           1 :                 return RawBytes{}, offset
      64           1 :         }
      65           1 :         offsets, dataOff := DecodeUnsafeOffsets(b, offset, count+1 /* +1 offset */)
      66           1 :         return RawBytes{
      67           1 :                 slices:  count,
      68           1 :                 offsets: offsets,
      69           1 :                 start:   unsafe.Pointer(&b[offset]),
      70           1 :                 data:    unsafe.Pointer(&b[dataOff]), // indexing panics unless dataOff < len(b)
      71           1 :         }, dataOff + offsets.At(count) // data start plus the final offset, i.e. the end of the data
      72             : }
      73             : 
      74             : // Assert that DecodeRawBytes implements DecodeFunc.
      75             : var _ DecodeFunc[RawBytes] = DecodeRawBytes
      76             : // defaultSliceFormatter returns x verbatim if it is all printable ASCII, else %q-quoted.
      77           0 : func defaultSliceFormatter(x []byte) string {
      78           0 :         if bytes.ContainsFunc(x, func(r rune) bool { return r < 32 || r > 126 }) {
      79           0 :                 return fmt.Sprintf("%q", x) // some rune lies outside 32..126: quote and escape
      80           0 :         }
      81           0 :         return string(x)
      82             : }
      83             : // rawBytesToBinFormatter formats the count-slice RawBytes column at f's relative offset under tp.
      84             : func rawBytesToBinFormatter(
      85             :         f *binfmt.Formatter, tp treeprinter.Node, count int, sliceFormatter func([]byte) string,
      86           0 : ) {
      87           0 :         if count == 0 { // nothing to format for an empty column
      88           0 :                 return
      89           0 :         }
      90           0 :         if sliceFormatter == nil { // a nil formatter selects the default rendering
      91           0 :                 sliceFormatter = defaultSliceFormatter
      92           0 :         }
      93             : 
      94           0 :         rb, _ := DecodeRawBytes(f.RelativeData(), uint32(f.RelativeOffset()), count) // end offset unused
      95           0 :         dataOffset := uint64(f.RelativeOffset()) + uint64(uintptr(rb.data)-uintptr(rb.start)) // column start + distance from rb.start to rb.data
      96           0 :         n := tp.Child("offsets table")
      97           0 :         uintsToBinFormatter(f, n, count+1, func(offset, base uint64) string {
      98           0 :                 // NB: base is always zero for RawBytes columns.
      99           0 :                 return fmt.Sprintf("%d [%d overall]", offset+base, offset+base+dataOffset)
     100           0 :         })
     101           0 :         n = tp.Child("data")
     102           0 :         for i := 0; i < rb.slices; i++ { // one HexBytesln call per slice, sized by the slice's length
     103           0 :                 s := rb.At(i)
     104           0 :                 f.HexBytesln(len(s), "data[%d]: %s", i, sliceFormatter(s))
     105           0 :         }
     106           0 :         f.ToTreePrinter(n)
     107             : }
     108             : // ptr returns the address offset bytes past the start of the data section (b.data).
     109           1 : func (b *RawBytes) ptr(offset uint32) unsafe.Pointer {
     110           1 :         return unsafe.Pointer(uintptr(b.data) + uintptr(offset))
     111           1 : }
     112             : // slice returns, without copying, the bytes of the data section in [start, end).
     113             : //gcassert:inline
     114           1 : func (b *RawBytes) slice(start, end uint32) []byte {
     115           1 :         return unsafe.Slice((*byte)(b.ptr(start)), end-start)
     116           1 : }
     117             : 
     118             : // At returns the []byte at index i. The returned slice should not be mutated.
     119           1 : func (b RawBytes) At(i int) []byte {
     120           1 :         return b.slice(b.offsets.At2(i)) // At2(i) supplies the (start, end) data offsets passed to slice
     121           1 : }
     122             : 
     123             : // Slices returns the number of []byte slices encoded within the RawBytes.
     124           0 : func (b *RawBytes) Slices() int {
     125           0 :         return b.slices // the count that was passed to DecodeRawBytes
     126           0 : }
     127             : 
     128             : // RawBytesBuilder encodes a column of byte slices.
     129             : type RawBytesBuilder struct {
     130             :         rows    int         // slices appended via Put/PutConcat since the last Reset
     131             :         data    []byte      // concatenation of every appended slice
     132             :         offsets UintBuilder // offsets[0] is 0; offsets[i] is len(data) after the i'th append
     133             : }
     134             : 
     135             : // Assert that *RawBytesBuilder implements ColumnWriter.
     136             : var _ ColumnWriter = (*RawBytesBuilder)(nil)
     137             : 
     138             : // Init initializes the builder for first-time use.
     139           1 : func (b *RawBytesBuilder) Init() {
     140           1 :         b.offsets.Init()
     141           1 :         b.Reset() // clears rows/data and records the initial zero offset
     142           1 : }
     143             : 
     144             : // Reset resets the builder to an empty state.
     145           1 : func (b *RawBytesBuilder) Reset() {
     146           1 :         b.rows = 0
     147           1 :         b.data = b.data[:0] // truncate but keep the backing array for reuse
     148           1 :         b.offsets.Reset()
     149           1 :         // Add an initial offset of zero to streamline the logic in RawBytes.At() to
     150           1 :         // avoid needing a special case for row 0.
     151           1 :         b.offsets.Set(0, 0)
     152           1 : }
     153             : 
     154             : // NumColumns implements ColumnWriter. A RawBytesBuilder always reports one column.
     155           1 : func (b *RawBytesBuilder) NumColumns() int { return 1 }
     156             : 
     157             : // DataType implements ColumnWriter. The column index argument is ignored.
     158           1 : func (b *RawBytesBuilder) DataType(int) DataType { return DataTypeBytes }
     159             : 
     160             : // Put appends a copy of the provided byte slice to the builder as a new row.
     161           1 : func (b *RawBytesBuilder) Put(s []byte) {
     162           1 :         b.data = append(b.data, s...) // s is copied; the builder keeps no reference to it
     163           1 :         b.rows++
     164           1 :         b.offsets.Set(b.rows, uint64(len(b.data))) // offsets[rows] marks the end of the new slice
     165           1 : }
     166             : 
     167             : // PutConcat appends a single byte slice formed by the concatenation of the two
     168             : // byte slice arguments (s1 followed by s2), adding exactly one row.
     169           1 : func (b *RawBytesBuilder) PutConcat(s1, s2 []byte) {
     170           1 :         b.data = append(append(b.data, s1...), s2...) // both parts are copied straight into data
     171           1 :         b.rows++
     172           1 :         b.offsets.Set(b.rows, uint64(len(b.data))) // a single offset covers s1+s2
     173           1 : }
     174             : 
     175             : // Rows returns the count of slices added to the builder since the last Reset.
     176           0 : func (b *RawBytesBuilder) Rows() int {
     177           0 :         return b.rows
     178           0 : }
     179             : 
     180             : // UnsafeGet returns the i'th slice added to the builder, or nil if the builder is
     181             : // empty. The returned slice aliases the builder's buffer and must not be mutated.
     182           1 : func (b *RawBytesBuilder) UnsafeGet(i int) []byte {
     183           1 :         if b.rows == 0 { // empty builder: nil is returned for any i
     184           1 :                 return nil
     185           1 :         }
     186           1 :         return b.data[b.offsets.array.elems.At(i):b.offsets.array.elems.At(i+1)] // bounded by offsets i and i+1
     187             : }
     188             : 
     189             : // Finish writes the first rows byte slices to buf starting at offset, returning
     190             : // the offset after the written data. The col argument is unused. buf must be
     191             : // large enough for the output; use [RawBytesBuilder.Size] to size it beforehand.
     192           1 : func (b *RawBytesBuilder) Finish(col, rows int, offset uint32, buf []byte) uint32 {
     193           1 :         if rows == 0 { // nothing is written for zero rows
     194           1 :                 return offset
     195           1 :         }
     196           1 :         dataLen := b.offsets.Get(rows) // accumulated length of the first rows slices
     197           1 :         offset = b.offsets.Finish(0, rows+1, offset, buf) // writes rows+1 offsets; data is copied at the returned offset
     198           1 :         // Copy the data section.
     199           1 :         return offset + uint32(copy(buf[offset:], b.data[:dataLen]))
     200             : }
     201             : 
     202             : // Size computes the size required to encode the first rows byte slices in a
     203             : // buffer beginning at the provided offset. The offset is required to ensure
     204             : // proper alignment. The returned uint32 is the offset of the first byte after
     205             : // the end of the encoded data (offset itself when rows is 0). To compute the
     206             : // size in bytes, subtract the offset passed into Size from the returned offset.
     207           1 : func (b *RawBytesBuilder) Size(rows int, offset uint32) uint32 {
     208           1 :         if rows == 0 {
     209           1 :                 return offset
     210           1 :         }
     211             :         // Get the size needed to encode the rows+1 offsets.
     212           1 :         offset = b.offsets.Size(rows+1, offset)
     213           1 :         // Add the value of offset[rows] since that is the accumulated size of the
     214           1 :         // first [rows] slices.
     215           1 :         return offset + uint32(b.offsets.Get(rows))
     216             : }
     217             : 
     218             : // WriteDebug implements Encoder.
     219           0 : func (b *RawBytesBuilder) WriteDebug(w io.Writer, rows int) {
     220           0 :         fmt.Fprintf(w, "bytes: %d rows set; %d bytes in data", b.rows, len(b.data)) // rows is unused; the builder's own totals are reported
     221           0 : }

Generated by: LCOV version 1.14