LCOV - code coverage report
Current view: top level - pebble/sstable/colblk - raw_bytes.go (source / functions) Hit Total Coverage
Test: 2024-10-04 08:16Z 6fa80f28 - tests only.lcov Lines: 93 99 93.9 %
Date: 2024-10-04 08:17:13 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : package colblk
       6             : 
       7             : import (
       8             :         "bytes"
       9             :         "fmt"
      10             :         "io"
      11             :         "unsafe"
      12             : 
      13             :         "github.com/cockroachdb/pebble/internal/binfmt"
      14             : )
      15             : 
      16             : // RawBytes holds an array of byte slices, stored as a concatenated data section
      17             : // and a series of offsets for each slice. Byte slices within RawBytes are
      18             : // stored in their entirety without any compression, ensuring stability without
      19             : // copying.
      20             : //
      21             : // # Representation
      22             : //
      23             : // An array of N byte slices encodes N+1 offsets. The beginning of the data
      24             : // representation holds an offsets table, in the same encoding as a
      25             : // DataTypeUint32 column. The integer offsets may be encoded using smaller width
      26             : // integers to save space if all offsets fit within an 8-bit or 16-bit uint.
      27             : // Each offset is relative to the beginning of the string data section (after
      28             : // the offset table).
      29             : //
      30             : // The use of UintEncoding conserves space in the common case. In the context of
      31             : // CockroachDB, the vast majority of offsets will fit in 16-bits when using 32
      32             : // KiB blocks (the size in use by CockroachDB). However, a single value larger
      33             : // than 65535 bytes requires an offset too large to fit within 16 bits, in which
      34             : // case offsets will be encoded as 32-bit integers.
      35             : //
      36             : //      +-------------------------------------------------------------------+
      37             : //      |        a uint offsets table, usually encoded with 16-bits,        |
      38             : //      |                possibly padded for alignment                      |
      39             : //      |                      (see UintEncoding)                           |
      40             : //      +-------------------------------------------------------------------+
      41             : //      |                           String Data                             |
      42             : //      |  abcabcada....                                                    |
      43             : //      +-------------------------------------------------------------------+
      44             : //
      45             : // The UintEncoding bits of the ColumnEncoding for a RawBytes column describe
      46             : // the encoding of the offset table.
      47             : type RawBytes struct {
      48             :         slices  int // number of byte slices in the array (the count passed to DecodeRawBytes)
      49             :         offsets UnsafeOffsets // decoded offsets table holding slices+1 entries; At reads a slice's bounds from it
      50             :         start   unsafe.Pointer // address of b[offset] as passed to DecodeRawBytes, i.e. where the encoded column begins
      51             :         data    unsafe.Pointer // address of the first byte of the data section; ptr() adds offsets to this base
      52             : }
      53             : 
      54             : // Assert that RawBytes (the value type, not a pointer to it) implements Array[[]byte].
      55             : var _ Array[[]byte] = RawBytes{}
      56             : 
      57             : // DecodeRawBytes decodes the structure of a RawBytes, constructing an accessor
      58             : // for an array of byte slices constructed by RawBytesBuilder. Count must be the
      59             : // number of byte slices within the array.
      60           1 : func DecodeRawBytes(b []byte, offset uint32, count int) (rawBytes RawBytes, endOffset uint32) {
      61           1 :         if count == 0 {
      62           1 :                 return RawBytes{}, offset
      63           1 :         }
      64           1 :         offsets, dataOff := DecodeUnsafeOffsets(b, offset, count+1 /* +1 offset */)
      65           1 :         return RawBytes{
      66           1 :                 slices:  count,
      67           1 :                 offsets: offsets,
      68           1 :                 start:   unsafe.Pointer(&b[offset]),
      69           1 :                 data:    unsafe.Pointer(&b[dataOff]),
      70           1 :         }, dataOff + offsets.At(count)
      71             : }
      72             : 
      73             : // Assert that DecodeRawBytes implements DecodeFunc.
      74             : var _ DecodeFunc[RawBytes] = DecodeRawBytes
      75             : 
      76           1 : func defaultSliceFormatter(x []byte) string {
      77           1 :         if bytes.ContainsFunc(x, func(r rune) bool { return r < 32 || r > 126 }) {
      78           1 :                 return fmt.Sprintf("%q", x)
      79           1 :         }
      80           1 :         return string(x)
      81             : }
      82             : 
      83           1 : func rawBytesToBinFormatter(f *binfmt.Formatter, count int, sliceFormatter func([]byte) string) {
      84           1 :         if count == 0 {
      85           1 :                 return
      86           1 :         }
      87           1 :         if sliceFormatter == nil {
      88           1 :                 sliceFormatter = defaultSliceFormatter
      89           1 :         }
      90             : 
      91           1 :         rb, _ := DecodeRawBytes(f.RelativeData(), uint32(f.RelativeOffset()), count)
      92           1 :         dataOffset := uint64(f.RelativeOffset()) + uint64(uintptr(rb.data)-uintptr(rb.start))
      93           1 :         f.CommentLine("rawbytes")
      94           1 :         f.CommentLine("offsets table")
      95           1 :         uintsToBinFormatter(f, count+1, func(offset, base uint64) string {
      96           1 :                 // NB: base is always zero for RawBytes columns.
      97           1 :                 return fmt.Sprintf("%d [%d overall]", offset+base, offset+base+dataOffset)
      98           1 :         })
      99           1 :         f.CommentLine("data")
     100           1 :         for i := 0; i < rb.slices; i++ {
     101           1 :                 s := rb.At(i)
     102           1 :                 f.HexBytesln(len(s), "data[%d]: %s", i, sliceFormatter(s))
     103           1 :         }
     104             : }
     105             : 
     106           1 : func (b *RawBytes) ptr(offset uint32) unsafe.Pointer {
     107           1 :         return unsafe.Pointer(uintptr(b.data) + uintptr(offset))
     108           1 : }
     109             : 
     110             : //gcassert:inline
     111           1 : func (b *RawBytes) slice(start, end uint32) []byte {
     112           1 :         return unsafe.Slice((*byte)(b.ptr(start)), end-start)
     113           1 : }
     114             : 
     115             : // At returns the []byte at index i. The returned slice should not be mutated.
     116           1 : func (b RawBytes) At(i int) []byte {
     117           1 :         return b.slice(b.offsets.At2(i))
     118           1 : }
     119             : 
     120             : // Slices returns the number of []byte slices encoded within the RawBytes.
     121           0 : func (b *RawBytes) Slices() int {
     122           0 :         return b.slices
     123           0 : }
     124             : 
     125             : // RawBytesBuilder encodes a column of byte slices: Put and PutConcat accumulate slices, and Finish serializes the offsets followed by the concatenated data.
     126             : type RawBytesBuilder struct {
     127             :         rows    int // number of slices added since the last Reset
     128             :         data    []byte // concatenation of every slice added so far
     129             :         offsets UintBuilder // end offsets into data: entry 0 is set to 0 by Reset, and entry r is len(data) after the r'th slice was added
     130             : }
     131             : 
     132             : // Assert that *RawBytesBuilder implements ColumnWriter.
     133             : var _ ColumnWriter = (*RawBytesBuilder)(nil)
     134             : 
     135             : // Init initializes the builder for first-time use, leaving it in the same empty state that Reset produces.
     136           1 : func (b *RawBytesBuilder) Init() {
     137           1 :         b.offsets.Init() // initialize the offsets builder before Reset writes the initial zero offset into it
     138           1 :         b.Reset() // zero rows, empty data, and offsets entry 0 set to 0
     139           1 : }
     140             : 
     141             : // Reset resets the builder to an empty state.
     142           1 : func (b *RawBytesBuilder) Reset() {
     143           1 :         b.rows = 0
     144           1 :         b.data = b.data[:0]
     145           1 :         b.offsets.Reset()
     146           1 :         // Add an initial offset of zero to streamline the logic in RawBytes.At() to
     147           1 :         // avoid needing a special case for row 0.
     148           1 :         b.offsets.Set(0, 0)
     149           1 : }
     150             : 
     151             : // NumColumns implements ColumnWriter.
     152           1 : func (b *RawBytesBuilder) NumColumns() int { return 1 }
     153             : 
     154             : // DataType implements ColumnWriter.
     155           1 : func (b *RawBytesBuilder) DataType(int) DataType { return DataTypeBytes }
     156             : 
     157             : // Put appends the provided byte slice to the builder.
     158           1 : func (b *RawBytesBuilder) Put(s []byte) {
     159           1 :         b.data = append(b.data, s...)
     160           1 :         b.rows++
     161           1 :         b.offsets.Set(b.rows, uint64(len(b.data)))
     162           1 : }
     163             : 
     164             : // PutConcat appends a single byte slice formed by the concatenation of the two
     165             : // byte slice arguments.
     166           1 : func (b *RawBytesBuilder) PutConcat(s1, s2 []byte) {
     167           1 :         b.data = append(append(b.data, s1...), s2...)
     168           1 :         b.rows++
     169           1 :         b.offsets.Set(b.rows, uint64(len(b.data)))
     170           1 : }
     171             : 
     172             : // Rows returns the count of slices that have been added to the builder.
     173           0 : func (b *RawBytesBuilder) Rows() int {
     174           0 :         return b.rows
     175           0 : }
     176             : 
     177             : // UnsafeGet returns the i'th slice added to the builder. The returned slice is
     178             : // owned by the builder and must not be mutated.
     179           1 : func (b *RawBytesBuilder) UnsafeGet(i int) []byte {
     180           1 :         if b.rows == 0 {
     181           1 :                 return nil
     182           1 :         }
     183           1 :         return b.data[b.offsets.array.elems.At(i):b.offsets.array.elems.At(i+1)]
     184             : }
     185             : 
     186             : // Finish writes the serialized byte slices to buf starting at offset. The buf
     187             : // slice must be sufficiently large to store the serialized output. The caller
     188             : // should use [Size] to size buf appropriately before calling Finish.
     189           1 : func (b *RawBytesBuilder) Finish(col, rows int, offset uint32, buf []byte) uint32 {
     190           1 :         if rows == 0 {
     191           1 :                 return offset
     192           1 :         }
     193           1 :         dataLen := b.offsets.Get(rows)
     194           1 :         offset = b.offsets.Finish(0, rows+1, offset, buf)
     195           1 :         // Copy the data section.
     196           1 :         return offset + uint32(copy(buf[offset:], b.data[:dataLen]))
     197             : }
     198             : 
     199             : // Size computes the size required to encode the byte slices beginning in a
     200             : // buffer at the provided offset. The offset is required to ensure proper
     201             : // alignment. The returned uint32 is the offset of the first byte after the end
     202             : // of the encoded data. To compute the size in bytes, subtract the [offset]
     203             : // passed into Size from the returned offset.
     204           1 : func (b *RawBytesBuilder) Size(rows int, offset uint32) uint32 {
     205           1 :         if rows == 0 {
     206           1 :                 return offset
     207           1 :         }
     208             :         // Get the size needed to encode the rows+1 offsets.
     209           1 :         offset = b.offsets.Size(rows+1, offset)
     210           1 :         // Add the value of offset[rows] since that is the accumulated size of the
     211           1 :         // first [rows] slices.
     212           1 :         return offset + uint32(b.offsets.Get(rows))
     213             : }
     214             : 
     215             : // WriteDebug implements Encoder.
     216           1 : func (b *RawBytesBuilder) WriteDebug(w io.Writer, rows int) {
     217           1 :         fmt.Fprintf(w, "bytes: %d rows set; %d bytes in data", b.rows, len(b.data))
     218           1 : }

Generated by: LCOV version 1.14