LCOV - code coverage report
Current view: top level - pebble/sstable/colblk - raw_bytes.go (source / functions) Hit Total Coverage
Test: 2024-09-15 08:16Z 6c9ad29b - meta test only.lcov Lines: 0 99 0.0 %
Date: 2024-09-15 08:17:01 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : package colblk
       6             : 
       7             : import (
       8             :         "bytes"
       9             :         "fmt"
      10             :         "io"
      11             :         "unsafe"
      12             : 
      13             :         "github.com/cockroachdb/pebble/internal/binfmt"
      14             : )
      15             : 
      16             : // RawBytes holds an array of byte slices, stored as a concatenated data section
      17             : // and a series of offsets for each slice. Byte slices within RawBytes are
      18             : // stored in their entirety without any compression, ensuring stability without
      19             : // copying.
      20             : //
      21             : // # Representation
      22             : //
      23             : // An array of N byte slices encodes N+1 offsets. The beginning of the data
      24             : // representation holds an offsets table, in the same encoding as a
      25             : // DataTypeUint32 column. The integer offsets may be encoded using smaller width
      26             : // integers to save space if all offsets fit within an 8-bit or 16-bit uint.
      27             : // Each offset is relative to the beginning of the string data section (after
      28             : // the offset table).
      29             : //
      30             : // The use of UintEncoding conserves space in the common case. In the context of
      31             : // CockroachDB, the vast majority of offsets will fit in 16-bits when using 32
      32             : // KiB blocks (the size in use by CockroachDB). However, a single value larger
      33             : // than 65535 bytes requires an offset too large to fit within 16 bits, in which
      34             : // case offsets will be encoded as 32-bit integers.
      35             : //
      36             : //      +-------------------------------------------------------------------+
      37             : //      |        a uint offsets table, usually encoded with 16-bits,        |
      38             : //      |                possibly padded for alignment                      |
      39             : //      |                      (see UintEncoding)                           |
      40             : //      +-------------------------------------------------------------------+
      41             : //      |                           String Data                             |
      42             : //      |  abcabcada....                                                    |
      43             : //      +-------------------------------------------------------------------+
      44             : //
      45             : // The UintEncoding bits of the ColumnEncoding for a RawBytes column describes
      46             : // the encoding of the offset table.
      47             : type RawBytes struct {
      48             :         slices  int
      49             :         offsets UnsafeOffsets
      50             :         start   unsafe.Pointer
      51             :         data    unsafe.Pointer
      52             : }
      53             : 
      54             : // Assert that RawBytes implements Array[[]byte].
      55             : var _ Array[[]byte] = RawBytes{}
      56             : 
      57             : // DecodeRawBytes decodes the structure of a RawBytes, constructing an accessor
      58             : // for an array of byte slices constructed by RawBytesBuilder. Count must be the
      59             : // number of byte slices within the array.
      60           0 : func DecodeRawBytes(b []byte, offset uint32, count int) (rawBytes RawBytes, endOffset uint32) {
      61           0 :         if count == 0 {
      62           0 :                 return RawBytes{}, offset
      63           0 :         }
      64           0 :         offsets, dataOff := DecodeUnsafeOffsets(b, offset, count+1 /* +1 offset */)
      65           0 :         return RawBytes{
      66           0 :                 slices:  count,
      67           0 :                 offsets: offsets,
      68           0 :                 start:   unsafe.Pointer(&b[offset]),
      69           0 :                 data:    unsafe.Pointer(&b[dataOff]),
      70           0 :         }, dataOff + offsets.At(count)
      71             : }
      72             : 
      73             : // Assert that DecodeRawBytes implements DecodeFunc.
      74             : var _ DecodeFunc[RawBytes] = DecodeRawBytes
      75             : 
      76           0 : func defaultSliceFormatter(x []byte) string {
      77           0 :         if bytes.ContainsFunc(x, func(r rune) bool { return r < 32 || r > 126 }) {
      78           0 :                 return fmt.Sprintf("%q", x)
      79           0 :         }
      80           0 :         return string(x)
      81             : }
      82             : 
      83           0 : func rawBytesToBinFormatter(f *binfmt.Formatter, count int, sliceFormatter func([]byte) string) {
      84           0 :         if count == 0 {
      85           0 :                 return
      86           0 :         }
      87           0 :         if sliceFormatter == nil {
      88           0 :                 sliceFormatter = defaultSliceFormatter
      89           0 :         }
      90             : 
      91           0 :         rb, _ := DecodeRawBytes(f.RelativeData(), uint32(f.RelativeOffset()), count)
      92           0 :         dataOffset := uint64(f.RelativeOffset()) + uint64(uintptr(rb.data)-uintptr(rb.start))
      93           0 :         f.CommentLine("rawbytes")
      94           0 :         f.CommentLine("offsets table")
      95           0 :         uintsToBinFormatter(f, count+1, func(offset, base uint64) string {
      96           0 :                 // NB: base is always zero for RawBytes columns.
      97           0 :                 return fmt.Sprintf("%d [%d overall]", offset+base, offset+base+dataOffset)
      98           0 :         })
      99           0 :         f.CommentLine("data")
     100           0 :         for i := 0; i < rb.slices; i++ {
     101           0 :                 s := rb.At(i)
     102           0 :                 f.HexBytesln(len(s), "data[%d]: %s", i, sliceFormatter(s))
     103           0 :         }
     104             : }
     105             : 
     106           0 : func (b *RawBytes) ptr(offset uint32) unsafe.Pointer {
     107           0 :         return unsafe.Pointer(uintptr(b.data) + uintptr(offset))
     108           0 : }
     109             : 
     110           0 : func (b *RawBytes) slice(start, end uint32) []byte {
     111           0 :         return unsafe.Slice((*byte)(b.ptr(start)), end-start)
     112           0 : }
     113             : 
     114             : // At returns the []byte at index i. The returned slice should not be mutated.
     115           0 : func (b RawBytes) At(i int) []byte {
     116           0 :         return b.slice(b.offsets.At(i), b.offsets.At(i+1))
     117           0 : }
     118             : 
     119             : // Slices returns the number of []byte slices encoded within the RawBytes.
     120           0 : func (b *RawBytes) Slices() int {
     121           0 :         return b.slices
     122           0 : }
     123             : 
     124             : // RawBytesBuilder encodes a column of byte slices.
     125             : type RawBytesBuilder struct {
     126             :         rows    int
     127             :         data    []byte
     128             :         offsets UintBuilder
     129             : }
     130             : 
     131             : // Assert that *RawBytesBuilder implements ColumnWriter.
     132             : var _ ColumnWriter = (*RawBytesBuilder)(nil)
     133             : 
     134             : // Init initializes the builder for first-time use.
     135           0 : func (b *RawBytesBuilder) Init() {
     136           0 :         b.offsets.Init()
     137           0 :         b.Reset()
     138           0 : }
     139             : 
     140             : // Reset resets the builder to an empty state.
     141           0 : func (b *RawBytesBuilder) Reset() {
     142           0 :         b.rows = 0
     143           0 :         b.data = b.data[:0]
     144           0 :         b.offsets.Reset()
     145           0 :         // Add an initial offset of zero to streamline the logic in RawBytes.At() to
     146           0 :         // avoid needing a special case for row 0.
     147           0 :         b.offsets.Set(0, 0)
     148           0 : }
     149             : 
     150             : // NumColumns implements ColumnWriter.
     151           0 : func (b *RawBytesBuilder) NumColumns() int { return 1 }
     152             : 
     153             : // DataType implements ColumnWriter.
     154           0 : func (b *RawBytesBuilder) DataType(int) DataType { return DataTypeBytes }
     155             : 
     156             : // Put appends the provided byte slice to the builder.
     157           0 : func (b *RawBytesBuilder) Put(s []byte) {
     158           0 :         b.data = append(b.data, s...)
     159           0 :         b.rows++
     160           0 :         b.offsets.Set(b.rows, uint64(len(b.data)))
     161           0 : }
     162             : 
     163             : // PutConcat appends a single byte slice formed by the concatenation of the two
     164             : // byte slice arguments.
     165           0 : func (b *RawBytesBuilder) PutConcat(s1, s2 []byte) {
     166           0 :         b.data = append(append(b.data, s1...), s2...)
     167           0 :         b.rows++
     168           0 :         b.offsets.Set(b.rows, uint64(len(b.data)))
     169           0 : }
     170             : 
     171             : // Rows returns the count of slices that have been added to the builder.
     172           0 : func (b *RawBytesBuilder) Rows() int {
     173           0 :         return b.rows
     174           0 : }
     175             : 
     176             : // UnsafeGet returns the i'th slice added to the builder. The returned slice is
     177             : // owned by the builder and must not be mutated.
     178           0 : func (b *RawBytesBuilder) UnsafeGet(i int) []byte {
     179           0 :         if b.rows == 0 {
     180           0 :                 return nil
     181           0 :         }
     182           0 :         return b.data[b.offsets.array.elems.At(i):b.offsets.array.elems.At(i+1)]
     183             : }
     184             : 
     185             : // Finish writes the serialized byte slices to buf starting at offset. The buf
     186             : // slice must be sufficiently large to store the serialized output. The caller
     187             : // should use [Size] to size buf appropriately before calling Finish.
     188           0 : func (b *RawBytesBuilder) Finish(col, rows int, offset uint32, buf []byte) uint32 {
     189           0 :         if rows == 0 {
     190           0 :                 return offset
     191           0 :         }
     192           0 :         dataLen := b.offsets.Get(rows)
     193           0 :         offset = b.offsets.Finish(0, rows+1, offset, buf)
     194           0 :         // Copy the data section.
     195           0 :         return offset + uint32(copy(buf[offset:], b.data[:dataLen]))
     196             : }
     197             : 
     198             : // Size computes the size required to encode the byte slices beginning in a
     199             : // buffer at the provided offset. The offset is required to ensure proper
     200             : // alignment. The returned uint32 is the offset of the first byte after the end
     201             : // of the encoded data. To compute the size in bytes, subtract the [offset]
     202             : // passed into Size from the returned offset.
     203           0 : func (b *RawBytesBuilder) Size(rows int, offset uint32) uint32 {
     204           0 :         if rows == 0 {
     205           0 :                 return offset
     206           0 :         }
     207             :         // Get the size needed to encode the rows+1 offsets.
     208           0 :         offset = b.offsets.Size(rows+1, offset)
     209           0 :         // Add the value of offset[rows] since that is the accumulated size of the
     210           0 :         // first [rows] slices.
     211           0 :         return offset + uint32(b.offsets.Get(rows))
     212             : }
     213             : 
     214             : // WriteDebug implements Encoder.
     215           0 : func (b *RawBytesBuilder) WriteDebug(w io.Writer, rows int) {
     216           0 :         fmt.Fprintf(w, "bytes: %d rows set; %d bytes in data", b.rows, len(b.data))
     217           0 : }

Generated by: LCOV version 1.14