Line data Source code
1 : // Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 : // of this source code is governed by a BSD-style license that can be found in 3 : // the LICENSE file. 4 : 5 : package colblk 6 : 7 : import "io" 8 : 9 : // DataType describes the logical type of a column's values. Some data types 10 : // have multiple possible physical representations. Encoding a column may choose 11 : // between possible physical representations depending on the distribution of 12 : // values and the size of the resulting physical representation. 13 : type DataType uint8 14 : 15 : const ( 16 : // DataTypeInvalid represents an unset or invalid data type. 17 : DataTypeInvalid DataType = 0 18 : // DataTypeBool is a data type encoding a bool per row. 19 : DataTypeBool DataType = 1 20 : // DataTypeUint8 is a data type encoding a fixed 8 bits per row. 21 : DataTypeUint8 DataType = 2 22 : // DataTypeUint16 is a data type encoding a fixed 16 bits per row. 23 : DataTypeUint16 DataType = 3 24 : // DataTypeUint32 is a data type encoding a fixed 32 bits per row. 25 : DataTypeUint32 DataType = 4 26 : // DataTypeUint64 is a data type encoding a fixed 64 bits per row. 27 : DataTypeUint64 DataType = 5 28 : // DataTypeBytes is a data type encoding a variable-length byte string per 29 : // row. 30 : DataTypeBytes DataType = 6 31 : // DataTypePrefixBytes is a data type encoding variable-length, 32 : // lexicographically-sorted byte strings, with prefix compression. 33 : DataTypePrefixBytes DataType = 7 34 : 35 : dataTypesCount DataType = 8 36 : ) 37 : 38 : var dataTypeName [dataTypesCount]string = [dataTypesCount]string{ 39 : DataTypeInvalid: "invalid", 40 : DataTypeBool: "bool", 41 : DataTypeUint8: "uint8", 42 : DataTypeUint16: "uint16", 43 : DataTypeUint32: "uint32", 44 : DataTypeUint64: "uint64", 45 : DataTypeBytes: "bytes", 46 : DataTypePrefixBytes: "prefixbytes", 47 : } 48 : 49 : // String returns a human-readable string representation of the data type. 50 1 : func (t DataType) String() string { 51 1 : return dataTypeName[t] 52 1 : } 53 : 54 1 : func (t DataType) uintWidth() uint32 { 55 1 : if t >= DataTypeUint8 && t <= DataTypeUint64 { 56 1 : rv := 1 << (t - DataTypeUint8) 57 1 : if rv > 8 { 58 0 : panic("width greater than 8 bytes") 59 : } 60 1 : return uint32(rv) 61 : } 62 0 : panic("not a unit") 63 : } 64 : 65 : // ColumnWriter is an interface implemented by column encoders that accumulate a 66 : // column's values and then serialize them. 67 : type ColumnWriter interface { 68 : Encoder 69 : // NumColumns returns the number of columns the ColumnWriter will encode. 70 : NumColumns() int 71 : // DataType returns the data type of the col'th column. 72 : DataType(col int) DataType 73 : // Finish serializes the column at the specified index, writing the column's 74 : // data to buf at offset, and returning the offset at which the next column 75 : // should be encoded. 76 : // 77 : // The supplied buf must have enough space at the provided offset to fit the 78 : // column. The caller may use Size() to calculate the exact size required. 79 : // The caller passes the number of rows they want to serialize. All 80 : // implementations of Finish must support cases where rows is the number of 81 : // rows the caller has set, or one less. Some implementations may be more 82 : // permissive. 83 : // 84 : // The provided column index must be less than NumColumns(). Finish is 85 : // called for each index < NumColumns() in order. 86 : // 87 : // The provided buf must be word-aligned (at offset 0). If a column writer 88 : // requires a particularly alignment, it's responsible for padding offset 89 : // appropriately first. 90 : Finish(col, rows int, offset uint32, buf []byte) (nextOffset uint32) 91 : } 92 : 93 : // Encoder is an interface implemented by column encoders. 94 : type Encoder interface { 95 : // Reset clears the ColumnWriter's internal state, preparing it for reuse. 96 : Reset() 97 : // Size returns the size required to encode the column's current values. 98 : // 99 : // The `rows` argument must be the current number of logical rows in the 100 : // column. Some implementations support defaults, and these implementations 101 : // rely on the caller to inform them the current number of logical rows. The 102 : // provided `rows` must be greater than or equal to the largest row set + 1. 103 : // In other words, Size does not support determining the size of a column's 104 : // earlier size before additional rows were added. 105 : Size(rows int, offset uint32) uint32 106 : // WriteDebug writes a human-readable description of the current column 107 : // state to the provided writer. 108 : WriteDebug(w io.Writer, rows int) 109 : } 110 : 111 : // A DecodeFunc decodes a data structure from a byte slice, returning an 112 : // accessor for the data and the offset of the first byte after the structure. 113 : // The rows argument must be number of logical rows encoded within the data 114 : // structure. 115 : type DecodeFunc[T any] func(buf []byte, offset uint32, rows int) (decoded T, nextOffset uint32) 116 : 117 : // An Array provides indexed access to an array of values. 118 : type Array[V any] interface { 119 : // At returns the i'th value in the array. 120 : At(i int) V 121 : } 122 : 123 : // Clone clones the first n elements of the array a. 124 1 : func Clone[V any](a Array[V], n int) []V { 125 1 : c := make([]V, n) 126 1 : for i := 0; i < n; i++ { 127 1 : c[i] = a.At(i) 128 1 : } 129 1 : return c 130 : }