// Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package colblk

import (
	"io"
	"strconv"
)

// DataType describes the logical type of a column's values. Some data types
// have multiple possible physical representations. Encoding a column may choose
// between possible physical representations depending on the distribution of
// values and the size of the resulting physical representation.
type DataType uint8

const (
	// DataTypeInvalid represents an unset or invalid data type.
	DataTypeInvalid DataType = 0
	// DataTypeBool is a data type encoding a bool per row.
	DataTypeBool DataType = 1
	// DataTypeUint is a data type encoding a fixed 8 bits per row.
	DataTypeUint DataType = 2
	// DataTypeBytes is a data type encoding a variable-length byte string per
	// row.
	DataTypeBytes DataType = 3
	// DataTypePrefixBytes is a data type encoding variable-length,
	// lexicographically-sorted byte strings, with prefix compression.
	DataTypePrefixBytes DataType = 4

	// dataTypesCount is one past the largest defined data type. It sizes
	// dataTypeName and is not itself a named data type.
	dataTypesCount DataType = 5
)

// dataTypeName holds the human-readable name of every defined DataType,
// indexed by the DataType's numeric value.
var dataTypeName = [dataTypesCount]string{
	DataTypeInvalid:     "invalid",
	DataTypeBool:        "bool",
	DataTypeUint:        "uint",
	DataTypeBytes:       "bytes",
	DataTypePrefixBytes: "prefixbytes",
}

// String returns a human-readable string representation of the data type.
//
// Values at or beyond dataTypesCount have no entry in dataTypeName; they are
// rendered as "DataType(N)", where N is the numeric value, instead of indexing
// past the end of the name table (which would panic).
func (t DataType) String() string {
	if t >= dataTypesCount {
		return "DataType(" + strconv.Itoa(int(t)) + ")"
	}
	return dataTypeName[t]
}

// ColumnWriter is an interface implemented by column encoders that accumulate a
// column's values and then serialize them.
type ColumnWriter interface {
	Encoder
	// NumColumns returns the number of columns the ColumnWriter will encode.
	NumColumns() int
	// DataType returns the data type of the col'th column.
	DataType(col int) DataType
	// Finish serializes the column at the specified index, writing the column's
	// data to buf at offset, and returning the offset at which the next column
	// should be encoded.
	//
	// The supplied buf must have enough space at the provided offset to fit the
	// column. The caller may use Size() to calculate the exact size required.
	// The caller passes the number of rows they want to serialize. All
	// implementations of Finish must support cases where rows is the number of
	// rows the caller has set, or one less. Some implementations may be more
	// permissive.
	//
	// The provided column index must be less than NumColumns(). Finish is
	// called for each index < NumColumns() in order.
	//
	// The provided buf must be word-aligned (at offset 0). If a column writer
	// requires a particular alignment, it's responsible for padding offset
	// appropriately first.
	Finish(col, rows int, offset uint32, buf []byte) (nextOffset uint32)
}

// Encoder is an interface implemented by column encoders.
type Encoder interface {
	// Reset clears the encoder's internal state, preparing it for reuse.
	Reset()
	// Size returns the size required to encode the column's current values.
	//
	// The `rows` argument must be the current number of logical rows in the
	// column. Some implementations support defaults, and these implementations
	// rely on the caller to inform them the current number of logical rows. The
	// provided `rows` must be greater than or equal to the largest row set + 1.
	// In other words, Size does not support determining the size of a column's
	// earlier size before additional rows were added.
	Size(rows int, offset uint32) uint32
	// WriteDebug writes a human-readable description of the current column
	// state to the provided writer.
	WriteDebug(w io.Writer, rows int)
}

// A DecodeFunc decodes a data structure from a byte slice, returning an
// accessor for the data and the offset of the first byte after the structure.
// The rows argument must be the number of logical rows encoded within the data
// structure.
type DecodeFunc[T any] func(buf []byte, offset uint32, rows int) (decoded T, nextOffset uint32)

// An Array provides indexed access to an array of values.
type Array[V any] interface {
	// At returns the i'th value in the array.
	At(i int) V
}

// Clone clones the first n elements of the array a, returning them in a newly
// allocated slice of length n. Element i of the result is a.At(i).
func Clone[V any](a Array[V], n int) []V {
	c := make([]V, n)
	for i := range c {
		c[i] = a.At(i)
	}
	return c
}