Line data Source code
1 : // Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package colblk
6 :
7 : import (
8 : "encoding/binary"
9 : "unsafe"
10 :
11 : "github.com/cockroachdb/errors"
12 : "golang.org/x/exp/constraints"
13 : )
14 :
15 : // UnsafeRawSlice wraps a raw pointer to the first element of a run of
16 : // integers of type T. No length is stored and no bounds checking is done.
17 : type UnsafeRawSlice[T constraints.Integer] struct {
18 : ptr unsafe.Pointer // address of element 0; indexed in units of sizeof(T)
19 : }
20 :
21 2 : func makeUnsafeRawSlice[T constraints.Integer](ptr unsafe.Pointer) UnsafeRawSlice[T] {
22 2 : if align(uintptr(ptr), unsafe.Sizeof(T(0))) != uintptr(ptr) {
23 0 : panic(errors.AssertionFailedf("slice pointer %p not %d-byte aligned", ptr, unsafe.Sizeof(T(0))))
24 : }
25 2 : return UnsafeRawSlice[T]{ptr: ptr}
26 : }
27 :
28 : // At returns the `i`-th element of the slice.
29 2 : func (s UnsafeRawSlice[T]) At(i int) T {
30 2 : return *(*T)(unsafe.Pointer(uintptr(s.ptr) + unsafe.Sizeof(T(0))*uintptr(i)))
31 2 : }
32 :
33 : // Slice returns a go []T slice containing the first `len` elements of the
34 : // unsafe slice.
35 2 : func (s UnsafeRawSlice[T]) Slice(len int) []T {
36 2 : return unsafe.Slice((*T)(s.ptr), len)
37 2 : }
38 :
39 : // set mutates the slice, setting the `i`-th value to `v`.
40 2 : func (s UnsafeRawSlice[T]) set(i int, v T) {
41 2 : *(*T)(unsafe.Pointer(uintptr(s.ptr) + unsafe.Sizeof(T(0))*uintptr(i))) = v
42 2 : }
43 :
44 : // UnsafeUints exposes a read-only view of integers from a column, transparently
45 : // decoding data based on the UintEncoding.
46 : //
47 : // See UintEncoding and UintBuilder.
48 : type UnsafeUints struct {
49 : base uint64 // added by At to each stored value (not applied when width is 8); returned as-is when width is 0
50 : ptr unsafe.Pointer // address of the first encoded value
51 : width uint8 // bytes per encoded value: 0, 1, 2, 4 or 8
52 : }
53 :
54 : // Assert that UnsafeUints implements Array.
55 : var _ Array[uint64] = UnsafeUints{}
56 :
57 : // DecodeUnsafeUints decodes the structure of a slice of uints from a
58 : // byte slice.
59 2 : func DecodeUnsafeUints(b []byte, off uint32, rows int) (_ UnsafeUints, endOffset uint32) {
60 2 : if rows == 0 {
61 2 : // NB: &b[off] is actually pointing beyond the uints serialization. We
62 2 : // ensure this is always valid at the block-level by appending a
63 2 : // trailing 0x00 block padding byte to all serialized columnar blocks.
64 2 : // This means &b[off] will always point to a valid, allocated byte even
65 2 : // if this is the last column of the block.
66 2 : return makeUnsafeUints(0, unsafe.Pointer(&b[off]), 0), off
67 2 : }
68 2 : encoding := UintEncoding(b[off])
69 2 : if !encoding.IsValid() {
70 0 : panic(errors.AssertionFailedf("invalid encoding 0x%x", b[off:off+1]))
71 : }
72 2 : off++
73 2 : var base uint64
74 2 : if encoding.IsDelta() {
75 2 : base = binary.LittleEndian.Uint64(b[off:])
76 2 : off += 8
77 2 : }
78 2 : w := encoding.Width()
79 2 : if w > 0 {
80 2 : off = align(off, uint32(w))
81 2 : }
82 2 : return makeUnsafeUints(base, unsafe.Pointer(&b[off]), w), off + uint32(rows*w)
83 : }
84 :
85 : // Assert that DecodeUnsafeUints implements DecodeFunc.
86 : var _ DecodeFunc[UnsafeUints] = DecodeUnsafeUints
87 :
88 2 : func makeUnsafeUints(base uint64, ptr unsafe.Pointer, width int) UnsafeUints {
89 2 : switch width {
90 2 : case 0, 1, 2, 4, 8:
91 0 : default:
92 0 : panic("invalid width")
93 : }
94 2 : return UnsafeUints{
95 2 : base: base,
96 2 : ptr: ptr,
97 2 : width: uint8(width),
98 2 : }
99 : }
100 :
101 : // At returns the `i`-th element.
102 2 : func (s UnsafeUints) At(i int) uint64 {
103 2 : // TODO(radu): this implementation assumes little-endian architecture.
104 2 :
105 2 : // One of the most common case is decoding timestamps, which require the full
106 2 : // 8 bytes (2^32 nanoseconds is only ~4 seconds).
107 2 : if s.width == 8 {
108 1 : // NB: The slice encodes 64-bit integers, there is no base (it doesn't save
109 1 : // any bits to compute a delta). We cast directly into a *uint64 pointer and
110 1 : // don't add the base.
111 1 : return *(*uint64)(unsafe.Pointer(uintptr(s.ptr) + uintptr(i)<<align64Shift))
112 1 : }
113 : // Another common case is 0 width, when all keys have zero logical timestamps.
114 2 : if s.width == 0 {
115 2 : return s.base
116 2 : }
117 2 : if s.width == 4 {
118 2 : return s.base + uint64(*(*uint32)(unsafe.Pointer(uintptr(s.ptr) + uintptr(i)<<align32Shift)))
119 2 : }
120 2 : if s.width == 2 {
121 2 : return s.base + uint64(*(*uint16)(unsafe.Pointer(uintptr(s.ptr) + uintptr(i)<<align16Shift)))
122 2 : }
123 2 : return s.base + uint64(*(*uint8)(unsafe.Pointer(uintptr(s.ptr) + uintptr(i))))
124 : }
125 :
126 : // UnsafeOffsets is a specialization of UnsafeUints (providing the same
127 : // functionality) which is optimized when the integers are offsets inside a
128 : // column block. It can only be used with 0, 1, 2, or 4 byte encoding without
129 : // delta.
130 : type UnsafeOffsets struct {
131 : ptr unsafe.Pointer // address of the first encoded offset
132 : width uint8 // bytes per encoded offset: 0, 1, 2 or 4
133 : }
134 :
135 : // DecodeUnsafeOffsets decodes the structure of a slice of offsets from a byte
136 : // slice.
137 2 : func DecodeUnsafeOffsets(b []byte, off uint32, rows int) (_ UnsafeOffsets, endOffset uint32) {
138 2 : ints, endOffset := DecodeUnsafeUints(b, off, rows)
139 2 : if ints.base != 0 || ints.width == 8 {
140 0 : panic(errors.AssertionFailedf("unexpected offsets encoding (base=%d, width=%d)", ints.base, ints.width))
141 : }
142 2 : return UnsafeOffsets{
143 2 : ptr: ints.ptr,
144 2 : width: ints.width,
145 2 : }, endOffset
146 : }
147 :
148 : // At returns the `i`-th offset.
149 : //
150 : //gcassert:inline
151 2 : func (s UnsafeOffsets) At(i int) uint32 {
152 2 : // TODO(radu): this implementation assumes little-endian architecture.
153 2 :
154 2 : // We expect offsets to be encoded as 16-bit integers in most cases.
155 2 : if s.width == 2 {
156 2 : return uint32(*(*uint16)(unsafe.Pointer(uintptr(s.ptr) + uintptr(i)<<align16Shift)))
157 2 : }
158 2 : if s.width <= 1 {
159 2 : if s.width == 0 {
160 2 : return 0
161 2 : }
162 2 : return uint32(*(*uint8)(unsafe.Pointer(uintptr(s.ptr) + uintptr(i))))
163 : }
164 1 : return *(*uint32)(unsafe.Pointer(uintptr(s.ptr) + uintptr(i)<<align32Shift))
165 : }
166 :
167 : // At2 returns the `i`-th and `i+1`-th offsets.
168 : //
169 : //gcassert:inline
170 2 : func (s UnsafeOffsets) At2(i int) (uint32, uint32) {
171 2 : // TODO(radu): this implementation assumes little-endian architecture.
172 2 :
173 2 : // We expect offsets to be encoded as 16-bit integers in most cases.
174 2 : if s.width == 2 {
175 2 : v := *(*uint32)(unsafe.Pointer(uintptr(s.ptr) + uintptr(i)<<align16Shift))
176 2 : return v & 0xFFFF, v >> 16
177 2 : }
178 2 : if s.width <= 1 {
179 2 : if s.width == 0 {
180 2 : return 0, 0
181 2 : }
182 2 : v := *(*uint16)(unsafe.Pointer(uintptr(s.ptr) + uintptr(i)))
183 2 : return uint32(v & 0xFF), uint32(v >> 8)
184 : }
185 1 : v := *(*uint64)(unsafe.Pointer(uintptr(s.ptr) + uintptr(i)<<align32Shift))
186 1 : return uint32(v), uint32(v >> 32)
187 : }
188 :
189 : // UnsafeBuf provides a buffer without bounds checking. Every buf has a len and
190 : // capacity.
191 : type UnsafeBuf struct {
192 : ptr unsafe.Pointer // start of the backing memory; assigned by Alloc
193 : len int // number of bytes exposed by UnsafeSlice
194 : cap int // size in bytes requested from the most recent Alloc
195 : }
196 :
197 : // Alloc allocates a buffer of size n, without zeroing its contents or copying
198 : // previous buffer contents.
199 0 : func (b *UnsafeBuf) Alloc(n int) {
200 0 : b.ptr = mallocgc(uintptr(n), nil, false)
201 0 : b.cap = n
202 0 : }
203 :
204 : // UnsafeSlice returns the current contents of the buf.
205 0 : func (b *UnsafeBuf) UnsafeSlice() []byte {
206 0 : return unsafe.Slice((*byte)(b.ptr), b.len)
207 0 : }
|