Line data Source code
1 : // Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package colblk
6 :
7 : import (
8 : "encoding/binary"
9 : "unsafe"
10 :
11 : "github.com/cockroachdb/errors"
12 : "golang.org/x/exp/constraints"
13 : )
14 :
15 : // UnsafeRawSlice maintains a pointer to a slice of elements of type T.
16 : // UnsafeRawSlice provides no bounds checking.
17 : type UnsafeRawSlice[T constraints.Integer] struct {
18 : ptr unsafe.Pointer
19 : }
20 :
21 1 : func makeUnsafeRawSlice[T constraints.Integer](ptr unsafe.Pointer) UnsafeRawSlice[T] {
22 1 : if align(uintptr(ptr), unsafe.Sizeof(T(0))) != uintptr(ptr) {
23 0 : panic(errors.AssertionFailedf("slice pointer %p not %d-byte aligned", ptr, unsafe.Sizeof(T(0))))
24 : }
25 1 : return UnsafeRawSlice[T]{ptr: ptr}
26 : }
27 :
28 : // At returns the `i`-th element of the slice.
29 1 : func (s UnsafeRawSlice[T]) At(i int) T {
30 1 : return *(*T)(unsafe.Pointer(uintptr(s.ptr) + unsafe.Sizeof(T(0))*uintptr(i)))
31 1 : }
32 :
33 : // Slice returns a go []T slice containing the first `len` elements of the
34 : // unsafe slice.
35 1 : func (s UnsafeRawSlice[T]) Slice(len int) []T {
36 1 : return unsafe.Slice((*T)(s.ptr), len)
37 1 : }
38 :
39 : // set mutates the slice, setting the `i`-th value to `v`.
40 1 : func (s UnsafeRawSlice[T]) set(i int, v T) {
41 1 : *(*T)(unsafe.Pointer(uintptr(s.ptr) + unsafe.Sizeof(T(0))*uintptr(i))) = v
42 1 : }
43 :
// UnsafeUints is a read-only view over the unsigned integers of a column.
// Values are decoded on access according to the column's UintEncoding.
//
// See UintEncoding and UintBuilder.
type UnsafeUints struct {
	// base is added to every stored value that is narrower than 8 bytes. When
	// width is 0 it is the value of every element.
	base uint64
	// ptr is the address of the first stored value.
	ptr unsafe.Pointer
	// width is the number of bytes per stored value: 0, 1, 2, 4 or 8.
	width uint8
}
53 :
54 : // Assert that UnsafeUints implements Array.
55 : var _ Array[uint64] = UnsafeUints{}
56 :
57 : // DecodeUnsafeUints decodes the structure of a slice of uints from a
58 : // byte slice.
59 1 : func DecodeUnsafeUints(b []byte, off uint32, rows int) (_ UnsafeUints, endOffset uint32) {
60 1 : if rows == 0 {
61 1 : // NB: &b[off] is actually pointing beyond the uints serialization. We
62 1 : // ensure this is always valid at the block-level by appending a
63 1 : // trailing 0x00 block padding byte to all serialized columnar blocks.
64 1 : // This means &b[off] will always point to a valid, allocated byte even
65 1 : // if this is the last column of the block.
66 1 : return makeUnsafeUints(0, unsafe.Pointer(&b[off]), 0), off
67 1 : }
68 1 : encoding := UintEncoding(b[off])
69 1 : if !encoding.IsValid() {
70 0 : panic(errors.AssertionFailedf("invalid encoding 0x%x", b[off:off+1]))
71 : }
72 1 : off++
73 1 : var base uint64
74 1 : if encoding.IsDelta() {
75 1 : base = binary.LittleEndian.Uint64(b[off:])
76 1 : off += 8
77 1 : }
78 1 : w := encoding.Width()
79 1 : if w > 0 {
80 1 : off = align(off, uint32(w))
81 1 : }
82 1 : return makeUnsafeUints(base, unsafe.Pointer(&b[off]), w), off + uint32(rows*w)
83 : }
84 :
85 : // Assert that DecodeUnsafeUints implements DecodeFunc.
86 : var _ DecodeFunc[UnsafeUints] = DecodeUnsafeUints
87 :
88 1 : func makeUnsafeUints(base uint64, ptr unsafe.Pointer, width int) UnsafeUints {
89 1 : switch width {
90 1 : case 0, 1, 2, 4, 8:
91 0 : default:
92 0 : panic("invalid width")
93 : }
94 1 : return UnsafeUints{
95 1 : base: base,
96 1 : ptr: ptr,
97 1 : width: uint8(width),
98 1 : }
99 : }
100 :
101 : // At returns the `i`-th element.
102 1 : func (s UnsafeUints) At(i int) uint64 {
103 1 : // TODO(radu): this implementation assumes little-endian architecture.
104 1 :
105 1 : // One of the most common case is decoding timestamps, which require the full
106 1 : // 8 bytes (2^32 nanoseconds is only ~4 seconds).
107 1 : if s.width == 8 {
108 0 : // NB: The slice encodes 64-bit integers, there is no base (it doesn't save
109 0 : // any bits to compute a delta). We cast directly into a *uint64 pointer and
110 0 : // don't add the base.
111 0 : return *(*uint64)(unsafe.Pointer(uintptr(s.ptr) + uintptr(i)<<align64Shift))
112 0 : }
113 : // Another common case is 0 width, when all keys have zero logical timestamps.
114 1 : if s.width == 0 {
115 1 : return s.base
116 1 : }
117 1 : if s.width == 4 {
118 1 : return s.base + uint64(*(*uint32)(unsafe.Pointer(uintptr(s.ptr) + uintptr(i)<<align32Shift)))
119 1 : }
120 1 : if s.width == 2 {
121 1 : return s.base + uint64(*(*uint16)(unsafe.Pointer(uintptr(s.ptr) + uintptr(i)<<align16Shift)))
122 1 : }
123 1 : return s.base + uint64(*(*uint8)(unsafe.Pointer(uintptr(s.ptr) + uintptr(i))))
124 : }
125 :
// UnsafeOffsets is a specialization of UnsafeUints with the same
// functionality, optimized for integers that are offsets inside a column
// block. It supports only the 0, 1, 2 and 4 byte encodings, without a delta
// base.
type UnsafeOffsets struct {
	// ptr is the address of the first stored offset.
	ptr unsafe.Pointer
	// width is the number of bytes per stored offset: 0, 1, 2 or 4.
	width uint8
}
134 :
135 : // DecodeUnsafeOffsets decodes the structure of a slice of offsets from a byte
136 : // slice.
137 1 : func DecodeUnsafeOffsets(b []byte, off uint32, rows int) (_ UnsafeOffsets, endOffset uint32) {
138 1 : ints, endOffset := DecodeUnsafeUints(b, off, rows)
139 1 : if ints.base != 0 || ints.width == 8 {
140 0 : panic(errors.AssertionFailedf("unexpected offsets encoding (base=%d, width=%d)", ints.base, ints.width))
141 : }
142 1 : return UnsafeOffsets{
143 1 : ptr: ints.ptr,
144 1 : width: ints.width,
145 1 : }, endOffset
146 : }
147 :
148 : // At returns the `i`-th offset.
149 : //
150 : //gcassert:inline
151 1 : func (s UnsafeOffsets) At(i int) uint32 {
152 1 : // TODO(radu): this implementation assumes little-endian architecture.
153 1 :
154 1 : // We expect offsets to be encoded as 16-bit integers in most cases.
155 1 : if s.width == 2 {
156 1 : return uint32(*(*uint16)(unsafe.Pointer(uintptr(s.ptr) + uintptr(i)<<align16Shift)))
157 1 : }
158 1 : if s.width <= 1 {
159 1 : if s.width == 0 {
160 1 : return 0
161 1 : }
162 1 : return uint32(*(*uint8)(unsafe.Pointer(uintptr(s.ptr) + uintptr(i))))
163 : }
164 0 : return *(*uint32)(unsafe.Pointer(uintptr(s.ptr) + uintptr(i)<<align32Shift))
165 : }
166 :
167 : // At2 returns the `i`-th and `i+1`-th offsets.
168 : //
169 : //gcassert:inline
170 1 : func (s UnsafeOffsets) At2(i int) (uint32, uint32) {
171 1 : // TODO(radu): this implementation assumes little-endian architecture.
172 1 :
173 1 : // We expect offsets to be encoded as 16-bit integers in most cases.
174 1 : if s.width == 2 {
175 1 : v := *(*uint32)(unsafe.Pointer(uintptr(s.ptr) + uintptr(i)<<align16Shift))
176 1 : return v & 0xFFFF, v >> 16
177 1 : }
178 1 : if s.width <= 1 {
179 1 : if s.width == 0 {
180 1 : return 0, 0
181 1 : }
182 1 : v := *(*uint16)(unsafe.Pointer(uintptr(s.ptr) + uintptr(i)))
183 1 : return uint32(v & 0xFF), uint32(v >> 8)
184 : }
185 0 : v := *(*uint64)(unsafe.Pointer(uintptr(s.ptr) + uintptr(i)<<align32Shift))
186 0 : return uint32(v), uint32(v >> 32)
187 : }
188 :
// UnsafeBuf is a byte buffer that performs no bounds checking. Each buf
// carries a length and a capacity alongside the pointer to its memory.
type UnsafeBuf struct {
	// ptr is the address of the first byte of the buffer.
	ptr unsafe.Pointer
	// len is the number of bytes exposed by UnsafeSlice.
	len int
	// cap is the size in bytes of the most recent allocation made by Alloc.
	cap int
}
196 :
197 : // Alloc allocates a buffer of size n, without zeroing its contents or copying
198 : // previous buffer contents.
199 0 : func (b *UnsafeBuf) Alloc(n int) {
200 0 : b.ptr = mallocgc(uintptr(n), nil, false)
201 0 : b.cap = n
202 0 : }
203 :
204 : // UnsafeSlice returns the current contents of the buf.
205 0 : func (b *UnsafeBuf) UnsafeSlice() []byte {
206 0 : return unsafe.Slice((*byte)(b.ptr), b.len)
207 0 : }
|