/src/bson-rust/src/binary/vector.rs
Line | Count | Source |
1 | | use std::{ |
2 | | convert::{TryFrom, TryInto}, |
3 | | mem::size_of, |
4 | | }; |
5 | | |
6 | | use super::{Binary, Error, Result}; |
7 | | use crate::{spec::BinarySubtype, Bson, RawBson}; |
8 | | |
// Data-type tags stored in the first byte of a vector's binary payload. Each tag identifies
// which `Vector` variant the remaining bytes encode.

/// Tag for a vector of signed 8-bit integers.
const INT8: u8 = 0x03;
/// Tag for a vector of 32-bit floats.
const FLOAT32: u8 = 0x27;
/// Tag for a vector of packed bits; the only tag that may carry a non-zero padding byte.
const PACKED_BIT: u8 = 0x10;
12 | | |
13 | | /// A vector of numeric values. This type can be converted into a [`Binary`] of subtype |
14 | | /// [`BinarySubtype::Vector`]. |
15 | | /// |
16 | | /// ```rust |
17 | | /// # use bson::binary::{Binary, Vector}; |
18 | | /// let vector = Vector::Int8(vec![0, 1, 2]); |
19 | | /// let binary = Binary::from(vector); |
20 | | /// ``` |
21 | | /// |
22 | | /// `Vector` serializes to and deserializes from a `Binary`. |
23 | | /// |
24 | | /// ```rust |
25 | | /// # use serde::{Serialize, Deserialize}; |
26 | | /// # use bson::{binary::Vector, error::Result, spec::ElementType}; |
27 | | /// #[derive(Serialize, Deserialize)] |
28 | | /// struct Data { |
29 | | /// vector: Vector, |
30 | | /// } |
31 | | /// |
32 | | /// let data = Data { vector: Vector::Int8(vec![0, 1, 2]) }; |
33 | | /// let document = bson::serialize_to_document(&data).unwrap(); |
34 | | /// assert_eq!(document.get("vector").unwrap().element_type(), ElementType::Binary); |
35 | | /// |
36 | | /// let data: Data = bson::deserialize_from_document(document).unwrap(); |
37 | | /// assert_eq!(data.vector, Vector::Int8(vec![0, 1, 2])); |
38 | | /// ``` |
39 | | /// |
40 | | /// See the |
41 | | /// [specification](https://github.com/mongodb/specifications/blob/master/source/bson-binary-vector/bson-binary-vector.md) |
42 | | /// for more details. |
43 | | #[derive(Clone, Debug, PartialEq)] |
44 | | pub enum Vector { |
45 | | /// A vector of `i8` values. |
46 | | Int8(Vec<i8>), |
47 | | |
48 | | /// A vector of `f32` values. |
49 | | Float32(Vec<f32>), |
50 | | |
51 | | /// A vector of packed bits. See [`PackedBitVector::new`] for more details. |
52 | | PackedBit(PackedBitVector), |
53 | | } |
54 | | |
/// A bit vector whose elements are packed eight to a byte. Build one with
/// [`PackedBitVector::new`], which validates the padding against the data.
#[derive(Clone, Debug, PartialEq)]
pub struct PackedBitVector {
    // The packed bytes; each byte carries eight single-bit elements.
    vector: Vec<u8>,
    // How many least-significant bits of the final byte are ignored.
    padding: u8,
}
61 | | |
62 | | impl PackedBitVector { |
63 | | /// Construct a new `PackedBitVector`. Each `u8` value in the provided `vector` represents 8 |
64 | | /// single-bit elements in little-endian format. For example, the following vector: |
65 | | /// |
66 | | /// ```rust |
67 | | /// # use bson::{binary::PackedBitVector, error::Result}; |
68 | | /// # fn main() -> Result<()> { |
69 | | /// let packed_bits = vec![238, 224]; |
70 | | /// let vector = PackedBitVector::new(packed_bits, 0)?; |
71 | | /// # Ok(()) |
72 | | /// # } |
73 | | /// ``` |
74 | | /// |
75 | | /// represents a 16-bit vector containing the following values: |
76 | | /// |
77 | | /// ```text |
78 | | /// [1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0] |
79 | | /// ``` |
80 | | /// |
81 | | /// Padding can optionally be specified to ignore a number of least-significant bits in the |
82 | | /// final byte. For example, the vector in the previous example with a padding of 4 would |
83 | | /// represent a 12-bit vector containing the following values: |
84 | | /// |
85 | | /// ```text |
86 | | /// [1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0] |
87 | | /// ``` |
88 | | /// |
89 | | /// Padding must be within 0-7 inclusive. Padding must be 0 or unspecified if the provided |
90 | | /// vector is empty. The ignored bits in the vector must all be 0. |
91 | 0 | pub fn new(vector: Vec<u8>, padding: impl Into<Option<u8>>) -> Result<Self> { |
92 | 0 | let padding = padding.into().unwrap_or(0); |
93 | 0 | if !(0..8).contains(&padding) { |
94 | 0 | return Err(Error::binary(format!( |
95 | 0 | "vector padding must be within 0-7 inclusive, got {padding}" |
96 | 0 | ))); |
97 | 0 | } |
98 | 0 | match vector.last() { |
99 | 0 | Some(last) => { |
100 | 0 | if last.trailing_zeros() < u32::from(padding) { |
101 | 0 | return Err(Error::binary( |
102 | 0 | "the ignored bits in a packed bit vector must all be 0", |
103 | 0 | )); |
104 | 0 | } |
105 | | } |
106 | | None => { |
107 | 0 | if padding != 0 { |
108 | 0 | return Err(Error::binary(format!( |
109 | 0 | "cannot specify non-zero padding if the provided vector is empty, got \ |
110 | 0 | {padding}" |
111 | 0 | ))); |
112 | 0 | } |
113 | | } |
114 | | } |
115 | 0 | Ok(Self { vector, padding }) |
116 | 0 | } |
117 | | } |
118 | | |
119 | | impl Vector { |
120 | | /// Construct a [`Vector`] from the given bytes. See the |
121 | | /// [specification](https://github.com/mongodb/specifications/blob/master/source/bson-binary-vector/bson-binary-vector.md#specification) |
122 | | /// for details on the expected byte format. |
123 | 0 | pub fn from_bytes(bytes: impl AsRef<[u8]>) -> Result<Self> { |
124 | 0 | let bytes = bytes.as_ref(); |
125 | | |
126 | 0 | if bytes.len() < 2 { |
127 | 0 | return Err(Error::binary(format!( |
128 | 0 | "the provided vector bytes must have a length of at least 2, got {}", |
129 | 0 | bytes.len() |
130 | 0 | ))); |
131 | 0 | } |
132 | | |
133 | 0 | let d_type = bytes[0]; |
134 | 0 | let padding = bytes[1]; |
135 | 0 | if d_type != PACKED_BIT && padding != 0 { |
136 | 0 | return Err(Error::binary(format!( |
137 | 0 | "padding can only be specified for a packed bit vector (data type {}), got type {}", |
138 | 0 | PACKED_BIT, d_type |
139 | 0 | ))); |
140 | 0 | } |
141 | 0 | let number_bytes = &bytes[2..]; |
142 | | |
143 | 0 | match d_type { |
144 | | INT8 => { |
145 | 0 | let vector = number_bytes |
146 | 0 | .iter() |
147 | 0 | .map(|n| i8::from_le_bytes([*n])) |
148 | 0 | .collect(); |
149 | 0 | Ok(Self::Int8(vector)) |
150 | | } |
151 | | FLOAT32 => { |
152 | | const F32_BYTES: usize = size_of::<f32>(); |
153 | | |
154 | 0 | let mut vector = Vec::new(); |
155 | 0 | for chunk in number_bytes.chunks(F32_BYTES) { |
156 | 0 | let bytes: [u8; F32_BYTES] = chunk.try_into().map_err(|_| { |
157 | 0 | Error::binary(format!( |
158 | 0 | "f32 vector values must be {} bytes, got {:?}", |
159 | | F32_BYTES, chunk, |
160 | | )) |
161 | 0 | })?; |
162 | 0 | vector.push(f32::from_le_bytes(bytes)); |
163 | | } |
164 | 0 | Ok(Self::Float32(vector)) |
165 | | } |
166 | | PACKED_BIT => { |
167 | 0 | let packed_bit_vector = PackedBitVector::new(number_bytes.to_vec(), padding)?; |
168 | 0 | Ok(Self::PackedBit(packed_bit_vector)) |
169 | | } |
170 | 0 | other => Err(Error::binary(format!( |
171 | 0 | "unsupported vector data type: {other}" |
172 | 0 | ))), |
173 | | } |
174 | 0 | } |
175 | | |
176 | 0 | fn d_type(&self) -> u8 { |
177 | 0 | match self { |
178 | 0 | Self::Int8(_) => INT8, |
179 | 0 | Self::Float32(_) => FLOAT32, |
180 | 0 | Self::PackedBit(_) => PACKED_BIT, |
181 | | } |
182 | 0 | } |
183 | | |
184 | 0 | fn padding(&self) -> u8 { |
185 | 0 | match self { |
186 | 0 | Self::Int8(_) => 0, |
187 | 0 | Self::Float32(_) => 0, |
188 | 0 | Self::PackedBit(PackedBitVector { padding, .. }) => *padding, |
189 | | } |
190 | 0 | } |
191 | | } |
192 | | |
193 | | impl From<&Vector> for Binary { |
194 | 0 | fn from(vector: &Vector) -> Self { |
195 | 0 | let d_type = vector.d_type(); |
196 | 0 | let padding = vector.padding(); |
197 | 0 | let mut bytes = vec![d_type, padding]; |
198 | | |
199 | 0 | match vector { |
200 | 0 | Vector::Int8(vector) => { |
201 | 0 | for n in vector { |
202 | 0 | bytes.extend_from_slice(&n.to_le_bytes()); |
203 | 0 | } |
204 | | } |
205 | 0 | Vector::Float32(vector) => { |
206 | 0 | for n in vector { |
207 | 0 | bytes.extend_from_slice(&n.to_le_bytes()); |
208 | 0 | } |
209 | | } |
210 | 0 | Vector::PackedBit(PackedBitVector { vector, .. }) => { |
211 | 0 | for n in vector { |
212 | 0 | bytes.extend_from_slice(&n.to_le_bytes()); |
213 | 0 | } |
214 | | } |
215 | | } |
216 | | |
217 | 0 | Self { |
218 | 0 | subtype: BinarySubtype::Vector, |
219 | 0 | bytes, |
220 | 0 | } |
221 | 0 | } |
222 | | } |
223 | | |
224 | | impl From<Vector> for Binary { |
225 | 0 | fn from(vector: Vector) -> Binary { |
226 | 0 | Self::from(&vector) |
227 | 0 | } |
228 | | } |
229 | | |
230 | | impl TryFrom<&Binary> for Vector { |
231 | | type Error = Error; |
232 | | |
233 | 0 | fn try_from(binary: &Binary) -> Result<Self> { |
234 | 0 | if binary.subtype != BinarySubtype::Vector { |
235 | 0 | return Err(Error::binary(format!( |
236 | 0 | "expected vector binary subtype, got {:?}", |
237 | 0 | binary.subtype |
238 | 0 | ))); |
239 | 0 | } |
240 | 0 | Self::from_bytes(&binary.bytes) |
241 | 0 | } |
242 | | } |
243 | | |
244 | | impl TryFrom<Binary> for Vector { |
245 | | type Error = Error; |
246 | | |
247 | 0 | fn try_from(binary: Binary) -> std::result::Result<Self, Self::Error> { |
248 | 0 | Self::try_from(&binary) |
249 | 0 | } |
250 | | } |
251 | | |
252 | | // Convenience impl to allow passing a Vector directly into the doc! macro. From<&Vector> is already |
253 | | // implemented by a blanket impl in src/bson.rs. |
254 | | impl From<Vector> for Bson { |
255 | 0 | fn from(vector: Vector) -> Self { |
256 | 0 | Self::Binary(Binary::from(vector)) |
257 | 0 | } |
258 | | } |
259 | | |
260 | | // Convenience impls to allow passing a Vector directly into the rawdoc! macro |
261 | | impl From<&Vector> for RawBson { |
262 | 0 | fn from(vector: &Vector) -> Self { |
263 | 0 | Self::Binary(Binary::from(vector)) |
264 | 0 | } |
265 | | } |
266 | | |
267 | | impl From<Vector> for RawBson { |
268 | 0 | fn from(vector: Vector) -> Self { |
269 | 0 | Self::from(&vector) |
270 | 0 | } |
271 | | } |
272 | | |
#[cfg(feature = "serde")]
impl serde::Serialize for Vector {
    /// Serialize the vector by converting it to a [`Binary`] and serializing that value.
    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        serde::Serialize::serialize(&Binary::from(self), serializer)
    }
}
283 | | |
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for Vector {
    /// Deserialize a [`Binary`] and convert it into a vector; a failed conversion is reported
    /// as a custom deserializer error.
    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        <Binary as serde::Deserialize>::deserialize(deserializer)
            .and_then(|binary| Vector::try_from(binary).map_err(serde::de::Error::custom))
    }
}