/rust/registry/src/index.crates.io-1949cf8c6b5b557f/half-1.8.3/src/slice.rs
Line | Count | Source |
1 | | //! Contains utility functions and traits to convert between slices of [`u16`] bits and [`f16`] or |
2 | | //! [`bf16`] numbers. |
3 | | //! |
4 | | //! The utility [`HalfBitsSliceExt`] sealed extension trait is implemented for `[u16]` slices, |
5 | | //! while the utility [`HalfFloatSliceExt`] sealed extension trait is implemented for both `[f16]` |
6 | | //! and `[bf16]` slices. These traits provide efficient conversions and reinterpret casting of |
7 | | //! larger buffers of floating point values, and are automatically included in the |
8 | | //! [`prelude`][crate::prelude] module. |
9 | | |
10 | | use crate::{bf16, binary16::convert, f16}; |
11 | | #[cfg(feature = "alloc")] |
12 | | use alloc::vec::Vec; |
13 | | use core::slice; |
14 | | |
15 | | /// Extensions to `[f16]` and `[bf16]` slices to support conversion and reinterpret operations. |
16 | | /// |
17 | | /// This trait is sealed and cannot be implemented outside of this crate. |
18 | | pub trait HalfFloatSliceExt: private::SealedHalfFloatSlice { |
19 | | /// Reinterprets a slice of [`f16`] or [`bf16`] numbers as a slice of [`u16`] bits. |
20 | | /// |
21 | | /// This is a zero-copy operation. The reinterpreted slice has the same lifetime and memory |
22 | | /// location as `self`. |
23 | | /// |
24 | | /// # Examples |
25 | | /// |
26 | | /// ```rust |
27 | | /// # use half::prelude::*; |
28 | | /// let float_buffer = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]; |
29 | | /// let int_buffer = float_buffer.reinterpret_cast(); |
30 | | /// |
31 | | /// assert_eq!(int_buffer, [float_buffer[0].to_bits(), float_buffer[1].to_bits(), float_buffer[2].to_bits()]); |
32 | | /// ``` |
33 | | fn reinterpret_cast(&self) -> &[u16]; |
34 | | |
35 | | /// Reinterprets a mutable slice of [`f16`] or [`bf16`] numbers as a mutable slice of [`u16`]. |
36 | | /// bits |
37 | | /// |
38 | | /// This is a zero-copy operation. The transmuted slice has the same lifetime as the original, |
39 | | /// which prevents mutating `self` as long as the returned `&mut [u16]` is borrowed. |
40 | | /// |
41 | | /// # Examples |
42 | | /// |
43 | | /// ```rust |
44 | | /// # use half::prelude::*; |
45 | | /// let mut float_buffer = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]; |
46 | | /// |
47 | | /// { |
48 | | /// let int_buffer = float_buffer.reinterpret_cast_mut(); |
49 | | /// |
50 | | /// assert_eq!(int_buffer, [f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]); |
51 | | /// |
52 | | /// // Mutating the u16 slice will mutating the original |
53 | | /// int_buffer[0] = 0; |
54 | | /// } |
55 | | /// |
56 | | /// // Note that we need to drop int_buffer before using float_buffer again or we will get a borrow error. |
57 | | /// assert_eq!(float_buffer, [f16::from_f32(0.), f16::from_f32(2.), f16::from_f32(3.)]); |
58 | | /// ``` |
59 | | fn reinterpret_cast_mut(&mut self) -> &mut [u16]; |
60 | | |
61 | | /// Converts all of the elements of a `[f32]` slice into [`f16`] or [`bf16`] values in `self`. |
62 | | /// |
63 | | /// The length of `src` must be the same as `self`. |
64 | | /// |
65 | | /// The conversion operation is vectorized over the slice, meaning the conversion may be more |
66 | | /// efficient than converting individual elements on some hardware that supports SIMD |
67 | | /// conversions. See [crate documentation](crate) for more information on hardware conversion |
68 | | /// support. |
69 | | /// |
70 | | /// # Panics |
71 | | /// |
72 | | /// This function will panic if the two slices have different lengths. |
73 | | /// |
74 | | /// # Examples |
75 | | /// ```rust |
76 | | /// # use half::prelude::*; |
77 | | /// // Initialize an empty buffer |
78 | | /// let mut buffer = [0u16; 4]; |
79 | | /// let buffer = buffer.reinterpret_cast_mut::<f16>(); |
80 | | /// |
81 | | /// let float_values = [1., 2., 3., 4.]; |
82 | | /// |
83 | | /// // Now convert |
84 | | /// buffer.convert_from_f32_slice(&float_values); |
85 | | /// |
86 | | /// assert_eq!(buffer, [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.), f16::from_f32(4.)]); |
87 | | /// ``` |
88 | | fn convert_from_f32_slice(&mut self, src: &[f32]); |
89 | | |
90 | | /// Converts all of the elements of a `[f64]` slice into [`f16`] or [`bf16`] values in `self`. |
91 | | /// |
92 | | /// The length of `src` must be the same as `self`. |
93 | | /// |
94 | | /// The conversion operation is vectorized over the slice, meaning the conversion may be more |
95 | | /// efficient than converting individual elements on some hardware that supports SIMD |
96 | | /// conversions. See [crate documentation](crate) for more information on hardware conversion |
97 | | /// support. |
98 | | /// |
99 | | /// # Panics |
100 | | /// |
101 | | /// This function will panic if the two slices have different lengths. |
102 | | /// |
103 | | /// # Examples |
104 | | /// ```rust |
105 | | /// # use half::prelude::*; |
106 | | /// // Initialize an empty buffer |
107 | | /// let mut buffer = [0u16; 4]; |
108 | | /// let buffer = buffer.reinterpret_cast_mut::<f16>(); |
109 | | /// |
110 | | /// let float_values = [1., 2., 3., 4.]; |
111 | | /// |
112 | | /// // Now convert |
113 | | /// buffer.convert_from_f64_slice(&float_values); |
114 | | /// |
115 | | /// assert_eq!(buffer, [f16::from_f64(1.), f16::from_f64(2.), f16::from_f64(3.), f16::from_f64(4.)]); |
116 | | /// ``` |
117 | | fn convert_from_f64_slice(&mut self, src: &[f64]); |
118 | | |
119 | | /// Converts all of the [`f16`] or [`bf16`] elements of `self` into [`f32`] values in `dst`. |
120 | | /// |
121 | | /// The length of `src` must be the same as `self`. |
122 | | /// |
123 | | /// The conversion operation is vectorized over the slice, meaning the conversion may be more |
124 | | /// efficient than converting individual elements on some hardware that supports SIMD |
125 | | /// conversions. See [crate documentation](crate) for more information on hardware conversion |
126 | | /// support. |
127 | | /// |
128 | | /// # Panics |
129 | | /// |
130 | | /// This function will panic if the two slices have different lengths. |
131 | | /// |
132 | | /// # Examples |
133 | | /// ```rust |
134 | | /// # use half::prelude::*; |
135 | | /// // Initialize an empty buffer |
136 | | /// let mut buffer = [0f32; 4]; |
137 | | /// |
138 | | /// let half_values = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.), f16::from_f32(4.)]; |
139 | | /// |
140 | | /// // Now convert |
141 | | /// half_values.convert_to_f32_slice(&mut buffer); |
142 | | /// |
143 | | /// assert_eq!(buffer, [1., 2., 3., 4.]); |
144 | | /// ``` |
145 | | fn convert_to_f32_slice(&self, dst: &mut [f32]); |
146 | | |
147 | | /// Converts all of the [`f16`] or [`bf16`] elements of `self` into [`f64`] values in `dst`. |
148 | | /// |
149 | | /// The length of `src` must be the same as `self`. |
150 | | /// |
151 | | /// The conversion operation is vectorized over the slice, meaning the conversion may be more |
152 | | /// efficient than converting individual elements on some hardware that supports SIMD |
153 | | /// conversions. See [crate documentation](crate) for more information on hardware conversion |
154 | | /// support. |
155 | | /// |
156 | | /// # Panics |
157 | | /// |
158 | | /// This function will panic if the two slices have different lengths. |
159 | | /// |
160 | | /// # Examples |
161 | | /// ```rust |
162 | | /// # use half::prelude::*; |
163 | | /// // Initialize an empty buffer |
164 | | /// let mut buffer = [0f64; 4]; |
165 | | /// |
166 | | /// let half_values = [f16::from_f64(1.), f16::from_f64(2.), f16::from_f64(3.), f16::from_f64(4.)]; |
167 | | /// |
168 | | /// // Now convert |
169 | | /// half_values.convert_to_f64_slice(&mut buffer); |
170 | | /// |
171 | | /// assert_eq!(buffer, [1., 2., 3., 4.]); |
172 | | /// ``` |
173 | | fn convert_to_f64_slice(&self, dst: &mut [f64]); |
174 | | |
175 | | // Because trait is sealed, we can get away with different interfaces between features. |
176 | | |
177 | | /// Converts all of the [`f16`] or [`bf16`] elements of `self` into [`f32`] values in a new |
178 | | /// vector |
179 | | /// |
180 | | /// The conversion operation is vectorized over the slice, meaning the conversion may be more |
181 | | /// efficient than converting individual elements on some hardware that supports SIMD |
182 | | /// conversions. See [crate documentation](crate) for more information on hardware conversion |
183 | | /// support. |
184 | | /// |
185 | | /// This method is only available with the `std` or `alloc` feature. |
186 | | /// |
187 | | /// # Examples |
188 | | /// ```rust |
189 | | /// # use half::prelude::*; |
190 | | /// let half_values = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.), f16::from_f32(4.)]; |
191 | | /// let vec = half_values.to_f32_vec(); |
192 | | /// |
193 | | /// assert_eq!(vec, vec![1., 2., 3., 4.]); |
194 | | /// ``` |
195 | | #[cfg(any(feature = "alloc", feature = "std"))] |
196 | | #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] |
197 | | fn to_f32_vec(&self) -> Vec<f32>; |
198 | | |
199 | | /// Converts all of the [`f16`] or [`bf16`] elements of `self` into [`f64`] values in a new |
200 | | /// vector. |
201 | | /// |
202 | | /// The conversion operation is vectorized over the slice, meaning the conversion may be more |
203 | | /// efficient than converting individual elements on some hardware that supports SIMD |
204 | | /// conversions. See [crate documentation](crate) for more information on hardware conversion |
205 | | /// support. |
206 | | /// |
207 | | /// This method is only available with the `std` or `alloc` feature. |
208 | | /// |
209 | | /// # Examples |
210 | | /// ```rust |
211 | | /// # use half::prelude::*; |
212 | | /// let half_values = [f16::from_f64(1.), f16::from_f64(2.), f16::from_f64(3.), f16::from_f64(4.)]; |
213 | | /// let vec = half_values.to_f64_vec(); |
214 | | /// |
215 | | /// assert_eq!(vec, vec![1., 2., 3., 4.]); |
216 | | /// ``` |
217 | | #[cfg(feature = "alloc")] |
218 | | #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] |
219 | | fn to_f64_vec(&self) -> Vec<f64>; |
220 | | } |
221 | | |
222 | | /// Extensions to `[u16]` slices to support reinterpret operations. |
223 | | /// |
224 | | /// This trait is sealed and cannot be implemented outside of this crate. |
225 | | pub trait HalfBitsSliceExt: private::SealedHalfBitsSlice { |
226 | | /// Reinterprets a slice of [`u16`] bits as a slice of [`f16`] or [`bf16`] numbers. |
227 | | /// |
228 | | /// `H` is the type to cast to, and must be either the [`f16`] or [`bf16`] type. |
229 | | /// |
230 | | /// This is a zero-copy operation. The reinterpreted slice has the same lifetime and memory |
231 | | /// location as `self`. |
232 | | /// |
233 | | /// # Examples |
234 | | /// |
235 | | /// ```rust |
236 | | /// # use half::prelude::*; |
237 | | /// let int_buffer = [f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]; |
238 | | /// let float_buffer: &[f16] = int_buffer.reinterpret_cast(); |
239 | | /// |
240 | | /// assert_eq!(float_buffer, [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]); |
241 | | /// |
242 | | /// // You may have to specify the cast type directly if the compiler can't infer the type. |
243 | | /// // The following is also valid in Rust. |
244 | | /// let typed_buffer = int_buffer.reinterpret_cast::<f16>(); |
245 | | /// ``` |
246 | | fn reinterpret_cast<H>(&self) -> &[H] |
247 | | where |
248 | | H: crate::private::SealedHalf; |
249 | | |
250 | | /// Reinterprets a mutable slice of [`u16`] bits as a mutable slice of [`f16`] or [`bf16`] |
251 | | /// numbers. |
252 | | /// |
253 | | /// `H` is the type to cast to, and must be either the [`f16`] or [`bf16`] type. |
254 | | /// |
255 | | /// This is a zero-copy operation. The transmuted slice has the same lifetime as the original, |
256 | | /// which prevents mutating `self` as long as the returned `&mut [f16]` is borrowed. |
257 | | /// |
258 | | /// # Examples |
259 | | /// |
260 | | /// ```rust |
261 | | /// # use half::prelude::*; |
262 | | /// let mut int_buffer = [f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]; |
263 | | /// |
264 | | /// { |
265 | | /// let float_buffer: &mut [f16] = int_buffer.reinterpret_cast_mut(); |
266 | | /// |
267 | | /// assert_eq!(float_buffer, [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]); |
268 | | /// |
269 | | /// // Mutating the f16 slice will mutating the original |
270 | | /// float_buffer[0] = f16::from_f32(0.); |
271 | | /// } |
272 | | /// |
273 | | /// // Note that we need to drop float_buffer before using int_buffer again or we will get a borrow error. |
274 | | /// assert_eq!(int_buffer, [f16::from_f32(0.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]); |
275 | | /// |
276 | | /// // You may have to specify the cast type directly if the compiler can't infer the type. |
277 | | /// // The following is also valid in Rust. |
278 | | /// let typed_buffer = int_buffer.reinterpret_cast_mut::<f16>(); |
279 | | /// ``` |
280 | | fn reinterpret_cast_mut<H>(&mut self) -> &mut [H] |
281 | | where |
282 | | H: crate::private::SealedHalf; |
283 | | } |
284 | | |
285 | | mod private { |
286 | | use crate::{bf16, f16}; |
287 | | |
288 | | pub trait SealedHalfFloatSlice {} |
289 | | impl SealedHalfFloatSlice for [f16] {} |
290 | | impl SealedHalfFloatSlice for [bf16] {} |
291 | | |
292 | | pub trait SealedHalfBitsSlice {} |
293 | | impl SealedHalfBitsSlice for [u16] {} |
294 | | } |
295 | | |
296 | | impl HalfFloatSliceExt for [f16] { |
297 | | #[inline] |
298 | 0 | fn reinterpret_cast(&self) -> &[u16] { |
299 | 0 | let pointer = self.as_ptr() as *const u16; |
300 | 0 | let length = self.len(); |
301 | | // SAFETY: We are reconstructing full length of original slice, using its same lifetime, |
302 | | // and the size of elements are identical |
303 | 0 | unsafe { slice::from_raw_parts(pointer, length) } |
304 | 0 | } |
305 | | |
306 | | #[inline] |
307 | 0 | fn reinterpret_cast_mut(&mut self) -> &mut [u16] { |
308 | 0 | let pointer = self.as_ptr() as *mut u16; |
309 | 0 | let length = self.len(); |
310 | | // SAFETY: We are reconstructing full length of original slice, using its same lifetime, |
311 | | // and the size of elements are identical |
312 | 0 | unsafe { slice::from_raw_parts_mut(pointer, length) } |
313 | 0 | } |
314 | | |
315 | 0 | fn convert_from_f32_slice(&mut self, src: &[f32]) { |
316 | 0 | assert_eq!( |
317 | 0 | self.len(), |
318 | 0 | src.len(), |
319 | 0 | "destination and source slices have different lengths" |
320 | | ); |
321 | | |
322 | 0 | let mut chunks = src.chunks_exact(4); |
323 | 0 | let mut chunk_count = 0usize; // Not using .enumerate() because we need this value for remainder |
324 | 0 | for chunk in &mut chunks { |
325 | 0 | let vec = convert::f32x4_to_f16x4(chunk); |
326 | 0 | let dst_idx = chunk_count * 4; |
327 | 0 | self[dst_idx..dst_idx + 4].copy_from_slice(vec.reinterpret_cast()); |
328 | 0 | chunk_count += 1; |
329 | 0 | } |
330 | | |
331 | | // Process remainder |
332 | 0 | if !chunks.remainder().is_empty() { |
333 | 0 | let mut buf = [0f32; 4]; |
334 | 0 | buf[..chunks.remainder().len()].copy_from_slice(chunks.remainder()); |
335 | 0 | let vec = convert::f32x4_to_f16x4(&buf); |
336 | 0 | let dst_idx = chunk_count * 4; |
337 | 0 | self[dst_idx..dst_idx + chunks.remainder().len()] |
338 | 0 | .copy_from_slice(vec[..chunks.remainder().len()].reinterpret_cast()); |
339 | 0 | } |
340 | 0 | } |
341 | | |
342 | 0 | fn convert_from_f64_slice(&mut self, src: &[f64]) { |
343 | 0 | assert_eq!( |
344 | 0 | self.len(), |
345 | 0 | src.len(), |
346 | 0 | "destination and source slices have different lengths" |
347 | | ); |
348 | | |
349 | 0 | let mut chunks = src.chunks_exact(4); |
350 | 0 | let mut chunk_count = 0usize; // Not using .enumerate() because we need this value for remainder |
351 | 0 | for chunk in &mut chunks { |
352 | 0 | let vec = convert::f64x4_to_f16x4(chunk); |
353 | 0 | let dst_idx = chunk_count * 4; |
354 | 0 | self[dst_idx..dst_idx + 4].copy_from_slice(vec.reinterpret_cast()); |
355 | 0 | chunk_count += 1; |
356 | 0 | } |
357 | | |
358 | | // Process remainder |
359 | 0 | if !chunks.remainder().is_empty() { |
360 | 0 | let mut buf = [0f64; 4]; |
361 | 0 | buf[..chunks.remainder().len()].copy_from_slice(chunks.remainder()); |
362 | 0 | let vec = convert::f64x4_to_f16x4(&buf); |
363 | 0 | let dst_idx = chunk_count * 4; |
364 | 0 | self[dst_idx..dst_idx + chunks.remainder().len()] |
365 | 0 | .copy_from_slice(vec[..chunks.remainder().len()].reinterpret_cast()); |
366 | 0 | } |
367 | 0 | } |
368 | | |
369 | 0 | fn convert_to_f32_slice(&self, dst: &mut [f32]) { |
370 | 0 | assert_eq!( |
371 | 0 | self.len(), |
372 | 0 | dst.len(), |
373 | 0 | "destination and source slices have different lengths" |
374 | | ); |
375 | | |
376 | 0 | let mut chunks = self.chunks_exact(4); |
377 | 0 | let mut chunk_count = 0usize; // Not using .enumerate() because we need this value for remainder |
378 | 0 | for chunk in &mut chunks { |
379 | 0 | let vec = convert::f16x4_to_f32x4(chunk.reinterpret_cast()); |
380 | 0 | let dst_idx = chunk_count * 4; |
381 | 0 | dst[dst_idx..dst_idx + 4].copy_from_slice(&vec); |
382 | 0 | chunk_count += 1; |
383 | 0 | } |
384 | | |
385 | | // Process remainder |
386 | 0 | if !chunks.remainder().is_empty() { |
387 | 0 | let mut buf = [0u16; 4]; |
388 | 0 | buf[..chunks.remainder().len()].copy_from_slice(chunks.remainder().reinterpret_cast()); |
389 | 0 | let vec = convert::f16x4_to_f32x4(&buf); |
390 | 0 | let dst_idx = chunk_count * 4; |
391 | 0 | dst[dst_idx..dst_idx + chunks.remainder().len()] |
392 | 0 | .copy_from_slice(&vec[..chunks.remainder().len()]); |
393 | 0 | } |
394 | 0 | } |
395 | | |
396 | 0 | fn convert_to_f64_slice(&self, dst: &mut [f64]) { |
397 | 0 | assert_eq!( |
398 | 0 | self.len(), |
399 | 0 | dst.len(), |
400 | 0 | "destination and source slices have different lengths" |
401 | | ); |
402 | | |
403 | 0 | let mut chunks = self.chunks_exact(4); |
404 | 0 | let mut chunk_count = 0usize; // Not using .enumerate() because we need this value for remainder |
405 | 0 | for chunk in &mut chunks { |
406 | 0 | let vec = convert::f16x4_to_f64x4(chunk.reinterpret_cast()); |
407 | 0 | let dst_idx = chunk_count * 4; |
408 | 0 | dst[dst_idx..dst_idx + 4].copy_from_slice(&vec); |
409 | 0 | chunk_count += 1; |
410 | 0 | } |
411 | | |
412 | | // Process remainder |
413 | 0 | if !chunks.remainder().is_empty() { |
414 | 0 | let mut buf = [0u16; 4]; |
415 | 0 | buf[..chunks.remainder().len()].copy_from_slice(chunks.remainder().reinterpret_cast()); |
416 | 0 | let vec = convert::f16x4_to_f64x4(&buf); |
417 | 0 | let dst_idx = chunk_count * 4; |
418 | 0 | dst[dst_idx..dst_idx + chunks.remainder().len()] |
419 | 0 | .copy_from_slice(&vec[..chunks.remainder().len()]); |
420 | 0 | } |
421 | 0 | } |
422 | | |
423 | | #[cfg(any(feature = "alloc", feature = "std"))] |
424 | | #[inline] |
425 | | fn to_f32_vec(&self) -> Vec<f32> { |
426 | | let mut vec = vec![0f32; self.len()]; |
427 | | self.convert_to_f32_slice(&mut vec); |
428 | | vec |
429 | | } |
430 | | |
431 | | #[cfg(any(feature = "alloc", feature = "std"))] |
432 | | #[inline] |
433 | | fn to_f64_vec(&self) -> Vec<f64> { |
434 | | let mut vec = vec![0f64; self.len()]; |
435 | | self.convert_to_f64_slice(&mut vec); |
436 | | vec |
437 | | } |
438 | | } |
439 | | |
440 | | impl HalfFloatSliceExt for [bf16] { |
441 | | #[inline] |
442 | 0 | fn reinterpret_cast(&self) -> &[u16] { |
443 | 0 | let pointer = self.as_ptr() as *const u16; |
444 | 0 | let length = self.len(); |
445 | | // SAFETY: We are reconstructing full length of original slice, using its same lifetime, |
446 | | // and the size of elements are identical |
447 | 0 | unsafe { slice::from_raw_parts(pointer, length) } |
448 | 0 | } |
449 | | |
450 | | #[inline] |
451 | 0 | fn reinterpret_cast_mut(&mut self) -> &mut [u16] { |
452 | 0 | let pointer = self.as_ptr() as *mut u16; |
453 | 0 | let length = self.len(); |
454 | | // SAFETY: We are reconstructing full length of original slice, using its same lifetime, |
455 | | // and the size of elements are identical |
456 | 0 | unsafe { slice::from_raw_parts_mut(pointer, length) } |
457 | 0 | } |
458 | | |
459 | 0 | fn convert_from_f32_slice(&mut self, src: &[f32]) { |
460 | 0 | assert_eq!( |
461 | 0 | self.len(), |
462 | 0 | src.len(), |
463 | 0 | "destination and source slices have different lengths" |
464 | | ); |
465 | | |
466 | | // Just use regular loop here until there's any bf16 SIMD support. |
467 | 0 | for (i, f) in src.iter().enumerate() { |
468 | 0 | self[i] = bf16::from_f32(*f); |
469 | 0 | } |
470 | 0 | } |
471 | | |
472 | 0 | fn convert_from_f64_slice(&mut self, src: &[f64]) { |
473 | 0 | assert_eq!( |
474 | 0 | self.len(), |
475 | 0 | src.len(), |
476 | 0 | "destination and source slices have different lengths" |
477 | | ); |
478 | | |
479 | | // Just use regular loop here until there's any bf16 SIMD support. |
480 | 0 | for (i, f) in src.iter().enumerate() { |
481 | 0 | self[i] = bf16::from_f64(*f); |
482 | 0 | } |
483 | 0 | } |
484 | | |
485 | 0 | fn convert_to_f32_slice(&self, dst: &mut [f32]) { |
486 | 0 | assert_eq!( |
487 | 0 | self.len(), |
488 | 0 | dst.len(), |
489 | 0 | "destination and source slices have different lengths" |
490 | | ); |
491 | | |
492 | | // Just use regular loop here until there's any bf16 SIMD support. |
493 | 0 | for (i, f) in self.iter().enumerate() { |
494 | 0 | dst[i] = f.to_f32(); |
495 | 0 | } |
496 | 0 | } |
497 | | |
498 | 0 | fn convert_to_f64_slice(&self, dst: &mut [f64]) { |
499 | 0 | assert_eq!( |
500 | 0 | self.len(), |
501 | 0 | dst.len(), |
502 | 0 | "destination and source slices have different lengths" |
503 | | ); |
504 | | |
505 | | // Just use regular loop here until there's any bf16 SIMD support. |
506 | 0 | for (i, f) in self.iter().enumerate() { |
507 | 0 | dst[i] = f.to_f64(); |
508 | 0 | } |
509 | 0 | } |
510 | | |
511 | | #[cfg(any(feature = "alloc", feature = "std"))] |
512 | | #[inline] |
513 | | fn to_f32_vec(&self) -> Vec<f32> { |
514 | | let mut vec = vec![0f32; self.len()]; |
515 | | self.convert_to_f32_slice(&mut vec); |
516 | | vec |
517 | | } |
518 | | |
519 | | #[cfg(any(feature = "alloc", feature = "std"))] |
520 | | #[inline] |
521 | | fn to_f64_vec(&self) -> Vec<f64> { |
522 | | let mut vec = vec![0f64; self.len()]; |
523 | | self.convert_to_f64_slice(&mut vec); |
524 | | vec |
525 | | } |
526 | | } |
527 | | |
528 | | impl HalfBitsSliceExt for [u16] { |
529 | | // Since we sealed all the traits involved, these are safe. |
530 | | #[inline] |
531 | 0 | fn reinterpret_cast<H>(&self) -> &[H] |
532 | 0 | where |
533 | 0 | H: crate::private::SealedHalf, |
534 | | { |
535 | 0 | let pointer = self.as_ptr() as *const H; |
536 | 0 | let length = self.len(); |
537 | | // SAFETY: We are reconstructing full length of original slice, using its same lifetime, |
538 | | // and the size of elements are identical |
539 | 0 | unsafe { slice::from_raw_parts(pointer, length) } |
540 | 0 | } |
541 | | |
542 | | #[inline] |
543 | 0 | fn reinterpret_cast_mut<H>(&mut self) -> &mut [H] |
544 | 0 | where |
545 | 0 | H: crate::private::SealedHalf, |
546 | | { |
547 | 0 | let pointer = self.as_mut_ptr() as *mut H; |
548 | 0 | let length = self.len(); |
549 | | // SAFETY: We are reconstructing full length of original slice, using its same lifetime, |
550 | | // and the size of elements are identical |
551 | 0 | unsafe { slice::from_raw_parts_mut(pointer, length) } |
552 | 0 | } |
553 | | } |
554 | | |
555 | | #[doc(hidden)] |
556 | | #[deprecated( |
557 | | since = "1.4.0", |
558 | | note = "use `HalfBitsSliceExt::reinterpret_cast_mut` instead" |
559 | | )] |
560 | | #[inline] |
561 | 0 | pub fn from_bits_mut(bits: &mut [u16]) -> &mut [f16] { |
562 | 0 | bits.reinterpret_cast_mut() |
563 | 0 | } |
564 | | |
565 | | #[doc(hidden)] |
566 | | #[deprecated( |
567 | | since = "1.4.0", |
568 | | note = "use `HalfFloatSliceExt::reinterpret_cast_mut` instead" |
569 | | )] |
570 | | #[inline] |
571 | 0 | pub fn to_bits_mut(bits: &mut [f16]) -> &mut [u16] { |
572 | 0 | bits.reinterpret_cast_mut() |
573 | 0 | } |
574 | | |
575 | | #[doc(hidden)] |
576 | | #[deprecated( |
577 | | since = "1.4.0", |
578 | | note = "use `HalfBitsSliceExt::reinterpret_cast` instead" |
579 | | )] |
580 | | #[inline] |
581 | 0 | pub fn from_bits(bits: &[u16]) -> &[f16] { |
582 | 0 | bits.reinterpret_cast() |
583 | 0 | } |
584 | | |
585 | | #[doc(hidden)] |
586 | | #[deprecated( |
587 | | since = "1.4.0", |
588 | | note = "use `HalfFloatSliceExt::reinterpret_cast` instead" |
589 | | )] |
590 | | #[inline] |
591 | 0 | pub fn to_bits(bits: &[f16]) -> &[u16] { |
592 | 0 | bits.reinterpret_cast() |
593 | 0 | } |
594 | | |
#[allow(clippy::float_cmp)]
#[cfg(test)]
mod test {
    use super::{HalfBitsSliceExt, HalfFloatSliceExt};
    use crate::{bf16, f16};

    #[test]
    fn test_slice_conversions_f16() {
        let bits = &[
            f16::E.to_bits(),
            f16::PI.to_bits(),
            f16::EPSILON.to_bits(),
            f16::FRAC_1_SQRT_2.to_bits(),
        ];
        let numbers = &[f16::E, f16::PI, f16::EPSILON, f16::FRAC_1_SQRT_2];

        // bits -> numbers
        let as_numbers = bits.reinterpret_cast::<f16>();
        assert_eq!(as_numbers, numbers);

        // numbers -> bits round-trips back to the original
        assert_eq!(as_numbers.reinterpret_cast(), bits);
    }

    #[test]
    fn test_mutablility_f16() {
        let mut storage = [f16::PI.to_bits()];
        let bits = &mut storage[..];

        {
            // The mutable reinterpret borrow must end (hence the braces) before `bits` is
            // usable again.
            let numbers = bits.reinterpret_cast_mut();
            numbers[0] = f16::E;
        }

        assert_eq!(bits, &[f16::E.to_bits()]);

        bits[0] = f16::LN_2.to_bits();
        assert_eq!(bits, &[f16::LN_2.to_bits()]);
    }

    #[test]
    fn test_slice_conversions_bf16() {
        let bits = &[
            bf16::E.to_bits(),
            bf16::PI.to_bits(),
            bf16::EPSILON.to_bits(),
            bf16::FRAC_1_SQRT_2.to_bits(),
        ];
        let numbers = &[bf16::E, bf16::PI, bf16::EPSILON, bf16::FRAC_1_SQRT_2];

        // bits -> numbers
        let as_numbers = bits.reinterpret_cast::<bf16>();
        assert_eq!(as_numbers, numbers);

        // numbers -> bits round-trips back to the original
        assert_eq!(as_numbers.reinterpret_cast(), bits);
    }

    #[test]
    fn test_mutablility_bf16() {
        let mut storage = [bf16::PI.to_bits()];
        let bits = &mut storage[..];

        {
            // The mutable reinterpret borrow must end (hence the braces) before `bits` is
            // usable again.
            let numbers = bits.reinterpret_cast_mut();
            numbers[0] = bf16::E;
        }

        assert_eq!(bits, &[bf16::E.to_bits()]);

        bits[0] = bf16::LN_2.to_bits();
        assert_eq!(bits, &[bf16::LN_2.to_bits()]);
    }

    #[test]
    fn slice_convert_f16_f32() {
        // Exact chunks (length is a multiple of the 4-wide conversion chunk)
        let mut src32 = [0f32; 8];
        let mut src16 = [f16::ZERO; 8];
        for i in 0..src32.len() {
            src32[i] = (i + 1) as f32;
            src16[i] = f16::from_f32((i + 1) as f32);
        }
        let mut out32 = [0f32; 8];
        let mut out16 = [f16::ZERO; 8];

        src16.convert_to_f32_slice(&mut out32);
        assert_eq!(out32, src32);

        out16.convert_from_f32_slice(&src32);
        assert_eq!(out16, src16);

        // Full chunks plus a trailing remainder
        let mut src32 = [0f32; 9];
        let mut src16 = [f16::ZERO; 9];
        for i in 0..src32.len() {
            src32[i] = (i + 1) as f32;
            src16[i] = f16::from_f32((i + 1) as f32);
        }
        let mut out32 = [0f32; 9];
        let mut out16 = [f16::ZERO; 9];

        src16.convert_to_f32_slice(&mut out32);
        assert_eq!(out32, src32);

        out16.convert_from_f32_slice(&src32);
        assert_eq!(out16, src16);

        // Remainder only (shorter than one chunk)
        let mut src32 = [0f32; 2];
        let mut src16 = [f16::ZERO; 2];
        for i in 0..src32.len() {
            src32[i] = (i + 1) as f32;
            src16[i] = f16::from_f32((i + 1) as f32);
        }
        let mut out32 = [0f32; 2];
        let mut out16 = [f16::ZERO; 2];

        src16.convert_to_f32_slice(&mut out32);
        assert_eq!(out32, src32);

        out16.convert_from_f32_slice(&src32);
        assert_eq!(out16, src16);
    }

    #[test]
    fn slice_convert_bf16_f32() {
        // Exact chunks (length is a multiple of the 4-wide conversion chunk)
        let mut src32 = [0f32; 8];
        let mut src16 = [bf16::ZERO; 8];
        for i in 0..src32.len() {
            src32[i] = (i + 1) as f32;
            src16[i] = bf16::from_f32((i + 1) as f32);
        }
        let mut out32 = [0f32; 8];
        let mut out16 = [bf16::ZERO; 8];

        src16.convert_to_f32_slice(&mut out32);
        assert_eq!(out32, src32);

        out16.convert_from_f32_slice(&src32);
        assert_eq!(out16, src16);

        // Full chunks plus a trailing remainder
        let mut src32 = [0f32; 9];
        let mut src16 = [bf16::ZERO; 9];
        for i in 0..src32.len() {
            src32[i] = (i + 1) as f32;
            src16[i] = bf16::from_f32((i + 1) as f32);
        }
        let mut out32 = [0f32; 9];
        let mut out16 = [bf16::ZERO; 9];

        src16.convert_to_f32_slice(&mut out32);
        assert_eq!(out32, src32);

        out16.convert_from_f32_slice(&src32);
        assert_eq!(out16, src16);

        // Remainder only (shorter than one chunk)
        let mut src32 = [0f32; 2];
        let mut src16 = [bf16::ZERO; 2];
        for i in 0..src32.len() {
            src32[i] = (i + 1) as f32;
            src16[i] = bf16::from_f32((i + 1) as f32);
        }
        let mut out32 = [0f32; 2];
        let mut out16 = [bf16::ZERO; 2];

        src16.convert_to_f32_slice(&mut out32);
        assert_eq!(out32, src32);

        out16.convert_from_f32_slice(&src32);
        assert_eq!(out16, src16);
    }

    #[test]
    fn slice_convert_f16_f64() {
        // Exact chunks (length is a multiple of the 4-wide conversion chunk)
        let mut src64 = [0f64; 8];
        let mut src16 = [f16::ZERO; 8];
        for i in 0..src64.len() {
            src64[i] = (i + 1) as f64;
            src16[i] = f16::from_f64((i + 1) as f64);
        }
        let mut out64 = [0f64; 8];
        let mut out16 = [f16::ZERO; 8];

        src16.convert_to_f64_slice(&mut out64);
        assert_eq!(out64, src64);

        out16.convert_from_f64_slice(&src64);
        assert_eq!(out16, src16);

        // Full chunks plus a trailing remainder
        let mut src64 = [0f64; 9];
        let mut src16 = [f16::ZERO; 9];
        for i in 0..src64.len() {
            src64[i] = (i + 1) as f64;
            src16[i] = f16::from_f64((i + 1) as f64);
        }
        let mut out64 = [0f64; 9];
        let mut out16 = [f16::ZERO; 9];

        src16.convert_to_f64_slice(&mut out64);
        assert_eq!(out64, src64);

        out16.convert_from_f64_slice(&src64);
        assert_eq!(out16, src16);

        // Remainder only (shorter than one chunk)
        let mut src64 = [0f64; 2];
        let mut src16 = [f16::ZERO; 2];
        for i in 0..src64.len() {
            src64[i] = (i + 1) as f64;
            src16[i] = f16::from_f64((i + 1) as f64);
        }
        let mut out64 = [0f64; 2];
        let mut out16 = [f16::ZERO; 2];

        src16.convert_to_f64_slice(&mut out64);
        assert_eq!(out64, src64);

        out16.convert_from_f64_slice(&src64);
        assert_eq!(out16, src16);
    }

    #[test]
    fn slice_convert_bf16_f64() {
        // Exact chunks (length is a multiple of the 4-wide conversion chunk)
        let mut src64 = [0f64; 8];
        let mut src16 = [bf16::ZERO; 8];
        for i in 0..src64.len() {
            src64[i] = (i + 1) as f64;
            src16[i] = bf16::from_f64((i + 1) as f64);
        }
        let mut out64 = [0f64; 8];
        let mut out16 = [bf16::ZERO; 8];

        src16.convert_to_f64_slice(&mut out64);
        assert_eq!(out64, src64);

        out16.convert_from_f64_slice(&src64);
        assert_eq!(out16, src16);

        // Full chunks plus a trailing remainder
        let mut src64 = [0f64; 9];
        let mut src16 = [bf16::ZERO; 9];
        for i in 0..src64.len() {
            src64[i] = (i + 1) as f64;
            src16[i] = bf16::from_f64((i + 1) as f64);
        }
        let mut out64 = [0f64; 9];
        let mut out16 = [bf16::ZERO; 9];

        src16.convert_to_f64_slice(&mut out64);
        assert_eq!(out64, src64);

        out16.convert_from_f64_slice(&src64);
        assert_eq!(out16, src16);

        // Remainder only (shorter than one chunk)
        let mut src64 = [0f64; 2];
        let mut src16 = [bf16::ZERO; 2];
        for i in 0..src64.len() {
            src64[i] = (i + 1) as f64;
            src16[i] = bf16::from_f64((i + 1) as f64);
        }
        let mut out64 = [0f64; 2];
        let mut out16 = [bf16::ZERO; 2];

        src16.convert_to_f64_slice(&mut out64);
        assert_eq!(out64, src64);

        out16.convert_from_f64_slice(&src64);
        assert_eq!(out16, src16);
    }

    #[test]
    #[should_panic]
    fn convert_from_f32_slice_len_mismatch_panics() {
        let mut dst = [f16::ZERO; 3];
        let src = [0f32; 4];
        dst.convert_from_f32_slice(&src);
    }

    #[test]
    #[should_panic]
    fn convert_from_f64_slice_len_mismatch_panics() {
        let mut dst = [f16::ZERO; 3];
        let src = [0f64; 4];
        dst.convert_from_f64_slice(&src);
    }

    #[test]
    #[should_panic]
    fn convert_to_f32_slice_len_mismatch_panics() {
        let src = [f16::ZERO; 3];
        let mut dst = [0f32; 4];
        src.convert_to_f32_slice(&mut dst);
    }

    #[test]
    #[should_panic]
    fn convert_to_f64_slice_len_mismatch_panics() {
        let src = [f16::ZERO; 3];
        let mut dst = [0f64; 4];
        src.convert_to_f64_slice(&mut dst);
    }
}