/rust/registry/src/index.crates.io-6f17d22bba15001f/lz4_flex-0.11.1/src/fastcpy.rs
Line | Count | Source (jump to first uncovered line) |
1 | | //! # FastCpy |
2 | | //! |
3 | | //! The Rust Compiler calls `memcpy` for slices of unknown length. |
4 | | //! This crate provides a faster implementation of `memcpy` for slices up to 32bytes (64bytes with `avx`). |
5 | | //! If you know most of your copy operations are not too big, you can use `fastcpy` to speed up your program. |
6 | | //! |
7 | | //! `fastcpy` is designed to contain not too much assembly, so the overhead is low. |
8 | | //! |
9 | | //! As a fallback, the standard `memcpy` is called. |
10 | | //! |
11 | | //! ## Double Copy Trick |
12 | | //! `fastcpy` employs a double copy trick to copy slices of length 4-32bytes (64bytes with `avx`). |
13 | | //! E.g. a slice of length 6 can be copied with two unconditional copy operations. |
14 | | //! |
15 | | //! /// [1, 2, 3, 4, 5, 6] |
16 | | //! /// [1, 2, 3, 4] |
17 | | //! /// [3, 4, 5, 6] |
18 | | //! |
19 | | |
20 | | #[inline] |
21 | 0 | pub fn slice_copy(src: &[u8], dst: &mut [u8]) { |
22 | | #[inline(never)] |
23 | | #[cold] |
24 | | #[track_caller] |
25 | 0 | fn len_mismatch_fail(dst_len: usize, src_len: usize) -> ! { |
26 | 0 | panic!( |
27 | 0 | "source slice length ({}) does not match destination slice length ({})", |
28 | 0 | src_len, dst_len, |
29 | 0 | ); |
30 | | } |
31 | | |
32 | 0 | if src.len() != dst.len() { |
33 | 0 | len_mismatch_fail(src.len(), dst.len()); |
34 | 0 | } |
35 | 0 | let len = src.len(); |
36 | 0 |
|
37 | 0 | if src.is_empty() { |
38 | 0 | return; |
39 | 0 | } |
40 | 0 |
|
41 | 0 | if len < 4 { |
42 | 0 | short_copy(src, dst); |
43 | 0 | return; |
44 | 0 | } |
45 | 0 |
|
46 | 0 | if len < 8 { |
47 | 0 | double_copy_trick::<4>(src, dst); |
48 | 0 | return; |
49 | 0 | } |
50 | 0 |
|
51 | 0 | if len <= 16 { |
52 | 0 | double_copy_trick::<8>(src, dst); |
53 | 0 | return; |
54 | 0 | } |
55 | 0 |
|
56 | 0 | if len <= 32 { |
57 | 0 | double_copy_trick::<16>(src, dst); |
58 | 0 | return; |
59 | 0 | } |
60 | 0 |
|
61 | 0 | /// The code will use the vmovdqu instruction to copy 32 bytes at a time. |
62 | 0 | #[cfg(target_feature = "avx")] |
63 | 0 | { |
64 | 0 | if len <= 64 { |
65 | 0 | double_copy_trick::<32>(src, dst); |
66 | 0 | return; |
67 | 0 | } |
68 | 0 | } |
69 | 0 |
|
70 | 0 | // For larger sizes we use the default, which calls memcpy |
71 | 0 | // memcpy does some virtual memory tricks to copy large chunks of memory. |
72 | 0 | // |
73 | 0 | // The theory should be that the checks above don't cost much relative to the copy call for |
74 | 0 | // larger copies. |
75 | 0 | // The bounds checks in `copy_from_slice` are elided. |
76 | 0 | dst.copy_from_slice(src); |
77 | 0 | } |
78 | | |
79 | | #[inline(always)] |
80 | 0 | fn short_copy(src: &[u8], dst: &mut [u8]) { |
81 | 0 | let len = src.len(); |
82 | 0 |
|
83 | 0 | // length 1-3 |
84 | 0 | dst[0] = src[0]; |
85 | 0 | if len >= 2 { |
86 | 0 | double_copy_trick::<2>(src, dst); |
87 | 0 | } |
88 | 0 | } |
89 | | |
/// Copies `src` into `dst` with two fixed-size copies of `SIZE` bytes each: one anchored at the
/// start and one anchored at the end. The two windows may overlap.
///
/// ```text
/// [1, 2, 3, 4, 5, 6]   input, SIZE = 4
/// [1, 2, 3, 4]         first copy  (head)
///       [3, 4, 5, 6]   second copy (tail)
/// ```
///
/// The whole slice is covered only when `SIZE <= src.len() <= 2 * SIZE`; slicing panics if the
/// slices are shorter than `SIZE`.
#[inline(always)]
fn double_copy_trick<const SIZE: usize>(src: &[u8], dst: &mut [u8]) {
    // Head window.
    dst[..SIZE].copy_from_slice(&src[..SIZE]);

    // Tail window; the offset is computed only after the head copy succeeded.
    let tail_start = src.len() - SIZE;
    dst[tail_start..].copy_from_slice(&src[tail_start..]);
}
98 | | |
#[cfg(test)]
mod tests {
    use super::slice_copy;
    use alloc::vec::Vec;
    use proptest::prelude::*;

    /// Exclusive upper bound of the lengths exercised by the edge-case sweep.
    const MAX_EDGE_CASE_LEN: usize = 512 * 2;

    /// Runs `slice_copy` from `input` into a zero-filled buffer of the same length and returns
    /// that buffer.
    fn copy_into_fresh(input: &[u8]) -> Vec<u8> {
        let mut output = vec![0u8; input.len()];
        slice_copy(input, &mut output);
        output
    }

    proptest! {
        // Arbitrary byte vectors must survive the copy unchanged.
        #[test]
        fn test_fast_short_slice_copy(left: Vec<u8>) {
            let right = copy_into_fresh(&left);
            prop_assert_eq!(&left, &right);
        }
    }

    /// Sweeps every length below `MAX_EDGE_CASE_LEN` with a counting byte pattern.
    #[test]
    fn test_fast_short_slice_copy_edge_cases() {
        for len in 0..MAX_EDGE_CASE_LEN {
            let left: Vec<u8> = (0..len).map(|i| i as u8).collect();
            let right = copy_into_fresh(&left);
            assert_eq!(left, right);
        }
    }

    /// Fixed input of 33 bytes with the values 0 through 32.
    #[test]
    fn test_fail2() {
        let left: Vec<u8> = (0..=32).collect();
        let right = copy_into_fresh(&left);
        assert_eq!(left, right);
    }

    /// Fixed input that is all zeros except for a single `1`.
    #[test]
    fn test_fail() {
        let left = vec![
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        ];
        let right = copy_into_fresh(&left);
        assert_eq!(left, right);
    }
}