/rust/registry/src/index.crates.io-6f17d22bba15001f/httparse-1.10.1/src/simd/swar.rs
Line | Count | Source (jump to first uncovered line) |
1 | | /// SWAR: SIMD Within A Register |
2 | | /// SIMD validator backend that validates register-sized chunks of data at a time. |
3 | | use crate::{is_header_name_token, is_header_value_token, is_uri_token, Bytes}; |
4 | | |
5 | | // Adapt block-size to match native register size, i.e: 32bit => 4, 64bit => 8 |
6 | | const BLOCK_SIZE: usize = core::mem::size_of::<usize>(); |
7 | | type ByteBlock = [u8; BLOCK_SIZE]; |
8 | | |
9 | | #[inline] |
10 | 0 | pub fn match_uri_vectored(bytes: &mut Bytes) { |
11 | | loop { |
12 | 0 | if let Some(bytes8) = bytes.peek_n::<ByteBlock>(BLOCK_SIZE) { |
13 | 0 | let n = match_uri_char_8_swar(bytes8); |
14 | 0 | // SAFETY: using peek_n to retrieve the bytes ensures that there are at least n more bytes |
15 | 0 | // in `bytes`, so calling `advance(n)` is safe. |
16 | 0 | unsafe { |
17 | 0 | bytes.advance(n); |
18 | 0 | } |
19 | 0 | if n == BLOCK_SIZE { |
20 | 0 | continue; |
21 | 0 | } |
22 | 0 | } |
23 | 0 | if let Some(b) = bytes.peek() { |
24 | 0 | if is_uri_token(b) { |
25 | | // SAFETY: using peek to retrieve the byte ensures that there is at least 1 more byte |
26 | | // in bytes, so calling advance is safe. |
27 | 0 | unsafe { |
28 | 0 | bytes.advance(1); |
29 | 0 | } |
30 | 0 | continue; |
31 | 0 | } |
32 | 0 | } |
33 | 0 | break; |
34 | 0 | } |
35 | 0 | } |
36 | | |
37 | | #[inline] |
38 | 0 | pub fn match_header_value_vectored(bytes: &mut Bytes) { |
39 | | loop { |
40 | 0 | if let Some(bytes8) = bytes.peek_n::<ByteBlock>(BLOCK_SIZE) { |
41 | 0 | let n = match_header_value_char_8_swar(bytes8); |
42 | 0 | // SAFETY: using peek_n to retrieve the bytes ensures that there are at least n more bytes |
43 | 0 | // in `bytes`, so calling `advance(n)` is safe. |
44 | 0 | unsafe { |
45 | 0 | bytes.advance(n); |
46 | 0 | } |
47 | 0 | if n == BLOCK_SIZE { |
48 | 0 | continue; |
49 | 0 | } |
50 | 0 | } |
51 | 0 | if let Some(b) = bytes.peek() { |
52 | 0 | if is_header_value_token(b) { |
53 | | // SAFETY: using peek to retrieve the byte ensures that there is at least 1 more byte |
54 | | // in bytes, so calling advance is safe. |
55 | 0 | unsafe { |
56 | 0 | bytes.advance(1); |
57 | 0 | } |
58 | 0 | continue; |
59 | 0 | } |
60 | 0 | } |
61 | 0 | break; |
62 | 0 | } |
63 | 0 | } |
64 | | |
65 | | #[inline] |
66 | 0 | pub fn match_header_name_vectored(bytes: &mut Bytes) { |
67 | 0 | while let Some(block) = bytes.peek_n::<ByteBlock>(BLOCK_SIZE) { |
68 | 0 | let n = match_block(is_header_name_token, block); |
69 | 0 | // SAFETY: using peek_n to retrieve the bytes ensures that there are at least n more bytes |
70 | 0 | // in `bytes`, so calling `advance(n)` is safe. |
71 | 0 | unsafe { |
72 | 0 | bytes.advance(n); |
73 | 0 | } |
74 | 0 | if n != BLOCK_SIZE { |
75 | 0 | return; |
76 | 0 | } |
77 | | } |
78 | | // SAFETY: match_tail processes at most the remaining data in `bytes`. advances `bytes` to the |
79 | | // end, but no further. |
80 | 0 | unsafe { bytes.advance(match_tail(is_header_name_token, bytes.as_ref())) }; |
81 | 0 | } |
82 | | |
83 | | // Matches "tail", i.e: when we have <BLOCK_SIZE bytes in the buffer, should be uncommon |
84 | | #[cold] |
85 | | #[inline] |
86 | 0 | fn match_tail(f: impl Fn(u8) -> bool, bytes: &[u8]) -> usize { |
87 | 0 | for (i, &b) in bytes.iter().enumerate() { |
88 | 0 | if !f(b) { |
89 | 0 | return i; |
90 | 0 | } |
91 | | } |
92 | 0 | bytes.len() |
93 | 0 | } |
94 | | |
95 | | // Naive fallback block matcher |
96 | | #[inline(always)] |
97 | 0 | fn match_block(f: impl Fn(u8) -> bool, block: ByteBlock) -> usize { |
98 | 0 | for (i, &b) in block.iter().enumerate() { |
99 | 0 | if !f(b) { |
100 | 0 | return i; |
101 | 0 | } |
102 | | } |
103 | 0 | BLOCK_SIZE |
104 | 0 | } |
105 | | |
106 | | // A const alternative to u64::from_ne_bytes to avoid bumping MSRV (1.36 => 1.44) |
107 | | // creates a u64 whose bytes are each equal to b |
108 | 0 | const fn uniform_block(b: u8) -> usize { |
109 | 0 | (b as u64 * 0x01_01_01_01_01_01_01_01 /* [1_u8; 8] */) as usize |
110 | 0 | } |
111 | | |
112 | | // A byte-wise range-check on an entire word/block, |
113 | | // ensuring all bytes in the word satisfy `33 <= (x != 127) <= 255` |
114 | | #[inline] |
115 | 0 | fn match_uri_char_8_swar(block: ByteBlock) -> usize { |
116 | | // 33 <= (x != 127) <= 255 |
117 | | const M: u8 = 0x21; |
118 | | // uniform block full of exclamation mark (!) (33). |
119 | | const BM: usize = uniform_block(M); |
120 | | // uniform block full of 1. |
121 | | const ONE: usize = uniform_block(0x01); |
122 | | // uniform block full of DEL (127). |
123 | | const DEL: usize = uniform_block(0x7f); |
124 | | // uniform block full of 128. |
125 | | const M128: usize = uniform_block(128); |
126 | | |
127 | 0 | let x = usize::from_ne_bytes(block); // Really just a transmute |
128 | 0 | let lt = x.wrapping_sub(BM) & !x; // <= m |
129 | 0 |
|
130 | 0 | let xor_del = x ^ DEL; |
131 | 0 | let eq_del = xor_del.wrapping_sub(ONE) & !xor_del; // == DEL |
132 | 0 |
|
133 | 0 | offsetnz((lt | eq_del) & M128) |
134 | 0 | } |
135 | | |
136 | | // A byte-wise range-check on an entire word/block, |
137 | | // ensuring all bytes in the word satisfy `32 <= (x != 127) <= 255` |
138 | | #[inline] |
139 | 0 | fn match_header_value_char_8_swar(block: ByteBlock) -> usize { |
140 | | // 32 <= (x != 127) <= 255 |
141 | | const M: u8 = 0x20; |
142 | | // uniform block full of exclamation mark (!) (33). |
143 | | const BM: usize = uniform_block(M); |
144 | | // uniform block full of 1. |
145 | | const ONE: usize = uniform_block(0x01); |
146 | | // uniform block full of DEL (127). |
147 | | const DEL: usize = uniform_block(0x7f); |
148 | | // uniform block full of 128. |
149 | | const M128: usize = uniform_block(128); |
150 | | |
151 | 0 | let x = usize::from_ne_bytes(block); // Really just a transmute |
152 | 0 | let lt = x.wrapping_sub(BM) & !x; // <= m |
153 | 0 |
|
154 | 0 | let xor_del = x ^ DEL; |
155 | 0 | let eq_del = xor_del.wrapping_sub(ONE) & !xor_del; // == DEL |
156 | 0 |
|
157 | 0 | offsetnz((lt | eq_del) & M128) |
158 | 0 | } |
159 | | |
160 | | /// Check block to find offset of first non-zero byte |
161 | | // NOTE: Curiously `block.trailing_zeros() >> 3` appears to be slower, maybe revisit |
162 | | #[inline] |
163 | 0 | fn offsetnz(block: usize) -> usize { |
164 | 0 | // fast path optimistic case (common for long valid sequences) |
165 | 0 | if block == 0 { |
166 | 0 | return BLOCK_SIZE; |
167 | 0 | } |
168 | | |
169 | | // perf: rust will unroll this loop |
170 | 0 | for (i, b) in block.to_ne_bytes().iter().copied().enumerate() { |
171 | 0 | if b != 0 { |
172 | 0 | return i; |
173 | 0 | } |
174 | | } |
175 | 0 | unreachable!() |
176 | 0 | } |
177 | | |
178 | | #[test] |
179 | | fn test_is_header_value_block() { |
180 | | let is_header_value_block = |b| match_header_value_char_8_swar(b) == BLOCK_SIZE; |
181 | | |
182 | | // 0..32 => false |
183 | | for b in 0..32_u8 { |
184 | | assert!(!is_header_value_block([b; BLOCK_SIZE]), "b={}", b); |
185 | | } |
186 | | // 32..=126 => true |
187 | | for b in 32..=126_u8 { |
188 | | assert!(is_header_value_block([b; BLOCK_SIZE]), "b={}", b); |
189 | | } |
190 | | // 127 => false |
191 | | assert!(!is_header_value_block([b'\x7F'; BLOCK_SIZE]), "b={}", b'\x7F'); |
192 | | // 128..=255 => true |
193 | | for b in 128..=255_u8 { |
194 | | assert!(is_header_value_block([b; BLOCK_SIZE]), "b={}", b); |
195 | | } |
196 | | |
197 | | |
198 | | #[cfg(target_pointer_width = "64")] |
199 | | { |
200 | | // A few sanity checks on non-uniform bytes for safe-measure |
201 | | assert!(!is_header_value_block(*b"foo.com\n")); |
202 | | assert!(!is_header_value_block(*b"o.com\r\nU")); |
203 | | } |
204 | | } |
205 | | |
206 | | #[test] |
207 | | fn test_is_uri_block() { |
208 | | let is_uri_block = |b| match_uri_char_8_swar(b) == BLOCK_SIZE; |
209 | | |
210 | | // 0..33 => false |
211 | | for b in 0..33_u8 { |
212 | | assert!(!is_uri_block([b; BLOCK_SIZE]), "b={}", b); |
213 | | } |
214 | | // 33..=126 => true |
215 | | for b in 33..=126_u8 { |
216 | | assert!(is_uri_block([b; BLOCK_SIZE]), "b={}", b); |
217 | | } |
218 | | // 127 => false |
219 | | assert!(!is_uri_block([b'\x7F'; BLOCK_SIZE]), "b={}", b'\x7F'); |
220 | | // 128..=255 => true |
221 | | for b in 128..=255_u8 { |
222 | | assert!(is_uri_block([b; BLOCK_SIZE]), "b={}", b); |
223 | | } |
224 | | } |
225 | | |
226 | | #[test] |
227 | | fn test_offsetnz() { |
228 | | let seq = [0_u8; BLOCK_SIZE]; |
229 | | for i in 0..BLOCK_SIZE { |
230 | | let mut seq = seq; |
231 | | seq[i] = 1; |
232 | | let x = usize::from_ne_bytes(seq); |
233 | | assert_eq!(offsetnz(x), i); |
234 | | } |
235 | | } |