/rust/registry/src/index.crates.io-1949cf8c6b5b557f/bytecount-0.6.9/src/integer_simd.rs
Line | Count | Source |
1 | | #[cfg(not(feature = "runtime-dispatch-simd"))] |
2 | | use core::{mem, ptr, usize}; |
3 | | #[cfg(feature = "runtime-dispatch-simd")] |
4 | | use std::{mem, ptr, usize}; |
5 | | |
/// Broadcasts `byte` into every byte lane of a `usize`.
///
/// `usize::MAX / 0xFF` is the word with `0x01` in each lane, so multiplying it by a
/// value in `0..=255` replicates that value into every lane without overflowing.
fn splat(byte: u8) -> usize {
    let one_per_lane = usize::MAX / 0xFF;
    usize::from(byte) * one_per_lane
}
10 | | |
/// Reads one native-endian `usize` from `bytes`, starting at byte index `offset`.
///
/// The read has no alignment requirement.
///
/// # Safety
///
/// The caller must guarantee that `offset + size_of::<usize>() <= bytes.len()`;
/// no bounds check is performed.
unsafe fn usize_load_unchecked(bytes: &[u8], offset: usize) -> usize {
    let word_ptr = bytes.as_ptr().add(offset).cast::<usize>();
    // An unaligned read copies exactly `size_of::<usize>()` bytes out of the slice.
    ptr::read_unaligned(word_ptr)
}
20 | | |
/// Compares `lhs` and `rhs` lane by lane (one lane per byte).
///
/// Returns a word holding `0x01` in every lane where the two bytes are equal and
/// `0x00` in every lane where they differ.
fn bytewise_equal(lhs: usize, rhs: usize) -> usize {
    let one_per_lane = usize::MAX / 0xFF;
    let low_seven_bits = !(one_per_lane << 7);

    // A lane of `diff` is zero exactly when the corresponding bytes match.
    let diff = lhs ^ rhs;

    // Adding 0x7F to the low seven bits of a lane carries into bit 7 whenever any of
    // those bits is set; OR-ing `diff` back in also covers a lane whose only set bit
    // is bit 7. The addition never carries across lanes (0x7F + 0x7F < 0x100).
    let nonzero_in_bit7 = ((diff & low_seven_bits) + low_seven_bits) | diff;

    // Move the flag down to bit 0 of its lane, invert it, and drop everything else.
    !(nonzero_in_bit7 >> 7) & one_per_lane
}
28 | | |
/// Adds together all byte lanes of `values` and returns the total.
fn sum_usize(values: usize) -> usize {
    // 0x0001 in every 16-bit lane, and 0x00FF in every 16-bit lane.
    let one_per_u16 = usize::MAX / 0xFFFF;
    let low_byte_per_u16 = one_per_u16 * 0xFF;

    // Fold neighbouring bytes into 16-bit lanes first, so that the multiplication
    // below has room to accumulate without a lane overflowing.
    let even_bytes = values & low_byte_per_u16;
    let odd_bytes = (values >> 8) & low_byte_per_u16;
    let pair_sum: usize = even_bytes + odd_bytes;

    // Multiplying by 0x0001_0001_... leaves the sum of all 16-bit lanes in the top
    // two bytes; shift them down to obtain the result.
    let top_two_bytes_shift = (mem::size_of::<usize>() - 2) * 8;
    pair_sum.wrapping_mul(one_per_u16) >> top_two_bytes_shift
}
39 | | |
40 | 0 | fn is_leading_utf8_byte(values: usize) -> usize { |
41 | | // a leading UTF-8 byte is one which does not start with the bits 10. |
42 | 0 | ((!values >> 7) | (values >> 6)) & splat(1) |
43 | 0 | } |
44 | | |
45 | 0 | pub fn chunk_count(haystack: &[u8], needle: u8) -> usize { |
46 | 0 | let chunksize = mem::size_of::<usize>(); |
47 | 0 | assert!(haystack.len() >= chunksize); |
48 | | |
49 | | unsafe { |
50 | 0 | let mut offset = 0; |
51 | 0 | let mut count = 0; |
52 | | |
53 | 0 | let needles = splat(needle); |
54 | | |
55 | | // 2040 |
56 | 0 | while haystack.len() >= offset + chunksize * 255 { |
57 | 0 | let mut counts = 0; |
58 | 0 | for _ in 0..255 { |
59 | 0 | counts += bytewise_equal(usize_load_unchecked(haystack, offset), needles); |
60 | 0 | offset += chunksize; |
61 | 0 | } |
62 | 0 | count += sum_usize(counts); |
63 | | } |
64 | | |
65 | | // 8 |
66 | 0 | let mut counts = 0; |
67 | 0 | for i in 0..(haystack.len() - offset) / chunksize { |
68 | 0 | counts += bytewise_equal( |
69 | 0 | usize_load_unchecked(haystack, offset + i * chunksize), |
70 | 0 | needles, |
71 | 0 | ); |
72 | 0 | } |
73 | 0 | if haystack.len() % 8 != 0 { |
74 | 0 | let mask = usize::from_le(!(!0 >> ((haystack.len() % chunksize) * 8))); |
75 | 0 | counts += bytewise_equal( |
76 | 0 | usize_load_unchecked(haystack, haystack.len() - chunksize), |
77 | 0 | needles, |
78 | 0 | ) & mask; |
79 | 0 | } |
80 | 0 | count += sum_usize(counts); |
81 | | |
82 | 0 | count |
83 | | } |
84 | 0 | } |
85 | | |
86 | 0 | pub fn chunk_num_chars(utf8_chars: &[u8]) -> usize { |
87 | 0 | let chunksize = mem::size_of::<usize>(); |
88 | 0 | assert!(utf8_chars.len() >= chunksize); |
89 | | |
90 | | unsafe { |
91 | 0 | let mut offset = 0; |
92 | 0 | let mut count = 0; |
93 | | |
94 | | // 2040 |
95 | 0 | while utf8_chars.len() >= offset + chunksize * 255 { |
96 | 0 | let mut counts = 0; |
97 | 0 | for _ in 0..255 { |
98 | 0 | counts += is_leading_utf8_byte(usize_load_unchecked(utf8_chars, offset)); |
99 | 0 | offset += chunksize; |
100 | 0 | } |
101 | 0 | count += sum_usize(counts); |
102 | | } |
103 | | |
104 | | // 8 |
105 | 0 | let mut counts = 0; |
106 | 0 | for i in 0..(utf8_chars.len() - offset) / chunksize { |
107 | 0 | counts += |
108 | 0 | is_leading_utf8_byte(usize_load_unchecked(utf8_chars, offset + i * chunksize)); |
109 | 0 | } |
110 | 0 | if utf8_chars.len() % 8 != 0 { |
111 | 0 | let mask = usize::from_le(!(!0 >> ((utf8_chars.len() % chunksize) * 8))); |
112 | 0 | counts += is_leading_utf8_byte(usize_load_unchecked( |
113 | 0 | utf8_chars, |
114 | 0 | utf8_chars.len() - chunksize, |
115 | 0 | )) & mask; |
116 | 0 | } |
117 | 0 | count += sum_usize(counts); |
118 | | |
119 | 0 | count |
120 | | } |
121 | 0 | } |