/rust/registry/src/index.crates.io-6f17d22bba15001f/simdutf8-0.1.5/src/implementation/helpers.rs
Line | Count | Source (jump to first uncovered line) |
// Crate-local UTF-8 error type. The helpers in this file build it from the
// `valid_up_to()` / `error_len()` values reported by `core::str::from_utf8`.
type Utf8ErrorCompat = crate::compat::Utf8Error;
2 | | |
3 | | #[inline] |
4 | 0 | pub(crate) fn validate_utf8_at_offset(input: &[u8], offset: usize) -> Result<(), Utf8ErrorCompat> { |
5 | 0 | #[allow(clippy::cast_possible_truncation)] |
6 | 0 | match core::str::from_utf8(&input[offset..]) { |
7 | 0 | Ok(_) => Ok(()), |
8 | 0 | Err(err) => Err(Utf8ErrorCompat { |
9 | 0 | valid_up_to: err.valid_up_to() + offset, |
10 | 0 | error_len: err.error_len().map(|len| { |
11 | 0 | // never truncates since std::str::err::Utf8Error::error_len() never returns value larger than 4 |
12 | 0 | len as u8 |
13 | 0 | }), |
14 | 0 | }), |
15 | | } |
16 | 0 | } |
17 | | |
18 | | #[cold] |
19 | | #[allow(dead_code)] |
20 | | #[allow(clippy::unwrap_used)] |
21 | 0 | pub(crate) fn get_compat_error(input: &[u8], failing_block_pos: usize) -> Utf8ErrorCompat { |
22 | 0 | let offset = if failing_block_pos == 0 { |
23 | | // Error must be in this block since it is the first. |
24 | 0 | 0 |
25 | | } else { |
26 | | // The previous block is OK except for a possible continuation over the block boundary. |
27 | | // We go backwards over the last three bytes of the previous block and find the |
28 | | // last non-continuation byte as a starting point for an std validation. If the last |
29 | | // three bytes are all continuation bytes then the previous block ends with a four byte |
30 | | // UTF-8 codepoint, is thus complete and valid UTF-8. We start the check with the |
31 | | // current block in that case. |
32 | 0 | (1..=3) |
33 | 0 | .find(|i| input[failing_block_pos - i] >> 6 != 0b10) |
34 | 0 | .map_or(failing_block_pos, |i| failing_block_pos - i) |
35 | | }; |
36 | | // UNWRAP: safe because the SIMD UTF-8 validation found an error |
37 | 0 | validate_utf8_at_offset(input, offset).unwrap_err() |
38 | 0 | } |
39 | | |
/// Copies `len` bytes from `src` to `dest` without any alignment requirement.
///
/// The copy is done in 8-byte words where possible: up to four words for a
/// 32-byte step, two for a 16-byte step and one for an 8-byte step. Whatever
/// remains is copied one byte at a time, so every `len` is handled
/// correctly; the word steps cover at most 56 bytes.
///
/// # Safety
///
/// The caller must guarantee that:
/// * `src` is valid for reads of `len` bytes,
/// * `dest` is valid for writes of `len` bytes,
/// * the two regions are intended not to overlap, as the function name
///   states.
#[allow(dead_code)]
pub(crate) unsafe fn memcpy_unaligned_nonoverlapping_inline_opt_lt_64(
    mut src: *const u8,
    mut dest: *mut u8,
    mut len: usize,
) {
    // This gets properly auto-vectorized on AVX 2 and SSE 4.2
    /// Moves one unaligned 8-byte word and advances both cursors past it.
    #[inline]
    #[allow(clippy::cast_ptr_alignment)]
    unsafe fn copy_word(from: &mut *const u8, to: &mut *mut u8) {
        let word = from.cast::<u64>().read_unaligned();
        to.cast::<u64>().write_unaligned(word);
        *from = from.add(8);
        *to = to.add(8);
    }

    if len >= 32 {
        for _ in 0..4 {
            copy_word(&mut src, &mut dest);
        }
        len -= 32;
    }
    if len >= 16 {
        for _ in 0..2 {
            copy_word(&mut src, &mut dest);
        }
        len -= 16;
    }
    if len >= 8 {
        copy_word(&mut src, &mut dest);
        len -= 8;
    }
    // Byte-wise tail for whatever the word-sized steps did not cover.
    for _ in 0..len {
        *dest = *src;
        src = src.add(1);
        dest = dest.add(1);
    }
}
78 | | |
/// Length in bytes of the byte arrays wrapped by `TempSimdChunkA16` and
/// `TempSimdChunkA32`.
pub(crate) const SIMD_CHUNK_SIZE: usize = 64;
80 | | |
/// Three values of the same type `T`, laid out in declaration order
/// (`repr(C)`) and aligned to 32 bytes.
///
/// NOTE(review): judging by the field names this holds the running state of
/// the UTF-8 check; the code that reads and writes the fields is not in this
/// file, so confirm against the implementations that use it.
#[allow(dead_code)]
#[repr(C)]
#[repr(align(32))]
pub(crate) struct Utf8CheckAlgorithm<T> {
    /// Presumably the previously processed input value. Verify against users.
    pub(crate) prev: T,
    /// Presumably tracks an incomplete trailing sequence. Verify against users.
    pub(crate) incomplete: T,
    /// Presumably the accumulated error state. Verify against users.
    pub(crate) error: T,
}
88 | | |
89 | | #[repr(C, align(16))] |
90 | | #[allow(dead_code)] |
91 | | pub(crate) struct TempSimdChunkA16(pub(crate) [u8; SIMD_CHUNK_SIZE]); |
92 | | |
93 | | #[allow(dead_code)] |
94 | | impl TempSimdChunkA16 { |
95 | | #[inline] |
96 | 0 | pub(crate) const fn new() -> Self { |
97 | 0 | Self([0; SIMD_CHUNK_SIZE]) |
98 | 0 | } |
99 | | } |
100 | | |
101 | | #[repr(C, align(32))] |
102 | | #[allow(dead_code)] |
103 | | pub(crate) struct TempSimdChunkA32(pub(crate) [u8; SIMD_CHUNK_SIZE]); |
104 | | |
105 | | #[allow(dead_code)] |
106 | | impl TempSimdChunkA32 { |
107 | | #[inline] |
108 | 295k | pub(crate) const fn new() -> Self { |
109 | 295k | Self([0; SIMD_CHUNK_SIZE]) |
110 | 295k | } |
111 | | } |
112 | | |
/// Newtype around a single `Copy` value of type `T`; the wrapper itself is
/// `Clone` and `Copy`.
///
/// NOTE(review): the name suggests `T` is a platform SIMD vector of `u8`
/// lanes. Nothing in this file constrains it beyond `Copy`, so confirm
/// against the implementations that instantiate it.
#[allow(dead_code)]
#[derive(Clone, Copy)]
pub(crate) struct SimdU8Value<T: Copy>(pub(crate) T);