Coverage Report

Created: 2026-01-16 06:52

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/bstr-1.12.1/src/ascii.rs
Line
Count
Source
1
// The following ~400 lines of code exists for exactly one purpose, which is
2
// to optimize this code:
3
//
4
//     byte_slice.iter().position(|&b| b > 0x7F).unwrap_or(byte_slice.len())
5
//
6
// Yes... Overengineered is a word that comes to mind, but this is effectively
7
// a very similar problem to memchr, and virtually nobody has been able to
8
// resist optimizing the crap out of that (except for perhaps the BSD and MUSL
9
// folks). In particular, this routine makes a very common case (ASCII) very
10
// fast, which seems worth it. We do stop short of adding AVX variants of the
11
// code below in order to retain our sanity and also to avoid needing to deal
12
// with runtime target feature detection. RESIST!
13
//
14
// In order to understand the SIMD version below, it would be good to read this
15
// comment describing how my memchr routine works:
16
// https://github.com/BurntSushi/rust-memchr/blob/b0a29f267f4a7fad8ffcc8fe8377a06498202883/src/x86/sse2.rs#L19-L106
17
//
18
// The primary difference with memchr is that for ASCII, we can do a bit less
19
// work. In particular, we don't need to detect the presence of a specific
20
// byte, but rather, whether any byte has its most significant bit set. That
21
// means we can effectively skip the _mm_cmpeq_epi8 step and jump straight to
22
// _mm_movemask_epi8.
23
24
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
25
const USIZE_BYTES: usize = core::mem::size_of::<usize>();
26
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
27
const ALIGN_MASK: usize = core::mem::align_of::<usize>() - 1;
28
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
29
const FALLBACK_LOOP_SIZE: usize = 2 * USIZE_BYTES;
30
31
// This is a mask where the most significant bit of each byte in the usize
32
// is set. We test this bit to determine whether a character is ASCII or not.
33
// Namely, a single byte is regarded as an ASCII codepoint if and only if it's
34
// most significant bit is not set.
35
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
36
const ASCII_MASK_U64: u64 = 0x8080808080808080;
37
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
38
const ASCII_MASK: usize = ASCII_MASK_U64 as usize;
39
40
/// Returns the index of the first non ASCII byte in the given slice.
41
///
42
/// If slice only contains ASCII bytes, then the length of the slice is
43
/// returned.
44
977k
pub fn first_non_ascii_byte(slice: &[u8]) -> usize {
45
    #[cfg(any(miri, not(target_arch = "x86_64")))]
46
    {
47
        first_non_ascii_byte_fallback(slice)
48
    }
49
50
    #[cfg(all(not(miri), target_arch = "x86_64"))]
51
    {
52
977k
        first_non_ascii_byte_sse2(slice)
53
    }
54
977k
}
Unexecuted instantiation: bstr::ascii::first_non_ascii_byte
bstr::ascii::first_non_ascii_byte
Line
Count
Source
44
22.3k
pub fn first_non_ascii_byte(slice: &[u8]) -> usize {
45
    #[cfg(any(miri, not(target_arch = "x86_64")))]
46
    {
47
        first_non_ascii_byte_fallback(slice)
48
    }
49
50
    #[cfg(all(not(miri), target_arch = "x86_64"))]
51
    {
52
22.3k
        first_non_ascii_byte_sse2(slice)
53
    }
54
22.3k
}
bstr::ascii::first_non_ascii_byte
Line
Count
Source
44
792k
pub fn first_non_ascii_byte(slice: &[u8]) -> usize {
45
    #[cfg(any(miri, not(target_arch = "x86_64")))]
46
    {
47
        first_non_ascii_byte_fallback(slice)
48
    }
49
50
    #[cfg(all(not(miri), target_arch = "x86_64"))]
51
    {
52
792k
        first_non_ascii_byte_sse2(slice)
53
    }
54
792k
}
bstr::ascii::first_non_ascii_byte
Line
Count
Source
44
162k
pub fn first_non_ascii_byte(slice: &[u8]) -> usize {
45
    #[cfg(any(miri, not(target_arch = "x86_64")))]
46
    {
47
        first_non_ascii_byte_fallback(slice)
48
    }
49
50
    #[cfg(all(not(miri), target_arch = "x86_64"))]
51
    {
52
162k
        first_non_ascii_byte_sse2(slice)
53
    }
54
162k
}
55
56
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
57
fn first_non_ascii_byte_fallback(slice: &[u8]) -> usize {
58
    let start_ptr = slice.as_ptr();
59
    let end_ptr = slice[slice.len()..].as_ptr();
60
    let mut ptr = start_ptr;
61
62
    unsafe {
63
        if slice.len() < USIZE_BYTES {
64
            return first_non_ascii_byte_slow(start_ptr, end_ptr, ptr);
65
        }
66
67
        let chunk = read_unaligned_usize(ptr);
68
        let mask = chunk & ASCII_MASK;
69
        if mask != 0 {
70
            return first_non_ascii_byte_mask(mask);
71
        }
72
73
        ptr = ptr_add(ptr, USIZE_BYTES - (start_ptr as usize & ALIGN_MASK));
74
        debug_assert!(ptr > start_ptr);
75
        debug_assert!(ptr_sub(end_ptr, USIZE_BYTES) >= start_ptr);
76
        if slice.len() >= FALLBACK_LOOP_SIZE {
77
            while ptr <= ptr_sub(end_ptr, FALLBACK_LOOP_SIZE) {
78
                debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
79
80
                let a = *(ptr as *const usize);
81
                let b = *(ptr_add(ptr, USIZE_BYTES) as *const usize);
82
                if (a | b) & ASCII_MASK != 0 {
83
                    // What a kludge. We wrap the position finding code into
84
                    // a non-inlineable function, which makes the codegen in
85
                    // the tight loop above a bit better by avoiding a
86
                    // couple extra movs. We pay for it by two additional
87
                    // stores, but only in the case of finding a non-ASCII
88
                    // byte.
89
                    #[inline(never)]
90
                    unsafe fn findpos(
91
                        start_ptr: *const u8,
92
                        ptr: *const u8,
93
                    ) -> usize {
94
                        let a = *(ptr as *const usize);
95
                        let b = *(ptr_add(ptr, USIZE_BYTES) as *const usize);
96
97
                        let mut at = sub(ptr, start_ptr);
98
                        let maska = a & ASCII_MASK;
99
                        if maska != 0 {
100
                            return at + first_non_ascii_byte_mask(maska);
101
                        }
102
103
                        at += USIZE_BYTES;
104
                        let maskb = b & ASCII_MASK;
105
                        debug_assert!(maskb != 0);
106
                        return at + first_non_ascii_byte_mask(maskb);
107
                    }
108
                    return findpos(start_ptr, ptr);
109
                }
110
                ptr = ptr_add(ptr, FALLBACK_LOOP_SIZE);
111
            }
112
        }
113
        first_non_ascii_byte_slow(start_ptr, end_ptr, ptr)
114
    }
115
}
116
117
#[cfg(all(not(miri), target_arch = "x86_64"))]
118
977k
fn first_non_ascii_byte_sse2(slice: &[u8]) -> usize {
119
    use core::arch::x86_64::*;
120
121
    const VECTOR_SIZE: usize = core::mem::size_of::<__m128i>();
122
    const VECTOR_ALIGN: usize = VECTOR_SIZE - 1;
123
    const VECTOR_LOOP_SIZE: usize = 4 * VECTOR_SIZE;
124
125
977k
    let start_ptr = slice.as_ptr();
126
977k
    let end_ptr = slice[slice.len()..].as_ptr();
127
977k
    let mut ptr = start_ptr;
128
129
    unsafe {
130
977k
        if slice.len() < VECTOR_SIZE {
131
489k
            return first_non_ascii_byte_slow(start_ptr, end_ptr, ptr);
132
488k
        }
133
134
488k
        let chunk = _mm_loadu_si128(ptr as *const __m128i);
135
488k
        let mask = _mm_movemask_epi8(chunk);
136
488k
        if mask != 0 {
137
75.0k
            return mask.trailing_zeros() as usize;
138
413k
        }
139
140
413k
        ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
141
413k
        debug_assert!(ptr > start_ptr);
142
413k
        debug_assert!(end_ptr.sub(VECTOR_SIZE) >= start_ptr);
143
413k
        if slice.len() >= VECTOR_LOOP_SIZE {
144
4.76M
            while ptr <= ptr_sub(end_ptr, VECTOR_LOOP_SIZE) {
145
4.74M
                debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
146
147
4.74M
                let a = _mm_load_si128(ptr as *const __m128i);
148
4.74M
                let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
149
4.74M
                let c =
150
4.74M
                    _mm_load_si128(ptr.add(2 * VECTOR_SIZE) as *const __m128i);
151
4.74M
                let d =
152
4.74M
                    _mm_load_si128(ptr.add(3 * VECTOR_SIZE) as *const __m128i);
153
154
4.74M
                let or1 = _mm_or_si128(a, b);
155
4.74M
                let or2 = _mm_or_si128(c, d);
156
4.74M
                let or3 = _mm_or_si128(or1, or2);
157
4.74M
                if _mm_movemask_epi8(or3) != 0 {
158
91.4k
                    let mut at = sub(ptr, start_ptr);
159
91.4k
                    let mask = _mm_movemask_epi8(a);
160
91.4k
                    if mask != 0 {
161
22.2k
                        return at + mask.trailing_zeros() as usize;
162
69.2k
                    }
163
164
69.2k
                    at += VECTOR_SIZE;
165
69.2k
                    let mask = _mm_movemask_epi8(b);
166
69.2k
                    if mask != 0 {
167
23.0k
                        return at + mask.trailing_zeros() as usize;
168
46.2k
                    }
169
170
46.2k
                    at += VECTOR_SIZE;
171
46.2k
                    let mask = _mm_movemask_epi8(c);
172
46.2k
                    if mask != 0 {
173
29.3k
                        return at + mask.trailing_zeros() as usize;
174
16.8k
                    }
175
176
16.8k
                    at += VECTOR_SIZE;
177
16.8k
                    let mask = _mm_movemask_epi8(d);
178
16.8k
                    debug_assert!(mask != 0);
179
16.8k
                    return at + mask.trailing_zeros() as usize;
180
4.65M
                }
181
4.65M
                ptr = ptr_add(ptr, VECTOR_LOOP_SIZE);
182
            }
183
304k
        }
184
493k
        while ptr <= end_ptr.sub(VECTOR_SIZE) {
185
174k
            debug_assert!(sub(end_ptr, ptr) >= VECTOR_SIZE);
186
187
174k
            let chunk = _mm_loadu_si128(ptr as *const __m128i);
188
174k
            let mask = _mm_movemask_epi8(chunk);
189
174k
            if mask != 0 {
190
2.90k
                return sub(ptr, start_ptr) + mask.trailing_zeros() as usize;
191
171k
            }
192
171k
            ptr = ptr.add(VECTOR_SIZE);
193
        }
194
318k
        first_non_ascii_byte_slow(start_ptr, end_ptr, ptr)
195
    }
196
977k
}
Unexecuted instantiation: bstr::ascii::first_non_ascii_byte_sse2
bstr::ascii::first_non_ascii_byte_sse2
Line
Count
Source
118
22.3k
fn first_non_ascii_byte_sse2(slice: &[u8]) -> usize {
119
    use core::arch::x86_64::*;
120
121
    const VECTOR_SIZE: usize = core::mem::size_of::<__m128i>();
122
    const VECTOR_ALIGN: usize = VECTOR_SIZE - 1;
123
    const VECTOR_LOOP_SIZE: usize = 4 * VECTOR_SIZE;
124
125
22.3k
    let start_ptr = slice.as_ptr();
126
22.3k
    let end_ptr = slice[slice.len()..].as_ptr();
127
22.3k
    let mut ptr = start_ptr;
128
129
    unsafe {
130
22.3k
        if slice.len() < VECTOR_SIZE {
131
387
            return first_non_ascii_byte_slow(start_ptr, end_ptr, ptr);
132
22.0k
        }
133
134
22.0k
        let chunk = _mm_loadu_si128(ptr as *const __m128i);
135
22.0k
        let mask = _mm_movemask_epi8(chunk);
136
22.0k
        if mask != 0 {
137
3.30k
            return mask.trailing_zeros() as usize;
138
18.7k
        }
139
140
18.7k
        ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
141
18.7k
        debug_assert!(ptr > start_ptr);
142
18.7k
        debug_assert!(end_ptr.sub(VECTOR_SIZE) >= start_ptr);
143
18.7k
        if slice.len() >= VECTOR_LOOP_SIZE {
144
194k
            while ptr <= ptr_sub(end_ptr, VECTOR_LOOP_SIZE) {
145
194k
                debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
146
147
194k
                let a = _mm_load_si128(ptr as *const __m128i);
148
194k
                let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
149
194k
                let c =
150
194k
                    _mm_load_si128(ptr.add(2 * VECTOR_SIZE) as *const __m128i);
151
194k
                let d =
152
194k
                    _mm_load_si128(ptr.add(3 * VECTOR_SIZE) as *const __m128i);
153
154
194k
                let or1 = _mm_or_si128(a, b);
155
194k
                let or2 = _mm_or_si128(c, d);
156
194k
                let or3 = _mm_or_si128(or1, or2);
157
194k
                if _mm_movemask_epi8(or3) != 0 {
158
18.4k
                    let mut at = sub(ptr, start_ptr);
159
18.4k
                    let mask = _mm_movemask_epi8(a);
160
18.4k
                    if mask != 0 {
161
2.80k
                        return at + mask.trailing_zeros() as usize;
162
15.6k
                    }
163
164
15.6k
                    at += VECTOR_SIZE;
165
15.6k
                    let mask = _mm_movemask_epi8(b);
166
15.6k
                    if mask != 0 {
167
2.83k
                        return at + mask.trailing_zeros() as usize;
168
12.8k
                    }
169
170
12.8k
                    at += VECTOR_SIZE;
171
12.8k
                    let mask = _mm_movemask_epi8(c);
172
12.8k
                    if mask != 0 {
173
7.99k
                        return at + mask.trailing_zeros() as usize;
174
4.82k
                    }
175
176
4.82k
                    at += VECTOR_SIZE;
177
4.82k
                    let mask = _mm_movemask_epi8(d);
178
4.82k
                    debug_assert!(mask != 0);
179
4.82k
                    return at + mask.trailing_zeros() as usize;
180
175k
                }
181
175k
                ptr = ptr_add(ptr, VECTOR_LOOP_SIZE);
182
            }
183
135
        }
184
438
        while ptr <= end_ptr.sub(VECTOR_SIZE) {
185
271
            debug_assert!(sub(end_ptr, ptr) >= VECTOR_SIZE);
186
187
271
            let chunk = _mm_loadu_si128(ptr as *const __m128i);
188
271
            let mask = _mm_movemask_epi8(chunk);
189
271
            if mask != 0 {
190
82
                return sub(ptr, start_ptr) + mask.trailing_zeros() as usize;
191
189
            }
192
189
            ptr = ptr.add(VECTOR_SIZE);
193
        }
194
167
        first_non_ascii_byte_slow(start_ptr, end_ptr, ptr)
195
    }
196
22.3k
}
bstr::ascii::first_non_ascii_byte_sse2
Line
Count
Source
118
792k
fn first_non_ascii_byte_sse2(slice: &[u8]) -> usize {
119
    use core::arch::x86_64::*;
120
121
    const VECTOR_SIZE: usize = core::mem::size_of::<__m128i>();
122
    const VECTOR_ALIGN: usize = VECTOR_SIZE - 1;
123
    const VECTOR_LOOP_SIZE: usize = 4 * VECTOR_SIZE;
124
125
792k
    let start_ptr = slice.as_ptr();
126
792k
    let end_ptr = slice[slice.len()..].as_ptr();
127
792k
    let mut ptr = start_ptr;
128
129
    unsafe {
130
792k
        if slice.len() < VECTOR_SIZE {
131
439k
            return first_non_ascii_byte_slow(start_ptr, end_ptr, ptr);
132
353k
        }
133
134
353k
        let chunk = _mm_loadu_si128(ptr as *const __m128i);
135
353k
        let mask = _mm_movemask_epi8(chunk);
136
353k
        if mask != 0 {
137
4.45k
            return mask.trailing_zeros() as usize;
138
348k
        }
139
140
348k
        ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
141
348k
        debug_assert!(ptr > start_ptr);
142
348k
        debug_assert!(end_ptr.sub(VECTOR_SIZE) >= start_ptr);
143
348k
        if slice.len() >= VECTOR_LOOP_SIZE {
144
4.09M
            while ptr <= ptr_sub(end_ptr, VECTOR_LOOP_SIZE) {
145
4.07M
                debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
146
147
4.07M
                let a = _mm_load_si128(ptr as *const __m128i);
148
4.07M
                let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
149
4.07M
                let c =
150
4.07M
                    _mm_load_si128(ptr.add(2 * VECTOR_SIZE) as *const __m128i);
151
4.07M
                let d =
152
4.07M
                    _mm_load_si128(ptr.add(3 * VECTOR_SIZE) as *const __m128i);
153
154
4.07M
                let or1 = _mm_or_si128(a, b);
155
4.07M
                let or2 = _mm_or_si128(c, d);
156
4.07M
                let or3 = _mm_or_si128(or1, or2);
157
4.07M
                if _mm_movemask_epi8(or3) != 0 {
158
34.6k
                    let mut at = sub(ptr, start_ptr);
159
34.6k
                    let mask = _mm_movemask_epi8(a);
160
34.6k
                    if mask != 0 {
161
7.10k
                        return at + mask.trailing_zeros() as usize;
162
27.5k
                    }
163
164
27.5k
                    at += VECTOR_SIZE;
165
27.5k
                    let mask = _mm_movemask_epi8(b);
166
27.5k
                    if mask != 0 {
167
11.7k
                        return at + mask.trailing_zeros() as usize;
168
15.8k
                    }
169
170
15.8k
                    at += VECTOR_SIZE;
171
15.8k
                    let mask = _mm_movemask_epi8(c);
172
15.8k
                    if mask != 0 {
173
9.77k
                        return at + mask.trailing_zeros() as usize;
174
6.03k
                    }
175
176
6.03k
                    at += VECTOR_SIZE;
177
6.03k
                    let mask = _mm_movemask_epi8(d);
178
6.03k
                    debug_assert!(mask != 0);
179
6.03k
                    return at + mask.trailing_zeros() as usize;
180
4.04M
                }
181
4.04M
                ptr = ptr_add(ptr, VECTOR_LOOP_SIZE);
182
            }
183
299k
        }
184
479k
        while ptr <= end_ptr.sub(VECTOR_SIZE) {
185
165k
            debug_assert!(sub(end_ptr, ptr) >= VECTOR_SIZE);
186
187
165k
            let chunk = _mm_loadu_si128(ptr as *const __m128i);
188
165k
            let mask = _mm_movemask_epi8(chunk);
189
165k
            if mask != 0 {
190
314
                return sub(ptr, start_ptr) + mask.trailing_zeros() as usize;
191
165k
            }
192
165k
            ptr = ptr.add(VECTOR_SIZE);
193
        }
194
313k
        first_non_ascii_byte_slow(start_ptr, end_ptr, ptr)
195
    }
196
792k
}
bstr::ascii::first_non_ascii_byte_sse2
Line
Count
Source
118
162k
fn first_non_ascii_byte_sse2(slice: &[u8]) -> usize {
119
    use core::arch::x86_64::*;
120
121
    const VECTOR_SIZE: usize = core::mem::size_of::<__m128i>();
122
    const VECTOR_ALIGN: usize = VECTOR_SIZE - 1;
123
    const VECTOR_LOOP_SIZE: usize = 4 * VECTOR_SIZE;
124
125
162k
    let start_ptr = slice.as_ptr();
126
162k
    let end_ptr = slice[slice.len()..].as_ptr();
127
162k
    let mut ptr = start_ptr;
128
129
    unsafe {
130
162k
        if slice.len() < VECTOR_SIZE {
131
49.9k
            return first_non_ascii_byte_slow(start_ptr, end_ptr, ptr);
132
112k
        }
133
134
112k
        let chunk = _mm_loadu_si128(ptr as *const __m128i);
135
112k
        let mask = _mm_movemask_epi8(chunk);
136
112k
        if mask != 0 {
137
67.2k
            return mask.trailing_zeros() as usize;
138
45.5k
        }
139
140
45.5k
        ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
141
45.5k
        debug_assert!(ptr > start_ptr);
142
45.5k
        debug_assert!(end_ptr.sub(VECTOR_SIZE) >= start_ptr);
143
45.5k
        if slice.len() >= VECTOR_LOOP_SIZE {
144
475k
            while ptr <= ptr_sub(end_ptr, VECTOR_LOOP_SIZE) {
145
473k
                debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
146
147
473k
                let a = _mm_load_si128(ptr as *const __m128i);
148
473k
                let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
149
473k
                let c =
150
473k
                    _mm_load_si128(ptr.add(2 * VECTOR_SIZE) as *const __m128i);
151
473k
                let d =
152
473k
                    _mm_load_si128(ptr.add(3 * VECTOR_SIZE) as *const __m128i);
153
154
473k
                let or1 = _mm_or_si128(a, b);
155
473k
                let or2 = _mm_or_si128(c, d);
156
473k
                let or3 = _mm_or_si128(or1, or2);
157
473k
                if _mm_movemask_epi8(or3) != 0 {
158
38.3k
                    let mut at = sub(ptr, start_ptr);
159
38.3k
                    let mask = _mm_movemask_epi8(a);
160
38.3k
                    if mask != 0 {
161
12.3k
                        return at + mask.trailing_zeros() as usize;
162
25.9k
                    }
163
164
25.9k
                    at += VECTOR_SIZE;
165
25.9k
                    let mask = _mm_movemask_epi8(b);
166
25.9k
                    if mask != 0 {
167
8.42k
                        return at + mask.trailing_zeros() as usize;
168
17.5k
                    }
169
170
17.5k
                    at += VECTOR_SIZE;
171
17.5k
                    let mask = _mm_movemask_epi8(c);
172
17.5k
                    if mask != 0 {
173
11.5k
                        return at + mask.trailing_zeros() as usize;
174
6.01k
                    }
175
176
6.01k
                    at += VECTOR_SIZE;
177
6.01k
                    let mask = _mm_movemask_epi8(d);
178
6.01k
                    debug_assert!(mask != 0);
179
6.01k
                    return at + mask.trailing_zeros() as usize;
180
435k
                }
181
435k
                ptr = ptr_add(ptr, VECTOR_LOOP_SIZE);
182
            }
183
5.00k
        }
184
13.3k
        while ptr <= end_ptr.sub(VECTOR_SIZE) {
185
8.67k
            debug_assert!(sub(end_ptr, ptr) >= VECTOR_SIZE);
186
187
8.67k
            let chunk = _mm_loadu_si128(ptr as *const __m128i);
188
8.67k
            let mask = _mm_movemask_epi8(chunk);
189
8.67k
            if mask != 0 {
190
2.50k
                return sub(ptr, start_ptr) + mask.trailing_zeros() as usize;
191
6.17k
            }
192
6.17k
            ptr = ptr.add(VECTOR_SIZE);
193
        }
194
4.63k
        first_non_ascii_byte_slow(start_ptr, end_ptr, ptr)
195
    }
196
162k
}
197
198
#[inline(always)]
199
808k
unsafe fn first_non_ascii_byte_slow(
200
808k
    start_ptr: *const u8,
201
808k
    end_ptr: *const u8,
202
808k
    mut ptr: *const u8,
203
808k
) -> usize {
204
808k
    debug_assert!(start_ptr <= ptr);
205
808k
    debug_assert!(ptr <= end_ptr);
206
207
6.20M
    while ptr < end_ptr {
208
5.41M
        if *ptr > 0x7F {
209
19.8k
            return sub(ptr, start_ptr);
210
5.39M
        }
211
5.39M
        ptr = ptr.offset(1);
212
    }
213
788k
    sub(end_ptr, start_ptr)
214
808k
}
Unexecuted instantiation: bstr::ascii::first_non_ascii_byte_slow
bstr::ascii::first_non_ascii_byte_slow
Line
Count
Source
199
554
unsafe fn first_non_ascii_byte_slow(
200
554
    start_ptr: *const u8,
201
554
    end_ptr: *const u8,
202
554
    mut ptr: *const u8,
203
554
) -> usize {
204
554
    debug_assert!(start_ptr <= ptr);
205
554
    debug_assert!(ptr <= end_ptr);
206
207
3.41k
    while ptr < end_ptr {
208
2.96k
        if *ptr > 0x7F {
209
106
            return sub(ptr, start_ptr);
210
2.85k
        }
211
2.85k
        ptr = ptr.offset(1);
212
    }
213
448
    sub(end_ptr, start_ptr)
214
554
}
bstr::ascii::first_non_ascii_byte_slow
Line
Count
Source
199
752k
unsafe fn first_non_ascii_byte_slow(
200
752k
    start_ptr: *const u8,
201
752k
    end_ptr: *const u8,
202
752k
    mut ptr: *const u8,
203
752k
) -> usize {
204
752k
    debug_assert!(start_ptr <= ptr);
205
752k
    debug_assert!(ptr <= end_ptr);
206
207
5.92M
    while ptr < end_ptr {
208
5.17M
        if *ptr > 0x7F {
209
956
            return sub(ptr, start_ptr);
210
5.17M
        }
211
5.17M
        ptr = ptr.offset(1);
212
    }
213
752k
    sub(end_ptr, start_ptr)
214
752k
}
bstr::ascii::first_non_ascii_byte_slow
Line
Count
Source
199
54.6k
unsafe fn first_non_ascii_byte_slow(
200
54.6k
    start_ptr: *const u8,
201
54.6k
    end_ptr: *const u8,
202
54.6k
    mut ptr: *const u8,
203
54.6k
) -> usize {
204
54.6k
    debug_assert!(start_ptr <= ptr);
205
54.6k
    debug_assert!(ptr <= end_ptr);
206
207
271k
    while ptr < end_ptr {
208
235k
        if *ptr > 0x7F {
209
18.7k
            return sub(ptr, start_ptr);
210
216k
        }
211
216k
        ptr = ptr.offset(1);
212
    }
213
35.8k
    sub(end_ptr, start_ptr)
214
54.6k
}
215
216
/// Compute the position of the first ASCII byte in the given mask.
217
///
218
/// The mask should be computed by `chunk & ASCII_MASK`, where `chunk` is
219
/// 8 contiguous bytes of the slice being checked where *at least* one of those
220
/// bytes is not an ASCII byte.
221
///
222
/// The position returned is always in the inclusive range [0, 7].
223
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
224
fn first_non_ascii_byte_mask(mask: usize) -> usize {
225
    #[cfg(target_endian = "little")]
226
    {
227
        mask.trailing_zeros() as usize / 8
228
    }
229
    #[cfg(target_endian = "big")]
230
    {
231
        mask.leading_zeros() as usize / 8
232
    }
233
}
234
235
/// Increment the given pointer by the given amount.
236
4.65M
unsafe fn ptr_add(ptr: *const u8, amt: usize) -> *const u8 {
237
4.65M
    ptr.add(amt)
238
4.65M
}
Unexecuted instantiation: bstr::ascii::ptr_add
bstr::ascii::ptr_add
Line
Count
Source
236
175k
unsafe fn ptr_add(ptr: *const u8, amt: usize) -> *const u8 {
237
175k
    ptr.add(amt)
238
175k
}
bstr::ascii::ptr_add
Line
Count
Source
236
4.04M
unsafe fn ptr_add(ptr: *const u8, amt: usize) -> *const u8 {
237
4.04M
    ptr.add(amt)
238
4.04M
}
bstr::ascii::ptr_add
Line
Count
Source
236
435k
unsafe fn ptr_add(ptr: *const u8, amt: usize) -> *const u8 {
237
435k
    ptr.add(amt)
238
435k
}
239
240
/// Decrement the given pointer by the given amount.
241
4.76M
unsafe fn ptr_sub(ptr: *const u8, amt: usize) -> *const u8 {
242
4.76M
    ptr.sub(amt)
243
4.76M
}
Unexecuted instantiation: bstr::ascii::ptr_sub
bstr::ascii::ptr_sub
Line
Count
Source
241
194k
unsafe fn ptr_sub(ptr: *const u8, amt: usize) -> *const u8 {
242
194k
    ptr.sub(amt)
243
194k
}
bstr::ascii::ptr_sub
Line
Count
Source
241
4.09M
unsafe fn ptr_sub(ptr: *const u8, amt: usize) -> *const u8 {
242
4.09M
    ptr.sub(amt)
243
4.09M
}
bstr::ascii::ptr_sub
Line
Count
Source
241
475k
unsafe fn ptr_sub(ptr: *const u8, amt: usize) -> *const u8 {
242
475k
    ptr.sub(amt)
243
475k
}
244
245
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
246
unsafe fn read_unaligned_usize(ptr: *const u8) -> usize {
247
    use core::ptr;
248
249
    let mut n: usize = 0;
250
    ptr::copy_nonoverlapping(ptr, &mut n as *mut _ as *mut u8, USIZE_BYTES);
251
    n
252
}
253
254
/// Subtract `b` from `a` and return the difference. `a` should be greater than
255
/// or equal to `b`.
256
1.07M
fn sub(a: *const u8, b: *const u8) -> usize {
257
1.07M
    debug_assert!(a >= b);
258
1.07M
    (a as usize) - (b as usize)
259
1.07M
}
Unexecuted instantiation: bstr::ascii::sub
bstr::ascii::sub
Line
Count
Source
256
19.3k
fn sub(a: *const u8, b: *const u8) -> usize {
257
19.3k
    debug_assert!(a >= b);
258
19.3k
    (a as usize) - (b as usize)
259
19.3k
}
bstr::ascii::sub
Line
Count
Source
256
953k
fn sub(a: *const u8, b: *const u8) -> usize {
257
953k
    debug_assert!(a >= b);
258
953k
    (a as usize) - (b as usize)
259
953k
}
bstr::ascii::sub
Line
Count
Source
256
104k
fn sub(a: *const u8, b: *const u8) -> usize {
257
104k
    debug_assert!(a >= b);
258
104k
    (a as usize) - (b as usize)
259
104k
}
260
261
#[cfg(test)]
262
mod tests {
263
    use super::*;
264
265
    // Our testing approach here is to try and exhaustively test every case.
266
    // This includes the position at which a non-ASCII byte occurs in addition
267
    // to the alignment of the slice that we're searching.
268
269
    #[test]
270
    fn positive_fallback_forward() {
271
        for i in 0..517 {
272
            let s = "a".repeat(i);
273
            assert_eq!(
274
                i,
275
                first_non_ascii_byte_fallback(s.as_bytes()),
276
                "i: {:?}, len: {:?}, s: {:?}",
277
                i,
278
                s.len(),
279
                s
280
            );
281
        }
282
    }
283
284
    #[test]
285
    #[cfg(target_arch = "x86_64")]
286
    #[cfg(not(miri))]
287
    fn positive_sse2_forward() {
288
        for i in 0..517 {
289
            let b = "a".repeat(i).into_bytes();
290
            assert_eq!(b.len(), first_non_ascii_byte_sse2(&b));
291
        }
292
    }
293
294
    #[test]
295
    #[cfg(not(miri))]
296
    fn negative_fallback_forward() {
297
        for i in 0..517 {
298
            for align in 0..65 {
299
                let mut s = "a".repeat(i);
300
                s.push_str("☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃");
301
                let s = s.get(align..).unwrap_or("");
302
                assert_eq!(
303
                    i.saturating_sub(align),
304
                    first_non_ascii_byte_fallback(s.as_bytes()),
305
                    "i: {:?}, align: {:?}, len: {:?}, s: {:?}",
306
                    i,
307
                    align,
308
                    s.len(),
309
                    s
310
                );
311
            }
312
        }
313
    }
314
315
    #[test]
316
    #[cfg(target_arch = "x86_64")]
317
    #[cfg(not(miri))]
318
    fn negative_sse2_forward() {
319
        for i in 0..517 {
320
            for align in 0..65 {
321
                let mut s = "a".repeat(i);
322
                s.push_str("☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃");
323
                let s = s.get(align..).unwrap_or("");
324
                assert_eq!(
325
                    i.saturating_sub(align),
326
                    first_non_ascii_byte_sse2(s.as_bytes()),
327
                    "i: {:?}, align: {:?}, len: {:?}, s: {:?}",
328
                    i,
329
                    align,
330
                    s.len(),
331
                    s
332
                );
333
            }
334
        }
335
    }
336
}