Coverage Report

Created: 2026-01-16 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/memchr-2.4.1/src/memchr/fallback.rs
Line
Count
Source
1
// This module defines pure Rust platform independent implementations of all
2
// the memchr routines. We do our best to make them fast. Some of them may even
3
// get auto-vectorized.
4
5
use core::{cmp, usize};
6
7
#[cfg(target_pointer_width = "16")]
8
const USIZE_BYTES: usize = 2;
9
10
#[cfg(target_pointer_width = "32")]
11
const USIZE_BYTES: usize = 4;
12
13
#[cfg(target_pointer_width = "64")]
14
const USIZE_BYTES: usize = 8;
15
16
// The number of bytes to loop at in one iteration of memchr/memrchr.
17
const LOOP_SIZE: usize = 2 * USIZE_BYTES;
18
19
/// Return `true` if `x` contains any zero byte.
20
///
21
/// From *Matters Computational*, J. Arndt
22
///
23
/// "The idea is to subtract one from each of the bytes and then look for
24
/// bytes where the borrow propagated all the way to the most significant
25
/// bit."
26
#[inline(always)]
27
0
fn contains_zero_byte(x: usize) -> bool {
28
    const LO_U64: u64 = 0x0101010101010101;
29
    const HI_U64: u64 = 0x8080808080808080;
30
31
    const LO_USIZE: usize = LO_U64 as usize;
32
    const HI_USIZE: usize = HI_U64 as usize;
33
34
0
    x.wrapping_sub(LO_USIZE) & !x & HI_USIZE != 0
35
0
}
36
37
/// Repeat the given byte into a word size number. That is, every 8 bits
38
/// is equivalent to the given byte. For example, if `b` is `\x4E` or
39
/// `01001110` in binary, then the returned value on a 32-bit system would be:
40
/// `01001110_01001110_01001110_01001110`.
41
#[inline(always)]
42
0
fn repeat_byte(b: u8) -> usize {
43
0
    (b as usize) * (usize::MAX / 255)
44
0
}
45
46
0
pub fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
47
0
    let vn1 = repeat_byte(n1);
48
0
    let confirm = |byte| byte == n1;
49
0
    let loop_size = cmp::min(LOOP_SIZE, haystack.len());
50
0
    let align = USIZE_BYTES - 1;
51
0
    let start_ptr = haystack.as_ptr();
52
0
    let mut ptr = start_ptr;
53
54
    unsafe {
55
0
        let end_ptr = start_ptr.add(haystack.len());
56
0
        if haystack.len() < USIZE_BYTES {
57
0
            return forward_search(start_ptr, end_ptr, ptr, confirm);
58
0
        }
59
60
0
        let chunk = (ptr as *const usize).read_unaligned();
61
0
        if contains_zero_byte(chunk ^ vn1) {
62
0
            return forward_search(start_ptr, end_ptr, ptr, confirm);
63
0
        }
64
65
0
        ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align));
66
0
        debug_assert!(ptr > start_ptr);
67
0
        debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr);
68
0
        while loop_size == LOOP_SIZE && ptr <= end_ptr.sub(loop_size) {
69
0
            debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
70
71
0
            let a = *(ptr as *const usize);
72
0
            let b = *(ptr.add(USIZE_BYTES) as *const usize);
73
0
            let eqa = contains_zero_byte(a ^ vn1);
74
0
            let eqb = contains_zero_byte(b ^ vn1);
75
0
            if eqa || eqb {
76
0
                break;
77
0
            }
78
0
            ptr = ptr.add(LOOP_SIZE);
79
        }
80
0
        forward_search(start_ptr, end_ptr, ptr, confirm)
81
    }
82
0
}
83
84
/// Like `memchr`, but searches for two bytes instead of one.
85
0
pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
86
0
    let vn1 = repeat_byte(n1);
87
0
    let vn2 = repeat_byte(n2);
88
0
    let confirm = |byte| byte == n1 || byte == n2;
89
0
    let align = USIZE_BYTES - 1;
90
0
    let start_ptr = haystack.as_ptr();
91
0
    let mut ptr = start_ptr;
92
93
    unsafe {
94
0
        let end_ptr = start_ptr.add(haystack.len());
95
0
        if haystack.len() < USIZE_BYTES {
96
0
            return forward_search(start_ptr, end_ptr, ptr, confirm);
97
0
        }
98
99
0
        let chunk = (ptr as *const usize).read_unaligned();
100
0
        let eq1 = contains_zero_byte(chunk ^ vn1);
101
0
        let eq2 = contains_zero_byte(chunk ^ vn2);
102
0
        if eq1 || eq2 {
103
0
            return forward_search(start_ptr, end_ptr, ptr, confirm);
104
0
        }
105
106
0
        ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align));
107
0
        debug_assert!(ptr > start_ptr);
108
0
        debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr);
109
0
        while ptr <= end_ptr.sub(USIZE_BYTES) {
110
0
            debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
111
112
0
            let chunk = *(ptr as *const usize);
113
0
            let eq1 = contains_zero_byte(chunk ^ vn1);
114
0
            let eq2 = contains_zero_byte(chunk ^ vn2);
115
0
            if eq1 || eq2 {
116
0
                break;
117
0
            }
118
0
            ptr = ptr.add(USIZE_BYTES);
119
        }
120
0
        forward_search(start_ptr, end_ptr, ptr, confirm)
121
    }
122
0
}
123
124
/// Like `memchr`, but searches for three bytes instead of one.
125
0
pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
126
0
    let vn1 = repeat_byte(n1);
127
0
    let vn2 = repeat_byte(n2);
128
0
    let vn3 = repeat_byte(n3);
129
0
    let confirm = |byte| byte == n1 || byte == n2 || byte == n3;
130
0
    let align = USIZE_BYTES - 1;
131
0
    let start_ptr = haystack.as_ptr();
132
0
    let mut ptr = start_ptr;
133
134
    unsafe {
135
0
        let end_ptr = start_ptr.add(haystack.len());
136
0
        if haystack.len() < USIZE_BYTES {
137
0
            return forward_search(start_ptr, end_ptr, ptr, confirm);
138
0
        }
139
140
0
        let chunk = (ptr as *const usize).read_unaligned();
141
0
        let eq1 = contains_zero_byte(chunk ^ vn1);
142
0
        let eq2 = contains_zero_byte(chunk ^ vn2);
143
0
        let eq3 = contains_zero_byte(chunk ^ vn3);
144
0
        if eq1 || eq2 || eq3 {
145
0
            return forward_search(start_ptr, end_ptr, ptr, confirm);
146
0
        }
147
148
0
        ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align));
149
0
        debug_assert!(ptr > start_ptr);
150
0
        debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr);
151
0
        while ptr <= end_ptr.sub(USIZE_BYTES) {
152
0
            debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
153
154
0
            let chunk = *(ptr as *const usize);
155
0
            let eq1 = contains_zero_byte(chunk ^ vn1);
156
0
            let eq2 = contains_zero_byte(chunk ^ vn2);
157
0
            let eq3 = contains_zero_byte(chunk ^ vn3);
158
0
            if eq1 || eq2 || eq3 {
159
0
                break;
160
0
            }
161
0
            ptr = ptr.add(USIZE_BYTES);
162
        }
163
0
        forward_search(start_ptr, end_ptr, ptr, confirm)
164
    }
165
0
}
166
167
/// Return the last index matching the byte `x` in `text`.
168
0
pub fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
169
0
    let vn1 = repeat_byte(n1);
170
0
    let confirm = |byte| byte == n1;
171
0
    let loop_size = cmp::min(LOOP_SIZE, haystack.len());
172
0
    let align = USIZE_BYTES - 1;
173
0
    let start_ptr = haystack.as_ptr();
174
175
    unsafe {
176
0
        let end_ptr = start_ptr.add(haystack.len());
177
0
        let mut ptr = end_ptr;
178
0
        if haystack.len() < USIZE_BYTES {
179
0
            return reverse_search(start_ptr, end_ptr, ptr, confirm);
180
0
        }
181
182
0
        let chunk = (ptr.sub(USIZE_BYTES) as *const usize).read_unaligned();
183
0
        if contains_zero_byte(chunk ^ vn1) {
184
0
            return reverse_search(start_ptr, end_ptr, ptr, confirm);
185
0
        }
186
187
0
        ptr = (end_ptr as usize & !align) as *const u8;
188
0
        debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
189
0
        while loop_size == LOOP_SIZE && ptr >= start_ptr.add(loop_size) {
190
0
            debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
191
192
0
            let a = *(ptr.sub(2 * USIZE_BYTES) as *const usize);
193
0
            let b = *(ptr.sub(1 * USIZE_BYTES) as *const usize);
194
0
            let eqa = contains_zero_byte(a ^ vn1);
195
0
            let eqb = contains_zero_byte(b ^ vn1);
196
0
            if eqa || eqb {
197
0
                break;
198
0
            }
199
0
            ptr = ptr.sub(loop_size);
200
        }
201
0
        reverse_search(start_ptr, end_ptr, ptr, confirm)
202
    }
203
0
}
204
205
/// Like `memrchr`, but searches for two bytes instead of one.
206
0
pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
207
0
    let vn1 = repeat_byte(n1);
208
0
    let vn2 = repeat_byte(n2);
209
0
    let confirm = |byte| byte == n1 || byte == n2;
210
0
    let align = USIZE_BYTES - 1;
211
0
    let start_ptr = haystack.as_ptr();
212
213
    unsafe {
214
0
        let end_ptr = start_ptr.add(haystack.len());
215
0
        let mut ptr = end_ptr;
216
0
        if haystack.len() < USIZE_BYTES {
217
0
            return reverse_search(start_ptr, end_ptr, ptr, confirm);
218
0
        }
219
220
0
        let chunk = (ptr.sub(USIZE_BYTES) as *const usize).read_unaligned();
221
0
        let eq1 = contains_zero_byte(chunk ^ vn1);
222
0
        let eq2 = contains_zero_byte(chunk ^ vn2);
223
0
        if eq1 || eq2 {
224
0
            return reverse_search(start_ptr, end_ptr, ptr, confirm);
225
0
        }
226
227
0
        ptr = (end_ptr as usize & !align) as *const u8;
228
0
        debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
229
0
        while ptr >= start_ptr.add(USIZE_BYTES) {
230
0
            debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
231
232
0
            let chunk = *(ptr.sub(USIZE_BYTES) as *const usize);
233
0
            let eq1 = contains_zero_byte(chunk ^ vn1);
234
0
            let eq2 = contains_zero_byte(chunk ^ vn2);
235
0
            if eq1 || eq2 {
236
0
                break;
237
0
            }
238
0
            ptr = ptr.sub(USIZE_BYTES);
239
        }
240
0
        reverse_search(start_ptr, end_ptr, ptr, confirm)
241
    }
242
0
}
243
244
/// Like `memrchr`, but searches for three bytes instead of one.
245
0
pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
246
0
    let vn1 = repeat_byte(n1);
247
0
    let vn2 = repeat_byte(n2);
248
0
    let vn3 = repeat_byte(n3);
249
0
    let confirm = |byte| byte == n1 || byte == n2 || byte == n3;
250
0
    let align = USIZE_BYTES - 1;
251
0
    let start_ptr = haystack.as_ptr();
252
253
    unsafe {
254
0
        let end_ptr = start_ptr.add(haystack.len());
255
0
        let mut ptr = end_ptr;
256
0
        if haystack.len() < USIZE_BYTES {
257
0
            return reverse_search(start_ptr, end_ptr, ptr, confirm);
258
0
        }
259
260
0
        let chunk = (ptr.sub(USIZE_BYTES) as *const usize).read_unaligned();
261
0
        let eq1 = contains_zero_byte(chunk ^ vn1);
262
0
        let eq2 = contains_zero_byte(chunk ^ vn2);
263
0
        let eq3 = contains_zero_byte(chunk ^ vn3);
264
0
        if eq1 || eq2 || eq3 {
265
0
            return reverse_search(start_ptr, end_ptr, ptr, confirm);
266
0
        }
267
268
0
        ptr = (end_ptr as usize & !align) as *const u8;
269
0
        debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
270
0
        while ptr >= start_ptr.add(USIZE_BYTES) {
271
0
            debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
272
273
0
            let chunk = *(ptr.sub(USIZE_BYTES) as *const usize);
274
0
            let eq1 = contains_zero_byte(chunk ^ vn1);
275
0
            let eq2 = contains_zero_byte(chunk ^ vn2);
276
0
            let eq3 = contains_zero_byte(chunk ^ vn3);
277
0
            if eq1 || eq2 || eq3 {
278
0
                break;
279
0
            }
280
0
            ptr = ptr.sub(USIZE_BYTES);
281
        }
282
0
        reverse_search(start_ptr, end_ptr, ptr, confirm)
283
    }
284
0
}
285
286
#[inline(always)]
287
0
unsafe fn forward_search<F: Fn(u8) -> bool>(
288
0
    start_ptr: *const u8,
289
0
    end_ptr: *const u8,
290
0
    mut ptr: *const u8,
291
0
    confirm: F,
292
0
) -> Option<usize> {
293
0
    debug_assert!(start_ptr <= ptr);
294
0
    debug_assert!(ptr <= end_ptr);
295
296
0
    while ptr < end_ptr {
297
0
        if confirm(*ptr) {
298
0
            return Some(sub(ptr, start_ptr));
299
0
        }
300
0
        ptr = ptr.offset(1);
301
    }
302
0
    None
303
0
}
304
305
#[inline(always)]
306
0
unsafe fn reverse_search<F: Fn(u8) -> bool>(
307
0
    start_ptr: *const u8,
308
0
    end_ptr: *const u8,
309
0
    mut ptr: *const u8,
310
0
    confirm: F,
311
0
) -> Option<usize> {
312
0
    debug_assert!(start_ptr <= ptr);
313
0
    debug_assert!(ptr <= end_ptr);
314
315
0
    while ptr > start_ptr {
316
0
        ptr = ptr.offset(-1);
317
0
        if confirm(*ptr) {
318
0
            return Some(sub(ptr, start_ptr));
319
0
        }
320
    }
321
0
    None
322
0
}
323
324
/// Subtract `b` from `a` and return the difference. `a` should be greater than
325
/// or equal to `b`.
326
0
fn sub(a: *const u8, b: *const u8) -> usize {
327
0
    debug_assert!(a >= b);
328
0
    (a as usize) - (b as usize)
329
0
}