Coverage Report

Created: 2024-12-17 06:15

/rust/registry/src/index.crates.io-6f17d22bba15001f/regex-automata-0.4.9/src/util/start.rs
Line
Count
Source (jump to first uncovered line)
1
/*!
2
Provides helpers for dealing with start state configurations in DFAs.
3
*/
4
5
use crate::util::{
6
    look::LookMatcher,
7
    search::{Anchored, Input},
8
    wire::{self, DeserializeError, SerializeError},
9
};
10
11
/// The configuration used to determine a DFA's start state for a search.
12
///
13
/// A DFA has a single starting state in the typical textbook description. That
14
/// is, it corresponds to the set of all starting states for the NFA that built
15
/// it, along with their espsilon closures. In this crate, however, DFAs have
16
/// many possible start states due to a few factors:
17
///
18
/// * DFAs support the ability to run either anchored or unanchored searches.
19
/// Each type of search needs its own start state. For example, an unanchored
20
/// search requires starting at a state corresponding to a regex with a
21
/// `(?s-u:.)*?` prefix, which will match through anything.
22
/// * DFAs also optionally support starting an anchored search for any one
23
/// specific pattern. Each such pattern requires its own start state.
24
/// * If a look-behind assertion like `^` or `\b` is used in the regex, then
25
/// the DFA will need to inspect a single byte immediately before the start of
26
/// the search to choose the correct start state.
27
///
28
/// Indeed, this configuration precisely encapsulates all of the above factors.
29
/// The [`Config::anchored`] method sets which kind of anchored search to
30
/// perform while the [`Config::look_behind`] method provides a way to set
31
/// the byte that occurs immediately before the start of the search.
32
///
33
/// Generally speaking, this type is only useful when you want to run searches
34
/// without using an [`Input`]. In particular, an `Input` wants a haystack
35
/// slice, but callers may not have a contiguous sequence of bytes as a
36
/// haystack in all cases. This type provides a lower level of control such
37
/// that callers can provide their own anchored configuration and look-behind
38
/// byte explicitly.
39
///
40
/// # Example
41
///
42
/// This shows basic usage that permits running a search with a DFA without
43
/// using the `Input` abstraction.
44
///
45
/// ```
46
/// use regex_automata::{
47
///     dfa::{Automaton, dense},
48
///     util::start,
49
///     Anchored,
50
/// };
51
///
52
/// let dfa = dense::DFA::new(r"(?-u)\b\w+\b")?;
53
/// let haystack = "quartz";
54
///
55
/// let config = start::Config::new().anchored(Anchored::Yes);
56
/// let mut state = dfa.start_state(&config)?;
57
/// for &b in haystack.as_bytes().iter() {
58
///     state = dfa.next_state(state, b);
59
/// }
60
/// state = dfa.next_eoi_state(state);
61
/// assert!(dfa.is_match_state(state));
62
///
63
/// # Ok::<(), Box<dyn std::error::Error>>(())
64
/// ```
65
///
66
/// This example shows how to correctly run a search that doesn't begin at
67
/// the start of a haystack. Notice how we set the look-behind byte, and as
68
/// a result, the `\b` assertion does not match.
69
///
70
/// ```
71
/// use regex_automata::{
72
///     dfa::{Automaton, dense},
73
///     util::start,
74
///     Anchored,
75
/// };
76
///
77
/// let dfa = dense::DFA::new(r"(?-u)\b\w+\b")?;
78
/// let haystack = "quartz";
79
///
80
/// let config = start::Config::new()
81
///     .anchored(Anchored::Yes)
82
///     .look_behind(Some(b'q'));
83
/// let mut state = dfa.start_state(&config)?;
84
/// for &b in haystack.as_bytes().iter().skip(1) {
85
///     state = dfa.next_state(state, b);
86
/// }
87
/// state = dfa.next_eoi_state(state);
88
/// // No match!
89
/// assert!(!dfa.is_match_state(state));
90
///
91
/// # Ok::<(), Box<dyn std::error::Error>>(())
92
/// ```
93
///
94
/// If we had instead not set a look-behind byte, then the DFA would assume
95
/// that it was starting at the beginning of the haystack, and thus `\b` should
96
/// match. This in turn would result in erroneously reporting a match:
97
///
98
/// ```
99
/// use regex_automata::{
100
///     dfa::{Automaton, dense},
101
///     util::start,
102
///     Anchored,
103
/// };
104
///
105
/// let dfa = dense::DFA::new(r"(?-u)\b\w+\b")?;
106
/// let haystack = "quartz";
107
///
108
/// // Whoops, forgot the look-behind byte...
109
/// let config = start::Config::new().anchored(Anchored::Yes);
110
/// let mut state = dfa.start_state(&config)?;
111
/// for &b in haystack.as_bytes().iter().skip(1) {
112
///     state = dfa.next_state(state, b);
113
/// }
114
/// state = dfa.next_eoi_state(state);
115
/// // And now we get a match unexpectedly.
116
/// assert!(dfa.is_match_state(state));
117
///
118
/// # Ok::<(), Box<dyn std::error::Error>>(())
119
/// ```
120
#[derive(Clone, Debug)]
121
pub struct Config {
122
    look_behind: Option<u8>,
123
    anchored: Anchored,
124
}
125
126
impl Config {
127
    /// Create a new default start configuration.
128
    ///
129
    /// The default is an unanchored search that starts at the beginning of the
130
    /// haystack.
131
0
    pub fn new() -> Config {
132
0
        Config { anchored: Anchored::No, look_behind: None }
133
0
    }
Unexecuted instantiation: <regex_automata::util::start::Config>::new
Unexecuted instantiation: <regex_automata::util::start::Config>::new
134
135
    /// A convenience routine for building a start configuration from an
136
    /// [`Input`] for a forward search.
137
    ///
138
    /// This automatically sets the look-behind byte to the byte immediately
139
    /// preceding the start of the search. If the start of the search is at
140
    /// offset `0`, then no look-behind byte is set.
141
16.3k
    pub fn from_input_forward(input: &Input<'_>) -> Config {
142
16.3k
        let look_behind = input
143
16.3k
            .start()
144
16.3k
            .checked_sub(1)
145
16.3k
            .and_then(|i| input.haystack().get(i).copied());
Unexecuted instantiation: <regex_automata::util::start::Config>::from_input_forward::{closure#0}
Unexecuted instantiation: <regex_automata::util::start::Config>::from_input_forward::{closure#0}
146
16.3k
        Config { look_behind, anchored: input.get_anchored() }
147
16.3k
    }
Unexecuted instantiation: <regex_automata::util::start::Config>::from_input_forward
<regex_automata::util::start::Config>::from_input_forward
Line
Count
Source
141
16.3k
    pub fn from_input_forward(input: &Input<'_>) -> Config {
142
16.3k
        let look_behind = input
143
16.3k
            .start()
144
16.3k
            .checked_sub(1)
145
16.3k
            .and_then(|i| input.haystack().get(i).copied());
146
16.3k
        Config { look_behind, anchored: input.get_anchored() }
147
16.3k
    }
148
149
    /// A convenience routine for building a start configuration from an
150
    /// [`Input`] for a reverse search.
151
    ///
152
    /// This automatically sets the look-behind byte to the byte immediately
153
    /// following the end of the search. If the end of the search is at
154
    /// offset `haystack.len()`, then no look-behind byte is set.
155
0
    pub fn from_input_reverse(input: &Input<'_>) -> Config {
156
0
        let look_behind = input.haystack().get(input.end()).copied();
157
0
        Config { look_behind, anchored: input.get_anchored() }
158
0
    }
Unexecuted instantiation: <regex_automata::util::start::Config>::from_input_reverse
Unexecuted instantiation: <regex_automata::util::start::Config>::from_input_reverse
159
160
    /// Set the look-behind byte at the start of a search.
161
    ///
162
    /// Unless the search is intended to logically start at the beginning of a
163
    /// haystack, this should _always_ be set to the byte immediately preceding
164
    /// the start of the search. If no look-behind byte is set, then the start
165
    /// configuration will assume it is at the beginning of the haystack. For
166
    /// example, the anchor `^` will match.
167
    ///
168
    /// The default is that no look-behind byte is set.
169
0
    pub fn look_behind(mut self, byte: Option<u8>) -> Config {
170
0
        self.look_behind = byte;
171
0
        self
172
0
    }
Unexecuted instantiation: <regex_automata::util::start::Config>::look_behind
Unexecuted instantiation: <regex_automata::util::start::Config>::look_behind
173
174
    /// Set the anchored mode of a search.
175
    ///
176
    /// The default is an unanchored search.
177
0
    pub fn anchored(mut self, mode: Anchored) -> Config {
178
0
        self.anchored = mode;
179
0
        self
180
0
    }
Unexecuted instantiation: <regex_automata::util::start::Config>::anchored
Unexecuted instantiation: <regex_automata::util::start::Config>::anchored
181
182
    /// Return the look-behind byte in this configuration, if one exists.
183
16.3k
    pub fn get_look_behind(&self) -> Option<u8> {
184
16.3k
        self.look_behind
185
16.3k
    }
Unexecuted instantiation: <regex_automata::util::start::Config>::get_look_behind
<regex_automata::util::start::Config>::get_look_behind
Line
Count
Source
183
16.3k
    pub fn get_look_behind(&self) -> Option<u8> {
184
16.3k
        self.look_behind
185
16.3k
    }
186
187
    /// Return the anchored mode in this configuration.
188
16.3k
    pub fn get_anchored(&self) -> Anchored {
189
16.3k
        self.anchored
190
16.3k
    }
Unexecuted instantiation: <regex_automata::util::start::Config>::get_anchored
<regex_automata::util::start::Config>::get_anchored
Line
Count
Source
188
16.3k
    pub fn get_anchored(&self) -> Anchored {
189
16.3k
        self.anchored
190
16.3k
    }
191
}
192
193
/// A map from every possible byte value to its corresponding starting
194
/// configuration.
195
///
196
/// This map is used in order to lookup the start configuration for a particular
197
/// position in a haystack. This start configuration is then used in
198
/// combination with things like the anchored mode and pattern ID to fully
199
/// determine the start state.
200
///
201
/// Generally speaking, this map is only used for fully compiled DFAs and lazy
202
/// DFAs. For NFAs (including the one-pass DFA), the start state is generally
203
/// selected by virtue of traversing the NFA state graph. DFAs do the same
204
/// thing, but at build time and not search time. (Well, technically the lazy
205
/// DFA does it at search time, but it does enough work to cache the full
206
/// result of the epsilon closure that the NFA engines tend to need to do.)
207
#[derive(Clone)]
208
pub(crate) struct StartByteMap {
209
    map: [Start; 256],
210
}
211
212
impl StartByteMap {
213
    /// Create a new map from byte values to their corresponding starting
214
    /// configurations. The map is determined, in part, by how look-around
215
    /// assertions are matched via the matcher given.
216
2
    pub(crate) fn new(lookm: &LookMatcher) -> StartByteMap {
217
2
        let mut map = [Start::NonWordByte; 256];
218
2
        map[usize::from(b'\n')] = Start::LineLF;
219
2
        map[usize::from(b'\r')] = Start::LineCR;
220
2
        map[usize::from(b'_')] = Start::WordByte;
221
2
222
2
        let mut byte = b'0';
223
22
        while byte <= b'9' {
224
20
            map[usize::from(byte)] = Start::WordByte;
225
20
            byte += 1;
226
20
        }
227
2
        byte = b'A';
228
54
        while byte <= b'Z' {
229
52
            map[usize::from(byte)] = Start::WordByte;
230
52
            byte += 1;
231
52
        }
232
2
        byte = b'a';
233
54
        while byte <= b'z' {
234
52
            map[usize::from(byte)] = Start::WordByte;
235
52
            byte += 1;
236
52
        }
237
238
2
        let lineterm = lookm.get_line_terminator();
239
2
        // If our line terminator is normal, then it is already handled by
240
2
        // the LineLF and LineCR configurations. But if it's weird, then we
241
2
        // overwrite whatever was there before for that terminator with a
242
2
        // special configuration. The trick here is that if the terminator
243
2
        // is, say, a word byte like `a`, then callers seeing this start
244
2
        // configuration need to account for that and build their DFA state as
245
2
        // if it *also* came from a word byte.
246
2
        if lineterm != b'\r' && lineterm != b'\n' {
247
0
            map[usize::from(lineterm)] = Start::CustomLineTerminator;
248
2
        }
249
2
        StartByteMap { map }
250
2
    }
Unexecuted instantiation: <regex_automata::util::start::StartByteMap>::new
<regex_automata::util::start::StartByteMap>::new
Line
Count
Source
216
2
    pub(crate) fn new(lookm: &LookMatcher) -> StartByteMap {
217
2
        let mut map = [Start::NonWordByte; 256];
218
2
        map[usize::from(b'\n')] = Start::LineLF;
219
2
        map[usize::from(b'\r')] = Start::LineCR;
220
2
        map[usize::from(b'_')] = Start::WordByte;
221
2
222
2
        let mut byte = b'0';
223
22
        while byte <= b'9' {
224
20
            map[usize::from(byte)] = Start::WordByte;
225
20
            byte += 1;
226
20
        }
227
2
        byte = b'A';
228
54
        while byte <= b'Z' {
229
52
            map[usize::from(byte)] = Start::WordByte;
230
52
            byte += 1;
231
52
        }
232
2
        byte = b'a';
233
54
        while byte <= b'z' {
234
52
            map[usize::from(byte)] = Start::WordByte;
235
52
            byte += 1;
236
52
        }
237
238
2
        let lineterm = lookm.get_line_terminator();
239
2
        // If our line terminator is normal, then it is already handled by
240
2
        // the LineLF and LineCR configurations. But if it's weird, then we
241
2
        // overwrite whatever was there before for that terminator with a
242
2
        // special configuration. The trick here is that if the terminator
243
2
        // is, say, a word byte like `a`, then callers seeing this start
244
2
        // configuration need to account for that and build their DFA state as
245
2
        // if it *also* came from a word byte.
246
2
        if lineterm != b'\r' && lineterm != b'\n' {
247
0
            map[usize::from(lineterm)] = Start::CustomLineTerminator;
248
2
        }
249
2
        StartByteMap { map }
250
2
    }
251
252
    /// Return the starting configuration for the given look-behind byte.
253
    ///
254
    /// If no look-behind exists, callers should use `Start::Text`.
255
    #[cfg_attr(feature = "perf-inline", inline(always))]
256
0
    pub(crate) fn get(&self, byte: u8) -> Start {
257
0
        self.map[usize::from(byte)]
258
0
    }
Unexecuted instantiation: <regex_automata::util::start::StartByteMap>::get
Unexecuted instantiation: <regex_automata::util::start::StartByteMap>::get
259
260
    /// Deserializes a byte class map from the given slice. If the slice is of
261
    /// insufficient length or otherwise contains an impossible mapping, then
262
    /// an error is returned. Upon success, the number of bytes read along with
263
    /// the map are returned. The number of bytes read is always a multiple of
264
    /// 8.
265
0
    pub(crate) fn from_bytes(
266
0
        slice: &[u8],
267
0
    ) -> Result<(StartByteMap, usize), DeserializeError> {
268
0
        wire::check_slice_len(slice, 256, "start byte map")?;
269
0
        let mut map = [Start::NonWordByte; 256];
270
0
        for (i, &repr) in slice[..256].iter().enumerate() {
271
0
            map[i] = match Start::from_usize(usize::from(repr)) {
272
0
                Some(start) => start,
273
                None => {
274
0
                    return Err(DeserializeError::generic(
275
0
                        "found invalid starting configuration",
276
0
                    ))
277
                }
278
            };
279
        }
280
0
        Ok((StartByteMap { map }, 256))
281
0
    }
Unexecuted instantiation: <regex_automata::util::start::StartByteMap>::from_bytes
Unexecuted instantiation: <regex_automata::util::start::StartByteMap>::from_bytes
282
283
    /// Writes this map to the given byte buffer. if the given buffer is too
284
    /// small, then an error is returned. Upon success, the total number of
285
    /// bytes written is returned. The number of bytes written is guaranteed to
286
    /// be a multiple of 8.
287
0
    pub(crate) fn write_to(
288
0
        &self,
289
0
        dst: &mut [u8],
290
0
    ) -> Result<usize, SerializeError> {
291
0
        let nwrite = self.write_to_len();
292
0
        if dst.len() < nwrite {
293
0
            return Err(SerializeError::buffer_too_small("start byte map"));
294
0
        }
295
0
        for (i, &start) in self.map.iter().enumerate() {
296
0
            dst[i] = start.as_u8();
297
0
        }
298
0
        Ok(nwrite)
299
0
    }
Unexecuted instantiation: <regex_automata::util::start::StartByteMap>::write_to
Unexecuted instantiation: <regex_automata::util::start::StartByteMap>::write_to
300
301
    /// Returns the total number of bytes written by `write_to`.
302
0
    pub(crate) fn write_to_len(&self) -> usize {
303
0
        256
304
0
    }
Unexecuted instantiation: <regex_automata::util::start::StartByteMap>::write_to_len
Unexecuted instantiation: <regex_automata::util::start::StartByteMap>::write_to_len
305
}
306
307
impl core::fmt::Debug for StartByteMap {
308
0
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
309
        use crate::util::escape::DebugByte;
310
311
0
        write!(f, "StartByteMap{{")?;
312
0
        for byte in 0..=255 {
313
0
            if byte > 0 {
314
0
                write!(f, ", ")?;
315
0
            }
316
0
            let start = self.map[usize::from(byte)];
317
0
            write!(f, "{:?} => {:?}", DebugByte(byte), start)?;
318
        }
319
0
        write!(f, "}}")?;
320
0
        Ok(())
321
0
    }
Unexecuted instantiation: <regex_automata::util::start::StartByteMap as core::fmt::Debug>::fmt
Unexecuted instantiation: <regex_automata::util::start::StartByteMap as core::fmt::Debug>::fmt
322
}
323
324
/// Represents the six possible starting configurations of a DFA search.
325
///
326
/// The starting configuration is determined by inspecting the beginning
327
/// of the haystack (up to 1 byte). Ultimately, this along with a pattern ID
328
/// (if specified) and the type of search (anchored or not) is what selects the
329
/// start state to use in a DFA.
330
///
331
/// As one example, if a DFA only supports unanchored searches and does not
332
/// support anchored searches for each pattern, then it will have at most 6
333
/// distinct start states. (Some start states may be reused if determinization
334
/// can determine that they will be equivalent.) If the DFA supports both
335
/// anchored and unanchored searches, then it will have a maximum of 12
336
/// distinct start states. Finally, if the DFA also supports anchored searches
337
/// for each pattern, then it can have up to `12 + (N * 6)` start states, where
338
/// `N` is the number of patterns.
339
///
340
/// Handling each of these starting configurations in the context of DFA
341
/// determinization can be *quite* tricky and subtle. But the code is small
342
/// and can be found at `crate::util::determinize::set_lookbehind_from_start`.
343
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
344
pub(crate) enum Start {
345
    /// This occurs when the starting position is not any of the ones below.
346
    NonWordByte = 0,
347
    /// This occurs when the byte immediately preceding the start of the search
348
    /// is an ASCII word byte.
349
    WordByte = 1,
350
    /// This occurs when the starting position of the search corresponds to the
351
    /// beginning of the haystack.
352
    Text = 2,
353
    /// This occurs when the byte immediately preceding the start of the search
354
    /// is a line terminator. Specifically, `\n`.
355
    LineLF = 3,
356
    /// This occurs when the byte immediately preceding the start of the search
357
    /// is a line terminator. Specifically, `\r`.
358
    LineCR = 4,
359
    /// This occurs when a custom line terminator has been set via a
360
    /// `LookMatcher`, and when that line terminator is neither a `\r` or a
361
    /// `\n`.
362
    ///
363
    /// If the custom line terminator is a word byte, then this start
364
    /// configuration is still selected. DFAs that implement word boundary
365
    /// assertions will likely need to check whether the custom line terminator
366
    /// is a word byte, in which case, it should behave as if the byte
367
    /// satisfies `\b` in addition to multi-line anchors.
368
    CustomLineTerminator = 5,
369
}
370
371
impl Start {
372
    /// Return the starting state corresponding to the given integer. If no
373
    /// starting state exists for the given integer, then None is returned.
374
0
    pub(crate) fn from_usize(n: usize) -> Option<Start> {
375
0
        match n {
376
0
            0 => Some(Start::NonWordByte),
377
0
            1 => Some(Start::WordByte),
378
0
            2 => Some(Start::Text),
379
0
            3 => Some(Start::LineLF),
380
0
            4 => Some(Start::LineCR),
381
0
            5 => Some(Start::CustomLineTerminator),
382
0
            _ => None,
383
        }
384
0
    }
Unexecuted instantiation: <regex_automata::util::start::Start>::from_usize
Unexecuted instantiation: <regex_automata::util::start::Start>::from_usize
385
386
    /// Returns the total number of starting state configurations.
387
8
    pub(crate) fn len() -> usize {
388
8
        6
389
8
    }
Unexecuted instantiation: <regex_automata::util::start::Start>::len
<regex_automata::util::start::Start>::len
Line
Count
Source
387
8
    pub(crate) fn len() -> usize {
388
8
        6
389
8
    }
390
391
    /// Return this starting configuration as `u8` integer. It is guaranteed to
392
    /// be less than `Start::len()`.
393
    #[cfg_attr(feature = "perf-inline", inline(always))]
394
16.3k
    pub(crate) fn as_u8(&self) -> u8 {
395
16.3k
        // AFAIK, 'as' is the only way to zero-cost convert an int enum to an
396
16.3k
        // actual int.
397
16.3k
        *self as u8
398
16.3k
    }
Unexecuted instantiation: <regex_automata::util::start::Start>::as_u8
<regex_automata::util::start::Start>::as_u8
Line
Count
Source
394
16.3k
    pub(crate) fn as_u8(&self) -> u8 {
395
16.3k
        // AFAIK, 'as' is the only way to zero-cost convert an int enum to an
396
16.3k
        // actual int.
397
16.3k
        *self as u8
398
16.3k
    }
399
400
    /// Return this starting configuration as a `usize` integer. It is
401
    /// guaranteed to be less than `Start::len()`.
402
    #[cfg_attr(feature = "perf-inline", inline(always))]
403
16.3k
    pub(crate) fn as_usize(&self) -> usize {
404
16.3k
        usize::from(self.as_u8())
405
16.3k
    }
Unexecuted instantiation: <regex_automata::util::start::Start>::as_usize
<regex_automata::util::start::Start>::as_usize
Line
Count
Source
403
16.3k
    pub(crate) fn as_usize(&self) -> usize {
404
16.3k
        usize::from(self.as_u8())
405
16.3k
    }
406
}
407
408
#[cfg(test)]
409
mod tests {
410
    use super::*;
411
412
    #[test]
413
    fn start_fwd_done_range() {
414
        let smap = StartByteMap::new(&LookMatcher::default());
415
        let input = Input::new("").range(1..0);
416
        let config = Config::from_input_forward(&input);
417
        let start =
418
            config.get_look_behind().map_or(Start::Text, |b| smap.get(b));
419
        assert_eq!(Start::Text, start);
420
    }
421
422
    #[test]
423
    fn start_rev_done_range() {
424
        let smap = StartByteMap::new(&LookMatcher::default());
425
        let input = Input::new("").range(1..0);
426
        let config = Config::from_input_reverse(&input);
427
        let start =
428
            config.get_look_behind().map_or(Start::Text, |b| smap.get(b));
429
        assert_eq!(Start::Text, start);
430
    }
431
432
    #[test]
433
    fn start_fwd() {
434
        let f = |haystack, start, end| {
435
            let smap = StartByteMap::new(&LookMatcher::default());
436
            let input = Input::new(haystack).range(start..end);
437
            let config = Config::from_input_forward(&input);
438
            let start =
439
                config.get_look_behind().map_or(Start::Text, |b| smap.get(b));
440
            start
441
        };
442
443
        assert_eq!(Start::Text, f("", 0, 0));
444
        assert_eq!(Start::Text, f("abc", 0, 3));
445
        assert_eq!(Start::Text, f("\nabc", 0, 3));
446
447
        assert_eq!(Start::LineLF, f("\nabc", 1, 3));
448
449
        assert_eq!(Start::LineCR, f("\rabc", 1, 3));
450
451
        assert_eq!(Start::WordByte, f("abc", 1, 3));
452
453
        assert_eq!(Start::NonWordByte, f(" abc", 1, 3));
454
    }
455
456
    #[test]
457
    fn start_rev() {
458
        let f = |haystack, start, end| {
459
            let smap = StartByteMap::new(&LookMatcher::default());
460
            let input = Input::new(haystack).range(start..end);
461
            let config = Config::from_input_reverse(&input);
462
            let start =
463
                config.get_look_behind().map_or(Start::Text, |b| smap.get(b));
464
            start
465
        };
466
467
        assert_eq!(Start::Text, f("", 0, 0));
468
        assert_eq!(Start::Text, f("abc", 0, 3));
469
        assert_eq!(Start::Text, f("abc\n", 0, 4));
470
471
        assert_eq!(Start::LineLF, f("abc\nz", 0, 3));
472
473
        assert_eq!(Start::LineCR, f("abc\rz", 0, 3));
474
475
        assert_eq!(Start::WordByte, f("abc", 0, 2));
476
477
        assert_eq!(Start::NonWordByte, f("abc ", 0, 3));
478
    }
479
}