Coverage Report

Created: 2025-06-16 06:50

/rust/registry/src/index.crates.io-6f17d22bba15001f/regex-automata-0.2.0/src/util/start.rs
Line
Count
Source (jump to first uncovered line)
1
/// Represents the four possible starting configurations of a DFA search.
2
///
3
/// The starting configuration is determined by inspecting the beginning of
4
/// the haystack (up to 1 byte). Ultimately, this along with a pattern ID (if
5
/// specified) is what selects the start state to use in a DFA.
6
///
7
/// If a DFA doesn't have starting states for each pattern, then it will
8
/// have a maximum of four DFA start states. If the DFA was compiled with start
9
/// states for each pattern, then it will have a maximum of four DFA start
10
/// states for searching for any pattern, and then another maximum of four DFA
11
/// start states for executing an anchored search for each pattern.
12
///
13
/// This ends up being represented as a table in the DFA (whether lazy or fully
14
/// built) where the stride of that table is 4, and each entry is an index into
15
/// the state transition table. Note though that multiple entries in the table
16
/// might point to the same state if the states would otherwise be equivalent.
17
/// (This is guaranteed by DFA minimization and may even be accomplished by
18
/// normal determinization, since it attempts to reuse equivalent states too.)
19
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
20
pub(crate) enum Start {
21
    /// This occurs when the starting position is not any of the ones below.
22
    NonWordByte = 0,
23
    /// This occurs when the byte immediately preceding the start of the search
24
    /// is an ASCII word byte.
25
    WordByte = 1,
26
    /// This occurs when the starting position of the search corresponds to the
27
    /// beginning of the haystack.
28
    Text = 2,
29
    /// This occurs when the byte immediately preceding the start of the search
30
    /// is a line terminator. Specifically, `\n`.
31
    Line = 3,
32
}
33
34
impl Start {
35
    /// Return the starting state corresponding to the given integer. If no
36
    /// starting state exists for the given integer, then None is returned.
37
0
    pub(crate) fn from_usize(n: usize) -> Option<Start> {
38
0
        match n {
39
0
            0 => Some(Start::NonWordByte),
40
0
            1 => Some(Start::WordByte),
41
0
            2 => Some(Start::Text),
42
0
            3 => Some(Start::Line),
43
0
            _ => None,
44
        }
45
0
    }
46
47
    /// Returns the total number of starting state configurations.
48
0
    pub(crate) fn count() -> usize {
49
0
        4
50
0
    }
51
52
    /// Returns the starting state configuration for the given search
53
    /// parameters. If the given offset range is not valid, then this panics.
54
    #[inline(always)]
55
0
    pub(crate) fn from_position_fwd(
56
0
        bytes: &[u8],
57
0
        start: usize,
58
0
        end: usize,
59
0
    ) -> Start {
60
0
        assert!(
61
0
            bytes.get(start..end).is_some(),
62
0
            "{}..{} is invalid",
63
            start,
64
            end
65
        );
66
0
        if start == 0 {
67
0
            Start::Text
68
0
        } else if bytes[start - 1] == b'\n' {
69
0
            Start::Line
70
0
        } else if crate::util::is_word_byte(bytes[start - 1]) {
71
0
            Start::WordByte
72
        } else {
73
0
            Start::NonWordByte
74
        }
75
0
    }
76
77
    /// Returns the starting state configuration for a reverse search with the
78
    /// given search parameters. If the given offset range is not valid, then
79
    /// this panics.
80
    #[inline(always)]
81
0
    pub(crate) fn from_position_rev(
82
0
        bytes: &[u8],
83
0
        start: usize,
84
0
        end: usize,
85
0
    ) -> Start {
86
0
        assert!(
87
0
            bytes.get(start..end).is_some(),
88
0
            "{}..{} is invalid",
89
            start,
90
            end
91
        );
92
0
        if end == bytes.len() {
93
0
            Start::Text
94
0
        } else if bytes[end] == b'\n' {
95
0
            Start::Line
96
0
        } else if crate::util::is_word_byte(bytes[end]) {
97
0
            Start::WordByte
98
        } else {
99
0
            Start::NonWordByte
100
        }
101
0
    }
102
103
    /// Return this starting configuration as an integer. It is guaranteed to
104
    /// be less than `Start::count()`.
105
    #[inline(always)]
106
0
    pub(crate) fn as_usize(&self) -> usize {
107
0
        *self as usize
108
0
    }
109
}