/rust/registry/src/index.crates.io-6f17d22bba15001f/regex-automata-0.2.0/src/util/start.rs
Line | Count | Source (jump to first uncovered line) |
1 | | /// Represents the four possible starting configurations of a DFA search. |
2 | | /// |
3 | | /// The starting configuration is determined by inspecting the beginning of |
4 | | /// the haystack (up to 1 byte). Ultimately, this along with a pattern ID (if |
5 | | /// specified) is what selects the start state to use in a DFA. |
6 | | /// |
7 | | /// If a DFA doesn't have starting states for each pattern, then it will |
8 | | /// have a maximum of four DFA start states. If the DFA was compiled with start |
9 | | /// states for each pattern, then it will have a maximum of four DFA start |
10 | | /// states for searching for any pattern, and then another maximum of four DFA |
11 | | /// start states for executing an anchored search for each pattern. |
12 | | /// |
13 | | /// This ends up being represented as a table in the DFA (whether lazy or fully |
14 | | /// built) where the stride of that table is 4, and each entry is an index into |
15 | | /// the state transition table. Note though that multiple entries in the table |
16 | | /// might point to the same state if the states would otherwise be equivalent. |
17 | | /// (This is guaranteed by DFA minimization and may even be accomplished by |
18 | | /// normal determinization, since it attempts to reuse equivalent states too.) |
19 | | #[derive(Clone, Copy, Debug, Eq, PartialEq)] |
20 | | pub(crate) enum Start { |
21 | | /// This occurs when the starting position is not any of the ones below. |
22 | | NonWordByte = 0, |
23 | | /// This occurs when the byte immediately preceding the start of the search |
24 | | /// is an ASCII word byte. |
25 | | WordByte = 1, |
26 | | /// This occurs when the starting position of the search corresponds to the |
27 | | /// beginning of the haystack. |
28 | | Text = 2, |
29 | | /// This occurs when the byte immediately preceding the start of the search |
30 | | /// is a line terminator. Specifically, `\n`. |
31 | | Line = 3, |
32 | | } |
33 | | |
34 | | impl Start { |
35 | | /// Return the starting state corresponding to the given integer. If no |
36 | | /// starting state exists for the given integer, then None is returned. |
37 | 0 | pub(crate) fn from_usize(n: usize) -> Option<Start> { |
38 | 0 | match n { |
39 | 0 | 0 => Some(Start::NonWordByte), |
40 | 0 | 1 => Some(Start::WordByte), |
41 | 0 | 2 => Some(Start::Text), |
42 | 0 | 3 => Some(Start::Line), |
43 | 0 | _ => None, |
44 | | } |
45 | 0 | } |
46 | | |
47 | | /// Returns the total number of starting state configurations. |
48 | 0 | pub(crate) fn count() -> usize { |
49 | 0 | 4 |
50 | 0 | } |
51 | | |
52 | | /// Returns the starting state configuration for the given search |
53 | | /// parameters. If the given offset range is not valid, then this panics. |
54 | | #[inline(always)] |
55 | 0 | pub(crate) fn from_position_fwd( |
56 | 0 | bytes: &[u8], |
57 | 0 | start: usize, |
58 | 0 | end: usize, |
59 | 0 | ) -> Start { |
60 | 0 | assert!( |
61 | 0 | bytes.get(start..end).is_some(), |
62 | 0 | "{}..{} is invalid", |
63 | | start, |
64 | | end |
65 | | ); |
66 | 0 | if start == 0 { |
67 | 0 | Start::Text |
68 | 0 | } else if bytes[start - 1] == b'\n' { |
69 | 0 | Start::Line |
70 | 0 | } else if crate::util::is_word_byte(bytes[start - 1]) { |
71 | 0 | Start::WordByte |
72 | | } else { |
73 | 0 | Start::NonWordByte |
74 | | } |
75 | 0 | } |
76 | | |
77 | | /// Returns the starting state configuration for a reverse search with the |
78 | | /// given search parameters. If the given offset range is not valid, then |
79 | | /// this panics. |
80 | | #[inline(always)] |
81 | 0 | pub(crate) fn from_position_rev( |
82 | 0 | bytes: &[u8], |
83 | 0 | start: usize, |
84 | 0 | end: usize, |
85 | 0 | ) -> Start { |
86 | 0 | assert!( |
87 | 0 | bytes.get(start..end).is_some(), |
88 | 0 | "{}..{} is invalid", |
89 | | start, |
90 | | end |
91 | | ); |
92 | 0 | if end == bytes.len() { |
93 | 0 | Start::Text |
94 | 0 | } else if bytes[end] == b'\n' { |
95 | 0 | Start::Line |
96 | 0 | } else if crate::util::is_word_byte(bytes[end]) { |
97 | 0 | Start::WordByte |
98 | | } else { |
99 | 0 | Start::NonWordByte |
100 | | } |
101 | 0 | } |
102 | | |
103 | | /// Return this starting configuration as an integer. It is guaranteed to |
104 | | /// be less than `Start::count()`. |
105 | | #[inline(always)] |
106 | 0 | pub(crate) fn as_usize(&self) -> usize { |
107 | 0 | *self as usize |
108 | 0 | } |
109 | | } |