/rust/registry/src/index.crates.io-1949cf8c6b5b557f/regex-automata-0.2.0/src/dfa/regex.rs

Source
/*!
A DFA-backed `Regex`.

This module provides [`Regex`], which is defined generically over the
[`Automaton`] trait. A `Regex` implements convenience routines you might have
come to expect, such as finding the start/end of a match and iterating over
all non-overlapping matches. This `Regex` type is limited in its capabilities
to what a DFA can provide. Therefore, APIs involving capturing groups, for
example, are not provided.

Internally, a `Regex` is composed of two DFAs. One is a "forward" DFA that
finds the end offset of a match, where as the other is a "reverse" DFA that
find the start offset of a match.

See the [parent module](crate::dfa) for examples.
*/

#[cfg(feature = "alloc")]
use alloc::vec::Vec;

use crate::{
    dfa::automaton::{Automaton, OverlappingState},
    util::prefilter::{self, Prefilter},
    MatchError, MultiMatch,
};
#[cfg(feature = "alloc")]
use crate::{
    dfa::{dense, error::Error, sparse},
    nfa::thompson,
    util::matchtypes::MatchKind,
};

// When the alloc feature is enabled, the regex type sets its A type parameter
// to default to an owned dense DFA. But without alloc, we set no default. This
// makes things a lot more convenient in the common case, since writing out the
// DFA types is pretty annoying.
//
// Since we have two different definitions but only want to write one doc
// string, we use a macro to capture the doc and other attributes once and then
// repeat them for each definition.
macro_rules! define_regex_type {
    ($(#[$doc:meta])*) => {
        #[cfg(feature = "alloc")]
        $(#[$doc])*
        pub struct Regex<A = dense::OwnedDFA, P = prefilter::None> {
            prefilter: Option<P>,
            forward: A,
            reverse: A,
            utf8: bool,
        }

        #[cfg(not(feature = "alloc"))]
        $(#[$doc])*
        pub struct Regex<A, P = prefilter::None> {
            prefilter: Option<P>,
            forward: A,
            reverse: A,
            utf8: bool,
        }
    };
}

define_regex_type!(
    /// A regular expression that uses deterministic finite automata for fast
    /// searching.
    ///
    /// A regular expression is comprised of two DFAs, a "forward" DFA and a
    /// "reverse" DFA. The forward DFA is responsible for detecting the end of
    /// a match while the reverse DFA is responsible for detecting the start
    /// of a match. Thus, in order to find the bounds of any given match, a
    /// forward search must first be run followed by a reverse search. A match
    /// found by the forward DFA guarantees that the reverse DFA will also find
    /// a match.
    ///
    /// The type of the DFA used by a `Regex` corresponds to the `A` type
    /// parameter, which must satisfy the [`Automaton`] trait. Typically,
    /// `A` is either a [`dense::DFA`](crate::dfa::dense::DFA) or a
    /// [`sparse::DFA`](crate::dfa::sparse::DFA), where dense DFAs use more
    /// memory but search faster, while sparse DFAs use less memory but search
    /// more slowly.
    ///
    /// By default, a regex's automaton type parameter is set to
    /// `dense::DFA<Vec<u32>>` when the `alloc` feature is enabled. For most
    /// in-memory work loads, this is the most convenient type that gives the
    /// best search performance. When the `alloc` feature is disabled, no
    /// default type is used.
    ///
    /// A `Regex` also has a `P` type parameter, which is used to select the
    /// prefilter used during search. By default, no prefilter is enabled by
    /// setting the type to default to [`prefilter::None`]. A prefilter can be
    /// enabled by using the [`Regex::prefilter`] method.
    ///
    /// # When should I use this?
    ///
    /// Generally speaking, if you can afford the overhead of building a full
    /// DFA for your regex, and you don't need things like capturing groups,
    /// then this is a good choice if you're looking to optimize for matching
    /// speed. Note however that its speed may be worse than a general purpose
    /// regex engine if you don't select a good [prefilter].
    ///
    /// # Earliest vs Leftmost vs Overlapping
    ///
    /// The search routines exposed on a `Regex` reflect three different ways
    /// of searching:
    ///
    /// * "earliest" means to stop as soon as a match has been detected.
    /// * "leftmost" means to continue matching until the underlying
    ///   automaton cannot advance. This reflects "standard" searching you
    ///   might be used to in other regex engines. e.g., This permits
    ///   non-greedy and greedy searching to work as you would expect.
    /// * "overlapping" means to find all possible matches, even if they
    ///   overlap.
    ///
    /// Generally speaking, when doing an overlapping search, you'll want to
    /// build your regex DFAs with [`MatchKind::All`] semantics. Using
    /// [`MatchKind::LeftmostFirst`] semantics with overlapping searches is
    /// likely to lead to odd behavior since `LeftmostFirst` specifically omits
    /// some matches that can never be reported due to its semantics.
    ///
    /// The following example shows the differences between how these different
    /// types of searches impact looking for matches of `[a-z]+` in the
    /// haystack `abc`.
    ///
    /// ```
    /// use regex_automata::{dfa::{self, dense}, MatchKind, MultiMatch};
    ///
    /// let pattern = r"[a-z]+";
    /// let haystack = "abc".as_bytes();
    ///
    /// // With leftmost-first semantics, we test "earliest" and "leftmost".
    /// let re = dfa::regex::Builder::new()
    ///     .dense(dense::Config::new().match_kind(MatchKind::LeftmostFirst))
    ///     .build(pattern)?;
    ///
    /// // "earliest" searching isn't impacted by greediness
    /// let mut it = re.find_earliest_iter(haystack);
    /// assert_eq!(Some(MultiMatch::must(0, 0, 1)), it.next());
    /// assert_eq!(Some(MultiMatch::must(0, 1, 2)), it.next());
    /// assert_eq!(Some(MultiMatch::must(0, 2, 3)), it.next());
    /// assert_eq!(None, it.next());
    ///
    /// // "leftmost" searching supports greediness (and non-greediness)
    /// let mut it = re.find_leftmost_iter(haystack);
    /// assert_eq!(Some(MultiMatch::must(0, 0, 3)), it.next());
    /// assert_eq!(None, it.next());
    ///
    /// // For overlapping, we want "all" match kind semantics.
    /// let re = dfa::regex::Builder::new()
    ///     .dense(dense::Config::new().match_kind(MatchKind::All))
    ///     .build(pattern)?;
    ///
    /// // In the overlapping search, we find all three possible matches
    /// // starting at the beginning of the haystack.
    /// let mut it = re.find_overlapping_iter(haystack);
    /// assert_eq!(Some(MultiMatch::must(0, 0, 1)), it.next());
    /// assert_eq!(Some(MultiMatch::must(0, 0, 2)), it.next());
    /// assert_eq!(Some(MultiMatch::must(0, 0, 3)), it.next());
    /// assert_eq!(None, it.next());
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    ///
    /// # Sparse DFAs
    ///
    /// Since a `Regex` is generic over the [`Automaton`] trait, it can be
    /// used with any kind of DFA. While this crate constructs dense DFAs by
    /// default, it is easy enough to build corresponding sparse DFAs, and then
    /// build a regex from them:
    ///
    /// ```
    /// use regex_automata::dfa::regex::Regex;
    ///
    /// // First, build a regex that uses dense DFAs.
    /// let dense_re = Regex::new("foo[0-9]+")?;
    ///
    /// // Second, build sparse DFAs from the forward and reverse dense DFAs.
    /// let fwd = dense_re.forward().to_sparse()?;
    /// let rev = dense_re.reverse().to_sparse()?;
    ///
    /// // Third, build a new regex from the constituent sparse DFAs.
    /// let sparse_re = Regex::builder().build_from_dfas(fwd, rev);
    ///
    /// // A regex that uses sparse DFAs can be used just like with dense DFAs.
    /// assert_eq!(true, sparse_re.is_match(b"foo123"));
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    ///
    /// Alternatively, one can use a [`Builder`] to construct a sparse DFA
    /// more succinctly. (Note though that dense DFAs are still constructed
    /// first internally, and then converted to sparse DFAs, as in the example
    /// above.)
    ///
    /// ```
    /// use regex_automata::dfa::regex::Regex;
    ///
    /// let sparse_re = Regex::builder().build_sparse(r"foo[0-9]+")?;
    /// // A regex that uses sparse DFAs can be used just like with dense DFAs.
    /// assert!(sparse_re.is_match(b"foo123"));
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    ///
    /// # Fallibility
    ///
    /// In non-default configurations, the DFAs generated in this module may
    /// return an error during a search. (Currently, the only way this happens
    /// is if quit bytes are added or Unicode word boundaries are heuristically
    /// enabled, both of which are turned off by default.) For convenience, the
    /// main search routines, like [`find_leftmost`](Regex::find_leftmost),
    /// will panic if an error occurs. However, if you need to use DFAs
    /// which may produce an error at search time, then there are fallible
    /// equivalents of all search routines. For example, for `find_leftmost`,
    /// its fallible analog is [`try_find_leftmost`](Regex::try_find_leftmost).
    /// The routines prefixed with `try_` return `Result<Option<MultiMatch>,
    /// MatchError>`, where as the infallible routines simply return
    /// `Option<MultiMatch>`.
    ///
    /// # Example
    ///
    /// This example shows how to cause a search to terminate if it sees a
    /// `\n` byte, and handle the error returned. This could be useful if, for
    /// example, you wanted to prevent a user supplied pattern from matching
    /// across a line boundary.
    ///
    /// ```
    /// use regex_automata::{dfa::{self, regex::Regex}, MatchError};
    ///
    /// let re = Regex::builder()
    ///     .dense(dfa::dense::Config::new().quit(b'\n', true))
    ///     .build(r"foo\p{any}+bar")?;
    ///
    /// let haystack = "foo\nbar".as_bytes();
    /// // Normally this would produce a match, since \p{any} contains '\n'.
    /// // But since we instructed the automaton to enter a quit state if a
    /// // '\n' is observed, this produces a match error instead.
    /// let expected = MatchError::Quit { byte: 0x0A, offset: 3 };
    /// let got = re.try_find_leftmost(haystack).unwrap_err();
    /// assert_eq!(expected, got);
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    #[derive(Clone, Debug)]
);

#[cfg(feature = "alloc")]
impl Regex {
    /// Parse the given regular expression using the default configuration and
    /// return the corresponding regex.
    ///
    /// If you want a non-default configuration, then use the [`Builder`] to
    /// set your own configuration.
    ///
    /// # Example
    ///
    /// ```
    /// use regex_automata::{MultiMatch, dfa::regex::Regex};
    ///
    /// let re = Regex::new("foo[0-9]+bar")?;
    /// assert_eq!(
    ///     Some(MultiMatch::must(0, 3, 14)),
    ///     re.find_leftmost(b"zzzfoo12345barzzz"),
    /// );
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn new(pattern: &str) -> Result<Regex, Error> {
        Builder::new().build(pattern)
    }

    /// Like `new`, but parses multiple patterns into a single "regex set."
    /// This similarly uses the default regex configuration.
    ///
    /// # Example
    ///
    /// ```
    /// use regex_automata::{MultiMatch, dfa::regex::Regex};
    ///
    /// let re = Regex::new_many(&["[a-z]+", "[0-9]+"])?;
    ///
    /// let mut it = re.find_leftmost_iter(b"abc 1 foo 4567 0 quux");
    /// assert_eq!(Some(MultiMatch::must(0, 0, 3)), it.next());
    /// assert_eq!(Some(MultiMatch::must(1, 4, 5)), it.next());
    /// assert_eq!(Some(MultiMatch::must(0, 6, 9)), it.next());
    /// assert_eq!(Some(MultiMatch::must(1, 10, 14)), it.next());
    /// assert_eq!(Some(MultiMatch::must(1, 15, 16)), it.next());
    /// assert_eq!(Some(MultiMatch::must(0, 17, 21)), it.next());
    /// assert_eq!(None, it.next());
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn new_many<P: AsRef<str>>(patterns: &[P]) -> Result<Regex, Error> {
        Builder::new().build_many(patterns)
    }
}

#[cfg(feature = "alloc")]
impl Regex<sparse::DFA<Vec<u8>>> {
    /// Parse the given regular expression using the default configuration,
    /// except using sparse DFAs, and return the corresponding regex.
    ///
    /// If you want a non-default configuration, then use the [`Builder`] to
    /// set your own configuration.
    ///
    /// # Example
    ///
    /// ```
    /// use regex_automata::{MultiMatch, dfa::regex::Regex};
    ///
    /// let re = Regex::new_sparse("foo[0-9]+bar")?;
    /// assert_eq!(
    ///     Some(MultiMatch::must(0, 3, 14)),
    ///     re.find_leftmost(b"zzzfoo12345barzzz"),
    /// );
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn new_sparse(
        pattern: &str,
    ) -> Result<Regex<sparse::DFA<Vec<u8>>>, Error> {
        Builder::new().build_sparse(pattern)
    }

    /// Like `new`, but parses multiple patterns into a single "regex set"
    /// using sparse DFAs. This otherwise similarly uses the default regex
    /// configuration.
    ///
    /// # Example
    ///
    /// ```
    /// use regex_automata::{MultiMatch, dfa::regex::Regex};
    ///
    /// let re = Regex::new_many_sparse(&["[a-z]+", "[0-9]+"])?;
    ///
    /// let mut it = re.find_leftmost_iter(b"abc 1 foo 4567 0 quux");
    /// assert_eq!(Some(MultiMatch::must(0, 0, 3)), it.next());
    /// assert_eq!(Some(MultiMatch::must(1, 4, 5)), it.next());
    /// assert_eq!(Some(MultiMatch::must(0, 6, 9)), it.next());
    /// assert_eq!(Some(MultiMatch::must(1, 10, 14)), it.next());
    /// assert_eq!(Some(MultiMatch::must(1, 15, 16)), it.next());
    /// assert_eq!(Some(MultiMatch::must(0, 17, 21)), it.next());
    /// assert_eq!(None, it.next());
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn new_many_sparse<P: AsRef<str>>(
        patterns: &[P],
    ) -> Result<Regex<sparse::DFA<Vec<u8>>>, Error> {
        Builder::new().build_many_sparse(patterns)
    }
}

/// Convenience routines for regex construction.
#[cfg(feature = "alloc")]
impl Regex {
    /// Return a default configuration for a `Regex`.
    ///
    /// This is a convenience routine to avoid needing to import the `Config`
    /// type when customizing the construction of a regex.
    ///
    /// # Example
    ///
    /// This example shows how to disable UTF-8 mode for `Regex` iteration.
    /// When UTF-8 mode is disabled, the position immediately following an
    /// empty match is where the next search begins, instead of the next
    /// position of a UTF-8 encoded codepoint.
    ///
    /// ```
    /// use regex_automata::{dfa::regex::Regex, MultiMatch};
    ///
    /// let re = Regex::builder()
    ///     .configure(Regex::config().utf8(false))
    ///     .build(r"")?;
    /// let haystack = "a☃z".as_bytes();
    /// let mut it = re.find_leftmost_iter(haystack);
    /// assert_eq!(Some(MultiMatch::must(0, 0, 0)), it.next());
    /// assert_eq!(Some(MultiMatch::must(0, 1, 1)), it.next());
    /// assert_eq!(Some(MultiMatch::must(0, 2, 2)), it.next());
    /// assert_eq!(Some(MultiMatch::must(0, 3, 3)), it.next());
    /// assert_eq!(Some(MultiMatch::must(0, 4, 4)), it.next());
    /// assert_eq!(Some(MultiMatch::must(0, 5, 5)), it.next());
    /// assert_eq!(None, it.next());
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn config() -> Config {
        Config::new()
    }

    /// Return a builder for configuring the construction of a `Regex`.
    ///
    /// This is a convenience routine to avoid needing to import the
    /// [`Builder`] type in common cases.
    ///
    /// # Example
    ///
    /// This example shows how to use the builder to disable UTF-8 mode
    /// everywhere.
    ///
    /// ```
    /// use regex_automata::{
    ///     dfa::regex::Regex,
    ///     nfa::thompson,
    ///     MultiMatch, SyntaxConfig,
    /// };
    ///
    /// let re = Regex::builder()
    ///     .configure(Regex::config().utf8(false))
    ///     .syntax(SyntaxConfig::new().utf8(false))
    ///     .thompson(thompson::Config::new().utf8(false))
    ///     .build(r"foo(?-u:[^b])ar.*")?;
    /// let haystack = b"\xFEfoo\xFFarzz\xE2\x98\xFF\n";
    /// let expected = Some(MultiMatch::must(0, 1, 9));
    /// let got = re.find_leftmost(haystack);
    /// assert_eq!(expected, got);
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn builder() -> Builder {
        Builder::new()
    }
}

/// Standard search routines for finding and iterating over matches.
impl<A: Automaton, P: Prefilter> Regex<A, P> {
    /// Returns true if and only if this regex matches the given haystack.
    ///
    /// This routine may short circuit if it knows that scanning future input
    /// will never lead to a different result. In particular, if the underlying
    /// DFA enters a match state or a dead state, then this routine will return
    /// `true` or `false`, respectively, without inspecting any future input.
    ///
    /// # Panics
    ///
    /// If the underlying DFAs return an error, then this routine panics. This
    /// only occurs in non-default configurations where quit bytes are used or
    /// Unicode word boundaries are heuristically enabled.
    ///
    /// The fallible version of this routine is
    /// [`try_is_match`](Regex::try_is_match).
    ///
    /// # Example
    ///
    /// ```
    /// use regex_automata::dfa::regex::Regex;
    ///
    /// let re = Regex::new("foo[0-9]+bar")?;
    /// assert_eq!(true, re.is_match(b"foo12345bar"));
    /// assert_eq!(false, re.is_match(b"foobar"));
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn is_match(&self, haystack: &[u8]) -> bool {
        self.is_match_at(haystack, 0, haystack.len())
    }

    /// Returns the first position at which a match is found.
    ///
    /// This routine stops scanning input in precisely the same circumstances
    /// as `is_match`. The key difference is that this routine returns the
    /// position at which it stopped scanning input if and only if a match
    /// was found. If no match is found, then `None` is returned.
    ///
    /// # Panics
    ///
    /// If the underlying DFAs return an error, then this routine panics. This
    /// only occurs in non-default configurations where quit bytes are used or
    /// Unicode word boundaries are heuristically enabled.
    ///
    /// The fallible version of this routine is
    /// [`try_find_earliest`](Regex::try_find_earliest).
    ///
    /// # Example
    ///
    /// ```
    /// use regex_automata::{MultiMatch, dfa::regex::Regex};
    ///
    /// // Normally, the leftmost first match would greedily consume as many
    /// // decimal digits as it could. But a match is detected as soon as one
    /// // digit is seen.
    /// let re = Regex::new("foo[0-9]+")?;
    /// assert_eq!(
    ///     Some(MultiMatch::must(0, 0, 4)),
    ///     re.find_earliest(b"foo12345"),
    /// );
    ///
    /// // Normally, the end of the leftmost first match here would be 3,
    /// // but the "earliest" match semantics detect a match earlier.
    /// let re = Regex::new("abc|a")?;
    /// assert_eq!(Some(MultiMatch::must(0, 0, 1)), re.find_earliest(b"abc"));
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn find_earliest(&self, haystack: &[u8]) -> Option<MultiMatch> {
        self.find_earliest_at(haystack, 0, haystack.len())
    }

    /// Returns the start and end offset of the leftmost match. If no match
    /// exists, then `None` is returned.
    ///
    /// # Panics
    ///
    /// If the underlying DFAs return an error, then this routine panics. This
    /// only occurs in non-default configurations where quit bytes are used or
    /// Unicode word boundaries are heuristically enabled.
    ///
    /// The fallible version of this routine is
    /// [`try_find_leftmost`](Regex::try_find_leftmost).
    ///
    /// # Example
    ///
    /// ```
    /// use regex_automata::{MultiMatch, dfa::regex::Regex};
    ///
    /// // Greediness is applied appropriately when compared to find_earliest.
    /// let re = Regex::new("foo[0-9]+")?;
    /// assert_eq!(
    ///     Some(MultiMatch::must(0, 3, 11)),
    ///     re.find_leftmost(b"zzzfoo12345zzz"),
    /// );
    ///
    /// // Even though a match is found after reading the first byte (`a`),
    /// // the default leftmost-first match semantics demand that we find the
    /// // earliest match that prefers earlier parts of the pattern over latter
    /// // parts.
    /// let re = Regex::new("abc|a")?;
    /// assert_eq!(Some(MultiMatch::must(0, 0, 3)), re.find_leftmost(b"abc"));
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn find_leftmost(&self, haystack: &[u8]) -> Option<MultiMatch> {
        self.find_leftmost_at(haystack, 0, haystack.len())
    }

    /// Search for the first overlapping match in `haystack`.
    ///
    /// This routine is principally useful when searching for multiple patterns
    /// on inputs where multiple patterns may match the same regions of text.
    /// In particular, callers must preserve the automaton's search state from
    /// prior calls so that the implementation knows where the last match
    /// occurred and which pattern was reported.
    ///
    /// # Panics
    ///
    /// If the underlying DFAs return an error, then this routine panics. This
    /// only occurs in non-default configurations where quit bytes are used or
    /// Unicode word boundaries are heuristically enabled.
    ///
    /// The fallible version of this routine is
    /// [`try_find_overlapping`](Regex::try_find_overlapping).
    ///
    /// # Example
    ///
    /// This example shows how to run an overlapping search with multiple
    /// regexes.
    ///
    /// ```
    /// use regex_automata::{dfa::{self, regex::Regex}, MatchKind, MultiMatch};
    ///
    /// let re = Regex::builder()
    ///     .dense(dfa::dense::Config::new().match_kind(MatchKind::All))
    ///     .build_many(&[r"\w+$", r"\S+$"])?;
    /// let haystack = "@foo".as_bytes();
    /// let mut state = dfa::OverlappingState::start();
    ///
    /// let expected = Some(MultiMatch::must(1, 0, 4));
    /// let got = re.find_overlapping(haystack, &mut state);
    /// assert_eq!(expected, got);
    ///
    /// // The first pattern also matches at the same position, so re-running
    /// // the search will yield another match. Notice also that the first
    /// // pattern is returned after the second. This is because the second
    /// // pattern begins its match before the first, is therefore an earlier
    /// // match and is thus reported first.
    /// let expected = Some(MultiMatch::must(0, 1, 4));
    /// let got = re.find_overlapping(haystack, &mut state);
    /// assert_eq!(expected, got);
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn find_overlapping(
        &self,
        haystack: &[u8],
        state: &mut OverlappingState,
    ) -> Option<MultiMatch> {
        self.find_overlapping_at(haystack, 0, haystack.len(), state)
    }

    /// Returns an iterator over all non-overlapping "earliest" matches.
    ///
    /// Match positions are reported as soon as a match is known to occur, even
    /// if the standard leftmost match would be longer.
    ///
    /// # Panics
    ///
    /// If the underlying DFAs return an error during iteration, then iteration
    /// panics. This only occurs in non-default configurations where quit bytes
    /// are used or Unicode word boundaries are heuristically enabled.
    ///
    /// The fallible version of this routine is
    /// [`try_find_earliest_iter`](Regex::try_find_earliest_iter).
    ///
    /// # Example
    ///
    /// This example shows how to run an "earliest" iterator.
    ///
    /// ```
    /// use regex_automata::{dfa::regex::Regex, MultiMatch};
    ///
    /// let re = Regex::new("[0-9]+")?;
    /// let haystack = "123".as_bytes();
    ///
    /// // Normally, a standard leftmost iterator would return a single
    /// // match, but since "earliest" detects matches earlier, we get
    /// // three matches.
    /// let mut it = re.find_earliest_iter(haystack);
    /// assert_eq!(Some(MultiMatch::must(0, 0, 1)), it.next());
    /// assert_eq!(Some(MultiMatch::must(0, 1, 2)), it.next());
    /// assert_eq!(Some(MultiMatch::must(0, 2, 3)), it.next());
    /// assert_eq!(None, it.next());
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn find_earliest_iter<'r, 't>(
        &'r self,
        haystack: &'t [u8],
    ) -> FindEarliestMatches<'r, 't, A, P> {
        FindEarliestMatches::new(self, haystack)
    }

    /// Returns an iterator over all non-overlapping leftmost matches in the
    /// given bytes. If no match exists, then the iterator yields no elements.
    ///
    /// This corresponds to the "standard" regex search iterator.
    ///
    /// # Panics
    ///
    /// If the underlying DFAs return an error during iteration, then iteration
    /// panics. This only occurs in non-default configurations where quit bytes
    /// are used or Unicode word boundaries are heuristically enabled.
    ///
    /// The fallible version of this routine is
    /// [`try_find_leftmost_iter`](Regex::try_find_leftmost_iter).
    ///
    /// # Example
    ///
    /// ```
    /// use regex_automata::{MultiMatch, dfa::regex::Regex};
    ///
    /// let re = Regex::new("foo[0-9]+")?;
    /// let text = b"foo1 foo12 foo123";
    /// let matches: Vec<MultiMatch> = re.find_leftmost_iter(text).collect();
    /// assert_eq!(matches, vec![
    ///     MultiMatch::must(0, 0, 4),
    ///     MultiMatch::must(0, 5, 10),
    ///     MultiMatch::must(0, 11, 17),
    /// ]);
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn find_leftmost_iter<'r, 't>(
        &'r self,
        haystack: &'t [u8],
    ) -> FindLeftmostMatches<'r, 't, A, P> {
        FindLeftmostMatches::new(self, haystack)
    }

    /// Returns an iterator over all overlapping matches in the given haystack.
    ///
    /// This routine is principally useful when searching for multiple patterns
    /// on inputs where multiple patterns may match the same regions of text.
    /// The iterator takes care of handling the overlapping state that must be
    /// threaded through every search.
    ///
    /// # Panics
    ///
    /// If the underlying DFAs return an error during iteration, then iteration
    /// panics. This only occurs in non-default configurations where quit bytes
    /// are used or Unicode word boundaries are heuristically enabled.
    ///
    /// The fallible version of this routine is
    /// [`try_find_overlapping_iter`](Regex::try_find_overlapping_iter).
    ///
    /// # Example
    ///
    /// This example shows how to run an overlapping search with multiple
    /// regexes.
    ///
    /// ```
    /// use regex_automata::{dfa::{self, regex::Regex}, MatchKind, MultiMatch};
    ///
    /// let re = Regex::builder()
    ///     .dense(dfa::dense::Config::new().match_kind(MatchKind::All))
    ///     .build_many(&[r"\w+$", r"\S+$"])?;
    /// let haystack = "@foo".as_bytes();
    ///
    /// let mut it = re.find_overlapping_iter(haystack);
    /// assert_eq!(Some(MultiMatch::must(1, 0, 4)), it.next());
    /// assert_eq!(Some(MultiMatch::must(0, 1, 4)), it.next());
    /// assert_eq!(None, it.next());
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn find_overlapping_iter<'r, 't>(
        &'r self,
        haystack: &'t [u8],
    ) -> FindOverlappingMatches<'r, 't, A, P> {
        FindOverlappingMatches::new(self, haystack)
    }
}

/// Lower level infallible search routines that permit controlling where
/// the search starts and ends in a particular sequence. This is useful for
/// executing searches that need to take surrounding context into account. This
/// is required for correctly implementing iteration because of look-around
/// operators (`^`, `$`, `\b`).
impl<A: Automaton, P: Prefilter> Regex<A, P> {
    /// Returns true if and only if this regex matches the given haystack.
    ///
    /// This routine may short circuit if it knows that scanning future input
    /// will never lead to a different result. In particular, if the underlying
    /// DFA enters a match state or a dead state, then this routine will return
    /// `true` or `false`, respectively, without inspecting any future input.
    ///
    /// # Searching a substring of the haystack
    ///
    /// Being an "at" search routine, this permits callers to search a
    /// substring of `haystack` by specifying a range in `haystack`.
    /// Why expose this as an API instead of just asking callers to use
    /// `&input[start..end]`? The reason is that regex matching often wants
    /// to take the surrounding context into account in order to handle
    /// look-around (`^`, `$` and `\b`).
    ///
    /// # Panics
    ///
    /// If the underlying DFAs return an error, then this routine panics. This
    /// only occurs in non-default configurations where quit bytes are used or
    /// Unicode word boundaries are heuristically enabled.
    ///
    /// The fallible version of this routine is
    /// [`try_is_match_at`](Regex::try_is_match_at).
    pub fn is_match_at(
        &self,
        haystack: &[u8],
        start: usize,
        end: usize,
    ) -> bool {
        self.try_is_match_at(haystack, start, end).unwrap()
    }

    /// Returns the first position at which a match is found.
    ///
    /// This routine stops scanning input in precisely the same circumstances
    /// as `is_match`. The key difference is that this routine returns the
    /// position at which it stopped scanning input if and only if a match
    /// was found. If no match is found, then `None` is returned.
    ///
    /// # Searching a substring of the haystack
    ///
    /// Being an "at" search routine, this permits callers to search a
    /// substring of `haystack` by specifying a range in `haystack`.
    /// Why expose this as an API instead of just asking callers to use
    /// `&input[start..end]`? The reason is that regex matching often wants
    /// to take the surrounding context into account in order to handle
    /// look-around (`^`, `$` and `\b`).
    ///
    /// This is useful when implementing an iterator over matches
    /// within the same haystack, which cannot be done correctly by simply
    /// providing a subslice of `haystack`.
    ///
    /// # Panics
    ///
    /// If the underlying DFAs return an error, then this routine panics. This
    /// only occurs in non-default configurations where quit bytes are used or
    /// Unicode word boundaries are heuristically enabled.
    ///
    /// The fallible version of this routine is
    /// [`try_find_earliest_at`](Regex::try_find_earliest_at).
    pub fn find_earliest_at(
        &self,
        haystack: &[u8],
        start: usize,
        end: usize,
    ) -> Option<MultiMatch> {
        self.try_find_earliest_at(haystack, start, end).unwrap()
    }

    /// Returns the same as `find_leftmost`, but starts the search at the given
    /// offset.
    ///
    /// The significance of the starting point is that it takes the surrounding
    /// context into consideration. For example, if the DFA is anchored, then
    /// a match can only occur when `start == 0`.
    ///
    /// # Searching a substring of the haystack
    ///
    /// Being an "at" search routine, this permits callers to search a
    /// substring of `haystack` by specifying a range in `haystack`.
    /// Why expose this as an API instead of just asking callers to use
    /// `&input[start..end]`? The reason is that regex matching often wants
    /// to take the surrounding context into account in order to handle
    /// look-around (`^`, `$` and `\b`).
    ///
    /// This is useful when implementing an iterator over matches within the
    /// same haystack, which cannot be done correctly by simply providing a
    /// subslice of `haystack`.
    ///
    /// # Panics
    ///
    /// If the underlying DFAs return an error, then this routine panics. This
    /// only occurs in non-default configurations where quit bytes are used or
    /// Unicode word boundaries are heuristically enabled.
    ///
    /// The fallible version of this routine is
    /// [`try_find_leftmost_at`](Regex::try_find_leftmost_at).
    pub fn find_leftmost_at(
        &self,
        haystack: &[u8],
        start: usize,
        end: usize,
    ) -> Option<MultiMatch> {
        self.try_find_leftmost_at(haystack, start, end).unwrap()
    }

    /// Search for the first overlapping match within a given range of
    /// `haystack`.
    ///
    /// This routine is principally useful when searching for multiple patterns
    /// on inputs where multiple patterns may match the same regions of text.
    /// In particular, callers must preserve the automaton's search state from
    /// prior calls so that the implementation knows where the last match
    /// occurred and which pattern was reported.
    ///
    /// # Searching a substring of the haystack
    ///
    /// Being an "at" search routine, this permits callers to search a
    /// substring of `haystack` by specifying a range in `haystack`.
    /// Why expose this as an API instead of just asking callers to use
    /// `&input[start..end]`? The reason is that regex matching often wants
    /// to take the surrounding context into account in order to handle
    /// look-around (`^`, `$` and `\b`).
    ///
    /// This is useful when implementing an iterator over matches
    /// within the same haystack, which cannot be done correctly by simply
    /// providing a subslice of `haystack`.
    ///
    /// # Panics
    ///
    /// If the underlying DFAs return an error, then this routine panics. This
    /// only occurs in non-default configurations where quit bytes are used or
    /// Unicode word boundaries are heuristically enabled.
    ///
    /// The fallible version of this routine is
    /// [`try_find_overlapping_at`](Regex::try_find_overlapping_at).
    pub fn find_overlapping_at(
        &self,
        haystack: &[u8],
        start: usize,
        end: usize,
        state: &mut OverlappingState,
    ) -> Option<MultiMatch> {
        self.try_find_overlapping_at(haystack, start, end, state).unwrap()
    }
}

/// Fallible search routines. These may return an error when the underlying
/// DFAs have been configured in a way that permits them to fail during a
/// search.
///
/// Errors during search only occur when the DFA has been explicitly
/// configured to do so, usually by specifying one or more "quit" bytes or by
/// heuristically enabling Unicode word boundaries.
///
/// Errors will never be returned using the default configuration. So these
/// fallible routines are only needed for particular configurations.
impl<A: Automaton, P: Prefilter> Regex<A, P> {
    /// Returns true if and only if this regex matches the given haystack.
    ///
    /// This routine may short circuit if it knows that scanning future input
    /// will never lead to a different result. In particular, if the underlying
    /// DFA enters a match state or a dead state, then this routine will return
    /// `true` or `false`, respectively, without inspecting any future input.
    ///
    /// # Errors
    ///
    /// This routine only errors if the search could not complete. For
    /// DFA-based regexes, this only occurs in a non-default configuration
    /// where quit bytes are used or Unicode word boundaries are heuristically
    /// enabled.
    ///
    /// When a search cannot complete, callers cannot know whether a match
    /// exists or not.
    ///
    /// The infallible (panics on error) version of this routine is
    /// [`is_match`](Regex::is_match).
    pub fn try_is_match(&self, haystack: &[u8]) -> Result<bool, MatchError> {
        self.try_is_match_at(haystack, 0, haystack.len())
    }

    /// Returns the first position at which a match is found.
    ///
    /// This routine stops scanning input in precisely the same circumstances
    /// as `is_match`. The key difference is that this routine returns the
    /// position at which it stopped scanning input if and only if a match
    /// was found. If no match is found, then `None` is returned.
    ///
    /// # Errors
    ///
    /// This routine only errors if the search could not complete. For
    /// DFA-based regexes, this only occurs in a non-default configuration
    /// where quit bytes are used or Unicode word boundaries are heuristically
    /// enabled.
    ///
    /// When a search cannot complete, callers cannot know whether a match
    /// exists or not.
    ///
    /// The infallible (panics on error) version of this routine is
    /// [`find_earliest`](Regex::find_earliest).
    pub fn try_find_earliest(
        &self,
        haystack: &[u8],
    ) -> Result<Option<MultiMatch>, MatchError> {
        self.try_find_earliest_at(haystack, 0, haystack.len())
    }

    /// Returns the start and end offset of the leftmost match. If no match
    /// exists, then `None` is returned.
    ///
    /// # Errors
    ///
    /// This routine only errors if the search could not complete. For
    /// DFA-based regexes, this only occurs in a non-default configuration
    /// where quit bytes are used or Unicode word boundaries are heuristically
    /// enabled.
    ///
    /// When a search cannot complete, callers cannot know whether a match
    /// exists or not.
    ///
    /// The infallible (panics on error) version of this routine is
    /// [`find_leftmost`](Regex::find_leftmost).
    pub fn try_find_leftmost(
        &self,
        haystack: &[u8],
    ) -> Result<Option<MultiMatch>, MatchError> {
        self.try_find_leftmost_at(haystack, 0, haystack.len())
    }

    /// Search for the first overlapping match in `haystack`.
    ///
    /// This routine is principally useful when searching for multiple patterns
    /// on inputs where multiple patterns may match the same regions of text.
    /// In particular, callers must preserve the automaton's search state from
    /// prior calls so that the implementation knows where the last match
    /// occurred and which pattern was reported.
    ///
    /// # Errors
    ///
    /// This routine only errors if the search could not complete. For
    /// DFA-based regexes, this only occurs in a non-default configuration
    /// where quit bytes are used or Unicode word boundaries are heuristically
    /// enabled.
    ///
    /// When a search cannot complete, callers cannot know whether a match
    /// exists or not.
    ///
    /// The infallible (panics on error) version of this routine is
    /// [`find_overlapping`](Regex::find_overlapping).
    pub fn try_find_overlapping(
        &self,
        haystack: &[u8],
        state: &mut OverlappingState,
    ) -> Result<Option<MultiMatch>, MatchError> {
        self.try_find_overlapping_at(haystack, 0, haystack.len(), state)
    }

    /// Returns an iterator over all non-overlapping "earliest" matches.
    ///
    /// Match positions are reported as soon as a match is known to occur, even
    /// if the standard leftmost match would be longer.
    ///
    /// # Errors
    ///
    /// This iterator only yields errors if the search could not complete. For
    /// DFA-based regexes, this only occurs in a non-default configuration
    /// where quit bytes are used or Unicode word boundaries are heuristically
    /// enabled.
    ///
    /// When a search cannot complete, callers cannot know whether a match
    /// exists or not.
    ///
    /// The infallible (panics on error) version of this routine is
    /// [`find_earliest_iter`](Regex::find_earliest_iter).
    pub fn try_find_earliest_iter<'r, 't>(
        &'r self,
        haystack: &'t [u8],
    ) -> TryFindEarliestMatches<'r, 't, A, P> {
        TryFindEarliestMatches::new(self, haystack)
    }

    /// Returns an iterator over all non-overlapping leftmost matches in the
    /// given bytes. If no match exists, then the iterator yields no elements.
    ///
    /// This corresponds to the "standard" regex search iterator.
    ///
    /// # Errors
    ///
    /// This iterator only yields errors if the search could not complete. For
    /// DFA-based regexes, this only occurs in a non-default configuration
    /// where quit bytes are used or Unicode word boundaries are heuristically
    /// enabled.
    ///
    /// When a search cannot complete, callers cannot know whether a match
    /// exists or not.
    ///
    /// The infallible (panics on error) version of this routine is
    /// [`find_leftmost_iter`](Regex::find_leftmost_iter).
    pub fn try_find_leftmost_iter<'r, 't>(
        &'r self,
        haystack: &'t [u8],
    ) -> TryFindLeftmostMatches<'r, 't, A, P> {
        TryFindLeftmostMatches::new(self, haystack)
    }

    /// Returns an iterator over all overlapping matches in the given haystack.
    ///
    /// This routine is principally useful when searching for multiple patterns
    /// on inputs where multiple patterns may match the same regions of text.
    /// The iterator takes care of handling the overlapping state that must be
    /// threaded through every search.
    ///
    /// # Errors
    ///
    /// This iterator only yields errors if the search could not complete. For
    /// DFA-based regexes, this only occurs in a non-default configuration
    /// where quit bytes are used or Unicode word boundaries are heuristically
    /// enabled.
    ///
    /// When a search cannot complete, callers cannot know whether a match
    /// exists or not.
    ///
    /// The infallible (panics on error) version of this routine is
    /// [`find_overlapping_iter`](Regex::find_overlapping_iter).
    pub fn try_find_overlapping_iter<'r, 't>(
        &'r self,
        haystack: &'t [u8],
    ) -> TryFindOverlappingMatches<'r, 't, A, P> {
        TryFindOverlappingMatches::new(self, haystack)
    }
}

/// Lower level fallible search routines that permit controlling where the
/// search starts and ends in a particular sequence.
impl<A: Automaton, P: Prefilter> Regex<A, P> {
    /// Returns true if and only if this regex matches the given haystack.
    ///
    /// This routine may short circuit if it knows that scanning future input
    /// will never lead to a different result. In particular, if the underlying
    /// DFA enters a match state or a dead state, then this routine will return
    /// `true` or `false`, respectively, without inspecting any future input.
    ///
    /// # Searching a substring of the haystack
    ///
    /// Being an "at" search routine, this permits callers to search a
    /// substring of `haystack` by specifying a range in `haystack`.
    /// Why expose this as an API instead of just asking callers to use
    /// `&input[start..end]`? The reason is that regex matching often wants
    /// to take the surrounding context into account in order to handle
    /// look-around (`^`, `$` and `\b`).
    ///
    /// # Errors
    ///
    /// This routine only errors if the search could not complete. For
    /// DFA-based regexes, this only occurs in a non-default configuration
    /// where quit bytes are used, Unicode word boundaries are heuristically
    /// enabled or limits are set on the number of times the lazy DFA's cache
    /// may be cleared.
    ///
    /// When a search cannot complete, callers cannot know whether a match
    /// exists or not.
    ///
    /// The infallible (panics on error) version of this routine is
    /// [`is_match_at`](Regex::is_match_at).
    pub fn try_is_match_at(
        &self,
        haystack: &[u8],
        start: usize,
        end: usize,
    ) -> Result<bool, MatchError> {
        self.forward()
            .find_earliest_fwd_at(
                self.scanner().as_mut(),
                None,
                haystack,
                start,
                end,
            )
            .map(|x| x.is_some())
    }

    /// Returns the first position at which a match is found.
    ///
    /// This routine stops scanning input in precisely the same circumstances
    /// as `is_match`. The key difference is that this routine returns the
    /// position at which it stopped scanning input if and only if a match
    /// was found. If no match is found, then `None` is returned.
    ///
    /// # Searching a substring of the haystack
    ///
    /// Being an "at" search routine, this permits callers to search a
    /// substring of `haystack` by specifying a range in `haystack`.
    /// Why expose this as an API instead of just asking callers to use
    /// `&input[start..end]`? The reason is that regex matching often wants
    /// to take the surrounding context into account in order to handle
    /// look-around (`^`, `$` and `\b`).
    ///
    /// This is useful when implementing an iterator over matches
    /// within the same haystack, which cannot be done correctly by simply
    /// providing a subslice of `haystack`.
    ///
    /// # Errors
    ///
    /// This routine only errors if the search could not complete. For
    /// DFA-based regexes, this only occurs in a non-default configuration
    /// where quit bytes are used or Unicode word boundaries are heuristically
    /// enabled.
    ///
    /// When a search cannot complete, callers cannot know whether a match
    /// exists or not.
    ///
    /// The infallible (panics on error) version of this routine is
    /// [`find_earliest_at`](Regex::find_earliest_at).
    pub fn try_find_earliest_at(
        &self,
        haystack: &[u8],
        start: usize,
        end: usize,
    ) -> Result<Option<MultiMatch>, MatchError> {
        self.try_find_earliest_at_imp(
            self.scanner().as_mut(),
            haystack,
            start,
            end,
        )
    }

    /// The implementation of "earliest" searching, where a prefilter scanner
    /// may be given.
    fn try_find_earliest_at_imp(
        &self,
        pre: Option<&mut prefilter::Scanner>,
        haystack: &[u8],
        start: usize,
        end: usize,
    ) -> Result<Option<MultiMatch>, MatchError> {
        // N.B. We use `&&A` here to call `Automaton` methods, which ensures
        // that we always use the `impl Automaton for &A` for calling methods.
        // Since this is the usual way that automata are used, this helps
        // reduce the number of monomorphized copies of the search code.
        let (fwd, rev) = (self.forward(), self.reverse());
        let end = match (&fwd)
            .find_earliest_fwd_at(pre, None, haystack, start, end)?
        {
            None => return Ok(None),
            Some(end) => end,
        };
        // N.B. The only time we need to tell the reverse searcher the pattern
        // to match is in the overlapping case, since it's ambiguous. In the
        // leftmost case, I have tentatively convinced myself that it isn't
        // necessary and the reverse search will always find the same pattern
        // to match as the forward search. But I lack a rigorous proof.
        let start = (&rev)
            .find_earliest_rev_at(None, haystack, start, end.offset())?
            .expect("reverse search must match if forward search does");
        assert_eq!(
            start.pattern(),
            end.pattern(),
            "forward and reverse search must match same pattern"
        );
        assert!(start.offset() <= end.offset());
        Ok(Some(MultiMatch::new(end.pattern(), start.offset(), end.offset())))
    }

    /// Returns the start and end offset of the leftmost match. If no match
    /// exists, then `None` is returned.
    ///
    /// # Searching a substring of the haystack
    ///
    /// Being an "at" search routine, this permits callers to search a
    /// substring of `haystack` by specifying a range in `haystack`.
    /// Why expose this as an API instead of just asking callers to use
    /// `&input[start..end]`? The reason is that regex matching often wants
    /// to take the surrounding context into account in order to handle
    /// look-around (`^`, `$` and `\b`).
    ///
    /// This is useful when implementing an iterator over matches
    /// within the same haystack, which cannot be done correctly by simply
    /// providing a subslice of `haystack`.
    ///
    /// # Errors
    ///
    /// This routine only errors if the search could not complete. For
    /// DFA-based regexes, this only occurs in a non-default configuration
    /// where quit bytes are used or Unicode word boundaries are heuristically
    /// enabled.
    ///
    /// When a search cannot complete, callers cannot know whether a match
    /// exists or not.
    ///
    /// The infallible (panics on error) version of this routine is
    /// [`find_leftmost_at`](Regex::find_leftmost_at).
    pub fn try_find_leftmost_at(
        &self,
        haystack: &[u8],
        start: usize,
        end: usize,
    ) -> Result<Option<MultiMatch>, MatchError> {
        self.try_find_leftmost_at_imp(
            self.scanner().as_mut(),
            haystack,
            start,
            end,
        )
    }

    /// The implementation of leftmost searching, where a prefilter scanner
    /// may be given.
    fn try_find_leftmost_at_imp(
        &self,
        scanner: Option<&mut prefilter::Scanner>,
        haystack: &[u8],
        start: usize,
        end: usize,
    ) -> Result<Option<MultiMatch>, MatchError> {
        // N.B. We use `&&A` here to call `Automaton` methods, which ensures
        // that we always use the `impl Automaton for &A` for calling methods.
        // Since this is the usual way that automata are used, this helps
        // reduce the number of monomorphized copies of the search code.
        let (fwd, rev) = (self.forward(), self.reverse());
        let end = match (&fwd)
            .find_leftmost_fwd_at(scanner, None, haystack, start, end)?
        {
            None => return Ok(None),
            Some(end) => end,
        };
        // N.B. The only time we need to tell the reverse searcher the pattern
        // to match is in the overlapping case, since it's ambiguous. In the
        // leftmost case, I have tentatively convinced myself that it isn't
        // necessary and the reverse search will always find the same pattern
        // to match as the forward search. But I lack a rigorous proof. Why not
        // just provide the pattern anyway? Well, if it is needed, then leaving
        // it out gives us a chance to find a witness.
        let start = (&rev)
            .find_leftmost_rev_at(None, haystack, start, end.offset())?
            .expect("reverse search must match if forward search does");
        assert_eq!(
            start.pattern(),
            end.pattern(),
            "forward and reverse search must match same pattern",
        );
        assert!(start.offset() <= end.offset());
        Ok(Some(MultiMatch::new(end.pattern(), start.offset(), end.offset())))
    }

    /// Search for the first overlapping match within a given range of
    /// `haystack`.
    ///
    /// This routine is principally useful when searching for multiple patterns
    /// on inputs where multiple patterns may match the same regions of text.
    /// In particular, callers must preserve the automaton's search state from
    /// prior calls so that the implementation knows where the last match
    /// occurred and which pattern was reported.
    ///
    /// # Searching a substring of the haystack
    ///
    /// Being an "at" search routine, this permits callers to search a
    /// substring of `haystack` by specifying a range in `haystack`.
    /// Why expose this as an API instead of just asking callers to use
    /// `&input[start..end]`? The reason is that regex matching often wants
    /// to take the surrounding context into account in order to handle
    /// look-around (`^`, `$` and `\b`).
    ///
    /// This is useful when implementing an iterator over matches
    /// within the same haystack, which cannot be done correctly by simply
    /// providing a subslice of `haystack`.
    ///
    /// # Errors
    ///
    /// This routine only errors if the search could not complete. For
    /// DFA-based regexes, this only occurs in a non-default configuration
    /// where quit bytes are used or Unicode word boundaries are heuristically
    /// enabled.
    ///
    /// When a search cannot complete, callers cannot know whether a match
    /// exists or not.
    ///
    /// The infallible (panics on error) version of this routine is
    /// [`find_overlapping_at`](Regex::find_overlapping_at).
    pub fn try_find_overlapping_at(
        &self,
        haystack: &[u8],
        start: usize,
        end: usize,
        state: &mut OverlappingState,
    ) -> Result<Option<MultiMatch>, MatchError> {
        self.try_find_overlapping_at_imp(
            self.scanner().as_mut(),
            haystack,
            start,
            end,
            state,
        )
    }

    /// The implementation of overlapping search at a given range in
    /// `haystack`, where `scanner` is a prefilter (if active) and `state` is
    /// the current state of the search.
    fn try_find_overlapping_at_imp(
        &self,
        scanner: Option<&mut prefilter::Scanner>,
        haystack: &[u8],
        start: usize,
        end: usize,
        state: &mut OverlappingState,
    ) -> Result<Option<MultiMatch>, MatchError> {
        // N.B. We use `&&A` here to call `Automaton` methods, which ensures
        // that we always use the `impl Automaton for &A` for calling methods.
        // Since this is the usual way that automata are used, this helps
        // reduce the number of monomorphized copies of the search code.
        let (fwd, rev) = (self.forward(), self.reverse());
        // TODO: Decide whether it's worth making this assert work. It doesn't
        // work currently because 'has_starts_for_each_pattern' isn't on the
        // Automaton trait. Without this assert, we still get a panic, but it's
        // a bit more inscrutable.
        // assert!(
        // rev.has_starts_for_each_pattern(),
        // "overlapping searches require that the reverse DFA is \
        // compiled with the 'starts_for_each_pattern' option",
        // );
        let end = match (&fwd).find_overlapping_fwd_at(
            scanner, None, haystack, start, end, state,
        )? {
            None => return Ok(None),
            Some(end) => end,
        };
        // Unlike the leftmost cases, the reverse overlapping search may match
        // a different pattern than the forward search. See test failures when
        // using `None` instead of `Some(end.pattern())` below. Thus, we must
        // run our reverse search using the pattern that matched in the forward
        // direction.
        let start = (&rev)
            .find_leftmost_rev_at(
                Some(end.pattern()),
                haystack,
                0,
                end.offset(),
            )?
            .expect("reverse search must match if forward search does");
        assert!(start.offset() <= end.offset());
        assert_eq!(start.pattern(), end.pattern());
        Ok(Some(MultiMatch::new(end.pattern(), start.offset(), end.offset())))
    }
}

/// Non-search APIs for querying information about the regex and setting a
/// prefilter.
impl<A: Automaton, P: Prefilter> Regex<A, P> {
    /// Attach the given prefilter to this regex.
    pub fn with_prefilter<Q: Prefilter>(self, prefilter: Q) -> Regex<A, Q> {
        Regex {
            prefilter: Some(prefilter),
            forward: self.forward,
            reverse: self.reverse,
            utf8: self.utf8,
        }
    }

    /// Remove any prefilter from this regex.
    pub fn without_prefilter(self) -> Regex<A> {
        Regex {
            prefilter: None,
            forward: self.forward,
            reverse: self.reverse,
            utf8: self.utf8,
        }
    }

    /// Return the underlying DFA responsible for forward matching.
    ///
    /// This is useful for accessing the underlying DFA and converting it to
    /// some other format or size. See the [`Builder::build_from_dfas`] docs
    /// for an example of where this might be useful.
    pub fn forward(&self) -> &A {
        &self.forward
    }

    /// Return the underlying DFA responsible for reverse matching.
    ///
    /// This is useful for accessing the underlying DFA and converting it to
    /// some other format or size. See the [`Builder::build_from_dfas`] docs
    /// for an example of where this might be useful.
    pub fn reverse(&self) -> &A {
        &self.reverse
    }

    /// Returns the total number of patterns matched by this regex.
    ///
    /// # Example
    ///
    /// ```
    /// use regex_automata::{MultiMatch, dfa::regex::Regex};
    ///
    /// let re = Regex::new_many(&[r"[a-z]+", r"[0-9]+", r"\w+"])?;
    /// assert_eq!(3, re.pattern_count());
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn pattern_count(&self) -> usize {
        assert_eq!(
            self.forward().pattern_count(),
            self.reverse().pattern_count()
        );
        self.forward().pattern_count()
    }

    /// Convenience function for returning this regex's prefilter as a trait
    /// object.
    ///
    /// If this regex doesn't have a prefilter, then `None` is returned.
    pub fn prefilter(&self) -> Option<&dyn Prefilter> {
        match self.prefilter {
            None => None,
            Some(ref x) => Some(&*x),
        }
    }

    /// Convenience function for returning a prefilter scanner.
    fn scanner(&self) -> Option<prefilter::Scanner> {
        self.prefilter().map(prefilter::Scanner::new)
    }
}

/// An iterator over all non-overlapping earliest matches for a particular
/// infallible search.
///
/// The iterator yields a [`MultiMatch`] value until no more matches could be
/// found. If the underlying search returns an error, then this panics.
///
/// `A` is the type used to represent the underlying DFAs used by the regex,
/// while `P` is the type of prefilter used, if any. The lifetime variables are
/// as follows:
///
/// * `'r` is the lifetime of the regular expression itself.
/// * `'t` is the lifetime of the text being searched.
#[derive(Clone, Debug)]
pub struct FindEarliestMatches<'r, 't, A, P>(
    TryFindEarliestMatches<'r, 't, A, P>,
);

impl<'r, 't, A: Automaton, P: Prefilter> FindEarliestMatches<'r, 't, A, P> {
    fn new(
        re: &'r Regex<A, P>,
        text: &'t [u8],
    ) -> FindEarliestMatches<'r, 't, A, P> {
        FindEarliestMatches(TryFindEarliestMatches::new(re, text))
    }
}

impl<'r, 't, A: Automaton, P: Prefilter> Iterator
    for FindEarliestMatches<'r, 't, A, P>
{
    type Item = MultiMatch;

    fn next(&mut self) -> Option<MultiMatch> {
        next_unwrap(self.0.next())
    }
}

/// An iterator over all non-overlapping leftmost matches for a particular
/// infallible search.
///
/// The iterator yields a [`MultiMatch`] value until no more matches could be
/// found. If the underlying search returns an error, then this panics.
///
/// `A` is the type used to represent the underlying DFAs used by the regex,
/// while `P` is the type of prefilter used, if any. The lifetime variables are
/// as follows:
///
/// * `'r` is the lifetime of the regular expression itself.
/// * `'t` is the lifetime of the text being searched.
#[derive(Clone, Debug)]
pub struct FindLeftmostMatches<'r, 't, A, P>(
    TryFindLeftmostMatches<'r, 't, A, P>,
);

impl<'r, 't, A: Automaton, P: Prefilter> FindLeftmostMatches<'r, 't, A, P> {
    fn new(
        re: &'r Regex<A, P>,
        text: &'t [u8],
    ) -> FindLeftmostMatches<'r, 't, A, P> {
        FindLeftmostMatches(TryFindLeftmostMatches::new(re, text))
    }
}

impl<'r, 't, A: Automaton, P: Prefilter> Iterator
    for FindLeftmostMatches<'r, 't, A, P>
{
    type Item = MultiMatch;

    fn next(&mut self) -> Option<MultiMatch> {
        next_unwrap(self.0.next())
    }
}

/// An iterator over all overlapping matches for a particular infallible
/// search.
///
/// The iterator yields a [`MultiMatch`] value until no more matches could be
/// found. If the underlying search returns an error, then this panics.
///
/// `A` is the type used to represent the underlying DFAs used by the regex,
/// while `P` is the type of prefilter used, if any. The lifetime variables are
/// as follows:
///
/// * `'r` is the lifetime of the regular expression itself.
/// * `'t` is the lifetime of the text being searched.
#[derive(Clone, Debug)]
pub struct FindOverlappingMatches<'r, 't, A: Automaton, P>(
    TryFindOverlappingMatches<'r, 't, A, P>,
);

impl<'r, 't, A: Automaton, P: Prefilter> FindOverlappingMatches<'r, 't, A, P> {
    fn new(
        re: &'r Regex<A, P>,
        text: &'t [u8],
    ) -> FindOverlappingMatches<'r, 't, A, P> {
        FindOverlappingMatches(TryFindOverlappingMatches::new(re, text))
    }
}

impl<'r, 't, A: Automaton, P: Prefilter> Iterator
    for FindOverlappingMatches<'r, 't, A, P>
{
    type Item = MultiMatch;

    fn next(&mut self) -> Option<MultiMatch> {
        next_unwrap(self.0.next())
    }
}

/// An iterator over all non-overlapping earliest matches for a particular
/// fallible search.
///
/// The iterator yields a [`MultiMatch`] value until no more matches could be
/// found.
///
/// `A` is the type used to represent the underlying DFAs used by the regex,
/// while `P` is the type of prefilter used, if any. The lifetime variables are
/// as follows:
///
/// * `'r` is the lifetime of the regular expression itself.
/// * `'t` is the lifetime of the text being searched.
#[derive(Clone, Debug)]
pub struct TryFindEarliestMatches<'r, 't, A, P> {
    re: &'r Regex<A, P>,
    scanner: Option<prefilter::Scanner<'r>>,
    text: &'t [u8],
    last_end: usize,
    last_match: Option<usize>,
}

impl<'r, 't, A: Automaton, P: Prefilter> TryFindEarliestMatches<'r, 't, A, P> {
    fn new(
        re: &'r Regex<A, P>,
        text: &'t [u8],
    ) -> TryFindEarliestMatches<'r, 't, A, P> {
        let scanner = re.scanner();
        TryFindEarliestMatches {
            re,
            scanner,
            text,
            last_end: 0,
            last_match: None,
        }
    }
}

impl<'r, 't, A: Automaton, P: Prefilter> Iterator
    for TryFindEarliestMatches<'r, 't, A, P>
{
    type Item = Result<MultiMatch, MatchError>;

    fn next(&mut self) -> Option<Result<MultiMatch, MatchError>> {
        if self.last_end > self.text.len() {
            return None;
        }
        let result = self.re.try_find_earliest_at_imp(
            self.scanner.as_mut(),
            self.text,
            self.last_end,
            self.text.len(),
        );
        let m = match result {
            Err(err) => return Some(Err(err)),
            Ok(None) => return None,
            Ok(Some(m)) => m,
        };
        if m.is_empty() {
            // This is an empty match. To ensure we make progress, start
            // the next search at the smallest possible starting position
            // of the next match following this one.
            self.last_end = if self.re.utf8 {
                crate::util::next_utf8(self.text, m.end())
            } else {
                m.end() + 1
            };
            // Don't accept empty matches immediately following a match.
            // Just move on to the next match.
            if Some(m.end()) == self.last_match {
                return self.next();
            }
        } else {
            self.last_end = m.end();
        }
        self.last_match = Some(m.end());
        Some(Ok(m))
    }
}

/// An iterator over all non-overlapping leftmost matches for a particular
/// fallible search.
///
/// The iterator yields a [`MultiMatch`] value until no more matches could be
/// found.
///
/// `A` is the type used to represent the underlying DFAs used by the regex,
/// while `P` is the type of prefilter used, if any. The lifetime variables are
/// as follows:
///
/// * `'r` is the lifetime of the regular expression itself.
/// * `'t` is the lifetime of the text being searched.
#[derive(Clone, Debug)]
pub struct TryFindLeftmostMatches<'r, 't, A, P> {
    re: &'r Regex<A, P>,
    scanner: Option<prefilter::Scanner<'r>>,
    text: &'t [u8],
    last_end: usize,
    last_match: Option<usize>,
}

impl<'r, 't, A: Automaton, P: Prefilter> TryFindLeftmostMatches<'r, 't, A, P> {
    fn new(
        re: &'r Regex<A, P>,
        text: &'t [u8],
    ) -> TryFindLeftmostMatches<'r, 't, A, P> {
        let scanner = re.scanner();
        TryFindLeftmostMatches {
            re,
            scanner,
            text,
            last_end: 0,
            last_match: None,
        }
    }
}

impl<'r, 't, A: Automaton, P: Prefilter> Iterator
    for TryFindLeftmostMatches<'r, 't, A, P>
{
    type Item = Result<MultiMatch, MatchError>;

    fn next(&mut self) -> Option<Result<MultiMatch, MatchError>> {
        if self.last_end > self.text.len() {
            return None;
        }
        let result = self.re.try_find_leftmost_at_imp(
            self.scanner.as_mut(),
            self.text,
            self.last_end,
            self.text.len(),
        );
        let m = match result {
            Err(err) => return Some(Err(err)),
            Ok(None) => return None,
            Ok(Some(m)) => m,
        };
        if m.is_empty() {
            // This is an empty match. To ensure we make progress, start
            // the next search at the smallest possible starting position
            // of the next match following this one.
            self.last_end = if self.re.utf8 {
                crate::util::next_utf8(self.text, m.end())
            } else {
                m.end() + 1
            };
            // Don't accept empty matches immediately following a match.
            // Just move on to the next match.
            if Some(m.end()) == self.last_match {
                return self.next();
            }
        } else {
            self.last_end = m.end();
        }
        self.last_match = Some(m.end());
        Some(Ok(m))
    }
}

/// An iterator over all overlapping matches for a particular fallible search.
///
/// The iterator yields a [`MultiMatch`] value until no more matches could be
/// found.
///
/// `A` is the type used to represent the underlying DFAs used by the regex,
/// while `P` is the type of prefilter used, if any. The lifetime variables are
/// as follows:
///
/// * `'r` is the lifetime of the regular expression itself.
/// * `'t` is the lifetime of the text being searched.
#[derive(Clone, Debug)]
pub struct TryFindOverlappingMatches<'r, 't, A: Automaton, P> {
    re: &'r Regex<A, P>,
    scanner: Option<prefilter::Scanner<'r>>,
    text: &'t [u8],
    last_end: usize,
    state: OverlappingState,
}

impl<'r, 't, A: Automaton, P: Prefilter>
    TryFindOverlappingMatches<'r, 't, A, P>
{
    fn new(
        re: &'r Regex<A, P>,
        text: &'t [u8],
    ) -> TryFindOverlappingMatches<'r, 't, A, P> {
        let scanner = re.scanner();
        TryFindOverlappingMatches {
            re,
            scanner,
            text,
            last_end: 0,
            state: OverlappingState::start(),
        }
    }
}

impl<'r, 't, A: Automaton, P: Prefilter> Iterator
    for TryFindOverlappingMatches<'r, 't, A, P>
{
    type Item = Result<MultiMatch, MatchError>;

    fn next(&mut self) -> Option<Result<MultiMatch, MatchError>> {
        if self.last_end > self.text.len() {
            return None;
        }
        let result = self.re.try_find_overlapping_at_imp(
            self.scanner.as_mut(),
            self.text,
            self.last_end,
            self.text.len(),
            &mut self.state,
        );
        let m = match result {
            Err(err) => return Some(Err(err)),
            Ok(None) => return None,
            Ok(Some(m)) => m,
        };
        // Unlike the non-overlapping case, we're OK with empty matches at this
        // level. In particular, the overlapping search algorithm is itself
        // responsible for ensuring that progress is always made.
        self.last_end = m.end();
        Some(Ok(m))
    }
}

/// The configuration used for compiling a DFA-backed regex.
///
/// A regex configuration is a simple data object that is typically used with
/// [`Builder::configure`].
#[cfg(feature = "alloc")]
#[derive(Clone, Copy, Debug, Default)]
pub struct Config {
    utf8: Option<bool>,
}

#[cfg(feature = "alloc")]
impl Config {
    /// Return a new default regex compiler configuration.
    pub fn new() -> Config {
        Config::default()
    }

    /// Whether to enable UTF-8 mode or not.
    ///
    /// When UTF-8 mode is enabled (the default) and an empty match is seen,
    /// the iterators on [`Regex`] will always start the next search at the
    /// next UTF-8 encoded codepoint when searching valid UTF-8. When UTF-8
    /// mode is disabled, such searches are begun at the next byte offset.
    ///
    /// If this mode is enabled and invalid UTF-8 is given to search, then
    /// behavior is unspecified.
    ///
    /// Generally speaking, one should enable this when
    /// [`SyntaxConfig::utf8`](crate::SyntaxConfig::utf8)
    /// and
    /// [`thompson::Config::utf8`](crate::nfa::thompson::Config::utf8)
    /// are enabled, and disable it otherwise.
    ///
    /// # Example
    ///
    /// This example demonstrates the differences between when this option is
    /// enabled and disabled. The differences only arise when the regex can
    /// return matches of length zero.
    ///
    /// In this first snippet, we show the results when UTF-8 mode is disabled.
    ///
    /// ```
    /// use regex_automata::{dfa::regex::Regex, MultiMatch};
    ///
    /// let re = Regex::builder()
    ///     .configure(Regex::config().utf8(false))
    ///     .build(r"")?;
    /// let haystack = "a☃z".as_bytes();
    /// let mut it = re.find_leftmost_iter(haystack);
    /// assert_eq!(Some(MultiMatch::must(0, 0, 0)), it.next());
    /// assert_eq!(Some(MultiMatch::must(0, 1, 1)), it.next());
    /// assert_eq!(Some(MultiMatch::must(0, 2, 2)), it.next());
    /// assert_eq!(Some(MultiMatch::must(0, 3, 3)), it.next());
    /// assert_eq!(Some(MultiMatch::must(0, 4, 4)), it.next());
    /// assert_eq!(Some(MultiMatch::must(0, 5, 5)), it.next());
    /// assert_eq!(None, it.next());
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    ///
    /// And in this snippet, we execute the same search on the same haystack,
    /// but with UTF-8 mode enabled. Notice that byte offsets that would
    /// otherwise split the encoding of `☃` are not returned.
    ///
    /// ```
    /// use regex_automata::{dfa::regex::Regex, MultiMatch};
    ///
    /// let re = Regex::builder()
    ///     .configure(Regex::config().utf8(true))
    ///     .build(r"")?;
    /// let haystack = "a☃z".as_bytes();
    /// let mut it = re.find_leftmost_iter(haystack);
    /// assert_eq!(Some(MultiMatch::must(0, 0, 0)), it.next());
    /// assert_eq!(Some(MultiMatch::must(0, 1, 1)), it.next());
    /// assert_eq!(Some(MultiMatch::must(0, 4, 4)), it.next());
    /// assert_eq!(Some(MultiMatch::must(0, 5, 5)), it.next());
    /// assert_eq!(None, it.next());
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn utf8(mut self, yes: bool) -> Config {
        self.utf8 = Some(yes);
        self
    }

    /// Returns true if and only if this configuration has UTF-8 mode enabled.
    ///
    /// When UTF-8 mode is enabled and an empty match is seen, the iterators on
    /// [`Regex`] will always start the next search at the next UTF-8 encoded
    /// codepoint. When UTF-8 mode is disabled, such searches are begun at the
    /// next byte offset.
    pub fn get_utf8(&self) -> bool {
        self.utf8.unwrap_or(true)
    }

    /// Overwrite the default configuration such that the options in `o` are
    /// always used. If an option in `o` is not set, then the corresponding
    /// option in `self` is used. If it's not set in `self` either, then it
    /// remains not set.
    pub(crate) fn overwrite(self, o: Config) -> Config {
        Config { utf8: o.utf8.or(self.utf8) }
    }
}

/// A builder for a regex based on deterministic finite automatons.
///
/// This builder permits configuring options for the syntax of a pattern, the
/// NFA construction, the DFA construction and finally the regex searching
/// itself. This builder is different from a general purpose regex builder in
/// that it permits fine grain configuration of the construction process. The
/// trade off for this is complexity, and the possibility of setting a
/// configuration that might not make sense. For example, there are three
/// different UTF-8 modes:
///
/// * [`SyntaxConfig::utf8`](crate::SyntaxConfig::utf8) controls whether the
/// pattern itself can contain sub-expressions that match invalid UTF-8.
/// * [`nfa::thompson::Config::utf8`](crate::nfa::thompson::Config::utf8)
/// controls whether the implicit unanchored prefix added to the NFA can
/// match through invalid UTF-8 or not.
/// * [`Config::utf8`] controls how the regex iterators themselves advance
/// the starting position of the next search when a match with zero length is
/// found.
///
/// Generally speaking, callers will want to either enable all of these or
/// disable all of these.
///
/// Internally, building a regex requires building two DFAs, where one is
/// responsible for finding the end of a match and the other is responsible
/// for finding the start of a match. If you only need to detect whether
/// something matched, or only the end of a match, then you should use a
/// [`dense::Builder`] to construct a single DFA, which is cheaper than
/// building two DFAs.
///
/// # Build methods
///
/// This builder has a few "build" methods. In general, it's the result of
/// combining the following parameters:
///
/// * Building one or many regexes.
/// * Building a regex with dense or sparse DFAs.
///
/// The simplest "build" method is [`Builder::build`]. It accepts a single
/// pattern and builds a dense DFA using `usize` for the state identifier
/// representation.
///
/// The most general "build" method is [`Builder::build_many`], which permits
/// building a regex that searches for multiple patterns simultaneously while
/// using a specific state identifier representation.
///
/// The most flexible "build" method, but hardest to use, is
/// [`Builder::build_from_dfas`]. This exposes the fact that a [`Regex`] is
/// just a pair of DFAs, and this method allows you to specify those DFAs
/// exactly.
///
/// # Example
///
/// This example shows how to disable UTF-8 mode in the syntax, the NFA and
/// the regex itself. This is generally what you want for matching on
/// arbitrary bytes.
///
/// ```
/// use regex_automata::{
///     dfa::regex::Regex, nfa::thompson, MultiMatch, SyntaxConfig
/// };
///
/// let re = Regex::builder()
///     .configure(Regex::config().utf8(false))
///     .syntax(SyntaxConfig::new().utf8(false))
///     .thompson(thompson::Config::new().utf8(false))
///     .build(r"foo(?-u:[^b])ar.*")?;
/// let haystack = b"\xFEfoo\xFFarzz\xE2\x98\xFF\n";
/// let expected = Some(MultiMatch::must(0, 1, 9));
/// let got = re.find_leftmost(haystack);
/// assert_eq!(expected, got);
/// // Notice that `(?-u:[^b])` matches invalid UTF-8,
/// // but the subsequent `.*` does not! Disabling UTF-8
/// // on the syntax permits this. Notice also that the
/// // search was unanchored and skipped over invalid UTF-8.
/// // Disabling UTF-8 on the Thompson NFA permits this.
/// //
/// // N.B. This example does not show the impact of
/// // disabling UTF-8 mode on Config, since that
/// // only impacts regexes that can produce matches of
/// // length 0.
/// assert_eq!(b"foo\xFFarzz", &haystack[got.unwrap().range()]);
///
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```
#[cfg(feature = "alloc")]
#[derive(Clone, Debug)]
pub struct Builder {
    config: Config,
    dfa: dense::Builder,
}

#[cfg(feature = "alloc")]
impl Builder {
    /// Create a new regex builder with the default configuration.
    pub fn new() -> Builder {
        Builder { config: Config::default(), dfa: dense::Builder::new() }
    }

    /// Build a regex from the given pattern.
    ///
    /// If there was a problem parsing or compiling the pattern, then an error
    /// is returned.
    pub fn build(&self, pattern: &str) -> Result<Regex, Error> {
        self.build_many(&[pattern])
    }

    /// Build a regex from the given pattern using sparse DFAs.
    ///
    /// If there was a problem parsing or compiling the pattern, then an error
    /// is returned.
    pub fn build_sparse(
        &self,
        pattern: &str,
    ) -> Result<Regex<sparse::DFA<Vec<u8>>>, Error> {
        self.build_many_sparse(&[pattern])
    }

    /// Build a regex from the given patterns.
    pub fn build_many<P: AsRef<str>>(
        &self,
        patterns: &[P],
    ) -> Result<Regex, Error> {
        let forward = self.dfa.build_many(patterns)?;
        let reverse = self
            .dfa
            .clone()
            .configure(
                dense::Config::new()
                    .anchored(true)
                    .match_kind(MatchKind::All)
                    .starts_for_each_pattern(true),
            )
            .thompson(thompson::Config::new().reverse(true))
            .build_many(patterns)?;
        Ok(self.build_from_dfas(forward, reverse))
    }

    /// Build a sparse regex from the given patterns.
    pub fn build_many_sparse<P: AsRef<str>>(
        &self,
        patterns: &[P],
    ) -> Result<Regex<sparse::DFA<Vec<u8>>>, Error> {
        let re = self.build_many(patterns)?;
        let forward = re.forward().to_sparse()?;
        let reverse = re.reverse().to_sparse()?;
        Ok(self.build_from_dfas(forward, reverse))
    }

    /// Build a regex from its component forward and reverse DFAs.
    ///
    /// This is useful when deserializing a regex from some arbitrary
    /// memory region. This is also useful for building regexes from other
    /// types of DFAs.
    ///
    /// If you're building the DFAs from scratch instead of building new DFAs
    /// from other DFAs, then you'll need to make sure that the reverse DFA is
    /// configured correctly to match the intended semantics. Namely:
    ///
    /// * It should be anchored.
    /// * It should use [`MatchKind::All`] semantics.
    /// * It should match in reverse.
    /// * It should have anchored start states compiled for each pattern.
    /// * Otherwise, its configuration should match the forward DFA.
    ///
    /// If these conditions are satisfied, then behavior of searches is
    /// unspecified.
    ///
    /// Note that when using this constructor, only the configuration from
    /// [`Config`] is applied. The only configuration settings on this builder
    /// only apply when the builder owns the construction of the DFAs
    /// themselves.
    ///
    /// # Example
    ///
    /// This example is a bit a contrived. The usual use of these methods
    /// would involve serializing `initial_re` somewhere and then deserializing
    /// it later to build a regex. But in this case, we do everything in
    /// memory.
    ///
    /// ```
    /// use regex_automata::dfa::regex::Regex;
    ///
    /// let initial_re = Regex::new("foo[0-9]+")?;
    /// assert_eq!(true, initial_re.is_match(b"foo123"));
    ///
    /// let (fwd, rev) = (initial_re.forward(), initial_re.reverse());
    /// let re = Regex::builder().build_from_dfas(fwd, rev);
    /// assert_eq!(true, re.is_match(b"foo123"));
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    ///
    /// This example shows how to build a `Regex` that uses sparse DFAs instead
    /// of dense DFAs without using one of the convenience `build_sparse`
    /// routines:
    ///
    /// ```
    /// use regex_automata::dfa::regex::Regex;
    ///
    /// let initial_re = Regex::new("foo[0-9]+")?;
    /// assert_eq!(true, initial_re.is_match(b"foo123"));
    ///
    /// let fwd = initial_re.forward().to_sparse()?;
    /// let rev = initial_re.reverse().to_sparse()?;
    /// let re = Regex::builder().build_from_dfas(fwd, rev);
    /// assert_eq!(true, re.is_match(b"foo123"));
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn build_from_dfas<A: Automaton>(
        &self,
        forward: A,
        reverse: A,
    ) -> Regex<A> {
        let utf8 = self.config.get_utf8();
        Regex { prefilter: None, forward, reverse, utf8 }
    }

    /// Apply the given regex configuration options to this builder.
    pub fn configure(&mut self, config: Config) -> &mut Builder {
        self.config = self.config.overwrite(config);
        self
    }

    /// Set the syntax configuration for this builder using
    /// [`SyntaxConfig`](crate::SyntaxConfig).
    ///
    /// This permits setting things like case insensitivity, Unicode and multi
    /// line mode.
    pub fn syntax(
        &mut self,
        config: crate::util::syntax::SyntaxConfig,
    ) -> &mut Builder {
        self.dfa.syntax(config);
        self
    }

    /// Set the Thompson NFA configuration for this builder using
    /// [`nfa::thompson::Config`](thompson::Config).
    ///
    /// This permits setting things like whether additional time should be
    /// spent shrinking the size of the NFA.
    pub fn thompson(&mut self, config: thompson::Config) -> &mut Builder {
        self.dfa.thompson(config);
        self
    }

    /// Set the dense DFA compilation configuration for this builder using
    /// [`dense::Config`](dense::Config).
    ///
    /// This permits setting things like whether the underlying DFAs should
    /// be minimized.
    pub fn dense(&mut self, config: dense::Config) -> &mut Builder {
        self.dfa.configure(config);
        self
    }
}

#[cfg(feature = "alloc")]
impl Default for Builder {
    fn default() -> Builder {
        Builder::new()
    }
}

#[inline(always)]
fn next_unwrap(
    item: Option<Result<MultiMatch, MatchError>>,
) -> Option<MultiMatch> {
    match item {
        None => None,
        Some(Ok(m)) => Some(m),
        Some(Err(err)) => panic!(
            "unexpected regex search error: {}\n\
             to handle search errors, use try_ methods",
            err,
        ),
    }
}

Coverage Report

Created: 2025-09-27 06:45