Coverage Report

Created: 2026-03-31 07:09

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/fst-0.4.7/src/automaton/mod.rs
Line
Count
Source
1
#[cfg(feature = "levenshtein")]
2
pub use self::levenshtein::{Levenshtein, LevenshteinError};
3
4
#[cfg(feature = "levenshtein")]
5
mod levenshtein;
6
7
/// Automaton describes types that behave as a finite automaton.
8
///
9
/// All implementors of this trait are represented by *byte based* automata.
10
/// Stated differently, all transitions in the automata correspond to a single
11
/// byte in the input.
12
///
13
/// This implementation choice is important for a couple reasons:
14
///
15
/// 1. The set of possible transitions in each node is small, which may make
16
///    efficient memory usage easier.
17
/// 2. The finite state transducers in this crate are all byte based, so any
18
///    automata used on them must also be byte based.
19
///
20
/// In practice, this does present somewhat of a problem, for example, if
21
/// you're storing UTF-8 encoded strings in a finite state transducer. Consider
22
/// using a `Levenshtein` automaton, which accepts a query string and an edit
23
/// distance. The edit distance should apply to some notion of *character*,
24
/// which could be represented by at least 1-4 bytes in a UTF-8 encoding (for
25
/// some definition of "character"). Therefore, the automaton must have UTF-8
26
/// decoding built into it. This can be tricky to implement, so you may find
27
/// the [`utf8-ranges`](https://crates.io/crates/utf8-ranges) crate useful.
28
pub trait Automaton {
29
    /// The type of the state used in the automaton.
30
    type State;
31
32
    /// Returns a single start state for this automaton.
33
    ///
34
    /// This method should always return the same value for each
35
    /// implementation.
36
    fn start(&self) -> Self::State;
37
38
    /// Returns true if and only if `state` is a match state.
39
    fn is_match(&self, state: &Self::State) -> bool;
40
41
    /// Returns true if and only if `state` can lead to a match in zero or more
42
    /// steps.
43
    ///
44
    /// If this returns `false`, then no sequence of inputs from this state
45
    /// should ever produce a match. If this does not follow, then those match
46
    /// states may never be reached. In other words, behavior may be incorrect.
47
    ///
48
    /// If this returns `true` even when no match is possible, then behavior
49
    /// will be correct, but callers may be forced to do additional work.
50
0
    fn can_match(&self, _state: &Self::State) -> bool {
51
0
        true
52
0
    }
53
54
    /// Returns true if and only if `state` matches and must match no matter
55
    /// what steps are taken.
56
    ///
57
    /// If this returns `true`, then every sequence of inputs from this state
58
    /// produces a match. If this does not follow, then those match states may
59
    /// never be reached. In other words, behavior may be incorrect.
60
    ///
61
    /// If this returns `false` even when every sequence of inputs will lead to
62
    /// a match, then behavior will be correct, but callers may be forced to do
63
    /// additional work.
64
0
    fn will_always_match(&self, _state: &Self::State) -> bool {
65
0
        false
66
0
    }
67
68
    /// Return the next state given `state` and an input.
69
    fn accept(&self, state: &Self::State, byte: u8) -> Self::State;
70
71
    /// If applicable, return the next state when the end of a key is seen.
72
0
    fn accept_eof(&self, _: &Self::State) -> Option<Self::State> {
73
0
        None
74
0
    }
75
76
    /// Returns an automaton that matches the strings that start with something
77
    /// this automaton matches.
78
0
    fn starts_with(self) -> StartsWith<Self>
79
0
    where
80
0
        Self: Sized,
81
    {
82
0
        StartsWith(self)
83
0
    }
84
85
    /// Returns an automaton that matches the strings matched by either this or
86
    /// the other automaton.
87
0
    fn union<Rhs: Automaton>(self, rhs: Rhs) -> Union<Self, Rhs>
88
0
    where
89
0
        Self: Sized,
90
    {
91
0
        Union(self, rhs)
92
0
    }
93
94
    /// Returns an automaton that matches the strings matched by both this and
95
    /// the other automaton.
96
0
    fn intersection<Rhs: Automaton>(self, rhs: Rhs) -> Intersection<Self, Rhs>
97
0
    where
98
0
        Self: Sized,
99
    {
100
0
        Intersection(self, rhs)
101
0
    }
102
103
    /// Returns an automaton that matches the strings not matched by this
104
    /// automaton.
105
0
    fn complement(self) -> Complement<Self>
106
0
    where
107
0
        Self: Sized,
108
    {
109
0
        Complement(self)
110
0
    }
111
}
112
113
impl<'a, T: Automaton> Automaton for &'a T {
114
    type State = T::State;
115
116
0
    fn start(&self) -> T::State {
117
0
        (*self).start()
118
0
    }
119
120
0
    fn is_match(&self, state: &T::State) -> bool {
121
0
        (*self).is_match(state)
122
0
    }
123
124
0
    fn can_match(&self, state: &T::State) -> bool {
125
0
        (*self).can_match(state)
126
0
    }
127
128
0
    fn will_always_match(&self, state: &T::State) -> bool {
129
0
        (*self).will_always_match(state)
130
0
    }
131
132
0
    fn accept(&self, state: &T::State, byte: u8) -> T::State {
133
0
        (*self).accept(state, byte)
134
0
    }
135
136
0
    fn accept_eof(&self, state: &Self::State) -> Option<Self::State> {
137
0
        (*self).accept_eof(state)
138
0
    }
139
}
140
141
/// An automaton that matches if the input equals to a specific string.
142
///
143
/// It can be used in combination with [`StartsWith`] to search strings
144
/// starting with a given prefix.
145
///
146
/// ```rust
147
/// extern crate fst;
148
///
149
/// use fst::{Automaton, IntoStreamer, Streamer, Set};
150
/// use fst::automaton::Str;
151
///
152
/// # fn main() { example().unwrap(); }
153
/// fn example() -> Result<(), Box<dyn std::error::Error>> {
154
///     let paths = vec!["/home/projects/bar", "/home/projects/foo", "/tmp/foo"];
155
///     let set = Set::from_iter(paths)?;
156
///
157
///     // Build our prefix query.
158
///     let prefix = Str::new("/home").starts_with();
159
///
160
///     // Apply our query to the set we built.
161
///     let mut stream = set.search(prefix).into_stream();
162
///
163
///     let matches = stream.into_strs()?;
164
///     assert_eq!(matches, vec!["/home/projects/bar", "/home/projects/foo"]);
165
///     Ok(())
166
/// }
167
/// ```
168
#[derive(Clone, Debug)]
169
pub struct Str<'a> {
170
    string: &'a [u8],
171
}
172
173
impl<'a> Str<'a> {
174
    /// Constructs automaton that matches an exact string.
175
    #[inline]
176
0
    pub fn new(string: &'a str) -> Str<'a> {
177
0
        Str { string: string.as_bytes() }
178
0
    }
179
}
180
181
impl<'a> Automaton for Str<'a> {
182
    type State = Option<usize>;
183
184
    #[inline]
185
0
    fn start(&self) -> Option<usize> {
186
0
        Some(0)
187
0
    }
188
189
    #[inline]
190
0
    fn is_match(&self, pos: &Option<usize>) -> bool {
191
0
        *pos == Some(self.string.len())
192
0
    }
193
194
    #[inline]
195
0
    fn can_match(&self, pos: &Option<usize>) -> bool {
196
0
        pos.is_some()
197
0
    }
198
199
    #[inline]
200
0
    fn accept(&self, pos: &Option<usize>, byte: u8) -> Option<usize> {
201
        // if we aren't already past the end...
202
0
        if let Some(pos) = *pos {
203
            // and there is still a matching byte at the current position...
204
0
            if self.string.get(pos).cloned() == Some(byte) {
205
                // then move forward
206
0
                return Some(pos + 1);
207
0
            }
208
0
        }
209
        // otherwise we're either past the end or didn't match the byte
210
0
        None
211
0
    }
212
}
213
214
/// An automaton that matches if the input contains a specific subsequence.
215
///
216
/// It can be used to build a simple fuzzy-finder.
217
///
218
/// ```rust
219
/// extern crate fst;
220
///
221
/// use fst::{IntoStreamer, Streamer, Set};
222
/// use fst::automaton::Subsequence;
223
///
224
/// # fn main() { example().unwrap(); }
225
/// fn example() -> Result<(), Box<dyn std::error::Error>> {
226
///     let paths = vec!["/home/projects/bar", "/home/projects/foo", "/tmp/foo"];
227
///     let set = Set::from_iter(paths)?;
228
///
229
///     // Build our fuzzy query.
230
///     let subseq = Subsequence::new("hpf");
231
///
232
///     // Apply our fuzzy query to the set we built.
233
///     let mut stream = set.search(subseq).into_stream();
234
///
235
///     let matches = stream.into_strs()?;
236
///     assert_eq!(matches, vec!["/home/projects/foo"]);
237
///     Ok(())
238
/// }
239
/// ```
240
#[derive(Clone, Debug)]
241
pub struct Subsequence<'a> {
242
    subseq: &'a [u8],
243
}
244
245
impl<'a> Subsequence<'a> {
246
    /// Constructs automaton that matches input containing the
247
    /// specified subsequence.
248
    #[inline]
249
0
    pub fn new(subsequence: &'a str) -> Subsequence<'a> {
250
0
        Subsequence { subseq: subsequence.as_bytes() }
251
0
    }
252
}
253
254
impl<'a> Automaton for Subsequence<'a> {
255
    type State = usize;
256
257
    #[inline]
258
0
    fn start(&self) -> usize {
259
0
        0
260
0
    }
261
262
    #[inline]
263
0
    fn is_match(&self, &state: &usize) -> bool {
264
0
        state == self.subseq.len()
265
0
    }
266
267
    #[inline]
268
0
    fn can_match(&self, _: &usize) -> bool {
269
0
        true
270
0
    }
271
272
    #[inline]
273
0
    fn will_always_match(&self, &state: &usize) -> bool {
274
0
        state == self.subseq.len()
275
0
    }
276
277
    #[inline]
278
0
    fn accept(&self, &state: &usize, byte: u8) -> usize {
279
0
        if state == self.subseq.len() {
280
0
            return state;
281
0
        }
282
0
        state + (byte == self.subseq[state]) as usize
283
0
    }
284
}
285
286
/// An automaton that always matches.
287
///
288
/// This is useful in a generic context as a way to express that no automaton
289
/// should be used.
290
#[derive(Clone, Debug)]
291
pub struct AlwaysMatch;
292
293
impl Automaton for AlwaysMatch {
294
    type State = ();
295
296
    #[inline]
297
0
    fn start(&self) -> () {
298
0
        ()
299
0
    }
300
    #[inline]
301
0
    fn is_match(&self, _: &()) -> bool {
302
0
        true
303
0
    }
304
    #[inline]
305
0
    fn can_match(&self, _: &()) -> bool {
306
0
        true
307
0
    }
308
    #[inline]
309
0
    fn will_always_match(&self, _: &()) -> bool {
310
0
        true
311
0
    }
312
    #[inline]
313
0
    fn accept(&self, _: &(), _: u8) -> () {
314
0
        ()
315
0
    }
316
}
317
318
/// An automaton that matches a string that begins with something that the
319
/// wrapped automaton matches.
320
#[derive(Clone, Debug)]
321
pub struct StartsWith<A>(A);
322
323
/// The `Automaton` state for `StartsWith<A>`.
324
pub struct StartsWithState<A: Automaton>(StartsWithStateKind<A>);
325
326
enum StartsWithStateKind<A: Automaton> {
327
    Done,
328
    Running(A::State),
329
}
330
331
impl<A: Automaton> Automaton for StartsWith<A> {
332
    type State = StartsWithState<A>;
333
334
0
    fn start(&self) -> StartsWithState<A> {
335
        StartsWithState({
336
0
            let inner = self.0.start();
337
0
            if self.0.is_match(&inner) {
338
0
                StartsWithStateKind::Done
339
            } else {
340
0
                StartsWithStateKind::Running(inner)
341
            }
342
        })
343
0
    }
344
345
0
    fn is_match(&self, state: &StartsWithState<A>) -> bool {
346
0
        match state.0 {
347
0
            StartsWithStateKind::Done => true,
348
0
            StartsWithStateKind::Running(_) => false,
349
        }
350
0
    }
351
352
0
    fn can_match(&self, state: &StartsWithState<A>) -> bool {
353
0
        match state.0 {
354
0
            StartsWithStateKind::Done => true,
355
0
            StartsWithStateKind::Running(ref inner) => self.0.can_match(inner),
356
        }
357
0
    }
358
359
0
    fn will_always_match(&self, state: &StartsWithState<A>) -> bool {
360
0
        match state.0 {
361
0
            StartsWithStateKind::Done => true,
362
0
            StartsWithStateKind::Running(_) => false,
363
        }
364
0
    }
365
366
0
    fn accept(
367
0
        &self,
368
0
        state: &StartsWithState<A>,
369
0
        byte: u8,
370
0
    ) -> StartsWithState<A> {
371
0
        StartsWithState(match state.0 {
372
0
            StartsWithStateKind::Done => StartsWithStateKind::Done,
373
0
            StartsWithStateKind::Running(ref inner) => {
374
0
                let next_inner = self.0.accept(inner, byte);
375
0
                if self.0.is_match(&next_inner) {
376
0
                    StartsWithStateKind::Done
377
                } else {
378
0
                    StartsWithStateKind::Running(next_inner)
379
                }
380
            }
381
        })
382
0
    }
383
}
384
385
/// An automaton that matches when one of its component automata match.
386
#[derive(Clone, Debug)]
387
pub struct Union<A, B>(A, B);
388
389
/// The `Automaton` state for `Union<A, B>`.
390
pub struct UnionState<A: Automaton, B: Automaton>(A::State, B::State);
391
392
impl<A: Automaton, B: Automaton> Automaton for Union<A, B> {
393
    type State = UnionState<A, B>;
394
395
0
    fn start(&self) -> UnionState<A, B> {
396
0
        UnionState(self.0.start(), self.1.start())
397
0
    }
398
399
0
    fn is_match(&self, state: &UnionState<A, B>) -> bool {
400
0
        self.0.is_match(&state.0) || self.1.is_match(&state.1)
401
0
    }
402
403
0
    fn can_match(&self, state: &UnionState<A, B>) -> bool {
404
0
        self.0.can_match(&state.0) || self.1.can_match(&state.1)
405
0
    }
406
407
0
    fn will_always_match(&self, state: &UnionState<A, B>) -> bool {
408
0
        self.0.will_always_match(&state.0)
409
0
            || self.1.will_always_match(&state.1)
410
0
    }
411
412
0
    fn accept(&self, state: &UnionState<A, B>, byte: u8) -> UnionState<A, B> {
413
0
        UnionState(
414
0
            self.0.accept(&state.0, byte),
415
0
            self.1.accept(&state.1, byte),
416
0
        )
417
0
    }
418
}
419
420
/// An automaton that matches when both of its component automata match.
421
#[derive(Clone, Debug)]
422
pub struct Intersection<A, B>(A, B);
423
424
/// The `Automaton` state for `Intersection<A, B>`.
425
pub struct IntersectionState<A: Automaton, B: Automaton>(A::State, B::State);
426
427
impl<A: Automaton, B: Automaton> Automaton for Intersection<A, B> {
428
    type State = IntersectionState<A, B>;
429
430
0
    fn start(&self) -> IntersectionState<A, B> {
431
0
        IntersectionState(self.0.start(), self.1.start())
432
0
    }
433
434
0
    fn is_match(&self, state: &IntersectionState<A, B>) -> bool {
435
0
        self.0.is_match(&state.0) && self.1.is_match(&state.1)
436
0
    }
437
438
0
    fn can_match(&self, state: &IntersectionState<A, B>) -> bool {
439
0
        self.0.can_match(&state.0) && self.1.can_match(&state.1)
440
0
    }
441
442
0
    fn will_always_match(&self, state: &IntersectionState<A, B>) -> bool {
443
0
        self.0.will_always_match(&state.0)
444
0
            && self.1.will_always_match(&state.1)
445
0
    }
446
447
0
    fn accept(
448
0
        &self,
449
0
        state: &IntersectionState<A, B>,
450
0
        byte: u8,
451
0
    ) -> IntersectionState<A, B> {
452
0
        IntersectionState(
453
0
            self.0.accept(&state.0, byte),
454
0
            self.1.accept(&state.1, byte),
455
0
        )
456
0
    }
457
}
458
459
/// An automaton that matches exactly when the automaton it wraps does not.
460
#[derive(Clone, Debug)]
461
pub struct Complement<A>(A);
462
463
/// The `Automaton` state for `Complement<A>`.
464
pub struct ComplementState<A: Automaton>(A::State);
465
466
impl<A: Automaton> Automaton for Complement<A> {
467
    type State = ComplementState<A>;
468
469
0
    fn start(&self) -> ComplementState<A> {
470
0
        ComplementState(self.0.start())
471
0
    }
472
473
0
    fn is_match(&self, state: &ComplementState<A>) -> bool {
474
0
        !self.0.is_match(&state.0)
475
0
    }
476
477
0
    fn can_match(&self, state: &ComplementState<A>) -> bool {
478
0
        !self.0.will_always_match(&state.0)
479
0
    }
480
481
0
    fn will_always_match(&self, state: &ComplementState<A>) -> bool {
482
0
        !self.0.can_match(&state.0)
483
0
    }
484
485
0
    fn accept(
486
0
        &self,
487
0
        state: &ComplementState<A>,
488
0
        byte: u8,
489
0
    ) -> ComplementState<A> {
490
0
        ComplementState(self.0.accept(&state.0, byte))
491
0
    }
492
}