Coverage Report

Created: 2025-07-18 06:22

/rust/registry/src/index.crates.io-6f17d22bba15001f/zerotrie-0.1.3/src/cursor.rs
Line
Count
Source (jump to first uncovered line)
1
// This file is part of ICU4X. For terms of use, please see the file
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5
//! Types for walking stepwise through a trie.
6
//!
7
//! For examples, see the `.cursor()` functions
8
//! and the `Cursor` types in this module.
9
10
use crate::reader;
11
use crate::ZeroAsciiIgnoreCaseTrie;
12
use crate::ZeroTrieSimpleAscii;
13
14
use core::fmt;
15
16
impl<Store> ZeroTrieSimpleAscii<Store>
17
where
18
    Store: AsRef<[u8]> + ?Sized,
19
{
20
    /// Gets a cursor into the current trie.
21
    ///
22
    /// Useful to query a trie with data that is not a slice.
23
    ///
24
    /// This is currently supported only on [`ZeroTrieSimpleAscii`]
25
    /// and [`ZeroAsciiIgnoreCaseTrie`].
26
    ///
27
    /// # Examples
28
    ///
29
    /// Get a value out of a trie by [writing](fmt::Write) it to the cursor:
30
    ///
31
    /// ```
32
    /// use core::fmt::Write;
33
    /// use zerotrie::ZeroTrieSimpleAscii;
34
    ///
35
    /// // A trie with two values: "abc" and "abcdef"
36
    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
37
    ///
38
    /// // Get out the value for "abc"
39
    /// let mut cursor = trie.cursor();
40
    /// write!(&mut cursor, "abc");
41
    /// assert_eq!(cursor.take_value(), Some(0));
42
    /// ```
43
    ///
44
    /// Find the longest prefix match:
45
    ///
46
    /// ```
47
    /// use zerotrie::ZeroTrieSimpleAscii;
48
    ///
49
    /// // A trie with two values: "abc" and "abcdef"
50
    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
51
    ///
52
    /// // Find the longest prefix of the string "abcdxy":
53
    /// let query = b"abcdxy";
54
    /// let mut longest_prefix = 0;
55
    /// let mut cursor = trie.cursor();
56
    /// for (i, b) in query.iter().enumerate() {
57
    ///     // Checking is_empty() is not required, but it is
58
    ///     // good for efficiency
59
    ///     if cursor.is_empty() {
60
    ///         break;
61
    ///     }
62
    ///     if cursor.take_value().is_some() {
63
    ///         longest_prefix = i;
64
    ///     }
65
    ///     cursor.step(*b);
66
    /// }
67
    ///
68
    /// // The longest prefix is "abc" which is length 3:
69
    /// assert_eq!(longest_prefix, 3);
70
    /// ```
71
    #[inline]
72
0
    pub fn cursor(&self) -> ZeroTrieSimpleAsciiCursor {
73
0
        ZeroTrieSimpleAsciiCursor {
74
0
            trie: self.as_borrowed_slice(),
75
0
        }
76
0
    }
77
}
78
79
impl<Store> ZeroAsciiIgnoreCaseTrie<Store>
80
where
81
    Store: AsRef<[u8]> + ?Sized,
82
{
83
    /// Gets a cursor into the current trie.
84
    ///
85
    /// Useful to query a trie with data that is not a slice.
86
    ///
87
    /// This is currently supported only on [`ZeroTrieSimpleAscii`]
88
    /// and [`ZeroAsciiIgnoreCaseTrie`].
89
    ///
90
    /// # Examples
91
    ///
92
    /// Get a value out of a trie by [writing](fmt::Write) it to the cursor:
93
    ///
94
    /// ```
95
    /// use core::fmt::Write;
96
    /// use zerotrie::ZeroAsciiIgnoreCaseTrie;
97
    ///
98
    /// // A trie with two values: "aBc" and "aBcdEf"
99
    /// let trie = ZeroAsciiIgnoreCaseTrie::from_bytes(b"aBc\x80dEf\x81");
100
    ///
101
    /// // Get out the value for "abc" (case-insensitive!)
102
    /// let mut cursor = trie.cursor();
103
    /// write!(&mut cursor, "abc");
104
    /// assert_eq!(cursor.take_value(), Some(0));
105
    /// ```
106
    ///
107
    /// For more examples, see [`ZeroTrieSimpleAscii::cursor`].
108
    #[inline]
109
0
    pub fn cursor(&self) -> ZeroAsciiIgnoreCaseTrieCursor {
110
0
        ZeroAsciiIgnoreCaseTrieCursor {
111
0
            trie: self.as_borrowed_slice(),
112
0
        }
113
0
    }
Unexecuted instantiation: <zerotrie::zerotrie::ZeroAsciiIgnoreCaseTrie<zerovec::zerovec::ZeroVec<u8>>>::cursor
Unexecuted instantiation: <zerotrie::zerotrie::ZeroAsciiIgnoreCaseTrie<_>>::cursor
114
}
115
116
impl<'a> ZeroTrieSimpleAscii<&'a [u8]> {
117
    /// Same as [`ZeroTrieSimpleAscii::cursor()`] but moves self to avoid
118
    /// having to doubly anchor the trie to the stack.
119
    #[inline]
120
0
    pub fn into_cursor(self) -> ZeroTrieSimpleAsciiCursor<'a> {
121
0
        ZeroTrieSimpleAsciiCursor { trie: self }
122
0
    }
123
}
124
125
impl<'a> ZeroAsciiIgnoreCaseTrie<&'a [u8]> {
126
    /// Same as [`ZeroAsciiIgnoreCaseTrie::cursor()`] but moves self to avoid
127
    /// having to doubly anchor the trie to the stack.
128
    #[inline]
129
0
    pub fn into_cursor(self) -> ZeroAsciiIgnoreCaseTrieCursor<'a> {
130
0
        ZeroAsciiIgnoreCaseTrieCursor { trie: self }
131
0
    }
132
}
133
134
/// A cursor into a [`ZeroTrieSimpleAscii`], useful for stepwise lookup.
135
///
136
/// For examples, see [`ZeroTrieSimpleAscii::cursor()`].
137
// Clone but not Copy: <https://stackoverflow.com/q/32324251/1407170>
138
#[derive(Debug, Clone)]
139
pub struct ZeroTrieSimpleAsciiCursor<'a> {
140
    trie: ZeroTrieSimpleAscii<&'a [u8]>,
141
}
142
143
/// A cursor into a [`ZeroAsciiIgnoreCaseTrie`], useful for stepwise lookup.
144
///
145
/// For examples, see [`ZeroAsciiIgnoreCaseTrie::cursor()`].
146
// Clone but not Copy: <https://stackoverflow.com/q/32324251/1407170>
147
#[derive(Debug, Clone)]
148
pub struct ZeroAsciiIgnoreCaseTrieCursor<'a> {
149
    trie: ZeroAsciiIgnoreCaseTrie<&'a [u8]>,
150
}
151
152
/// Information about a probed edge.
153
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
154
#[non_exhaustive] // no need to destructure or construct this in userland
155
pub struct AsciiProbeResult {
156
    /// The character's byte value between this node and its parent.
157
    pub byte: u8,
158
    /// The number of siblings of this node, _including itself_.
159
    pub total_siblings: u8,
160
}
161
162
impl<'a> ZeroTrieSimpleAsciiCursor<'a> {
163
    /// Steps the cursor one character into the trie based on the character's byte value.
164
    ///
165
    /// # Examples
166
    ///
167
    /// Unrolled loop checking for string presence at every step:
168
    ///
169
    /// ```
170
    /// use zerotrie::ZeroTrieSimpleAscii;
171
    ///
172
    /// // A trie with two values: "abc" and "abcdef"
173
    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
174
    ///
175
    /// // Search the trie for the string "abcdxy"
176
    /// let mut cursor = trie.cursor();
177
    /// assert_eq!(cursor.take_value(), None); // ""
178
    /// cursor.step(b'a');
179
    /// assert_eq!(cursor.take_value(), None); // "a"
180
    /// cursor.step(b'b');
181
    /// assert_eq!(cursor.take_value(), None); // "ab"
182
    /// cursor.step(b'c');
183
    /// assert_eq!(cursor.take_value(), Some(0)); // "abc"
184
    /// cursor.step(b'd');
185
    /// assert_eq!(cursor.take_value(), None); // "abcd"
186
    /// assert!(!cursor.is_empty());
187
    /// cursor.step(b'x'); // no strings have the prefix "abcdx"
188
    /// assert!(cursor.is_empty());
189
    /// assert_eq!(cursor.take_value(), None); // "abcdx"
190
    /// cursor.step(b'y');
191
    /// assert_eq!(cursor.take_value(), None); // "abcdxy"
192
    /// ```
193
    ///
194
    /// If the byte is not ASCII, the cursor will become empty:
195
    ///
196
    /// ```
197
    /// use zerotrie::ZeroTrieSimpleAscii;
198
    ///
199
    /// // A trie with two values: "abc" and "abcdef"
200
    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
201
    ///
202
    /// let mut cursor = trie.cursor();
203
    /// assert_eq!(cursor.take_value(), None); // ""
204
    /// cursor.step(b'a');
205
    /// assert_eq!(cursor.take_value(), None); // "a"
206
    /// cursor.step(b'b');
207
    /// assert_eq!(cursor.take_value(), None); // "ab"
208
    /// cursor.step(b'\xFF');
209
    /// assert!(cursor.is_empty());
210
    /// assert_eq!(cursor.take_value(), None);
211
    /// ```
212
    #[inline]
213
0
    pub fn step(&mut self, byte: u8) {
214
0
        reader::step_parameterized::<ZeroTrieSimpleAscii<[u8]>>(&mut self.trie.store, byte);
215
0
    }
216
217
    /// Takes the value at the current position.
218
    ///
219
    /// Calling this function on a new cursor is equivalent to calling `.get()`
220
    /// with the empty string (except that it can only be called once).
221
    ///
222
    /// # Examples
223
    ///
224
    /// ```
225
    /// use zerotrie::ZeroTrieSimpleAscii;
226
    ///
227
    /// // A trie with two values: "" and "abc"
228
    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"\x80abc\x81");
229
    ///
230
    /// assert_eq!(Some(0), trie.get(""));
231
    /// let mut cursor = trie.cursor();
232
    /// assert_eq!(Some(0), cursor.take_value());
233
    /// assert_eq!(None, cursor.take_value());
234
    /// ```
235
    #[inline]
236
0
    pub fn take_value(&mut self) -> Option<usize> {
237
0
        reader::take_value(&mut self.trie.store)
238
0
    }
239
240
    /// Steps the cursor one character into the trie based on an edge index,
241
    /// returning the corresponding character as a byte.
242
    ///
243
    /// This function is similar to [`Self::step()`], but it takes an index instead of a char.
244
    /// This enables stepwise iteration over the contents of the trie.
245
    ///
246
    /// If there are multiple possibilities for the next byte, the `index` argument allows
247
    /// visiting them in order. Since this function steps the cursor, the cursor must be
248
    /// cloned (a cheap operation) in order to visit multiple children.
249
    ///
250
    /// # Examples
251
    ///
252
    /// Continually query index 0 to extract the first item from a trie:
253
    ///
254
    /// ```
255
    /// use zerotrie::ZeroTrieSimpleAscii;
256
    ///
257
    /// let data: &[(String, usize)] = &[
258
    ///     ("ab".to_string(), 111),
259
    ///     ("abcxyz".to_string(), 22),
260
    ///     ("abde".to_string(), 333),
261
    ///     ("afg".to_string(), 44),
262
    /// ];
263
    ///
264
    /// let trie: ZeroTrieSimpleAscii<Vec<u8>> =
265
    ///     data.iter().map(|(s, v)| (s.as_str(), *v)).collect();
266
    ///
267
    /// let mut cursor = trie.cursor();
268
    /// let mut key = String::new();
269
    /// let value = loop {
270
    ///     if let Some(value) = cursor.take_value() {
271
    ///         break value;
272
    ///     }
273
    ///     let probe_result = cursor.probe(0).unwrap();
274
    ///     key.push(char::from(probe_result.byte));
275
    /// };
276
    ///
277
    /// assert_eq!(key, "ab");
278
    /// assert_eq!(value, 111);
279
    /// ```
280
    ///
281
    /// Stepwise iterate over all entries in the trie:
282
    ///
283
    /// ```
284
    /// # use zerotrie::ZeroTrieSimpleAscii;
285
    /// # let data: &[(String, usize)] = &[
286
    /// #     ("ab".to_string(), 111),
287
    /// #     ("abcxyz".to_string(), 22),
288
    /// #     ("abde".to_string(), 333),
289
    /// #     ("afg".to_string(), 44)
290
    /// # ];
291
    /// # let trie: ZeroTrieSimpleAscii<Vec<u8>> = data
292
    /// #     .iter()
293
    /// #     .map(|(s, v)| (s.as_str(), *v))
294
    /// #     .collect();
295
    /// // (trie built as in previous example)
296
    ///
297
    /// // Initialize the iteration at the first child of the trie.
298
    /// let mut stack = Vec::from([(trie.cursor(), 0, 0)]);
299
    /// let mut key = Vec::new();
300
    /// let mut results = Vec::new();
301
    /// loop {
302
    ///     let Some((mut cursor, index, suffix_len)) = stack.pop() else {
303
    ///         // Nothing left in the trie.
304
    ///         break;
305
    ///     };
306
    ///     // Check to see if there is a value at the current node.
307
    ///     if let Some(value) = cursor.take_value() {
308
    ///         results.push((String::from_utf8(key.clone()).unwrap(), value));
309
    ///     }
310
    ///     // Now check for children of the current node.
311
    ///     let mut sub_cursor = cursor.clone();
312
    ///     if let Some(probe_result) = sub_cursor.probe(index) {
313
    ///         // Found a child. Add the current byte edge to the key.
314
    ///         key.push(probe_result.byte);
315
    ///         // Add the child to the stack, and also add back the current
316
    ///         // node if there are more siblings to visit.
317
    ///         if index + 1 < probe_result.total_siblings as usize {
318
    ///             stack.push((cursor, index + 1, suffix_len));
319
    ///             stack.push((sub_cursor, 0, 1));
320
    ///         } else {
321
    ///             stack.push((sub_cursor, 0, suffix_len + 1));
322
    ///         }
323
    ///     } else {
324
    ///         // No more children. Pop this node's bytes from the key.
325
    ///         for _ in 0..suffix_len {
326
    ///             key.pop();
327
    ///         }
328
    ///     }
329
    /// }
330
    ///
331
    /// assert_eq!(&results, data);
332
    /// ```
333
0
    pub fn probe(&mut self, index: usize) -> Option<AsciiProbeResult> {
334
0
        reader::probe_parameterized::<ZeroTrieSimpleAscii<[u8]>>(&mut self.trie.store, index)
335
0
    }
336
337
    /// Checks whether the cursor points to an empty trie.
338
    ///
339
    /// Use this to determine when to stop iterating.
340
    #[inline]
341
0
    pub fn is_empty(&self) -> bool {
342
0
        self.trie.is_empty()
343
0
    }
344
}
345
346
impl<'a> ZeroAsciiIgnoreCaseTrieCursor<'a> {
347
    /// Steps the cursor one byte into the trie.
348
    ///
349
    /// Returns the byte if matched, which may be a different case than the input byte.
350
    /// If this function returns `None`, any lookup loops can be terminated.
351
    ///
352
    /// # Examples
353
    ///
354
    /// Normalize the case of a value by stepping through an ignore-case trie:
355
    ///
356
    /// ```
357
    /// use std::borrow::Cow;
358
    /// use zerotrie::ZeroAsciiIgnoreCaseTrie;
359
    ///
360
    /// // A trie with two values: "aBc" and "aBcdEf"
361
    /// let trie = ZeroAsciiIgnoreCaseTrie::from_bytes(b"aBc\x80dEf\x81");
362
    ///
363
    /// // Get out the value for "abc" and normalize the key string
364
    /// let mut cursor = trie.cursor();
365
    /// let mut key_str = Cow::Borrowed("abc".as_bytes());
366
    /// let mut i = 0;
367
    /// let value = loop {
368
    ///     let Some(&input_byte) = key_str.get(i) else {
369
    ///         break cursor.take_value();
370
    ///     };
371
    ///     let Some(matched_byte) = cursor.step(input_byte) else {
372
    ///         break None;
373
    ///     };
374
    ///     if matched_byte != input_byte {
375
    ///         key_str.to_mut()[i] = matched_byte;
376
    ///     }
377
    ///     i += 1;
378
    /// };
379
    ///
380
    /// assert_eq!(value, Some(0));
381
    /// assert_eq!(&*key_str, "aBc".as_bytes());
382
    /// ```
383
    ///
384
    /// For more examples, see [`ZeroTrieSimpleAsciiCursor::step`].
385
    #[inline]
386
0
    pub fn step(&mut self, byte: u8) -> Option<u8> {
387
0
        reader::step_parameterized::<ZeroAsciiIgnoreCaseTrie<[u8]>>(&mut self.trie.store, byte)
388
0
    }
Unexecuted instantiation: <zerotrie::cursor::ZeroAsciiIgnoreCaseTrieCursor>::step
Unexecuted instantiation: <zerotrie::cursor::ZeroAsciiIgnoreCaseTrieCursor>::step
389
390
    /// Takes the value at the current position.
391
    ///
392
    /// For more details, see [`ZeroTrieSimpleAsciiCursor::take_value`].
393
    #[inline]
394
0
    pub fn take_value(&mut self) -> Option<usize> {
395
0
        reader::take_value(&mut self.trie.store)
396
0
    }
Unexecuted instantiation: <zerotrie::cursor::ZeroAsciiIgnoreCaseTrieCursor>::take_value
Unexecuted instantiation: <zerotrie::cursor::ZeroAsciiIgnoreCaseTrieCursor>::take_value
397
398
    /// Probes the next byte in the cursor.
399
    ///
400
    /// For more details, see [`ZeroTrieSimpleAsciiCursor::probe`].
401
0
    pub fn probe(&mut self, index: usize) -> Option<AsciiProbeResult> {
402
0
        reader::probe_parameterized::<ZeroAsciiIgnoreCaseTrie<[u8]>>(&mut self.trie.store, index)
403
0
    }
404
405
    /// Checks whether the cursor points to an empty trie.
406
    ///
407
    /// For more details, see [`ZeroTrieSimpleAsciiCursor::is_empty`].
408
    #[inline]
409
0
    pub fn is_empty(&self) -> bool {
410
0
        self.trie.is_empty()
411
0
    }
412
}
413
414
impl<'a> fmt::Write for ZeroTrieSimpleAsciiCursor<'a> {
415
    /// Steps the cursor through each ASCII byte of the string.
416
    ///
417
    /// If the string contains non-ASCII chars, an error is returned.
418
    ///
419
    /// # Examples
420
    ///
421
    /// ```
422
    /// use core::fmt::Write;
423
    /// use zerotrie::ZeroTrieSimpleAscii;
424
    ///
425
    /// // A trie with two values: "abc" and "abcdef"
426
    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
427
    ///
428
    /// let mut cursor = trie.cursor();
429
    /// cursor.write_str("abcdxy").expect("all ASCII");
430
    /// cursor.write_str("🚂").expect_err("non-ASCII");
431
    /// ```
432
0
    fn write_str(&mut self, s: &str) -> fmt::Result {
433
0
        for b in s.bytes() {
434
0
            if !b.is_ascii() {
435
0
                return Err(fmt::Error);
436
0
            }
437
0
            self.step(b);
438
        }
439
0
        Ok(())
440
0
    }
441
442
    /// Equivalent to [`ZeroTrieSimpleAsciiCursor::step()`], except returns
443
    /// an error if the char is non-ASCII.
444
    ///
445
    /// # Examples
446
    ///
447
    /// ```
448
    /// use core::fmt::Write;
449
    /// use zerotrie::ZeroTrieSimpleAscii;
450
    ///
451
    /// // A trie with two values: "abc" and "abcdef"
452
    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
453
    ///
454
    /// let mut cursor = trie.cursor();
455
    /// cursor.write_char('a').expect("ASCII");
456
    /// cursor.write_char('x').expect("ASCII");
457
    /// cursor.write_char('🚂').expect_err("non-ASCII");
458
    /// ```
459
0
    fn write_char(&mut self, c: char) -> fmt::Result {
460
0
        if !c.is_ascii() {
461
0
            return Err(fmt::Error);
462
0
        }
463
0
        self.step(c as u8);
464
0
        Ok(())
465
0
    }
466
}
467
468
impl<'a> fmt::Write for ZeroAsciiIgnoreCaseTrieCursor<'a> {
469
    /// Steps the cursor through each ASCII byte of the string.
470
    ///
471
    /// If the string contains non-ASCII chars, an error is returned.
472
0
    fn write_str(&mut self, s: &str) -> fmt::Result {
473
0
        for b in s.bytes() {
474
0
            if !b.is_ascii() {
475
0
                return Err(fmt::Error);
476
0
            }
477
0
            self.step(b);
478
        }
479
0
        Ok(())
480
0
    }
481
482
    /// Equivalent to [`ZeroAsciiIgnoreCaseTrieCursor::step()`], except returns
483
    /// an error if the char is non-ASCII.
484
0
    fn write_char(&mut self, c: char) -> fmt::Result {
485
0
        if !c.is_ascii() {
486
0
            return Err(fmt::Error);
487
0
        }
488
0
        self.step(c as u8);
489
0
        Ok(())
490
0
    }
491
}