Coverage Report

Created: 2026-03-31 07:09

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/fst-0.4.7/src/raw/node.rs
Line
Count
Source
1
use std::cmp;
2
use std::fmt;
3
use std::io;
4
use std::ops::Range;
5
6
use crate::bytes;
7
use crate::raw::build::BuilderNode;
8
use crate::raw::common_inputs::{COMMON_INPUTS, COMMON_INPUTS_INV};
9
use crate::raw::{
10
    u64_to_usize, CompiledAddr, Output, Transition, EMPTY_ADDRESS,
11
};
12
13
/// The threshold (in number of transitions) at which an index is created for
14
/// a node's transitions. This speeds up lookup time at the expense of FST
15
/// size.
16
const TRANS_INDEX_THRESHOLD: usize = 32;
17
18
/// Node represents a single state in a finite state transducer.
19
///
20
/// Nodes are very cheap to construct. Notably, they satisfy the `Copy` trait.
21
#[derive(Clone, Copy)]
22
pub struct Node<'f> {
23
    data: &'f [u8],
24
    version: u64,
25
    state: State,
26
    start: CompiledAddr,
27
    end: usize,
28
    is_final: bool,
29
    ntrans: usize,
30
    sizes: PackSizes,
31
    final_output: Output,
32
}
33
34
impl<'f> fmt::Debug for Node<'f> {
35
0
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
36
0
        writeln!(f, "NODE@{}", self.start)?;
37
0
        writeln!(f, "  end_addr: {}", self.end)?;
38
0
        writeln!(f, "  size: {} bytes", self.as_slice().len())?;
39
0
        writeln!(f, "  state: {:?}", self.state)?;
40
0
        writeln!(f, "  is_final: {}", self.is_final())?;
41
0
        writeln!(f, "  final_output: {:?}", self.final_output())?;
42
0
        writeln!(f, "  # transitions: {}", self.len())?;
43
0
        writeln!(f, "  transitions:")?;
44
0
        for t in self.transitions() {
45
0
            writeln!(f, "    {:?}", t)?;
46
        }
47
0
        Ok(())
48
0
    }
49
}
50
51
impl<'f> Node<'f> {
52
    /// Creates a new note at the address given.
53
    ///
54
    /// `data` should be a slice to an entire FST.
55
0
    pub(crate) fn new(
56
0
        version: u64,
57
0
        addr: CompiledAddr,
58
0
        data: &[u8],
59
0
    ) -> Node<'_> {
60
0
        let state = State::new(data, addr);
61
0
        match state {
62
0
            State::EmptyFinal => Node {
63
0
                data: &[],
64
0
                version,
65
0
                state: State::EmptyFinal,
66
0
                start: EMPTY_ADDRESS,
67
0
                end: EMPTY_ADDRESS,
68
0
                is_final: true,
69
0
                ntrans: 0,
70
0
                sizes: PackSizes::new(),
71
0
                final_output: Output::zero(),
72
0
            },
73
0
            State::OneTransNext(s) => {
74
0
                let data = &data[..addr + 1];
75
0
                Node {
76
0
                    data,
77
0
                    version,
78
0
                    state,
79
0
                    start: addr,
80
0
                    end: s.end_addr(data),
81
0
                    is_final: false,
82
0
                    sizes: PackSizes::new(),
83
0
                    ntrans: 1,
84
0
                    final_output: Output::zero(),
85
0
                }
86
            }
87
0
            State::OneTrans(s) => {
88
0
                let data = &data[..addr + 1];
89
0
                let sizes = s.sizes(data);
90
0
                Node {
91
0
                    data,
92
0
                    version,
93
0
                    state,
94
0
                    start: addr,
95
0
                    end: s.end_addr(data, sizes),
96
0
                    is_final: false,
97
0
                    ntrans: 1,
98
0
                    sizes,
99
0
                    final_output: Output::zero(),
100
0
                }
101
            }
102
0
            State::AnyTrans(s) => {
103
0
                let data = &data[..addr + 1];
104
0
                let sizes = s.sizes(data);
105
0
                let ntrans = s.ntrans(data);
106
0
                Node {
107
0
                    data,
108
0
                    version,
109
0
                    state,
110
0
                    start: addr,
111
0
                    end: s.end_addr(version, data, sizes, ntrans),
112
0
                    is_final: s.is_final_state(),
113
0
                    ntrans,
114
0
                    sizes,
115
0
                    final_output: s.final_output(version, data, sizes, ntrans),
116
0
                }
117
            }
118
        }
119
0
    }
120
    /// Returns an iterator over all transitions in this node in lexicographic
121
    /// order.
122
    #[inline]
123
0
    pub fn transitions<'n>(&'n self) -> Transitions<'f, 'n> {
124
0
        Transitions { node: self, range: 0..self.len() }
125
0
    }
126
127
    /// Returns the transition at index `i`.
128
    #[inline(always)]
129
0
    pub fn transition(&self, i: usize) -> Transition {
130
        // The `inline(always)` annotation on this function appears to
131
        // dramatically speed up FST traversal. In particular, measuring the
132
        // time it takes to run `fst range something-big.fst` shows almost a 2x
133
        // improvement with this single `inline(always)` annotation.
134
135
0
        match self.state {
136
0
            State::OneTransNext(s) => {
137
0
                assert!(i == 0);
138
0
                Transition {
139
0
                    inp: s.input(self),
140
0
                    out: Output::zero(),
141
0
                    addr: s.trans_addr(self),
142
0
                }
143
            }
144
0
            State::OneTrans(s) => {
145
0
                assert!(i == 0);
146
0
                Transition {
147
0
                    inp: s.input(self),
148
0
                    out: s.output(self),
149
0
                    addr: s.trans_addr(self),
150
0
                }
151
            }
152
0
            State::AnyTrans(s) => Transition {
153
0
                inp: s.input(self, i),
154
0
                out: s.output(self, i),
155
0
                addr: s.trans_addr(self, i),
156
0
            },
157
0
            State::EmptyFinal => panic!("out of bounds"),
158
        }
159
0
    }
160
161
    /// Returns the transition address of the `i`th transition.
162
    #[inline]
163
0
    pub fn transition_addr(&self, i: usize) -> CompiledAddr {
164
0
        match self.state {
165
0
            State::OneTransNext(s) => {
166
0
                assert!(i == 0);
167
0
                s.trans_addr(self)
168
            }
169
0
            State::OneTrans(s) => {
170
0
                assert!(i == 0);
171
0
                s.trans_addr(self)
172
            }
173
0
            State::AnyTrans(s) => s.trans_addr(self, i),
174
0
            State::EmptyFinal => panic!("out of bounds"),
175
        }
176
0
    }
177
178
    /// Finds the `i`th transition corresponding to the given input byte.
179
    ///
180
    /// If no transition for this byte exists, then `None` is returned.
181
    #[inline]
182
0
    pub fn find_input(&self, b: u8) -> Option<usize> {
183
0
        match self.state {
184
0
            State::OneTransNext(s) if s.input(self) == b => Some(0),
185
0
            State::OneTransNext(_) => None,
186
0
            State::OneTrans(s) if s.input(self) == b => Some(0),
187
0
            State::OneTrans(_) => None,
188
0
            State::AnyTrans(s) => s.find_input(self, b),
189
0
            State::EmptyFinal => None,
190
        }
191
0
    }
192
193
    /// If this node is final and has a terminal output value, then it is
194
    /// returned. Otherwise, a zero output is returned.
195
    #[inline]
196
0
    pub fn final_output(&self) -> Output {
197
0
        self.final_output
198
0
    }
199
200
    /// Returns true if and only if this node corresponds to a final or "match"
201
    /// state in the finite state transducer.
202
    #[inline]
203
0
    pub fn is_final(&self) -> bool {
204
0
        self.is_final
205
0
    }
206
207
    /// Returns the number of transitions in this node.
208
    ///
209
    /// The maximum number of transitions is 256.
210
    #[inline]
211
0
    pub fn len(&self) -> usize {
212
0
        self.ntrans
213
0
    }
214
215
    /// Returns true if and only if this node has zero transitions.
216
    #[inline]
217
0
    pub fn is_empty(&self) -> bool {
218
0
        self.ntrans == 0
219
0
    }
220
221
    /// Return the address of this node.
222
    #[inline]
223
0
    pub fn addr(&self) -> CompiledAddr {
224
0
        self.start
225
0
    }
226
227
    #[doc(hidden)]
228
    #[inline]
229
0
    pub fn as_slice(&self) -> &[u8] {
230
0
        &self.data[self.end..]
231
0
    }
232
233
    #[doc(hidden)]
234
    #[inline]
235
0
    pub fn state(&self) -> &'static str {
236
0
        match self.state {
237
0
            State::OneTransNext(_) => "OTN",
238
0
            State::OneTrans(_) => "OT",
239
0
            State::AnyTrans(_) => "AT",
240
0
            State::EmptyFinal => "EF",
241
        }
242
0
    }
243
244
0
    fn compile<W: io::Write>(
245
0
        wtr: W,
246
0
        last_addr: CompiledAddr,
247
0
        addr: CompiledAddr,
248
0
        node: &BuilderNode,
249
0
    ) -> io::Result<()> {
250
0
        assert!(node.trans.len() <= 256);
251
0
        if node.trans.is_empty()
252
0
            && node.is_final
253
0
            && node.final_output.is_zero()
254
        {
255
0
            return Ok(());
256
0
        } else if node.trans.len() != 1 || node.is_final {
257
0
            StateAnyTrans::compile(wtr, addr, node)
258
        } else {
259
0
            if node.trans[0].addr == last_addr && node.trans[0].out.is_zero() {
260
0
                StateOneTransNext::compile(wtr, addr, node.trans[0].inp)
261
            } else {
262
0
                StateOneTrans::compile(wtr, addr, node.trans[0])
263
            }
264
        }
265
0
    }
266
}
267
268
impl BuilderNode {
269
0
    pub fn compile_to<W: io::Write>(
270
0
        &self,
271
0
        wtr: W,
272
0
        last_addr: CompiledAddr,
273
0
        addr: CompiledAddr,
274
0
    ) -> io::Result<()> {
275
0
        Node::compile(wtr, last_addr, addr, self)
276
0
    }
277
}
278
279
#[derive(Clone, Copy, Debug)]
280
enum State {
281
    OneTransNext(StateOneTransNext),
282
    OneTrans(StateOneTrans),
283
    AnyTrans(StateAnyTrans),
284
    EmptyFinal,
285
}
286
287
// one trans flag (1), next flag (1), common input
288
#[derive(Clone, Copy, Debug)]
289
struct StateOneTransNext(u8);
290
// one trans flag (1), next flag (0), common input
291
#[derive(Clone, Copy, Debug)]
292
struct StateOneTrans(u8);
293
// one trans flag (0), final flag, # transitions
294
#[derive(Clone, Copy, Debug)]
295
struct StateAnyTrans(u8);
296
297
impl State {
298
0
    fn new(data: &[u8], addr: CompiledAddr) -> State {
299
0
        if addr == EMPTY_ADDRESS {
300
0
            return State::EmptyFinal;
301
0
        }
302
0
        let v = data[addr];
303
0
        match (v & 0b11_000000) >> 6 {
304
0
            0b11 => State::OneTransNext(StateOneTransNext(v)),
305
0
            0b10 => State::OneTrans(StateOneTrans(v)),
306
0
            _ => State::AnyTrans(StateAnyTrans(v)),
307
        }
308
0
    }
309
}
310
311
impl StateOneTransNext {
312
0
    fn compile<W: io::Write>(
313
0
        mut wtr: W,
314
0
        _: CompiledAddr,
315
0
        input: u8,
316
0
    ) -> io::Result<()> {
317
0
        let mut state = StateOneTransNext::new();
318
0
        state.set_common_input(input);
319
0
        if state.common_input().is_none() {
320
0
            wtr.write_all(&[input])?;
321
0
        }
322
0
        wtr.write_all(&[state.0])?;
323
0
        Ok(())
324
0
    }
325
326
    #[inline]
327
0
    fn new() -> StateOneTransNext {
328
0
        StateOneTransNext(0b11_000000)
329
0
    }
330
331
    #[inline]
332
0
    fn set_common_input(&mut self, input: u8) {
333
0
        self.0 = (self.0 & 0b11_000000) | common_idx(input, 0b111111);
334
0
    }
335
336
    #[inline]
337
0
    fn common_input(&self) -> Option<u8> {
338
0
        common_input(self.0 & 0b00_111111)
339
0
    }
340
341
    #[inline]
342
0
    fn input_len(&self) -> usize {
343
0
        if self.common_input().is_none() {
344
0
            1
345
        } else {
346
0
            0
347
        }
348
0
    }
349
350
    #[inline]
351
0
    fn end_addr(&self, data: &[u8]) -> usize {
352
0
        data.len() - 1 - self.input_len()
353
0
    }
354
355
    #[inline]
356
0
    fn input(&self, node: &Node<'_>) -> u8 {
357
0
        if let Some(inp) = self.common_input() {
358
0
            inp
359
        } else {
360
0
            node.data[node.start - 1]
361
        }
362
0
    }
363
364
    #[inline]
365
0
    fn trans_addr(&self, node: &Node<'_>) -> CompiledAddr {
366
0
        node.end as CompiledAddr - 1
367
0
    }
368
}
369
370
impl StateOneTrans {
371
0
    fn compile<W: io::Write>(
372
0
        mut wtr: W,
373
0
        addr: CompiledAddr,
374
0
        trans: Transition,
375
0
    ) -> io::Result<()> {
376
0
        let out = trans.out.value();
377
0
        let output_pack_size =
378
0
            if out == 0 { 0 } else { bytes::pack_uint(&mut wtr, out)? };
379
0
        let trans_pack_size = pack_delta(&mut wtr, addr, trans.addr)?;
380
381
0
        let mut pack_sizes = PackSizes::new();
382
0
        pack_sizes.set_output_pack_size(output_pack_size);
383
0
        pack_sizes.set_transition_pack_size(trans_pack_size);
384
0
        wtr.write_all(&[pack_sizes.encode()])?;
385
386
0
        let mut state = StateOneTrans::new();
387
0
        state.set_common_input(trans.inp);
388
0
        if state.common_input().is_none() {
389
0
            wtr.write_all(&[trans.inp])?;
390
0
        }
391
0
        wtr.write_all(&[state.0])?;
392
0
        Ok(())
393
0
    }
394
395
    #[inline]
396
0
    fn new() -> StateOneTrans {
397
0
        StateOneTrans(0b10_000000)
398
0
    }
399
400
    #[inline]
401
0
    fn set_common_input(&mut self, input: u8) {
402
0
        self.0 = (self.0 & 0b10_000000) | common_idx(input, 0b111111);
403
0
    }
404
405
    #[inline]
406
0
    fn common_input(&self) -> Option<u8> {
407
0
        common_input(self.0 & 0b00_111111)
408
0
    }
409
410
    #[inline]
411
0
    fn input_len(&self) -> usize {
412
0
        if self.common_input().is_none() {
413
0
            1
414
        } else {
415
0
            0
416
        }
417
0
    }
418
419
    #[inline]
420
0
    fn sizes(&self, data: &[u8]) -> PackSizes {
421
0
        let i = data.len() - 1 - self.input_len() - 1;
422
0
        PackSizes::decode(data[i])
423
0
    }
424
425
    #[inline]
426
0
    fn end_addr(&self, data: &[u8], sizes: PackSizes) -> usize {
427
0
        data.len() - 1
428
0
        - self.input_len()
429
0
        - 1 // pack size
430
0
        - sizes.transition_pack_size()
431
0
        - sizes.output_pack_size()
432
0
    }
433
434
    #[inline]
435
0
    fn input(&self, node: &Node<'_>) -> u8 {
436
0
        if let Some(inp) = self.common_input() {
437
0
            inp
438
        } else {
439
0
            node.data[node.start - 1]
440
        }
441
0
    }
442
443
    #[inline]
444
0
    fn output(&self, node: &Node<'_>) -> Output {
445
0
        let osize = node.sizes.output_pack_size();
446
0
        if osize == 0 {
447
0
            return Output::zero();
448
0
        }
449
0
        let tsize = node.sizes.transition_pack_size();
450
0
        let i = node.start
451
0
                - self.input_len()
452
0
                - 1 // pack size
453
0
                - tsize - osize;
454
0
        Output::new(bytes::unpack_uint(&node.data[i..], osize as u8))
455
0
    }
456
457
    #[inline]
458
0
    fn trans_addr(&self, node: &Node<'_>) -> CompiledAddr {
459
0
        let tsize = node.sizes.transition_pack_size();
460
0
        let i = node.start
461
0
                - self.input_len()
462
0
                - 1 // pack size
463
0
                - tsize;
464
0
        unpack_delta(&node.data[i..], tsize, node.end)
465
0
    }
466
}
467
468
impl StateAnyTrans {
469
0
    fn compile<W: io::Write>(
470
0
        mut wtr: W,
471
0
        addr: CompiledAddr,
472
0
        node: &BuilderNode,
473
0
    ) -> io::Result<()> {
474
0
        assert!(node.trans.len() <= 256);
475
476
0
        let mut tsize = 0;
477
0
        let mut osize = bytes::pack_size(node.final_output.value());
478
0
        let mut any_outs = !node.final_output.is_zero();
479
0
        for t in &node.trans {
480
0
            tsize = cmp::max(tsize, pack_delta_size(addr, t.addr));
481
0
            osize = cmp::max(osize, bytes::pack_size(t.out.value()));
482
0
            any_outs = any_outs || !t.out.is_zero();
483
        }
484
485
0
        let mut pack_sizes = PackSizes::new();
486
0
        if any_outs {
487
0
            pack_sizes.set_output_pack_size(osize);
488
0
        } else {
489
0
            pack_sizes.set_output_pack_size(0);
490
0
        }
491
0
        pack_sizes.set_transition_pack_size(tsize);
492
493
0
        let mut state = StateAnyTrans::new();
494
0
        state.set_final_state(node.is_final);
495
0
        state.set_state_ntrans(node.trans.len() as u8);
496
497
0
        if any_outs {
498
0
            if node.is_final {
499
0
                bytes::pack_uint_in(
500
0
                    &mut wtr,
501
0
                    node.final_output.value(),
502
0
                    osize,
503
0
                )?;
504
0
            }
505
0
            for t in node.trans.iter().rev() {
506
0
                bytes::pack_uint_in(&mut wtr, t.out.value(), osize)?;
507
            }
508
0
        }
509
0
        for t in node.trans.iter().rev() {
510
0
            pack_delta_in(&mut wtr, addr, t.addr, tsize)?;
511
        }
512
0
        for t in node.trans.iter().rev() {
513
0
            wtr.write_all(&[t.inp])?;
514
        }
515
0
        if node.trans.len() > TRANS_INDEX_THRESHOLD {
516
            // A value of 255 indicates that no transition exists for the byte
517
            // at that index. (Except when there are 256 transitions.) Namely,
518
            // any value greater than or equal to the number of transitions in
519
            // this node indicates an absent transition.
520
0
            let mut index = [255u8; 256];
521
0
            for (i, t) in node.trans.iter().enumerate() {
522
0
                index[t.inp as usize] = i as u8;
523
0
            }
524
0
            wtr.write_all(&index)?;
525
0
        }
526
527
0
        wtr.write_all(&[pack_sizes.encode()])?;
528
0
        if state.state_ntrans().is_none() {
529
0
            if node.trans.len() == 256 {
530
                // 256 can't be represented in a u8, so we abuse the fact that
531
                // the # of transitions can never be 1 here, since 1 is always
532
                // encoded in the state byte.
533
0
                wtr.write_all(&[1])?;
534
            } else {
535
0
                wtr.write_all(&[node.trans.len() as u8])?;
536
            }
537
0
        }
538
0
        wtr.write_all(&[state.0])?;
539
0
        Ok(())
540
0
    }
541
542
    #[inline]
543
0
    fn new() -> StateAnyTrans {
544
0
        StateAnyTrans(0b00_000000)
545
0
    }
546
547
    #[inline]
548
0
    fn set_final_state(&mut self, yes: bool) {
549
0
        if yes {
550
0
            self.0 |= 0b01_000000;
551
0
        }
552
0
    }
553
554
    #[inline]
555
0
    fn is_final_state(&self) -> bool {
556
0
        self.0 & 0b01_000000 == 0b01_000000
557
0
    }
558
559
    #[inline]
560
0
    fn set_state_ntrans(&mut self, n: u8) {
561
0
        if n <= 0b00_111111 {
562
0
            self.0 = (self.0 & 0b11_000000) | n;
563
0
        }
564
0
    }
565
566
    #[inline]
567
0
    fn state_ntrans(&self) -> Option<u8> {
568
0
        let n = self.0 & 0b00_111111;
569
0
        if n == 0 {
570
0
            None
571
        } else {
572
0
            Some(n)
573
        }
574
0
    }
575
576
    #[inline]
577
0
    fn sizes(&self, data: &[u8]) -> PackSizes {
578
0
        let i = data.len() - 1 - self.ntrans_len() - 1;
579
0
        PackSizes::decode(data[i])
580
0
    }
581
582
    #[inline]
583
0
    fn total_trans_size(
584
0
        &self,
585
0
        version: u64,
586
0
        sizes: PackSizes,
587
0
        ntrans: usize,
588
0
    ) -> usize {
589
0
        let index_size = self.trans_index_size(version, ntrans);
590
0
        ntrans + (ntrans * sizes.transition_pack_size()) + index_size
591
0
    }
592
593
    #[inline]
594
0
    fn trans_index_size(&self, version: u64, ntrans: usize) -> usize {
595
0
        if version >= 2 && ntrans > TRANS_INDEX_THRESHOLD {
596
0
            256
597
        } else {
598
0
            0
599
        }
600
0
    }
601
602
    #[inline]
603
0
    fn ntrans_len(&self) -> usize {
604
0
        if self.state_ntrans().is_none() {
605
0
            1
606
        } else {
607
0
            0
608
        }
609
0
    }
610
611
    #[inline]
612
0
    fn ntrans(&self, data: &[u8]) -> usize {
613
0
        if let Some(n) = self.state_ntrans() {
614
0
            n as usize
615
        } else {
616
0
            let n = data[data.len() - 2] as usize;
617
0
            if n == 1 {
618
                // "1" is never a normal legal value here, because if there
619
                // is only 1 transition, then it is encoded in the state byte.
620
0
                256
621
            } else {
622
0
                n
623
            }
624
        }
625
0
    }
626
627
    #[inline]
628
0
    fn final_output(
629
0
        &self,
630
0
        version: u64,
631
0
        data: &[u8],
632
0
        sizes: PackSizes,
633
0
        ntrans: usize,
634
0
    ) -> Output {
635
0
        let osize = sizes.output_pack_size();
636
0
        if osize == 0 || !self.is_final_state() {
637
0
            return Output::zero();
638
0
        }
639
0
        let at = data.len() - 1
640
0
                 - self.ntrans_len()
641
0
                 - 1 // pack size
642
0
                 - self.total_trans_size(version, sizes, ntrans)
643
0
                 - (ntrans * osize) // output values
644
0
                 - osize; // the desired output value
645
0
        Output::new(bytes::unpack_uint(&data[at..], osize as u8))
646
0
    }
647
648
    #[inline]
649
0
    fn end_addr(
650
0
        &self,
651
0
        version: u64,
652
0
        data: &[u8],
653
0
        sizes: PackSizes,
654
0
        ntrans: usize,
655
0
    ) -> usize {
656
0
        let osize = sizes.output_pack_size();
657
0
        let final_osize = if !self.is_final_state() { 0 } else { osize };
658
0
        data.len() - 1
659
0
        - self.ntrans_len()
660
0
        - 1 // pack size
661
0
        - self.total_trans_size(version, sizes, ntrans)
662
0
        - (ntrans * osize) // output values
663
0
        - final_osize // final output
664
0
    }
665
666
    #[inline]
667
0
    fn trans_addr(&self, node: &Node<'_>, i: usize) -> CompiledAddr {
668
0
        assert!(i < node.ntrans);
669
0
        let tsize = node.sizes.transition_pack_size();
670
0
        let at = node.start
671
0
                 - self.ntrans_len()
672
0
                 - 1 // pack size
673
0
                 - self.trans_index_size(node.version, node.ntrans)
674
0
                 - node.ntrans // inputs
675
0
                 - (i * tsize) // the previous transition addresses
676
0
                 - tsize; // the desired transition address
677
0
        unpack_delta(&node.data[at..], tsize, node.end)
678
0
    }
679
680
    #[inline]
681
0
    fn input(&self, node: &Node<'_>, i: usize) -> u8 {
682
0
        let at = node.start
683
0
                 - self.ntrans_len()
684
0
                 - 1 // pack size
685
0
                 - self.trans_index_size(node.version, node.ntrans)
686
0
                 - i
687
0
                 - 1; // the input byte
688
0
        node.data[at]
689
0
    }
690
691
    #[inline]
692
0
    fn find_input(&self, node: &Node<'_>, b: u8) -> Option<usize> {
693
0
        if node.version >= 2 && node.ntrans > TRANS_INDEX_THRESHOLD {
694
0
            let start = node.start
695
0
                        - self.ntrans_len()
696
0
                        - 1 // pack size
697
0
                        - self.trans_index_size(node.version, node.ntrans);
698
0
            let i = node.data[start + b as usize] as usize;
699
0
            if i >= node.ntrans {
700
0
                None
701
            } else {
702
0
                Some(i)
703
            }
704
        } else {
705
0
            let start = node.start
706
0
                        - self.ntrans_len()
707
0
                        - 1 // pack size
708
0
                        - node.ntrans; // inputs
709
0
            let end = start + node.ntrans;
710
0
            let inputs = &node.data[start..end];
711
0
            inputs.iter().position(|&b2| b == b2).map(|i| node.ntrans - i - 1)
712
        }
713
0
    }
714
715
    #[inline]
716
0
    fn output(&self, node: &Node<'_>, i: usize) -> Output {
717
0
        let osize = node.sizes.output_pack_size();
718
0
        if osize == 0 {
719
0
            return Output::zero();
720
0
        }
721
0
        let at = node.start
722
0
                 - self.ntrans_len()
723
0
                 - 1 // pack size
724
0
                 - self.total_trans_size(node.version, node.sizes, node.ntrans)
725
0
                 - (i * osize) // the previous outputs
726
0
                 - osize; // the desired output value
727
0
        Output::new(bytes::unpack_uint(&node.data[at..], osize as u8))
728
0
    }
729
}
730
731
// high 4 bits is transition address packed size.
732
// low 4 bits is output value packed size.
733
//
734
// `0` is a legal value which means there are no transitions/outputs.
735
#[derive(Clone, Copy, Debug)]
736
struct PackSizes(u8);
737
738
impl PackSizes {
739
    #[inline]
740
0
    fn new() -> PackSizes {
741
0
        PackSizes(0)
742
0
    }
743
744
    #[inline]
745
0
    fn decode(v: u8) -> PackSizes {
746
0
        PackSizes(v)
747
0
    }
748
749
    #[inline]
750
0
    fn encode(&self) -> u8 {
751
0
        self.0
752
0
    }
753
754
    #[inline]
755
0
    fn set_transition_pack_size(&mut self, size: u8) {
756
0
        assert!(size <= 8);
757
0
        self.0 = (self.0 & 0b0000_1111) | (size << 4);
758
0
    }
759
760
    #[inline]
761
0
    fn transition_pack_size(&self) -> usize {
762
0
        ((self.0 & 0b1111_0000) >> 4) as usize
763
0
    }
764
765
    #[inline]
766
0
    fn set_output_pack_size(&mut self, size: u8) {
767
0
        assert!(size <= 8);
768
0
        self.0 = (self.0 & 0b1111_0000) | size;
769
0
    }
770
771
    #[inline]
772
0
    fn output_pack_size(&self) -> usize {
773
0
        (self.0 & 0b0000_1111) as usize
774
0
    }
775
}
776
777
/// An iterator over all transitions in a node.
778
///
779
/// `'f` is the lifetime of the underlying fst and `'n` is the lifetime of
780
/// the underlying `Node`.
781
pub struct Transitions<'f, 'n> {
782
    node: &'n Node<'f>,
783
    range: Range<usize>,
784
}
785
786
impl<'f, 'n> Iterator for Transitions<'f, 'n> {
787
    type Item = Transition;
788
789
    #[inline]
790
0
    fn next(&mut self) -> Option<Transition> {
791
0
        self.range.next().map(|i| self.node.transition(i))
792
0
    }
793
}
794
795
/// common_idx translate a byte to an index in the COMMON_INPUTS_INV array.
796
///
797
/// I wonder if it would be prudent to store this mapping in the FST itself.
798
/// The advantage of doing so would mean that common inputs would reflect the
799
/// specific data in the FST. The problem of course is that this table has to
800
/// be computed up front, which is pretty much at odds with the streaming
801
/// nature of the builder.
802
///
803
/// Nevertheless, the *caller* may have a priori knowledge that could be
804
/// supplied to the builder manually, which could then be embedded in the FST.
805
#[inline]
806
0
fn common_idx(input: u8, max: u8) -> u8 {
807
0
    let val = ((COMMON_INPUTS[input as usize] as u32 + 1) % 256) as u8;
808
0
    if val > max {
809
0
        0
810
    } else {
811
0
        val
812
    }
813
0
}
814
815
/// common_input translates a common input index stored in a serialized FST
816
/// to the corresponding byte.
817
#[inline]
818
0
fn common_input(idx: u8) -> Option<u8> {
819
0
    if idx == 0 {
820
0
        None
821
    } else {
822
0
        Some(COMMON_INPUTS_INV[(idx - 1) as usize])
823
    }
824
0
}
825
826
#[inline]
827
0
fn pack_delta<W: io::Write>(
828
0
    wtr: W,
829
0
    node_addr: CompiledAddr,
830
0
    trans_addr: CompiledAddr,
831
0
) -> io::Result<u8> {
832
0
    let nbytes = pack_delta_size(node_addr, trans_addr);
833
0
    pack_delta_in(wtr, node_addr, trans_addr, nbytes)?;
834
0
    Ok(nbytes)
835
0
}
836
837
#[inline]
838
0
fn pack_delta_in<W: io::Write>(
839
0
    wtr: W,
840
0
    node_addr: CompiledAddr,
841
0
    trans_addr: CompiledAddr,
842
0
    nbytes: u8,
843
0
) -> io::Result<()> {
844
0
    let delta_addr = if trans_addr == EMPTY_ADDRESS {
845
0
        EMPTY_ADDRESS
846
    } else {
847
0
        node_addr - trans_addr
848
    };
849
0
    bytes::pack_uint_in(wtr, delta_addr as u64, nbytes)
850
0
}
851
852
#[inline]
853
0
fn pack_delta_size(node_addr: CompiledAddr, trans_addr: CompiledAddr) -> u8 {
854
0
    let delta_addr = if trans_addr == EMPTY_ADDRESS {
855
0
        EMPTY_ADDRESS
856
    } else {
857
0
        node_addr - trans_addr
858
    };
859
0
    bytes::pack_size(delta_addr as u64)
860
0
}
861
862
#[inline]
863
0
fn unpack_delta(
864
0
    slice: &[u8],
865
0
    trans_pack_size: usize,
866
0
    node_addr: usize,
867
0
) -> CompiledAddr {
868
0
    let delta = bytes::unpack_uint(slice, trans_pack_size as u8);
869
0
    let delta_addr = u64_to_usize(delta);
870
0
    if delta_addr == EMPTY_ADDRESS {
871
0
        EMPTY_ADDRESS
872
    } else {
873
0
        node_addr - delta_addr
874
    }
875
0
}
876
877
#[cfg(test)]
878
mod tests {
879
    use quickcheck::{quickcheck, TestResult};
880
881
    use crate::raw::build::BuilderNode;
882
    use crate::raw::node::Node;
883
    use crate::raw::{Builder, CompiledAddr, Output, Transition, VERSION};
884
    use crate::stream::Streamer;
885
886
    const NEVER_LAST: CompiledAddr = std::u64::MAX as CompiledAddr;
887
888
    #[test]
889
    fn prop_emits_inputs() {
890
        fn p(mut bs: Vec<Vec<u8>>) -> TestResult {
891
            bs.sort();
892
            bs.dedup();
893
894
            let mut bfst = Builder::memory();
895
            for word in &bs {
896
                bfst.add(word).unwrap();
897
            }
898
            let fst = bfst.into_fst();
899
            let mut rdr = fst.stream();
900
            let mut words = vec![];
901
            while let Some(w) = rdr.next() {
902
                words.push(w.0.to_owned());
903
            }
904
            TestResult::from_bool(bs == words)
905
        }
906
        quickcheck(p as fn(Vec<Vec<u8>>) -> TestResult)
907
    }
908
909
    fn nodes_equal(compiled: &Node, uncompiled: &BuilderNode) -> bool {
910
        println!("{:?}", compiled);
911
        assert_eq!(compiled.is_final(), uncompiled.is_final);
912
        assert_eq!(compiled.len(), uncompiled.trans.len());
913
        assert_eq!(compiled.final_output(), uncompiled.final_output);
914
        for (ct, ut) in
915
            compiled.transitions().zip(uncompiled.trans.iter().cloned())
916
        {
917
            assert_eq!(ct.inp, ut.inp);
918
            assert_eq!(ct.out, ut.out);
919
            assert_eq!(ct.addr, ut.addr);
920
        }
921
        true
922
    }
923
924
    fn compile(node: &BuilderNode) -> (CompiledAddr, Vec<u8>) {
925
        let mut buf = vec![0; 24];
926
        node.compile_to(&mut buf, NEVER_LAST, 24).unwrap();
927
        (buf.len() as CompiledAddr - 1, buf)
928
    }
929
930
    fn roundtrip(bnode: &BuilderNode) -> bool {
931
        let (addr, bytes) = compile(bnode);
932
        let node = Node::new(VERSION, addr, &bytes);
933
        nodes_equal(&node, &bnode)
934
    }
935
936
    fn trans(addr: CompiledAddr, inp: u8) -> Transition {
937
        Transition { inp, out: Output::zero(), addr }
938
    }
939
940
    #[test]
941
    fn bin_no_trans() {
942
        let bnode = BuilderNode {
943
            is_final: false,
944
            final_output: Output::zero(),
945
            trans: vec![],
946
        };
947
        let (addr, buf) = compile(&bnode);
948
        let node = Node::new(VERSION, addr, &buf);
949
        assert_eq!(node.as_slice().len(), 3);
950
        roundtrip(&bnode);
951
    }
952
953
    #[test]
954
    fn bin_one_trans_common() {
955
        let bnode = BuilderNode {
956
            is_final: false,
957
            final_output: Output::zero(),
958
            trans: vec![trans(20, b'a')],
959
        };
960
        let (addr, buf) = compile(&bnode);
961
        let node = Node::new(VERSION, addr, &buf);
962
        assert_eq!(node.as_slice().len(), 3);
963
        roundtrip(&bnode);
964
    }
965
966
    #[test]
967
    fn bin_one_trans_not_common() {
968
        let bnode = BuilderNode {
969
            is_final: false,
970
            final_output: Output::zero(),
971
            trans: vec![trans(2, b'\xff')],
972
        };
973
        let (addr, buf) = compile(&bnode);
974
        let node = Node::new(VERSION, addr, &buf);
975
        assert_eq!(node.as_slice().len(), 4);
976
        roundtrip(&bnode);
977
    }
978
979
    #[test]
980
    fn bin_many_trans() {
981
        let bnode = BuilderNode {
982
            is_final: false,
983
            final_output: Output::zero(),
984
            trans: vec![
985
                trans(2, b'a'),
986
                trans(3, b'b'),
987
                trans(4, b'c'),
988
                trans(5, b'd'),
989
                trans(6, b'e'),
990
                trans(7, b'f'),
991
            ],
992
        };
993
        let (addr, buf) = compile(&bnode);
994
        let node = Node::new(VERSION, addr, &buf);
995
        assert_eq!(node.as_slice().len(), 14);
996
        roundtrip(&bnode);
997
    }
998
999
    #[test]
1000
    fn node_max_trans() {
1001
        let bnode = BuilderNode {
1002
            is_final: false,
1003
            final_output: Output::zero(),
1004
            trans: (0..256).map(|i| trans(0, i as u8)).collect(),
1005
        };
1006
        let (addr, buf) = compile(&bnode);
1007
        let node = Node::new(VERSION, addr, &buf);
1008
        assert_eq!(node.transitions().count(), 256);
1009
        assert_eq!(node.len(), node.transitions().count());
1010
        roundtrip(&bnode);
1011
    }
1012
}