Coverage Report

Created: 2023-04-25 07:07

/rust/registry/src/index.crates.io-6f17d22bba15001f/unicode-bidi-0.3.8/src/prepare.rs
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2015 The Servo Project Developers. See the
2
// COPYRIGHT file at the top-level directory of this distribution.
3
//
4
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7
// option. This file may not be copied, modified, or distributed
8
// except according to those terms.
9
10
//! 3.3.3 Preparations for Implicit Processing
11
//!
12
//! <http://www.unicode.org/reports/tr9/#Preparations_for_Implicit_Processing>
13
14
use alloc::vec::Vec;
15
use core::cmp::max;
16
use core::ops::Range;
17
18
use super::level::Level;
19
use super::BidiClass::{self, *};
20
21
/// A level run: the longest contiguous stretch of text whose characters all share one
/// embedding level.
///
/// Stored as a half-open range of byte indices into the paragraph text.
pub type LevelRun = Range<usize>;
25
26
/// Output of `isolating_run_sequences` (steps X9-X10)
27
0
#[derive(Debug, PartialEq)]
28
pub struct IsolatingRunSequence {
29
    pub runs: Vec<LevelRun>,
30
    pub sos: BidiClass, // Start-of-sequence type.
31
    pub eos: BidiClass, // End-of-sequence type.
32
}
33
34
/// Compute the set of isolating run sequences.
35
///
36
/// An isolating run sequence is a maximal sequence of level runs such that for all level runs
37
/// except the last one in the sequence, the last character of the run is an isolate initiator
38
/// whose matching PDI is the first character of the next level run in the sequence.
39
///
40
/// Note: This function does *not* return the sequences in order by their first characters.
41
#[cfg_attr(feature = "flame_it", flamer::flame)]
42
0
pub fn isolating_run_sequences(
43
0
    para_level: Level,
44
0
    original_classes: &[BidiClass],
45
0
    levels: &[Level],
46
0
) -> Vec<IsolatingRunSequence> {
47
0
    let runs = level_runs(levels, original_classes);
48
0
49
0
    // Compute the set of isolating run sequences.
50
0
    // <http://www.unicode.org/reports/tr9/#BD13>
51
0
    let mut sequences = Vec::with_capacity(runs.len());
52
0
53
0
    // When we encounter an isolate initiator, we push the current sequence onto the
54
0
    // stack so we can resume it after the matching PDI.
55
0
    let mut stack = vec![Vec::new()];
56
57
0
    for run in runs {
58
0
        assert!(run.len() > 0);
59
0
        assert!(!stack.is_empty());
60
61
0
        let start_class = original_classes[run.start];
62
0
        let end_class = original_classes[run.end - 1];
63
64
0
        let mut sequence = if start_class == PDI && stack.len() > 1 {
65
            // Continue a previous sequence interrupted by an isolate.
66
0
            stack.pop().unwrap()
67
        } else {
68
            // Start a new sequence.
69
0
            Vec::new()
70
        };
71
72
0
        sequence.push(run);
73
0
74
0
        if let RLI | LRI | FSI = end_class {
75
0
            // Resume this sequence after the isolate.
76
0
            stack.push(sequence);
77
0
        } else {
78
0
            // This sequence is finished.
79
0
            sequences.push(sequence);
80
0
        }
81
    }
82
    // Pop any remaning sequences off the stack.
83
0
    sequences.extend(stack.into_iter().rev().filter(|seq| !seq.is_empty()));
84
0
85
0
    // Determine the `sos` and `eos` class for each sequence.
86
0
    // <http://www.unicode.org/reports/tr9/#X10>
87
0
    sequences
88
0
        .into_iter()
89
0
        .map(|sequence: Vec<LevelRun>| {
90
0
            assert!(!sequence.is_empty());
91
92
0
            let start_of_seq = sequence[0].start;
93
0
            let end_of_seq = sequence[sequence.len() - 1].end;
94
0
            let seq_level = levels[start_of_seq];
95
96
            #[cfg(test)]
97
            for run in sequence.clone() {
98
                for idx in run {
99
                    if not_removed_by_x9(&original_classes[idx]) {
100
                        assert_eq!(seq_level, levels[idx]);
101
                    }
102
                }
103
            }
104
105
            // Get the level of the last non-removed char before the runs.
106
0
            let pred_level = match original_classes[..start_of_seq]
107
0
                .iter()
108
0
                .rposition(not_removed_by_x9)
109
            {
110
0
                Some(idx) => levels[idx],
111
0
                None => para_level,
112
            };
113
114
            // Get the level of the next non-removed char after the runs.
115
0
            let succ_level = if let RLI | LRI | FSI = original_classes[end_of_seq - 1] {
116
0
                para_level
117
            } else {
118
0
                match original_classes[end_of_seq..]
119
0
                    .iter()
120
0
                    .position(not_removed_by_x9)
121
                {
122
0
                    Some(idx) => levels[end_of_seq + idx],
123
0
                    None => para_level,
124
                }
125
            };
126
127
0
            IsolatingRunSequence {
128
0
                runs: sequence,
129
0
                sos: max(seq_level, pred_level).bidi_class(),
130
0
                eos: max(seq_level, succ_level).bidi_class(),
131
0
            }
132
0
        })
133
0
        .collect()
134
0
}
135
136
/// Finds the level runs in a paragraph.
137
///
138
/// <http://www.unicode.org/reports/tr9/#BD7>
139
0
fn level_runs(levels: &[Level], original_classes: &[BidiClass]) -> Vec<LevelRun> {
140
0
    assert_eq!(levels.len(), original_classes.len());
141
142
0
    let mut runs = Vec::new();
143
0
    if levels.is_empty() {
144
0
        return runs;
145
0
    }
146
0
147
0
    let mut current_run_level = levels[0];
148
0
    let mut current_run_start = 0;
149
0
    for i in 1..levels.len() {
150
0
        if !removed_by_x9(original_classes[i]) && levels[i] != current_run_level {
151
0
            // End the last run and start a new one.
152
0
            runs.push(current_run_start..i);
153
0
            current_run_level = levels[i];
154
0
            current_run_start = i;
155
0
        }
156
    }
157
0
    runs.push(current_run_start..levels.len());
158
0
159
0
    runs
160
0
}
161
162
/// Should this character be ignored in steps after X9?
163
///
164
/// <http://www.unicode.org/reports/tr9/#X9>
165
0
pub fn removed_by_x9(class: BidiClass) -> bool {
166
0
    match class {
167
0
        RLE | LRE | RLO | LRO | PDF | BN => true,
168
0
        _ => false,
169
    }
170
0
}
171
172
// For use as a predicate for `position` / `rposition`
173
0
pub fn not_removed_by_x9(class: &BidiClass) -> bool {
174
0
    !removed_by_x9(*class)
175
0
}
176
177
#[cfg(test)]
mod tests {
    use super::*;

    /// Computes the isolating run sequences of a left-to-right paragraph and orders them by
    /// their first level run, so that the assertions below can index them deterministically.
    fn sorted_sequences(classes: &[BidiClass], levels: &[Level]) -> Vec<IsolatingRunSequence> {
        let mut sequences = isolating_run_sequences(Level::ltr(), classes, levels);
        sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
        sequences
    }

    /// Extracts just the level runs of each sequence.
    fn runs_of(sequences: &[IsolatingRunSequence]) -> Vec<Vec<LevelRun>> {
        sequences.iter().map(|seq| seq.runs.clone()).collect()
    }

    /// Shorthand for building the sequence a test expects.
    fn expected(runs: Vec<LevelRun>, sos: BidiClass, eos: BidiClass) -> IsolatingRunSequence {
        IsolatingRunSequence { runs, sos, eos }
    }

    #[test]
    fn test_level_runs() {
        // No characters, no runs.
        assert_eq!(level_runs(&Level::vec(&[]), &[]), &[]);

        // A run boundary falls wherever the level changes.
        let levels = Level::vec(&[0, 0, 0, 1, 1, 2, 0, 0]);
        let classes = [L; 8];
        assert_eq!(level_runs(&levels, &classes), &[0..3, 3..5, 5..6, 6..8]);
    }

    // From <http://www.unicode.org/reports/tr9/#BD13>
    #[rustfmt::skip]
    #[test]
    fn test_isolating_run_sequences() {

        // == Example 1 ==
        // text1·RLE·text2·PDF·RLE·text3·PDF·text4
        // index        0    1  2    3    4  5    6  7
        let classes = &[L, RLE, L, PDF, RLE, L, PDF, L];
        let levels =  &[0,   1, 1,   1,   1, 1,   1, 0];
        let sequences = sorted_sequences(classes, &Level::vec(levels));
        assert_eq!(
            runs_of(&sequences),
            vec![vec![0..2], vec![2..7], vec![7..8]]
        );

        // == Example 2 ==
        // text1·RLI·text2·PDI·RLI·text3·PDI·text4
        // index        0    1  2    3    4  5    6  7
        let classes = &[L, RLI, L, PDI, RLI, L, PDI, L];
        let levels =  &[0,   0, 1,   0,   0, 1,   0, 0];
        let sequences = sorted_sequences(classes, &Level::vec(levels));
        assert_eq!(
            runs_of(&sequences),
            vec![vec![0..2, 3..5, 6..8], vec![2..3], vec![5..6]]
        );

        // == Example 3 ==
        // text1·RLI·text2·LRI·text3·RLE·text4·PDF·text5·PDI·text6·PDI·text7
        // index        0    1  2    3  4    5  6    7  8    9  10  11  12
        let classes = &[L, RLI, L, LRI, L, RLE, L, PDF, L, PDI, L, PDI,  L];
        let levels =  &[0,   0, 1,   1, 2,   3, 3,   3, 2,   1, 1,   0,  0];
        let sequences = sorted_sequences(classes, &Level::vec(levels));
        assert_eq!(
            runs_of(&sequences),
            vec![vec![0..2, 11..13], vec![2..4, 9..11], vec![4..6], vec![6..8], vec![8..9]]
        );
    }

    // From <http://www.unicode.org/reports/tr9/#X10>
    #[rustfmt::skip]
    #[test]
    fn test_isolating_run_sequences_sos_and_eos() {

        // == Example 1 ==
        // text1·RLE·text2·LRE·text3·PDF·text4·PDF·RLE·text5·PDF·text6
        // index        0    1  2    3  4    5  6    7    8  9   10  11
        let classes = &[L, RLE, L, LRE, L, PDF, L, PDF, RLE, L, PDF,  L];
        let levels =  &[0,   1, 1,   2, 2,   2, 1,   1,   1, 1,   1,  0];
        let sequences = sorted_sequences(classes, &Level::vec(levels));

        // text1
        assert_eq!(sequences[0], expected(vec![0..2], L, R));

        // text2
        assert_eq!(sequences[1], expected(vec![2..4], R, L));

        // text3
        assert_eq!(sequences[2], expected(vec![4..6], L, L));

        // text4 text5
        assert_eq!(sequences[3], expected(vec![6..11], L, R));

        // text6
        assert_eq!(sequences[4], expected(vec![11..12], R, L));

        // == Example 2 ==
        // text1·RLI·text2·LRI·text3·PDI·text4·PDI·RLI·text5·PDI·text6
        // index        0    1  2    3  4    5  6    7    8  9   10  11
        let classes = &[L, RLI, L, LRI, L, PDI, L, PDI, RLI, L, PDI,  L];
        let levels =  &[0,   0, 1,   1, 2,   1, 1,   0,   0, 1,   0,  0];
        let sequences = sorted_sequences(classes, &Level::vec(levels));

        // text1·RLI·PDI·RLI·PDI·text6
        assert_eq!(sequences[0], expected(vec![0..2, 7..9, 10..12], L, L));

        // text2·LRI·PDI·text4
        assert_eq!(sequences[1], expected(vec![2..4, 5..7], R, R));

        // text3
        assert_eq!(sequences[2], expected(vec![4..5], L, L));

        // text5
        assert_eq!(sequences[3], expected(vec![9..10], R, R));
    }

    #[test]
    fn test_removed_by_x9() {
        let rem_classes = &[RLE, LRE, RLO, LRO, PDF, BN];
        let not_classes = &[L, RLI, AL, LRI, PDI];
        for &class in rem_classes {
            assert!(removed_by_x9(class));
        }
        for &class in not_classes {
            assert!(!removed_by_x9(class));
        }
    }

    #[test]
    fn test_not_removed_by_x9() {
        let non_x9_classes = &[L, R, AL, EN, ES, ET, AN, CS, NSM, B, S, WS, ON, LRI, RLI, FSI, PDI];
        for class in non_x9_classes {
            assert!(not_removed_by_x9(class));
        }
    }
}