/rust/registry/src/index.crates.io-6f17d22bba15001f/unicode-bidi-0.3.8/src/prepare.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2015 The Servo Project Developers. See the |
2 | | // COPYRIGHT file at the top-level directory of this distribution. |
3 | | // |
4 | | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
5 | | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
6 | | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
7 | | // option. This file may not be copied, modified, or distributed |
8 | | // except according to those terms. |
9 | | |
10 | | //! 3.3.3 Preparations for Implicit Processing |
11 | | //! |
12 | | //! <http://www.unicode.org/reports/tr9/#Preparations_for_Implicit_Processing> |
13 | | |
14 | | use alloc::vec::Vec; |
15 | | use core::cmp::max; |
16 | | use core::ops::Range; |
17 | | |
18 | | use super::level::Level; |
19 | | use super::BidiClass::{self, *}; |
20 | | |
21 | | /// A maximal run of consecutive characters that all share one embedding level.
22 | | ///
23 | | /// Stored as a half-open `start..end` range of byte indices.
24 | | pub type LevelRun = Range<usize>;
25 | | |
26 | | /// One isolating run sequence, the element type returned by `isolating_run_sequences` (steps X9-X10).
27 | 0 | #[derive(Debug, PartialEq)]
28 | | pub struct IsolatingRunSequence {
29 | | pub runs: Vec<LevelRun>, // Level runs making up the sequence, in the order they were joined.
30 | | pub sos: BidiClass, // Start-of-sequence type, derived from the sequence level and the preceding level.
31 | | pub eos: BidiClass, // End-of-sequence type, derived from the sequence level and the following level.
32 | | }
33 | | |
34 | | /// Compute the set of isolating run sequences. |
35 | | /// |
36 | | /// An isolating run sequence is a maximal sequence of level runs such that for all level runs |
37 | | /// except the last one in the sequence, the last character of the run is an isolate initiator |
38 | | /// whose matching PDI is the first character of the next level run in the sequence. |
39 | | /// |
40 | | /// Note: This function does *not* return the sequences in order by their first characters. |
41 | | #[cfg_attr(feature = "flame_it", flamer::flame)] |
42 | 0 | pub fn isolating_run_sequences( |
43 | 0 | para_level: Level, |
44 | 0 | original_classes: &[BidiClass], |
45 | 0 | levels: &[Level], |
46 | 0 | ) -> Vec<IsolatingRunSequence> { |
47 | 0 | let runs = level_runs(levels, original_classes); |
48 | 0 |
|
49 | 0 | // Compute the set of isolating run sequences. |
50 | 0 | // <http://www.unicode.org/reports/tr9/#BD13> |
51 | 0 | let mut sequences = Vec::with_capacity(runs.len()); |
52 | 0 |
|
53 | 0 | // When we encounter an isolate initiator, we push the current sequence onto the |
54 | 0 | // stack so we can resume it after the matching PDI. |
55 | 0 | let mut stack = vec![Vec::new()]; |
56 | | |
57 | 0 | for run in runs { |
58 | 0 | assert!(run.len() > 0); |
59 | 0 | assert!(!stack.is_empty()); |
60 | | |
61 | 0 | let start_class = original_classes[run.start]; |
62 | 0 | let end_class = original_classes[run.end - 1]; |
63 | | |
64 | 0 | let mut sequence = if start_class == PDI && stack.len() > 1 { |
65 | | // Continue a previous sequence interrupted by an isolate. |
66 | 0 | stack.pop().unwrap() |
67 | | } else { |
68 | | // Start a new sequence. |
69 | 0 | Vec::new() |
70 | | }; |
71 | | |
72 | 0 | sequence.push(run); |
73 | 0 |
|
74 | 0 | if let RLI | LRI | FSI = end_class { |
75 | 0 | // Resume this sequence after the isolate. |
76 | 0 | stack.push(sequence); |
77 | 0 | } else { |
78 | 0 | // This sequence is finished. |
79 | 0 | sequences.push(sequence); |
80 | 0 | } |
81 | | } |
82 | | // Pop any remaning sequences off the stack. |
83 | 0 | sequences.extend(stack.into_iter().rev().filter(|seq| !seq.is_empty())); |
84 | 0 |
|
85 | 0 | // Determine the `sos` and `eos` class for each sequence. |
86 | 0 | // <http://www.unicode.org/reports/tr9/#X10> |
87 | 0 | sequences |
88 | 0 | .into_iter() |
89 | 0 | .map(|sequence: Vec<LevelRun>| { |
90 | 0 | assert!(!sequence.is_empty()); |
91 | | |
92 | 0 | let start_of_seq = sequence[0].start; |
93 | 0 | let end_of_seq = sequence[sequence.len() - 1].end; |
94 | 0 | let seq_level = levels[start_of_seq]; |
95 | | |
96 | | #[cfg(test)] |
97 | | for run in sequence.clone() { |
98 | | for idx in run { |
99 | | if not_removed_by_x9(&original_classes[idx]) { |
100 | | assert_eq!(seq_level, levels[idx]); |
101 | | } |
102 | | } |
103 | | } |
104 | | |
105 | | // Get the level of the last non-removed char before the runs. |
106 | 0 | let pred_level = match original_classes[..start_of_seq] |
107 | 0 | .iter() |
108 | 0 | .rposition(not_removed_by_x9) |
109 | | { |
110 | 0 | Some(idx) => levels[idx], |
111 | 0 | None => para_level, |
112 | | }; |
113 | | |
114 | | // Get the level of the next non-removed char after the runs. |
115 | 0 | let succ_level = if let RLI | LRI | FSI = original_classes[end_of_seq - 1] { |
116 | 0 | para_level |
117 | | } else { |
118 | 0 | match original_classes[end_of_seq..] |
119 | 0 | .iter() |
120 | 0 | .position(not_removed_by_x9) |
121 | | { |
122 | 0 | Some(idx) => levels[end_of_seq + idx], |
123 | 0 | None => para_level, |
124 | | } |
125 | | }; |
126 | | |
127 | 0 | IsolatingRunSequence { |
128 | 0 | runs: sequence, |
129 | 0 | sos: max(seq_level, pred_level).bidi_class(), |
130 | 0 | eos: max(seq_level, succ_level).bidi_class(), |
131 | 0 | } |
132 | 0 | }) |
133 | 0 | .collect() |
134 | 0 | } |
135 | | |
136 | | /// Finds the level runs in a paragraph. |
137 | | /// |
138 | | /// <http://www.unicode.org/reports/tr9/#BD7> |
139 | 0 | fn level_runs(levels: &[Level], original_classes: &[BidiClass]) -> Vec<LevelRun> { |
140 | 0 | assert_eq!(levels.len(), original_classes.len()); |
141 | | |
142 | 0 | let mut runs = Vec::new(); |
143 | 0 | if levels.is_empty() { |
144 | 0 | return runs; |
145 | 0 | } |
146 | 0 |
|
147 | 0 | let mut current_run_level = levels[0]; |
148 | 0 | let mut current_run_start = 0; |
149 | 0 | for i in 1..levels.len() { |
150 | 0 | if !removed_by_x9(original_classes[i]) && levels[i] != current_run_level { |
151 | 0 | // End the last run and start a new one. |
152 | 0 | runs.push(current_run_start..i); |
153 | 0 | current_run_level = levels[i]; |
154 | 0 | current_run_start = i; |
155 | 0 | } |
156 | | } |
157 | 0 | runs.push(current_run_start..levels.len()); |
158 | 0 |
|
159 | 0 | runs |
160 | 0 | } |
161 | | |
162 | | /// Should this character be ignored in steps after X9? |
163 | | /// |
164 | | /// <http://www.unicode.org/reports/tr9/#X9> |
165 | 0 | pub fn removed_by_x9(class: BidiClass) -> bool { |
166 | 0 | match class { |
167 | 0 | RLE | LRE | RLO | LRO | PDF | BN => true, |
168 | 0 | _ => false, |
169 | | } |
170 | 0 | } |
171 | | |
172 | | // For use as a predicate for `position` / `rposition` |
173 | 0 | pub fn not_removed_by_x9(class: &BidiClass) -> bool { |
174 | 0 | !removed_by_x9(*class) |
175 | 0 | } |
176 | | |
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `isolating_run_sequences` for an LTR paragraph and orders the result by first
    /// run, because the function itself makes no ordering guarantee.
    fn sorted_sequences(classes: &[BidiClass], levels: &[Level]) -> Vec<IsolatingRunSequence> {
        let mut sequences = isolating_run_sequences(Level::ltr(), classes, levels);
        sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
        sequences
    }

    /// Extracts just the level runs of every sequence.
    fn runs_of(sequences: &[IsolatingRunSequence]) -> Vec<Vec<LevelRun>> {
        sequences.iter().map(|s| s.runs.clone()).collect()
    }

    /// Checks that `sequences[i]` equals `expected[i]` for every expected entry.
    fn assert_sequences(sequences: &[IsolatingRunSequence], expected: &[IsolatingRunSequence]) {
        for (i, wanted) in expected.iter().enumerate() {
            assert_eq!(&sequences[i], wanted);
        }
    }

    #[test]
    fn test_level_runs() {
        let no_runs = level_runs(&Level::vec(&[]), &[]);
        assert_eq!(no_runs, &[]);

        let levels = Level::vec(&[0, 0, 0, 1, 1, 2, 0, 0]);
        let runs = level_runs(&levels, &[L; 8]);
        assert_eq!(runs, &[0..3, 3..5, 5..6, 6..8]);
    }

    // From <http://www.unicode.org/reports/tr9/#BD13>
    #[rustfmt::skip]
    #[test]
    fn test_isolating_run_sequences() {
        // == Example 1 ==
        // text1·RLE·text2·PDF·RLE·text3·PDF·text4
        // index:                    0  1    2  3    4    5  6    7
        let classes: &[BidiClass] = &[L, RLE, L, PDF, RLE, L, PDF, L];
        let levels = Level::vec(&[0, 1,   1, 1,   1,   1, 1,   0]);
        let sequences = sorted_sequences(classes, &levels);
        assert_eq!(
            runs_of(&sequences),
            vec![vec![0..2], vec![2..7], vec![7..8]]
        );

        // == Example 2 ==
        // text1·RLI·text2·PDI·RLI·text3·PDI·text4
        // index:                    0  1    2  3    4    5  6    7
        let classes: &[BidiClass] = &[L, RLI, L, PDI, RLI, L, PDI, L];
        let levels = Level::vec(&[0, 0,   1, 0,   0,   1, 0,   0]);
        let sequences = sorted_sequences(classes, &levels);
        assert_eq!(
            runs_of(&sequences),
            vec![vec![0..2, 3..5, 6..8], vec![2..3], vec![5..6]]
        );

        // == Example 3 ==
        // text1·RLI·text2·LRI·text3·RLE·text4·PDF·text5·PDI·text6·PDI·text7
        // index:                    0  1    2  3    4  5    6  7    8  9    10 11   12
        let classes: &[BidiClass] = &[L, RLI, L, LRI, L, RLE, L, PDF, L, PDI, L, PDI, L];
        let levels = Level::vec(&[0, 0,   1, 1,   2, 3,   3, 3,   2, 1,   1, 0,   0]);
        let sequences = sorted_sequences(classes, &levels);
        assert_eq!(
            runs_of(&sequences),
            vec![vec![0..2, 11..13], vec![2..4, 9..11], vec![4..6], vec![6..8], vec![8..9]]
        );
    }

    // From <http://www.unicode.org/reports/tr9/#X10>
    #[rustfmt::skip]
    #[test]
    fn test_isolating_run_sequences_sos_and_eos() {
        // == Example 1 ==
        // text1·RLE·text2·LRE·text3·PDF·text4·PDF·RLE·text5·PDF·text6
        // index:                    0  1    2  3    4  5    6  7    8    9  10   11
        let classes: &[BidiClass] = &[L, RLE, L, LRE, L, PDF, L, PDF, RLE, L, PDF, L];
        let levels = Level::vec(&[0, 1,   1, 2,   2, 2,   1, 1,   1,   1, 1,   0]);
        let sequences = sorted_sequences(classes, &levels);
        assert_sequences(
            &sequences,
            &[
                // text1
                IsolatingRunSequence { runs: vec![0..2], sos: L, eos: R },
                // text2
                IsolatingRunSequence { runs: vec![2..4], sos: R, eos: L },
                // text3
                IsolatingRunSequence { runs: vec![4..6], sos: L, eos: L },
                // text4 text5
                IsolatingRunSequence { runs: vec![6..11], sos: L, eos: R },
                // text6
                IsolatingRunSequence { runs: vec![11..12], sos: R, eos: L },
            ],
        );

        // == Example 2 ==
        // text1·RLI·text2·LRI·text3·PDI·text4·PDI·RLI·text5·PDI·text6
        // index:                    0  1    2  3    4  5    6  7    8    9  10   11
        let classes: &[BidiClass] = &[L, RLI, L, LRI, L, PDI, L, PDI, RLI, L, PDI, L];
        let levels = Level::vec(&[0, 0,   1, 1,   2, 1,   1, 0,   0,   1, 0,   0]);
        let sequences = sorted_sequences(classes, &levels);
        assert_sequences(
            &sequences,
            &[
                // text1·RLI·PDI·RLI·PDI·text6
                IsolatingRunSequence { runs: vec![0..2, 7..9, 10..12], sos: L, eos: L },
                // text2·LRI·PDI·text4
                IsolatingRunSequence { runs: vec![2..4, 5..7], sos: R, eos: R },
                // text3
                IsolatingRunSequence { runs: vec![4..5], sos: L, eos: L },
                // text5
                IsolatingRunSequence { runs: vec![9..10], sos: R, eos: R },
            ],
        );
    }

    #[test]
    fn test_removed_by_x9() {
        // Every class that X9 removes.
        for &class in &[RLE, LRE, RLO, LRO, PDF, BN] {
            assert_eq!(removed_by_x9(class), true);
        }
        // A sample of classes that X9 keeps.
        for &class in &[L, RLI, AL, LRI, PDI] {
            assert_eq!(removed_by_x9(class), false);
        }
    }

    #[test]
    fn test_not_removed_by_x9() {
        let kept = [L, R, AL, EN, ES, ET, AN, CS, NSM, B, S, WS, ON, LRI, RLI, FSI, PDI];
        for class in kept.iter() {
            assert_eq!(not_removed_by_x9(class), true);
        }
    }
}