Coverage Report

Created: 2024-08-22 06:13

/rust/registry/src/index.crates.io-6f17d22bba15001f/unicode-bidi-0.3.15/src/utf16.rs
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2023 The Mozilla Foundation. See the
2
// COPYRIGHT file at the top-level directory of this distribution.
3
//
4
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7
// option. This file may not be copied, modified, or distributed
8
// except according to those terms.
9
10
use super::TextSource;
11
12
use alloc::borrow::Cow;
13
use alloc::vec::Vec;
14
use core::char;
15
use core::ops::Range;
16
17
use crate::{
18
    compute_bidi_info_for_para, compute_initial_info, level, para_direction, reorder_levels,
19
    reorder_visual, visual_runs_for_line,
20
};
21
use crate::{BidiClass, BidiDataSource, Direction, Level, LevelRun, ParagraphInfo};
22
23
#[cfg(feature = "hardcoded-data")]
24
use crate::HardcodedBidiData;
25
26
/// Initial bidi information of the text (UTF-16 version).
27
///
28
/// Contains the text paragraphs and `BidiClass` of its characters.
29
#[derive(PartialEq, Debug)]
30
pub struct InitialInfo<'text> {
31
    /// The text
32
    pub text: &'text [u16],
33
34
    /// The BidiClass of the character at each code unit in the text.
35
    /// If a character is multiple code units, its class will appear multiple times in the vector.
36
    pub original_classes: Vec<BidiClass>,
37
38
    /// The boundaries and level of each paragraph within the text.
39
    pub paragraphs: Vec<ParagraphInfo>,
40
}
41
42
impl<'text> InitialInfo<'text> {
43
    /// Find the paragraphs and BidiClasses in a string of text.
44
    ///
45
    /// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
46
    ///
47
    /// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong
48
    /// character is found before the matching PDI.  If no strong character is found, the class will
49
    /// remain FSI, and it's up to later stages to treat these as LRI when needed.
50
    ///
51
    /// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
52
    #[cfg_attr(feature = "flame_it", flamer::flame)]
53
    #[cfg(feature = "hardcoded-data")]
54
0
    pub fn new(text: &[u16], default_para_level: Option<Level>) -> InitialInfo<'_> {
55
0
        Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
56
0
    }
57
58
    /// Find the paragraphs and BidiClasses in a string of text, with a custom [`BidiDataSource`]
59
    /// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`InitialInfo::new()`]
60
    /// instead (enabled with the default `hardcoded-data` Cargo feature).
61
    ///
62
    /// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
63
    ///
64
    /// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong
65
    /// character is found before the matching PDI.  If no strong character is found, the class will
66
    /// remain FSI, and it's up to later stages to treat these as LRI when needed.
67
    #[cfg_attr(feature = "flame_it", flamer::flame)]
68
0
    pub fn new_with_data_source<'a, D: BidiDataSource>(
69
0
        data_source: &D,
70
0
        text: &'a [u16],
71
0
        default_para_level: Option<Level>,
72
0
    ) -> InitialInfo<'a> {
73
0
        InitialInfoExt::new_with_data_source(data_source, text, default_para_level).base
74
0
    }
75
}
76
77
/// Extended version of InitialInfo (not public API).
78
#[derive(PartialEq, Debug)]
79
struct InitialInfoExt<'text> {
80
    /// The base InitialInfo for the text, recording its paragraphs and bidi classes.
81
    base: InitialInfo<'text>,
82
83
    /// Parallel to base.paragraphs, records whether each paragraph is "pure LTR" that
84
    /// requires no further bidi processing (i.e. there are no RTL characters or bidi
85
    /// control codes present).
86
    pure_ltr: Vec<bool>,
87
}
88
89
impl<'text> InitialInfoExt<'text> {
90
    /// Find the paragraphs and BidiClasses in a string of text, with a custom [`BidiDataSource`]
91
    /// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`InitialInfo::new()`]
92
    /// instead (enabled with the default `hardcoded-data` Cargo feature).
93
    ///
94
    /// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
95
    ///
96
    /// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong
97
    /// character is found before the matching PDI.  If no strong character is found, the class will
98
    /// remain FSI, and it's up to later stages to treat these as LRI when needed.
99
    #[cfg_attr(feature = "flame_it", flamer::flame)]
100
0
    pub fn new_with_data_source<'a, D: BidiDataSource>(
101
0
        data_source: &D,
102
0
        text: &'a [u16],
103
0
        default_para_level: Option<Level>,
104
0
    ) -> InitialInfoExt<'a> {
105
0
        let mut paragraphs = Vec::<ParagraphInfo>::new();
106
0
        let mut pure_ltr = Vec::<bool>::new();
107
0
        let (original_classes, _, _) = compute_initial_info(
108
0
            data_source,
109
0
            text,
110
0
            default_para_level,
111
0
            Some((&mut paragraphs, &mut pure_ltr)),
112
0
        );
113
0
114
0
        InitialInfoExt {
115
0
            base: InitialInfo {
116
0
                text,
117
0
                original_classes,
118
0
                paragraphs,
119
0
            },
120
0
            pure_ltr,
121
0
        }
122
0
    }
123
}
124
125
/// Bidi information of the text (UTF-16 version).
126
///
127
/// The `original_classes` and `levels` vectors are indexed by code unit offsets into the text.  If a
128
/// character is multiple code units wide, then its class and level will appear multiple times in these
129
/// vectors.
130
// TODO: Impl `struct StringProperty<T> { values: Vec<T> }` and use instead of Vec<T>
131
#[derive(Debug, PartialEq)]
132
pub struct BidiInfo<'text> {
133
    /// The text
134
    pub text: &'text [u16],
135
136
    /// The BidiClass of the character at each code unit in the text.
137
    pub original_classes: Vec<BidiClass>,
138
139
    /// The directional embedding level of each code unit in the text.
140
    pub levels: Vec<Level>,
141
142
    /// The boundaries and paragraph embedding level of each paragraph within the text.
143
    ///
144
    /// TODO: Use SmallVec or similar to avoid overhead when there are only one or two paragraphs?
145
    /// Or just don't include the first paragraph, which always starts at 0?
146
    pub paragraphs: Vec<ParagraphInfo>,
147
}
148
149
impl<'text> BidiInfo<'text> {
150
    /// Split the text into paragraphs and determine the bidi embedding levels for each paragraph.
151
    ///
152
    ///
153
    /// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
154
    ///
155
    /// TODO: In early steps, check for special cases that allow later steps to be skipped, like
156
    /// text that is entirely LTR.  See the `nsBidi` class from Gecko for comparison.
157
    ///
158
    /// TODO: Support auto-RTL base direction
159
    #[cfg_attr(feature = "flame_it", flamer::flame)]
160
    #[cfg(feature = "hardcoded-data")]
161
    #[inline]
162
0
    pub fn new(text: &[u16], default_para_level: Option<Level>) -> BidiInfo<'_> {
163
0
        Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
164
0
    }
165
166
    /// Split the text into paragraphs and determine the bidi embedding levels for each paragraph, with a custom [`BidiDataSource`]
167
    /// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`BidiInfo::new()`]
168
    /// instead (enabled with the default `hardcoded-data` Cargo feature).
169
    ///
170
    /// TODO: In early steps, check for special cases that allow later steps to be skipped, like
171
    /// text that is entirely LTR.  See the `nsBidi` class from Gecko for comparison.
172
    ///
173
    /// TODO: Support auto-RTL base direction
174
    #[cfg_attr(feature = "flame_it", flamer::flame)]
175
0
    pub fn new_with_data_source<'a, D: BidiDataSource>(
176
0
        data_source: &D,
177
0
        text: &'a [u16],
178
0
        default_para_level: Option<Level>,
179
0
    ) -> BidiInfo<'a> {
180
0
        let InitialInfoExt { base, pure_ltr, .. } =
181
0
            InitialInfoExt::new_with_data_source(data_source, text, default_para_level);
182
0
183
0
        let mut levels = Vec::<Level>::with_capacity(text.len());
184
0
        let mut processing_classes = base.original_classes.clone();
185
186
0
        for (para, is_pure_ltr) in base.paragraphs.iter().zip(pure_ltr.iter()) {
187
0
            let text = &text[para.range.clone()];
188
0
            let original_classes = &base.original_classes[para.range.clone()];
189
0
190
0
            compute_bidi_info_for_para(
191
0
                data_source,
192
0
                para,
193
0
                *is_pure_ltr,
194
0
                text,
195
0
                original_classes,
196
0
                &mut processing_classes,
197
0
                &mut levels,
198
0
            );
199
0
        }
200
201
0
        BidiInfo {
202
0
            text,
203
0
            original_classes: base.original_classes,
204
0
            paragraphs: base.paragraphs,
205
0
            levels,
206
0
        }
207
0
    }
208
209
    /// Produce the levels for this paragraph as needed for reordering, one level per *code unit*
210
    /// in the paragraph. The returned vector includes code units that are not included
211
    /// in the `line`, but will not adjust them.
212
    ///
213
    /// This runs [Rule L1], you can run
214
    /// [Rule L2] by calling [`Self::reorder_visual()`].
215
    /// If doing so, you may prefer to use [`Self::reordered_levels_per_char()`] instead
216
    /// to avoid non-byte indices.
217
    ///
218
    /// For an all-in-one reordering solution, consider using [`Self::reorder_visual()`].
219
    ///
220
    /// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
221
    /// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
222
    #[cfg_attr(feature = "flame_it", flamer::flame)]
223
0
    pub fn reordered_levels(&self, para: &ParagraphInfo, line: Range<usize>) -> Vec<Level> {
224
0
        assert!(line.start <= self.levels.len());
225
0
        assert!(line.end <= self.levels.len());
226
227
0
        let mut levels = self.levels.clone();
228
0
        let line_classes = &self.original_classes[line.clone()];
229
0
        let line_levels = &mut levels[line.clone()];
230
0
        let line_str: &[u16] = &self.text[line.clone()];
231
0
232
0
        reorder_levels(line_classes, line_levels, line_str, para.level);
233
0
234
0
        levels
235
0
    }
236
237
    /// Produce the levels for this paragraph as needed for reordering, one level per *character*
238
    /// in the paragraph. The returned vector includes characters that are not included
239
    /// in the `line`, but will not adjust them.
240
    ///
241
    /// This runs [Rule L1], you can run
242
    /// [Rule L2] by calling [`Self::reorder_visual()`].
243
    /// If doing so, you may prefer to use [`Self::reordered_levels_per_char()`] instead
244
    /// to avoid non-byte indices.
245
    ///
246
    /// For an all-in-one reordering solution, consider using [`Self::reorder_visual()`].
247
    ///
248
    /// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
249
    /// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
250
    #[cfg_attr(feature = "flame_it", flamer::flame)]
251
0
    pub fn reordered_levels_per_char(
252
0
        &self,
253
0
        para: &ParagraphInfo,
254
0
        line: Range<usize>,
255
0
    ) -> Vec<Level> {
256
0
        let levels = self.reordered_levels(para, line);
257
0
        self.text.char_indices().map(|(i, _)| levels[i]).collect()
258
0
    }
259
260
    /// Re-order a line based on resolved levels and return the line in display order.
261
    ///
262
    /// This does not apply [Rule L3] or [Rule L4] around combining characters or mirroring.
263
    ///
264
    /// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
265
    /// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
266
    #[cfg_attr(feature = "flame_it", flamer::flame)]
267
0
    pub fn reorder_line(&self, para: &ParagraphInfo, line: Range<usize>) -> Cow<'text, [u16]> {
268
0
        if !level::has_rtl(&self.levels[line.clone()]) {
269
0
            return self.text[line].into();
270
0
        }
271
0
        let (levels, runs) = self.visual_runs(para, line.clone());
272
0
        reorder_line(self.text, line, levels, runs)
273
0
    }
274
275
    /// Reorders pre-calculated levels of a sequence of characters.
276
    ///
277
    /// NOTE: This is a convenience method that does not use a `Paragraph`  object. It is
278
    /// intended to be used when an application has determined the levels of the objects (character sequences)
279
    /// and just needs to have them reordered.
280
    ///
281
    /// The index map will result in `indexMap[visualIndex]==logicalIndex`.
282
    ///
283
    /// This only runs [Rule L2](http://www.unicode.org/reports/tr9/#L2) as it does not have
284
    /// information about the actual text.
285
    ///
286
    /// Furthermore, if `levels` is an array that is aligned with code units, code units within a code point may be
287
    /// reversed. You may need to fix up the map to deal with this. Alternatively, only pass in arrays where each `Level`
288
    /// is for a single code point.
289
    ///
290
    ///
291
    /// # Example
292
    /// ```
293
    /// use unicode_bidi::BidiInfo;
294
    /// use unicode_bidi::Level;
295
    ///
296
    /// let l0 = Level::from(0);
297
    /// let l1 = Level::from(1);
298
    /// let l2 = Level::from(2);
299
    ///
300
    /// let levels = vec![l0, l0, l0, l0];
301
    /// let index_map = BidiInfo::reorder_visual(&levels);
302
    /// assert_eq!(levels.len(), index_map.len());
303
    /// assert_eq!(index_map, [0, 1, 2, 3]);
304
    ///
305
    /// let levels: Vec<Level> = vec![l0, l0, l0, l1, l1, l1, l2, l2];
306
    /// let index_map = BidiInfo::reorder_visual(&levels);
307
    /// assert_eq!(levels.len(), index_map.len());
308
    /// assert_eq!(index_map, [0, 1, 2, 6, 7, 5, 4, 3]);
309
    /// ```
310
    #[cfg_attr(feature = "flame_it", flamer::flame)]
311
    #[inline]
312
0
    pub fn reorder_visual(levels: &[Level]) -> Vec<usize> {
313
0
        reorder_visual(levels)
314
0
    }
315
316
    /// Find the level runs within a line and return them in visual order.
317
    ///
318
    /// `line` is a range of code-unit indices within `levels`.
319
    ///
320
    /// The first return value is a vector of levels used by the reordering algorithm,
321
    /// i.e. the result of [Rule L1]. The second return value is a vector of level runs,
322
    /// the result of [Rule L2], showing the visual order that each level run (a run of text with the
323
    /// same level) should be displayed. Within each run, the display order can be checked
324
    /// against the Level vector.
325
    ///
326
    /// This does not handle [Rule L3] (combining characters) or [Rule L4] (mirroring),
327
    /// as that should be handled by the engine using this API.
328
    ///
329
    /// Conceptually, this is the same as running [`Self::reordered_levels()`] followed by
330
    /// [`Self::reorder_visual()`], however it returns the result as a list of level runs instead
331
    /// of producing a level map, since one may wish to deal with the fact that this is operating on
332
    /// code unit rather than character indices.
333
    ///
334
    /// <http://www.unicode.org/reports/tr9/#Reordering_Resolved_Levels>
335
    ///
336
    /// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
337
    /// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
338
    /// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
339
    /// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
340
    #[cfg_attr(feature = "flame_it", flamer::flame)]
341
    #[inline]
342
0
    pub fn visual_runs(
343
0
        &self,
344
0
        para: &ParagraphInfo,
345
0
        line: Range<usize>,
346
0
    ) -> (Vec<Level>, Vec<LevelRun>) {
347
0
        let levels = self.reordered_levels(para, line.clone());
348
0
        visual_runs_for_line(levels, &line)
349
0
    }
350
351
    /// If processed text has any computed RTL levels
352
    ///
353
    /// This information is usually used to skip re-ordering of text when no RTL level is present
354
    #[inline]
355
0
    pub fn has_rtl(&self) -> bool {
356
0
        level::has_rtl(&self.levels)
357
0
    }
358
}
359
360
/// Bidi information of text treated as a single paragraph.
361
///
362
/// The `original_classes` and `levels` vectors are indexed by code unit offsets into the text.  If a
363
/// character is multiple code units wide, then its class and level will appear multiple times in these
364
/// vectors.
365
#[derive(Debug, PartialEq)]
366
pub struct ParagraphBidiInfo<'text> {
367
    /// The text
368
    pub text: &'text [u16],
369
370
    /// The BidiClass of the character at each code unit in the text.
371
    pub original_classes: Vec<BidiClass>,
372
373
    /// The directional embedding level of each code unit in the text.
374
    pub levels: Vec<Level>,
375
376
    /// The paragraph embedding level.
377
    pub paragraph_level: Level,
378
379
    /// Whether the paragraph is purely LTR.
380
    pub is_pure_ltr: bool,
381
}
382
383
impl<'text> ParagraphBidiInfo<'text> {
384
    /// Determine the bidi embedding level.
385
    ///
386
    ///
387
    /// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
388
    ///
389
    /// TODO: In early steps, check for special cases that allow later steps to be skipped, like
390
    /// text that is entirely LTR.  See the `nsBidi` class from Gecko for comparison.
391
    ///
392
    /// TODO: Support auto-RTL base direction
393
    #[cfg_attr(feature = "flame_it", flamer::flame)]
394
    #[cfg(feature = "hardcoded-data")]
395
    #[inline]
396
0
    pub fn new(text: &[u16], default_para_level: Option<Level>) -> ParagraphBidiInfo<'_> {
397
0
        Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
398
0
    }
399
400
    /// Determine the bidi embedding level, with a custom [`BidiDataSource`]
401
    /// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`ParagraphBidiInfo::new()`]
402
    /// instead (enabled with the default `hardcoded-data` Cargo feature).
403
    ///
404
    /// (This is the single-paragraph equivalent of BidiInfo::new_with_data_source,
405
    /// and should be kept in sync with it.)
406
    #[cfg_attr(feature = "flame_it", flamer::flame)]
407
0
    pub fn new_with_data_source<'a, D: BidiDataSource>(
408
0
        data_source: &D,
409
0
        text: &'a [u16],
410
0
        default_para_level: Option<Level>,
411
0
    ) -> ParagraphBidiInfo<'a> {
412
0
        // Here we could create a ParagraphInitialInfo struct to parallel the one
413
0
        // used by BidiInfo, but there doesn't seem any compelling reason for it.
414
0
        let (original_classes, paragraph_level, is_pure_ltr) =
415
0
            compute_initial_info(data_source, text, default_para_level, None);
416
0
417
0
        let mut levels = Vec::<Level>::with_capacity(text.len());
418
0
        let mut processing_classes = original_classes.clone();
419
0
420
0
        let para_info = ParagraphInfo {
421
0
            range: Range {
422
0
                start: 0,
423
0
                end: text.len(),
424
0
            },
425
0
            level: paragraph_level,
426
0
        };
427
0
428
0
        compute_bidi_info_for_para(
429
0
            data_source,
430
0
            &para_info,
431
0
            is_pure_ltr,
432
0
            text,
433
0
            &original_classes,
434
0
            &mut processing_classes,
435
0
            &mut levels,
436
0
        );
437
0
438
0
        ParagraphBidiInfo {
439
0
            text,
440
0
            original_classes,
441
0
            levels,
442
0
            paragraph_level,
443
0
            is_pure_ltr,
444
0
        }
445
0
    }
446
447
    /// Produce the levels for this paragraph as needed for reordering, one level per *code unit*
448
    /// in the paragraph. The returned vector includes code units that are not included
449
    /// in the `line`, but will not adjust them.
450
    ///
451
    /// See BidiInfo::reordered_levels for details.
452
    ///
453
    /// (This should be kept in sync with BidiInfo::reordered_levels.)
454
    #[cfg_attr(feature = "flame_it", flamer::flame)]
455
0
    pub fn reordered_levels(&self, line: Range<usize>) -> Vec<Level> {
456
0
        assert!(line.start <= self.levels.len());
457
0
        assert!(line.end <= self.levels.len());
458
459
0
        let mut levels = self.levels.clone();
460
0
        let line_classes = &self.original_classes[line.clone()];
461
0
        let line_levels = &mut levels[line.clone()];
462
0
463
0
        reorder_levels(
464
0
            line_classes,
465
0
            line_levels,
466
0
            self.text.subrange(line),
467
0
            self.paragraph_level,
468
0
        );
469
0
470
0
        levels
471
0
    }
472
473
    /// Produce the levels for this paragraph as needed for reordering, one level per *character*
474
    /// in the paragraph. The returned vector includes characters that are not included
475
    /// in the `line`, but will not adjust them.
476
    ///
477
    /// See BidiInfo::reordered_levels_per_char for details.
478
    ///
479
    /// (This should be kept in sync with BidiInfo::reordered_levels_per_char.)
480
    #[cfg_attr(feature = "flame_it", flamer::flame)]
481
0
    pub fn reordered_levels_per_char(&self, line: Range<usize>) -> Vec<Level> {
482
0
        let levels = self.reordered_levels(line);
483
0
        self.text.char_indices().map(|(i, _)| levels[i]).collect()
484
0
    }
485
486
    /// Re-order a line based on resolved levels and return the line in display order.
487
    ///
488
    /// See BidiInfo::reorder_line for details.
489
    ///
490
    /// (This should be kept in sync with BidiInfo::reorder_line.)
491
    #[cfg_attr(feature = "flame_it", flamer::flame)]
492
0
    pub fn reorder_line(&self, line: Range<usize>) -> Cow<'text, [u16]> {
493
0
        if !level::has_rtl(&self.levels[line.clone()]) {
494
0
            return self.text[line].into();
495
0
        }
496
0
        let (levels, runs) = self.visual_runs(line.clone());
497
0
        reorder_line(self.text, line, levels, runs)
498
0
    }
499
500
    /// Reorders pre-calculated levels of a sequence of characters.
501
    ///
502
    /// See BidiInfo::reorder_visual for details.
503
    #[cfg_attr(feature = "flame_it", flamer::flame)]
504
    #[inline]
505
0
    pub fn reorder_visual(levels: &[Level]) -> Vec<usize> {
506
0
        reorder_visual(levels)
507
0
    }
508
509
    /// Find the level runs within a line and return them in visual order.
510
    ///
511
    /// `line` is a range of code-unit indices within `levels`.
512
    ///
513
    /// See `BidiInfo::visual_runs` for details.
514
    ///
515
    /// (This should be kept in sync with BidiInfo::visual_runs.)
516
    #[cfg_attr(feature = "flame_it", flamer::flame)]
517
    #[inline]
518
0
    pub fn visual_runs(&self, line: Range<usize>) -> (Vec<Level>, Vec<LevelRun>) {
519
0
        let levels = self.reordered_levels(line.clone());
520
0
        visual_runs_for_line(levels, &line)
521
0
    }
522
523
    /// If processed text has any computed RTL levels
524
    ///
525
    /// This information is usually used to skip re-ordering of text when no RTL level is present
526
    #[inline]
527
0
    pub fn has_rtl(&self) -> bool {
528
0
        !self.is_pure_ltr
529
0
    }
530
531
    /// Return the paragraph's Direction (Ltr, Rtl, or Mixed) based on its levels.
532
    #[inline]
533
0
    pub fn direction(&self) -> Direction {
534
0
        para_direction(&self.levels)
535
0
    }
536
}
537
538
/// Return a line of the text in display order based on resolved levels.
539
///
540
/// `text`   the full text passed to the `BidiInfo` or `ParagraphBidiInfo` for analysis
541
/// `line`   a range of code-unit indices within `text` corresponding to one line
542
/// `levels` array of `Level` values, with `line`'s levels reordered into visual order
543
/// `runs`   array of `LevelRun`s in visual order
544
///
545
/// (`levels` and `runs` are the result of calling `BidiInfo::visual_runs()` or
546
/// `ParagraphBidiInfo::visual_runs()` for the line of interest.)
547
///
548
/// Returns: the reordered text of the line.
549
///
550
/// This does not apply [Rule L3] or [Rule L4] around combining characters or mirroring.
551
///
552
/// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
553
/// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
554
0
fn reorder_line<'text>(
555
0
    text: &'text [u16],
556
0
    line: Range<usize>,
557
0
    levels: Vec<Level>,
558
0
    runs: Vec<LevelRun>,
559
0
) -> Cow<'text, [u16]> {
560
0
    // If all isolating run sequences are LTR, no reordering is needed
561
0
    if runs.iter().all(|run| levels[run.start].is_ltr()) {
562
0
        return text[line].into();
563
0
    }
564
0
565
0
    let mut result = Vec::<u16>::with_capacity(line.len());
566
0
    for run in runs {
567
0
        if levels[run.start].is_rtl() {
568
0
            let mut buf = [0; 2];
569
0
            for c in text[run].chars().rev() {
570
0
                result.extend(c.encode_utf16(&mut buf).iter());
571
0
            }
572
0
        } else {
573
0
            result.extend(text[run].iter());
574
0
        }
575
    }
576
0
    result.into()
577
0
}
578
579
/// Holds a reference to a `BidiInfo` and to one of its `paragraphs`.
580
/// It supports all operations on the paragraph that also need its
581
/// `BidiInfo` such as `direction`.
582
#[derive(Debug)]
583
pub struct Paragraph<'a, 'text> {
584
    pub info: &'a BidiInfo<'text>,
585
    pub para: &'a ParagraphInfo,
586
}
587
588
impl<'a, 'text> Paragraph<'a, 'text> {
589
    #[inline]
590
0
    pub fn new(info: &'a BidiInfo<'text>, para: &'a ParagraphInfo) -> Paragraph<'a, 'text> {
591
0
        Paragraph { info, para }
592
0
    }
593
594
    /// Returns whether the paragraph is left-to-right, right-to-left, or mixed.
595
    #[inline]
596
0
    pub fn direction(&self) -> Direction {
597
0
        para_direction(&self.info.levels[self.para.range.clone()])
598
0
    }
599
600
    /// Returns the `Level` of a certain character in the paragraph.
601
    #[inline]
602
0
    pub fn level_at(&self, pos: usize) -> Level {
603
0
        let actual_position = self.para.range.start + pos;
604
0
        self.info.levels[actual_position]
605
0
    }
606
}
607
608
/// Implementation of TextSource for UTF-16 text in a [u16] array.
609
/// Note that there could be unpaired surrogates present!
610
611
// Convenience functions to check whether a UTF16 code unit is a surrogate.
612
#[inline]
613
0
fn is_high_surrogate(code: u16) -> bool {
614
0
    (code & 0xFC00) == 0xD800
615
0
}
616
#[inline]
617
0
fn is_low_surrogate(code: u16) -> bool {
618
0
    (code & 0xFC00) == 0xDC00
619
0
}
620
621
impl<'text> TextSource<'text> for [u16] {
622
    type CharIter = Utf16CharIter<'text>;
623
    type CharIndexIter = Utf16CharIndexIter<'text>;
624
    type IndexLenIter = Utf16IndexLenIter<'text>;
625
626
    #[inline]
627
0
    fn len(&self) -> usize {
628
0
        (self as &[u16]).len()
629
0
    }
630
0
    fn char_at(&self, index: usize) -> Option<(char, usize)> {
631
0
        if index >= self.len() {
632
0
            return None;
633
0
        }
634
0
        // Get the indicated code unit and try simply converting it to a char;
635
0
        // this will fail if it is half of a surrogate pair.
636
0
        let c = self[index];
637
0
        if let Some(ch) = char::from_u32(c.into()) {
638
0
            return Some((ch, 1));
639
0
        }
640
0
        // If it's a low surrogate, and was immediately preceded by a high surrogate,
641
0
        // then we're in the middle of a (valid) character, and should return None.
642
0
        if is_low_surrogate(c) && index > 0 && is_high_surrogate(self[index - 1]) {
643
0
            return None;
644
0
        }
645
        // Otherwise, try to decode, returning REPLACEMENT_CHARACTER for errors.
646
0
        if let Some(ch) = char::decode_utf16(self[index..].iter().cloned()).next() {
647
0
            if let Ok(ch) = ch {
648
                // This must be a surrogate pair, otherwise char::from_u32() above should
649
                // have succeeded!
650
0
                debug_assert!(ch.len_utf16() == 2, "BMP should have already been handled");
651
0
                return Some((ch, ch.len_utf16()));
652
0
            }
653
        } else {
654
0
            debug_assert!(
655
0
                false,
656
                "Why did decode_utf16 return None when we're not at the end?"
657
            );
658
0
            return None;
659
        }
660
        // Failed to decode UTF-16: we must have encountered an unpaired surrogate.
661
        // Return REPLACEMENT_CHARACTER (not None), to continue processing the following text
662
        // and keep indexing correct.
663
0
        Some((char::REPLACEMENT_CHARACTER, 1))
664
0
    }
665
    #[inline]
666
0
    fn subrange(&self, range: Range<usize>) -> &Self {
667
0
        &(self as &[u16])[range]
668
0
    }
669
    #[inline]
670
0
    fn chars(&'text self) -> Self::CharIter {
671
0
        Utf16CharIter::new(&self)
672
0
    }
673
    #[inline]
674
0
    fn char_indices(&'text self) -> Self::CharIndexIter {
675
0
        Utf16CharIndexIter::new(&self)
676
0
    }
677
    #[inline]
678
0
    fn indices_lengths(&'text self) -> Self::IndexLenIter {
679
0
        Utf16IndexLenIter::new(&self)
680
0
    }
681
    #[inline]
682
0
    fn char_len(ch: char) -> usize {
683
0
        ch.len_utf16()
684
0
    }
685
}
686
687
/// Iterator over UTF-16 text in a [u16] slice, returning (index, char_len) tuple.
688
#[derive(Debug)]
689
pub struct Utf16IndexLenIter<'text> {
690
    text: &'text [u16],
691
    cur_pos: usize,
692
}
693
694
impl<'text> Utf16IndexLenIter<'text> {
695
    #[inline]
696
0
    pub fn new(text: &'text [u16]) -> Self {
697
0
        Utf16IndexLenIter { text, cur_pos: 0 }
698
0
    }
699
}
700
701
impl Iterator for Utf16IndexLenIter<'_> {
702
    type Item = (usize, usize);
703
704
    #[inline]
705
0
    fn next(&mut self) -> Option<Self::Item> {
706
0
        if let Some((_, char_len)) = self.text.char_at(self.cur_pos) {
707
0
            let result = (self.cur_pos, char_len);
708
0
            self.cur_pos += char_len;
709
0
            return Some(result);
710
0
        }
711
0
        None
712
0
    }
713
}
714
715
/// Iterator over UTF-16 text in a [u16] slice, returning (index, char) tuple.
716
#[derive(Debug)]
717
pub struct Utf16CharIndexIter<'text> {
718
    text: &'text [u16],
719
    cur_pos: usize,
720
}
721
722
impl<'text> Utf16CharIndexIter<'text> {
723
0
    pub fn new(text: &'text [u16]) -> Self {
724
0
        Utf16CharIndexIter { text, cur_pos: 0 }
725
0
    }
726
}
727
728
impl Iterator for Utf16CharIndexIter<'_> {
729
    type Item = (usize, char);
730
731
0
    fn next(&mut self) -> Option<Self::Item> {
732
0
        if let Some((ch, char_len)) = self.text.char_at(self.cur_pos) {
733
0
            let result = (self.cur_pos, ch);
734
0
            self.cur_pos += char_len;
735
0
            return Some(result);
736
0
        }
737
0
        None
738
0
    }
739
}
740
741
/// Iterator over UTF-16 text in a [u16] slice, returning Unicode chars.
742
/// (Unlike the other iterators above, this also supports reverse iteration.)
743
#[derive(Debug)]
744
pub struct Utf16CharIter<'text> {
745
    text: &'text [u16],
746
    cur_pos: usize,
747
    end_pos: usize,
748
}
749
750
impl<'text> Utf16CharIter<'text> {
751
0
    pub fn new(text: &'text [u16]) -> Self {
752
0
        Utf16CharIter {
753
0
            text,
754
0
            cur_pos: 0,
755
0
            end_pos: text.len(),
756
0
        }
757
0
    }
758
}
759
760
impl Iterator for Utf16CharIter<'_> {
761
    type Item = char;
762
763
0
    fn next(&mut self) -> Option<Self::Item> {
764
0
        if let Some((ch, char_len)) = self.text.char_at(self.cur_pos) {
765
0
            self.cur_pos += char_len;
766
0
            return Some(ch);
767
0
        }
768
0
        None
769
0
    }
770
}
771
772
impl DoubleEndedIterator for Utf16CharIter<'_> {
773
0
    fn next_back(&mut self) -> Option<Self::Item> {
774
0
        if self.end_pos <= self.cur_pos {
775
0
            return None;
776
0
        }
777
0
        self.end_pos -= 1;
778
0
        if let Some(ch) = char::from_u32(self.text[self.end_pos] as u32) {
779
0
            return Some(ch);
780
0
        }
781
0
        if self.end_pos > self.cur_pos {
782
0
            if let Some((ch, char_len)) = self.text.char_at(self.end_pos - 1) {
783
0
                if char_len == 2 {
784
0
                    self.end_pos -= 1;
785
0
                    return Some(ch);
786
0
                }
787
0
            }
788
0
        }
789
0
        Some(char::REPLACEMENT_CHARACTER)
790
0
    }
791
}