Coverage Report

Created: 2025-06-16 06:50

/rust/registry/src/index.crates.io-6f17d22bba15001f/unicode-bidi-0.3.18/src/utf16.rs
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2023 The Mozilla Foundation. See the
2
// COPYRIGHT file at the top-level directory of this distribution.
3
//
4
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7
// option. This file may not be copied, modified, or distributed
8
// except according to those terms.
9
10
use super::TextSource;
11
12
use alloc::borrow::Cow;
13
use alloc::vec::Vec;
14
use core::char;
15
use core::ops::Range;
16
17
use crate::{
18
    compute_bidi_info_for_para, compute_initial_info, level, para_direction, reorder_levels,
19
    reorder_visual, visual_runs_for_line,
20
};
21
use crate::{
22
    BidiClass, BidiDataSource, Direction, Level, LevelRun, ParagraphInfo, ParagraphInfoFlags,
23
};
24
25
#[cfg(feature = "hardcoded-data")]
26
use crate::HardcodedBidiData;
27
28
/// Initial bidi information of the text (UTF-16 version).
29
///
30
/// Contains the text paragraphs and `BidiClass` of its characters.
31
#[derive(PartialEq, Debug)]
32
pub struct InitialInfo<'text> {
33
    /// The text
34
    pub text: &'text [u16],
35
36
    /// The BidiClass of the character at each code unit in the text.
37
    /// If a character is multiple code units, its class will appear multiple times in the vector.
38
    pub original_classes: Vec<BidiClass>,
39
40
    /// The boundaries and level of each paragraph within the text.
41
    pub paragraphs: Vec<ParagraphInfo>,
42
}
43
44
impl<'text> InitialInfo<'text> {
45
    /// Find the paragraphs and BidiClasses in a string of text.
46
    ///
47
    /// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
48
    ///
49
    /// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong
50
    /// character is found before the matching PDI.  If no strong character is found, the class will
51
    /// remain FSI, and it's up to later stages to treat these as LRI when needed.
52
    ///
53
    /// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
54
    #[cfg_attr(feature = "flame_it", flamer::flame)]
55
    #[cfg(feature = "hardcoded-data")]
56
    pub fn new(text: &[u16], default_para_level: Option<Level>) -> InitialInfo<'_> {
57
        Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
58
    }
59
60
    /// Find the paragraphs and BidiClasses in a string of text, with a custom [`BidiDataSource`]
61
    /// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`InitialInfo::new()`]
62
    /// instead (enabled with tbe default `hardcoded-data` Cargo feature)
63
    ///
64
    /// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
65
    ///
66
    /// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong
67
    /// character is found before the matching PDI.  If no strong character is found, the class will
68
    /// remain FSI, and it's up to later stages to treat these as LRI when needed.
69
    #[cfg_attr(feature = "flame_it", flamer::flame)]
70
0
    pub fn new_with_data_source<'a, D: BidiDataSource>(
71
0
        data_source: &D,
72
0
        text: &'a [u16],
73
0
        default_para_level: Option<Level>,
74
0
    ) -> InitialInfo<'a> {
75
0
        InitialInfoExt::new_with_data_source(data_source, text, default_para_level).base
76
0
    }
77
}
78
79
/// Extended version of InitialInfo (not public API).
80
#[derive(PartialEq, Debug)]
81
struct InitialInfoExt<'text> {
82
    /// The base InitialInfo for the text, recording its paragraphs and bidi classes.
83
    base: InitialInfo<'text>,
84
85
    /// Parallel to base.paragraphs, records whether each paragraph is "pure LTR" that
86
    /// requires no further bidi processing (i.e. there are no RTL characters or bidi
87
    /// control codes present).
88
    flags: Vec<ParagraphInfoFlags>,
89
}
90
91
impl<'text> InitialInfoExt<'text> {
92
    /// Find the paragraphs and BidiClasses in a string of text, with a custom [`BidiDataSource`]
93
    /// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`InitialInfo::new()`]
94
    /// instead (enabled with tbe default `hardcoded-data` Cargo feature)
95
    ///
96
    /// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
97
    ///
98
    /// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong
99
    /// character is found before the matching PDI.  If no strong character is found, the class will
100
    /// remain FSI, and it's up to later stages to treat these as LRI when needed.
101
    #[cfg_attr(feature = "flame_it", flamer::flame)]
102
0
    pub fn new_with_data_source<'a, D: BidiDataSource>(
103
0
        data_source: &D,
104
0
        text: &'a [u16],
105
0
        default_para_level: Option<Level>,
106
0
    ) -> InitialInfoExt<'a> {
107
0
        let mut paragraphs = Vec::<ParagraphInfo>::new();
108
0
        let mut flags = Vec::<ParagraphInfoFlags>::new();
109
0
        let (original_classes, _, _, _) = compute_initial_info(
110
0
            data_source,
111
0
            text,
112
0
            default_para_level,
113
0
            Some((&mut paragraphs, &mut flags)),
114
0
        );
115
0
116
0
        InitialInfoExt {
117
0
            base: InitialInfo {
118
0
                text,
119
0
                original_classes,
120
0
                paragraphs,
121
0
            },
122
0
            flags,
123
0
        }
124
0
    }
125
}
126
127
/// Bidi information of the text (UTF-16 version).
128
///
129
/// The `original_classes` and `levels` vectors are indexed by code unit offsets into the text.  If a
130
/// character is multiple code units wide, then its class and level will appear multiple times in these
131
/// vectors.
132
// TODO: Impl `struct StringProperty<T> { values: Vec<T> }` and use instead of Vec<T>
133
#[derive(Debug, PartialEq)]
134
pub struct BidiInfo<'text> {
135
    /// The text
136
    pub text: &'text [u16],
137
138
    /// The BidiClass of the character at each byte in the text.
139
    pub original_classes: Vec<BidiClass>,
140
141
    /// The directional embedding level of each byte in the text.
142
    pub levels: Vec<Level>,
143
144
    /// The boundaries and paragraph embedding level of each paragraph within the text.
145
    ///
146
    /// TODO: Use SmallVec or similar to avoid overhead when there are only one or two paragraphs?
147
    /// Or just don't include the first paragraph, which always starts at 0?
148
    pub paragraphs: Vec<ParagraphInfo>,
149
}
150
151
impl<'text> BidiInfo<'text> {
152
    /// Split the text into paragraphs and determine the bidi embedding levels for each paragraph.
153
    ///
154
    ///
155
    /// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
156
    ///
157
    /// TODO: In early steps, check for special cases that allow later steps to be skipped. like
158
    /// text that is entirely LTR.  See the `nsBidi` class from Gecko for comparison.
159
    ///
160
    /// TODO: Support auto-RTL base direction
161
    #[cfg_attr(feature = "flame_it", flamer::flame)]
162
    #[cfg(feature = "hardcoded-data")]
163
    #[inline]
164
    pub fn new(text: &[u16], default_para_level: Option<Level>) -> BidiInfo<'_> {
165
        Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
166
    }
167
168
    /// Split the text into paragraphs and determine the bidi embedding levels for each paragraph, with a custom [`BidiDataSource`]
169
    /// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`BidiInfo::new()`]
170
    /// instead (enabled with tbe default `hardcoded-data` Cargo feature).
171
    ///
172
    /// TODO: In early steps, check for special cases that allow later steps to be skipped. like
173
    /// text that is entirely LTR.  See the `nsBidi` class from Gecko for comparison.
174
    ///
175
    /// TODO: Support auto-RTL base direction
176
    #[cfg_attr(feature = "flame_it", flamer::flame)]
177
0
    pub fn new_with_data_source<'a, D: BidiDataSource>(
178
0
        data_source: &D,
179
0
        text: &'a [u16],
180
0
        default_para_level: Option<Level>,
181
0
    ) -> BidiInfo<'a> {
182
0
        let InitialInfoExt { base, flags, .. } =
183
0
            InitialInfoExt::new_with_data_source(data_source, text, default_para_level);
184
0
185
0
        let mut levels = Vec::<Level>::with_capacity(text.len());
186
0
        let mut processing_classes = base.original_classes.clone();
187
188
0
        for (para, flags) in base.paragraphs.iter().zip(flags.iter()) {
189
0
            let text = &text[para.range.clone()];
190
0
            let original_classes = &base.original_classes[para.range.clone()];
191
0
192
0
            compute_bidi_info_for_para(
193
0
                data_source,
194
0
                para,
195
0
                flags.is_pure_ltr,
196
0
                flags.has_isolate_controls,
197
0
                text,
198
0
                original_classes,
199
0
                &mut processing_classes,
200
0
                &mut levels,
201
0
            );
202
0
        }
203
204
0
        BidiInfo {
205
0
            text,
206
0
            original_classes: base.original_classes,
207
0
            paragraphs: base.paragraphs,
208
0
            levels,
209
0
        }
210
0
    }
211
212
    /// Produce the levels for this paragraph as needed for reordering, one level per *byte*
213
    /// in the paragraph. The returned vector includes bytes that are not included
214
    /// in the `line`, but will not adjust them.
215
    ///
216
    /// This runs [Rule L1], you can run
217
    /// [Rule L2] by calling [`Self::reorder_visual()`].
218
    /// If doing so, you may prefer to use [`Self::reordered_levels_per_char()`] instead
219
    /// to avoid non-byte indices.
220
    ///
221
    /// For an all-in-one reordering solution, consider using [`Self::reorder_visual()`].
222
    ///
223
    /// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
224
    /// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
225
    #[cfg_attr(feature = "flame_it", flamer::flame)]
226
0
    pub fn reordered_levels(&self, para: &ParagraphInfo, line: Range<usize>) -> Vec<Level> {
227
0
        assert!(line.start <= self.levels.len());
228
0
        assert!(line.end <= self.levels.len());
229
230
0
        let mut levels = self.levels.clone();
231
0
        let line_classes = &self.original_classes[line.clone()];
232
0
        let line_levels = &mut levels[line.clone()];
233
0
        let line_str: &[u16] = &self.text[line.clone()];
234
0
235
0
        reorder_levels(line_classes, line_levels, line_str, para.level);
236
0
237
0
        levels
238
0
    }
239
240
    /// Produce the levels for this paragraph as needed for reordering, one level per *character*
241
    /// in the paragraph. The returned vector includes characters that are not included
242
    /// in the `line`, but will not adjust them.
243
    ///
244
    /// This runs [Rule L1], you can run
245
    /// [Rule L2] by calling [`Self::reorder_visual()`].
246
    /// If doing so, you may prefer to use [`Self::reordered_levels_per_char()`] instead
247
    /// to avoid non-byte indices.
248
    ///
249
    /// For an all-in-one reordering solution, consider using [`Self::reorder_visual()`].
250
    ///
251
    /// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
252
    /// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
253
    #[cfg_attr(feature = "flame_it", flamer::flame)]
254
0
    pub fn reordered_levels_per_char(
255
0
        &self,
256
0
        para: &ParagraphInfo,
257
0
        line: Range<usize>,
258
0
    ) -> Vec<Level> {
259
0
        let levels = self.reordered_levels(para, line);
260
0
        self.text.char_indices().map(|(i, _)| levels[i]).collect()
261
0
    }
262
263
    /// Re-order a line based on resolved levels and return the line in display order.
264
    ///
265
    /// This does not apply [Rule L3] or [Rule L4] around combining characters or mirroring.
266
    ///
267
    /// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
268
    /// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
269
    #[cfg_attr(feature = "flame_it", flamer::flame)]
270
0
    pub fn reorder_line(&self, para: &ParagraphInfo, line: Range<usize>) -> Cow<'text, [u16]> {
271
0
        if !level::has_rtl(&self.levels[line.clone()]) {
272
0
            return self.text[line].into();
273
0
        }
274
0
        let (levels, runs) = self.visual_runs(para, line.clone());
275
0
        reorder_line(self.text, line, levels, runs)
276
0
    }
277
278
    /// Reorders pre-calculated levels of a sequence of characters.
279
    ///
280
    /// NOTE: This is a convenience method that does not use a `Paragraph`  object. It is
281
    /// intended to be used when an application has determined the levels of the objects (character sequences)
282
    /// and just needs to have them reordered.
283
    ///
284
    /// the index map will result in `indexMap[visualIndex]==logicalIndex`.
285
    ///
286
    /// This only runs [Rule L2](http://www.unicode.org/reports/tr9/#L2) as it does not have
287
    /// information about the actual text.
288
    ///
289
    /// Furthermore, if `levels` is an array that is aligned with code units, bytes within a codepoint may be
290
    /// reversed. You may need to fix up the map to deal with this. Alternatively, only pass in arrays where each `Level`
291
    /// is for a single code point.
292
    ///
293
    ///
294
    ///   # # Example
295
    /// ```
296
    /// use unicode_bidi::BidiInfo;
297
    /// use unicode_bidi::Level;
298
    ///
299
    /// let l0 = Level::from(0);
300
    /// let l1 = Level::from(1);
301
    /// let l2 = Level::from(2);
302
    ///
303
    /// let levels = vec![l0, l0, l0, l0];
304
    /// let index_map = BidiInfo::reorder_visual(&levels);
305
    /// assert_eq!(levels.len(), index_map.len());
306
    /// assert_eq!(index_map, [0, 1, 2, 3]);
307
    ///
308
    /// let levels: Vec<Level> = vec![l0, l0, l0, l1, l1, l1, l2, l2];
309
    /// let index_map = BidiInfo::reorder_visual(&levels);
310
    /// assert_eq!(levels.len(), index_map.len());
311
    /// assert_eq!(index_map, [0, 1, 2, 6, 7, 5, 4, 3]);
312
    /// ```
313
    #[cfg_attr(feature = "flame_it", flamer::flame)]
314
    #[inline]
315
0
    pub fn reorder_visual(levels: &[Level]) -> Vec<usize> {
316
0
        reorder_visual(levels)
317
0
    }
318
319
    /// Find the level runs within a line and return them in visual order.
320
    ///
321
    /// `line` is a range of bytes indices within `levels`.
322
    ///
323
    /// The first return value is a vector of levels used by the reordering algorithm,
324
    /// i.e. the result of [Rule L1]. The second return value is a vector of level runs,
325
    /// the result of [Rule L2], showing the visual order that each level run (a run of text with the
326
    /// same level) should be displayed. Within each run, the display order can be checked
327
    /// against the Level vector.
328
    ///
329
    /// This does not handle [Rule L3] (combining characters) or [Rule L4] (mirroring),
330
    /// as that should be handled by the engine using this API.
331
    ///
332
    /// Conceptually, this is the same as running [`Self::reordered_levels()`] followed by
333
    /// [`Self::reorder_visual()`], however it returns the result as a list of level runs instead
334
    /// of producing a level map, since one may wish to deal with the fact that this is operating on
335
    /// byte rather than character indices.
336
    ///
337
    /// <http://www.unicode.org/reports/tr9/#Reordering_Resolved_Levels>
338
    ///
339
    /// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
340
    /// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
341
    /// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
342
    /// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
343
    #[cfg_attr(feature = "flame_it", flamer::flame)]
344
    #[inline]
345
0
    pub fn visual_runs(
346
0
        &self,
347
0
        para: &ParagraphInfo,
348
0
        line: Range<usize>,
349
0
    ) -> (Vec<Level>, Vec<LevelRun>) {
350
0
        let levels = self.reordered_levels(para, line.clone());
351
0
        visual_runs_for_line(levels, &line)
352
0
    }
353
354
    /// If processed text has any computed RTL levels
355
    ///
356
    /// This information is usually used to skip re-ordering of text when no RTL level is present
357
    #[inline]
358
0
    pub fn has_rtl(&self) -> bool {
359
0
        level::has_rtl(&self.levels)
360
0
    }
361
}
362
363
/// Bidi information of text treated as a single paragraph.
364
///
365
/// The `original_classes` and `levels` vectors are indexed by code unit offsets into the text.  If a
366
/// character is multiple code units wide, then its class and level will appear multiple times in these
367
/// vectors.
368
#[derive(Debug, PartialEq)]
369
pub struct ParagraphBidiInfo<'text> {
370
    /// The text
371
    pub text: &'text [u16],
372
373
    /// The BidiClass of the character at each byte in the text.
374
    pub original_classes: Vec<BidiClass>,
375
376
    /// The directional embedding level of each byte in the text.
377
    pub levels: Vec<Level>,
378
379
    /// The paragraph embedding level.
380
    pub paragraph_level: Level,
381
382
    /// Whether the paragraph is purely LTR.
383
    pub is_pure_ltr: bool,
384
}
385
386
impl<'text> ParagraphBidiInfo<'text> {
387
    /// Determine the bidi embedding level.
388
    ///
389
    ///
390
    /// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
391
    ///
392
    /// TODO: In early steps, check for special cases that allow later steps to be skipped. like
393
    /// text that is entirely LTR.  See the `nsBidi` class from Gecko for comparison.
394
    ///
395
    /// TODO: Support auto-RTL base direction
396
    #[cfg_attr(feature = "flame_it", flamer::flame)]
397
    #[cfg(feature = "hardcoded-data")]
398
    #[inline]
399
    pub fn new(text: &[u16], default_para_level: Option<Level>) -> ParagraphBidiInfo<'_> {
400
        Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
401
    }
402
403
    /// Determine the bidi embedding level, with a custom [`BidiDataSource`]
404
    /// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`BidiInfo::new()`]
405
    /// instead (enabled with tbe default `hardcoded-data` Cargo feature).
406
    ///
407
    /// (This is the single-paragraph equivalent of BidiInfo::new_with_data_source,
408
    /// and should be kept in sync with it.
409
    #[cfg_attr(feature = "flame_it", flamer::flame)]
410
0
    pub fn new_with_data_source<'a, D: BidiDataSource>(
411
0
        data_source: &D,
412
0
        text: &'a [u16],
413
0
        default_para_level: Option<Level>,
414
0
    ) -> ParagraphBidiInfo<'a> {
415
0
        // Here we could create a ParagraphInitialInfo struct to parallel the one
416
0
        // used by BidiInfo, but there doesn't seem any compelling reason for it.
417
0
        let (original_classes, paragraph_level, is_pure_ltr, has_isolate_controls) =
418
0
            compute_initial_info(data_source, text, default_para_level, None);
419
0
420
0
        let mut levels = Vec::<Level>::with_capacity(text.len());
421
0
        let mut processing_classes = original_classes.clone();
422
0
423
0
        let para_info = ParagraphInfo {
424
0
            range: Range {
425
0
                start: 0,
426
0
                end: text.len(),
427
0
            },
428
0
            level: paragraph_level,
429
0
        };
430
0
431
0
        compute_bidi_info_for_para(
432
0
            data_source,
433
0
            &para_info,
434
0
            is_pure_ltr,
435
0
            has_isolate_controls,
436
0
            text,
437
0
            &original_classes,
438
0
            &mut processing_classes,
439
0
            &mut levels,
440
0
        );
441
0
442
0
        ParagraphBidiInfo {
443
0
            text,
444
0
            original_classes,
445
0
            levels,
446
0
            paragraph_level,
447
0
            is_pure_ltr,
448
0
        }
449
0
    }
450
451
    /// Produce the levels for this paragraph as needed for reordering, one level per *code unit*
452
    /// in the paragraph. The returned vector includes code units that are not included
453
    /// in the `line`, but will not adjust them.
454
    ///
455
    /// See BidiInfo::reordered_levels for details.
456
    ///
457
    /// (This should be kept in sync with BidiInfo::reordered_levels.)
458
    #[cfg_attr(feature = "flame_it", flamer::flame)]
459
0
    pub fn reordered_levels(&self, line: Range<usize>) -> Vec<Level> {
460
0
        assert!(line.start <= self.levels.len());
461
0
        assert!(line.end <= self.levels.len());
462
463
0
        let mut levels = self.levels.clone();
464
0
        let line_classes = &self.original_classes[line.clone()];
465
0
        let line_levels = &mut levels[line.clone()];
466
0
467
0
        reorder_levels(
468
0
            line_classes,
469
0
            line_levels,
470
0
            self.text.subrange(line),
471
0
            self.paragraph_level,
472
0
        );
473
0
474
0
        levels
475
0
    }
476
477
    /// Produce the levels for this paragraph as needed for reordering, one level per *character*
478
    /// in the paragraph. The returned vector includes characters that are not included
479
    /// in the `line`, but will not adjust them.
480
    ///
481
    /// See BidiInfo::reordered_levels_per_char for details.
482
    ///
483
    /// (This should be kept in sync with BidiInfo::reordered_levels_per_char.)
484
    #[cfg_attr(feature = "flame_it", flamer::flame)]
485
0
    pub fn reordered_levels_per_char(&self, line: Range<usize>) -> Vec<Level> {
486
0
        let levels = self.reordered_levels(line);
487
0
        self.text.char_indices().map(|(i, _)| levels[i]).collect()
488
0
    }
489
490
    /// Re-order a line based on resolved levels and return the line in display order.
491
    ///
492
    /// See BidiInfo::reorder_line for details.
493
    ///
494
    /// (This should be kept in sync with BidiInfo::reorder_line.)
495
    #[cfg_attr(feature = "flame_it", flamer::flame)]
496
0
    pub fn reorder_line(&self, line: Range<usize>) -> Cow<'text, [u16]> {
497
0
        if !level::has_rtl(&self.levels[line.clone()]) {
498
0
            return self.text[line].into();
499
0
        }
500
0
        let (levels, runs) = self.visual_runs(line.clone());
501
0
        reorder_line(self.text, line, levels, runs)
502
0
    }
503
504
    /// Reorders pre-calculated levels of a sequence of characters.
505
    ///
506
    /// See BidiInfo::reorder_visual for details.
507
    #[cfg_attr(feature = "flame_it", flamer::flame)]
508
    #[inline]
509
0
    pub fn reorder_visual(levels: &[Level]) -> Vec<usize> {
510
0
        reorder_visual(levels)
511
0
    }
512
513
    /// Find the level runs within a line and return them in visual order.
514
    ///
515
    /// `line` is a range of code-unit indices within `levels`.
516
    ///
517
    /// See `BidiInfo::visual_runs` for details.
518
    ///
519
    /// (This should be kept in sync with BidiInfo::visual_runs.)
520
    #[cfg_attr(feature = "flame_it", flamer::flame)]
521
    #[inline]
522
0
    pub fn visual_runs(&self, line: Range<usize>) -> (Vec<Level>, Vec<LevelRun>) {
523
0
        let levels = self.reordered_levels(line.clone());
524
0
        visual_runs_for_line(levels, &line)
525
0
    }
526
527
    /// If processed text has any computed RTL levels
528
    ///
529
    /// This information is usually used to skip re-ordering of text when no RTL level is present
530
    #[inline]
531
0
    pub fn has_rtl(&self) -> bool {
532
0
        !self.is_pure_ltr
533
0
    }
534
535
    /// Return the paragraph's Direction (Ltr, Rtl, or Mixed) based on its levels.
536
    #[inline]
537
0
    pub fn direction(&self) -> Direction {
538
0
        para_direction(&self.levels)
539
0
    }
540
}
541
542
/// Return a line of the text in display order based on resolved levels.
543
///
544
/// `text`   the full text passed to the `BidiInfo` or `ParagraphBidiInfo` for analysis
545
/// `line`   a range of byte indices within `text` corresponding to one line
546
/// `levels` array of `Level` values, with `line`'s levels reordered into visual order
547
/// `runs`   array of `LevelRun`s in visual order
548
///
549
/// (`levels` and `runs` are the result of calling `BidiInfo::visual_runs()` or
550
/// `ParagraphBidiInfo::visual_runs()` for the line of interest.)
551
///
552
/// Returns: the reordered text of the line.
553
///
554
/// This does not apply [Rule L3] or [Rule L4] around combining characters or mirroring.
555
///
556
/// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
557
/// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
558
0
fn reorder_line(
559
0
    text: &[u16],
560
0
    line: Range<usize>,
561
0
    levels: Vec<Level>,
562
0
    runs: Vec<LevelRun>,
563
0
) -> Cow<'_, [u16]> {
564
0
    // If all isolating run sequences are LTR, no reordering is needed
565
0
    if runs.iter().all(|run| levels[run.start].is_ltr()) {
566
0
        return text[line].into();
567
0
    }
568
0
569
0
    let mut result = Vec::<u16>::with_capacity(line.len());
570
0
    for run in runs {
571
0
        if levels[run.start].is_rtl() {
572
0
            let mut buf = [0; 2];
573
0
            for c in text[run].chars().rev() {
574
0
                result.extend(c.encode_utf16(&mut buf).iter());
575
0
            }
576
0
        } else {
577
0
            result.extend(text[run].iter());
578
0
        }
579
    }
580
0
    result.into()
581
0
}
582
583
/// Contains a reference of `BidiInfo` and one of its `paragraphs`.
584
/// And it supports all operation in the `Paragraph` that needs also its
585
/// `BidiInfo` such as `direction`.
586
#[derive(Debug)]
587
pub struct Paragraph<'a, 'text> {
588
    pub info: &'a BidiInfo<'text>,
589
    pub para: &'a ParagraphInfo,
590
}
591
592
impl<'a, 'text> Paragraph<'a, 'text> {
593
    #[inline]
594
0
    pub fn new(info: &'a BidiInfo<'text>, para: &'a ParagraphInfo) -> Paragraph<'a, 'text> {
595
0
        Paragraph { info, para }
596
0
    }
597
598
    /// Returns if the paragraph is Left direction, right direction or mixed.
599
    #[inline]
600
0
    pub fn direction(&self) -> Direction {
601
0
        para_direction(&self.info.levels[self.para.range.clone()])
602
0
    }
603
604
    /// Returns the `Level` of a certain character in the paragraph.
605
    #[inline]
606
0
    pub fn level_at(&self, pos: usize) -> Level {
607
0
        let actual_position = self.para.range.start + pos;
608
0
        self.info.levels[actual_position]
609
0
    }
610
}
611
612
// Implementation of TextSource for UTF-16 text in a [u16] array.
// Note that there could be unpaired surrogates present!
614
615
// Convenience functions to check whether a UTF16 code unit is a surrogate.
616
// Returns true when `code` is a high (leading) surrogate, i.e. lies in 0xD800..=0xDBFF.
#[inline]
fn is_high_surrogate(code: u16) -> bool {
    matches!(code, 0xD800..=0xDBFF)
}
620
// Returns true when `code` is a low (trailing) surrogate, i.e. lies in 0xDC00..=0xDFFF.
#[inline]
fn is_low_surrogate(code: u16) -> bool {
    matches!(code, 0xDC00..=0xDFFF)
}
624
625
impl<'text> TextSource<'text> for [u16] {
626
    type CharIter = Utf16CharIter<'text>;
627
    type CharIndexIter = Utf16CharIndexIter<'text>;
628
    type IndexLenIter = Utf16IndexLenIter<'text>;
629
630
    #[inline]
631
0
    fn len(&self) -> usize {
632
0
        (self as &[u16]).len()
633
0
    }
634
0
    fn char_at(&self, index: usize) -> Option<(char, usize)> {
635
0
        if index >= self.len() {
636
0
            return None;
637
0
        }
638
0
        // Get the indicated code unit and try simply converting it to a char;
639
0
        // this will fail if it is half of a surrogate pair.
640
0
        let c = self[index];
641
0
        if let Some(ch) = char::from_u32(c.into()) {
642
0
            return Some((ch, 1));
643
0
        }
644
0
        // If it's a low surrogate, and was immediately preceded by a high surrogate,
645
0
        // then we're in the middle of a (valid) character, and should return None.
646
0
        if is_low_surrogate(c) && index > 0 && is_high_surrogate(self[index - 1]) {
647
0
            return None;
648
0
        }
649
        // Otherwise, try to decode, returning REPLACEMENT_CHARACTER for errors.
650
0
        if let Some(ch) = char::decode_utf16(self[index..].iter().cloned()).next() {
651
0
            if let Ok(ch) = ch {
652
                // This must be a surrogate pair, otherwise char::from_u32() above should
653
                // have succeeded!
654
0
                debug_assert!(ch.len_utf16() == 2, "BMP should have already been handled");
655
0
                return Some((ch, ch.len_utf16()));
656
0
            }
657
        } else {
658
0
            debug_assert!(
659
0
                false,
660
                "Why did decode_utf16 return None when we're not at the end?"
661
            );
662
0
            return None;
663
        }
664
        // Failed to decode UTF-16: we must have encountered an unpaired surrogate.
665
        // Return REPLACEMENT_CHARACTER (not None), to continue processing the following text
666
        // and keep indexing correct.
667
0
        Some((char::REPLACEMENT_CHARACTER, 1))
668
0
    }
669
    #[inline]
670
0
    fn subrange(&self, range: Range<usize>) -> &Self {
671
0
        &(self as &[u16])[range]
672
0
    }
673
    #[inline]
674
0
    fn chars(&'text self) -> Self::CharIter {
675
0
        Utf16CharIter::new(self)
676
0
    }
677
    #[inline]
678
0
    fn char_indices(&'text self) -> Self::CharIndexIter {
679
0
        Utf16CharIndexIter::new(self)
680
0
    }
681
    #[inline]
682
0
    fn indices_lengths(&'text self) -> Self::IndexLenIter {
683
0
        Utf16IndexLenIter::new(self)
684
0
    }
685
    #[inline]
686
0
    fn char_len(ch: char) -> usize {
687
0
        ch.len_utf16()
688
0
    }
689
}
690
691
/// Iterator over UTF-16 text in a [u16] slice, returning (index, char_len) tuple.
692
#[derive(Debug)]
693
pub struct Utf16IndexLenIter<'text> {
694
    text: &'text [u16],
695
    cur_pos: usize,
696
}
697
698
impl<'text> Utf16IndexLenIter<'text> {
699
    #[inline]
700
0
    pub fn new(text: &'text [u16]) -> Self {
701
0
        Utf16IndexLenIter { text, cur_pos: 0 }
702
0
    }
703
}
704
705
impl Iterator for Utf16IndexLenIter<'_> {
706
    type Item = (usize, usize);
707
708
    #[inline]
709
0
    fn next(&mut self) -> Option<Self::Item> {
710
0
        if let Some((_, char_len)) = self.text.char_at(self.cur_pos) {
711
0
            let result = (self.cur_pos, char_len);
712
0
            self.cur_pos += char_len;
713
0
            return Some(result);
714
0
        }
715
0
        None
716
0
    }
717
}
718
719
/// Iterator over UTF-16 text in a [u16] slice, returning (index, char) tuple.
720
#[derive(Debug)]
721
pub struct Utf16CharIndexIter<'text> {
722
    text: &'text [u16],
723
    cur_pos: usize,
724
}
725
726
impl<'text> Utf16CharIndexIter<'text> {
727
0
    pub fn new(text: &'text [u16]) -> Self {
728
0
        Utf16CharIndexIter { text, cur_pos: 0 }
729
0
    }
730
}
731
732
impl Iterator for Utf16CharIndexIter<'_> {
733
    type Item = (usize, char);
734
735
0
    fn next(&mut self) -> Option<Self::Item> {
736
0
        if let Some((ch, char_len)) = self.text.char_at(self.cur_pos) {
737
0
            let result = (self.cur_pos, ch);
738
0
            self.cur_pos += char_len;
739
0
            return Some(result);
740
0
        }
741
0
        None
742
0
    }
743
}
744
745
/// Iterator over UTF-16 text in a [u16] slice, returning Unicode chars.
746
/// (Unlike the other iterators above, this also supports reverse iteration.)
747
#[derive(Debug)]
748
pub struct Utf16CharIter<'text> {
749
    text: &'text [u16],
750
    cur_pos: usize,
751
    end_pos: usize,
752
}
753
754
impl<'text> Utf16CharIter<'text> {
755
0
    pub fn new(text: &'text [u16]) -> Self {
756
0
        Utf16CharIter {
757
0
            text,
758
0
            cur_pos: 0,
759
0
            end_pos: text.len(),
760
0
        }
761
0
    }
762
}
763
764
impl Iterator for Utf16CharIter<'_> {
765
    type Item = char;
766
767
0
    fn next(&mut self) -> Option<Self::Item> {
768
0
        if let Some((ch, char_len)) = self.text.char_at(self.cur_pos) {
769
0
            self.cur_pos += char_len;
770
0
            return Some(ch);
771
0
        }
772
0
        None
773
0
    }
774
}
775
776
impl DoubleEndedIterator for Utf16CharIter<'_> {
777
0
    fn next_back(&mut self) -> Option<Self::Item> {
778
0
        if self.end_pos <= self.cur_pos {
779
0
            return None;
780
0
        }
781
0
        self.end_pos -= 1;
782
0
        if let Some(ch) = char::from_u32(self.text[self.end_pos] as u32) {
783
0
            return Some(ch);
784
0
        }
785
0
        if self.end_pos > self.cur_pos {
786
0
            if let Some((ch, char_len)) = self.text.char_at(self.end_pos - 1) {
787
0
                if char_len == 2 {
788
0
                    self.end_pos -= 1;
789
0
                    return Some(ch);
790
0
                }
791
0
            }
792
0
        }
793
0
        Some(char::REPLACEMENT_CHARACTER)
794
0
    }
795
}