Coverage Report

Created: 2025-08-12 06:35

/rust/registry/src/index.crates.io-6f17d22bba15001f/unicode-bidi-0.3.18/src/lib.rs
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2015 The Servo Project Developers. See the
2
// COPYRIGHT file at the top-level directory of this distribution.
3
//
4
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7
// option. This file may not be copied, modified, or distributed
8
// except according to those terms.
9
10
//! This crate implements the [Unicode Bidirectional Algorithm][tr9] for display of mixed
11
//! right-to-left and left-to-right text.  It is written in safe Rust, compatible with the
12
//! current stable release.
13
//!
14
//! ## Example
15
//!
16
//! ```rust
17
//! # #[cfg(feature = "hardcoded-data")] {
18
//! use unicode_bidi::BidiInfo;
19
//!
20
//! // This example text is defined using `concat!` because some browsers
21
//! // and text editors have trouble displaying bidi strings.
22
//! let text = concat![
23
//!   "א",
24
//!   "ב",
25
//!   "ג",
26
//!   "a",
27
//!   "b",
28
//!   "c",
29
//! ];
30
//!
31
//! // Resolve embedding levels within the text.  Pass `None` to detect the
32
//! // paragraph level automatically.
33
//! let bidi_info = BidiInfo::new(&text, None);
34
//!
35
//! // This paragraph has embedding level 1 because its first strong character is RTL.
36
//! assert_eq!(bidi_info.paragraphs.len(), 1);
37
//! let para = &bidi_info.paragraphs[0];
38
//! assert_eq!(para.level.number(), 1);
39
//! assert_eq!(para.level.is_rtl(), true);
40
//!
41
//! // Re-ordering is done after wrapping each paragraph into a sequence of
42
//! // lines. For this example, I'll just use a single line that spans the
43
//! // entire paragraph.
44
//! let line = para.range.clone();
45
//!
46
//! let display = bidi_info.reorder_line(para, line);
47
//! assert_eq!(display, concat![
48
//!   "a",
49
//!   "b",
50
//!   "c",
51
//!   "ג",
52
//!   "ב",
53
//!   "א",
54
//! ]);
55
//! # } // feature = "hardcoded-data"
56
//! ```
57
//!
58
//! # Features
59
//!
60
//! - `std`: Enabled by default, but can be disabled to make `unicode_bidi`
61
//!   `#![no_std]` + `alloc` compatible.
62
//! - `hardcoded-data`: Enabled by default. Includes hardcoded Unicode bidi data and more convenient APIs.
63
//! - `serde`: Adds [`serde::Serialize`] and [`serde::Deserialize`]
64
//!   implementations to relevant types.
65
//!
66
//! [tr9]: <http://www.unicode.org/reports/tr9/>
67
68
#![no_std]
69
// We need to link to std to make doc tests work on older Rust versions
70
#[cfg(feature = "std")]
71
extern crate std;
72
#[macro_use]
73
extern crate alloc;
74
#[cfg(feature = "smallvec")]
75
extern crate smallvec;
76
77
pub mod data_source;
78
pub mod deprecated;
79
pub mod format_chars;
80
pub mod level;
81
pub mod utf16;
82
83
mod char_data;
84
mod explicit;
85
mod implicit;
86
mod prepare;
87
88
pub use crate::char_data::{BidiClass, UNICODE_VERSION};
89
pub use crate::data_source::BidiDataSource;
90
pub use crate::level::{Level, LTR_LEVEL, RTL_LEVEL};
91
pub use crate::prepare::{LevelRun, LevelRunVec};
92
93
#[cfg(feature = "hardcoded-data")]
94
pub use crate::char_data::{bidi_class, HardcodedBidiData};
95
96
use alloc::borrow::Cow;
97
use alloc::string::String;
98
use alloc::vec::Vec;
99
use core::char;
100
use core::cmp;
101
use core::iter::repeat;
102
use core::ops::Range;
103
use core::str::CharIndices;
104
#[cfg(feature = "smallvec")]
105
use smallvec::SmallVec;
106
107
use crate::format_chars as chars;
108
use crate::BidiClass::*;
109
110
/// Trait that abstracts over a text source for use by the bidi algorithms.
111
/// We implement this for str (UTF-8) and for [u16] (UTF-16, native-endian).
112
/// (For internal unicode-bidi use; API may be unstable.)
113
/// This trait is sealed and cannot be implemented for types outside this crate.
114
pub trait TextSource<'text>: private::Sealed {
115
    type CharIter: Iterator<Item = char>;
116
    type CharIndexIter: Iterator<Item = (usize, char)>;
117
    type IndexLenIter: Iterator<Item = (usize, usize)>;
118
119
    /// Return the length of the text in code units.
120
    #[doc(hidden)]
121
    fn len(&self) -> usize;
122
123
    /// Get the character at a given code unit index, along with its length in code units.
124
    /// Returns None if index is out of range, or points inside a multi-code-unit character.
125
    /// Returns REPLACEMENT_CHARACTER for any unpaired surrogates in UTF-16.
126
    #[doc(hidden)]
127
    fn char_at(&self, index: usize) -> Option<(char, usize)>;
128
129
    /// Return a subrange of the text, indexed by code units.
130
    /// (We don't implement all of the Index trait, just the minimum we use.)
131
    #[doc(hidden)]
132
    fn subrange(&self, range: Range<usize>) -> &Self;
133
134
    /// An iterator over the text returning Unicode characters,
135
    /// REPLACEMENT_CHAR for invalid code units.
136
    #[doc(hidden)]
137
    fn chars(&'text self) -> Self::CharIter;
138
139
    /// An iterator over the text returning (index, char) tuples,
140
    /// where index is the starting code-unit index of the character,
141
    /// and char is its Unicode value (or REPLACEMENT_CHAR if invalid).
142
    #[doc(hidden)]
143
    fn char_indices(&'text self) -> Self::CharIndexIter;
144
145
    /// An iterator over the text returning (index, length) tuples,
146
    /// where index is the starting code-unit index of the character,
147
    /// and length is its length in code units.
148
    #[doc(hidden)]
149
    fn indices_lengths(&'text self) -> Self::IndexLenIter;
150
151
    /// Number of code units the given character uses.
152
    #[doc(hidden)]
153
    fn char_len(ch: char) -> usize;
154
}
155
156
mod private {
157
    pub trait Sealed {}
158
159
    // Implement for str and [u16] only.
160
    impl Sealed for str {}
161
    impl Sealed for [u16] {}
162
}
163
164
#[derive(PartialEq, Debug)]
165
pub enum Direction {
166
    Ltr,
167
    Rtl,
168
    Mixed,
169
}
170
171
/// Bidi information about a single paragraph
172
#[derive(Clone, Debug, PartialEq)]
173
pub struct ParagraphInfo {
174
    /// The paragraph's boundaries within the text, as byte indices.
175
    ///
176
    /// TODO: Shrink this to only include the starting index?
177
    pub range: Range<usize>,
178
179
    /// The paragraph embedding level.
180
    ///
181
    /// <http://www.unicode.org/reports/tr9/#BD4>
182
    pub level: Level,
183
}
184
185
impl ParagraphInfo {
186
    /// Gets the length of the paragraph in the source text.
187
0
    pub fn len(&self) -> usize {
188
0
        self.range.end - self.range.start
189
0
    }
190
}
191
192
/// Initial bidi information of the text.
193
///
194
/// Contains the text paragraphs and `BidiClass` of its characters.
195
#[derive(PartialEq, Debug)]
196
pub struct InitialInfo<'text> {
197
    /// The text
198
    pub text: &'text str,
199
200
    /// The BidiClass of the character at each byte in the text.
201
    /// If a character is multiple bytes, its class will appear multiple times in the vector.
202
    pub original_classes: Vec<BidiClass>,
203
204
    /// The boundaries and level of each paragraph within the text.
205
    pub paragraphs: Vec<ParagraphInfo>,
206
}
207
208
impl<'text> InitialInfo<'text> {
209
    /// Find the paragraphs and BidiClasses in a string of text.
210
    ///
211
    /// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
212
    ///
213
    /// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong
214
    /// character is found before the matching PDI.  If no strong character is found, the class will
215
    /// remain FSI, and it's up to later stages to treat these as LRI when needed.
216
    ///
217
    /// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
218
    #[cfg_attr(feature = "flame_it", flamer::flame)]
219
    #[cfg(feature = "hardcoded-data")]
220
    pub fn new(text: &str, default_para_level: Option<Level>) -> InitialInfo<'_> {
221
        Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
222
    }
223
224
    /// Find the paragraphs and BidiClasses in a string of text, with a custom [`BidiDataSource`]
225
    /// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`InitialInfo::new()`]
226
    /// instead (enabled with the default `hardcoded-data` Cargo feature)
227
    ///
228
    /// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
229
    ///
230
    /// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong
231
    /// character is found before the matching PDI.  If no strong character is found, the class will
232
    /// remain FSI, and it's up to later stages to treat these as LRI when needed.
233
    #[cfg_attr(feature = "flame_it", flamer::flame)]
234
0
    pub fn new_with_data_source<'a, D: BidiDataSource>(
235
0
        data_source: &D,
236
0
        text: &'a str,
237
0
        default_para_level: Option<Level>,
238
0
    ) -> InitialInfo<'a> {
239
0
        InitialInfoExt::new_with_data_source(data_source, text, default_para_level).base
240
0
    }
241
}
242
243
/// Extended version of InitialInfo (not public API).
244
#[derive(PartialEq, Debug)]
245
struct InitialInfoExt<'text> {
246
    /// The base InitialInfo for the text, recording its paragraphs and bidi classes.
247
    base: InitialInfo<'text>,
248
249
    /// Parallel to base.paragraphs, records whether each paragraph is "pure LTR" that
250
    /// requires no further bidi processing (i.e. there are no RTL characters or bidi
251
    /// control codes present), and whether any bidi isolation controls are present.
252
    flags: Vec<ParagraphInfoFlags>,
253
}
254
255
#[derive(PartialEq, Debug)]
256
struct ParagraphInfoFlags {
257
    is_pure_ltr: bool,
258
    has_isolate_controls: bool,
259
}
260
261
impl<'text> InitialInfoExt<'text> {
262
    /// Find the paragraphs and BidiClasses in a string of text, with a custom [`BidiDataSource`]
263
    /// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`InitialInfo::new()`]
264
    /// instead (enabled with the default `hardcoded-data` Cargo feature)
265
    ///
266
    /// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
267
    ///
268
    /// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong
269
    /// character is found before the matching PDI.  If no strong character is found, the class will
270
    /// remain FSI, and it's up to later stages to treat these as LRI when needed.
271
    #[cfg_attr(feature = "flame_it", flamer::flame)]
272
0
    pub fn new_with_data_source<'a, D: BidiDataSource>(
273
0
        data_source: &D,
274
0
        text: &'a str,
275
0
        default_para_level: Option<Level>,
276
0
    ) -> InitialInfoExt<'a> {
277
0
        let mut paragraphs = Vec::<ParagraphInfo>::new();
278
0
        let mut flags = Vec::<ParagraphInfoFlags>::new();
279
0
        let (original_classes, _, _, _) = compute_initial_info(
280
0
            data_source,
281
0
            text,
282
0
            default_para_level,
283
0
            Some((&mut paragraphs, &mut flags)),
284
0
        );
285
0
286
0
        InitialInfoExt {
287
0
            base: InitialInfo {
288
0
                text,
289
0
                original_classes,
290
0
                paragraphs,
291
0
            },
292
0
            flags,
293
0
        }
294
0
    }
Unexecuted instantiation: <unicode_bidi::InitialInfoExt>::new_with_data_source::<icu_properties::bidi::BidiClassAdapter>
Unexecuted instantiation: <unicode_bidi::InitialInfoExt>::new_with_data_source::<_>
295
}
296
297
/// Implementation of initial-info computation for both BidiInfo and ParagraphBidiInfo.
298
/// To treat the text as (potentially) multiple paragraphs, the caller should pass the
299
/// pair of optional outparam arrays to receive the ParagraphInfo and pure-ltr flags
300
/// for each paragraph. Passing None for split_paragraphs will ignore any paragraph-
301
/// separator characters in the text, treating it just as a single paragraph.
302
/// Returns the array of BidiClass values for each code unit of the text, along with
303
/// the embedding level and pure-ltr flag for the *last* (or only) paragraph.
304
0
fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
305
0
    data_source: &D,
306
0
    text: &'a T,
307
0
    default_para_level: Option<Level>,
308
0
    mut split_paragraphs: Option<(&mut Vec<ParagraphInfo>, &mut Vec<ParagraphInfoFlags>)>,
309
0
) -> (Vec<BidiClass>, Level, bool, bool) {
310
0
    let mut original_classes = Vec::with_capacity(text.len());
311
0
312
0
    // The stack contains the starting code unit index for each nested isolate we're inside.
313
0
    #[cfg(feature = "smallvec")]
314
0
    let mut isolate_stack = SmallVec::<[usize; 8]>::new();
315
0
    #[cfg(not(feature = "smallvec"))]
316
0
    let mut isolate_stack = Vec::new();
317
0
318
0
    debug_assert!(
319
0
        if let Some((ref paragraphs, ref flags)) = split_paragraphs {
320
0
            paragraphs.is_empty() && flags.is_empty()
321
        } else {
322
0
            true
323
        }
324
    );
325
326
0
    let mut para_start = 0;
327
0
    let mut para_level = default_para_level;
328
0
329
0
    // Per-paragraph flag: can subsequent processing be skipped? Set to false if any
330
0
    // RTL characters or bidi control characters are encountered in the paragraph.
331
0
    let mut is_pure_ltr = true;
332
0
    // Set to true if any bidi isolation controls are present in the paragraph.
333
0
    let mut has_isolate_controls = false;
334
335
    #[cfg(feature = "flame_it")]
336
    flame::start("compute_initial_info(): iter text.char_indices()");
337
338
0
    for (i, c) in text.char_indices() {
339
0
        let class = data_source.bidi_class(c);
340
0
341
0
        #[cfg(feature = "flame_it")]
342
0
        flame::start("original_classes.extend()");
343
0
344
0
        let len = T::char_len(c);
345
0
        original_classes.extend(repeat(class).take(len));
346
0
347
0
        #[cfg(feature = "flame_it")]
348
0
        flame::end("original_classes.extend()");
349
0
350
0
        match class {
351
            B => {
352
0
                if let Some((ref mut paragraphs, ref mut flags)) = split_paragraphs {
353
0
                    // P1. Split the text into separate paragraphs. The paragraph separator is kept
354
0
                    // with the previous paragraph.
355
0
                    let para_end = i + len;
356
0
                    paragraphs.push(ParagraphInfo {
357
0
                        range: para_start..para_end,
358
0
                        // P3. If no character is found in p2, set the paragraph level to zero.
359
0
                        level: para_level.unwrap_or(LTR_LEVEL),
360
0
                    });
361
0
                    flags.push(ParagraphInfoFlags {
362
0
                        is_pure_ltr,
363
0
                        has_isolate_controls,
364
0
                    });
365
0
                    // Reset state for the start of the next paragraph.
366
0
                    para_start = para_end;
367
0
                    // TODO: Support defaulting to direction of previous paragraph
368
0
                    //
369
0
                    // <http://www.unicode.org/reports/tr9/#HL1>
370
0
                    para_level = default_para_level;
371
0
                    is_pure_ltr = true;
372
0
                    has_isolate_controls = false;
373
0
                    isolate_stack.clear();
374
0
                }
375
            }
376
377
            L | R | AL => {
378
0
                if class != L {
379
0
                    is_pure_ltr = false;
380
0
                }
381
0
                match isolate_stack.last() {
382
0
                    Some(&start) => {
383
0
                        if original_classes[start] == FSI {
384
                            // X5c. If the first strong character between FSI and its matching
385
                            // PDI is R or AL, treat it as RLI. Otherwise, treat it as LRI.
386
0
                            for j in 0..T::char_len(chars::FSI) {
387
0
                                original_classes[start + j] = if class == L { LRI } else { RLI };
388
                            }
389
0
                        }
390
                    }
391
392
                    None => {
393
0
                        if para_level.is_none() {
394
                            // P2. Find the first character of type L, AL, or R, while skipping
395
                            // any characters between an isolate initiator and its matching
396
                            // PDI.
397
0
                            para_level = Some(if class != L { RTL_LEVEL } else { LTR_LEVEL });
398
0
                        }
399
                    }
400
                }
401
            }
402
403
0
            AN | LRE | RLE | LRO | RLO => {
404
0
                is_pure_ltr = false;
405
0
            }
406
407
0
            RLI | LRI | FSI => {
408
0
                is_pure_ltr = false;
409
0
                has_isolate_controls = true;
410
0
                isolate_stack.push(i);
411
0
            }
412
413
0
            PDI => {
414
0
                isolate_stack.pop();
415
0
            }
416
417
0
            _ => {}
418
        }
419
    }
420
421
0
    if let Some((paragraphs, flags)) = split_paragraphs {
422
0
        if para_start < text.len() {
423
0
            paragraphs.push(ParagraphInfo {
424
0
                range: para_start..text.len(),
425
0
                level: para_level.unwrap_or(LTR_LEVEL),
426
0
            });
427
0
            flags.push(ParagraphInfoFlags {
428
0
                is_pure_ltr,
429
0
                has_isolate_controls,
430
0
            });
431
0
        }
432
0
        debug_assert_eq!(paragraphs.len(), flags.len());
433
0
    }
434
0
    debug_assert_eq!(original_classes.len(), text.len());
435
436
    #[cfg(feature = "flame_it")]
437
    flame::end("compute_initial_info(): iter text.char_indices()");
438
439
0
    (
440
0
        original_classes,
441
0
        para_level.unwrap_or(LTR_LEVEL),
442
0
        is_pure_ltr,
443
0
        has_isolate_controls,
444
0
    )
445
0
}
Unexecuted instantiation: unicode_bidi::compute_initial_info::<icu_properties::bidi::BidiClassAdapter, str>
Unexecuted instantiation: unicode_bidi::compute_initial_info::<_, _>
446
447
/// Bidi information of the text.
448
///
449
/// The `original_classes` and `levels` vectors are indexed by byte offsets into the text.  If a
450
/// character is multiple bytes wide, then its class and level will appear multiple times in these
451
/// vectors.
452
// TODO: Impl `struct StringProperty<T> { values: Vec<T> }` and use instead of Vec<T>
453
#[derive(Debug, PartialEq)]
454
pub struct BidiInfo<'text> {
455
    /// The text
456
    pub text: &'text str,
457
458
    /// The BidiClass of the character at each byte in the text.
459
    pub original_classes: Vec<BidiClass>,
460
461
    /// The directional embedding level of each byte in the text.
462
    pub levels: Vec<Level>,
463
464
    /// The boundaries and paragraph embedding level of each paragraph within the text.
465
    ///
466
    /// TODO: Use SmallVec or similar to avoid overhead when there are only one or two paragraphs?
467
    /// Or just don't include the first paragraph, which always starts at 0?
468
    pub paragraphs: Vec<ParagraphInfo>,
469
}
470
471
impl<'text> BidiInfo<'text> {
472
    /// Split the text into paragraphs and determine the bidi embedding levels for each paragraph.
473
    ///
474
    ///
475
    /// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
476
    ///
477
    /// TODO: In early steps, check for special cases that allow later steps to be skipped, like
478
    /// text that is entirely LTR.  See the `nsBidi` class from Gecko for comparison.
479
    ///
480
    /// TODO: Support auto-RTL base direction
481
    #[cfg_attr(feature = "flame_it", flamer::flame)]
482
    #[cfg(feature = "hardcoded-data")]
483
    #[inline]
484
    pub fn new(text: &str, default_para_level: Option<Level>) -> BidiInfo<'_> {
485
        Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
486
    }
487
488
    /// Split the text into paragraphs and determine the bidi embedding levels for each paragraph, with a custom [`BidiDataSource`]
489
    /// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`BidiInfo::new()`]
490
    /// instead (enabled with the default `hardcoded-data` Cargo feature).
491
    ///
492
    /// TODO: In early steps, check for special cases that allow later steps to be skipped, like
493
    /// text that is entirely LTR.  See the `nsBidi` class from Gecko for comparison.
494
    ///
495
    /// TODO: Support auto-RTL base direction
496
    #[cfg_attr(feature = "flame_it", flamer::flame)]
497
0
    pub fn new_with_data_source<'a, D: BidiDataSource>(
498
0
        data_source: &D,
499
0
        text: &'a str,
500
0
        default_para_level: Option<Level>,
501
0
    ) -> BidiInfo<'a> {
502
0
        let InitialInfoExt { base, flags, .. } =
503
0
            InitialInfoExt::new_with_data_source(data_source, text, default_para_level);
504
0
505
0
        let mut levels = Vec::<Level>::with_capacity(text.len());
506
0
        let mut processing_classes = base.original_classes.clone();
507
508
0
        for (para, flags) in base.paragraphs.iter().zip(flags.iter()) {
509
0
            let text = &text[para.range.clone()];
510
0
            let original_classes = &base.original_classes[para.range.clone()];
511
0
512
0
            compute_bidi_info_for_para(
513
0
                data_source,
514
0
                para,
515
0
                flags.is_pure_ltr,
516
0
                flags.has_isolate_controls,
517
0
                text,
518
0
                original_classes,
519
0
                &mut processing_classes,
520
0
                &mut levels,
521
0
            );
522
0
        }
523
524
0
        BidiInfo {
525
0
            text,
526
0
            original_classes: base.original_classes,
527
0
            paragraphs: base.paragraphs,
528
0
            levels,
529
0
        }
530
0
    }
Unexecuted instantiation: <unicode_bidi::BidiInfo>::new_with_data_source::<icu_properties::bidi::BidiClassAdapter>
Unexecuted instantiation: <unicode_bidi::BidiInfo>::new_with_data_source::<_>
531
532
    /// Produce the levels for this paragraph as needed for reordering, one level per *byte*
533
    /// in the paragraph. The returned vector includes bytes that are not included
534
    /// in the `line`, but will not adjust them.
535
    ///
536
    /// This runs [Rule L1], you can run
537
    /// [Rule L2] by calling [`Self::reorder_visual()`].
538
    /// If doing so, you may prefer to use [`Self::reordered_levels_per_char()`] instead
539
    /// to avoid non-byte indices.
540
    ///
541
    /// For an all-in-one reordering solution, consider using [`Self::reorder_visual()`].
542
    ///
543
    /// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
544
    /// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
545
    #[cfg_attr(feature = "flame_it", flamer::flame)]
546
0
    pub fn reordered_levels(&self, para: &ParagraphInfo, line: Range<usize>) -> Vec<Level> {
547
0
        assert!(line.start <= self.levels.len());
548
0
        assert!(line.end <= self.levels.len());
549
550
0
        let mut levels = self.levels.clone();
551
0
        let line_classes = &self.original_classes[line.clone()];
552
0
        let line_levels = &mut levels[line.clone()];
553
0
554
0
        reorder_levels(
555
0
            line_classes,
556
0
            line_levels,
557
0
            self.text.subrange(line),
558
0
            para.level,
559
0
        );
560
0
561
0
        levels
562
0
    }
563
564
    /// Produce the levels for this paragraph as needed for reordering, one level per *character*
565
    /// in the paragraph. The returned vector includes characters that are not included
566
    /// in the `line`, but will not adjust them.
567
    ///
568
    /// This runs [Rule L1], you can run
569
    /// [Rule L2] by calling [`Self::reorder_visual()`].
570
    /// If doing so, you may prefer to use [`Self::reordered_levels_per_char()`] instead
571
    /// to avoid non-byte indices.
572
    ///
573
    /// For an all-in-one reordering solution, consider using [`Self::reorder_visual()`].
574
    ///
575
    /// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
576
    /// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
577
    #[cfg_attr(feature = "flame_it", flamer::flame)]
578
0
    pub fn reordered_levels_per_char(
579
0
        &self,
580
0
        para: &ParagraphInfo,
581
0
        line: Range<usize>,
582
0
    ) -> Vec<Level> {
583
0
        let levels = self.reordered_levels(para, line);
584
0
        self.text.char_indices().map(|(i, _)| levels[i]).collect()
585
0
    }
586
587
    /// Re-order a line based on resolved levels and return the line in display order.
588
    ///
589
    /// This does not apply [Rule L3] or [Rule L4] around combining characters or mirroring.
590
    ///
591
    /// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
592
    /// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
593
    #[cfg_attr(feature = "flame_it", flamer::flame)]
594
0
    pub fn reorder_line(&self, para: &ParagraphInfo, line: Range<usize>) -> Cow<'text, str> {
595
0
        if !level::has_rtl(&self.levels[line.clone()]) {
596
0
            return self.text[line].into();
597
0
        }
598
0
        let (levels, runs) = self.visual_runs(para, line.clone());
599
0
        reorder_line(self.text, line, levels, runs)
600
0
    }
601
602
    /// Reorders pre-calculated levels of a sequence of characters.
603
    ///
604
    /// NOTE: This is a convenience method that does not use a `Paragraph`  object. It is
605
    /// intended to be used when an application has determined the levels of the objects (character sequences)
606
    /// and just needs to have them reordered.
607
    ///
608
    /// The index map will result in `indexMap[visualIndex]==logicalIndex`.
609
    ///
610
    /// This only runs [Rule L2](http://www.unicode.org/reports/tr9/#L2) as it does not have
611
    /// information about the actual text.
612
    ///
613
    /// Furthermore, if `levels` is an array that is aligned with code units, bytes within a codepoint may be
614
    /// reversed. You may need to fix up the map to deal with this. Alternatively, only pass in arrays where each `Level`
615
    /// is for a single code point.
616
    ///
617
    ///
618
    /// # Example
619
    /// ```
620
    /// use unicode_bidi::BidiInfo;
621
    /// use unicode_bidi::Level;
622
    ///
623
    /// let l0 = Level::from(0);
624
    /// let l1 = Level::from(1);
625
    /// let l2 = Level::from(2);
626
    ///
627
    /// let levels = vec![l0, l0, l0, l0];
628
    /// let index_map = BidiInfo::reorder_visual(&levels);
629
    /// assert_eq!(levels.len(), index_map.len());
630
    /// assert_eq!(index_map, [0, 1, 2, 3]);
631
    ///
632
    /// let levels: Vec<Level> = vec![l0, l0, l0, l1, l1, l1, l2, l2];
633
    /// let index_map = BidiInfo::reorder_visual(&levels);
634
    /// assert_eq!(levels.len(), index_map.len());
635
    /// assert_eq!(index_map, [0, 1, 2, 6, 7, 5, 4, 3]);
636
    /// ```
637
    #[cfg_attr(feature = "flame_it", flamer::flame)]
638
    #[inline]
639
0
    pub fn reorder_visual(levels: &[Level]) -> Vec<usize> {
640
0
        reorder_visual(levels)
641
0
    }
Unexecuted instantiation: <unicode_bidi::BidiInfo>::reorder_visual
Unexecuted instantiation: <unicode_bidi::BidiInfo>::reorder_visual
642
643
    /// Find the level runs within a line and return them in visual order.
644
    ///
645
    /// `line` is a range of byte indices within `levels`.
646
    ///
647
    /// The first return value is a vector of levels used by the reordering algorithm,
648
    /// i.e. the result of [Rule L1]. The second return value is a vector of level runs,
649
    /// the result of [Rule L2], showing the visual order that each level run (a run of text with the
650
    /// same level) should be displayed. Within each run, the display order can be checked
651
    /// against the Level vector.
652
    ///
653
    /// This does not handle [Rule L3] (combining characters) or [Rule L4] (mirroring),
654
    /// as that should be handled by the engine using this API.
655
    ///
656
    /// Conceptually, this is the same as running [`Self::reordered_levels()`] followed by
657
    /// [`Self::reorder_visual()`], however it returns the result as a list of level runs instead
658
    /// of producing a level map, since one may wish to deal with the fact that this is operating on
659
    /// byte rather than character indices.
660
    ///
661
    /// <http://www.unicode.org/reports/tr9/#Reordering_Resolved_Levels>
662
    ///
663
    /// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
664
    /// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
665
    /// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
666
    /// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
667
    #[cfg_attr(feature = "flame_it", flamer::flame)]
668
    #[inline]
669
0
    pub fn visual_runs(
670
0
        &self,
671
0
        para: &ParagraphInfo,
672
0
        line: Range<usize>,
673
0
    ) -> (Vec<Level>, Vec<LevelRun>) {
674
0
        let levels = self.reordered_levels(para, line.clone());
675
0
        visual_runs_for_line(levels, &line)
676
0
    }
677
678
    /// If processed text has any computed RTL levels
679
    ///
680
    /// This information is usually used to skip re-ordering of text when no RTL level is present
681
    #[inline]
682
0
    pub fn has_rtl(&self) -> bool {
683
0
        level::has_rtl(&self.levels)
684
0
    }
685
}
686
687
/// Bidi information of text treated as a single paragraph.
688
///
689
/// The `original_classes` and `levels` vectors are indexed by byte offsets into the text.  If a
690
/// character is multiple bytes wide, then its class and level will appear multiple times in these
691
/// vectors.
692
#[derive(Debug, PartialEq)]
693
pub struct ParagraphBidiInfo<'text> {
694
    /// The text
695
    pub text: &'text str,
696
697
    /// The BidiClass of the character at each byte in the text.
698
    pub original_classes: Vec<BidiClass>,
699
700
    /// The directional embedding level of each byte in the text.
701
    pub levels: Vec<Level>,
702
703
    /// The paragraph embedding level.
704
    pub paragraph_level: Level,
705
706
    /// Whether the paragraph is purely LTR.
707
    pub is_pure_ltr: bool,
708
}
709
710
impl<'text> ParagraphBidiInfo<'text> {
711
    /// Determine the bidi embedding levels of `text`, using the crate's hardcoded bidi data.
    ///
    /// `default_para_level` is forwarded unchanged to
    /// [`ParagraphBidiInfo::new_with_data_source()`].
    ///
    /// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
    ///
    /// TODO: In early steps, check for special cases that allow later steps to be skipped, like
    /// text that is entirely LTR.  See the `nsBidi` class from Gecko for comparison.
    ///
    /// TODO: Support auto-RTL base direction
    #[cfg_attr(feature = "flame_it", flamer::flame)]
    #[cfg(feature = "hardcoded-data")]
    #[inline]
    pub fn new(text: &str, default_para_level: Option<Level>) -> ParagraphBidiInfo<'_> {
        let data_source = HardcodedBidiData;
        Self::new_with_data_source(&data_source, text, default_para_level)
    }
726
727
    /// Determine the bidi embedding level, with a custom [`BidiDataSource`]
728
    /// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`BidiInfo::new()`]
729
    /// instead (enabled with tbe default `hardcoded-data` Cargo feature).
730
    ///
731
    /// (This is the single-paragraph equivalent of BidiInfo::new_with_data_source,
732
    /// and should be kept in sync with it.
733
    #[cfg_attr(feature = "flame_it", flamer::flame)]
734
0
    pub fn new_with_data_source<'a, D: BidiDataSource>(
735
0
        data_source: &D,
736
0
        text: &'a str,
737
0
        default_para_level: Option<Level>,
738
0
    ) -> ParagraphBidiInfo<'a> {
739
0
        // Here we could create a ParagraphInitialInfo struct to parallel the one
740
0
        // used by BidiInfo, but there doesn't seem any compelling reason for it.
741
0
        let (original_classes, paragraph_level, is_pure_ltr, has_isolate_controls) =
742
0
            compute_initial_info(data_source, text, default_para_level, None);
743
0
744
0
        let mut levels = Vec::<Level>::with_capacity(text.len());
745
0
        let mut processing_classes = original_classes.clone();
746
0
747
0
        let para_info = ParagraphInfo {
748
0
            range: Range {
749
0
                start: 0,
750
0
                end: text.len(),
751
0
            },
752
0
            level: paragraph_level,
753
0
        };
754
0
755
0
        compute_bidi_info_for_para(
756
0
            data_source,
757
0
            &para_info,
758
0
            is_pure_ltr,
759
0
            has_isolate_controls,
760
0
            text,
761
0
            &original_classes,
762
0
            &mut processing_classes,
763
0
            &mut levels,
764
0
        );
765
0
766
0
        ParagraphBidiInfo {
767
0
            text,
768
0
            original_classes,
769
0
            levels,
770
0
            paragraph_level,
771
0
            is_pure_ltr,
772
0
        }
773
0
    }
774
775
    /// Produce the levels for this paragraph as needed for reordering, one level per *byte*
776
    /// in the paragraph. The returned vector includes bytes that are not included
777
    /// in the `line`, but will not adjust them.
778
    ///
779
    /// See BidiInfo::reordered_levels for details.
780
    ///
781
    /// (This should be kept in sync with BidiInfo::reordered_levels.)
782
    #[cfg_attr(feature = "flame_it", flamer::flame)]
783
0
    pub fn reordered_levels(&self, line: Range<usize>) -> Vec<Level> {
784
0
        assert!(line.start <= self.levels.len());
785
0
        assert!(line.end <= self.levels.len());
786
787
0
        let mut levels = self.levels.clone();
788
0
        let line_classes = &self.original_classes[line.clone()];
789
0
        let line_levels = &mut levels[line.clone()];
790
0
791
0
        reorder_levels(
792
0
            line_classes,
793
0
            line_levels,
794
0
            self.text.subrange(line),
795
0
            self.paragraph_level,
796
0
        );
797
0
798
0
        levels
799
0
    }
800
801
    /// Produce the levels for this paragraph as needed for reordering, one level per *character*
802
    /// in the paragraph. The returned vector includes characters that are not included
803
    /// in the `line`, but will not adjust them.
804
    ///
805
    /// See BidiInfo::reordered_levels_per_char for details.
806
    ///
807
    /// (This should be kept in sync with BidiInfo::reordered_levels_per_char.)
808
    #[cfg_attr(feature = "flame_it", flamer::flame)]
809
0
    pub fn reordered_levels_per_char(&self, line: Range<usize>) -> Vec<Level> {
810
0
        let levels = self.reordered_levels(line);
811
0
        self.text.char_indices().map(|(i, _)| levels[i]).collect()
812
0
    }
813
814
    /// Re-order a line based on resolved levels and return the line in display order.
815
    ///
816
    /// See BidiInfo::reorder_line for details.
817
    ///
818
    /// (This should be kept in sync with BidiInfo::reorder_line.)
819
    #[cfg_attr(feature = "flame_it", flamer::flame)]
820
0
    pub fn reorder_line(&self, line: Range<usize>) -> Cow<'text, str> {
821
0
        if !level::has_rtl(&self.levels[line.clone()]) {
822
0
            return self.text[line].into();
823
0
        }
824
0
825
0
        let (levels, runs) = self.visual_runs(line.clone());
826
0
827
0
        reorder_line(self.text, line, levels, runs)
828
0
    }
829
830
    /// Reorders pre-calculated levels of a sequence of characters.
831
    ///
832
    /// See BidiInfo::reorder_visual for details.
833
    #[cfg_attr(feature = "flame_it", flamer::flame)]
834
    #[inline]
835
0
    pub fn reorder_visual(levels: &[Level]) -> Vec<usize> {
836
0
        reorder_visual(levels)
837
0
    }
838
839
    /// Find the level runs within a line and return them in visual order.
840
    ///
841
    /// `line` is a range of bytes indices within `levels`.
842
    ///
843
    /// See BidiInfo::visual_runs for details.
844
    ///
845
    /// (This should be kept in sync with BidiInfo::visual_runs.)
846
    #[cfg_attr(feature = "flame_it", flamer::flame)]
847
    #[inline]
848
0
    pub fn visual_runs(&self, line: Range<usize>) -> (Vec<Level>, Vec<LevelRun>) {
849
0
        let levels = self.reordered_levels(line.clone());
850
0
        visual_runs_for_line(levels, &line)
851
0
    }
852
853
    /// If processed text has any computed RTL levels
854
    ///
855
    /// This information is usually used to skip re-ordering of text when no RTL level is present
856
    #[inline]
857
0
    pub fn has_rtl(&self) -> bool {
858
0
        !self.is_pure_ltr
859
0
    }
860
861
    /// Return the paragraph's Direction (Ltr, Rtl, or Mixed) based on its levels.
862
    #[inline]
863
0
    pub fn direction(&self) -> Direction {
864
0
        para_direction(&self.levels)
865
0
    }
866
}
867
868
/// Return a line of the text in display order based on resolved levels.
869
///
870
/// `text`   the full text passed to the `BidiInfo` or `ParagraphBidiInfo` for analysis
871
/// `line`   a range of byte indices within `text` corresponding to one line
872
/// `levels` array of `Level` values, with `line`'s levels reordered into visual order
873
/// `runs`   array of `LevelRun`s in visual order
874
///
875
/// (`levels` and `runs` are the result of calling `BidiInfo::visual_runs()` or
876
/// `ParagraphBidiInfo::visual_runs()` for the line of interest.)
877
///
878
/// Returns: the reordered text of the line.
879
///
880
/// This does not apply [Rule L3] or [Rule L4] around combining characters or mirroring.
881
///
882
/// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
883
/// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
884
0
fn reorder_line(
885
0
    text: &str,
886
0
    line: Range<usize>,
887
0
    levels: Vec<Level>,
888
0
    runs: Vec<LevelRun>,
889
0
) -> Cow<'_, str> {
890
0
    // If all isolating run sequences are LTR, no reordering is needed
891
0
    if runs.iter().all(|run| levels[run.start].is_ltr()) {
892
0
        return text[line].into();
893
0
    }
894
0
895
0
    let mut result = String::with_capacity(line.len());
896
0
    for run in runs {
897
0
        if levels[run.start].is_rtl() {
898
0
            result.extend(text[run].chars().rev());
899
0
        } else {
900
0
            result.push_str(&text[run]);
901
0
        }
902
    }
903
0
    result.into()
904
0
}
905
906
/// Find the level runs within a line and return them in visual order.
907
///
908
/// `line` is a range of code-unit indices within `levels`.
909
///
910
/// The first return value is a vector of levels used by the reordering algorithm,
911
/// i.e. the result of [Rule L1]. The second return value is a vector of level runs,
912
/// the result of [Rule L2], showing the visual order that each level run (a run of text with the
913
/// same level) should be displayed. Within each run, the display order can be checked
914
/// against the Level vector.
915
///
916
/// This does not handle [Rule L3] (combining characters) or [Rule L4] (mirroring),
917
/// as that should be handled by the engine using this API.
918
///
919
/// Conceptually, this is the same as running [`reordered_levels()`] followed by
920
/// [`reorder_visual()`], however it returns the result as a list of level runs instead
921
/// of producing a level map, since one may wish to deal with the fact that this is operating on
922
/// byte rather than character indices.
923
///
924
/// <http://www.unicode.org/reports/tr9/#Reordering_Resolved_Levels>
925
///
926
/// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
927
/// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
928
/// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
929
/// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
930
0
fn visual_runs_for_line(levels: Vec<Level>, line: &Range<usize>) -> (Vec<Level>, Vec<LevelRun>) {
931
0
    // Find consecutive level runs.
932
0
    let mut runs = Vec::new();
933
0
    let mut start = line.start;
934
0
    let mut run_level = levels[start];
935
0
    let mut min_level = run_level;
936
0
    let mut max_level = run_level;
937
938
0
    for (i, &new_level) in levels.iter().enumerate().take(line.end).skip(start + 1) {
939
0
        if new_level != run_level {
940
0
            // End of the previous run, start of a new one.
941
0
            runs.push(start..i);
942
0
            start = i;
943
0
            run_level = new_level;
944
0
            min_level = cmp::min(run_level, min_level);
945
0
            max_level = cmp::max(run_level, max_level);
946
0
        }
947
    }
948
0
    runs.push(start..line.end);
949
0
950
0
    let run_count = runs.len();
951
0
952
0
    // Re-order the odd runs.
953
0
    // <http://www.unicode.org/reports/tr9/#L2>
954
0
955
0
    // Stop at the lowest *odd* level.
956
0
    min_level = min_level.new_lowest_ge_rtl().expect("Level error");
957
    // This loop goes through contiguous chunks of level runs that have a level
958
    // ≥ max_level and reverses their contents, reducing max_level by 1 each time.
959
0
    while max_level >= min_level {
960
        // Look for the start of a sequence of consecutive runs of max_level or higher.
961
0
        let mut seq_start = 0;
962
0
        while seq_start < run_count {
963
0
            if levels[runs[seq_start].start] < max_level {
964
0
                seq_start += 1;
965
0
                continue;
966
0
            }
967
0
968
0
            // Found the start of a sequence. Now find the end.
969
0
            let mut seq_end = seq_start + 1;
970
0
            while seq_end < run_count {
971
0
                if levels[runs[seq_end].start] < max_level {
972
0
                    break;
973
0
                }
974
0
                seq_end += 1;
975
            }
976
            // Reverse the runs within this sequence.
977
0
            runs[seq_start..seq_end].reverse();
978
0
979
0
            seq_start = seq_end;
980
        }
981
0
        max_level
982
0
            .lower(1)
983
0
            .expect("Lowering embedding level below zero");
984
    }
985
0
    (levels, runs)
986
0
}
987
988
/// Reorders pre-calculated levels of a sequence of characters.
989
///
990
/// NOTE: This is a convenience method that does not use a `Paragraph`  object. It is
991
/// intended to be used when an application has determined the levels of the objects (character sequences)
992
/// and just needs to have them reordered.
993
///
994
/// the index map will result in `indexMap[visualIndex]==logicalIndex`.
995
///
996
/// This only runs [Rule L2](http://www.unicode.org/reports/tr9/#L2) as it does not have
997
/// information about the actual text.
998
///
999
/// Furthermore, if `levels` is an array that is aligned with code units, bytes within a codepoint may be
1000
/// reversed. You may need to fix up the map to deal with this. Alternatively, only pass in arrays where each `Level`
1001
/// is for a single code point.
1002
0
fn reorder_visual(levels: &[Level]) -> Vec<usize> {
1003
    // Gets the next range of characters after start_index with a level greater
1004
    // than or equal to `max`
1005
0
    fn next_range(levels: &[level::Level], mut start_index: usize, max: Level) -> Range<usize> {
1006
0
        if levels.is_empty() || start_index >= levels.len() {
1007
0
            return start_index..start_index;
1008
0
        }
1009
0
        while let Some(l) = levels.get(start_index) {
1010
0
            if *l >= max {
1011
0
                break;
1012
0
            }
1013
0
            start_index += 1;
1014
        }
1015
1016
0
        if levels.get(start_index).is_none() {
1017
            // If at the end of the array, adding one will
1018
            // produce an out-of-range end element
1019
0
            return start_index..start_index;
1020
0
        }
1021
0
1022
0
        let mut end_index = start_index + 1;
1023
0
        while let Some(l) = levels.get(end_index) {
1024
0
            if *l < max {
1025
0
                return start_index..end_index;
1026
0
            }
1027
0
            end_index += 1;
1028
        }
1029
1030
0
        start_index..end_index
1031
0
    }
1032
1033
    // This implementation is similar to the L2 implementation in `visual_runs()`
1034
    // but it cannot benefit from a precalculated LevelRun vector so needs to be different.
1035
1036
0
    if levels.is_empty() {
1037
0
        return vec![];
1038
0
    }
1039
0
1040
0
    // Get the min and max levels
1041
0
    let (mut min, mut max) = levels
1042
0
        .iter()
1043
0
        .fold((levels[0], levels[0]), |(min, max), &l| {
1044
0
            (cmp::min(min, l), cmp::max(max, l))
1045
0
        });
1046
0
1047
0
    // Initialize an index map
1048
0
    let mut result: Vec<usize> = (0..levels.len()).collect();
1049
0
1050
0
    if min == max && min.is_ltr() {
1051
        // Everything is LTR and at the same level, do nothing
1052
0
        return result;
1053
0
    }
1054
0
1055
0
    // Stop at the lowest *odd* level, since everything below that
1056
0
    // is LTR and does not need further reordering
1057
0
    min = min.new_lowest_ge_rtl().expect("Level error");
1058
1059
    // For each max level, take all contiguous chunks of
1060
    // levels ≥ max and reverse them
1061
    //
1062
    // We can do this check with the original levels instead of checking reorderings because all
1063
    // prior reorderings will have been for contiguous chunks of levels >> max, which will
1064
    // be a subset of these chunks anyway.
1065
0
    while min <= max {
1066
0
        let mut range = 0..0;
1067
        loop {
1068
0
            range = next_range(levels, range.end, max);
1069
0
            result[range.clone()].reverse();
1070
0
1071
0
            if range.end >= levels.len() {
1072
0
                break;
1073
0
            }
1074
        }
1075
1076
0
        max.lower(1).expect("Level error");
1077
    }
1078
1079
0
    result
1080
0
}
1081
1082
/// The core of BidiInfo initialization, factored out into a function that both
1083
/// the utf-8 and utf-16 versions of BidiInfo can use.
1084
0
fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
1085
0
    data_source: &D,
1086
0
    para: &ParagraphInfo,
1087
0
    is_pure_ltr: bool,
1088
0
    has_isolate_controls: bool,
1089
0
    text: &'a T,
1090
0
    original_classes: &[BidiClass],
1091
0
    processing_classes: &mut [BidiClass],
1092
0
    levels: &mut Vec<Level>,
1093
0
) {
1094
0
    let new_len = levels.len() + para.range.len();
1095
0
    levels.resize(new_len, para.level);
1096
0
    if para.level == LTR_LEVEL && is_pure_ltr {
1097
0
        return;
1098
0
    }
1099
0
1100
0
    let processing_classes = &mut processing_classes[para.range.clone()];
1101
0
    let levels = &mut levels[para.range.clone()];
1102
0
    let mut level_runs = LevelRunVec::new();
1103
0
1104
0
    explicit::compute(
1105
0
        text,
1106
0
        para.level,
1107
0
        original_classes,
1108
0
        levels,
1109
0
        processing_classes,
1110
0
        &mut level_runs,
1111
0
    );
1112
0
1113
0
    let mut sequences = prepare::IsolatingRunSequenceVec::new();
1114
0
    prepare::isolating_run_sequences(
1115
0
        para.level,
1116
0
        original_classes,
1117
0
        levels,
1118
0
        level_runs,
1119
0
        has_isolate_controls,
1120
0
        &mut sequences,
1121
0
    );
1122
0
    for sequence in &sequences {
1123
0
        implicit::resolve_weak(text, sequence, processing_classes);
1124
0
        implicit::resolve_neutral(
1125
0
            text,
1126
0
            data_source,
1127
0
            sequence,
1128
0
            levels,
1129
0
            original_classes,
1130
0
            processing_classes,
1131
0
        );
1132
0
    }
1133
1134
0
    implicit::resolve_levels(processing_classes, levels);
1135
0
1136
0
    assign_levels_to_removed_chars(para.level, original_classes, levels);
1137
0
}
Unexecuted instantiation: unicode_bidi::compute_bidi_info_for_para::<icu_properties::bidi::BidiClassAdapter, str>
Unexecuted instantiation: unicode_bidi::compute_bidi_info_for_para::<_, _>
1138
1139
/// Produce the levels for this paragraph as needed for reordering, one level per *code unit*
1140
/// in the paragraph. The returned vector includes code units that are not included
1141
/// in the `line`, but will not adjust them.
1142
///
1143
/// This runs [Rule L1]
1144
///
1145
/// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
1146
0
fn reorder_levels<'a, T: TextSource<'a> + ?Sized>(
1147
0
    line_classes: &[BidiClass],
1148
0
    line_levels: &mut [Level],
1149
0
    line_text: &'a T,
1150
0
    para_level: Level,
1151
0
) {
1152
0
    // Reset some whitespace chars to paragraph level.
1153
0
    // <http://www.unicode.org/reports/tr9/#L1>
1154
0
    let mut reset_from: Option<usize> = Some(0);
1155
0
    let mut reset_to: Option<usize> = None;
1156
0
    let mut prev_level = para_level;
1157
0
    for ((i, c), (_, length)) in line_text.char_indices().zip(line_text.indices_lengths()) {
1158
0
        match line_classes[i] {
1159
            // Segment separator, Paragraph separator
1160
            B | S => {
1161
0
                assert_eq!(reset_to, None);
1162
0
                reset_to = Some(i + T::char_len(c));
1163
0
                if reset_from.is_none() {
1164
0
                    reset_from = Some(i);
1165
0
                }
1166
            }
1167
            // Whitespace, isolate formatting
1168
            WS | FSI | LRI | RLI | PDI => {
1169
0
                if reset_from.is_none() {
1170
0
                    reset_from = Some(i);
1171
0
                }
1172
            }
1173
            // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
1174
            // same as above + set the level
1175
            RLE | LRE | RLO | LRO | PDF | BN => {
1176
0
                if reset_from.is_none() {
1177
0
                    reset_from = Some(i);
1178
0
                }
1179
                // also set the level to previous
1180
0
                for level in &mut line_levels[i..i + length] {
1181
0
                    *level = prev_level;
1182
0
                }
1183
            }
1184
0
            _ => {
1185
0
                reset_from = None;
1186
0
            }
1187
        }
1188
0
        if let (Some(from), Some(to)) = (reset_from, reset_to) {
1189
0
            for level in &mut line_levels[from..to] {
1190
0
                *level = para_level;
1191
0
            }
1192
0
            reset_from = None;
1193
0
            reset_to = None;
1194
0
        }
1195
0
        prev_level = line_levels[i];
1196
    }
1197
0
    if let Some(from) = reset_from {
1198
0
        for level in &mut line_levels[from..] {
1199
0
            *level = para_level;
1200
0
        }
1201
0
    }
1202
0
}
Unexecuted instantiation: unicode_bidi::reorder_levels::<[u16]>
Unexecuted instantiation: unicode_bidi::reorder_levels::<str>
1203
1204
/// Contains a reference of `BidiInfo` and one of its `paragraphs`.
1205
/// And it supports all operation in the `Paragraph` that needs also its
1206
/// `BidiInfo` such as `direction`.
1207
#[derive(Debug)]
1208
pub struct Paragraph<'a, 'text> {
1209
    pub info: &'a BidiInfo<'text>,
1210
    pub para: &'a ParagraphInfo,
1211
}
1212
1213
impl<'a, 'text> Paragraph<'a, 'text> {
1214
    #[inline]
1215
0
    pub fn new(info: &'a BidiInfo<'text>, para: &'a ParagraphInfo) -> Paragraph<'a, 'text> {
1216
0
        Paragraph { info, para }
1217
0
    }
Unexecuted instantiation: <unicode_bidi::Paragraph>::new
Unexecuted instantiation: <unicode_bidi::Paragraph>::new
1218
1219
    /// Returns if the paragraph is Left direction, right direction or mixed.
1220
    #[inline]
1221
0
    pub fn direction(&self) -> Direction {
1222
0
        para_direction(&self.info.levels[self.para.range.clone()])
1223
0
    }
Unexecuted instantiation: <unicode_bidi::Paragraph>::direction
Unexecuted instantiation: <unicode_bidi::Paragraph>::direction
1224
1225
    /// Returns the `Level` of a certain character in the paragraph.
1226
    #[inline]
1227
0
    pub fn level_at(&self, pos: usize) -> Level {
1228
0
        let actual_position = self.para.range.start + pos;
1229
0
        self.info.levels[actual_position]
1230
0
    }
Unexecuted instantiation: <unicode_bidi::Paragraph>::level_at
Unexecuted instantiation: <unicode_bidi::Paragraph>::level_at
1231
}
1232
1233
/// Return the directionality of the paragraph (Left, Right or Mixed) from its levels.
1234
#[cfg_attr(feature = "flame_it", flamer::flame)]
1235
0
fn para_direction(levels: &[Level]) -> Direction {
1236
0
    let mut ltr = false;
1237
0
    let mut rtl = false;
1238
0
    for level in levels {
1239
0
        if level.is_ltr() {
1240
0
            ltr = true;
1241
0
            if rtl {
1242
0
                return Direction::Mixed;
1243
0
            }
1244
0
        }
1245
1246
0
        if level.is_rtl() {
1247
0
            rtl = true;
1248
0
            if ltr {
1249
0
                return Direction::Mixed;
1250
0
            }
1251
0
        }
1252
    }
1253
1254
0
    if ltr {
1255
0
        return Direction::Ltr;
1256
0
    }
1257
0
1258
0
    Direction::Rtl
1259
0
}
1260
1261
/// Assign levels to characters removed by rule X9.
1262
///
1263
/// The levels assigned to these characters are not specified by the algorithm.  This function
1264
/// assigns each one the level of the previous character, to avoid breaking level runs.
1265
#[cfg_attr(feature = "flame_it", flamer::flame)]
1266
0
fn assign_levels_to_removed_chars(para_level: Level, classes: &[BidiClass], levels: &mut [Level]) {
1267
0
    for i in 0..levels.len() {
1268
0
        if prepare::removed_by_x9(classes[i]) {
1269
0
            levels[i] = if i > 0 { levels[i - 1] } else { para_level };
1270
0
        }
1271
    }
1272
0
}
1273
1274
/// Get the base direction of the text provided according to the Unicode Bidirectional Algorithm.
///
/// See rules P2 and P3.
///
/// The base direction comes from the first character in the string whose bidi character type
/// is L, R, or AL: type L gives Direction::Ltr, while type R or AL gives Direction::Rtl.
///
/// If the string contains no character of these types (outside of embedded isolate runs),
/// Direction::Mixed is returned. In that case it should be read as "neutral" or "unknown",
/// not as actually mixed directions.
///
/// This is a lightweight function for use when only the base direction is needed and no further
/// bidi processing of the text is needed.
///
/// If the text contains paragraph separators, this function considers only the first paragraph.
#[cfg(feature = "hardcoded-data")]
#[inline]
pub fn get_base_direction<'a, T: TextSource<'a> + ?Sized>(text: &'a T) -> Direction {
    let data_source = HardcodedBidiData;
    get_base_direction_with_data_source(&data_source, text)
}
1295
1296
/// Get the base direction of the text provided according to the Unicode Bidirectional Algorithm,
/// considering the full text if the first paragraph is all-neutral.
///
/// This behaves like get_base_direction, except that a block separator does not end the
/// search: the isolate nesting level is reset and the search for a strongly-directional
/// character continues. The result is therefore the base direction of the first paragraph
/// that does not consist purely of neutral characters.
#[cfg(feature = "hardcoded-data")]
#[inline]
pub fn get_base_direction_full<'a, T: TextSource<'a> + ?Sized>(text: &'a T) -> Direction {
    let data_source = HardcodedBidiData;
    get_base_direction_full_with_data_source(&data_source, text)
}
1308
1309
#[inline]
1310
0
pub fn get_base_direction_with_data_source<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
1311
0
    data_source: &D,
1312
0
    text: &'a T,
1313
0
) -> Direction {
1314
0
    get_base_direction_impl(data_source, text, false)
1315
0
}
1316
1317
#[inline]
1318
0
pub fn get_base_direction_full_with_data_source<
1319
0
    'a,
1320
0
    D: BidiDataSource,
1321
0
    T: TextSource<'a> + ?Sized,
1322
0
>(
1323
0
    data_source: &D,
1324
0
    text: &'a T,
1325
0
) -> Direction {
1326
0
    get_base_direction_impl(data_source, text, true)
1327
0
}
1328
1329
0
fn get_base_direction_impl<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
1330
0
    data_source: &D,
1331
0
    text: &'a T,
1332
0
    use_full_text: bool,
1333
0
) -> Direction {
1334
0
    let mut isolate_level = 0;
1335
0
    for c in text.chars() {
1336
0
        match data_source.bidi_class(c) {
1337
0
            LRI | RLI | FSI => isolate_level += 1,
1338
0
            PDI if isolate_level > 0 => isolate_level -= 1,
1339
0
            L if isolate_level == 0 => return Direction::Ltr,
1340
0
            R | AL if isolate_level == 0 => return Direction::Rtl,
1341
0
            B if !use_full_text => break,
1342
0
            B if use_full_text => isolate_level = 0,
1343
0
            _ => (),
1344
        }
1345
    }
1346
    // If no strong char was found, return Mixed. Normally this will be treated as Ltr by callers
1347
    // (see rule P3), but we don't map this to Ltr here so that a caller that wants to apply other
1348
    // heuristics to an all-neutral paragraph can tell the difference.
1349
0
    Direction::Mixed
1350
0
}
1351
1352
/// Implementation of TextSource for UTF-8 text (a string slice).
1353
impl<'text> TextSource<'text> for str {
1354
    type CharIter = core::str::Chars<'text>;
1355
    type CharIndexIter = core::str::CharIndices<'text>;
1356
    type IndexLenIter = Utf8IndexLenIter<'text>;
1357
1358
    #[inline]
1359
0
    fn len(&self) -> usize {
1360
0
        (self as &str).len()
1361
0
    }
Unexecuted instantiation: <str as unicode_bidi::TextSource>::len
Unexecuted instantiation: <str as unicode_bidi::TextSource>::len
1362
    #[inline]
1363
0
    fn char_at(&self, index: usize) -> Option<(char, usize)> {
1364
0
        if let Some(slice) = self.get(index..) {
1365
0
            if let Some(ch) = slice.chars().next() {
1366
0
                return Some((ch, ch.len_utf8()));
1367
0
            }
1368
0
        }
1369
0
        None
1370
0
    }
Unexecuted instantiation: <str as unicode_bidi::TextSource>::char_at
Unexecuted instantiation: <str as unicode_bidi::TextSource>::char_at
1371
    #[inline]
1372
0
    fn subrange(&self, range: Range<usize>) -> &Self {
1373
0
        &(self as &str)[range]
1374
0
    }
Unexecuted instantiation: <str as unicode_bidi::TextSource>::subrange
Unexecuted instantiation: <str as unicode_bidi::TextSource>::subrange
1375
    #[inline]
1376
0
    fn chars(&'text self) -> Self::CharIter {
1377
0
        (self as &str).chars()
1378
0
    }
Unexecuted instantiation: <str as unicode_bidi::TextSource>::chars
Unexecuted instantiation: <str as unicode_bidi::TextSource>::chars
1379
    #[inline]
1380
0
    fn char_indices(&'text self) -> Self::CharIndexIter {
1381
0
        (self as &str).char_indices()
1382
0
    }
Unexecuted instantiation: <str as unicode_bidi::TextSource>::char_indices
Unexecuted instantiation: <str as unicode_bidi::TextSource>::char_indices
1383
    #[inline]
1384
0
    fn indices_lengths(&'text self) -> Self::IndexLenIter {
1385
0
        Utf8IndexLenIter::new(self)
1386
0
    }
Unexecuted instantiation: <str as unicode_bidi::TextSource>::indices_lengths
Unexecuted instantiation: <str as unicode_bidi::TextSource>::indices_lengths
1387
    #[inline]
1388
0
    fn char_len(ch: char) -> usize {
1389
0
        ch.len_utf8()
1390
0
    }
Unexecuted instantiation: <str as unicode_bidi::TextSource>::char_len
Unexecuted instantiation: <str as unicode_bidi::TextSource>::char_len
1391
}
1392
1393
/// Iterator over a (UTF-8) string slice that yields one `(index, char_len)` tuple per
/// character: the byte index where the character starts and its length in bytes.
#[derive(Debug)]
pub struct Utf8IndexLenIter<'text> {
    iter: CharIndices<'text>,
}

impl<'text> Utf8IndexLenIter<'text> {
    /// Creates an iterator positioned at the start of `text`.
    #[inline]
    pub fn new(text: &'text str) -> Self {
        let iter = text.char_indices();
        Utf8IndexLenIter { iter }
    }
}

impl<'text> Iterator for Utf8IndexLenIter<'text> {
    type Item = (usize, usize);

    /// Advances to the next character and reports its byte index and UTF-8 length.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        self.iter.next().map(|(pos, ch)| (pos, ch.len_utf8()))
    }
}
1419
1420
/// Test helper: encodes `s` as a vector of UTF-16 code units.
#[cfg(test)]
fn to_utf16(s: &str) -> Vec<u16> {
    let mut units = Vec::new();
    units.extend(s.encode_utf16());
    units
}
1424
1425
#[cfg(test)]
1426
#[cfg(feature = "hardcoded-data")]
1427
mod tests {
1428
    use super::*;
1429
1430
    use utf16::{
1431
        BidiInfo as BidiInfoU16, InitialInfo as InitialInfoU16, Paragraph as ParagraphU16,
1432
        ParagraphBidiInfo as ParagraphBidiInfoU16,
1433
    };
1434
1435
    #[test]
    fn test_utf16_text_source() {
        // One BMP letter, one valid surrogate pair, then a mix of spaces and
        // unpaired surrogates.
        let text: &[u16] =
            &[0x41, 0xD801, 0xDC01, 0x20, 0xD800, 0x20, 0xDFFF, 0x20, 0xDC00, 0xD800];

        // Expected result of `char_at` for every index 0..=10.
        let expected = [
            Some(('A', 1)),
            Some(('\u{10401}', 2)),
            // Index 2 is the second half of the pair that starts at index 1.
            None,
            Some((' ', 1)),
            Some((char::REPLACEMENT_CHARACTER, 1)),
            Some((' ', 1)),
            Some((char::REPLACEMENT_CHARACTER, 1)),
            Some((' ', 1)),
            Some((char::REPLACEMENT_CHARACTER, 1)),
            Some((char::REPLACEMENT_CHARACTER, 1)),
            // Index 10 is one past the end of the 10-unit slice.
            None,
        ];

        for (index, want) in expected.iter().enumerate() {
            assert_eq!(text.char_at(index), *want);
        }
    }
1451
1452
    #[test]
    fn test_utf16_char_iter() {
        let text: &[u16] =
            &[0x41, 0xD801, 0xDC01, 0x20, 0xD800, 0x20, 0xDFFF, 0x20, 0xDC00, 0xD800];
        // Ten code units decode to nine chars: the surrogate pair collapses
        // into a single character.
        assert_eq!(text.len(), 10);
        assert_eq!(text.chars().count(), 9);

        let expected = [
            'A',
            '\u{10401}',
            ' ',
            '\u{FFFD}',
            ' ',
            '\u{FFFD}',
            ' ',
            '\u{FFFD}',
            '\u{FFFD}',
        ];

        let mut chars = text.chars();
        for want in expected.iter() {
            assert_eq!(chars.next(), Some(*want));
        }
        // The iterator must be exhausted after the last expected char.
        assert_eq!(chars.next(), None);
    }
1470
1471
    #[test]
    fn test_initial_text_info() {
        // Each case is:
        //   (text,
        //    expected bidi classes per utf-8 byte,
        //    expected paragraph-info for utf-8,
        //    expected bidi classes per utf-16 code unit,
        //    expected paragraph-info for utf-16)
        let cases = vec![
            (
                "a1",
                vec![L, EN],
                vec![ParagraphInfo {
                    range: 0..2,
                    level: LTR_LEVEL,
                }],
                vec![L, EN],
                vec![ParagraphInfo {
                    range: 0..2,
                    level: LTR_LEVEL,
                }],
            ),
            (
                // Arabic, space, Hebrew
                "\u{0639} \u{05D0}",
                vec![AL, AL, WS, R, R],
                vec![ParagraphInfo {
                    range: 0..5,
                    level: RTL_LEVEL,
                }],
                vec![AL, WS, R],
                vec![ParagraphInfo {
                    range: 0..3,
                    level: RTL_LEVEL,
                }],
            ),
            (
                // SMP characters from Kharoshthi, Cuneiform, Adlam:
                "\u{10A00}\u{12000}\u{1E900}",
                vec![R, R, R, R, L, L, L, L, R, R, R, R],
                vec![ParagraphInfo {
                    range: 0..12,
                    level: RTL_LEVEL,
                }],
                vec![R, R, L, L, R, R],
                vec![ParagraphInfo {
                    range: 0..6,
                    level: RTL_LEVEL,
                }],
            ),
            (
                // U+2029 splits the text into two paragraphs.
                "a\u{2029}b",
                vec![L, B, B, B, L],
                vec![
                    ParagraphInfo {
                        range: 0..4,
                        level: LTR_LEVEL,
                    },
                    ParagraphInfo {
                        range: 4..5,
                        level: LTR_LEVEL,
                    },
                ],
                vec![L, B, L],
                vec![
                    ParagraphInfo {
                        range: 0..2,
                        level: LTR_LEVEL,
                    },
                    ParagraphInfo {
                        range: 2..3,
                        level: LTR_LEVEL,
                    },
                ],
            ),
            (
                "\u{2068}א\u{2069}a", // U+2068 FSI, U+2069 PDI
                vec![RLI, RLI, RLI, R, R, PDI, PDI, PDI, L],
                vec![ParagraphInfo {
                    range: 0..9,
                    level: LTR_LEVEL,
                }],
                vec![RLI, R, PDI, L],
                vec![ParagraphInfo {
                    range: 0..4,
                    level: LTR_LEVEL,
                }],
            ),
        ];

        for (text, classes, paras, classes_u16, paras_u16) in cases {
            // UTF-8 flavour.
            let expected = InitialInfo {
                text,
                original_classes: classes,
                paragraphs: paras,
            };
            assert_eq!(InitialInfo::new(text, None), expected);

            // UTF-16 flavour of the same text.
            let utf16 = to_utf16(text);
            let expected_u16 = InitialInfoU16 {
                text: &utf16,
                original_classes: classes_u16,
                paragraphs: paras_u16,
            };
            assert_eq!(InitialInfoU16::new(&utf16, None), expected_u16);
        }
    }
1580
1581
    #[test]
    #[cfg(feature = "hardcoded-data")]
    fn test_process_text() {
        // Each case is:
        //   (text, base level,
        //    levels, original_classes, paragraphs            -- utf-8
        //    levels_u16, original_classes_u16, paragraphs_u16 -- utf-16)
        let tests = vec![
            (
                // text
                "",
                // base level
                Some(RTL_LEVEL),
                // levels
                Level::vec(&[]),
                // original_classes
                vec![],
                // paragraphs
                vec![],
                // levels_u16
                Level::vec(&[]),
                // original_classes_u16
                vec![],
                // paragraphs_u16
                vec![],
            ),
            (
                // text
                "abc123",
                // base level
                Some(LTR_LEVEL),
                // levels
                Level::vec(&[0, 0, 0, 0, 0, 0]),
                // original_classes
                vec![L, L, L, EN, EN, EN],
                // paragraphs
                vec![ParagraphInfo {
                    range: 0..6,
                    level: LTR_LEVEL,
                }],
                // levels_u16
                Level::vec(&[0, 0, 0, 0, 0, 0]),
                // original_classes_u16
                vec![L, L, L, EN, EN, EN],
                // paragraphs_u16
                vec![ParagraphInfo {
                    range: 0..6,
                    level: LTR_LEVEL,
                }],
            ),
            (
                "abc \u{05D0}\u{05D1}\u{05D2}",
                Some(LTR_LEVEL),
                Level::vec(&[0, 0, 0, 0, 1, 1, 1, 1, 1, 1]),
                vec![L, L, L, WS, R, R, R, R, R, R],
                vec![ParagraphInfo {
                    range: 0..10,
                    level: LTR_LEVEL,
                }],
                Level::vec(&[0, 0, 0, 0, 1, 1, 1]),
                vec![L, L, L, WS, R, R, R],
                vec![ParagraphInfo {
                    range: 0..7,
                    level: LTR_LEVEL,
                }],
            ),
            (
                "abc \u{05D0}\u{05D1}\u{05D2}",
                Some(RTL_LEVEL),
                Level::vec(&[2, 2, 2, 1, 1, 1, 1, 1, 1, 1]),
                vec![L, L, L, WS, R, R, R, R, R, R],
                vec![ParagraphInfo {
                    range: 0..10,
                    level: RTL_LEVEL,
                }],
                Level::vec(&[2, 2, 2, 1, 1, 1, 1]),
                vec![L, L, L, WS, R, R, R],
                vec![ParagraphInfo {
                    range: 0..7,
                    level: RTL_LEVEL,
                }],
            ),
            (
                "\u{05D0}\u{05D1}\u{05D2} abc",
                Some(LTR_LEVEL),
                Level::vec(&[1, 1, 1, 1, 1, 1, 0, 0, 0, 0]),
                vec![R, R, R, R, R, R, WS, L, L, L],
                vec![ParagraphInfo {
                    range: 0..10,
                    level: LTR_LEVEL,
                }],
                Level::vec(&[1, 1, 1, 0, 0, 0, 0]),
                vec![R, R, R, WS, L, L, L],
                vec![ParagraphInfo {
                    range: 0..7,
                    level: LTR_LEVEL,
                }],
            ),
            (
                "\u{05D0}\u{05D1}\u{05D2} abc",
                None,
                Level::vec(&[1, 1, 1, 1, 1, 1, 1, 2, 2, 2]),
                vec![R, R, R, R, R, R, WS, L, L, L],
                vec![ParagraphInfo {
                    range: 0..10,
                    level: RTL_LEVEL,
                }],
                Level::vec(&[1, 1, 1, 1, 2, 2, 2]),
                vec![R, R, R, WS, L, L, L],
                vec![ParagraphInfo {
                    range: 0..7,
                    level: RTL_LEVEL,
                }],
            ),
            (
                "\u{063A}2\u{0638} \u{05D0}2\u{05D2}",
                Some(LTR_LEVEL),
                Level::vec(&[1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1]),
                vec![AL, AL, EN, AL, AL, WS, R, R, EN, R, R],
                vec![ParagraphInfo {
                    range: 0..11,
                    level: LTR_LEVEL,
                }],
                Level::vec(&[1, 2, 1, 1, 1, 2, 1]),
                vec![AL, EN, AL, WS, R, EN, R],
                vec![ParagraphInfo {
                    range: 0..7,
                    level: LTR_LEVEL,
                }],
            ),
            (
                "a א.\nג",
                None,
                Level::vec(&[0, 0, 1, 1, 0, 0, 1, 1]),
                vec![L, WS, R, R, CS, B, R, R],
                vec![
                    ParagraphInfo {
                        range: 0..6,
                        level: LTR_LEVEL,
                    },
                    ParagraphInfo {
                        range: 6..8,
                        level: RTL_LEVEL,
                    },
                ],
                Level::vec(&[0, 0, 1, 0, 0, 1]),
                vec![L, WS, R, CS, B, R],
                vec![
                    ParagraphInfo {
                        range: 0..5,
                        level: LTR_LEVEL,
                    },
                    ParagraphInfo {
                        range: 5..6,
                        level: RTL_LEVEL,
                    },
                ],
            ),
            // BidiTest:69635 (AL ET EN)
            (
                "\u{060B}\u{20CF}\u{06F9}",
                None,
                Level::vec(&[1, 1, 1, 1, 1, 2, 2]),
                vec![AL, AL, ET, ET, ET, EN, EN],
                vec![ParagraphInfo {
                    range: 0..7,
                    level: RTL_LEVEL,
                }],
                Level::vec(&[1, 1, 2]),
                vec![AL, ET, EN],
                vec![ParagraphInfo {
                    range: 0..3,
                    level: RTL_LEVEL,
                }],
            ),
        ];

        for (text, base_level, levels, classes, paras, levels_u16, classes_u16, paras_u16) in tests
        {
            assert_eq!(
                BidiInfo::new(text, base_level),
                BidiInfo {
                    text,
                    levels: levels.clone(),
                    original_classes: classes.clone(),
                    paragraphs: paras.clone(),
                }
            );
            // If it was empty, also test that ParagraphBidiInfo handles it safely.
            // The expected paragraph level is hard-coded to RTL_LEVEL, which
            // matches the base level of the only empty case in the table above.
            if paras.is_empty() {
                assert_eq!(
                    ParagraphBidiInfo::new(text, base_level),
                    ParagraphBidiInfo {
                        text,
                        original_classes: classes.clone(),
                        levels: levels.clone(),
                        paragraph_level: RTL_LEVEL,
                        is_pure_ltr: true,
                    }
                )
            }
            // If it was a single paragraph, also test ParagraphBidiInfo.
            if paras.len() == 1 {
                assert_eq!(
                    ParagraphBidiInfo::new(text, base_level),
                    ParagraphBidiInfo {
                        text,
                        original_classes: classes,
                        levels: levels.clone(),
                        paragraph_level: paras[0].level,
                        is_pure_ltr: !level::has_rtl(&levels),
                    }
                )
            }

            // Repeat the checks on the UTF-16 encoding of the same text.
            let utf16 = to_utf16(text);
            let text: &[u16] = &utf16;
            assert_eq!(
                BidiInfoU16::new(text, base_level),
                BidiInfoU16 {
                    text,
                    levels: levels_u16.clone(),
                    original_classes: classes_u16.clone(),
                    paragraphs: paras_u16.clone(),
                }
            );
            if paras_u16.len() == 1 {
                assert_eq!(
                    ParagraphBidiInfoU16::new(text, base_level),
                    ParagraphBidiInfoU16 {
                        text,
                        original_classes: classes_u16.clone(),
                        levels: levels_u16.clone(),
                        paragraph_level: paras_u16[0].level,
                        is_pure_ltr: !level::has_rtl(&levels_u16),
                    }
                )
            }
        }
    }
1814
1815
    #[test]
    #[cfg(feature = "hardcoded-data")]
    fn test_paragraph_bidi_info() {
        // Passing text that includes a paragraph break to the ParagraphBidiInfo API:
        // this is a misuse of the API by the client, but our behavior is safe &
        // consistent. The embedded paragraph break acts like a separator (tab) would.
        //
        // Each case is:
        //   (text, base level,
        //    utf-8 classes, utf-8 levels,
        //    utf-16 classes, utf-16 levels,
        //    paragraph level, is_pure_ltr)
        let cases = vec![
            (
                "a א.\nג",
                None,
                // utf-8 results:
                vec![L, WS, R, R, CS, B, R, R],
                Level::vec(&[0, 0, 1, 1, 1, 1, 1, 1]),
                // utf-16 results:
                vec![L, WS, R, CS, B, R],
                Level::vec(&[0, 0, 1, 1, 1, 1]),
                // paragraph level; is_pure_ltr
                LTR_LEVEL,
                false,
            ),
            (
                "\u{5d1} a.\nb.",
                None,
                // utf-8 results:
                vec![R, R, WS, L, CS, B, L, CS],
                Level::vec(&[1, 1, 1, 2, 2, 2, 2, 1]),
                // utf-16 results:
                vec![R, WS, L, CS, B, L, CS],
                Level::vec(&[1, 1, 2, 2, 2, 2, 1]),
                // paragraph level; is_pure_ltr
                RTL_LEVEL,
                false,
            ),
            (
                "a א.\tג",
                None,
                // utf-8 results:
                vec![L, WS, R, R, CS, S, R, R],
                Level::vec(&[0, 0, 1, 1, 1, 1, 1, 1]),
                // utf-16 results:
                vec![L, WS, R, CS, S, R],
                Level::vec(&[0, 0, 1, 1, 1, 1]),
                // paragraph level; is_pure_ltr
                LTR_LEVEL,
                false,
            ),
            (
                "\u{5d1} a.\tb.",
                None,
                // utf-8 results:
                vec![R, R, WS, L, CS, S, L, CS],
                Level::vec(&[1, 1, 1, 2, 2, 2, 2, 1]),
                // utf-16 results:
                vec![R, WS, L, CS, S, L, CS],
                Level::vec(&[1, 1, 2, 2, 2, 2, 1]),
                // paragraph level; is_pure_ltr
                RTL_LEVEL,
                false,
            ),
        ];

        for (text, base_level, classes, levels, classes_u16, levels_u16, para_level, pure_ltr) in
            cases
        {
            let expected = ParagraphBidiInfo {
                text,
                original_classes: classes,
                levels,
                paragraph_level: para_level,
                is_pure_ltr: pure_ltr,
            };
            assert_eq!(ParagraphBidiInfo::new(text, base_level), expected);

            let utf16 = to_utf16(text);
            let expected_u16 = ParagraphBidiInfoU16 {
                text: &utf16,
                original_classes: classes_u16,
                levels: levels_u16,
                paragraph_level: para_level,
                is_pure_ltr: pure_ltr,
            };
            assert_eq!(ParagraphBidiInfoU16::new(&utf16, base_level), expected_u16);
        }
    }
1900
1901
    #[test]
    #[cfg(feature = "hardcoded-data")]
    fn test_bidi_info_has_rtl() {
        // Each case is (text, base level, expected `has_rtl()` result).
        let cases = vec![
            // ASCII only
            ("123", None, false),
            ("123", Some(LTR_LEVEL), false),
            ("123", Some(RTL_LEVEL), false),
            ("abc", None, false),
            ("abc", Some(LTR_LEVEL), false),
            ("abc", Some(RTL_LEVEL), false),
            ("abc 123", None, false),
            ("abc\n123", None, false),
            // With Hebrew
            ("\u{05D0}\u{05D1}\u{05BC}\u{05D2}", None, true),
            ("\u{05D0}\u{05D1}\u{05BC}\u{05D2}", Some(LTR_LEVEL), true),
            ("\u{05D0}\u{05D1}\u{05BC}\u{05D2}", Some(RTL_LEVEL), true),
            ("abc \u{05D0}\u{05D1}\u{05BC}\u{05D2}", None, true),
            ("abc\n\u{05D0}\u{05D1}\u{05BC}\u{05D2}", None, true),
            ("\u{05D0}\u{05D1}\u{05BC}\u{05D2} abc", None, true),
            ("\u{05D0}\u{05D1}\u{05BC}\u{05D2}\nabc", None, true),
            ("\u{05D0}\u{05D1}\u{05BC}\u{05D2} 123", None, true),
            ("\u{05D0}\u{05D1}\u{05BC}\u{05D2}\n123", None, true),
        ];

        for (text, base_level, expected) in cases {
            let utf8_info = BidiInfo::new(text, base_level);
            assert_eq!(utf8_info.has_rtl(), expected);

            let utf16 = to_utf16(text);
            let utf16_info = BidiInfoU16::new(&utf16, base_level);
            assert_eq!(utf16_info.has_rtl(), expected);
        }
    }
1931
1932
    /// Resolves `text` with an auto-detected base level and returns one
    /// reordered line per paragraph, each line spanning its whole paragraph.
    #[cfg(feature = "hardcoded-data")]
    fn reorder_paras(text: &str) -> Vec<Cow<'_, str>> {
        let bidi_info = BidiInfo::new(text, None);
        let mut lines = Vec::new();
        for para in bidi_info.paragraphs.iter() {
            let whole_para = para.range.clone();
            lines.push(bidi_info.reorder_line(para, whole_para));
        }
        lines
    }
1941
1942
    /// UTF-16 counterpart of `reorder_paras`: one reordered line per
    /// paragraph, each line spanning its whole paragraph.
    #[cfg(feature = "hardcoded-data")]
    fn reorder_paras_u16(text: &[u16]) -> Vec<Cow<'_, [u16]>> {
        let bidi_info = BidiInfoU16::new(text, None);
        let mut lines = Vec::new();
        for para in bidi_info.paragraphs.iter() {
            let whole_para = para.range.clone();
            lines.push(bidi_info.reorder_line(para, whole_para));
        }
        lines
    }
1951
1952
    #[test]
    #[cfg(feature = "hardcoded-data")]
    fn test_reorder_line() {
        // Each case is (input text, expected reordered line per paragraph).
        let cases = vec![
            // Bidi_Class: L L L B L L L B L L L
            ("abc\ndef\nghi", vec!["abc\n", "def\n", "ghi"]),
            // Bidi_Class: L L EN B L L EN B L L EN
            ("ab1\nde2\ngh3", vec!["ab1\n", "de2\n", "gh3"]),
            // Bidi_Class: L L L B AL AL AL
            ("abc\nابج", vec!["abc\n", "جبا"]),
            // Bidi_Class: AL AL AL B L L L
            (
                "\u{0627}\u{0628}\u{062C}\nabc",
                vec!["\n\u{062C}\u{0628}\u{0627}", "abc"],
            ),
            ("1.-2", vec!["1.-2"]),
            ("1-.2", vec!["1-.2"]),
            ("abc אבג", vec!["abc גבא"]),
            // Numbers being weak LTR characters, cannot reorder strong RTL
            ("123 \u{05D0}\u{05D1}\u{05D2}", vec!["גבא 123"]),
            ("abc\u{202A}def", vec!["abc\u{202A}def"]),
            (
                "abc\u{202A}def\u{202C}ghi",
                vec!["abc\u{202A}def\u{202C}ghi"],
            ),
            (
                "abc\u{2066}def\u{2069}ghi",
                vec!["abc\u{2066}def\u{2069}ghi"],
            ),
            // Testing for RLE Character
            ("\u{202B}abc אבג\u{202C}", vec!["\u{202b}גבא abc\u{202c}"]),
            // Testing neutral characters
            ("\u{05D0}בג? אבג", vec!["גבא ?גבא"]),
            // Testing neutral characters with special case
            ("A אבג?", vec!["A גבא?"]),
            // Testing neutral characters with Implicit RTL Marker
            ("A אבג?\u{200F}", vec!["A \u{200F}?גבא"]),
            ("\u{05D0}בג abc", vec!["abc גבא"]),
            ("abc\u{2067}.-\u{2069}ghi", vec!["abc\u{2067}-.\u{2069}ghi"]),
            (
                "Hello, \u{2068}\u{202E}world\u{202C}\u{2069}!",
                vec!["Hello, \u{2068}\u{202E}\u{202C}dlrow\u{2069}!"],
            ),
            // With mirrorable characters in RTL run
            ("\u{05D0}(ב)ג.", vec![".ג)ב(א"]),
            // With mirrorable characters on level boundary
            ("\u{05D0}ב(גד[&ef].)gh", vec!["gh).]ef&[דג(בא"]),
        ];

        for (input, expected) in cases {
            assert_eq!(reorder_paras(input), expected);

            // The UTF-16 path must produce the same lines, just re-encoded.
            let expected_u16: Vec<Vec<u16>> =
                expected.iter().map(|line| to_utf16(line)).collect();
            assert_eq!(reorder_paras_u16(&to_utf16(input)), expected_u16);
        }
    }
2007
2008
    /// Calls `reordered_levels` on every paragraph of `text` (auto-detected
    /// base level), using the full paragraph as the line range.
    fn reordered_levels_for_paras(text: &str) -> Vec<Vec<Level>> {
        let bidi_info = BidiInfo::new(text, None);
        let mut per_para = Vec::new();
        for para in bidi_info.paragraphs.iter() {
            per_para.push(bidi_info.reordered_levels(para, para.range.clone()));
        }
        per_para
    }
2016
2017
    /// Calls `reordered_levels_per_char` on every paragraph of `text`
    /// (auto-detected base level), using the full paragraph as the line range.
    fn reordered_levels_per_char_for_paras(text: &str) -> Vec<Vec<Level>> {
        let bidi_info = BidiInfo::new(text, None);
        let mut per_para = Vec::new();
        for para in bidi_info.paragraphs.iter() {
            per_para.push(bidi_info.reordered_levels_per_char(para, para.range.clone()));
        }
        per_para
    }
2025
2026
    /// UTF-16 counterpart of `reordered_levels_for_paras`.
    fn reordered_levels_for_paras_u16(text: &[u16]) -> Vec<Vec<Level>> {
        let bidi_info = BidiInfoU16::new(text, None);
        let mut per_para = Vec::new();
        for para in bidi_info.paragraphs.iter() {
            per_para.push(bidi_info.reordered_levels(para, para.range.clone()));
        }
        per_para
    }
2034
2035
    /// UTF-16 counterpart of `reordered_levels_per_char_for_paras`.
    fn reordered_levels_per_char_for_paras_u16(text: &[u16]) -> Vec<Vec<Level>> {
        let bidi_info = BidiInfoU16::new(text, None);
        let mut per_para = Vec::new();
        for para in bidi_info.paragraphs.iter() {
            per_para.push(bidi_info.reordered_levels_per_char(para, para.range.clone()));
        }
        per_para
    }
2043
2044
    #[test]
    #[cfg(feature = "hardcoded-data")]
    // See issue #138
    fn test_reordered_levels_range() {
        //                |---------------|
        let s = "\u{202a}A\u{202c}\u{202a}A\u{202c}";
        let range = 4..11;
        // Sanity check: the chosen line range is itself a valid slice of `s`.
        assert!(s.get(range.clone()).is_some());

        let bidi = BidiInfo::new(s, None);
        let para = &bidi.paragraphs[0];
        let (_, runs) = bidi.visual_runs(para, range);

        // Slicing panics if a run is not a valid slice of `s`, which is
        // exactly what this test guards against.
        runs.into_iter().for_each(|run| {
            let _ = &s[run];
        });
    }
2060
2061
    #[test]
    #[cfg(feature = "hardcoded-data")]
    fn test_reordered_levels() {
        // Each case is:
        //   (text,
        //    utf-8 levels per byte,
        //    levels per char (shared by utf-8 and utf-16),
        //    utf-16 levels per code unit)
        let numeric_cases = vec![
            // BidiTest:946 (LRI PDI)
            // NOTE(review): the literal below starts with U+2067 (RLI), not
            // U+2066 (LRI) -- confirm which one the label should name.
            (
                "\u{2067}\u{2069}",
                vec![Level::vec(&[0, 0, 0, 0, 0, 0])],
                vec![Level::vec(&[0, 0])],
                vec![Level::vec(&[0, 0])],
            ),
            // BidiTest:69635 (AL ET EN)
            (
                "\u{060B}\u{20CF}\u{06F9}",
                vec![Level::vec(&[1, 1, 1, 1, 1, 2, 2])],
                vec![Level::vec(&[1, 1, 2])],
                vec![Level::vec(&[1, 1, 2])],
            ),
        ];

        for (text, per_byte, per_char, per_unit) in numeric_cases {
            assert_eq!(reordered_levels_for_paras(text), per_byte);
            assert_eq!(reordered_levels_per_char_for_paras(text), per_char);
            let utf16 = to_utf16(text);
            assert_eq!(reordered_levels_for_paras_u16(&utf16), per_unit);
            assert_eq!(reordered_levels_per_char_for_paras_u16(&utf16), per_char);
        }

        // Same layout as above, but the expected levels are written as
        // strings, which allows the "x" entries used in this case.
        let string_cases = vec![
            // BidiTest:291284 (AN RLI PDF R)
            (
                "\u{0605}\u{2067}\u{202C}\u{0590}",
                vec![&["2", "2", "0", "0", "0", "x", "x", "x", "1", "1"]],
                vec![&["2", "0", "x", "1"]],
                vec![&["2", "0", "x", "1"]],
            ),
        ];

        for (text, per_byte, per_char, per_unit) in string_cases {
            assert_eq!(reordered_levels_for_paras(text), per_byte);
            assert_eq!(reordered_levels_per_char_for_paras(text), per_char);
            let utf16 = to_utf16(text);
            assert_eq!(reordered_levels_for_paras_u16(&utf16), per_unit);
            assert_eq!(reordered_levels_per_char_for_paras_u16(&utf16), per_char);
        }

        // A line range covering only part of the paragraph: the expected
        // vectors below still have one entry per unit of the whole text.
        let text = "aa טֶ";
        let info = BidiInfo::new(text, None);
        let levels = info.reordered_levels(&info.paragraphs[0], 3..7);
        assert_eq!(levels, Level::vec(&[0, 0, 0, 1, 1, 1, 1]),);

        let utf16 = to_utf16(text);
        let info_u16 = BidiInfoU16::new(&utf16, None);
        let levels_u16 = info_u16.reordered_levels(&info_u16.paragraphs[0], 1..4);
        assert_eq!(levels_u16, Level::vec(&[0, 0, 0, 1, 1]),);
    }
2121
2122
    #[test]
    fn test_paragraph_info_len() {
        let first = "hello world";
        let second = "How are you";
        let joined = format!("{}\n{}", first, second);

        // A single paragraph spans the whole text.
        let info = BidiInfo::new(first, None);
        assert_eq!(info.paragraphs.len(), 1);
        assert_eq!(info.paragraphs[0].len(), first.len());

        let info = BidiInfo::new(&joined, None);
        assert_eq!(info.paragraphs.len(), 2);

        // The first paragraph includes the paragraph separator.
        // TODO: investigate if the paragraph separator character
        // should not be part of any paragraph.
        assert_eq!(info.paragraphs[0].len(), first.len() + 1);
        assert_eq!(info.paragraphs[1].len(), second.len());

        // Same expectations for the UTF-16 encoding, measured in code units.
        let first_u16 = to_utf16(first);
        let info = BidiInfoU16::new(&first_u16, None);
        assert_eq!(info.paragraphs.len(), 1);
        assert_eq!(info.paragraphs[0].len(), first_u16.len());

        let second_u16 = to_utf16(second);
        let joined_u16 = to_utf16(&joined);
        let info = BidiInfoU16::new(&joined_u16, None);
        assert_eq!(info.paragraphs.len(), 2);

        assert_eq!(info.paragraphs[0].len(), first_u16.len() + 1);
        assert_eq!(info.paragraphs[1].len(), second_u16.len());
    }
2153
2154
    #[test]
    fn test_direction() {
        let ltr_text = "hello world";
        let rtl_text = "أهلا بكم";
        // Three paragraphs: pure LTR, pure RTL, then both concatenated.
        let all_paragraphs = format!("{}\n{}\n{}{}", ltr_text, rtl_text, ltr_text, rtl_text);
        let expected = [Direction::Ltr, Direction::Rtl, Direction::Mixed];

        let info = BidiInfo::new(&all_paragraphs, None);
        assert_eq!(info.paragraphs.len(), 3);
        for (index, want) in expected.iter().enumerate() {
            let para = Paragraph::new(&info, &info.paragraphs[index]);
            assert_eq!(para.direction(), *want);
        }

        // The UTF-16 encoding must classify the paragraphs identically.
        let all_paragraphs_u16 = to_utf16(&all_paragraphs);
        let info = BidiInfoU16::new(&all_paragraphs_u16, None);
        assert_eq!(info.paragraphs.len(), 3);
        for (index, want) in expected.iter().enumerate() {
            let para = ParagraphU16::new(&info, &info.paragraphs[index]);
            assert_eq!(para.direction(), *want);
        }
    }
2178
2179
    #[test]
    fn test_edge_cases_direction() {
        // No paragraphs for empty text.
        let empty = "";
        let info = BidiInfo::new(empty, Some(RTL_LEVEL));
        assert_eq!(info.paragraphs.len(), 0);

        let empty_u16 = to_utf16(empty);
        let info = BidiInfoU16::new(&empty_u16, Some(RTL_LEVEL));
        assert_eq!(info.paragraphs.len(), 0);

        // Each case is (text, base level, expected paragraph direction).
        let cases = vec![
            // The paragraph separator will take the value of the default direction
            // which is left to right.
            ("\n", None, Direction::Ltr),
            // The paragraph separator will take the value of the given initial direction
            // which is left to right.
            ("\n", Some(LTR_LEVEL), Direction::Ltr),
            // The paragraph separator will take the value of the given initial direction
            // which is right to left.
            ("\n", Some(RTL_LEVEL), Direction::Rtl),
        ];

        for (text, base_level, expected) in cases {
            let info = BidiInfo::new(text, base_level);
            assert_eq!(info.paragraphs.len(), 1);
            let para = Paragraph::new(&info, &info.paragraphs[0]);
            assert_eq!(para.direction(), expected);

            let utf16 = to_utf16(text);
            let info_u16 = BidiInfoU16::new(&utf16, base_level);
            let para_u16 = ParagraphU16::new(&info_u16, &info_u16.paragraphs[0]);
            assert_eq!(para_u16.direction(), expected);
        }
    }
2213
2214
    /// Exercises `level_at` on an LTR-only, an RTL-only and a mixed paragraph,
    /// first on UTF-8 text and then on the same text converted to UTF-16.
    #[test]
    fn test_level_at() {
        let ltr_text = "hello world";
        let rtl_text = "أهلا بكم";
        // Paragraph 0 is LTR only, paragraph 1 is RTL only, and paragraph 2 is
        // the LTR text immediately followed by the RTL text.
        let all_paragraphs = [ltr_text, "\n", rtl_text, "\n", ltr_text, rtl_text].concat();

        // UTF-8: lengths and offsets below are counted in bytes.
        {
            let info = BidiInfo::new(&all_paragraphs, None);
            assert_eq!(info.paragraphs.len(), 3);

            let ltr_para = Paragraph::new(&info, &info.paragraphs[0]);
            let rtl_para = Paragraph::new(&info, &info.paragraphs[1]);
            let mixed_para = Paragraph::new(&info, &info.paragraphs[2]);

            assert_eq!(ltr_para.level_at(0), LTR_LEVEL);
            assert_eq!(rtl_para.level_at(0), RTL_LEVEL);
            assert_eq!(mixed_para.level_at(0), LTR_LEVEL);
            assert_eq!(mixed_para.info.levels.len(), 54);
            assert_eq!(mixed_para.para.range.start, 28);
            // Offset of the first RTL character within the mixed paragraph.
            assert_eq!(mixed_para.level_at(ltr_text.len()), RTL_LEVEL);
        }

        // UTF-16: lengths and offsets below are counted in code units.
        {
            let utf16 = to_utf16(&all_paragraphs);
            let info = BidiInfoU16::new(&utf16, None);
            assert_eq!(info.paragraphs.len(), 3);

            let ltr_para = ParagraphU16::new(&info, &info.paragraphs[0]);
            let rtl_para = ParagraphU16::new(&info, &info.paragraphs[1]);
            let mixed_para = ParagraphU16::new(&info, &info.paragraphs[2]);

            assert_eq!(ltr_para.level_at(0), LTR_LEVEL);
            assert_eq!(rtl_para.level_at(0), RTL_LEVEL);
            assert_eq!(mixed_para.level_at(0), LTR_LEVEL);
            assert_eq!(mixed_para.info.levels.len(), 40);
            assert_eq!(mixed_para.para.range.start, 21);
            // `ltr_text` is ASCII, so its byte length equals its UTF-16 length.
            assert_eq!(mixed_para.level_at(ltr_text.len()), RTL_LEVEL);
        }
    }
2248
2249
    /// Table-driven check of `get_base_direction` on UTF-8 input and on the
    /// same input converted to UTF-16.
    #[test]
    fn test_get_base_direction() {
        // (input text, expected base direction)
        let cases = vec![
            // Mixed is returned when no strong character is found.
            ("", Direction::Mixed),
            ("123[]-+\u{2019}\u{2060}\u{00bf}?", Direction::Mixed),
            // Only the first paragraph is considered.
            ("3.14\npi", Direction::Mixed),
            ("[123 'abc']", Direction::Ltr),
            ("[123 '\u{0628}' abc", Direction::Rtl),
            // The embedded isolate is ignored.
            ("[123 '\u{2066}abc\u{2069}'\u{0628}]", Direction::Rtl),
            ("[123 '\u{2066}abc\u{2068}'\u{0628}]", Direction::Mixed),
        ];

        for (input, expected) in cases {
            assert_eq!(get_base_direction(input), expected);

            let input_u16 = to_utf16(input);
            assert_eq!(get_base_direction(input_u16.as_slice()), expected);
        }
    }
2267
2268
    /// Table-driven check of `get_base_direction_full` on UTF-8 input and on
    /// the same input converted to UTF-16.
    #[test]
    fn test_get_base_direction_full() {
        // (input text, expected base direction)
        let cases = vec![
            // Mixed is returned when no strong character is found.
            ("", Direction::Mixed),
            ("123[]-+\u{2019}\u{2060}\u{00bf}?", Direction::Mixed),
            // Direction is taken from the second paragraph.
            ("3.14\npi", Direction::Ltr),
            ("3.14\n\u{05D0}", Direction::Rtl),
            ("[123 'abc']", Direction::Ltr),
            ("[123 '\u{0628}' abc", Direction::Rtl),
            // The embedded isolate is ignored.
            ("[123 '\u{2066}abc\u{2069}'\u{0628}]", Direction::Rtl),
            ("[123 '\u{2066}abc\u{2068}'\u{0628}]", Direction::Mixed),
            // "\n" resets the embedding level.
            ("[123 '\u{2066}abc\u{2068}'\n\u{0628}]", Direction::Rtl),
        ];

        for (input, expected) in cases {
            assert_eq!(get_base_direction_full(input), expected);

            let input_u16 = to_utf16(input);
            assert_eq!(get_base_direction_full(input_u16.as_slice()), expected);
        }
    }
2288
}
2289
2290
#[cfg(all(feature = "serde", feature = "hardcoded-data", test))]
mod serde_tests {
    use super::*;
    use serde_test::{assert_tokens, Token};

    /// The resolved levels of a short LTR+RTL string serialize as a sequence
    /// of `Level` newtype structs, each wrapping a `u8`.
    #[test]
    fn test_levels() {
        let text = "abc אבג";
        let levels = BidiInfo::new(text, None).levels;
        assert_eq!(text.as_bytes().len(), 10);
        assert_eq!(levels.len(), 10);

        // Expected level for each of the 10 entries: four at level 0 followed
        // by six at level 1.
        let per_entry: [u8; 10] = [0, 0, 0, 0, 1, 1, 1, 1, 1, 1];

        // Sequence header, one (newtype marker, value) pair per entry, and the
        // sequence terminator.
        let mut expected = Vec::with_capacity(2 * per_entry.len() + 2);
        expected.push(Token::Seq {
            len: Some(per_entry.len()),
        });
        for &value in per_entry.iter() {
            expected.push(Token::NewtypeStruct { name: "Level" });
            expected.push(Token::U8(value));
        }
        expected.push(Token::SeqEnd);

        assert_tokens(&levels, &expected);
    }
}