Coverage Report

Created: 2025-07-12 07:16

/rust/registry/src/index.crates.io-6f17d22bba15001f/unicode-bidi-0.3.18/src/explicit.rs
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2015 The Servo Project Developers. See the
2
// COPYRIGHT file at the top-level directory of this distribution.
3
//
4
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7
// option. This file may not be copied, modified, or distributed
8
// except according to those terms.
9
10
//! 3.3.2 Explicit Levels and Directions
11
//!
12
//! <http://www.unicode.org/reports/tr9/#Explicit_Levels_and_Directions>
13
14
#[cfg(feature = "smallvec")]
15
use smallvec::{smallvec, SmallVec};
16
17
use super::char_data::{
18
    is_rtl,
19
    BidiClass::{self, *},
20
};
21
use super::level::Level;
22
use super::prepare::removed_by_x9;
23
use super::LevelRunVec;
24
use super::TextSource;
25
26
/// Compute explicit embedding levels for one paragraph of text (X1-X8), and identify
27
/// level runs (BD7) for use when determining Isolating Run Sequences (X10).
28
///
29
/// `processing_classes[i]` must contain the `BidiClass` of the char at byte index `i`,
30
/// for each char in `text`.
31
///
32
/// `runs` returns the list of level runs (BD7) of the text.
33
#[cfg_attr(feature = "flame_it", flamer::flame)]
34
0
pub fn compute<'a, T: TextSource<'a> + ?Sized>(
35
0
    text: &'a T,
36
0
    para_level: Level,
37
0
    original_classes: &[BidiClass],
38
0
    levels: &mut [Level],
39
0
    processing_classes: &mut [BidiClass],
40
0
    runs: &mut LevelRunVec,
41
0
) {
42
0
    assert_eq!(text.len(), original_classes.len());
43
44
    // <http://www.unicode.org/reports/tr9/#X1>
45
    #[cfg(feature = "smallvec")]
46
    let mut stack: SmallVec<[Status; 8]> = smallvec![Status {
47
        level: para_level,
48
        status: OverrideStatus::Neutral,
49
    }];
50
    #[cfg(not(feature = "smallvec"))]
51
0
    let mut stack = vec![Status {
52
0
        level: para_level,
53
0
        status: OverrideStatus::Neutral,
54
0
    }];
55
0
56
0
    let mut overflow_isolate_count = 0u32;
57
0
    let mut overflow_embedding_count = 0u32;
58
0
    let mut valid_isolate_count = 0u32;
59
0
60
0
    let mut current_run_level = Level::ltr();
61
0
    let mut current_run_start = 0;
62
63
0
    for (i, len) in text.indices_lengths() {
64
0
        let last = stack.last().unwrap();
65
0
66
0
        match original_classes[i] {
67
            // Rules X2-X5c
68
            RLE | LRE | RLO | LRO | RLI | LRI | FSI => {
69
                // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
70
0
                levels[i] = last.level;
71
72
                // X5a-X5c: Isolate initiators get the level of the last entry on the stack.
73
0
                let is_isolate = matches!(original_classes[i], RLI | LRI | FSI);
74
0
                if is_isolate {
75
                    // Redundant due to "Retaining explicit formatting characters" step.
76
                    // levels[i] = last.level;
77
0
                    match last.status {
78
0
                        OverrideStatus::RTL => processing_classes[i] = R,
79
0
                        OverrideStatus::LTR => processing_classes[i] = L,
80
0
                        _ => {}
81
                    }
82
0
                }
83
84
0
                let new_level = if is_rtl(original_classes[i]) {
85
0
                    last.level.new_explicit_next_rtl()
86
                } else {
87
0
                    last.level.new_explicit_next_ltr()
88
                };
89
90
0
                if new_level.is_ok() && overflow_isolate_count == 0 && overflow_embedding_count == 0
91
                {
92
0
                    let new_level = new_level.unwrap();
93
0
94
0
                    stack.push(Status {
95
0
                        level: new_level,
96
0
                        status: match original_classes[i] {
97
0
                            RLO => OverrideStatus::RTL,
98
0
                            LRO => OverrideStatus::LTR,
99
0
                            RLI | LRI | FSI => OverrideStatus::Isolate,
100
0
                            _ => OverrideStatus::Neutral,
101
                        },
102
                    });
103
104
0
                    if is_isolate {
105
0
                        valid_isolate_count += 1;
106
0
                    } else {
107
0
                        // The spec doesn't explicitly mention this step, but it is necessary.
108
0
                        // See the reference implementations for comparison.
109
0
                        levels[i] = new_level;
110
0
                    }
111
0
                } else if is_isolate {
112
0
                    overflow_isolate_count += 1;
113
0
                } else if overflow_isolate_count == 0 {
114
0
                    overflow_embedding_count += 1;
115
0
                }
116
117
0
                if !is_isolate {
118
0
                    // X9 +
119
0
                    // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
120
0
                    // (PDF handled below)
121
0
                    processing_classes[i] = BN;
122
0
                }
123
            }
124
125
            // <http://www.unicode.org/reports/tr9/#X6a>
126
            PDI => {
127
0
                if overflow_isolate_count > 0 {
128
0
                    overflow_isolate_count -= 1;
129
0
                } else if valid_isolate_count > 0 {
130
0
                    overflow_embedding_count = 0;
131
132
0
                    while !matches!(
133
0
                        stack.pop(),
134
                        None | Some(Status {
135
                            status: OverrideStatus::Isolate,
136
                            ..
137
                        })
138
0
                    ) {}
139
140
0
                    valid_isolate_count -= 1;
141
0
                }
142
143
0
                let last = stack.last().unwrap();
144
0
                levels[i] = last.level;
145
0
146
0
                match last.status {
147
0
                    OverrideStatus::RTL => processing_classes[i] = R,
148
0
                    OverrideStatus::LTR => processing_classes[i] = L,
149
0
                    _ => {}
150
                }
151
            }
152
153
            // <http://www.unicode.org/reports/tr9/#X7>
154
            PDF => {
155
0
                if overflow_isolate_count > 0 {
156
0
                    // do nothing
157
0
                } else if overflow_embedding_count > 0 {
158
0
                    overflow_embedding_count -= 1;
159
0
                } else if last.status != OverrideStatus::Isolate && stack.len() >= 2 {
160
0
                    stack.pop();
161
0
                }
162
163
                // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
164
0
                levels[i] = stack.last().unwrap().level;
165
0
                // X9 part of retaining explicit formatting characters.
166
0
                processing_classes[i] = BN;
167
            }
168
169
            // Nothing.
170
            // BN case moved down to X6, see <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
171
0
            B => {}
172
173
            // <http://www.unicode.org/reports/tr9/#X6>
174
            _ => {
175
0
                levels[i] = last.level;
176
0
177
0
                // This condition is not in the spec, but I am pretty sure that is a spec bug.
178
0
                // https://www.unicode.org/L2/L2023/23014-amd-to-uax9.pdf
179
0
                if original_classes[i] != BN {
180
0
                    match last.status {
181
0
                        OverrideStatus::RTL => processing_classes[i] = R,
182
0
                        OverrideStatus::LTR => processing_classes[i] = L,
183
0
                        _ => {}
184
                    }
185
0
                }
186
            }
187
        }
188
189
        // Handle multi-byte characters.
190
0
        for j in 1..len {
191
0
            levels[i + j] = levels[i];
192
0
            processing_classes[i + j] = processing_classes[i];
193
0
        }
194
195
        // Identify level runs to be passed to prepare::isolating_run_sequences().
196
0
        if i == 0 {
197
0
            // Initialize for the first (or only) run.
198
0
            current_run_level = levels[i];
199
0
        } else {
200
            // Check if we need to start a new level run.
201
            // <https://www.unicode.org/reports/tr9/#BD7>
202
0
            if !removed_by_x9(original_classes[i]) && levels[i] != current_run_level {
203
0
                // End the last run and start a new one.
204
0
                runs.push(current_run_start..i);
205
0
                current_run_level = levels[i];
206
0
                current_run_start = i;
207
0
            }
208
        }
209
    }
210
211
    // Append the trailing level run, if non-empty.
212
0
    if levels.len() > current_run_start {
213
0
        runs.push(current_run_start..levels.len());
214
0
    }
215
0
}
Unexecuted instantiation: unicode_bidi::explicit::compute::<str>
Unexecuted instantiation: unicode_bidi::explicit::compute::<_>
216
217
/// Entries in the directional status stack:
218
struct Status {
219
    level: Level,
220
    status: OverrideStatus,
221
}
222
223
#[derive(PartialEq)]
224
enum OverrideStatus {
225
    Neutral,
226
    RTL,
227
    LTR,
228
    Isolate,
229
}