/src/unicode-normalization/src/normalize.rs
Line | Count | Source |
1 | | // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT |
2 | | // file at the top-level directory of this distribution and at |
3 | | // http://rust-lang.org/COPYRIGHT. |
4 | | // |
5 | | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
6 | | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
7 | | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
8 | | // option. This file may not be copied, modified, or distributed |
9 | | // except according to those terms. |
10 | | |
11 | | //! Functions for computing canonical and compatible decompositions for Unicode characters. |
12 | | use crate::lookups::{ |
13 | | canonical_fully_decomposed, cjk_compat_variants_fully_decomposed, |
14 | | compatibility_fully_decomposed, composition_table, |
15 | | }; |
16 | | |
17 | | use core::char; |
18 | | |
19 | | /// Compute canonical Unicode decomposition for character. |
20 | | /// See [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/) |
21 | | /// for more information. |
22 | | #[inline] |
23 | 195M | pub fn decompose_canonical<F>(c: char, emit_char: F) |
24 | 195M | where |
25 | 195M | F: FnMut(char), |
26 | | { |
27 | 195M | decompose(c, canonical_fully_decomposed, emit_char) |
28 | 195M | } unicode_normalization::normalize::decompose_canonical::<<unicode_normalization::decompose::Decompositions<unicode_normalization::stream_safe::StreamSafe<core::str::iter::Chars>> as core::iter::traits::iterator::Iterator>::next::{closure#0}>Line | Count | Source | 23 | 99.6M | pub fn decompose_canonical<F>(c: char, emit_char: F) | 24 | 99.6M | where | 25 | 99.6M | F: FnMut(char), | 26 | | { | 27 | 99.6M | decompose(c, canonical_fully_decomposed, emit_char) | 28 | 99.6M | } |
unicode_normalization::normalize::decompose_canonical::<<unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#0}>Line | Count | Source | 23 | 69.2M | pub fn decompose_canonical<F>(c: char, emit_char: F) | 24 | 69.2M | where | 25 | 69.2M | F: FnMut(char), | 26 | | { | 27 | 69.2M | decompose(c, canonical_fully_decomposed, emit_char) | 28 | 69.2M | } |
Unexecuted instantiation: unicode_normalization::normalize::decompose_canonical::<_> unicode_normalization::normalize::decompose_canonical::<<unicode_normalization::decompose::Decompositions<streaming::Counter> as core::iter::traits::iterator::Iterator>::next::{closure#0}>Line | Count | Source | 23 | 11.3M | pub fn decompose_canonical<F>(c: char, emit_char: F) | 24 | 11.3M | where | 25 | 11.3M | F: FnMut(char), | 26 | | { | 27 | 11.3M | decompose(c, canonical_fully_decomposed, emit_char) | 28 | 11.3M | } |
unicode_normalization::normalize::decompose_canonical::<process::rust_fuzzer_test_input::{closure#0}>Line | Count | Source | 23 | 75 | pub fn decompose_canonical<F>(c: char, emit_char: F) | 24 | 75 | where | 25 | 75 | F: FnMut(char), | 26 | | { | 27 | 75 | decompose(c, canonical_fully_decomposed, emit_char) | 28 | 75 | } |
unicode_normalization::normalize::decompose_canonical::<<unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#0}>Line | Count | Source | 23 | 15.7M | pub fn decompose_canonical<F>(c: char, emit_char: F) | 24 | 15.7M | where | 25 | 15.7M | F: FnMut(char), | 26 | | { | 27 | 15.7M | decompose(c, canonical_fully_decomposed, emit_char) | 28 | 15.7M | } |
|
29 | | |
30 | | /// Compute canonical or compatible Unicode decomposition for character. |
31 | | /// See [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/) |
32 | | /// for more information. |
33 | | #[inline] |
34 | 98.7M | pub fn decompose_compatible<F: FnMut(char)>(c: char, emit_char: F) { |
35 | 98.7M | let decompose_char = |
36 | 71.7M | |c| compatibility_fully_decomposed(c).or_else(|| canonical_fully_decomposed(c)); Unexecuted instantiation: unicode_normalization::normalize::decompose_compatible::<<unicode_normalization::decompose::Decompositions<unicode_normalization::stream_safe::StreamSafe<core::str::iter::Chars>> as core::iter::traits::iterator::Iterator>::next::{closure#1}>::{closure#0}unicode_normalization::normalize::decompose_compatible::<<unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#1}>::{closure#0}Line | Count | Source | 36 | 60.0M | |c| compatibility_fully_decomposed(c).or_else(|| canonical_fully_decomposed(c)); |
Unexecuted instantiation: unicode_normalization::normalize::decompose_compatible::<_>::{closure#0}Unexecuted instantiation: unicode_normalization::normalize::decompose_compatible::<<unicode_normalization::decompose::Decompositions<streaming::Counter> as core::iter::traits::iterator::Iterator>::next::{closure#1}>::{closure#0}unicode_normalization::normalize::decompose_compatible::<process::rust_fuzzer_test_input::{closure#1}>::{closure#0}Line | Count | Source | 36 | 48 | |c| compatibility_fully_decomposed(c).or_else(|| canonical_fully_decomposed(c)); |
unicode_normalization::normalize::decompose_compatible::<<unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#1}>::{closure#0}Line | Count | Source | 36 | 11.7M | |c| compatibility_fully_decomposed(c).or_else(|| canonical_fully_decomposed(c)); |
Unexecuted instantiation: unicode_normalization::normalize::decompose_compatible::<<unicode_normalization::decompose::Decompositions<unicode_normalization::stream_safe::StreamSafe<core::str::iter::Chars>> as core::iter::traits::iterator::Iterator>::next::{closure#1}>::{closure#0}::{closure#0}unicode_normalization::normalize::decompose_compatible::<<unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#1}>::{closure#0}::{closure#0}Line | Count | Source | 36 | 58.1M | |c| compatibility_fully_decomposed(c).or_else(|| canonical_fully_decomposed(c)); |
Unexecuted instantiation: unicode_normalization::normalize::decompose_compatible::<_>::{closure#0}::{closure#0}Unexecuted instantiation: unicode_normalization::normalize::decompose_compatible::<<unicode_normalization::decompose::Decompositions<streaming::Counter> as core::iter::traits::iterator::Iterator>::next::{closure#1}>::{closure#0}::{closure#0}unicode_normalization::normalize::decompose_compatible::<process::rust_fuzzer_test_input::{closure#1}>::{closure#0}::{closure#0}Line | Count | Source | 36 | 45 | |c| compatibility_fully_decomposed(c).or_else(|| canonical_fully_decomposed(c)); |
unicode_normalization::normalize::decompose_compatible::<<unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#1}>::{closure#0}::{closure#0}Line | Count | Source | 36 | 10.1M | |c| compatibility_fully_decomposed(c).or_else(|| canonical_fully_decomposed(c)); |
|
37 | 98.7M | decompose(c, decompose_char, emit_char) |
38 | 98.7M | } Unexecuted instantiation: unicode_normalization::normalize::decompose_compatible::<<unicode_normalization::decompose::Decompositions<unicode_normalization::stream_safe::StreamSafe<core::str::iter::Chars>> as core::iter::traits::iterator::Iterator>::next::{closure#1}>unicode_normalization::normalize::decompose_compatible::<<unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#1}>Line | Count | Source | 34 | 83.5M | pub fn decompose_compatible<F: FnMut(char)>(c: char, emit_char: F) { | 35 | 83.5M | let decompose_char = | 36 | | |c| compatibility_fully_decomposed(c).or_else(|| canonical_fully_decomposed(c)); | 37 | 83.5M | decompose(c, decompose_char, emit_char) | 38 | 83.5M | } |
Unexecuted instantiation: unicode_normalization::normalize::decompose_compatible::<_> Unexecuted instantiation: unicode_normalization::normalize::decompose_compatible::<<unicode_normalization::decompose::Decompositions<streaming::Counter> as core::iter::traits::iterator::Iterator>::next::{closure#1}>unicode_normalization::normalize::decompose_compatible::<process::rust_fuzzer_test_input::{closure#1}>Line | Count | Source | 34 | 54 | pub fn decompose_compatible<F: FnMut(char)>(c: char, emit_char: F) { | 35 | 54 | let decompose_char = | 36 | | |c| compatibility_fully_decomposed(c).or_else(|| canonical_fully_decomposed(c)); | 37 | 54 | decompose(c, decompose_char, emit_char) | 38 | 54 | } |
unicode_normalization::normalize::decompose_compatible::<<unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#1}>Line | Count | Source | 34 | 15.1M | pub fn decompose_compatible<F: FnMut(char)>(c: char, emit_char: F) { | 35 | 15.1M | let decompose_char = | 36 | | |c| compatibility_fully_decomposed(c).or_else(|| canonical_fully_decomposed(c)); | 37 | 15.1M | decompose(c, decompose_char, emit_char) | 38 | 15.1M | } |
|
39 | | |
40 | | /// Compute standard-variation decomposition for character. |
41 | | /// |
42 | | /// [Standardized Variation Sequences] are used instead of the standard canonical |
43 | | /// decompositions, notably for CJK codepoints with singleton canonical decompositions, |
44 | | /// to avoid losing information. See the [Unicode Variation Sequence FAQ] and the |
45 | | /// "Other Enhancements" section of the [Unicode 6.3 Release Summary] for more information. |
46 | | /// |
47 | | /// [Standardized Variation Sequences]: https://www.unicode.org/glossary/#standardized_variation_sequence |
48 | | /// [Unicode Variation Sequence FAQ]: http://unicode.org/faq/vs.html |
49 | | /// [Unicode 6.3 Release Summary]: https://www.unicode.org/versions/Unicode6.3.0/#Summary |
50 | | #[inline] |
51 | 0 | pub fn decompose_cjk_compat_variants<F>(c: char, mut emit_char: F) |
52 | 0 | where |
53 | 0 | F: FnMut(char), |
54 | | { |
55 | | // 7-bit ASCII never decomposes |
56 | 0 | if c <= '\x7f' { |
57 | 0 | emit_char(c); |
58 | 0 | return; |
59 | 0 | } |
60 | | |
61 | | // Don't perform decomposition for Hangul |
62 | | |
63 | 0 | if let Some(decomposed) = cjk_compat_variants_fully_decomposed(c) { |
64 | 0 | for &d in decomposed { |
65 | 0 | emit_char(d); |
66 | 0 | } |
67 | 0 | return; |
68 | 0 | } |
69 | | |
70 | | // Finally bottom out. |
71 | 0 | emit_char(c); |
72 | 0 | } |
73 | | |
74 | | #[inline] |
75 | | #[allow(unsafe_code)] |
76 | 294M | fn decompose<D, F>(c: char, decompose_char: D, mut emit_char: F) |
77 | 294M | where |
78 | 294M | D: Fn(char) -> Option<&'static [char]>, |
79 | 294M | F: FnMut(char), |
80 | | { |
81 | | // 7-bit ASCII never decomposes |
82 | 294M | if c <= '\x7f' { |
83 | 84.3M | emit_char(c); |
84 | 84.3M | return; |
85 | 210M | } |
86 | | |
87 | | // Perform decomposition for Hangul |
88 | 210M | if is_hangul_syllable(c) { |
89 | | // Safety: Hangul Syllables invariant checked by is_hangul_syllable above |
90 | 246k | unsafe { |
91 | 246k | decompose_hangul(c, emit_char); |
92 | 246k | } |
93 | 246k | return; |
94 | 210M | } |
95 | | |
96 | 210M | if let Some(decomposed) = decompose_char(c) { |
97 | 350M | for &d in decomposed { |
98 | 253M | emit_char(d); |
99 | 253M | } |
100 | 97.2M | return; |
101 | 112M | } |
102 | | |
103 | | // Finally bottom out. |
104 | 112M | emit_char(c); |
105 | 294M | } Unexecuted instantiation: unicode_normalization::normalize::decompose::<unicode_normalization::normalize::decompose_compatible<<unicode_normalization::decompose::Decompositions<unicode_normalization::stream_safe::StreamSafe<core::str::iter::Chars>> as core::iter::traits::iterator::Iterator>::next::{closure#1}>::{closure#0}, <unicode_normalization::decompose::Decompositions<unicode_normalization::stream_safe::StreamSafe<core::str::iter::Chars>> as core::iter::traits::iterator::Iterator>::next::{closure#1}>unicode_normalization::normalize::decompose::<unicode_normalization::normalize::decompose_compatible<<unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#1}>::{closure#0}, <unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#1}>Line | Count | Source | 76 | 83.5M | fn decompose<D, F>(c: char, decompose_char: D, mut emit_char: F) | 77 | 83.5M | where | 78 | 83.5M | D: Fn(char) -> Option<&'static [char]>, | 79 | 83.5M | F: FnMut(char), | 80 | | { | 81 | | // 7-bit ASCII never decomposes | 82 | 83.5M | if c <= '\x7f' { | 83 | 23.3M | emit_char(c); | 84 | 23.3M | return; | 85 | 60.2M | } | 86 | | | 87 | | // Perform decomposition for Hangul | 88 | 60.2M | if is_hangul_syllable(c) { | 89 | | // Safety: Hangul Syllables invariant checked by is_hangul_syllable above | 90 | 203k | unsafe { | 91 | 203k | decompose_hangul(c, emit_char); | 92 | 203k | } | 93 | 203k | return; | 94 | 60.0M | } | 95 | | | 96 | 60.0M | if let Some(decomposed) = decompose_char(c) { | 97 | 105M | for &d in decomposed { | 98 | 80.0M | emit_char(d); | 99 | 80.0M | } | 100 | 25.3M | return; | 101 | 34.7M | } | 102 | | | 103 | | // Finally bottom out. | 104 | 34.7M | emit_char(c); | 105 | 83.5M | } |
unicode_normalization::normalize::decompose::<unicode_normalization::lookups::canonical_fully_decomposed, <unicode_normalization::decompose::Decompositions<unicode_normalization::stream_safe::StreamSafe<core::str::iter::Chars>> as core::iter::traits::iterator::Iterator>::next::{closure#0}>Line | Count | Source | 76 | 99.6M | fn decompose<D, F>(c: char, decompose_char: D, mut emit_char: F) | 77 | 99.6M | where | 78 | 99.6M | D: Fn(char) -> Option<&'static [char]>, | 79 | 99.6M | F: FnMut(char), | 80 | | { | 81 | | // 7-bit ASCII never decomposes | 82 | 99.6M | if c <= '\x7f' { | 83 | 24.6M | emit_char(c); | 84 | 24.6M | return; | 85 | 74.9M | } | 86 | | | 87 | | // Perform decomposition for Hangul | 88 | 74.9M | if is_hangul_syllable(c) { | 89 | | // Safety: Hangul Syllables invariant checked by is_hangul_syllable above | 90 | 4.23k | unsafe { | 91 | 4.23k | decompose_hangul(c, emit_char); | 92 | 4.23k | } | 93 | 4.23k | return; | 94 | 74.9M | } | 95 | | | 96 | 74.9M | if let Some(decomposed) = decompose_char(c) { | 97 | 74.1M | for &d in decomposed { | 98 | 50.2M | emit_char(d); | 99 | 50.2M | } | 100 | 23.9M | return; | 101 | 51.0M | } | 102 | | | 103 | | // Finally bottom out. | 104 | 51.0M | emit_char(c); | 105 | 99.6M | } |
unicode_normalization::normalize::decompose::<unicode_normalization::lookups::canonical_fully_decomposed, <unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#0}>Line | Count | Source | 76 | 69.2M | fn decompose<D, F>(c: char, decompose_char: D, mut emit_char: F) | 77 | 69.2M | where | 78 | 69.2M | D: Fn(char) -> Option<&'static [char]>, | 79 | 69.2M | F: FnMut(char), | 80 | | { | 81 | | // 7-bit ASCII never decomposes | 82 | 69.2M | if c <= '\x7f' { | 83 | 20.4M | emit_char(c); | 84 | 20.4M | return; | 85 | 48.8M | } | 86 | | | 87 | | // Perform decomposition for Hangul | 88 | 48.8M | if is_hangul_syllable(c) { | 89 | | // Safety: Hangul Syllables invariant checked by is_hangul_syllable above | 90 | 3.80k | unsafe { | 91 | 3.80k | decompose_hangul(c, emit_char); | 92 | 3.80k | } | 93 | 3.80k | return; | 94 | 48.8M | } | 95 | | | 96 | 48.8M | if let Some(decomposed) = decompose_char(c) { | 97 | 72.3M | for &d in decomposed { | 98 | 48.8M | emit_char(d); | 99 | 48.8M | } | 100 | 23.5M | return; | 101 | 25.3M | } | 102 | | | 103 | | // Finally bottom out. | 104 | 25.3M | emit_char(c); | 105 | 69.2M | } |
Unexecuted instantiation: unicode_normalization::normalize::decompose::<_, _> Unexecuted instantiation: unicode_normalization::normalize::decompose::<unicode_normalization::normalize::decompose_compatible<<unicode_normalization::decompose::Decompositions<streaming::Counter> as core::iter::traits::iterator::Iterator>::next::{closure#1}>::{closure#0}, <unicode_normalization::decompose::Decompositions<streaming::Counter> as core::iter::traits::iterator::Iterator>::next::{closure#1}>unicode_normalization::normalize::decompose::<unicode_normalization::lookups::canonical_fully_decomposed, <unicode_normalization::decompose::Decompositions<streaming::Counter> as core::iter::traits::iterator::Iterator>::next::{closure#0}>Line | Count | Source | 76 | 11.3M | fn decompose<D, F>(c: char, decompose_char: D, mut emit_char: F) | 77 | 11.3M | where | 78 | 11.3M | D: Fn(char) -> Option<&'static [char]>, | 79 | 11.3M | F: FnMut(char), | 80 | | { | 81 | | // 7-bit ASCII never decomposes | 82 | 11.3M | if c <= '\x7f' { | 83 | 7.47M | emit_char(c); | 84 | 7.47M | return; | 85 | 3.86M | } | 86 | | | 87 | | // Perform decomposition for Hangul | 88 | 3.86M | if is_hangul_syllable(c) { | 89 | | // Safety: Hangul Syllables invariant checked by is_hangul_syllable above | 90 | 1.18k | unsafe { | 91 | 1.18k | decompose_hangul(c, emit_char); | 92 | 1.18k | } | 93 | 1.18k | return; | 94 | 3.85M | } | 95 | | | 96 | 3.85M | if let Some(decomposed) = decompose_char(c) { | 97 | 10.5M | for &d in decomposed { | 98 | 7.04M | emit_char(d); | 99 | 7.04M | } | 100 | 3.51M | return; | 101 | 342k | } | 102 | | | 103 | | // Finally bottom out. | 104 | 342k | emit_char(c); | 105 | 11.3M | } |
unicode_normalization::normalize::decompose::<unicode_normalization::normalize::decompose_compatible<process::rust_fuzzer_test_input::{closure#1}>::{closure#0}, process::rust_fuzzer_test_input::{closure#1}>Line | Count | Source | 76 | 54 | fn decompose<D, F>(c: char, decompose_char: D, mut emit_char: F) | 77 | 54 | where | 78 | 54 | D: Fn(char) -> Option<&'static [char]>, | 79 | 54 | F: FnMut(char), | 80 | | { | 81 | | // 7-bit ASCII never decomposes | 82 | 54 | if c <= '\x7f' { | 83 | 3 | emit_char(c); | 84 | 3 | return; | 85 | 51 | } | 86 | | | 87 | | // Perform decomposition for Hangul | 88 | 51 | if is_hangul_syllable(c) { | 89 | | // Safety: Hangul Syllables invariant checked by is_hangul_syllable above | 90 | 3 | unsafe { | 91 | 3 | decompose_hangul(c, emit_char); | 92 | 3 | } | 93 | 3 | return; | 94 | 48 | } | 95 | | | 96 | 48 | if let Some(decomposed) = decompose_char(c) { | 97 | 8 | for &d in decomposed { | 98 | 5 | emit_char(d); | 99 | 5 | } | 100 | 3 | return; | 101 | 45 | } | 102 | | | 103 | | // Finally bottom out. | 104 | 45 | emit_char(c); | 105 | 54 | } |
unicode_normalization::normalize::decompose::<unicode_normalization::normalize::decompose_compatible<<unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#1}>::{closure#0}, <unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#1}>Line | Count | Source | 76 | 15.1M | fn decompose<D, F>(c: char, decompose_char: D, mut emit_char: F) | 77 | 15.1M | where | 78 | 15.1M | D: Fn(char) -> Option<&'static [char]>, | 79 | 15.1M | F: FnMut(char), | 80 | | { | 81 | | // 7-bit ASCII never decomposes | 82 | 15.1M | if c <= '\x7f' { | 83 | 3.42M | emit_char(c); | 84 | 3.42M | return; | 85 | 11.7M | } | 86 | | | 87 | | // Perform decomposition for Hangul | 88 | 11.7M | if is_hangul_syllable(c) { | 89 | | // Safety: Hangul Syllables invariant checked by is_hangul_syllable above | 90 | 20.8k | unsafe { | 91 | 20.8k | decompose_hangul(c, emit_char); | 92 | 20.8k | } | 93 | 20.8k | return; | 94 | 11.7M | } | 95 | | | 96 | 11.7M | if let Some(decomposed) = decompose_char(c) { | 97 | 57.2M | for &d in decomposed { | 98 | 46.5M | emit_char(d); | 99 | 46.5M | } | 100 | 10.6M | return; | 101 | 1.12M | } | 102 | | | 103 | | // Finally bottom out. | 104 | 1.12M | emit_char(c); | 105 | 15.1M | } |
unicode_normalization::normalize::decompose::<unicode_normalization::lookups::canonical_fully_decomposed, process::rust_fuzzer_test_input::{closure#0}>Line | Count | Source | 76 | 75 | fn decompose<D, F>(c: char, decompose_char: D, mut emit_char: F) | 77 | 75 | where | 78 | 75 | D: Fn(char) -> Option<&'static [char]>, | 79 | 75 | F: FnMut(char), | 80 | | { | 81 | | // 7-bit ASCII never decomposes | 82 | 75 | if c <= '\x7f' { | 83 | 9 | emit_char(c); | 84 | 9 | return; | 85 | 66 | } | 86 | | | 87 | | // Perform decomposition for Hangul | 88 | 66 | if is_hangul_syllable(c) { | 89 | | // Safety: Hangul Syllables invariant checked by is_hangul_syllable above | 90 | 13 | unsafe { | 91 | 13 | decompose_hangul(c, emit_char); | 92 | 13 | } | 93 | 13 | return; | 94 | 53 | } | 95 | | | 96 | 53 | if let Some(decomposed) = decompose_char(c) { | 97 | 0 | for &d in decomposed { | 98 | 0 | emit_char(d); | 99 | 0 | } | 100 | 0 | return; | 101 | 53 | } | 102 | | | 103 | | // Finally bottom out. | 104 | 53 | emit_char(c); | 105 | 75 | } |
unicode_normalization::normalize::decompose::<unicode_normalization::lookups::canonical_fully_decomposed, <unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#0}>Line | Count | Source | 76 | 15.7M | fn decompose<D, F>(c: char, decompose_char: D, mut emit_char: F) | 77 | 15.7M | where | 78 | 15.7M | D: Fn(char) -> Option<&'static [char]>, | 79 | 15.7M | F: FnMut(char), | 80 | | { | 81 | | // 7-bit ASCII never decomposes | 82 | 15.7M | if c <= '\x7f' { | 83 | 4.96M | emit_char(c); | 84 | 4.96M | return; | 85 | 10.7M | } | 86 | | | 87 | | // Perform decomposition for Hangul | 88 | 10.7M | if is_hangul_syllable(c) { | 89 | | // Safety: Hangul Syllables invariant checked by is_hangul_syllable above | 90 | 13.2k | unsafe { | 91 | 13.2k | decompose_hangul(c, emit_char); | 92 | 13.2k | } | 93 | 13.2k | return; | 94 | 10.7M | } | 95 | | | 96 | 10.7M | if let Some(decomposed) = decompose_char(c) { | 97 | 31.0M | for &d in decomposed { | 98 | 20.6M | emit_char(d); | 99 | 20.6M | } | 100 | 10.3M | return; | 101 | 389k | } | 102 | | | 103 | | // Finally bottom out. | 104 | 389k | emit_char(c); | 105 | 15.7M | } |
|
106 | | |
107 | | /// Compose two characters into a single character, if possible. |
108 | | /// See [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/) |
109 | | /// for more information. |
110 | 113M | pub fn compose(a: char, b: char) -> Option<char> { |
111 | 113M | compose_hangul(a, b).or_else(|| composition_table(a, b)) |
112 | 113M | } |
113 | | |
// Hangul syllable arithmetic constants, from Unicode 9.0.0 Section 3.12
// "Conjoining Jamo Behavior":
// http://www.unicode.org/versions/Unicode9.0.0/ch03.pdf#M9.32468.Heading.310.Combining.Jamo.Behavior
//
// Naming follows that section: S = precomposed syllable, L/V/T = the three
// conjoining jamo classes, N = number of V/T combinations per L.
// `*_BASE` is the first code point of a range, `*_COUNT` its length and
// `*_LAST` its final (inclusive) code point.

// L jamo.
const L_BASE: u32 = 0x1100;
const L_COUNT: u32 = 19;
const L_LAST: u32 = L_BASE + L_COUNT - 1;

// V jamo.
const V_BASE: u32 = 0x1161;
const V_COUNT: u32 = 21;
const V_LAST: u32 = V_BASE + V_COUNT - 1;

// T jamo. Index 0 relative to `T_BASE` means "no T part", so `T_BASE` itself
// never takes part in composition.
const T_BASE: u32 = 0x11A7;
const T_COUNT: u32 = 28;
const T_LAST: u32 = T_BASE + T_COUNT - 1;

// Composition only occurs for `TPart`s in `U+11A8 ..= U+11C2`,
// i.e. `T_BASE + 1 ..= T_LAST`.
const T_FIRST: u32 = T_BASE + 1;

// Precomposed syllables: `N_COUNT` syllables per L jamo, `S_COUNT` in total.
const N_COUNT: u32 = V_COUNT * T_COUNT;
const S_BASE: u32 = 0xAC00;
const S_COUNT: u32 = L_COUNT * N_COUNT;
const S_LAST: u32 = S_BASE + S_COUNT - 1;
134 | | |
135 | | // Safety-usable invariant: This ensures that c is a valid Hangul Syllable character (U+AC00..U+D7AF) |
136 | 543M | pub(crate) fn is_hangul_syllable(c: char) -> bool { |
137 | | // Safety: This checks the range 0xAC00 (S_BASE) to 0xD7A4 (S_BASE + S_COUNT), upholding the safety-usable invariant |
138 | 543M | (c as u32) >= S_BASE && (c as u32) < (S_BASE + S_COUNT) |
139 | 543M | } |
140 | | |
141 | | // Decompose a precomposed Hangul syllable |
142 | | // Safety: `s` MUST be a valid Hangul Syllable character, between U+AC00..U+D7AF |
143 | | #[allow(unsafe_code, unused_unsafe)] |
144 | | #[inline(always)] |
145 | 246k | unsafe fn decompose_hangul<F>(s: char, mut emit_char: F) |
146 | 246k | where |
147 | 246k | F: FnMut(char), |
148 | | { |
149 | | // This will be at most 0x2baf, the size of the Hangul Syllables block |
150 | 246k | let s_index = s as u32 - S_BASE; |
151 | | // This will be at most 0x2baf / (21 * 28), 19 |
152 | 246k | let l_index = s_index / N_COUNT; |
153 | | unsafe { |
154 | | // Safety: L_BASE (0x1100) plus at most 19 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) |
155 | 246k | emit_char(char::from_u32_unchecked(L_BASE + l_index)); |
156 | | |
157 | | // Safety: This will be at most (N_COUNT - 1) / T_COUNT = (V*T - 1) / T, which gives us an upper bound of V_COUNT = 21 |
158 | 246k | let v_index = (s_index % N_COUNT) / T_COUNT; |
159 | | // Safety: V_BASE (0x1161) plus at most 21 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) |
160 | 246k | emit_char(char::from_u32_unchecked(V_BASE + v_index)); |
161 | | |
162 | | // Safety: This will be at most T_COUNT - 1 (27) |
163 | 246k | let t_index = s_index % T_COUNT; |
164 | 246k | if t_index > 0 { |
165 | 143k | // Safety: T_BASE (0x11A7) plus at most 27 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) |
166 | 143k | emit_char(char::from_u32_unchecked(T_BASE + t_index)); |
167 | 143k | } |
168 | | } |
169 | 246k | } unicode_normalization::normalize::decompose_hangul::<<unicode_normalization::decompose::Decompositions<unicode_normalization::stream_safe::StreamSafe<core::str::iter::Chars>> as core::iter::traits::iterator::Iterator>::next::{closure#0}>Line | Count | Source | 145 | 4.23k | unsafe fn decompose_hangul<F>(s: char, mut emit_char: F) | 146 | 4.23k | where | 147 | 4.23k | F: FnMut(char), | 148 | | { | 149 | | // This will be at most 0x2baf, the size of the Hangul Syllables block | 150 | 4.23k | let s_index = s as u32 - S_BASE; | 151 | | // This will be at most 0x2baf / (21 * 28), 19 | 152 | 4.23k | let l_index = s_index / N_COUNT; | 153 | | unsafe { | 154 | | // Safety: L_BASE (0x1100) plus at most 19 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 155 | 4.23k | emit_char(char::from_u32_unchecked(L_BASE + l_index)); | 156 | | | 157 | | // Safety: This will be at most (N_COUNT - 1) / T_COUNT = (V*T - 1) / T, which gives us an upper bound of V_COUNT = 21 | 158 | 4.23k | let v_index = (s_index % N_COUNT) / T_COUNT; | 159 | | // Safety: V_BASE (0x1161) plus at most 21 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 160 | 4.23k | emit_char(char::from_u32_unchecked(V_BASE + v_index)); | 161 | | | 162 | | // Safety: This will be at most T_COUNT - 1 (27) | 163 | 4.23k | let t_index = s_index % T_COUNT; | 164 | 4.23k | if t_index > 0 { | 165 | 3.56k | // Safety: T_BASE (0x11A7) plus at most 27 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 166 | 3.56k | emit_char(char::from_u32_unchecked(T_BASE + t_index)); | 167 | 3.56k | } | 168 | | } | 169 | 4.23k | } |
Unexecuted instantiation: unicode_normalization::normalize::decompose_hangul::<<unicode_normalization::decompose::Decompositions<unicode_normalization::stream_safe::StreamSafe<core::str::iter::Chars>> as core::iter::traits::iterator::Iterator>::next::{closure#1}>unicode_normalization::normalize::decompose_hangul::<<unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#0}>Line | Count | Source | 145 | 3.80k | unsafe fn decompose_hangul<F>(s: char, mut emit_char: F) | 146 | 3.80k | where | 147 | 3.80k | F: FnMut(char), | 148 | | { | 149 | | // This will be at most 0x2baf, the size of the Hangul Syllables block | 150 | 3.80k | let s_index = s as u32 - S_BASE; | 151 | | // This will be at most 0x2baf / (21 * 28), 19 | 152 | 3.80k | let l_index = s_index / N_COUNT; | 153 | | unsafe { | 154 | | // Safety: L_BASE (0x1100) plus at most 19 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 155 | 3.80k | emit_char(char::from_u32_unchecked(L_BASE + l_index)); | 156 | | | 157 | | // Safety: This will be at most (N_COUNT - 1) / T_COUNT = (V*T - 1) / T, which gives us an upper bound of V_COUNT = 21 | 158 | 3.80k | let v_index = (s_index % N_COUNT) / T_COUNT; | 159 | | // Safety: V_BASE (0x1161) plus at most 21 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 160 | 3.80k | emit_char(char::from_u32_unchecked(V_BASE + v_index)); | 161 | | | 162 | | // Safety: This will be at most T_COUNT - 1 (27) | 163 | 3.80k | let t_index = s_index % T_COUNT; | 164 | 3.80k | if t_index > 0 { | 165 | 3.26k | // Safety: T_BASE (0x11A7) plus at most 27 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 166 | 3.26k | emit_char(char::from_u32_unchecked(T_BASE + t_index)); | 167 | 3.26k | } | 168 | | } | 169 | 3.80k | } |
unicode_normalization::normalize::decompose_hangul::<<unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#1}>Line | Count | Source | 145 | 203k | unsafe fn decompose_hangul<F>(s: char, mut emit_char: F) | 146 | 203k | where | 147 | 203k | F: FnMut(char), | 148 | | { | 149 | | // This will be at most 0x2baf, the size of the Hangul Syllables block | 150 | 203k | let s_index = s as u32 - S_BASE; | 151 | | // This will be at most 0x2baf / (21 * 28), 19 | 152 | 203k | let l_index = s_index / N_COUNT; | 153 | | unsafe { | 154 | | // Safety: L_BASE (0x1100) plus at most 19 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 155 | 203k | emit_char(char::from_u32_unchecked(L_BASE + l_index)); | 156 | | | 157 | | // Safety: This will be at most (N_COUNT - 1) / T_COUNT = (V*T - 1) / T, which gives us an upper bound of V_COUNT = 21 | 158 | 203k | let v_index = (s_index % N_COUNT) / T_COUNT; | 159 | | // Safety: V_BASE (0x1161) plus at most 21 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 160 | 203k | emit_char(char::from_u32_unchecked(V_BASE + v_index)); | 161 | | | 162 | | // Safety: This will be at most T_COUNT - 1 (27) | 163 | 203k | let t_index = s_index % T_COUNT; | 164 | 203k | if t_index > 0 { | 165 | 102k | // Safety: T_BASE (0x11A7) plus at most 27 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 166 | 102k | emit_char(char::from_u32_unchecked(T_BASE + t_index)); | 167 | 102k | } | 168 | | } | 169 | 203k | } |
Unexecuted instantiation: unicode_normalization::normalize::decompose_hangul::<_> unicode_normalization::normalize::decompose_hangul::<<unicode_normalization::decompose::Decompositions<streaming::Counter> as core::iter::traits::iterator::Iterator>::next::{closure#0}>Line | Count | Source | 145 | 1.18k | unsafe fn decompose_hangul<F>(s: char, mut emit_char: F) | 146 | 1.18k | where | 147 | 1.18k | F: FnMut(char), | 148 | | { | 149 | | // This will be at most 0x2baf, the size of the Hangul Syllables block | 150 | 1.18k | let s_index = s as u32 - S_BASE; | 151 | | // This will be at most 0x2baf / (21 * 28), 19 | 152 | 1.18k | let l_index = s_index / N_COUNT; | 153 | | unsafe { | 154 | | // Safety: L_BASE (0x1100) plus at most 19 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 155 | 1.18k | emit_char(char::from_u32_unchecked(L_BASE + l_index)); | 156 | | | 157 | | // Safety: This will be at most (N_COUNT - 1) / T_COUNT = (V*T - 1) / T, which gives us an upper bound of V_COUNT = 21 | 158 | 1.18k | let v_index = (s_index % N_COUNT) / T_COUNT; | 159 | | // Safety: V_BASE (0x1161) plus at most 21 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 160 | 1.18k | emit_char(char::from_u32_unchecked(V_BASE + v_index)); | 161 | | | 162 | | // Safety: This will be at most T_COUNT - 1 (27) | 163 | 1.18k | let t_index = s_index % T_COUNT; | 164 | 1.18k | if t_index > 0 { | 165 | 857 | // Safety: T_BASE (0x11A7) plus at most 27 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 166 | 857 | emit_char(char::from_u32_unchecked(T_BASE + t_index)); | 167 | 857 | } | 168 | | } | 169 | 1.18k | } |
Unexecuted instantiation: unicode_normalization::normalize::decompose_hangul::<<unicode_normalization::decompose::Decompositions<streaming::Counter> as core::iter::traits::iterator::Iterator>::next::{closure#1}>unicode_normalization::normalize::decompose_hangul::<process::rust_fuzzer_test_input::{closure#0}>Line | Count | Source | 145 | 13 | unsafe fn decompose_hangul<F>(s: char, mut emit_char: F) | 146 | 13 | where | 147 | 13 | F: FnMut(char), | 148 | | { | 149 | | // This will be at most 0x2baf, the size of the Hangul Syllables block | 150 | 13 | let s_index = s as u32 - S_BASE; | 151 | | // This will be at most 0x2baf / (21 * 28), 19 | 152 | 13 | let l_index = s_index / N_COUNT; | 153 | | unsafe { | 154 | | // Safety: L_BASE (0x1100) plus at most 19 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 155 | 13 | emit_char(char::from_u32_unchecked(L_BASE + l_index)); | 156 | | | 157 | | // Safety: This will be at most (N_COUNT - 1) / T_COUNT = (V*T - 1) / T, which gives us an upper bound of V_COUNT = 21 | 158 | 13 | let v_index = (s_index % N_COUNT) / T_COUNT; | 159 | | // Safety: V_BASE (0x1161) plus at most 21 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 160 | 13 | emit_char(char::from_u32_unchecked(V_BASE + v_index)); | 161 | | | 162 | | // Safety: This will be at most T_COUNT - 1 (27) | 163 | 13 | let t_index = s_index % T_COUNT; | 164 | 13 | if t_index > 0 { | 165 | 13 | // Safety: T_BASE (0x11A7) plus at most 27 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 166 | 13 | emit_char(char::from_u32_unchecked(T_BASE + t_index)); | 167 | 13 | } | 168 | | } | 169 | 13 | } |
unicode_normalization::normalize::decompose_hangul::<process::rust_fuzzer_test_input::{closure#1}>Line | Count | Source | 145 | 3 | unsafe fn decompose_hangul<F>(s: char, mut emit_char: F) | 146 | 3 | where | 147 | 3 | F: FnMut(char), | 148 | | { | 149 | | // This will be at most 0x2baf, the size of the Hangul Syllables block | 150 | 3 | let s_index = s as u32 - S_BASE; | 151 | | // This will be at most 0x2baf / (21 * 28), 19 | 152 | 3 | let l_index = s_index / N_COUNT; | 153 | | unsafe { | 154 | | // Safety: L_BASE (0x1100) plus at most 19 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 155 | 3 | emit_char(char::from_u32_unchecked(L_BASE + l_index)); | 156 | | | 157 | | // Safety: This will be at most (N_COUNT - 1) / T_COUNT = (V*T - 1) / T, which gives us an upper bound of V_COUNT = 21 | 158 | 3 | let v_index = (s_index % N_COUNT) / T_COUNT; | 159 | | // Safety: V_BASE (0x1161) plus at most 21 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 160 | 3 | emit_char(char::from_u32_unchecked(V_BASE + v_index)); | 161 | | | 162 | | // Safety: This will be at most T_COUNT - 1 (27) | 163 | 3 | let t_index = s_index % T_COUNT; | 164 | 3 | if t_index > 0 { | 165 | 2 | // Safety: T_BASE (0x11A7) plus at most 27 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 166 | 2 | emit_char(char::from_u32_unchecked(T_BASE + t_index)); | 167 | 2 | } | 168 | | } | 169 | 3 | } |
unicode_normalization::normalize::decompose_hangul::<<unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#0}>Line | Count | Source | 145 | 13.2k | unsafe fn decompose_hangul<F>(s: char, mut emit_char: F) | 146 | 13.2k | where | 147 | 13.2k | F: FnMut(char), | 148 | | { | 149 | | // This will be at most 0x2baf, the size of the Hangul Syllables block | 150 | 13.2k | let s_index = s as u32 - S_BASE; | 151 | | // This will be at most 0x2baf / (21 * 28), 19 | 152 | 13.2k | let l_index = s_index / N_COUNT; | 153 | | unsafe { | 154 | | // Safety: L_BASE (0x1100) plus at most 19 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 155 | 13.2k | emit_char(char::from_u32_unchecked(L_BASE + l_index)); | 156 | | | 157 | | // Safety: This will be at most (N_COUNT - 1) / T_COUNT = (V*T - 1) / T, which gives us an upper bound of V_COUNT = 21 | 158 | 13.2k | let v_index = (s_index % N_COUNT) / T_COUNT; | 159 | | // Safety: V_BASE (0x1161) plus at most 21 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 160 | 13.2k | emit_char(char::from_u32_unchecked(V_BASE + v_index)); | 161 | | | 162 | | // Safety: This will be at most T_COUNT - 1 (27) | 163 | 13.2k | let t_index = s_index % T_COUNT; | 164 | 13.2k | if t_index > 0 { | 165 | 13.0k | // Safety: T_BASE (0x11A7) plus at most 27 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 166 | 13.0k | emit_char(char::from_u32_unchecked(T_BASE + t_index)); | 167 | 13.0k | } | 168 | | } | 169 | 13.2k | } |
unicode_normalization::normalize::decompose_hangul::<<unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#1}>Line | Count | Source | 145 | 20.8k | unsafe fn decompose_hangul<F>(s: char, mut emit_char: F) | 146 | 20.8k | where | 147 | 20.8k | F: FnMut(char), | 148 | | { | 149 | | // This will be at most 0x2baf, the size of the Hangul Syllables block | 150 | 20.8k | let s_index = s as u32 - S_BASE; | 151 | | // This will be at most 0x2baf / (21 * 28), 19 | 152 | 20.8k | let l_index = s_index / N_COUNT; | 153 | | unsafe { | 154 | | // Safety: L_BASE (0x1100) plus at most 19 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 155 | 20.8k | emit_char(char::from_u32_unchecked(L_BASE + l_index)); | 156 | | | 157 | | // Safety: This will be at most (N_COUNT - 1) / T_COUNT = (V*T - 1) / T, which gives us an upper bound of V_COUNT = 21 | 158 | 20.8k | let v_index = (s_index % N_COUNT) / T_COUNT; | 159 | | // Safety: V_BASE (0x1161) plus at most 21 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 160 | 20.8k | emit_char(char::from_u32_unchecked(V_BASE + v_index)); | 161 | | | 162 | | // Safety: This will be at most T_COUNT - 1 (27) | 163 | 20.8k | let t_index = s_index % T_COUNT; | 164 | 20.8k | if t_index > 0 { | 165 | 20.3k | // Safety: T_BASE (0x11A7) plus at most 27 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 166 | 20.3k | emit_char(char::from_u32_unchecked(T_BASE + t_index)); | 167 | 20.3k | } | 168 | | } | 169 | 20.8k | } |
|
170 | | |
171 | | #[inline] |
172 | 16.2k | pub(crate) fn hangul_decomposition_length(s: char) -> usize { |
173 | 16.2k | let si = s as u32 - S_BASE; |
174 | 16.2k | let ti = si % T_COUNT; |
175 | 16.2k | if ti > 0 { |
176 | 14.0k | 3 |
177 | | } else { |
178 | 2.23k | 2 |
179 | | } |
180 | 16.2k | } unicode_normalization::normalize::hangul_decomposition_length Line | Count | Source | 172 | 9.05k | pub(crate) fn hangul_decomposition_length(s: char) -> usize { | 173 | 9.05k | let si = s as u32 - S_BASE; | 174 | 9.05k | let ti = si % T_COUNT; | 175 | 9.05k | if ti > 0 { | 176 | 8.16k | 3 | 177 | | } else { | 178 | 898 | 2 | 179 | | } | 180 | 9.05k | } |
unicode_normalization::normalize::hangul_decomposition_length Line | Count | Source | 172 | 7.20k | pub(crate) fn hangul_decomposition_length(s: char) -> usize { | 173 | 7.20k | let si = s as u32 - S_BASE; | 174 | 7.20k | let ti = si % T_COUNT; | 175 | 7.20k | if ti > 0 { | 176 | 5.87k | 3 | 177 | | } else { | 178 | 1.33k | 2 | 179 | | } | 180 | 7.20k | } |
|
181 | | |
182 | | // Compose a pair of Hangul Jamo (L + V), or an LV_Syllable with a trailing consonant Jamo (LV + T) |
183 | | #[allow(unsafe_code)] |
184 | | #[inline(always)] |
185 | | #[allow(ellipsis_inclusive_range_patterns)] |
186 | 113M | fn compose_hangul(a: char, b: char) -> Option<char> { |
187 | 113M | let (a, b) = (a as u32, b as u32); |
188 | 113M | match (a, b) { |
189 | | // Compose a leading consonant and a vowel together into an LV_Syllable |
190 | 5.90M | (L_BASE..=L_LAST, V_BASE..=V_LAST) => { |
191 | | // Safety: based on the above bounds, l_index will be at most L_COUNT - 1 (18) |
192 | | // and v_index will be at most V_COUNT - 1 (20) |
193 | 415k | let l_index = a - L_BASE; |
194 | 415k | let v_index = b - V_BASE; |
195 | | // Safety: With l_index <= 18 and v_index <= 20, this will be <= 18 * (21 * 28) + (20 * 28), which is 11144 (0x2B88). |
196 | 415k | let lv_index = l_index * N_COUNT + v_index * T_COUNT; |
197 | | // Safety: This is between 0xAC00 and 0xD788, which are in range for Hangul Syllables (U+AC00..U+D7AF) and also in range |
198 | | // for BMP unicode (below 0xD800, where the surrogates start) |
199 | 415k | let s = S_BASE + lv_index; |
200 | | // Safety: We've verified this is in-range |
201 | 415k | Some(unsafe { char::from_u32_unchecked(s) }) |
202 | | } |
203 | | // Compose an LV_Syllable and a trailing consonant into an LVT_Syllable |
204 | 4.89M | (S_BASE..=S_LAST, T_FIRST..=T_LAST) if (a - S_BASE) % T_COUNT == 0 => { |
205 | | // Safety: a is between 0xAC00 and (0xAC00 + 19 * 21 * 28). b - T_BASE is between 1 and 27 (T_COUNT - 1). |
206 | | // Adding a number 1 to 27 to a number that is at largest 0xD7A4 will not go out of bounds to 0xD800 (where the |
207 | | // surrogates start), so this is safe. |
208 | 210k | Some(unsafe { char::from_u32_unchecked(a + (b - T_BASE)) }) |
209 | | } |
210 | 112M | _ => None, |
211 | | } |
212 | 113M | } |
213 | | |
214 | | #[cfg(test)] |
215 | | mod tests { |
216 | | use super::compose_hangul; |
217 | | |
218 | | // Regression test from a bugfix where we were composing an LV_Syllable with |
219 | | // T_BASE directly. (We should only compose an LV_Syllable with a character |
220 | | // in the range `T_BASE + 1 ..= T_LAST`.) |
221 | | #[test] |
222 | | fn test_hangul_composition() { |
223 | | assert_eq!(compose_hangul('\u{c8e0}', '\u{11a7}'), None); |
224 | | } |
225 | | } |