/src/unicode-normalization/src/normalize.rs
Line | Count | Source |
1 | | // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT |
2 | | // file at the top-level directory of this distribution and at |
3 | | // http://rust-lang.org/COPYRIGHT. |
4 | | // |
5 | | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
6 | | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
7 | | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
8 | | // option. This file may not be copied, modified, or distributed |
9 | | // except according to those terms. |
10 | | |
11 | | //! Functions for computing canonical and compatible decompositions for Unicode characters. |
12 | | use crate::lookups::{ |
13 | | canonical_fully_decomposed, cjk_compat_variants_fully_decomposed, |
14 | | compatibility_fully_decomposed, composition_table, |
15 | | }; |
16 | | |
17 | | use core::char; |
18 | | |
19 | | /// Compute canonical Unicode decomposition for character. |
20 | | /// See [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/) |
21 | | /// for more information. |
22 | | #[inline] |
23 | 174M | pub fn decompose_canonical<F>(c: char, emit_char: F) |
24 | 174M | where |
25 | 174M | F: FnMut(char), |
26 | | { |
27 | 174M | decompose(c, canonical_fully_decomposed, emit_char) |
28 | 174M | } unicode_normalization::normalize::decompose_canonical::<<unicode_normalization::decompose::Decompositions<unicode_normalization::stream_safe::StreamSafe<core::str::iter::Chars>> as core::iter::traits::iterator::Iterator>::next::{closure#0}> Line | Count | Source | 23 | 84.1M | pub fn decompose_canonical<F>(c: char, emit_char: F) | 24 | 84.1M | where | 25 | 84.1M | F: FnMut(char), | 26 | | { | 27 | 84.1M | decompose(c, canonical_fully_decomposed, emit_char) | 28 | 84.1M | } |
unicode_normalization::normalize::decompose_canonical::<<unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#0}> Line | Count | Source | 23 | 59.8M | pub fn decompose_canonical<F>(c: char, emit_char: F) | 24 | 59.8M | where | 25 | 59.8M | F: FnMut(char), | 26 | | { | 27 | 59.8M | decompose(c, canonical_fully_decomposed, emit_char) | 28 | 59.8M | } |
Unexecuted instantiation: unicode_normalization::normalize::decompose_canonical::<_> unicode_normalization::normalize::decompose_canonical::<<unicode_normalization::decompose::Decompositions<streaming::Counter> as core::iter::traits::iterator::Iterator>::next::{closure#0}> Line | Count | Source | 23 | 11.8M | pub fn decompose_canonical<F>(c: char, emit_char: F) | 24 | 11.8M | where | 25 | 11.8M | F: FnMut(char), | 26 | | { | 27 | 11.8M | decompose(c, canonical_fully_decomposed, emit_char) | 28 | 11.8M | } |
unicode_normalization::normalize::decompose_canonical::<process::rust_fuzzer_test_input::{closure#0}> Line | Count | Source | 23 | 78 | pub fn decompose_canonical<F>(c: char, emit_char: F) | 24 | 78 | where | 25 | 78 | F: FnMut(char), | 26 | | { | 27 | 78 | decompose(c, canonical_fully_decomposed, emit_char) | 28 | 78 | } |
unicode_normalization::normalize::decompose_canonical::<<unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#0}> Line | Count | Source | 23 | 18.1M | pub fn decompose_canonical<F>(c: char, emit_char: F) | 24 | 18.1M | where | 25 | 18.1M | F: FnMut(char), | 26 | | { | 27 | 18.1M | decompose(c, canonical_fully_decomposed, emit_char) | 28 | 18.1M | } |
|
29 | | |
30 | | /// Compute canonical or compatible Unicode decomposition for character. |
31 | | /// See [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/) |
32 | | /// for more information. |
33 | | #[inline] |
34 | 87.3M | pub fn decompose_compatible<F: FnMut(char)>(c: char, emit_char: F) { |
35 | 87.3M | let decompose_char = |
36 | 60.5M | |c| compatibility_fully_decomposed(c).or_else(|| canonical_fully_decomposed(c)); Unexecuted instantiation: unicode_normalization::normalize::decompose_compatible::<<unicode_normalization::decompose::Decompositions<unicode_normalization::stream_safe::StreamSafe<core::str::iter::Chars>> as core::iter::traits::iterator::Iterator>::next::{closure#1}>::{closure#0} unicode_normalization::normalize::decompose_compatible::<<unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#1}>::{closure#0} Line | Count | Source | 36 | 48.7M | |c| compatibility_fully_decomposed(c).or_else(|| canonical_fully_decomposed(c)); |
Unexecuted instantiation: unicode_normalization::normalize::decompose_compatible::<_>::{closure#0} Unexecuted instantiation: unicode_normalization::normalize::decompose_compatible::<<unicode_normalization::decompose::Decompositions<streaming::Counter> as core::iter::traits::iterator::Iterator>::next::{closure#1}>::{closure#0} unicode_normalization::normalize::decompose_compatible::<process::rust_fuzzer_test_input::{closure#1}>::{closure#0} Line | Count | Source | 36 | 58 | |c| compatibility_fully_decomposed(c).or_else(|| canonical_fully_decomposed(c)); |
unicode_normalization::normalize::decompose_compatible::<<unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#1}>::{closure#0} Line | Count | Source | 36 | 11.7M | |c| compatibility_fully_decomposed(c).or_else(|| canonical_fully_decomposed(c)); |
Unexecuted instantiation: unicode_normalization::normalize::decompose_compatible::<<unicode_normalization::decompose::Decompositions<unicode_normalization::stream_safe::StreamSafe<core::str::iter::Chars>> as core::iter::traits::iterator::Iterator>::next::{closure#1}>::{closure#0}::{closure#0} unicode_normalization::normalize::decompose_compatible::<<unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#1}>::{closure#0}::{closure#0} Line | Count | Source | 36 | 46.9M | |c| compatibility_fully_decomposed(c).or_else(|| canonical_fully_decomposed(c)); |
Unexecuted instantiation: unicode_normalization::normalize::decompose_compatible::<_>::{closure#0}::{closure#0} Unexecuted instantiation: unicode_normalization::normalize::decompose_compatible::<<unicode_normalization::decompose::Decompositions<streaming::Counter> as core::iter::traits::iterator::Iterator>::next::{closure#1}>::{closure#0}::{closure#0} unicode_normalization::normalize::decompose_compatible::<process::rust_fuzzer_test_input::{closure#1}>::{closure#0}::{closure#0} Line | Count | Source | 36 | 54 | |c| compatibility_fully_decomposed(c).or_else(|| canonical_fully_decomposed(c)); |
unicode_normalization::normalize::decompose_compatible::<<unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#1}>::{closure#0}::{closure#0} Line | Count | Source | 36 | 10.2M | |c| compatibility_fully_decomposed(c).or_else(|| canonical_fully_decomposed(c)); |
|
37 | 87.3M | decompose(c, decompose_char, emit_char) |
38 | 87.3M | } Unexecuted instantiation: unicode_normalization::normalize::decompose_compatible::<<unicode_normalization::decompose::Decompositions<unicode_normalization::stream_safe::StreamSafe<core::str::iter::Chars>> as core::iter::traits::iterator::Iterator>::next::{closure#1}> unicode_normalization::normalize::decompose_compatible::<<unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#1}> Line | Count | Source | 34 | 73.1M | pub fn decompose_compatible<F: FnMut(char)>(c: char, emit_char: F) { | 35 | 73.1M | let decompose_char = | 36 | | |c| compatibility_fully_decomposed(c).or_else(|| canonical_fully_decomposed(c)); | 37 | 73.1M | decompose(c, decompose_char, emit_char) | 38 | 73.1M | } |
Unexecuted instantiation: unicode_normalization::normalize::decompose_compatible::<_> Unexecuted instantiation: unicode_normalization::normalize::decompose_compatible::<<unicode_normalization::decompose::Decompositions<streaming::Counter> as core::iter::traits::iterator::Iterator>::next::{closure#1}> unicode_normalization::normalize::decompose_compatible::<process::rust_fuzzer_test_input::{closure#1}> Line | Count | Source | 34 | 81 | pub fn decompose_compatible<F: FnMut(char)>(c: char, emit_char: F) { | 35 | 81 | let decompose_char = | 36 | | |c| compatibility_fully_decomposed(c).or_else(|| canonical_fully_decomposed(c)); | 37 | 81 | decompose(c, decompose_char, emit_char) | 38 | 81 | } |
unicode_normalization::normalize::decompose_compatible::<<unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#1}> Line | Count | Source | 34 | 14.1M | pub fn decompose_compatible<F: FnMut(char)>(c: char, emit_char: F) { | 35 | 14.1M | let decompose_char = | 36 | | |c| compatibility_fully_decomposed(c).or_else(|| canonical_fully_decomposed(c)); | 37 | 14.1M | decompose(c, decompose_char, emit_char) | 38 | 14.1M | } |
|
39 | | |
40 | | /// Compute standard-variation decomposition for character. |
41 | | /// |
42 | | /// [Standardized Variation Sequences] are used instead of the standard canonical |
43 | | /// decompositions, notably for CJK codepoints with singleton canonical decompositions, |
44 | | /// to avoid losing information. See the [Unicode Variation Sequence FAQ] and the |
45 | | /// "Other Enhancements" section of the [Unicode 6.3 Release Summary] for more information. |
46 | | /// |
47 | | /// [Standardized Variation Sequences]: https://www.unicode.org/glossary/#standardized_variation_sequence |
48 | | /// [Unicode Variation Sequence FAQ]: http://unicode.org/faq/vs.html |
49 | | /// [Unicode 6.3 Release Summary]: https://www.unicode.org/versions/Unicode6.3.0/#Summary |
50 | | #[inline] |
51 | 0 | pub fn decompose_cjk_compat_variants<F>(c: char, mut emit_char: F) |
52 | 0 | where |
53 | 0 | F: FnMut(char), |
54 | | { |
55 | | // 7-bit ASCII never decomposes |
56 | 0 | if c <= '\x7f' { |
57 | 0 | emit_char(c); |
58 | 0 | return; |
59 | 0 | } |
60 | | |
61 | | // Don't perform decomposition for Hangul |
62 | | |
63 | 0 | if let Some(decomposed) = cjk_compat_variants_fully_decomposed(c) { |
64 | 0 | for &d in decomposed { |
65 | 0 | emit_char(d); |
66 | 0 | } |
67 | 0 | return; |
68 | 0 | } |
69 | | |
70 | | // Finally bottom out. |
71 | 0 | emit_char(c); |
72 | 0 | } |
73 | | |
74 | | #[inline] |
75 | | #[allow(unsafe_code)] |
76 | 261M | fn decompose<D, F>(c: char, decompose_char: D, mut emit_char: F) |
77 | 261M | where |
78 | 261M | D: Fn(char) -> Option<&'static [char]>, |
79 | 261M | F: FnMut(char), |
80 | | { |
81 | | // 7-bit ASCII never decomposes |
82 | 261M | if c <= '\x7f' { |
83 | 89.0M | emit_char(c); |
84 | 89.0M | return; |
85 | 172M | } |
86 | | |
87 | | // Perform decomposition for Hangul |
88 | 172M | if is_hangul_syllable(c) { |
89 | | // Safety: Hangul Syllables invariant checked by is_hangul_syllable above |
90 | 266k | unsafe { |
91 | 266k | decompose_hangul(c, emit_char); |
92 | 266k | } |
93 | 266k | return; |
94 | 172M | } |
95 | | |
96 | 172M | if let Some(decomposed) = decompose_char(c) { |
97 | 306M | for &d in decomposed { |
98 | 223M | emit_char(d); |
99 | 223M | } |
100 | 83.0M | return; |
101 | 89.0M | } |
102 | | |
103 | | // Finally bottom out. |
104 | 89.0M | emit_char(c); |
105 | 261M | } Unexecuted instantiation: unicode_normalization::normalize::decompose::<unicode_normalization::normalize::decompose_compatible<<unicode_normalization::decompose::Decompositions<unicode_normalization::stream_safe::StreamSafe<core::str::iter::Chars>> as core::iter::traits::iterator::Iterator>::next::{closure#1}>::{closure#0}, <unicode_normalization::decompose::Decompositions<unicode_normalization::stream_safe::StreamSafe<core::str::iter::Chars>> as core::iter::traits::iterator::Iterator>::next::{closure#1}> unicode_normalization::normalize::decompose::<unicode_normalization::normalize::decompose_compatible<<unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#1}>::{closure#0}, <unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#1}> Line | Count | Source | 76 | 73.1M | fn decompose<D, F>(c: char, decompose_char: D, mut emit_char: F) | 77 | 73.1M | where | 78 | 73.1M | D: Fn(char) -> Option<&'static [char]>, | 79 | 73.1M | F: FnMut(char), | 80 | | { | 81 | | // 7-bit ASCII never decomposes | 82 | 73.1M | if c <= '\x7f' { | 83 | 24.1M | emit_char(c); | 84 | 24.1M | return; | 85 | 49.0M | } | 86 | | | 87 | | // Perform decomposition for Hangul | 88 | 49.0M | if is_hangul_syllable(c) { | 89 | | // Safety: Hangul Syllables invariant checked by is_hangul_syllable above | 90 | 219k | unsafe { | 91 | 219k | decompose_hangul(c, emit_char); | 92 | 219k | } | 93 | 219k | return; | 94 | 48.7M | } | 95 | | | 96 | 48.7M | if let Some(decomposed) = decompose_char(c) { | 97 | 89.3M | for &d in decomposed { | 98 | 69.0M | emit_char(d); | 99 | 69.0M | } | 100 | 20.3M | return; | 101 | 28.4M | } | 102 | | | 103 | | // Finally bottom out. | 104 | 28.4M | emit_char(c); | 105 | 73.1M | } |
unicode_normalization::normalize::decompose::<unicode_normalization::lookups::canonical_fully_decomposed, <unicode_normalization::decompose::Decompositions<unicode_normalization::stream_safe::StreamSafe<core::str::iter::Chars>> as core::iter::traits::iterator::Iterator>::next::{closure#0}> Line | Count | Source | 76 | 84.1M | fn decompose<D, F>(c: char, decompose_char: D, mut emit_char: F) | 77 | 84.1M | where | 78 | 84.1M | D: Fn(char) -> Option<&'static [char]>, | 79 | 84.1M | F: FnMut(char), | 80 | | { | 81 | | // 7-bit ASCII never decomposes | 82 | 84.1M | if c <= '\x7f' { | 83 | 26.1M | emit_char(c); | 84 | 26.1M | return; | 85 | 58.0M | } | 86 | | | 87 | | // Perform decomposition for Hangul | 88 | 58.0M | if is_hangul_syllable(c) { | 89 | | // Safety: Hangul Syllables invariant checked by is_hangul_syllable above | 90 | 9.56k | unsafe { | 91 | 9.56k | decompose_hangul(c, emit_char); | 92 | 9.56k | } | 93 | 9.56k | return; | 94 | 57.9M | } | 95 | | | 96 | 57.9M | if let Some(decomposed) = decompose_char(c) { | 97 | 59.3M | for &d in decomposed { | 98 | 40.4M | emit_char(d); | 99 | 40.4M | } | 100 | 18.9M | return; | 101 | 39.0M | } | 102 | | | 103 | | // Finally bottom out. | 104 | 39.0M | emit_char(c); | 105 | 84.1M | } |
unicode_normalization::normalize::decompose::<unicode_normalization::lookups::canonical_fully_decomposed, <unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#0}> Line | Count | Source | 76 | 59.8M | fn decompose<D, F>(c: char, decompose_char: D, mut emit_char: F) | 77 | 59.8M | where | 78 | 59.8M | D: Fn(char) -> Option<&'static [char]>, | 79 | 59.8M | F: FnMut(char), | 80 | | { | 81 | | // 7-bit ASCII never decomposes | 82 | 59.8M | if c <= '\x7f' { | 83 | 21.6M | emit_char(c); | 84 | 21.6M | return; | 85 | 38.1M | } | 86 | | | 87 | | // Perform decomposition for Hangul | 88 | 38.1M | if is_hangul_syllable(c) { | 89 | | // Safety: Hangul Syllables invariant checked by is_hangul_syllable above | 90 | 9.07k | unsafe { | 91 | 9.07k | decompose_hangul(c, emit_char); | 92 | 9.07k | } | 93 | 9.07k | return; | 94 | 38.1M | } | 95 | | | 96 | 38.1M | if let Some(decomposed) = decompose_char(c) { | 97 | 58.0M | for &d in decomposed { | 98 | 39.4M | emit_char(d); | 99 | 39.4M | } | 100 | 18.5M | return; | 101 | 19.5M | } | 102 | | | 103 | | // Finally bottom out. | 104 | 19.5M | emit_char(c); | 105 | 59.8M | } |
Unexecuted instantiation: unicode_normalization::normalize::decompose::<_, _> Unexecuted instantiation: unicode_normalization::normalize::decompose::<unicode_normalization::normalize::decompose_compatible<<unicode_normalization::decompose::Decompositions<streaming::Counter> as core::iter::traits::iterator::Iterator>::next::{closure#1}>::{closure#0}, <unicode_normalization::decompose::Decompositions<streaming::Counter> as core::iter::traits::iterator::Iterator>::next::{closure#1}> unicode_normalization::normalize::decompose::<unicode_normalization::lookups::canonical_fully_decomposed, <unicode_normalization::decompose::Decompositions<streaming::Counter> as core::iter::traits::iterator::Iterator>::next::{closure#0}> Line | Count | Source | 76 | 11.8M | fn decompose<D, F>(c: char, decompose_char: D, mut emit_char: F) | 77 | 11.8M | where | 78 | 11.8M | D: Fn(char) -> Option<&'static [char]>, | 79 | 11.8M | F: FnMut(char), | 80 | | { | 81 | | // 7-bit ASCII never decomposes | 82 | 11.8M | if c <= '\x7f' { | 83 | 7.90M | emit_char(c); | 84 | 7.90M | return; | 85 | 3.96M | } | 86 | | | 87 | | // Perform decomposition for Hangul | 88 | 3.96M | if is_hangul_syllable(c) { | 89 | | // Safety: Hangul Syllables invariant checked by is_hangul_syllable above | 90 | 1.69k | unsafe { | 91 | 1.69k | decompose_hangul(c, emit_char); | 92 | 1.69k | } | 93 | 1.69k | return; | 94 | 3.96M | } | 95 | | | 96 | 3.96M | if let Some(decomposed) = decompose_char(c) { | 97 | 10.8M | for &d in decomposed { | 98 | 7.25M | emit_char(d); | 99 | 7.25M | } | 100 | 3.62M | return; | 101 | 340k | } | 102 | | | 103 | | // Finally bottom out. | 104 | 340k | emit_char(c); | 105 | 11.8M | } |
unicode_normalization::normalize::decompose::<unicode_normalization::normalize::decompose_compatible<process::rust_fuzzer_test_input::{closure#1}>::{closure#0}, process::rust_fuzzer_test_input::{closure#1}> Line | Count | Source | 76 | 81 | fn decompose<D, F>(c: char, decompose_char: D, mut emit_char: F) | 77 | 81 | where | 78 | 81 | D: Fn(char) -> Option<&'static [char]>, | 79 | 81 | F: FnMut(char), | 80 | | { | 81 | | // 7-bit ASCII never decomposes | 82 | 81 | if c <= '\x7f' { | 83 | 8 | emit_char(c); | 84 | 8 | return; | 85 | 73 | } | 86 | | | 87 | | // Perform decomposition for Hangul | 88 | 73 | if is_hangul_syllable(c) { | 89 | | // Safety: Hangul Syllables invariant checked by is_hangul_syllable above | 90 | 15 | unsafe { | 91 | 15 | decompose_hangul(c, emit_char); | 92 | 15 | } | 93 | 15 | return; | 94 | 58 | } | 95 | | | 96 | 58 | if let Some(decomposed) = decompose_char(c) { | 97 | 14 | for &d in decomposed { | 98 | 10 | emit_char(d); | 99 | 10 | } | 100 | 4 | return; | 101 | 54 | } | 102 | | | 103 | | // Finally bottom out. | 104 | 54 | emit_char(c); | 105 | 81 | } |
unicode_normalization::normalize::decompose::<unicode_normalization::normalize::decompose_compatible<<unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#1}>::{closure#0}, <unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#1}> Line | Count | Source | 76 | 14.1M | fn decompose<D, F>(c: char, decompose_char: D, mut emit_char: F) | 77 | 14.1M | where | 78 | 14.1M | D: Fn(char) -> Option<&'static [char]>, | 79 | 14.1M | F: FnMut(char), | 80 | | { | 81 | | // 7-bit ASCII never decomposes | 82 | 14.1M | if c <= '\x7f' { | 83 | 2.39M | emit_char(c); | 84 | 2.39M | return; | 85 | 11.7M | } | 86 | | | 87 | | // Perform decomposition for Hangul | 88 | 11.7M | if is_hangul_syllable(c) { | 89 | | // Safety: Hangul Syllables invariant checked by is_hangul_syllable above | 90 | 1.34k | unsafe { | 91 | 1.34k | decompose_hangul(c, emit_char); | 92 | 1.34k | } | 93 | 1.34k | return; | 94 | 11.7M | } | 95 | | | 96 | 11.7M | if let Some(decomposed) = decompose_char(c) { | 97 | 55.5M | for &d in decomposed { | 98 | 44.9M | emit_char(d); | 99 | 44.9M | } | 100 | 10.5M | return; | 101 | 1.21M | } | 102 | | | 103 | | // Finally bottom out. | 104 | 1.21M | emit_char(c); | 105 | 14.1M | } |
unicode_normalization::normalize::decompose::<unicode_normalization::lookups::canonical_fully_decomposed, process::rust_fuzzer_test_input::{closure#0}> Line | Count | Source | 76 | 78 | fn decompose<D, F>(c: char, decompose_char: D, mut emit_char: F) | 77 | 78 | where | 78 | 78 | D: Fn(char) -> Option<&'static [char]>, | 79 | 78 | F: FnMut(char), | 80 | | { | 81 | | // 7-bit ASCII never decomposes | 82 | 78 | if c <= '\x7f' { | 83 | 8 | emit_char(c); | 84 | 8 | return; | 85 | 70 | } | 86 | | | 87 | | // Perform decomposition for Hangul | 88 | 70 | if is_hangul_syllable(c) { | 89 | | // Safety: Hangul Syllables invariant checked by is_hangul_syllable above | 90 | 16 | unsafe { | 91 | 16 | decompose_hangul(c, emit_char); | 92 | 16 | } | 93 | 16 | return; | 94 | 54 | } | 95 | | | 96 | 54 | if let Some(decomposed) = decompose_char(c) { | 97 | 3 | for &d in decomposed { | 98 | 2 | emit_char(d); | 99 | 2 | } | 100 | 1 | return; | 101 | 53 | } | 102 | | | 103 | | // Finally bottom out. | 104 | 53 | emit_char(c); | 105 | 78 | } |
unicode_normalization::normalize::decompose::<unicode_normalization::lookups::canonical_fully_decomposed, <unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#0}> Line | Count | Source | 76 | 18.1M | fn decompose<D, F>(c: char, decompose_char: D, mut emit_char: F) | 77 | 18.1M | where | 78 | 18.1M | D: Fn(char) -> Option<&'static [char]>, | 79 | 18.1M | F: FnMut(char), | 80 | | { | 81 | | // 7-bit ASCII never decomposes | 82 | 18.1M | if c <= '\x7f' { | 83 | 6.71M | emit_char(c); | 84 | 6.71M | return; | 85 | 11.4M | } | 86 | | | 87 | | // Perform decomposition for Hangul | 88 | 11.4M | if is_hangul_syllable(c) { | 89 | | // Safety: Hangul Syllables invariant checked by is_hangul_syllable above | 90 | 25.4k | unsafe { | 91 | 25.4k | decompose_hangul(c, emit_char); | 92 | 25.4k | } | 93 | 25.4k | return; | 94 | 11.4M | } | 95 | | | 96 | 11.4M | if let Some(decomposed) = decompose_char(c) { | 97 | 33.1M | for &d in decomposed { | 98 | 22.0M | emit_char(d); | 99 | 22.0M | } | 100 | 11.0M | return; | 101 | 363k | } | 102 | | | 103 | | // Finally bottom out. | 104 | 363k | emit_char(c); | 105 | 18.1M | } |
|
106 | | |
107 | | /// Compose two characters into a single character, if possible. |
108 | | /// See [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/) |
109 | | /// for more information. |
110 | 112M | pub fn compose(a: char, b: char) -> Option<char> { |
111 | 112M | compose_hangul(a, b).or_else(|| composition_table(a, b)) |
112 | 112M | } |
113 | | |
114 | | // Constants from Unicode 9.0.0 Section 3.12 Conjoining Jamo Behavior |
115 | | // http://www.unicode.org/versions/Unicode9.0.0/ch03.pdf#M9.32468.Heading.310.Combining.Jamo.Behavior |
116 | | const S_BASE: u32 = 0xAC00; |
117 | | const L_BASE: u32 = 0x1100; |
118 | | const V_BASE: u32 = 0x1161; |
119 | | const T_BASE: u32 = 0x11A7; |
120 | | const L_COUNT: u32 = 19; |
121 | | const V_COUNT: u32 = 21; |
122 | | const T_COUNT: u32 = 28; |
123 | | const N_COUNT: u32 = V_COUNT * T_COUNT; |
124 | | const S_COUNT: u32 = L_COUNT * N_COUNT; |
125 | | |
126 | | const S_LAST: u32 = S_BASE + S_COUNT - 1; |
127 | | const L_LAST: u32 = L_BASE + L_COUNT - 1; |
128 | | const V_LAST: u32 = V_BASE + V_COUNT - 1; |
129 | | const T_LAST: u32 = T_BASE + T_COUNT - 1; |
130 | | |
131 | | // Composition only occurs for `TPart`s in `U+11A8 ..= U+11C2`, |
132 | | // i.e. `T_BASE + 1 ..= T_LAST`. |
133 | | const T_FIRST: u32 = T_BASE + 1; |
134 | | |
135 | | // Safety-usable invariant: This ensures that c is a valid Hangul Syllable character (U+AC00..U+D7AF) |
136 | 437M | pub(crate) fn is_hangul_syllable(c: char) -> bool { |
137 | | // Safety: This checks the range 0xAC00 (S_BASE) to 0xD7A4 (S_BASE + S_COUNT), upholding the safety-usable invariant |
138 | 437M | (c as u32) >= S_BASE && (c as u32) < (S_BASE + S_COUNT) |
139 | 437M | } |
140 | | |
141 | | // Decompose a precomposed Hangul syllable |
142 | | // Safety: `s` MUST be a valid Hangul Syllable character, between U+AC00..U+D7AF |
143 | | #[allow(unsafe_code, unused_unsafe)] |
144 | | #[inline(always)] |
145 | 266k | unsafe fn decompose_hangul<F>(s: char, mut emit_char: F) |
146 | 266k | where |
147 | 266k | F: FnMut(char), |
148 | | { |
149 | | // This will be at most 0x2baf, the size of the Hangul Syllables block |
150 | 266k | let s_index = s as u32 - S_BASE; |
151 | | // This will be at most 0x2baf / (21 * 28), 19 |
152 | 266k | let l_index = s_index / N_COUNT; |
153 | | unsafe { |
154 | | // Safety: L_BASE (0x1100) plus at most 19 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) |
155 | 266k | emit_char(char::from_u32_unchecked(L_BASE + l_index)); |
156 | | |
157 | | // Safety: This will be at most (N_COUNT - 1) / T_COUNT = (V*T - 1) / T, which gives us an upper bound of V_COUNT = 21 |
158 | 266k | let v_index = (s_index % N_COUNT) / T_COUNT; |
159 | | // Safety: V_BASE (0x1161) plus at most 21 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) |
160 | 266k | emit_char(char::from_u32_unchecked(V_BASE + v_index)); |
161 | | |
162 | | // Safety: This will be at most T_COUNT - 1 (27) |
163 | 266k | let t_index = s_index % T_COUNT; |
164 | 266k | if t_index > 0 { |
165 | 158k | // Safety: T_BASE (0x11A7) plus at most 27 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) |
166 | 158k | emit_char(char::from_u32_unchecked(T_BASE + t_index)); |
167 | 158k | } |
168 | | } |
169 | 266k | } unicode_normalization::normalize::decompose_hangul::<<unicode_normalization::decompose::Decompositions<unicode_normalization::stream_safe::StreamSafe<core::str::iter::Chars>> as core::iter::traits::iterator::Iterator>::next::{closure#0}> Line | Count | Source | 145 | 9.56k | unsafe fn decompose_hangul<F>(s: char, mut emit_char: F) | 146 | 9.56k | where | 147 | 9.56k | F: FnMut(char), | 148 | | { | 149 | | // This will be at most 0x2baf, the size of the Hangul Syllables block | 150 | 9.56k | let s_index = s as u32 - S_BASE; | 151 | | // This will be at most 0x2baf / (21 * 28), 19 | 152 | 9.56k | let l_index = s_index / N_COUNT; | 153 | | unsafe { | 154 | | // Safety: L_BASE (0x1100) plus at most 19 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 155 | 9.56k | emit_char(char::from_u32_unchecked(L_BASE + l_index)); | 156 | | | 157 | | // Safety: This will be at most (N_COUNT - 1) / T_COUNT = (V*T - 1) / T, which gives us an upper bound of V_COUNT = 21 | 158 | 9.56k | let v_index = (s_index % N_COUNT) / T_COUNT; | 159 | | // Safety: V_BASE (0x1161) plus at most 21 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 160 | 9.56k | emit_char(char::from_u32_unchecked(V_BASE + v_index)); | 161 | | | 162 | | // Safety: This will be at most T_COUNT - 1 (27) | 163 | 9.56k | let t_index = s_index % T_COUNT; | 164 | 9.56k | if t_index > 0 { | 165 | 8.77k | // Safety: T_BASE (0x11A7) plus at most 27 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 166 | 8.77k | emit_char(char::from_u32_unchecked(T_BASE + t_index)); | 167 | 8.77k | } | 168 | | } | 169 | 9.56k | } |
Unexecuted instantiation: unicode_normalization::normalize::decompose_hangul::<<unicode_normalization::decompose::Decompositions<unicode_normalization::stream_safe::StreamSafe<core::str::iter::Chars>> as core::iter::traits::iterator::Iterator>::next::{closure#1}> unicode_normalization::normalize::decompose_hangul::<<unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#0}> Line | Count | Source | 145 | 9.07k | unsafe fn decompose_hangul<F>(s: char, mut emit_char: F) | 146 | 9.07k | where | 147 | 9.07k | F: FnMut(char), | 148 | | { | 149 | | // This will be at most 0x2baf, the size of the Hangul Syllables block | 150 | 9.07k | let s_index = s as u32 - S_BASE; | 151 | | // This will be at most 0x2baf / (21 * 28), 19 | 152 | 9.07k | let l_index = s_index / N_COUNT; | 153 | | unsafe { | 154 | | // Safety: L_BASE (0x1100) plus at most 19 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 155 | 9.07k | emit_char(char::from_u32_unchecked(L_BASE + l_index)); | 156 | | | 157 | | // Safety: This will be at most (N_COUNT - 1) / T_COUNT = (V*T - 1) / T, which gives us an upper bound of V_COUNT = 21 | 158 | 9.07k | let v_index = (s_index % N_COUNT) / T_COUNT; | 159 | | // Safety: V_BASE (0x1161) plus at most 21 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 160 | 9.07k | emit_char(char::from_u32_unchecked(V_BASE + v_index)); | 161 | | | 162 | | // Safety: This will be at most T_COUNT - 1 (27) | 163 | 9.07k | let t_index = s_index % T_COUNT; | 164 | 9.07k | if t_index > 0 { | 165 | 8.44k | // Safety: T_BASE (0x11A7) plus at most 27 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 166 | 8.44k | emit_char(char::from_u32_unchecked(T_BASE + t_index)); | 167 | 8.44k | } | 168 | | } | 169 | 9.07k | } |
unicode_normalization::normalize::decompose_hangul::<<unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#1}> Line | Count | Source | 145 | 219k | unsafe fn decompose_hangul<F>(s: char, mut emit_char: F) | 146 | 219k | where | 147 | 219k | F: FnMut(char), | 148 | | { | 149 | | // This will be at most 0x2baf, the size of the Hangul Syllables block | 150 | 219k | let s_index = s as u32 - S_BASE; | 151 | | // This will be at most 0x2baf / (21 * 28), 19 | 152 | 219k | let l_index = s_index / N_COUNT; | 153 | | unsafe { | 154 | | // Safety: L_BASE (0x1100) plus at most 19 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 155 | 219k | emit_char(char::from_u32_unchecked(L_BASE + l_index)); | 156 | | | 157 | | // Safety: This will be at most (N_COUNT - 1) / T_COUNT = (V*T - 1) / T, which gives us an upper bound of V_COUNT = 21 | 158 | 219k | let v_index = (s_index % N_COUNT) / T_COUNT; | 159 | | // Safety: V_BASE (0x1161) plus at most 21 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 160 | 219k | emit_char(char::from_u32_unchecked(V_BASE + v_index)); | 161 | | | 162 | | // Safety: This will be at most T_COUNT - 1 (27) | 163 | 219k | let t_index = s_index % T_COUNT; | 164 | 219k | if t_index > 0 { | 165 | 112k | // Safety: T_BASE (0x11A7) plus at most 27 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 166 | 112k | emit_char(char::from_u32_unchecked(T_BASE + t_index)); | 167 | 112k | } | 168 | | } | 169 | 219k | } |
Unexecuted instantiation: unicode_normalization::normalize::decompose_hangul::<_> unicode_normalization::normalize::decompose_hangul::<<unicode_normalization::decompose::Decompositions<streaming::Counter> as core::iter::traits::iterator::Iterator>::next::{closure#0}> Line | Count | Source | 145 | 1.69k | unsafe fn decompose_hangul<F>(s: char, mut emit_char: F) | 146 | 1.69k | where | 147 | 1.69k | F: FnMut(char), | 148 | | { | 149 | | // This will be at most 0x2baf, the size of the Hangul Syllables block | 150 | 1.69k | let s_index = s as u32 - S_BASE; | 151 | | // This will be at most 0x2baf / (21 * 28), 19 | 152 | 1.69k | let l_index = s_index / N_COUNT; | 153 | | unsafe { | 154 | | // Safety: L_BASE (0x1100) plus at most 19 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 155 | 1.69k | emit_char(char::from_u32_unchecked(L_BASE + l_index)); | 156 | | | 157 | | // Safety: This will be at most (N_COUNT - 1) / T_COUNT = (V*T - 1) / T, which gives us an upper bound of V_COUNT = 21 | 158 | 1.69k | let v_index = (s_index % N_COUNT) / T_COUNT; | 159 | | // Safety: V_BASE (0x1161) plus at most 21 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 160 | 1.69k | emit_char(char::from_u32_unchecked(V_BASE + v_index)); | 161 | | | 162 | | // Safety: This will be at most T_COUNT - 1 (27) | 163 | 1.69k | let t_index = s_index % T_COUNT; | 164 | 1.69k | if t_index > 0 { | 165 | 1.49k | // Safety: T_BASE (0x11A7) plus at most 27 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 166 | 1.49k | emit_char(char::from_u32_unchecked(T_BASE + t_index)); | 167 | 1.49k | } | 168 | | } | 169 | 1.69k | } |
Unexecuted instantiation: unicode_normalization::normalize::decompose_hangul::<<unicode_normalization::decompose::Decompositions<streaming::Counter> as core::iter::traits::iterator::Iterator>::next::{closure#1}> unicode_normalization::normalize::decompose_hangul::<process::rust_fuzzer_test_input::{closure#0}> Line | Count | Source | 145 | 16 | unsafe fn decompose_hangul<F>(s: char, mut emit_char: F) | 146 | 16 | where | 147 | 16 | F: FnMut(char), | 148 | | { | 149 | | // This will be at most 0x2baf, the size of the Hangul Syllables block | 150 | 16 | let s_index = s as u32 - S_BASE; | 151 | | // This will be at most 0x2baf / (21 * 28), 19 | 152 | 16 | let l_index = s_index / N_COUNT; | 153 | | unsafe { | 154 | | // Safety: L_BASE (0x1100) plus at most 19 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 155 | 16 | emit_char(char::from_u32_unchecked(L_BASE + l_index)); | 156 | | | 157 | | // Safety: This will be at most (N_COUNT - 1) / T_COUNT = (V*T - 1) / T, which gives us an upper bound of V_COUNT = 21 | 158 | 16 | let v_index = (s_index % N_COUNT) / T_COUNT; | 159 | | // Safety: V_BASE (0x1161) plus at most 21 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 160 | 16 | emit_char(char::from_u32_unchecked(V_BASE + v_index)); | 161 | | | 162 | | // Safety: This will be at most T_COUNT - 1 (27) | 163 | 16 | let t_index = s_index % T_COUNT; | 164 | 16 | if t_index > 0 { | 165 | 16 | // Safety: T_BASE (0x11A7) plus at most 27 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 166 | 16 | emit_char(char::from_u32_unchecked(T_BASE + t_index)); | 167 | 16 | } | 168 | | } | 169 | 16 | } |
unicode_normalization::normalize::decompose_hangul::<process::rust_fuzzer_test_input::{closure#1}> Line | Count | Source | 145 | 15 | unsafe fn decompose_hangul<F>(s: char, mut emit_char: F) | 146 | 15 | where | 147 | 15 | F: FnMut(char), | 148 | | { | 149 | | // This will be at most 0x2baf, the size of the Hangul Syllables block | 150 | 15 | let s_index = s as u32 - S_BASE; | 151 | | // This will be at most 0x2baf / (21 * 28), 19 | 152 | 15 | let l_index = s_index / N_COUNT; | 153 | | unsafe { | 154 | | // Safety: L_BASE (0x1100) plus at most 19 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 155 | 15 | emit_char(char::from_u32_unchecked(L_BASE + l_index)); | 156 | | | 157 | | // Safety: This will be at most (N_COUNT - 1) / T_COUNT = (V*T - 1) / T, which gives us an upper bound of V_COUNT = 21 | 158 | 15 | let v_index = (s_index % N_COUNT) / T_COUNT; | 159 | | // Safety: V_BASE (0x1161) plus at most 21 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 160 | 15 | emit_char(char::from_u32_unchecked(V_BASE + v_index)); | 161 | | | 162 | | // Safety: This will be at most T_COUNT - 1 (27) | 163 | 15 | let t_index = s_index % T_COUNT; | 164 | 15 | if t_index > 0 { | 165 | 15 | // Safety: T_BASE (0x11A7) plus at most 27 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 166 | 15 | emit_char(char::from_u32_unchecked(T_BASE + t_index)); | 167 | 15 | } | 168 | | } | 169 | 15 | } |
unicode_normalization::normalize::decompose_hangul::<<unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#0}> Line | Count | Source | 145 | 25.4k | unsafe fn decompose_hangul<F>(s: char, mut emit_char: F) | 146 | 25.4k | where | 147 | 25.4k | F: FnMut(char), | 148 | | { | 149 | | // This will be at most 0x2baf, the size of the Hangul Syllables block | 150 | 25.4k | let s_index = s as u32 - S_BASE; | 151 | | // This will be at most 0x2baf / (21 * 28), 19 | 152 | 25.4k | let l_index = s_index / N_COUNT; | 153 | | unsafe { | 154 | | // Safety: L_BASE (0x1100) plus at most 19 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 155 | 25.4k | emit_char(char::from_u32_unchecked(L_BASE + l_index)); | 156 | | | 157 | | // Safety: This will be at most (N_COUNT - 1) / T_COUNT = (V*T - 1) / T, which gives us an upper bound of V_COUNT = 21 | 158 | 25.4k | let v_index = (s_index % N_COUNT) / T_COUNT; | 159 | | // Safety: V_BASE (0x1161) plus at most 21 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 160 | 25.4k | emit_char(char::from_u32_unchecked(V_BASE + v_index)); | 161 | | | 162 | | // Safety: This will be at most T_COUNT - 1 (27) | 163 | 25.4k | let t_index = s_index % T_COUNT; | 164 | 25.4k | if t_index > 0 { | 165 | 25.2k | // Safety: T_BASE (0x11A7) plus at most 27 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 166 | 25.2k | emit_char(char::from_u32_unchecked(T_BASE + t_index)); | 167 | 25.2k | } | 168 | | } | 169 | 25.4k | } |
unicode_normalization::normalize::decompose_hangul::<<unicode_normalization::decompose::Decompositions<core::str::iter::Chars> as core::iter::traits::iterator::Iterator>::next::{closure#1}> Line | Count | Source | 145 | 1.34k | unsafe fn decompose_hangul<F>(s: char, mut emit_char: F) | 146 | 1.34k | where | 147 | 1.34k | F: FnMut(char), | 148 | | { | 149 | | // This will be at most 0x2baf, the size of the Hangul Syllables block | 150 | 1.34k | let s_index = s as u32 - S_BASE; | 151 | | // This will be at most 0x2baf / (21 * 28), 19 | 152 | 1.34k | let l_index = s_index / N_COUNT; | 153 | | unsafe { | 154 | | // Safety: L_BASE (0x1100) plus at most 19 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 155 | 1.34k | emit_char(char::from_u32_unchecked(L_BASE + l_index)); | 156 | | | 157 | | // Safety: This will be at most (N_COUNT - 1) / T_COUNT = (V*T - 1) / T, which gives us an upper bound of V_COUNT = 21 | 158 | 1.34k | let v_index = (s_index % N_COUNT) / T_COUNT; | 159 | | // Safety: V_BASE (0x1161) plus at most 21 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 160 | 1.34k | emit_char(char::from_u32_unchecked(V_BASE + v_index)); | 161 | | | 162 | | // Safety: This will be at most T_COUNT - 1 (27) | 163 | 1.34k | let t_index = s_index % T_COUNT; | 164 | 1.34k | if t_index > 0 { | 165 | 1.12k | // Safety: T_BASE (0x11A7) plus at most 27 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800) | 166 | 1.12k | emit_char(char::from_u32_unchecked(T_BASE + t_index)); | 167 | 1.12k | } | 168 | | } | 169 | 1.34k | } |
|
170 | | |
171 | | #[inline] |
172 | 40.2k | pub(crate) fn hangul_decomposition_length(s: char) -> usize { |
173 | 40.2k | let si = s as u32 - S_BASE; |
174 | 40.2k | let ti = si % T_COUNT; |
175 | 40.2k | if ti > 0 { |
176 | 37.8k | 3 |
177 | | } else { |
178 | 2.41k | 2 |
179 | | } |
180 | 40.2k | } unicode_normalization::normalize::hangul_decomposition_length Line | Count | Source | 172 | 24.5k | pub(crate) fn hangul_decomposition_length(s: char) -> usize { | 173 | 24.5k | let si = s as u32 - S_BASE; | 174 | 24.5k | let ti = si % T_COUNT; | 175 | 24.5k | if ti > 0 { | 176 | 23.5k | 3 | 177 | | } else { | 178 | 1.07k | 2 | 179 | | } | 180 | 24.5k | } |
unicode_normalization::normalize::hangul_decomposition_length Line | Count | Source | 172 | 15.6k | pub(crate) fn hangul_decomposition_length(s: char) -> usize { | 173 | 15.6k | let si = s as u32 - S_BASE; | 174 | 15.6k | let ti = si % T_COUNT; | 175 | 15.6k | if ti > 0 { | 176 | 14.3k | 3 | 177 | | } else { | 178 | 1.33k | 2 | 179 | | } | 180 | 15.6k | } |
|
181 | | |
182 | | // Compose a pair of Hangul Jamo |
183 | | #[allow(unsafe_code)] |
184 | | #[inline(always)] |
185 | | #[allow(ellipsis_inclusive_range_patterns)] |
186 | 112M | fn compose_hangul(a: char, b: char) -> Option<char> { |
187 | 112M | let (a, b) = (a as u32, b as u32); |
188 | 112M | match (a, b) { |
189 | | // Compose a leading consonant and a vowel together into an LV_Syllable |
190 | 5.76M | (L_BASE..=L_LAST, V_BASE..=V_LAST) => { |
191 | | // Safety: based on the above bounds, l_index will be less than or equal to L_COUNT (19) |
192 | | // and v_index will be <= V_COUNT (21) |
193 | 438k | let l_index = a - L_BASE; |
194 | 438k | let v_index = b - V_BASE; |
195 | | // Safety: This will be <= 19 * (20 * 21) + (21 * 20), which is 8400. |
196 | 438k | let lv_index = l_index * N_COUNT + v_index * T_COUNT; |
197 | | // Safety: This is between 0xAC00 and 0xCCD0, which are in range for Hangul Syllables (U+AC00..U+D7AF) and also in range |
198 | | // for BMP unicode |
199 | 438k | let s = S_BASE + lv_index; |
200 | | // Safety: We've verified this is in-range |
201 | 438k | Some(unsafe { char::from_u32_unchecked(s) }) |
202 | | } |
203 | | // Compose an LV_Syllable and a trailing consonant into an LVT_Syllable |
204 | 4.71M | (S_BASE..=S_LAST, T_FIRST..=T_LAST) if (a - S_BASE) % T_COUNT == 0 => { |
205 | | // Safety: a is between 0xAC00 and (0xAC00 + 19 * 21 * 28). b - T_BASE is between 0 and 19. |
206 | | // Adding a number 0 to 19 to a number that is at largest 0xD7A4 will not go out of bounds to 0xD800 (where the |
207 | | // surrogates start), so this is safe. |
208 | 225k | Some(unsafe { char::from_u32_unchecked(a + (b - T_BASE)) }) |
209 | | } |
210 | 112M | _ => None, |
211 | | } |
212 | 112M | } |
213 | | |
#[cfg(test)]
mod tests {
    use super::compose_hangul;

    /// Regression test for a fixed bug in which an LV_Syllable was composed
    /// with `T_BASE` itself. Only characters in `T_BASE + 1 ..= T_LAST` may
    /// combine with an LV_Syllable, so this pair must stay uncomposed.
    #[test]
    fn test_hangul_composition() {
        let lv_syllable = '\u{c8e0}';
        let t_base = '\u{11a7}';
        assert_eq!(compose_hangul(lv_syllable, t_base), None);
    }
}