/src/unicode-normalization/src/lookups.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2019 The Rust Project Developers. See the COPYRIGHT |
2 | | // file at the top-level directory of this distribution and at |
3 | | // http://rust-lang.org/COPYRIGHT. |
4 | | // |
5 | | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
6 | | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
7 | | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
8 | | // option. This file may not be copied, modified, or distributed |
9 | | // except according to those terms. |
10 | | |
11 | | //! Lookups of Unicode properties using minimal perfect hashing. |
12 | | |
13 | | use crate::perfect_hash::mph_lookup; |
14 | | use crate::tables::*; |
15 | | |
16 | | /// Look up the canonical combining class for a codepoint. |
17 | | /// |
18 | | /// The value returned is as defined in the Unicode Character Database. |
19 | 876M | pub fn canonical_combining_class(c: char) -> u8 { |
20 | 876M | mph_lookup( |
21 | 876M | c.into(), |
22 | 876M | CANONICAL_COMBINING_CLASS_SALT, |
23 | 876M | CANONICAL_COMBINING_CLASS_KV, |
24 | 876M | u8_lookup_fk, |
25 | 876M | u8_lookup_fv, |
26 | 876M | 0, |
27 | 876M | ) |
28 | 876M | } |
29 | | |
30 | 67.9M | pub(crate) fn composition_table(c1: char, c2: char) -> Option<char> { |
31 | 67.9M | if c1 < '\u{10000}' && c2 < '\u{10000}' { |
32 | 67.8M | mph_lookup( |
33 | 67.8M | (c1 as u32) << 16 | (c2 as u32), |
34 | 67.8M | COMPOSITION_TABLE_SALT, |
35 | 67.8M | COMPOSITION_TABLE_KV, |
36 | 67.8M | pair_lookup_fk, |
37 | 67.8M | pair_lookup_fv_opt, |
38 | 67.8M | None, |
39 | 67.8M | ) |
40 | | } else { |
41 | 110k | composition_table_astral(c1, c2) |
42 | | } |
43 | 67.9M | } |
44 | | |
45 | 284M | pub(crate) fn canonical_fully_decomposed(c: char) -> Option<&'static [char]> { |
46 | 284M | mph_lookup( |
47 | 284M | c.into(), |
48 | 284M | CANONICAL_DECOMPOSED_SALT, |
49 | 284M | CANONICAL_DECOMPOSED_KV, |
50 | 284M | pair_lookup_fk, |
51 | 284M | pair_lookup_fv_opt, |
52 | 284M | None, |
53 | 284M | ) |
54 | 284M | .map(|(start, len)| &CANONICAL_DECOMPOSED_CHARS[start as usize..][..len as usize]) |
55 | 284M | } |
56 | | |
57 | 217M | pub(crate) fn compatibility_fully_decomposed(c: char) -> Option<&'static [char]> { |
58 | 217M | mph_lookup( |
59 | 217M | c.into(), |
60 | 217M | COMPATIBILITY_DECOMPOSED_SALT, |
61 | 217M | COMPATIBILITY_DECOMPOSED_KV, |
62 | 217M | pair_lookup_fk, |
63 | 217M | pair_lookup_fv_opt, |
64 | 217M | None, |
65 | 217M | ) |
66 | 217M | .map(|(start, len)| &COMPATIBILITY_DECOMPOSED_CHARS[start as usize..][..len as usize]) |
67 | 217M | } |
68 | | |
69 | 0 | pub(crate) fn cjk_compat_variants_fully_decomposed(c: char) -> Option<&'static [char]> { |
70 | 0 | mph_lookup( |
71 | 0 | c.into(), |
72 | 0 | CJK_COMPAT_VARIANTS_DECOMPOSED_SALT, |
73 | 0 | CJK_COMPAT_VARIANTS_DECOMPOSED_KV, |
74 | 0 | pair_lookup_fk, |
75 | 0 | pair_lookup_fv_opt, |
76 | 0 | None, |
77 | 0 | ) |
78 | 0 | .map(|(start, len)| &CJK_COMPAT_VARIANTS_DECOMPOSED_CHARS[start as usize..][..len as usize]) |
79 | 0 | } |
80 | | |
81 | | /// Return whether the given character is a combining mark (`General_Category=Mark`). |
82 | 44 | pub fn is_combining_mark(c: char) -> bool { |
83 | 44 | mph_lookup( |
84 | 44 | c.into(), |
85 | 44 | COMBINING_MARK_SALT, |
86 | 44 | COMBINING_MARK_KV, |
87 | 44 | bool_lookup_fk, |
88 | 44 | bool_lookup_fv, |
89 | 44 | false, |
90 | 44 | ) |
91 | 44 | } |
92 | | |
93 | 25.4M | pub fn stream_safe_trailing_nonstarters(c: char) -> usize { |
94 | 25.4M | mph_lookup( |
95 | 25.4M | c.into(), |
96 | 25.4M | TRAILING_NONSTARTERS_SALT, |
97 | 25.4M | TRAILING_NONSTARTERS_KV, |
98 | 25.4M | u8_lookup_fk, |
99 | 25.4M | u8_lookup_fv, |
100 | 25.4M | 0, |
101 | 25.4M | ) as usize |
102 | 25.4M | } |
103 | | |
104 | | /// Extract the key (upper 24 bits) from a 24-bit key and 8-bit value packed in a `u32`. |
105 | | #[inline] |
106 | 902M | fn u8_lookup_fk(kv: u32) -> u32 { |
107 | 902M | kv >> 8 |
108 | 902M | } |
109 | | |
110 | | /// Extract the value (lower 8 bits) from a 24-bit key and 8-bit value packed in a `u32`. |
111 | | #[inline] |
112 | 661M | fn u8_lookup_fv(kv: u32) -> u8 { |
113 | 661M | (kv & 0xff) as u8 |
114 | 661M | } |
115 | | |
116 | | /// Extract the key for a boolean lookup. |
117 | | #[inline] |
118 | 44 | fn bool_lookup_fk(kv: u32) -> u32 { |
119 | 44 | kv |
120 | 44 | } |
121 | | |
122 | | /// Extract the value for a boolean lookup. |
123 | | #[inline] |
124 | 1 | fn bool_lookup_fv(_kv: u32) -> bool { |
125 | 1 | true |
126 | 1 | } |
127 | | |
128 | | /// Extract the key in a pair. |
129 | | #[inline] |
130 | 569M | fn pair_lookup_fk<T>(kv: (u32, T)) -> u32 { |
131 | 569M | kv.0 |
132 | 569M | } |
unicode_normalization::lookups::pair_lookup_fk::<(u16, u16)>
Line | Count | Source |
130 | 501M | fn pair_lookup_fk<T>(kv: (u32, T)) -> u32 { |
131 | 501M | kv.0 |
132 | 501M | } |
unicode_normalization::lookups::pair_lookup_fk::<char>
Line | Count | Source |
130 | 67.8M | fn pair_lookup_fk<T>(kv: (u32, T)) -> u32 { |
131 | 67.8M | kv.0 |
132 | 67.8M | } |
133 | | |
134 | | /// Extract the value in a pair, returning an option. |
135 | | #[inline] |
136 | 100M | fn pair_lookup_fv_opt<T>(kv: (u32, T)) -> Option<T> { |
137 | 100M | Some(kv.1) |
138 | 100M | } |
unicode_normalization::lookups::pair_lookup_fv_opt::<(u16, u16)>
Line | Count | Source |
136 | 97.0M | fn pair_lookup_fv_opt<T>(kv: (u32, T)) -> Option<T> { |
137 | 97.0M | Some(kv.1) |
138 | 97.0M | } |
unicode_normalization::lookups::pair_lookup_fv_opt::<char>
Line | Count | Source |
136 | 3.80M | fn pair_lookup_fv_opt<T>(kv: (u32, T)) -> Option<T> { |
137 | 3.80M | Some(kv.1) |
138 | 3.80M | } |