/src/icu/source/common/uscript_props.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2016 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | /* |
4 | | ******************************************************************************* |
5 | | * Copyright (C) 2013-2016, International Business Machines |
6 | | * Corporation and others. All Rights Reserved. |
7 | | ******************************************************************************* |
8 | | * file name: uscript_props.cpp |
9 | | * encoding: UTF-8 |
10 | | * tab size: 8 (not used) |
11 | | * indentation:4 |
12 | | * |
13 | | * created on: 2013feb16 |
14 | | * created by: Markus W. Scherer |
15 | | */ |
16 | | |
17 | | #include "unicode/utypes.h" |
18 | | #include "unicode/unistr.h" |
19 | | #include "unicode/uscript.h" |
20 | | #include "unicode/utf16.h" |
21 | | #include "ustr_imp.h" |
22 | | #include "cmemory.h" |
23 | | |
24 | | namespace { |
25 | | |
26 | | // Script metadata (script properties). |
27 | | // See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt |
28 | | |
29 | | // 0 = NOT_ENCODED, no sample character, default false script properties. |
30 | | // Bits 20.. 0: sample character |
31 | | |
32 | | // Bits 23..21: usage, a 3-bit value that uscript_getUsage() extracts with (props >> 21) & 7 |
33 | | const int32_t UNKNOWN = 1 << 21; |
34 | | const int32_t EXCLUSION = 2 << 21; |
35 | | const int32_t LIMITED_USE = 3 << 21; |
36 | | // const int32_t ASPIRATIONAL = 4 << 21; -- not used any more since Unicode 10 |
37 | | const int32_t RECOMMENDED = 5 << 21; |
38 | | |
39 | | // Bits 31..24: Single-bit flags, tested by uscript_isRightToLeft(), uscript_breaksBetweenLetters() and uscript_isCased() |
40 | | const int32_t RTL = 1 << 24; |
41 | | const int32_t LB_LETTERS = 1 << 25; |
42 | | const int32_t CASED = 1 << 26; |
43 | | |
44 | | const int32_t SCRIPT_PROPS[] = { |
45 | | // Indexed by UScriptCode value (see getScriptProps()); an entry of 0 has no sample character, usage or flags. Begin copy-paste output from |
46 | | // tools/trunk/unicode/py/parsescriptmetadata.py |
47 | | 0x0040 | RECOMMENDED, // Zyyy |
48 | | 0x0308 | RECOMMENDED, // Zinh |
49 | | 0x0628 | RECOMMENDED | RTL, // Arab |
50 | | 0x0531 | RECOMMENDED | CASED, // Armn |
51 | | 0x0995 | RECOMMENDED, // Beng |
52 | | 0x3105 | RECOMMENDED | LB_LETTERS, // Bopo |
53 | | 0x13C4 | LIMITED_USE | CASED, // Cher |
54 | | 0x03E2 | EXCLUSION | CASED, // Copt |
55 | | 0x042F | RECOMMENDED | CASED, // Cyrl |
56 | | 0x10414 | EXCLUSION | CASED, // Dsrt |
57 | | 0x0905 | RECOMMENDED, // Deva |
58 | | 0x12A0 | RECOMMENDED, // Ethi |
59 | | 0x10D3 | RECOMMENDED, // Geor |
60 | | 0x10330 | EXCLUSION, // Goth |
61 | | 0x03A9 | RECOMMENDED | CASED, // Grek |
62 | | 0x0A95 | RECOMMENDED, // Gujr |
63 | | 0x0A15 | RECOMMENDED, // Guru |
64 | | 0x5B57 | RECOMMENDED | LB_LETTERS, // Hani |
65 | | 0xAC00 | RECOMMENDED, // Hang |
66 | | 0x05D0 | RECOMMENDED | RTL, // Hebr |
67 | | 0x304B | RECOMMENDED | LB_LETTERS, // Hira |
68 | | 0x0C95 | RECOMMENDED, // Knda |
69 | | 0x30AB | RECOMMENDED | LB_LETTERS, // Kana |
70 | | 0x1780 | RECOMMENDED | LB_LETTERS, // Khmr |
71 | | 0x0EA5 | RECOMMENDED | LB_LETTERS, // Laoo |
72 | | 0x004C | RECOMMENDED | CASED, // Latn |
73 | | 0x0D15 | RECOMMENDED, // Mlym |
74 | | 0x1826 | EXCLUSION, // Mong |
75 | | 0x1000 | RECOMMENDED | LB_LETTERS, // Mymr |
76 | | 0x168F | EXCLUSION, // Ogam |
77 | | 0x10300 | EXCLUSION, // Ital |
78 | | 0x0B15 | RECOMMENDED, // Orya |
79 | | 0x16A0 | EXCLUSION, // Runr |
80 | | 0x0D85 | RECOMMENDED, // Sinh |
81 | | 0x0710 | LIMITED_USE | RTL, // Syrc |
82 | | 0x0B95 | RECOMMENDED, // Taml |
83 | | 0x0C15 | RECOMMENDED, // Telu |
84 | | 0x078C | RECOMMENDED | RTL, // Thaa |
85 | | 0x0E17 | RECOMMENDED | LB_LETTERS, // Thai |
86 | | 0x0F40 | RECOMMENDED, // Tibt |
87 | | 0x14C0 | LIMITED_USE, // Cans |
88 | | 0xA288 | LIMITED_USE | LB_LETTERS, // Yiii |
89 | | 0x1703 | EXCLUSION, // Tglg |
90 | | 0x1723 | EXCLUSION, // Hano |
91 | | 0x1743 | EXCLUSION, // Buhd |
92 | | 0x1763 | EXCLUSION, // Tagb |
93 | | 0x280E | UNKNOWN, // Brai |
94 | | 0x10800 | EXCLUSION | RTL, // Cprt |
95 | | 0x1900 | LIMITED_USE, // Limb |
96 | | 0x10000 | EXCLUSION, // Linb |
97 | | 0x10480 | EXCLUSION, // Osma |
98 | | 0x10450 | EXCLUSION, // Shaw |
99 | | 0x1950 | LIMITED_USE | LB_LETTERS, // Tale |
100 | | 0x10380 | EXCLUSION, // Ugar |
101 | | 0, |
102 | | 0x1A00 | EXCLUSION, // Bugi |
103 | | 0x2C00 | EXCLUSION | CASED, // Glag |
104 | | 0x10A00 | EXCLUSION | RTL, // Khar |
105 | | 0xA800 | LIMITED_USE, // Sylo |
106 | | 0x1980 | LIMITED_USE | LB_LETTERS, // Talu |
107 | | 0x2D30 | LIMITED_USE, // Tfng |
108 | | 0x103A0 | EXCLUSION, // Xpeo |
109 | | 0x1B05 | LIMITED_USE, // Bali |
110 | | 0x1BC0 | LIMITED_USE, // Batk |
111 | | 0, |
112 | | 0x11005 | EXCLUSION, // Brah |
113 | | 0xAA00 | LIMITED_USE, // Cham |
114 | | 0, |
115 | | 0, |
116 | | 0, |
117 | | 0, |
118 | | 0x13153 | EXCLUSION, // Egyp |
119 | | 0, |
120 | | 0x5B57 | RECOMMENDED | LB_LETTERS, // Hans |
121 | | 0x5B57 | RECOMMENDED | LB_LETTERS, // Hant |
122 | | 0x16B1C | EXCLUSION, // Hmng |
123 | | 0x10CA1 | EXCLUSION | RTL | CASED, // Hung |
124 | | 0, |
125 | | 0xA984 | LIMITED_USE, // Java |
126 | | 0xA90A | LIMITED_USE, // Kali |
127 | | 0, |
128 | | 0, |
129 | | 0x1C00 | LIMITED_USE, // Lepc |
130 | | 0x10647 | EXCLUSION, // Lina |
131 | | 0x0840 | LIMITED_USE | RTL, // Mand |
132 | | 0, |
133 | | 0x10980 | EXCLUSION | RTL, // Mero |
134 | | 0x07CA | LIMITED_USE | RTL, // Nkoo |
135 | | 0x10C00 | EXCLUSION | RTL, // Orkh |
136 | | 0x1036B | EXCLUSION, // Perm |
137 | | 0xA840 | EXCLUSION, // Phag |
138 | | 0x10900 | EXCLUSION | RTL, // Phnx |
139 | | 0x16F00 | LIMITED_USE, // Plrd |
140 | | 0, |
141 | | 0, |
142 | | 0, |
143 | | 0, |
144 | | 0, |
145 | | 0, |
146 | | 0xA549 | LIMITED_USE, // Vaii |
147 | | 0, |
148 | | 0x12000 | EXCLUSION, // Xsux |
149 | | 0, |
150 | | 0xFDD0 | UNKNOWN, // Zzzz |
151 | | 0x102A0 | EXCLUSION, // Cari |
152 | | 0x304B | RECOMMENDED | LB_LETTERS, // Jpan |
153 | | 0x1A20 | LIMITED_USE | LB_LETTERS, // Lana |
154 | | 0x10280 | EXCLUSION, // Lyci |
155 | | 0x10920 | EXCLUSION | RTL, // Lydi |
156 | | 0x1C5A | LIMITED_USE, // Olck |
157 | | 0xA930 | EXCLUSION, // Rjng |
158 | | 0xA882 | LIMITED_USE, // Saur |
159 | | 0x1D850 | EXCLUSION, // Sgnw |
160 | | 0x1B83 | LIMITED_USE, // Sund |
161 | | 0, |
162 | | 0xABC0 | LIMITED_USE, // Mtei |
163 | | 0x10840 | EXCLUSION | RTL, // Armi |
164 | | 0x10B00 | EXCLUSION | RTL, // Avst |
165 | | 0x11103 | LIMITED_USE, // Cakm |
166 | | 0xAC00 | RECOMMENDED, // Kore |
167 | | 0x11083 | EXCLUSION, // Kthi |
168 | | 0x10AD8 | EXCLUSION | RTL, // Mani |
169 | | 0x10B60 | EXCLUSION | RTL, // Phli |
170 | | 0x10B8F | EXCLUSION | RTL, // Phlp |
171 | | 0, |
172 | | 0x10B40 | EXCLUSION | RTL, // Prti |
173 | | 0x0800 | EXCLUSION | RTL, // Samr |
174 | | 0xAA80 | LIMITED_USE | LB_LETTERS, // Tavt |
175 | | 0, |
176 | | 0, |
177 | | 0xA6A0 | LIMITED_USE, // Bamu |
178 | | 0xA4D0 | LIMITED_USE, // Lisu |
179 | | 0, |
180 | | 0x10A60 | EXCLUSION | RTL, // Sarb |
181 | | 0x16AE6 | EXCLUSION, // Bass |
182 | | 0x1BC20 | EXCLUSION, // Dupl |
183 | | 0x10500 | EXCLUSION, // Elba |
184 | | 0x11315 | EXCLUSION, // Gran |
185 | | 0, |
186 | | 0, |
187 | | 0x1E802 | EXCLUSION | RTL, // Mend |
188 | | 0x109A0 | EXCLUSION | RTL, // Merc |
189 | | 0x10A95 | EXCLUSION | RTL, // Narb |
190 | | 0x10896 | EXCLUSION | RTL, // Nbat |
191 | | 0x10873 | EXCLUSION | RTL, // Palm |
192 | | 0x112BE | EXCLUSION, // Sind |
193 | | 0x118B4 | EXCLUSION | CASED, // Wara |
194 | | 0, |
195 | | 0, |
196 | | 0x16A4F | EXCLUSION, // Mroo |
197 | | 0x1B1C4 | EXCLUSION | LB_LETTERS, // Nshu |
198 | | 0x11183 | EXCLUSION, // Shrd |
199 | | 0x110D0 | EXCLUSION, // Sora |
200 | | 0x11680 | EXCLUSION, // Takr |
201 | | 0x18229 | EXCLUSION | LB_LETTERS, // Tang |
202 | | 0, |
203 | | 0x14400 | EXCLUSION, // Hluw |
204 | | 0x11208 | EXCLUSION, // Khoj |
205 | | 0x11484 | EXCLUSION, // Tirh |
206 | | 0x10537 | EXCLUSION, // Aghb |
207 | | 0x11152 | EXCLUSION, // Mahj |
208 | | 0x11717 | EXCLUSION | LB_LETTERS, // Ahom |
209 | | 0x108F4 | EXCLUSION | RTL, // Hatr |
210 | | 0x1160E | EXCLUSION, // Modi |
211 | | 0x1128F | EXCLUSION, // Mult |
212 | | 0x11AC0 | EXCLUSION, // Pauc |
213 | | 0x1158E | EXCLUSION, // Sidd |
214 | | 0x1E909 | LIMITED_USE | RTL | CASED, // Adlm |
215 | | 0x11C0E | EXCLUSION, // Bhks |
216 | | 0x11C72 | EXCLUSION, // Marc |
217 | | 0x11412 | LIMITED_USE, // Newa |
218 | | 0x104B5 | LIMITED_USE | CASED, // Osge |
219 | | 0x5B57 | RECOMMENDED | LB_LETTERS, // Hanb |
220 | | 0x1112 | RECOMMENDED, // Jamo |
221 | | 0, |
222 | | 0x11D10 | EXCLUSION, // Gonm |
223 | | 0x11A5C | EXCLUSION, // Soyo |
224 | | 0x11A0B | EXCLUSION, // Zanb |
225 | | 0x1180B | EXCLUSION, // Dogr |
226 | | 0x11D71 | LIMITED_USE, // Gong |
227 | | 0x11EE5 | EXCLUSION, // Maka |
228 | | 0x16E40 | EXCLUSION | CASED, // Medf |
229 | | 0x10D12 | LIMITED_USE | RTL, // Rohg |
230 | | 0x10F42 | EXCLUSION | RTL, // Sogd |
231 | | 0x10F19 | EXCLUSION | RTL, // Sogo |
232 | | 0x10FF1 | EXCLUSION | RTL, // Elym |
233 | | 0x1E108 | LIMITED_USE, // Hmnp |
234 | | 0x119CE | EXCLUSION, // Nand |
235 | | 0x1E2E1 | LIMITED_USE, // Wcho |
236 | | 0x10FBF | EXCLUSION | RTL, // Chrs |
237 | | 0x1190C | EXCLUSION, // Diak |
238 | | 0x18C65 | EXCLUSION | LB_LETTERS, // Kits |
239 | | 0x10E88 | EXCLUSION | RTL, // Yezi |
240 | | 0x12FE5 | EXCLUSION, // Cpmn |
241 | | 0x10F7C | EXCLUSION | RTL, // Ougr |
242 | | 0x16ABC | EXCLUSION, // Tnsa |
243 | | 0x1E290 | EXCLUSION, // Toto |
244 | | 0x10582 | EXCLUSION | CASED, // Vith |
245 | | // End copy-paste from parsescriptmetadata.py |
246 | | }; |
247 | | |
248 | 0 | int32_t getScriptProps(UScriptCode script) { |
249 | 0 | if (0 <= script && script < UPRV_LENGTHOF(SCRIPT_PROPS)) { |
250 | 0 | return SCRIPT_PROPS[script]; |
251 | 0 | } else { |
252 | 0 | return 0; |
253 | 0 | } |
254 | 0 | } |
255 | | |
256 | | } // namespace |
257 | | |
258 | | U_CAPI int32_t U_EXPORT2 |
259 | 0 | uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) { |
260 | 0 | if(U_FAILURE(*pErrorCode)) { return 0; } |
261 | 0 | if(capacity < 0 || (capacity > 0 && dest == NULL)) { |
262 | 0 | *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; |
263 | 0 | return 0; |
264 | 0 | } |
265 | 0 | int32_t sampleChar = getScriptProps(script) & 0x1fffff; |
266 | 0 | int32_t length; |
267 | 0 | if(sampleChar == 0) { |
268 | 0 | length = 0; |
269 | 0 | } else { |
270 | 0 | length = U16_LENGTH(sampleChar); |
271 | 0 | if(length <= capacity) { |
272 | 0 | int32_t i = 0; |
273 | 0 | U16_APPEND_UNSAFE(dest, i, sampleChar); |
274 | 0 | } |
275 | 0 | } |
276 | 0 | return u_terminateUChars(dest, capacity, length, pErrorCode); |
277 | 0 | } |
278 | | |
279 | | U_COMMON_API icu::UnicodeString U_EXPORT2 |
280 | 0 | uscript_getSampleUnicodeString(UScriptCode script) { |
281 | 0 | icu::UnicodeString sample; |
282 | 0 | int32_t sampleChar = getScriptProps(script) & 0x1fffff; |
283 | 0 | if(sampleChar != 0) { |
284 | 0 | sample.append(sampleChar); |
285 | 0 | } |
286 | 0 | return sample; |
287 | 0 | } |
288 | | |
289 | | U_CAPI UScriptUsage U_EXPORT2 |
290 | 0 | uscript_getUsage(UScriptCode script) { |
291 | 0 | return (UScriptUsage)((getScriptProps(script) >> 21) & 7); |
292 | 0 | } |
293 | | |
294 | | U_CAPI UBool U_EXPORT2 |
295 | 0 | uscript_isRightToLeft(UScriptCode script) { |
296 | 0 | return (getScriptProps(script) & RTL) != 0; |
297 | 0 | } |
298 | | |
299 | | U_CAPI UBool U_EXPORT2 |
300 | 0 | uscript_breaksBetweenLetters(UScriptCode script) { |
301 | 0 | return (getScriptProps(script) & LB_LETTERS) != 0; |
302 | 0 | } |
303 | | |
304 | | U_CAPI UBool U_EXPORT2 |
305 | 0 | uscript_isCased(UScriptCode script) { |
306 | 0 | return (getScriptProps(script) & CASED) != 0; |
307 | 0 | } |