Coverage Report

Created: 2023-02-22 06:51

/src/icu/source/common/uscript_props.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*   Copyright (C) 2013-2016, International Business Machines
6
*   Corporation and others.  All Rights Reserved.
7
*******************************************************************************
8
*   file name:  uscript_props.cpp
9
*   encoding:   UTF-8
10
*   tab size:   8 (not used)
11
*   indentation:4
12
*
13
*   created on: 2013feb16
14
*   created by: Markus W. Scherer
15
*/
16
17
#include "unicode/utypes.h"
18
#include "unicode/unistr.h"
19
#include "unicode/uscript.h"
20
#include "unicode/utf16.h"
21
#include "ustr_imp.h"
22
#include "cmemory.h"
23
24
namespace {
25
26
// Script metadata (script properties).
27
// See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt
28
29
// 0 = NOT_ENCODED, no sample character, default false script properties.
30
// Bits 20.. 0: sample character
31
32
// Bits 23..21: usage
33
// Usage values, pre-shifted into bits 23..21 of a SCRIPT_PROPS entry.
// uscript_getUsage() recovers the value with (props >> 21) & 7.
const int32_t UNKNOWN = 1 << 21;
34
const int32_t EXCLUSION = 2 << 21;
35
const int32_t LIMITED_USE = 3 << 21;
36
// const int32_t ASPIRATIONAL = 4 << 21; -- not used any more since Unicode 10
37
const int32_t RECOMMENDED = 5 << 21;
38
39
// Bits 31..24: Single-bit flags
40
const int32_t RTL = 1 << 24;  // tested by uscript_isRightToLeft()
41
const int32_t LB_LETTERS = 1 << 25;  // tested by uscript_breaksBetweenLetters()
42
const int32_t CASED = 1 << 26;  // tested by uscript_isCased()
43
44
// Packed per-script properties; getScriptProps() indexes this table directly
// with the UScriptCode value.
// Each non-zero entry ORs together a sample code point (bits 20..0), one usage
// value (bits 23..21) and any of the single-bit flags RTL / LB_LETTERS / CASED.
// A plain 0 entry has no sample character, no usage value and no flags set.
// The rows between the Begin/End markers are pasted tool output (see the marker
// comments); presumably they should be regenerated rather than edited by hand.
const int32_t SCRIPT_PROPS[] = {
45
    // Begin copy-paste output from
46
    // tools/trunk/unicode/py/parsescriptmetadata.py
47
    0x0040 | RECOMMENDED,  // Zyyy
48
    0x0308 | RECOMMENDED,  // Zinh
49
    0x0628 | RECOMMENDED | RTL,  // Arab
50
    0x0531 | RECOMMENDED | CASED,  // Armn
51
    0x0995 | RECOMMENDED,  // Beng
52
    0x3105 | RECOMMENDED | LB_LETTERS,  // Bopo
53
    0x13C4 | LIMITED_USE | CASED,  // Cher
54
    0x03E2 | EXCLUSION | CASED,  // Copt
55
    0x042F | RECOMMENDED | CASED,  // Cyrl
56
    0x10414 | EXCLUSION | CASED,  // Dsrt
57
    0x0905 | RECOMMENDED,  // Deva
58
    0x12A0 | RECOMMENDED,  // Ethi
59
    0x10D3 | RECOMMENDED,  // Geor
60
    0x10330 | EXCLUSION,  // Goth
61
    0x03A9 | RECOMMENDED | CASED,  // Grek
62
    0x0A95 | RECOMMENDED,  // Gujr
63
    0x0A15 | RECOMMENDED,  // Guru
64
    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hani
65
    0xAC00 | RECOMMENDED,  // Hang
66
    0x05D0 | RECOMMENDED | RTL,  // Hebr
67
    0x304B | RECOMMENDED | LB_LETTERS,  // Hira
68
    0x0C95 | RECOMMENDED,  // Knda
69
    0x30AB | RECOMMENDED | LB_LETTERS,  // Kana
70
    0x1780 | RECOMMENDED | LB_LETTERS,  // Khmr
71
    0x0EA5 | RECOMMENDED | LB_LETTERS,  // Laoo
72
    0x004C | RECOMMENDED | CASED,  // Latn
73
    0x0D15 | RECOMMENDED,  // Mlym
74
    0x1826 | EXCLUSION,  // Mong
75
    0x1000 | RECOMMENDED | LB_LETTERS,  // Mymr
76
    0x168F | EXCLUSION,  // Ogam
77
    0x10300 | EXCLUSION,  // Ital
78
    0x0B15 | RECOMMENDED,  // Orya
79
    0x16A0 | EXCLUSION,  // Runr
80
    0x0D85 | RECOMMENDED,  // Sinh
81
    0x0710 | LIMITED_USE | RTL,  // Syrc
82
    0x0B95 | RECOMMENDED,  // Taml
83
    0x0C15 | RECOMMENDED,  // Telu
84
    0x078C | RECOMMENDED | RTL,  // Thaa
85
    0x0E17 | RECOMMENDED | LB_LETTERS,  // Thai
86
    0x0F40 | RECOMMENDED,  // Tibt
87
    0x14C0 | LIMITED_USE,  // Cans
88
    0xA288 | LIMITED_USE | LB_LETTERS,  // Yiii
89
    0x1703 | EXCLUSION,  // Tglg
90
    0x1723 | EXCLUSION,  // Hano
91
    0x1743 | EXCLUSION,  // Buhd
92
    0x1763 | EXCLUSION,  // Tagb
93
    0x280E | UNKNOWN,  // Brai
94
    0x10800 | EXCLUSION | RTL,  // Cprt
95
    0x1900 | LIMITED_USE,  // Limb
96
    0x10000 | EXCLUSION,  // Linb
97
    0x10480 | EXCLUSION,  // Osma
98
    0x10450 | EXCLUSION,  // Shaw
99
    0x1950 | LIMITED_USE | LB_LETTERS,  // Tale
100
    0x10380 | EXCLUSION,  // Ugar
101
    0,
102
    0x1A00 | EXCLUSION,  // Bugi
103
    0x2C00 | EXCLUSION | CASED,  // Glag
104
    0x10A00 | EXCLUSION | RTL,  // Khar
105
    0xA800 | LIMITED_USE,  // Sylo
106
    0x1980 | LIMITED_USE | LB_LETTERS,  // Talu
107
    0x2D30 | LIMITED_USE,  // Tfng
108
    0x103A0 | EXCLUSION,  // Xpeo
109
    0x1B05 | LIMITED_USE,  // Bali
110
    0x1BC0 | LIMITED_USE,  // Batk
111
    0,
112
    0x11005 | EXCLUSION,  // Brah
113
    0xAA00 | LIMITED_USE,  // Cham
114
    0,
115
    0,
116
    0,
117
    0,
118
    0x13153 | EXCLUSION,  // Egyp
119
    0,
120
    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hans
121
    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hant
122
    0x16B1C | EXCLUSION,  // Hmng
123
    0x10CA1 | EXCLUSION | RTL | CASED,  // Hung
124
    0,
125
    0xA984 | LIMITED_USE,  // Java
126
    0xA90A | LIMITED_USE,  // Kali
127
    0,
128
    0,
129
    0x1C00 | LIMITED_USE,  // Lepc
130
    0x10647 | EXCLUSION,  // Lina
131
    0x0840 | LIMITED_USE | RTL,  // Mand
132
    0,
133
    0x10980 | EXCLUSION | RTL,  // Mero
134
    0x07CA | LIMITED_USE | RTL,  // Nkoo
135
    0x10C00 | EXCLUSION | RTL,  // Orkh
136
    0x1036B | EXCLUSION,  // Perm
137
    0xA840 | EXCLUSION,  // Phag
138
    0x10900 | EXCLUSION | RTL,  // Phnx
139
    0x16F00 | LIMITED_USE,  // Plrd
140
    0,
141
    0,
142
    0,
143
    0,
144
    0,
145
    0,
146
    0xA549 | LIMITED_USE,  // Vaii
147
    0,
148
    0x12000 | EXCLUSION,  // Xsux
149
    0,
150
    0xFDD0 | UNKNOWN,  // Zzzz
151
    0x102A0 | EXCLUSION,  // Cari
152
    0x304B | RECOMMENDED | LB_LETTERS,  // Jpan
153
    0x1A20 | LIMITED_USE | LB_LETTERS,  // Lana
154
    0x10280 | EXCLUSION,  // Lyci
155
    0x10920 | EXCLUSION | RTL,  // Lydi
156
    0x1C5A | LIMITED_USE,  // Olck
157
    0xA930 | EXCLUSION,  // Rjng
158
    0xA882 | LIMITED_USE,  // Saur
159
    0x1D850 | EXCLUSION,  // Sgnw
160
    0x1B83 | LIMITED_USE,  // Sund
161
    0,
162
    0xABC0 | LIMITED_USE,  // Mtei
163
    0x10840 | EXCLUSION | RTL,  // Armi
164
    0x10B00 | EXCLUSION | RTL,  // Avst
165
    0x11103 | LIMITED_USE,  // Cakm
166
    0xAC00 | RECOMMENDED,  // Kore
167
    0x11083 | EXCLUSION,  // Kthi
168
    0x10AD8 | EXCLUSION | RTL,  // Mani
169
    0x10B60 | EXCLUSION | RTL,  // Phli
170
    0x10B8F | EXCLUSION | RTL,  // Phlp
171
    0,
172
    0x10B40 | EXCLUSION | RTL,  // Prti
173
    0x0800 | EXCLUSION | RTL,  // Samr
174
    0xAA80 | LIMITED_USE | LB_LETTERS,  // Tavt
175
    0,
176
    0,
177
    0xA6A0 | LIMITED_USE,  // Bamu
178
    0xA4D0 | LIMITED_USE,  // Lisu
179
    0,
180
    0x10A60 | EXCLUSION | RTL,  // Sarb
181
    0x16AE6 | EXCLUSION,  // Bass
182
    0x1BC20 | EXCLUSION,  // Dupl
183
    0x10500 | EXCLUSION,  // Elba
184
    0x11315 | EXCLUSION,  // Gran
185
    0,
186
    0,
187
    0x1E802 | EXCLUSION | RTL,  // Mend
188
    0x109A0 | EXCLUSION | RTL,  // Merc
189
    0x10A95 | EXCLUSION | RTL,  // Narb
190
    0x10896 | EXCLUSION | RTL,  // Nbat
191
    0x10873 | EXCLUSION | RTL,  // Palm
192
    0x112BE | EXCLUSION,  // Sind
193
    0x118B4 | EXCLUSION | CASED,  // Wara
194
    0,
195
    0,
196
    0x16A4F | EXCLUSION,  // Mroo
197
    0x1B1C4 | EXCLUSION | LB_LETTERS,  // Nshu
198
    0x11183 | EXCLUSION,  // Shrd
199
    0x110D0 | EXCLUSION,  // Sora
200
    0x11680 | EXCLUSION,  // Takr
201
    0x18229 | EXCLUSION | LB_LETTERS,  // Tang
202
    0,
203
    0x14400 | EXCLUSION,  // Hluw
204
    0x11208 | EXCLUSION,  // Khoj
205
    0x11484 | EXCLUSION,  // Tirh
206
    0x10537 | EXCLUSION,  // Aghb
207
    0x11152 | EXCLUSION,  // Mahj
208
    0x11717 | EXCLUSION | LB_LETTERS,  // Ahom
209
    0x108F4 | EXCLUSION | RTL,  // Hatr
210
    0x1160E | EXCLUSION,  // Modi
211
    0x1128F | EXCLUSION,  // Mult
212
    0x11AC0 | EXCLUSION,  // Pauc
213
    0x1158E | EXCLUSION,  // Sidd
214
    0x1E909 | LIMITED_USE | RTL | CASED,  // Adlm
215
    0x11C0E | EXCLUSION,  // Bhks
216
    0x11C72 | EXCLUSION,  // Marc
217
    0x11412 | LIMITED_USE,  // Newa
218
    0x104B5 | LIMITED_USE | CASED,  // Osge
219
    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hanb
220
    0x1112 | RECOMMENDED,  // Jamo
221
    0,
222
    0x11D10 | EXCLUSION,  // Gonm
223
    0x11A5C | EXCLUSION,  // Soyo
224
    0x11A0B | EXCLUSION,  // Zanb
225
    0x1180B | EXCLUSION,  // Dogr
226
    0x11D71 | LIMITED_USE,  // Gong
227
    0x11EE5 | EXCLUSION,  // Maka
228
    0x16E40 | EXCLUSION | CASED,  // Medf
229
    0x10D12 | LIMITED_USE | RTL,  // Rohg
230
    0x10F42 | EXCLUSION | RTL,  // Sogd
231
    0x10F19 | EXCLUSION | RTL,  // Sogo
232
    0x10FF1 | EXCLUSION | RTL,  // Elym
233
    0x1E108 | LIMITED_USE,  // Hmnp
234
    0x119CE | EXCLUSION,  // Nand
235
    0x1E2E1 | LIMITED_USE,  // Wcho
236
    0x10FBF | EXCLUSION | RTL,  // Chrs
237
    0x1190C | EXCLUSION,  // Diak
238
    0x18C65 | EXCLUSION | LB_LETTERS,  // Kits
239
    0x10E88 | EXCLUSION | RTL,  // Yezi
240
    0x12FE5 | EXCLUSION,  // Cpmn
241
    0x10F7C | EXCLUSION | RTL,  // Ougr
242
    0x16ABC | EXCLUSION,  // Tnsa
243
    0x1E290 | EXCLUSION,  // Toto
244
    0x10582 | EXCLUSION | CASED,  // Vith
245
    // End copy-paste from parsescriptmetadata.py
246
};
247
248
0
// Fetches the packed property word for a script code.
// Yields SCRIPT_PROPS[script] when script is a valid index into the table,
// and 0 (no sample character, no usage value, no flags) for any other value.
int32_t getScriptProps(UScriptCode script) {
    const bool inTable = script >= 0 && script < UPRV_LENGTHOF(SCRIPT_PROPS);
    return inTable ? SCRIPT_PROPS[script] : 0;
}
255
256
}  // namespace
257
258
// Writes the sample character of a script into a caller-supplied UTF-16 buffer.
//
// script      script whose sample character is requested
// dest        output buffer; may be null only when capacity is 0
// capacity    number of UChars available in dest; must not be negative
// pErrorCode  in/out ICU error code; it is dereferenced unconditionally, and
//             the function returns 0 immediately if it already holds a failure
//
// Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 for a negative capacity or a null
// dest with a positive capacity. Otherwise returns whatever u_terminateUChars()
// returns for the computed length (0 when the script has no sample character).
// NOTE(review): u_terminateUChars() presumably NUL-terminates and reports
// overflow through pErrorCode -- confirm against ustr_imp.h.
U_CAPI int32_t U_EXPORT2
uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) {
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }
    const bool badBuffer = capacity < 0 || (dest == nullptr && capacity > 0);
    if(badBuffer) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    // Bits 20..0 of the packed properties hold the sample code point.
    const int32_t codePoint = getScriptProps(script) & 0x1fffff;
    int32_t length = 0;
    if(codePoint != 0) {
        length = U16_LENGTH(codePoint);
        // Only write when the whole code point fits; the length is still
        // passed on below so the terminate helper sees the required size.
        if(length <= capacity) {
            int32_t offset = 0;
            U16_APPEND_UNSAFE(dest, offset, codePoint);
        }
    }
    return u_terminateUChars(dest, capacity, length, pErrorCode);
}
278
279
// Returns the sample character of a script as a UnicodeString.
// The string is empty when the script's packed properties carry no sample
// character (including script codes outside the table, for which
// getScriptProps() returns 0).
U_COMMON_API icu::UnicodeString U_EXPORT2
uscript_getSampleUnicodeString(UScriptCode script) {
    // Bits 20..0 of the packed properties hold the sample code point.
    const int32_t codePoint = getScriptProps(script) & 0x1fffff;
    icu::UnicodeString result;
    if(codePoint == 0) {
        return result;
    }
    result.append(codePoint);
    return result;
}
288
289
// Returns the usage value of a script: the 3-bit field stored in bits 23..21
// of its packed properties, converted to UScriptUsage.
// Script codes outside the table produce the value 0.
U_CAPI UScriptUsage U_EXPORT2
uscript_getUsage(UScriptCode script) {
    const int32_t usageBits = (getScriptProps(script) >> 21) & 7;
    return static_cast<UScriptUsage>(usageBits);
}
293
294
// Reports whether the RTL flag is set in the script's packed properties.
// Script codes outside the table yield false, since their properties are 0.
U_CAPI UBool U_EXPORT2
uscript_isRightToLeft(UScriptCode script) {
    const int32_t props = getScriptProps(script);
    // RTL is a single bit, so comparing the masked value to RTL is the same
    // as testing it for non-zero.
    return (props & RTL) == RTL;
}
298
299
// Reports whether the LB_LETTERS flag is set in the script's packed properties.
// Script codes outside the table yield false, since their properties are 0.
U_CAPI UBool U_EXPORT2
uscript_breaksBetweenLetters(UScriptCode script) {
    const int32_t props = getScriptProps(script);
    // LB_LETTERS is a single bit, so comparing the masked value to it is the
    // same as testing it for non-zero.
    return (props & LB_LETTERS) == LB_LETTERS;
}
303
304
// Reports whether the CASED flag is set in the script's packed properties.
// Script codes outside the table yield false, since their properties are 0.
U_CAPI UBool U_EXPORT2
uscript_isCased(UScriptCode script) {
    const int32_t props = getScriptProps(script);
    // CASED is a single bit, so comparing the masked value to it is the same
    // as testing it for non-zero.
    return (props & CASED) == CASED;
}