Coverage Report

Created: 2025-12-07 06:36

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/icu/icu4c/source/common/propname.cpp
Line
Count
Source
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
**********************************************************************
5
* Copyright (c) 2002-2014, International Business Machines
6
* Corporation and others.  All Rights Reserved.
7
**********************************************************************
8
* Author: Alan Liu
9
* Created: October 30 2002
10
* Since: ICU 2.4
11
* 2010nov19 Markus Scherer  Rewrite for formatVersion 2.
12
**********************************************************************
13
*/
14
#include "propname.h"
15
#include "unicode/uchar.h"
16
#include "unicode/udata.h"
17
#include "unicode/uscript.h"
18
#include "umutex.h"
19
#include "cmemory.h"
20
#include "cstring.h"
21
#include "uarrsort.h"
22
#include "uinvchar.h"
23
24
#define INCLUDED_FROM_PROPNAME_CPP
25
#include "propname_data.h"
26
27
U_CDECL_BEGIN
28
29
/**
30
 * Get the next non-ignorable ASCII character from a property name
31
 * and lowercase it.
 *
 * Ignorable characters are '-' (0x2d), '_' (0x5f), space (0x20) and the
 * control characters 0x09..0x0d; they are skipped before a character is returned.
 *
 * @param name NUL-terminated name, positioned where scanning should start
32
 * @return ((advance count for the name)<<8)|character
 *         The advance count covers the skipped characters plus the one that was
 *         consumed (the terminating NUL is counted too). The low byte is 0 when
 *         the end of the name was reached.
33
 */
34
static inline int32_t
35
184k
getASCIIPropertyNameChar(const char *name) {
36
184k
    int32_t i;
37
184k
    char c;
38
39
    /* Ignore delimiters '-', '_', and ASCII White_Space */
40
184k
    for(i=0;  /* i counts every byte read, including the one that ends the loop */
41
228k
        (c=name[i++])==0x2d || c==0x5f ||
42
218k
        c==0x20 || (0x09<=c && c<=0x0d);
43
184k
    ) {}
44
45
184k
    if(c!=0) {
46
157k
        return (i << 8) | static_cast<uint8_t>(uprv_asciitolower(c));
47
157k
    } else {
48
27.2k
        return i<<8;  /* end of name: the character byte stays 0 */
49
27.2k
    }
50
184k
}
51
52
/**
53
 * Get the next non-ignorable EBCDIC character from a property name
54
 * and lowercase it.
 *
 * The bytes skipped as ignorable are 0x60 and 0x6d (the delimiters) and
 * 0x40, 0x05, 0x15, 0x25, 0x0b, 0x0c, 0x0d (treated as white space).
 *
 * @param name NUL-terminated name, positioned where scanning should start
55
 * @return ((advance count for the name)<<8)|character
 *         The advance count covers the skipped bytes plus the one that was
 *         consumed (the terminating NUL is counted too). The low byte is 0 when
 *         the end of the name was reached.
56
 */
57
static inline int32_t
58
0
getEBCDICPropertyNameChar(const char *name) {
59
0
    int32_t i;
60
0
    char c;
61
62
    /* Ignore delimiters '-', '_', and EBCDIC White_Space */
63
0
    for(i=0;  /* i counts every byte read, including the one that ends the loop */
64
0
        (c=name[i++])==0x60 || c==0x6d ||
65
0
        c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d;
66
0
    ) {}
67
68
0
    if(c!=0) {
69
0
        return (i << 8) | static_cast<uint8_t>(uprv_ebcdictolower(c));
70
0
    } else {
71
0
        return i<<8;  /* end of name: the character byte stays 0 */
72
0
    }
73
0
}
74
75
/**
76
 * Unicode property names and property value names are compared "loosely".
77
 *
78
 * UCD.html 4.0.1 says:
79
 *   For all property names, property value names, and for property values for
80
 *   Enumerated, Binary, or Catalog properties, use the following
81
 *   loose matching rule:
82
 *
83
 *   LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
84
 *
85
 * This function does just that, for (char *) name strings.
86
 * It is almost identical to ucnv_compareNames() but also ignores
87
 * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
88
 *
 * @param name1 first NUL-terminated name
 * @param name2 second NUL-terminated name
 * @return 0 if the names match loosely; otherwise the difference between the
 *         first pair of lowercased characters that differ. A name that ends
 *         first compares as smaller because its character value is 0.
 *
89
 * @internal
90
 */
91
92
U_CAPI int32_t U_EXPORT2
93
45.3k
uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
94
45.3k
    int32_t rc, r1, r2;
95
96
92.3k
    for(;;) {
97
92.3k
        r1=getASCIIPropertyNameChar(name1);
98
92.3k
        r2=getASCIIPropertyNameChar(name2);
99
100
        /* If we reach the ends of both strings then they match */
101
92.3k
        if(((r1|r2)&0xff)==0) {
102
13.0k
            return 0;
103
13.0k
        }
104
105
        /*
         * Compare the lowercased characters.
         * r1 and r2 also carry the advance counts in their upper bits, so they can
         * differ while the characters are equal (different numbers of skipped
         * delimiters). In that case rc is 0 and the comparison continues.
         */
106
79.3k
        if(r1!=r2) {
107
35.9k
            rc=(r1&0xff)-(r2&0xff);
108
35.9k
            if(rc!=0) {
109
32.2k
                return rc;
110
32.2k
            }
111
35.9k
        }
112
113
47.0k
        name1+=r1>>8;  /* step over the skipped delimiters and the compared character */
114
47.0k
        name2+=r2>>8;
115
47.0k
    }
116
45.3k
}
117
118
/**
 * EBCDIC counterpart of the loose property-name comparison: compares two
 * NUL-terminated names while ignoring case and the delimiter and white-space
 * bytes that getEBCDICPropertyNameChar() skips.
 *
 * @param name1 first NUL-terminated name
 * @param name2 second NUL-terminated name
 * @return 0 if the names match loosely; otherwise the difference between the
 *         first pair of lowercased bytes that differ. A name that ends first
 *         compares as smaller because its byte value is 0.
 */
U_CAPI int32_t U_EXPORT2
119
0
uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
120
0
    int32_t rc, r1, r2;
121
122
0
    for(;;) {
123
0
        r1=getEBCDICPropertyNameChar(name1);
124
0
        r2=getEBCDICPropertyNameChar(name2);
125
126
        /* If we reach the ends of both strings then they match */
127
0
        if(((r1|r2)&0xff)==0) {
128
0
            return 0;
129
0
        }
130
131
        /*
         * Compare the lowercased characters.
         * r1 and r2 also carry the advance counts in their upper bits, so they can
         * differ while the characters are equal (different numbers of skipped
         * delimiters). In that case rc is 0 and the comparison continues.
         */
132
0
        if(r1!=r2) {
133
0
            rc=(r1&0xff)-(r2&0xff);
134
0
            if(rc!=0) {
135
0
                return rc;
136
0
            }
137
0
        }
138
139
0
        name1+=r1>>8;  /* step over the skipped delimiters and the compared byte */
140
0
        name2+=r2>>8;
141
0
    }
142
0
}
143
144
U_CDECL_END
145
146
U_NAMESPACE_BEGIN
147
148
571k
// Looks up a property in the valueMaps index.
//
// Layout as read here: valueMaps[0] is the number of property ranges. Each range
// stores its start and its limit (exclusive), followed by two entries for every
// property in [start, limit).
//
// @param property the property enum value to look for
// @return the valueMaps index of the two-entry slot for this property,
//         or 0 if no range contains it
int32_t PropNameData::findProperty(int32_t property) {
149
571k
    int32_t i=1;  // valueMaps index, initially after numRanges
150
1.33M
    for(int32_t numRanges=valueMaps[0]; numRanges>0; --numRanges) {
151
        // Read and skip the start and limit of this range.
152
1.33M
        int32_t start=valueMaps[i];
153
1.33M
        int32_t limit=valueMaps[i+1];
154
1.33M
        i+=2;
155
1.33M
        if(property<start) {
156
0
            break;  // presumably ranges are stored in ascending order, so no later range can match
157
0
        }
158
1.33M
        if(property<limit) {
159
571k
            return i+(property-start)*2;  // two entries per property
160
571k
        }
161
761k
        i+=(limit-start)*2;  // Skip all entries for this range.
162
761k
    }
163
0
    return 0;
164
571k
}
165
166
252k
// Finds the name-group offset for one value of a property.
//
// Layout as read here, starting at valueMapIndex: one word for the BytesTrie
// offset, then a count word.
// - If the count is below 0x10 it is a number of value ranges; each range stores
//   its start, its limit (exclusive) and one name-group offset per value.
// - Otherwise (count-0x10) values are listed, followed by a parallel array of
//   name-group offsets in the same order.
//
// @param valueMapIndex index of the property value map in valueMaps; 0 means none
// @param value the property value to look for
// @return the name-group offset read from valueMaps, or 0 if the value is not found
int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value) {
167
252k
    if(valueMapIndex==0) {
168
0
        return 0;  // The property does not have named values.
169
0
    }
170
252k
    ++valueMapIndex;  // Skip the BytesTrie offset.
171
252k
    int32_t numRanges=valueMaps[valueMapIndex++];
172
252k
    if(numRanges<0x10) {
173
        // Ranges of values.
174
252k
        for(; numRanges>0; --numRanges) {
175
            // Read and skip the start and limit of this range.
176
252k
            int32_t start=valueMaps[valueMapIndex];
177
252k
            int32_t limit=valueMaps[valueMapIndex+1];
178
252k
            valueMapIndex+=2;
179
252k
            if(value<start) {
180
0
                break;  // presumably ranges are stored in ascending order
181
0
            }
182
252k
            if(value<limit) {
183
252k
                return valueMaps[valueMapIndex+value-start];
184
252k
            }
185
0
            valueMapIndex+=limit-start;  // Skip all entries for this range.
186
0
        }
187
252k
    } else {
188
        // List of values.
189
0
        int32_t valuesStart=valueMapIndex;
190
0
        int32_t nameGroupOffsetsStart=valueMapIndex+numRanges-0x10;  // the offsets follow the listed values
191
0
        do {
192
0
            int32_t v=valueMaps[valueMapIndex];
193
0
            if(value<v) {
194
0
                break;  // presumably the values are listed in ascending order
195
0
            }
196
0
            if(value==v) {
197
0
                return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart];  // same position in the offsets array
198
0
            }
199
0
        } while(++valueMapIndex<nameGroupOffsetsStart);
200
0
    }
201
0
    return 0;
202
252k
}
203
204
252k
// Returns one name out of a name group.
//
// The first byte of the group is the number of names; the names follow as
// consecutive NUL-terminated strings.
//
// @param nameGroup pointer to the count byte of the name group
// @param nameIndex zero-based index of the requested name
// @return pointer to the name inside the group, or nullptr if nameIndex is
//         out of range or the selected name is empty
const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) {
205
252k
    int32_t numNames=*nameGroup++;
206
252k
    if(nameIndex<0 || numNames<=nameIndex) {
207
0
        return nullptr;
208
0
    }
209
    // Skip nameIndex names.
210
252k
    for(; nameIndex>0; --nameIndex) {
211
0
        nameGroup=uprv_strchr(nameGroup, 0)+1;  // move just past the terminating NUL of the current name
212
0
    }
213
252k
    if(*nameGroup==0) {
214
0
        return nullptr;  // no name (Property[Value]Aliases.txt has "n/a")
215
0
    }
216
252k
    return nameGroup;
217
252k
}
218
219
466k
// Checks whether a name is stored in the trie, using loose matching.
//
// Each character is mapped through uprv_invCharToLowercaseAscii(); '-', '_',
// space and 0x09..0x0d are skipped; every other byte is fed to trie.next().
// The trie is advanced in place, so a caller can read the matched value from it
// after a successful return.
//
// @param trie the BytesTrie to walk; its state is modified
// @param name NUL-terminated name, may be nullptr
// @return true if the whole name was consumed and the trie reports a value at
//         that point; false for nullptr or when the trie cannot continue
UBool PropNameData::containsName(BytesTrie &trie, const char *name) {
220
466k
    if(name==nullptr) {
221
0
        return false;
222
0
    }
223
466k
    UStringTrieResult result=USTRINGTRIE_NO_VALUE;  // state before any byte has been fed to the trie
224
466k
    char c;
225
1.54M
    while((c=*name++)!=0) {
226
1.15M
        c=uprv_invCharToLowercaseAscii(c);
227
        // Ignore delimiters '-', '_', and ASCII White_Space.
228
1.15M
        if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) {
229
99.2k
            continue;
230
99.2k
        }
231
1.05M
        if(!USTRINGTRIE_HAS_NEXT(result)) {
232
80.8k
            return false;  // more name characters remain but the previous step allows no continuation
233
80.8k
        }
234
974k
        result = trie.next(static_cast<uint8_t>(c));
235
974k
    }
236
385k
    return USTRINGTRIE_HAS_VALUE(result);
237
466k
}
238
239
0
// Returns one of the names of a property.
//
// The first entry of the property slot found by findProperty() is used as the
// offset of the property name group within nameGroups.
//
// @param property the property enum value
// @param nameChoice zero-based index of the requested name within the name group
// @return the name, or nullptr if the property is unknown or getName() finds
//         no name for nameChoice
const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) {
240
0
    int32_t valueMapIndex=findProperty(property);
241
0
    if(valueMapIndex==0) {
242
0
        return nullptr;  // Not a known property.
243
0
    }
244
0
    return getName(nameGroups+valueMaps[valueMapIndex], nameChoice);
245
0
}
246
247
252k
// Returns one of the names of a property value.
//
// The second entry of the property slot found by findProperty() is passed to
// findPropertyValueNameGroup() as the index of the property value map.
//
// @param property the property enum value
// @param value the property value to name
// @param nameChoice zero-based index of the requested name within the name group
// @return the name, or nullptr if the property is unknown, the value has no
//         name group, or getName() finds no name for nameChoice
const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) {
248
252k
    int32_t valueMapIndex=findProperty(property);
249
252k
    if(valueMapIndex==0) {
250
0
        return nullptr;  // Not a known property.
251
0
    }
252
252k
    int32_t nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value);
253
252k
    if(nameGroupOffset==0) {
254
0
        return nullptr;  // No name group was found for this value.
255
0
    }
256
252k
    return getName(nameGroups+nameGroupOffset, nameChoice);
257
252k
}
258
259
466k
// Looks up an alias in the BytesTrie that starts at bytesTries+bytesTrieOffset.
//
// @param bytesTrieOffset offset of the trie within bytesTries
// @param alias NUL-terminated name, matched loosely by containsName()
// @return the value stored in the trie for the alias,
//         or UCHAR_INVALID_CODE if the alias is not found
int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) {
260
466k
    BytesTrie trie(bytesTries+bytesTrieOffset);
261
466k
    if(containsName(trie, alias)) {
262
321k
        return trie.getValue();  // containsName() left the trie positioned on the match
263
321k
    } else {
264
145k
        return UCHAR_INVALID_CODE;
265
145k
    }
266
466k
}
267
268
147k
// Maps a property alias to its enum value.
// The lookup uses the trie at offset 0 of bytesTries.
//
// @param alias NUL-terminated property name or alias
// @return the value found by getPropertyOrValueEnum(),
//         or UCHAR_INVALID_CODE if the alias is not found
int32_t PropNameData::getPropertyEnum(const char *alias) {
269
147k
    return getPropertyOrValueEnum(0, alias);
270
147k
}
271
272
319k
// Maps a property value alias to its enum value for the given property.
//
// @param property the property enum value whose values are searched
// @param alias NUL-terminated value name or alias
// @return the value found by getPropertyOrValueEnum(), or UCHAR_INVALID_CODE
//         if the property is unknown, has no named values, or the alias is
//         not found
int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) {
273
319k
    int32_t valueMapIndex=findProperty(property);
274
319k
    if(valueMapIndex==0) {
275
0
        return UCHAR_INVALID_CODE;  // Not a known property.
276
0
    }
277
319k
    valueMapIndex=valueMaps[valueMapIndex+1];  // second entry of the property slot
278
319k
    if(valueMapIndex==0) {
279
0
        return UCHAR_INVALID_CODE;  // The property does not have named values.
280
0
    }
281
    // valueMapIndex is the start of the property's valueMap,
282
    // where the first word is the BytesTrie offset.
283
319k
    return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
284
319k
}
285
U_NAMESPACE_END
286
287
//----------------------------------------------------------------------
288
// Public API implementation
289
290
// Returns one of the names of a property.
// Thin wrapper that forwards to PropNameData::getPropertyName().
//
// @param property the property to name
// @param nameChoice which of the property names to return
// @return the result of PropNameData::getPropertyName(), which is nullptr when
//         the property is unknown or has no name for nameChoice
U_CAPI const char* U_EXPORT2
291
u_getPropertyName(UProperty property,
292
0
                  UPropertyNameChoice nameChoice) UPRV_NO_SANITIZE_UNDEFINED {
293
    // The nameChoice is really an integer with a couple of named constants.
294
    // Unicode allows for names other than short and long ones.
295
    // If present, these will be returned for U_LONG_PROPERTY_NAME + i, where i=1, 2,...
    // NOTE(review): UPRV_NO_SANITIZE_UNDEFINED is presumably needed because such
    // nameChoice values are not named enum constants; confirm against its definition.
296
0
    U_NAMESPACE_USE
297
0
    return PropNameData::getPropertyName(property, nameChoice);
298
0
}
299
300
// Maps a property name or alias to its UProperty value.
// Thin wrapper that forwards to PropNameData::getPropertyEnum().
//
// @param alias NUL-terminated property name or alias
// @return the matching property, or UCHAR_INVALID_CODE (converted to UProperty)
//         if the alias is not found
U_CAPI UProperty U_EXPORT2
301
147k
u_getPropertyEnum(const char* alias) {
302
147k
    U_NAMESPACE_USE
303
147k
    return (UProperty)PropNameData::getPropertyEnum(alias);  // the int32_t result is converted to the enum type
304
147k
}
305
306
// Returns one of the names of a property value.
// Thin wrapper that forwards to PropNameData::getPropertyValueName().
//
// @param property the property the value belongs to
// @param value the property value to name
// @param nameChoice which of the value names to return
// @return the result of PropNameData::getPropertyValueName(), which is nullptr
//         when the property or value is unknown or has no name for nameChoice
U_CAPI const char* U_EXPORT2
307
u_getPropertyValueName(UProperty property,
308
                       int32_t value,
309
252k
                       UPropertyNameChoice nameChoice) UPRV_NO_SANITIZE_UNDEFINED {
310
    // The nameChoice is really an integer with a couple of named constants.
311
    // Unicode allows for names other than short and long ones.
312
    // If present, these will be returned for U_LONG_PROPERTY_NAME + i, where i=1, 2,...
    // NOTE(review): UPRV_NO_SANITIZE_UNDEFINED is presumably needed because such
    // nameChoice values are not named enum constants; confirm against its definition.
313
252k
    U_NAMESPACE_USE
314
252k
    return PropNameData::getPropertyValueName(property, value, nameChoice);
315
252k
}
316
317
// Maps a property value name or alias to its integer value for a property.
// Thin wrapper that forwards to PropNameData::getPropertyValueEnum().
//
// @param property the property whose values are searched
// @param alias NUL-terminated value name or alias
// @return the matching value, or UCHAR_INVALID_CODE if the property is unknown,
//         has no named values, or the alias is not found
U_CAPI int32_t U_EXPORT2
318
u_getPropertyValueEnum(UProperty property,
319
319k
                       const char* alias) {
320
319k
    U_NAMESPACE_USE
321
319k
    return PropNameData::getPropertyValueEnum(property, alias);
322
319k
}
323
324
// Returns the name of a script code, requested with U_LONG_PROPERTY_NAME.
// Implemented as a lookup of the UCHAR_SCRIPT property value name.
//
// @param scriptCode the script code to name
// @return the result of u_getPropertyValueName() for this script code
U_CAPI const char*  U_EXPORT2
325
0
uscript_getName(UScriptCode scriptCode){
326
0
    return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
327
0
                                  U_LONG_PROPERTY_NAME);
328
0
}
329
330
// Returns the name of a script code, requested with U_SHORT_PROPERTY_NAME.
// Implemented as a lookup of the UCHAR_SCRIPT property value name.
//
// @param scriptCode the script code to name
// @return the result of u_getPropertyValueName() for this script code
U_CAPI const char*  U_EXPORT2
331
252k
uscript_getShortName(UScriptCode scriptCode){
332
252k
    return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
333
252k
                                  U_SHORT_PROPERTY_NAME);
334
252k
}