/src/mozilla-central/intl/icu/source/common/usprep.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2016 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | /* |
4 | | ******************************************************************************* |
5 | | * |
6 | | * Copyright (C) 2003-2016, International Business Machines |
7 | | * Corporation and others. All Rights Reserved. |
8 | | * |
9 | | ******************************************************************************* |
10 | | * file name: usprep.cpp |
11 | | * encoding: UTF-8 |
12 | | * tab size: 8 (not used) |
13 | | * indentation:4 |
14 | | * |
15 | | * created on: 2003jul2 |
16 | | * created by: Ram Viswanadha |
17 | | */ |
18 | | |
19 | | #include "unicode/utypes.h" |
20 | | |
21 | | #if !UCONFIG_NO_IDNA |
22 | | |
23 | | #include "unicode/usprep.h" |
24 | | |
25 | | #include "unicode/normalizer2.h" |
26 | | #include "unicode/ustring.h" |
27 | | #include "unicode/uchar.h" |
28 | | #include "unicode/uversion.h" |
29 | | #include "umutex.h" |
30 | | #include "cmemory.h" |
31 | | #include "sprpimpl.h" |
32 | | #include "ustr_imp.h" |
33 | | #include "uhash.h" |
34 | | #include "cstring.h" |
35 | | #include "udataswp.h" |
36 | | #include "ucln_cmn.h" |
37 | | #include "ubidi_props.h" |
38 | | #include "uprops.h" |
39 | | |
40 | | U_NAMESPACE_USE |
41 | | |
42 | | U_CDECL_BEGIN |
43 | | |
44 | | /* |
45 | | Static cache for already opened StringPrep profiles |
46 | | */ |
47 | | static UHashtable *SHARED_DATA_HASHTABLE = NULL; |
48 | | static icu::UInitOnce gSharedDataInitOnce; |
49 | | |
50 | | static UMutex usprepMutex = U_MUTEX_INITIALIZER; |
51 | | |
52 | | /* format version of spp file */ |
53 | | //static uint8_t formatVersion[4]={ 0, 0, 0, 0 }; |
54 | | |
55 | | /* the Unicode version of the sprep data */ |
56 | | static UVersionInfo dataVersion={ 0, 0, 0, 0 }; |
57 | | |
/*
 * Data file base names, indexed by UStringPrepProfileType.
 * The order of the entries must stay aligned with that enum, because
 * usprep_openByType() uses the enum value directly as the array index.
 */
static const char * const PROFILE_NAMES[] = {
    "rfc3491",      /* USPREP_RFC3491_NAMEPREP */
    "rfc3530cs",    /* USPREP_RFC3530_NFS4_CS_PREP */
    "rfc3530csci",  /* USPREP_RFC3530_NFS4_CS_PREP_CI */
    "rfc3491",      /* USPREP_RFC3530_NFS4_CIS_PREP */
    "rfc3530mixp",  /* USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX */
    "rfc3491",      /* USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX */
    "rfc3722",      /* USPREP_RFC3722_ISCSI */
    "rfc3920node",  /* USPREP_RFC3920_NODEPREP */
    "rfc3920res",   /* USPREP_RFC3920_RESOURCEPREP */
    "rfc4011",      /* USPREP_RFC4011_MIB */
    "rfc4013",      /* USPREP_RFC4013_SASLPREP */
    "rfc4505",      /* USPREP_RFC4505_TRACE */
    "rfc4518",      /* USPREP_RFC4518_LDAP */
    "rfc4518ci",    /* USPREP_RFC4518_LDAP_CI */
};
75 | | |
76 | | static UBool U_CALLCONV |
77 | | isSPrepAcceptable(void * /* context */, |
78 | | const char * /* type */, |
79 | | const char * /* name */, |
80 | 0 | const UDataInfo *pInfo) { |
81 | 0 | if( |
82 | 0 | pInfo->size>=20 && |
83 | 0 | pInfo->isBigEndian==U_IS_BIG_ENDIAN && |
84 | 0 | pInfo->charsetFamily==U_CHARSET_FAMILY && |
85 | 0 | pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */ |
86 | 0 | pInfo->dataFormat[1]==0x50 && |
87 | 0 | pInfo->dataFormat[2]==0x52 && |
88 | 0 | pInfo->dataFormat[3]==0x50 && |
89 | 0 | pInfo->formatVersion[0]==3 && |
90 | 0 | pInfo->formatVersion[2]==UTRIE_SHIFT && |
91 | 0 | pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT |
92 | 0 | ) { |
93 | 0 | //uprv_memcpy(formatVersion, pInfo->formatVersion, 4); |
94 | 0 | uprv_memcpy(dataVersion, pInfo->dataVersion, 4); |
95 | 0 | return TRUE; |
96 | 0 | } else { |
97 | 0 | return FALSE; |
98 | 0 | } |
99 | 0 | } |
100 | | |
101 | | static int32_t U_CALLCONV |
102 | 0 | getSPrepFoldingOffset(uint32_t data) { |
103 | 0 |
|
104 | 0 | return (int32_t)data; |
105 | 0 |
|
106 | 0 | } |
107 | | |
108 | | /* hashes an entry */ |
109 | | static int32_t U_CALLCONV |
110 | 0 | hashEntry(const UHashTok parm) { |
111 | 0 | UStringPrepKey *b = (UStringPrepKey *)parm.pointer; |
112 | 0 | UHashTok namekey, pathkey; |
113 | 0 | namekey.pointer = b->name; |
114 | 0 | pathkey.pointer = b->path; |
115 | 0 | return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey); |
116 | 0 | } |
117 | | |
118 | | /* compares two entries */ |
119 | | static UBool U_CALLCONV |
120 | 0 | compareEntries(const UHashTok p1, const UHashTok p2) { |
121 | 0 | UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer; |
122 | 0 | UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer; |
123 | 0 | UHashTok name1, name2, path1, path2; |
124 | 0 | name1.pointer = b1->name; |
125 | 0 | name2.pointer = b2->name; |
126 | 0 | path1.pointer = b1->path; |
127 | 0 | path2.pointer = b2->path; |
128 | 0 | return ((UBool)(uhash_compareChars(name1, name2) & |
129 | 0 | uhash_compareChars(path1, path2))); |
130 | 0 | } |
131 | | |
132 | | static void |
133 | 0 | usprep_unload(UStringPrepProfile* data){ |
134 | 0 | udata_close(data->sprepData); |
135 | 0 | } |
136 | | |
137 | | static int32_t |
138 | 0 | usprep_internal_flushCache(UBool noRefCount){ |
139 | 0 | UStringPrepProfile *profile = NULL; |
140 | 0 | UStringPrepKey *key = NULL; |
141 | 0 | int32_t pos = UHASH_FIRST; |
142 | 0 | int32_t deletedNum = 0; |
143 | 0 | const UHashElement *e; |
144 | 0 |
|
145 | 0 | /* |
146 | 0 | * if shared data hasn't even been lazy evaluated yet |
147 | 0 | * return 0 |
148 | 0 | */ |
149 | 0 | umtx_lock(&usprepMutex); |
150 | 0 | if (SHARED_DATA_HASHTABLE == NULL) { |
151 | 0 | umtx_unlock(&usprepMutex); |
152 | 0 | return 0; |
153 | 0 | } |
154 | 0 |
|
155 | 0 | /*creates an enumeration to iterate through every element in the table */ |
156 | 0 | while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL) |
157 | 0 | { |
158 | 0 | profile = (UStringPrepProfile *) e->value.pointer; |
159 | 0 | key = (UStringPrepKey *) e->key.pointer; |
160 | 0 |
|
161 | 0 | if ((noRefCount== FALSE && profile->refCount == 0) || |
162 | 0 | noRefCount== TRUE) { |
163 | 0 | deletedNum++; |
164 | 0 | uhash_removeElement(SHARED_DATA_HASHTABLE, e); |
165 | 0 |
|
166 | 0 | /* unload the data */ |
167 | 0 | usprep_unload(profile); |
168 | 0 |
|
169 | 0 | if(key->name != NULL) { |
170 | 0 | uprv_free(key->name); |
171 | 0 | key->name=NULL; |
172 | 0 | } |
173 | 0 | if(key->path != NULL) { |
174 | 0 | uprv_free(key->path); |
175 | 0 | key->path=NULL; |
176 | 0 | } |
177 | 0 | uprv_free(profile); |
178 | 0 | uprv_free(key); |
179 | 0 | } |
180 | 0 | |
181 | 0 | } |
182 | 0 | umtx_unlock(&usprepMutex); |
183 | 0 |
|
184 | 0 | return deletedNum; |
185 | 0 | } |
186 | | |
187 | | /* Works just like ucnv_flushCache() |
188 | | static int32_t |
189 | | usprep_flushCache(){ |
190 | | return usprep_internal_flushCache(FALSE); |
191 | | } |
192 | | */ |
193 | | |
194 | 0 | static UBool U_CALLCONV usprep_cleanup(void){ |
195 | 0 | if (SHARED_DATA_HASHTABLE != NULL) { |
196 | 0 | usprep_internal_flushCache(TRUE); |
197 | 0 | if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) { |
198 | 0 | uhash_close(SHARED_DATA_HASHTABLE); |
199 | 0 | SHARED_DATA_HASHTABLE = NULL; |
200 | 0 | } |
201 | 0 | } |
202 | 0 | gSharedDataInitOnce.reset(); |
203 | 0 | return (SHARED_DATA_HASHTABLE == NULL); |
204 | 0 | } |
205 | | U_CDECL_END |
206 | | |
207 | | |
208 | | /** Initializes the cache for resources */ |
209 | | static void U_CALLCONV |
210 | 0 | createCache(UErrorCode &status) { |
211 | 0 | SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status); |
212 | 0 | if (U_FAILURE(status)) { |
213 | 0 | SHARED_DATA_HASHTABLE = NULL; |
214 | 0 | } |
215 | 0 | ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup); |
216 | 0 | } |
217 | | |
218 | | static void |
219 | 0 | initCache(UErrorCode *status) { |
220 | 0 | umtx_initOnce(gSharedDataInitOnce, &createCache, *status); |
221 | 0 | } |
222 | | |
223 | | static UBool U_CALLCONV |
224 | | loadData(UStringPrepProfile* profile, |
225 | | const char* path, |
226 | | const char* name, |
227 | | const char* type, |
228 | 0 | UErrorCode* errorCode) { |
229 | 0 | /* load Unicode SPREP data from file */ |
230 | 0 | UTrie _sprepTrie={ 0,0,0,0,0,0,0 }; |
231 | 0 | UDataMemory *dataMemory; |
232 | 0 | const int32_t *p=NULL; |
233 | 0 | const uint8_t *pb; |
234 | 0 | UVersionInfo normUnicodeVersion; |
235 | 0 | int32_t normUniVer, sprepUniVer, normCorrVer; |
236 | 0 |
|
237 | 0 | if(errorCode==NULL || U_FAILURE(*errorCode)) { |
238 | 0 | return 0; |
239 | 0 | } |
240 | 0 | |
241 | 0 | /* open the data outside the mutex block */ |
242 | 0 | //TODO: change the path |
243 | 0 | dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode); |
244 | 0 | if(U_FAILURE(*errorCode)) { |
245 | 0 | return FALSE; |
246 | 0 | } |
247 | 0 |
|
248 | 0 | p=(const int32_t *)udata_getMemory(dataMemory); |
249 | 0 | pb=(const uint8_t *)(p+_SPREP_INDEX_TOP); |
250 | 0 | utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode); |
251 | 0 | _sprepTrie.getFoldingOffset=getSPrepFoldingOffset; |
252 | 0 |
|
253 | 0 |
|
254 | 0 | if(U_FAILURE(*errorCode)) { |
255 | 0 | udata_close(dataMemory); |
256 | 0 | return FALSE; |
257 | 0 | } |
258 | 0 |
|
259 | 0 | /* in the mutex block, set the data for this process */ |
260 | 0 | umtx_lock(&usprepMutex); |
261 | 0 | if(profile->sprepData==NULL) { |
262 | 0 | profile->sprepData=dataMemory; |
263 | 0 | dataMemory=NULL; |
264 | 0 | uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes)); |
265 | 0 | uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie)); |
266 | 0 | } else { |
267 | 0 | p=(const int32_t *)udata_getMemory(profile->sprepData); |
268 | 0 | } |
269 | 0 | umtx_unlock(&usprepMutex); |
270 | 0 | /* initialize some variables */ |
271 | 0 | profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]); |
272 | 0 | |
273 | 0 | u_getUnicodeVersion(normUnicodeVersion); |
274 | 0 | normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) + |
275 | 0 | (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]); |
276 | 0 | sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) + |
277 | 0 | (dataVersion[2] << 8 ) + (dataVersion[3]); |
278 | 0 | normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION]; |
279 | 0 | |
280 | 0 | if(U_FAILURE(*errorCode)){ |
281 | 0 | udata_close(dataMemory); |
282 | 0 | return FALSE; |
283 | 0 | } |
284 | 0 | if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */ |
285 | 0 | normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */ |
286 | 0 | ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/ |
287 | 0 | ){ |
288 | 0 | *errorCode = U_INVALID_FORMAT_ERROR; |
289 | 0 | udata_close(dataMemory); |
290 | 0 | return FALSE; |
291 | 0 | } |
292 | 0 | profile->isDataLoaded = TRUE; |
293 | 0 |
|
294 | 0 | /* if a different thread set it first, then close the extra data */ |
295 | 0 | if(dataMemory!=NULL) { |
296 | 0 | udata_close(dataMemory); /* NULL if it was set correctly */ |
297 | 0 | } |
298 | 0 |
|
299 | 0 |
|
300 | 0 | return profile->isDataLoaded; |
301 | 0 | } |
302 | | |
303 | | static UStringPrepProfile* |
304 | | usprep_getProfile(const char* path, |
305 | | const char* name, |
306 | 0 | UErrorCode *status){ |
307 | 0 |
|
308 | 0 | UStringPrepProfile* profile = NULL; |
309 | 0 |
|
310 | 0 | initCache(status); |
311 | 0 |
|
312 | 0 | if(U_FAILURE(*status)){ |
313 | 0 | return NULL; |
314 | 0 | } |
315 | 0 | |
316 | 0 | UStringPrepKey stackKey; |
317 | 0 | /* |
318 | 0 | * const is cast way to save malloc, strcpy and free calls |
319 | 0 | * we use the passed in pointers for fetching the data from the |
320 | 0 | * hash table which is safe |
321 | 0 | */ |
322 | 0 | stackKey.name = (char*) name; |
323 | 0 | stackKey.path = (char*) path; |
324 | 0 |
|
325 | 0 | /* fetch the data from the cache */ |
326 | 0 | umtx_lock(&usprepMutex); |
327 | 0 | profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey)); |
328 | 0 | if(profile != NULL) { |
329 | 0 | profile->refCount++; |
330 | 0 | } |
331 | 0 | umtx_unlock(&usprepMutex); |
332 | 0 | |
333 | 0 | if(profile == NULL) { |
334 | 0 | /* else load the data and put the data in the cache */ |
335 | 0 | LocalMemory<UStringPrepProfile> newProfile; |
336 | 0 | if(newProfile.allocateInsteadAndReset() == NULL) { |
337 | 0 | *status = U_MEMORY_ALLOCATION_ERROR; |
338 | 0 | return NULL; |
339 | 0 | } |
340 | 0 | |
341 | 0 | /* load the data */ |
342 | 0 | if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){ |
343 | 0 | return NULL; |
344 | 0 | } |
345 | 0 | |
346 | 0 | /* get the options */ |
347 | 0 | newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0); |
348 | 0 | newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0); |
349 | 0 |
|
350 | 0 | LocalMemory<UStringPrepKey> key; |
351 | 0 | LocalMemory<char> keyName; |
352 | 0 | LocalMemory<char> keyPath; |
353 | 0 | if( key.allocateInsteadAndReset() == NULL || |
354 | 0 | keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL || |
355 | 0 | (path != NULL && |
356 | 0 | keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL) |
357 | 0 | ) { |
358 | 0 | *status = U_MEMORY_ALLOCATION_ERROR; |
359 | 0 | usprep_unload(newProfile.getAlias()); |
360 | 0 | return NULL; |
361 | 0 | } |
362 | 0 | |
363 | 0 | umtx_lock(&usprepMutex); |
364 | 0 | // If another thread already inserted the same key/value, refcount and cleanup our thread data |
365 | 0 | profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey)); |
366 | 0 | if(profile != NULL) { |
367 | 0 | profile->refCount++; |
368 | 0 | usprep_unload(newProfile.getAlias()); |
369 | 0 | } |
370 | 0 | else { |
371 | 0 | /* initialize the key members */ |
372 | 0 | key->name = keyName.orphan(); |
373 | 0 | uprv_strcpy(key->name, name); |
374 | 0 | if(path != NULL){ |
375 | 0 | key->path = keyPath.orphan(); |
376 | 0 | uprv_strcpy(key->path, path); |
377 | 0 | } |
378 | 0 | profile = newProfile.orphan(); |
379 | 0 | |
380 | 0 | /* add the data object to the cache */ |
381 | 0 | profile->refCount = 1; |
382 | 0 | uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status); |
383 | 0 | } |
384 | 0 | umtx_unlock(&usprepMutex); |
385 | 0 | } |
386 | 0 |
|
387 | 0 | return profile; |
388 | 0 | } |
389 | | |
390 | | U_CAPI UStringPrepProfile* U_EXPORT2 |
391 | | usprep_open(const char* path, |
392 | | const char* name, |
393 | 0 | UErrorCode* status){ |
394 | 0 |
|
395 | 0 | if(status == NULL || U_FAILURE(*status)){ |
396 | 0 | return NULL; |
397 | 0 | } |
398 | 0 | |
399 | 0 | /* initialize the profile struct members */ |
400 | 0 | return usprep_getProfile(path,name,status); |
401 | 0 | } |
402 | | |
403 | | U_CAPI UStringPrepProfile* U_EXPORT2 |
404 | | usprep_openByType(UStringPrepProfileType type, |
405 | 0 | UErrorCode* status) { |
406 | 0 | if(status == NULL || U_FAILURE(*status)){ |
407 | 0 | return NULL; |
408 | 0 | } |
409 | 0 | int32_t index = (int32_t)type; |
410 | 0 | if (index < 0 || index >= UPRV_LENGTHOF(PROFILE_NAMES)) { |
411 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
412 | 0 | return NULL; |
413 | 0 | } |
414 | 0 | return usprep_open(NULL, PROFILE_NAMES[index], status); |
415 | 0 | } |
416 | | |
417 | | U_CAPI void U_EXPORT2 |
418 | 0 | usprep_close(UStringPrepProfile* profile){ |
419 | 0 | if(profile==NULL){ |
420 | 0 | return; |
421 | 0 | } |
422 | 0 | |
423 | 0 | umtx_lock(&usprepMutex); |
424 | 0 | /* decrement the ref count*/ |
425 | 0 | if(profile->refCount > 0){ |
426 | 0 | profile->refCount--; |
427 | 0 | } |
428 | 0 | umtx_unlock(&usprepMutex); |
429 | 0 | |
430 | 0 | } |
431 | | |
432 | | U_CFUNC void |
433 | | uprv_syntaxError(const UChar* rules, |
434 | | int32_t pos, |
435 | | int32_t rulesLen, |
436 | 0 | UParseError* parseError){ |
437 | 0 | if(parseError == NULL){ |
438 | 0 | return; |
439 | 0 | } |
440 | 0 | parseError->offset = pos; |
441 | 0 | parseError->line = 0 ; // we are not using line numbers |
442 | 0 | |
443 | 0 | // for pre-context |
444 | 0 | int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1)); |
445 | 0 | int32_t limit = pos; |
446 | 0 | |
447 | 0 | u_memcpy(parseError->preContext,rules+start,limit-start); |
448 | 0 | //null terminate the buffer |
449 | 0 | parseError->preContext[limit-start] = 0; |
450 | 0 | |
451 | 0 | // for post-context; include error rules[pos] |
452 | 0 | start = pos; |
453 | 0 | limit = start + (U_PARSE_CONTEXT_LEN-1); |
454 | 0 | if (limit > rulesLen) { |
455 | 0 | limit = rulesLen; |
456 | 0 | } |
457 | 0 | if (start < rulesLen) { |
458 | 0 | u_memcpy(parseError->postContext,rules+start,limit-start); |
459 | 0 | } |
460 | 0 | //null terminate the buffer |
461 | 0 | parseError->postContext[limit-start]= 0; |
462 | 0 | } |
463 | | |
464 | | |
465 | | static inline UStringPrepType |
466 | 0 | getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){ |
467 | 0 |
|
468 | 0 | UStringPrepType type; |
469 | 0 | if(trieWord == 0){ |
470 | 0 | /* |
471 | 0 | * Initial value stored in the mapping table |
472 | 0 | * just return USPREP_TYPE_LIMIT .. so that |
473 | 0 | * the source codepoint is copied to the destination |
474 | 0 | */ |
475 | 0 | type = USPREP_TYPE_LIMIT; |
476 | 0 | isIndex =FALSE; |
477 | 0 | value = 0; |
478 | 0 | }else if(trieWord >= _SPREP_TYPE_THRESHOLD){ |
479 | 0 | type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD); |
480 | 0 | isIndex =FALSE; |
481 | 0 | value = 0; |
482 | 0 | }else{ |
483 | 0 | /* get the type */ |
484 | 0 | type = USPREP_MAP; |
485 | 0 | /* ascertain if the value is index or delta */ |
486 | 0 | if(trieWord & 0x02){ |
487 | 0 | isIndex = TRUE; |
488 | 0 | value = trieWord >> 2; //mask off the lower 2 bits and shift |
489 | 0 | }else{ |
490 | 0 | isIndex = FALSE; |
491 | 0 | value = (int16_t)trieWord; |
492 | 0 | value = (value >> 2); |
493 | 0 | } |
494 | 0 | |
495 | 0 | if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){ |
496 | 0 | type = USPREP_DELETE; |
497 | 0 | isIndex =FALSE; |
498 | 0 | value = 0; |
499 | 0 | } |
500 | 0 | } |
501 | 0 | return type; |
502 | 0 | } |
503 | | |
504 | | // TODO: change to writing to UnicodeString not UChar * |
505 | | static int32_t |
506 | | usprep_map( const UStringPrepProfile* profile, |
507 | | const UChar* src, int32_t srcLength, |
508 | | UChar* dest, int32_t destCapacity, |
509 | | int32_t options, |
510 | | UParseError* parseError, |
511 | 0 | UErrorCode* status ){ |
512 | 0 | |
513 | 0 | uint16_t result; |
514 | 0 | int32_t destIndex=0; |
515 | 0 | int32_t srcIndex; |
516 | 0 | UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0); |
517 | 0 | UStringPrepType type; |
518 | 0 | int16_t value; |
519 | 0 | UBool isIndex; |
520 | 0 | const int32_t* indexes = profile->indexes; |
521 | 0 |
|
522 | 0 | // no error checking the caller check for error and arguments |
523 | 0 | // no string length check the caller finds out the string length |
524 | 0 |
|
525 | 0 | for(srcIndex=0;srcIndex<srcLength;){ |
526 | 0 | UChar32 ch; |
527 | 0 |
|
528 | 0 | U16_NEXT(src,srcIndex,srcLength,ch); |
529 | 0 | |
530 | 0 | result=0; |
531 | 0 |
|
532 | 0 | UTRIE_GET16(&profile->sprepTrie,ch,result); |
533 | 0 | |
534 | 0 | type = getValues(result, value, isIndex); |
535 | 0 |
|
536 | 0 | // check if the source codepoint is unassigned |
537 | 0 | if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){ |
538 | 0 |
|
539 | 0 | uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError); |
540 | 0 | *status = U_STRINGPREP_UNASSIGNED_ERROR; |
541 | 0 | return 0; |
542 | 0 | |
543 | 0 | }else if(type == USPREP_MAP){ |
544 | 0 | |
545 | 0 | int32_t index, length; |
546 | 0 |
|
547 | 0 | if(isIndex){ |
548 | 0 | index = value; |
549 | 0 | if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] && |
550 | 0 | index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){ |
551 | 0 | length = 1; |
552 | 0 | }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] && |
553 | 0 | index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){ |
554 | 0 | length = 2; |
555 | 0 | }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] && |
556 | 0 | index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){ |
557 | 0 | length = 3; |
558 | 0 | }else{ |
559 | 0 | length = profile->mappingData[index++]; |
560 | 0 | |
561 | 0 | } |
562 | 0 |
|
563 | 0 | /* copy mapping to destination */ |
564 | 0 | for(int32_t i=0; i< length; i++){ |
565 | 0 | if(destIndex < destCapacity ){ |
566 | 0 | dest[destIndex] = profile->mappingData[index+i]; |
567 | 0 | } |
568 | 0 | destIndex++; /* for pre-flighting */ |
569 | 0 | } |
570 | 0 | continue; |
571 | 0 | }else{ |
572 | 0 | // subtract the delta to arrive at the code point |
573 | 0 | ch -= value; |
574 | 0 | } |
575 | 0 |
|
576 | 0 | }else if(type==USPREP_DELETE){ |
577 | 0 | // just consume the codepoint and contine |
578 | 0 | continue; |
579 | 0 | } |
580 | 0 | //copy the code point into destination |
581 | 0 | if(ch <= 0xFFFF){ |
582 | 0 | if(destIndex < destCapacity ){ |
583 | 0 | dest[destIndex] = (UChar)ch; |
584 | 0 | } |
585 | 0 | destIndex++; |
586 | 0 | }else{ |
587 | 0 | if(destIndex+1 < destCapacity ){ |
588 | 0 | dest[destIndex] = U16_LEAD(ch); |
589 | 0 | dest[destIndex+1] = U16_TRAIL(ch); |
590 | 0 | } |
591 | 0 | destIndex +=2; |
592 | 0 | } |
593 | 0 | |
594 | 0 | } |
595 | 0 | |
596 | 0 | return u_terminateUChars(dest, destCapacity, destIndex, status); |
597 | 0 | } |
598 | | |
599 | | /* |
600 | | 1) Map -- For each character in the input, check if it has a mapping |
601 | | and, if so, replace it with its mapping. |
602 | | |
603 | | 2) Normalize -- Possibly normalize the result of step 1 using Unicode |
604 | | normalization. |
605 | | |
606 | | 3) Prohibit -- Check for any characters that are not allowed in the |
607 | | output. If any are found, return an error. |
608 | | |
609 | | 4) Check bidi -- Possibly check for right-to-left characters, and if |
610 | | any are found, make sure that the whole string satisfies the |
611 | | requirements for bidirectional strings. If the string does not |
612 | | satisfy the requirements for bidirectional strings, return an |
613 | | error. |
614 | | [Unicode3.2] defines several bidirectional categories; each character |
615 | | has one bidirectional category assigned to it. For the purposes of |
616 | | the requirements below, an "RandALCat character" is a character that |
617 | | has Unicode bidirectional categories "R" or "AL"; an "LCat character" |
618 | | is a character that has Unicode bidirectional category "L". Note |
619 | | |
620 | | |
621 | | that there are many characters which fall in neither of the above |
622 | | definitions; Latin digits (<U+0030> through <U+0039>) are examples of |
623 | | this because they have bidirectional category "EN". |
624 | | |
625 | | In any profile that specifies bidirectional character handling, all |
626 | | three of the following requirements MUST be met: |
627 | | |
628 | | 1) The characters in section 5.8 MUST be prohibited. |
629 | | |
630 | | 2) If a string contains any RandALCat character, the string MUST NOT |
631 | | contain any LCat character. |
632 | | |
633 | | 3) If a string contains any RandALCat character, a RandALCat |
634 | | character MUST be the first character of the string, and a |
635 | | RandALCat character MUST be the last character of the string. |
636 | | */ |
637 | | U_CAPI int32_t U_EXPORT2 |
638 | | usprep_prepare( const UStringPrepProfile* profile, |
639 | | const UChar* src, int32_t srcLength, |
640 | | UChar* dest, int32_t destCapacity, |
641 | | int32_t options, |
642 | | UParseError* parseError, |
643 | 0 | UErrorCode* status ){ |
644 | 0 |
|
645 | 0 | // check error status |
646 | 0 | if(U_FAILURE(*status)){ |
647 | 0 | return 0; |
648 | 0 | } |
649 | 0 | |
650 | 0 | //check arguments |
651 | 0 | if(profile==NULL || |
652 | 0 | (src==NULL ? srcLength!=0 : srcLength<-1) || |
653 | 0 | (dest==NULL ? destCapacity!=0 : destCapacity<0)) { |
654 | 0 | *status=U_ILLEGAL_ARGUMENT_ERROR; |
655 | 0 | return 0; |
656 | 0 | } |
657 | 0 | |
658 | 0 | //get the string length |
659 | 0 | if(srcLength < 0){ |
660 | 0 | srcLength = u_strlen(src); |
661 | 0 | } |
662 | 0 | // map |
663 | 0 | UnicodeString s1; |
664 | 0 | UChar *b1 = s1.getBuffer(srcLength); |
665 | 0 | if(b1==NULL){ |
666 | 0 | *status = U_MEMORY_ALLOCATION_ERROR; |
667 | 0 | return 0; |
668 | 0 | } |
669 | 0 | int32_t b1Len = usprep_map(profile, src, srcLength, |
670 | 0 | b1, s1.getCapacity(), options, parseError, status); |
671 | 0 | s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0); |
672 | 0 |
|
673 | 0 | if(*status == U_BUFFER_OVERFLOW_ERROR){ |
674 | 0 | // redo processing of string |
675 | 0 | /* we do not have enough room so grow the buffer*/ |
676 | 0 | b1 = s1.getBuffer(b1Len); |
677 | 0 | if(b1==NULL){ |
678 | 0 | *status = U_MEMORY_ALLOCATION_ERROR; |
679 | 0 | return 0; |
680 | 0 | } |
681 | 0 | |
682 | 0 | *status = U_ZERO_ERROR; // reset error |
683 | 0 | b1Len = usprep_map(profile, src, srcLength, |
684 | 0 | b1, s1.getCapacity(), options, parseError, status); |
685 | 0 | s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0); |
686 | 0 | } |
687 | 0 | if(U_FAILURE(*status)){ |
688 | 0 | return 0; |
689 | 0 | } |
690 | 0 | |
691 | 0 | // normalize |
692 | 0 | UnicodeString s2; |
693 | 0 | if(profile->doNFKC){ |
694 | 0 | const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status); |
695 | 0 | FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*status)); |
696 | 0 | if(U_FAILURE(*status)){ |
697 | 0 | return 0; |
698 | 0 | } |
699 | 0 | fn2.normalize(s1, s2, *status); |
700 | 0 | }else{ |
701 | 0 | s2.fastCopyFrom(s1); |
702 | 0 | } |
703 | 0 | if(U_FAILURE(*status)){ |
704 | 0 | return 0; |
705 | 0 | } |
706 | 0 | |
707 | 0 | // Prohibit and checkBiDi in one pass |
708 | 0 | const UChar *b2 = s2.getBuffer(); |
709 | 0 | int32_t b2Len = s2.length(); |
710 | 0 | UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT; |
711 | 0 | UBool leftToRight=FALSE, rightToLeft=FALSE; |
712 | 0 | int32_t rtlPos =-1, ltrPos =-1; |
713 | 0 |
|
714 | 0 | for(int32_t b2Index=0; b2Index<b2Len;){ |
715 | 0 | UChar32 ch = 0; |
716 | 0 | U16_NEXT(b2, b2Index, b2Len, ch); |
717 | 0 |
|
718 | 0 | uint16_t result; |
719 | 0 | UTRIE_GET16(&profile->sprepTrie,ch,result); |
720 | 0 |
|
721 | 0 | int16_t value; |
722 | 0 | UBool isIndex; |
723 | 0 | UStringPrepType type = getValues(result, value, isIndex); |
724 | 0 |
|
725 | 0 | if( type == USPREP_PROHIBITED || |
726 | 0 | ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/) |
727 | 0 | ){ |
728 | 0 | *status = U_STRINGPREP_PROHIBITED_ERROR; |
729 | 0 | uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError); |
730 | 0 | return 0; |
731 | 0 | } |
732 | 0 |
|
733 | 0 | if(profile->checkBiDi) { |
734 | 0 | direction = ubidi_getClass(ch); |
735 | 0 | if(firstCharDir == U_CHAR_DIRECTION_COUNT){ |
736 | 0 | firstCharDir = direction; |
737 | 0 | } |
738 | 0 | if(direction == U_LEFT_TO_RIGHT){ |
739 | 0 | leftToRight = TRUE; |
740 | 0 | ltrPos = b2Index-1; |
741 | 0 | } |
742 | 0 | if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){ |
743 | 0 | rightToLeft = TRUE; |
744 | 0 | rtlPos = b2Index-1; |
745 | 0 | } |
746 | 0 | } |
747 | 0 | } |
748 | 0 | if(profile->checkBiDi == TRUE){ |
749 | 0 | // satisfy 2 |
750 | 0 | if( leftToRight == TRUE && rightToLeft == TRUE){ |
751 | 0 | *status = U_STRINGPREP_CHECK_BIDI_ERROR; |
752 | 0 | uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError); |
753 | 0 | return 0; |
754 | 0 | } |
755 | 0 |
|
756 | 0 | //satisfy 3 |
757 | 0 | if( rightToLeft == TRUE && |
758 | 0 | !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) && |
759 | 0 | (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC)) |
760 | 0 | ){ |
761 | 0 | *status = U_STRINGPREP_CHECK_BIDI_ERROR; |
762 | 0 | uprv_syntaxError(b2, rtlPos, b2Len, parseError); |
763 | 0 | return FALSE; |
764 | 0 | } |
765 | 0 | } |
766 | 0 | return s2.extract(dest, destCapacity, *status); |
767 | 0 | } |
768 | | |
769 | | |
770 | | /* data swapping ------------------------------------------------------------ */ |
771 | | |
772 | | U_CAPI int32_t U_EXPORT2 |
773 | | usprep_swap(const UDataSwapper *ds, |
774 | | const void *inData, int32_t length, void *outData, |
775 | 0 | UErrorCode *pErrorCode) { |
776 | 0 | const UDataInfo *pInfo; |
777 | 0 | int32_t headerSize; |
778 | 0 |
|
779 | 0 | const uint8_t *inBytes; |
780 | 0 | uint8_t *outBytes; |
781 | 0 |
|
782 | 0 | const int32_t *inIndexes; |
783 | 0 | int32_t indexes[16]; |
784 | 0 |
|
785 | 0 | int32_t i, offset, count, size; |
786 | 0 |
|
787 | 0 | /* udata_swapDataHeader checks the arguments */ |
788 | 0 | headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); |
789 | 0 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
790 | 0 | return 0; |
791 | 0 | } |
792 | 0 | |
793 | 0 | /* check data format and format version */ |
794 | 0 | pInfo=(const UDataInfo *)((const char *)inData+4); |
795 | 0 | if(!( |
796 | 0 | pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */ |
797 | 0 | pInfo->dataFormat[1]==0x50 && |
798 | 0 | pInfo->dataFormat[2]==0x52 && |
799 | 0 | pInfo->dataFormat[3]==0x50 && |
800 | 0 | pInfo->formatVersion[0]==3 |
801 | 0 | )) { |
802 | 0 | udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n", |
803 | 0 | pInfo->dataFormat[0], pInfo->dataFormat[1], |
804 | 0 | pInfo->dataFormat[2], pInfo->dataFormat[3], |
805 | 0 | pInfo->formatVersion[0]); |
806 | 0 | *pErrorCode=U_UNSUPPORTED_ERROR; |
807 | 0 | return 0; |
808 | 0 | } |
809 | 0 |
|
810 | 0 | inBytes=(const uint8_t *)inData+headerSize; |
811 | 0 | outBytes=(uint8_t *)outData+headerSize; |
812 | 0 |
|
813 | 0 | inIndexes=(const int32_t *)inBytes; |
814 | 0 |
|
815 | 0 | if(length>=0) { |
816 | 0 | length-=headerSize; |
817 | 0 | if(length<16*4) { |
818 | 0 | udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n", |
819 | 0 | length); |
820 | 0 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
821 | 0 | return 0; |
822 | 0 | } |
823 | 0 | } |
824 | 0 |
|
825 | 0 | /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */ |
826 | 0 | for(i=0; i<16; ++i) { |
827 | 0 | indexes[i]=udata_readInt32(ds, inIndexes[i]); |
828 | 0 | } |
829 | 0 |
|
830 | 0 | /* calculate the total length of the data */ |
831 | 0 | size= |
832 | 0 | 16*4+ /* size of indexes[] */ |
833 | 0 | indexes[_SPREP_INDEX_TRIE_SIZE]+ |
834 | 0 | indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]; |
835 | 0 |
|
836 | 0 | if(length>=0) { |
837 | 0 | if(length<size) { |
838 | 0 | udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n", |
839 | 0 | length); |
840 | 0 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
841 | 0 | return 0; |
842 | 0 | } |
843 | 0 |
|
844 | 0 | /* copy the data for inaccessible bytes */ |
845 | 0 | if(inBytes!=outBytes) { |
846 | 0 | uprv_memcpy(outBytes, inBytes, size); |
847 | 0 | } |
848 | 0 |
|
849 | 0 | offset=0; |
850 | 0 |
|
851 | 0 | /* swap the int32_t indexes[] */ |
852 | 0 | count=16*4; |
853 | 0 | ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); |
854 | 0 | offset+=count; |
855 | 0 |
|
856 | 0 | /* swap the UTrie */ |
857 | 0 | count=indexes[_SPREP_INDEX_TRIE_SIZE]; |
858 | 0 | utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); |
859 | 0 | offset+=count; |
860 | 0 |
|
861 | 0 | /* swap the uint16_t mappingTable[] */ |
862 | 0 | count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]; |
863 | 0 | ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); |
864 | 0 | //offset+=count; |
865 | 0 | } |
866 | 0 |
|
867 | 0 | return headerSize+size; |
868 | 0 | } |
869 | | |
870 | | #endif /* #if !UCONFIG_NO_IDNA */ |