Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/intl/icu/source/common/usprep.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
 *******************************************************************************
5
 *
6
 *   Copyright (C) 2003-2016, International Business Machines
7
 *   Corporation and others.  All Rights Reserved.
8
 *
9
 *******************************************************************************
10
 *   file name:  usprep.cpp
11
 *   encoding:   UTF-8
12
 *   tab size:   8 (not used)
13
 *   indentation:4
14
 *
15
 *   created on: 2003jul2
16
 *   created by: Ram Viswanadha
17
 */
18
19
#include "unicode/utypes.h"
20
21
#if !UCONFIG_NO_IDNA
22
23
#include "unicode/usprep.h"
24
25
#include "unicode/normalizer2.h"
26
#include "unicode/ustring.h"
27
#include "unicode/uchar.h"
28
#include "unicode/uversion.h"
29
#include "umutex.h"
30
#include "cmemory.h"
31
#include "sprpimpl.h"
32
#include "ustr_imp.h"
33
#include "uhash.h"
34
#include "cstring.h"
35
#include "udataswp.h"
36
#include "ucln_cmn.h"
37
#include "ubidi_props.h"
38
#include "uprops.h"
39
40
U_NAMESPACE_USE
41
42
U_CDECL_BEGIN
43
44
/*
45
Static cache for already opened StringPrep profiles
46
*/
47
static UHashtable *SHARED_DATA_HASHTABLE = NULL;
48
static icu::UInitOnce gSharedDataInitOnce;
49
50
static UMutex usprepMutex = U_MUTEX_INITIALIZER;
51
52
/* format version of spp file */
53
//static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
54
55
/* the Unicode version of the sprep data */
56
static UVersionInfo dataVersion={ 0, 0, 0, 0 };
57
58
/* Profile names must be aligned to UStringPrepProfileType */
59
static const char * const PROFILE_NAMES[] = {
60
    "rfc3491",      /* USPREP_RFC3491_NAMEPREP */
61
    "rfc3530cs",    /* USPREP_RFC3530_NFS4_CS_PREP */
62
    "rfc3530csci",  /* USPREP_RFC3530_NFS4_CS_PREP_CI */
63
    "rfc3491",      /* USPREP_RFC3530_NSF4_CIS_PREP */
64
    "rfc3530mixp",  /* USPREP_RFC3530_NSF4_MIXED_PREP_PREFIX */
65
    "rfc3491",      /* USPREP_RFC3530_NSF4_MIXED_PREP_SUFFIX */
66
    "rfc3722",      /* USPREP_RFC3722_ISCSI */
67
    "rfc3920node",  /* USPREP_RFC3920_NODEPREP */
68
    "rfc3920res",   /* USPREP_RFC3920_RESOURCEPREP */
69
    "rfc4011",      /* USPREP_RFC4011_MIB */
70
    "rfc4013",      /* USPREP_RFC4013_SASLPREP */
71
    "rfc4505",      /* USPREP_RFC4505_TRACE */
72
    "rfc4518",      /* USPREP_RFC4518_LDAP */
73
    "rfc4518ci",    /* USPREP_RFC4518_LDAP_CI */
74
};
75
76
/*
 * udata acceptance callback passed to udata_openChoice() by loadData().
 * Accepts a data item only if its header describes "SPRP" data of format
 * version 3 built for this platform's endianness/charset family and with
 * the trie shift values this code was compiled with.
 * Side effect: on acceptance, copies the item's data version into the
 * file-level dataVersion.
 */
static UBool U_CALLCONV
isSPrepAcceptable(void * /* context */,
             const char * /* type */,
             const char * /* name */,
             const UDataInfo *pInfo) {
    /* header size and platform properties */
    if(pInfo->size<20 ||
       pInfo->isBigEndian!=U_IS_BIG_ENDIAN ||
       pInfo->charsetFamily!=U_CHARSET_FAMILY) {
        return FALSE;
    }

    /* dataFormat="SPRP" */
    const uint8_t *format = pInfo->dataFormat;
    if(format[0]!=0x53 || format[1]!=0x50 || format[2]!=0x52 || format[3]!=0x50) {
        return FALSE;
    }

    /* format version and trie parameters */
    const uint8_t *version = pInfo->formatVersion;
    if(version[0]!=3 || version[2]!=UTRIE_SHIFT || version[3]!=UTRIE_INDEX_SHIFT) {
        return FALSE;
    }

    //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
    uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
    return TRUE;
}
100
101
/*
 * Folding-offset callback installed on the profile's UTrie by loadData().
 * The trie data word is used directly as the offset.
 */
static int32_t U_CALLCONV
getSPrepFoldingOffset(uint32_t data) {
    const int32_t offset = (int32_t)data;
    return offset;
}
107
108
/* hashes an entry  */
109
static int32_t U_CALLCONV 
110
0
hashEntry(const UHashTok parm) {
111
0
    UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
112
0
    UHashTok namekey, pathkey;
113
0
    namekey.pointer = b->name;
114
0
    pathkey.pointer = b->path;
115
0
    return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
116
0
}
117
118
/* compares two entries */
119
static UBool U_CALLCONV 
120
0
compareEntries(const UHashTok p1, const UHashTok p2) {
121
0
    UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
122
0
    UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
123
0
    UHashTok name1, name2, path1, path2;
124
0
    name1.pointer = b1->name;
125
0
    name2.pointer = b2->name;
126
0
    path1.pointer = b1->path;
127
0
    path2.pointer = b2->path;
128
0
    return ((UBool)(uhash_compareChars(name1, name2) & 
129
0
        uhash_compareChars(path1, path2)));
130
0
}
131
132
static void 
133
0
usprep_unload(UStringPrepProfile* data){
134
0
    udata_close(data->sprepData);
135
0
}
136
137
/*
 * Removes profiles from the shared cache and frees them together with
 * their keys.
 *
 * @param noRefCount TRUE: remove every entry regardless of its reference
 *                   count; FALSE: remove only entries whose refCount is 0.
 * @return the number of entries removed; 0 if the cache was never created.
 */
static int32_t
usprep_internal_flushCache(UBool noRefCount){
    int32_t removed = 0;
    int32_t pos = UHASH_FIRST;
    const UHashElement *element;

    umtx_lock(&usprepMutex);

    /* the cache is created lazily; nothing to flush if that never happened */
    if (SHARED_DATA_HASHTABLE == NULL) {
        umtx_unlock(&usprepMutex);
        return 0;
    }

    /* walk every element in the table */
    while ((element = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
    {
        UStringPrepProfile *profile = (UStringPrepProfile *) element->value.pointer;
        UStringPrepKey *key = (UStringPrepKey *) element->key.pointer;

        const UBool evict = (noRefCount == TRUE) ||
                            (noRefCount == FALSE && profile->refCount == 0);
        if (!evict) {
            continue;
        }

        ++removed;
        uhash_removeElement(SHARED_DATA_HASHTABLE, element);

        /* unload the data */
        usprep_unload(profile);

        /* the key owns copies of its strings */
        if(key->name != NULL) {
            uprv_free(key->name);
            key->name=NULL;
        }
        if(key->path != NULL) {
            uprv_free(key->path);
            key->path=NULL;
        }
        uprv_free(profile);
        uprv_free(key);
    }
    umtx_unlock(&usprepMutex);

    return removed;
}
186
187
/* Works just like ucnv_flushCache() 
188
static int32_t 
189
usprep_flushCache(){
190
    return usprep_internal_flushCache(FALSE);
191
}
192
*/
193
194
0
/*
 * Library cleanup hook registered by createCache().
 * Flushes every cached profile, closes the table once it is empty, and
 * resets the init-once guard.
 * @return TRUE if the cache table no longer exists afterwards.
 */
static UBool U_CALLCONV usprep_cleanup(void){
    if (SHARED_DATA_HASHTABLE != NULL) {
        /* drop all entries, ignoring reference counts */
        usprep_internal_flushCache(TRUE);

        const UBool emptied = SHARED_DATA_HASHTABLE != NULL &&
                              uhash_count(SHARED_DATA_HASHTABLE) == 0;
        if (emptied) {
            uhash_close(SHARED_DATA_HASHTABLE);
            SHARED_DATA_HASHTABLE = NULL;
        }
    }
    gSharedDataInitOnce.reset();

    const UBool cacheGone = (SHARED_DATA_HASHTABLE == NULL);
    return cacheGone;
}
205
U_CDECL_END
206
207
208
/** Initializes the cache for resources */
209
static void U_CALLCONV
210
0
createCache(UErrorCode &status) {
211
0
    SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status);
212
0
    if (U_FAILURE(status)) {
213
0
        SHARED_DATA_HASHTABLE = NULL;
214
0
    }
215
0
    ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
216
0
}
217
218
static void 
219
0
initCache(UErrorCode *status) {
220
0
    umtx_initOnce(gSharedDataInitOnce, &createCache, *status);
221
0
}
222
223
/*
 * Opens the StringPrep data item (path, type, name), unserializes its trie
 * and attaches data, indexes, trie and mapping table to the profile.
 *
 * @param profile   profile to fill in; data is attached only if
 *                  profile->sprepData is still NULL
 * @param path      data package path handed to udata_openChoice()
 * @param name      data item name
 * @param type      data item type
 * @param errorCode in/out ICU error code; must not be NULL
 * @return TRUE if the profile's data is loaded; FALSE on any failure, in
 *         which case no data opened by this call is left attached to the
 *         profile.
 */
static UBool U_CALLCONV
loadData(UStringPrepProfile* profile,
         const char* path,
         const char* name,
         const char* type,
         UErrorCode* errorCode) {
    /* load Unicode SPREP data from file */
    UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
    UDataMemory *dataMemory;
    const int32_t *p=NULL;
    const uint8_t *pb;
    UVersionInfo normUnicodeVersion;
    int32_t normUniVer, sprepUniVer, normCorrVer;
    UBool installedHere=FALSE; /* TRUE once this call handed dataMemory to the profile */

    if(errorCode==NULL || U_FAILURE(*errorCode)) {
        return FALSE;
    }

    /* open the data outside the mutex block */
    //TODO: change the path
    dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
    if(U_FAILURE(*errorCode)) {
        return FALSE;
    }

    p=(const int32_t *)udata_getMemory(dataMemory);
    pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
    utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
    _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;

    if(U_FAILURE(*errorCode)) {
        udata_close(dataMemory);
        return FALSE;
    }

    /* in the mutex block, set the data for this process */
    umtx_lock(&usprepMutex);
    if(profile->sprepData==NULL) {
        profile->sprepData=dataMemory;
        dataMemory=NULL;
        installedHere=TRUE;
        uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
        uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
    } else {
        /* data was already attached; use it and discard ours below */
        p=(const int32_t *)udata_getMemory(profile->sprepData);
    }
    umtx_unlock(&usprepMutex);

    /* the mapping table follows the indexes and the serialized trie */
    profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);

    u_getUnicodeVersion(normUnicodeVersion);
    normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
                 (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
    sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
                  (dataVersion[2] << 8 ) + (dataVersion[3]);
    normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];

    /*
     * Reject the data when normalization is turned on and the library's
     * Unicode version is older than both the Unicode version of the SPREP
     * data and that of its NormalizationCorrections.txt.
     */
    if( normUniVer < sprepUniVer &&
        normUniVer < normCorrVer &&
        ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
      ){
        *errorCode = U_INVALID_FORMAT_ERROR;
        /* non-NULL only if the profile already had data and ours was never attached */
        udata_close(dataMemory);
        if(installedHere) {
            /*
             * The data opened by this call now lives in the profile; callers
             * free only the struct on failure, so release it here.
             */
            umtx_lock(&usprepMutex);
            udata_close(profile->sprepData);
            profile->sprepData=NULL;
            umtx_unlock(&usprepMutex);
            profile->mappingData=NULL; /* pointed into the data just closed */
        }
        return FALSE;
    }
    profile->isDataLoaded = TRUE;

    /* if the profile already had data attached, close the extra copy */
    if(dataMemory!=NULL) {
        udata_close(dataMemory); /* NULL if it was set correctly */
    }

    return profile->isDataLoaded;
}
302
303
/*
 * Returns the cached profile for (path, name), loading it and adding it to
 * the shared cache on first use. The returned profile's refCount is
 * incremented; usprep_close() decrements it.
 *
 * @param path   data package path, may be NULL
 * @param name   profile data name
 * @param status in/out ICU error code; must not be NULL
 * @return the profile, or NULL with *status set on failure (including a
 *         failure to insert a newly loaded profile into the cache).
 */
static UStringPrepProfile*
usprep_getProfile(const char* path,
                  const char* name,
                  UErrorCode *status){

    UStringPrepProfile* profile = NULL;

    initCache(status);

    if(U_FAILURE(*status)){
        return NULL;
    }

    UStringPrepKey stackKey;
    /*
     * const is cast away to save malloc, strcpy and free calls;
     * the passed-in pointers are only used for the lookup,
     * which is safe
     */
    stackKey.name = (char*) name;
    stackKey.path = (char*) path;

    /* fetch the data from the cache */
    umtx_lock(&usprepMutex);
    profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
    if(profile != NULL) {
        profile->refCount++;
    }
    umtx_unlock(&usprepMutex);

    if(profile == NULL) {
        /* else load the data and put the data in the cache */
        LocalMemory<UStringPrepProfile> newProfile;
        if(newProfile.allocateInsteadAndReset() == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }

        /* load the data */
        if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
            return NULL;
        }

        /* get the options */
        newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
        newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);

        /* allocate the key and its string copies before taking the lock */
        LocalMemory<UStringPrepKey> key;
        LocalMemory<char> keyName;
        LocalMemory<char> keyPath;
        if( key.allocateInsteadAndReset() == NULL ||
            keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL ||
            (path != NULL &&
             keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL)
         ) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            usprep_unload(newProfile.getAlias());
            return NULL;
        }

        umtx_lock(&usprepMutex);
        // If another thread already inserted the same key/value, refcount and cleanup our thread data
        profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
        if(profile != NULL) {
            profile->refCount++;
            usprep_unload(newProfile.getAlias());
        }
        else {
            /*
             * Initialize the key members. Ownership stays with the
             * LocalMemory holders until the insertion has succeeded.
             */
            key->name = keyName.getAlias();
            uprv_strcpy(key->name, name);
            if(path != NULL){
                key->path = keyPath.getAlias();
                uprv_strcpy(key->path, path);
            }

            /* add the data object to the cache */
            newProfile->refCount = 1;
            uhash_put(SHARED_DATA_HASHTABLE, key.getAlias(), newProfile.getAlias(), status);
            if(U_SUCCESS(*status)) {
                /* the table now references these objects; release ownership */
                keyName.orphan();
                keyPath.orphan();
                key.orphan();
                profile = newProfile.orphan();
            } else {
                /*
                 * Not inserted: close the data here; the LocalMemory holders
                 * free the profile, key and strings. profile stays NULL.
                 */
                usprep_unload(newProfile.getAlias());
            }
        }
        umtx_unlock(&usprepMutex);
    }

    return profile;
}
389
390
/*
 * Opens (or fetches from the cache) the profile identified by path and name.
 * Returns NULL without doing anything if status is NULL or already holds a
 * failure code.
 */
U_CAPI UStringPrepProfile* U_EXPORT2
usprep_open(const char* path,
            const char* name,
            UErrorCode* status){

    const UBool usable = (status != NULL) && U_SUCCESS(*status);
    if(!usable){
        return NULL;
    }

    /* look up or load the profile */
    UStringPrepProfile *opened = usprep_getProfile(path,name,status);
    return opened;
}
402
403
/*
 * Opens one of the predefined profiles by enum value.
 * The value selects an entry of PROFILE_NAMES, which is then opened with a
 * NULL path. Sets U_ILLEGAL_ARGUMENT_ERROR and returns NULL when the value
 * is outside the table.
 */
U_CAPI UStringPrepProfile* U_EXPORT2
usprep_openByType(UStringPrepProfileType type,
          UErrorCode* status) {
    if(status == NULL || U_FAILURE(*status)){
        return NULL;
    }

    const int32_t slot = (int32_t)type;
    const UBool inTable = (slot >= 0) && (slot < UPRV_LENGTHOF(PROFILE_NAMES));
    if (!inTable) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    const char *profileName = PROFILE_NAMES[slot];
    return usprep_open(NULL, profileName, status);
}
416
417
/*
 * Releases one reference to a profile.
 * Only the reference count is decremented (never below zero); nothing is
 * freed here. A NULL profile is ignored.
 */
U_CAPI void U_EXPORT2
usprep_close(UStringPrepProfile* profile){
    if(profile != NULL){
        umtx_lock(&usprepMutex);
        /* decrement the ref count */
        if(profile->refCount > 0){
            --profile->refCount;
        }
        umtx_unlock(&usprepMutex);
    }
}
431
432
/*
 * Fills a UParseError with the error offset and the text around it.
 *
 * preContext receives up to U_PARSE_CONTEXT_LEN-1 code units that precede
 * rules[pos]; postContext receives up to U_PARSE_CONTEXT_LEN-1 code units
 * starting at rules[pos]. Both are NUL-terminated.
 *
 * @param rules      text in which the error occurred
 * @param pos        offset of the error in rules
 * @param rulesLen   number of code units in rules
 * @param parseError struct to fill in; nothing is done if NULL
 *
 * A pos that is negative or at/after rulesLen yields an empty post-context
 * (a negative pos also an empty pre-context) rather than a write through a
 * negative index.
 */
U_CFUNC void
uprv_syntaxError(const UChar* rules,
                 int32_t pos,
                 int32_t rulesLen,
                 UParseError* parseError){
    if(parseError == NULL){
        return;
    }
    parseError->offset = pos;
    parseError->line = 0 ; // we are not using line numbers

    // for pre-context: the last (U_PARSE_CONTEXT_LEN-1) units before pos, or fewer
    int32_t preLength = 0;
    if(pos > 0){
        preLength = (pos < U_PARSE_CONTEXT_LEN) ? pos : (U_PARSE_CONTEXT_LEN-1);
        u_memcpy(parseError->preContext, rules+(pos-preLength), preLength);
    }
    //null terminate the buffer
    parseError->preContext[preLength] = 0;

    // for post-context; include error rules[pos]
    int32_t postLength = 0;
    if(pos >= 0 && pos < rulesLen){
        const int32_t remaining = rulesLen - pos;
        postLength = (remaining < (U_PARSE_CONTEXT_LEN-1)) ? remaining : (U_PARSE_CONTEXT_LEN-1);
        u_memcpy(parseError->postContext, rules+pos, postLength);
    }
    //null terminate the buffer
    parseError->postContext[postLength] = 0;
}
463
464
465
/*
 * Decodes a 16-bit trie word.
 *
 * @param trieWord value read from the profile's trie
 * @param value    out: mapping-table index (isIndex TRUE) or code point
 *                 delta (isIndex FALSE) for USPREP_MAP, otherwise 0
 * @param isIndex  out: TRUE only when value is a mapping-table index
 * @return the type encoded in the word; USPREP_TYPE_LIMIT for the initial
 *         value 0, which makes callers copy the code point unchanged.
 */
static inline UStringPrepType
getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){

    /* initial value stored in the trie: nothing special about this code point */
    if(trieWord == 0){
        isIndex = FALSE;
        value = 0;
        return USPREP_TYPE_LIMIT;
    }

    /* words at or above the threshold carry only a type */
    if(trieWord >= _SPREP_TYPE_THRESHOLD){
        isIndex = FALSE;
        value = 0;
        return (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
    }

    /* the maximum index value marks a code point that is to be deleted */
    if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
        isIndex = FALSE;
        value = 0;
        return USPREP_DELETE;
    }

    /* a mapping: bit 1 tells whether the payload is an index or a delta */
    if(trieWord & 0x02){
        isIndex = TRUE;
        value = trieWord  >> 2; //drop the lower 2 bits
    }else{
        isIndex = FALSE;
        /* reinterpret as signed first so the shift keeps the delta's sign */
        const int16_t signedWord = (int16_t)trieWord;
        value = (signedWord >> 2);
    }
    return USPREP_MAP;
}
503
504
// TODO: change to writing to UnicodeString not UChar *
505
static int32_t 
506
usprep_map(  const UStringPrepProfile* profile, 
507
             const UChar* src, int32_t srcLength, 
508
             UChar* dest, int32_t destCapacity,
509
             int32_t options,
510
             UParseError* parseError,
511
0
             UErrorCode* status ){
512
0
    
513
0
    uint16_t result;
514
0
    int32_t destIndex=0;
515
0
    int32_t srcIndex;
516
0
    UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
517
0
    UStringPrepType type;
518
0
    int16_t value;
519
0
    UBool isIndex;
520
0
    const int32_t* indexes = profile->indexes;
521
0
522
0
    // no error checking the caller check for error and arguments
523
0
    // no string length check the caller finds out the string length
524
0
525
0
    for(srcIndex=0;srcIndex<srcLength;){
526
0
        UChar32 ch;
527
0
528
0
        U16_NEXT(src,srcIndex,srcLength,ch);
529
0
        
530
0
        result=0;
531
0
532
0
        UTRIE_GET16(&profile->sprepTrie,ch,result);
533
0
        
534
0
        type = getValues(result, value, isIndex);
535
0
536
0
        // check if the source codepoint is unassigned
537
0
        if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
538
0
539
0
            uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
540
0
            *status = U_STRINGPREP_UNASSIGNED_ERROR;
541
0
            return 0;
542
0
            
543
0
        }else if(type == USPREP_MAP){
544
0
            
545
0
            int32_t index, length;
546
0
547
0
            if(isIndex){
548
0
                index = value;
549
0
                if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
550
0
                         index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
551
0
                    length = 1;
552
0
                }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
553
0
                         index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
554
0
                    length = 2;
555
0
                }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
556
0
                         index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
557
0
                    length = 3;
558
0
                }else{
559
0
                    length = profile->mappingData[index++];
560
0
         
561
0
                }
562
0
563
0
                /* copy mapping to destination */
564
0
                for(int32_t i=0; i< length; i++){
565
0
                    if(destIndex < destCapacity  ){
566
0
                        dest[destIndex] = profile->mappingData[index+i];
567
0
                    }
568
0
                    destIndex++; /* for pre-flighting */
569
0
                }  
570
0
                continue;
571
0
            }else{
572
0
                // subtract the delta to arrive at the code point
573
0
                ch -= value;
574
0
            }
575
0
576
0
        }else if(type==USPREP_DELETE){
577
0
             // just consume the codepoint and contine
578
0
            continue;
579
0
        }
580
0
        //copy the code point into destination
581
0
        if(ch <= 0xFFFF){
582
0
            if(destIndex < destCapacity ){
583
0
                dest[destIndex] = (UChar)ch;
584
0
            }
585
0
            destIndex++;
586
0
        }else{
587
0
            if(destIndex+1 < destCapacity ){
588
0
                dest[destIndex]   = U16_LEAD(ch);
589
0
                dest[destIndex+1] = U16_TRAIL(ch);
590
0
            }
591
0
            destIndex +=2;
592
0
        }
593
0
       
594
0
    }
595
0
        
596
0
    return u_terminateUChars(dest, destCapacity, destIndex, status);
597
0
}
598
599
/*
600
   1) Map -- For each character in the input, check if it has a mapping
601
      and, if so, replace it with its mapping.  
602
603
   2) Normalize -- Possibly normalize the result of step 1 using Unicode
604
      normalization. 
605
606
   3) Prohibit -- Check for any characters that are not allowed in the
607
      output.  If any are found, return an error.  
608
609
   4) Check bidi -- Possibly check for right-to-left characters, and if
610
      any are found, make sure that the whole string satisfies the
611
      requirements for bidirectional strings.  If the string does not
612
      satisfy the requirements for bidirectional strings, return an
613
      error.  
614
      [Unicode3.2] defines several bidirectional categories; each character
615
       has one bidirectional category assigned to it.  For the purposes of
616
       the requirements below, an "RandALCat character" is a character that
617
       has Unicode bidirectional categories "R" or "AL"; an "LCat character"
618
       is a character that has Unicode bidirectional category "L".  Note
619
620
621
       that there are many characters which fall in neither of the above
622
       definitions; Latin digits (<U+0030> through <U+0039>) are examples of
623
       this because they have bidirectional category "EN".
624
625
       In any profile that specifies bidirectional character handling, all
626
       three of the following requirements MUST be met:
627
628
       1) The characters in section 5.8 MUST be prohibited.
629
630
       2) If a string contains any RandALCat character, the string MUST NOT
631
          contain any LCat character.
632
633
       3) If a string contains any RandALCat character, a RandALCat
634
          character MUST be the first character of the string, and a
635
          RandALCat character MUST be the last character of the string.
636
*/
637
/*
 * Prepares src according to the profile: map, optionally normalize (NFKC
 * restricted to the Unicode 3.2 set), then check for prohibited code points
 * and, if the profile asks for it, the bidi requirements.
 *
 * @param profile      profile to apply; must not be NULL
 * @param src          input text
 * @param srcLength    length of src, or -1 if NUL-terminated
 * @param dest         output buffer; may be NULL only if destCapacity is 0
 * @param destCapacity capacity of dest in UChars
 * @param options      USPREP_* option bits
 * @param parseError   receives error position/context on failure; may be NULL
 * @param status       in/out ICU error code
 * @return the length of the prepared text, or 0 on failure.
 */
U_CAPI int32_t U_EXPORT2
usprep_prepare(   const UStringPrepProfile* profile,
                  const UChar* src, int32_t srcLength,
                  UChar* dest, int32_t destCapacity,
                  int32_t options,
                  UParseError* parseError,
                  UErrorCode* status ){

    // check error status
    if(status==NULL || U_FAILURE(*status)){
        return 0;
    }

    //check arguments
    if(profile==NULL ||
            (src==NULL ? srcLength!=0 : srcLength<-1) ||
            (dest==NULL ? destCapacity!=0 : destCapacity<0)) {
        *status=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    //get the string length
    if(srcLength < 0){
        srcLength = u_strlen(src);
    }
    // map
    UnicodeString s1;
    UChar *b1 = s1.getBuffer(srcLength);
    if(b1==NULL){
        *status = U_MEMORY_ALLOCATION_ERROR;
        return 0;
    }
    int32_t b1Len = usprep_map(profile, src, srcLength,
                               b1, s1.getCapacity(), options, parseError, status);
    s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);

    if(*status == U_BUFFER_OVERFLOW_ERROR){
        // redo processing of string
        /* we do not have enough room so grow the buffer*/
        b1 = s1.getBuffer(b1Len);
        if(b1==NULL){
            *status = U_MEMORY_ALLOCATION_ERROR;
            return 0;
        }

        *status = U_ZERO_ERROR; // reset error
        b1Len = usprep_map(profile, src, srcLength,
                           b1, s1.getCapacity(), options, parseError, status);
        s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
    }
    if(U_FAILURE(*status)){
        return 0;
    }

    // normalize
    UnicodeString s2;
    if(profile->doNFKC){
        // check the status before dereferencing either singleton
        const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status);
        if(U_FAILURE(*status)){
            return 0;
        }
        const UnicodeSet *unicode32 = uniset_getUnicode32Instance(*status);
        if(U_FAILURE(*status)){
            return 0;
        }
        FilteredNormalizer2 fn2(*n2, *unicode32);
        fn2.normalize(s1, s2, *status);
    }else{
        s2.fastCopyFrom(s1);
    }
    if(U_FAILURE(*status)){
        return 0;
    }

    // Prohibit and checkBiDi in one pass
    const UChar *b2 = s2.getBuffer();
    int32_t b2Len = s2.length();
    UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
    UBool leftToRight=FALSE, rightToLeft=FALSE;
    int32_t rtlPos =-1, ltrPos =-1;

    for(int32_t b2Index=0; b2Index<b2Len;){
        UChar32 ch = 0;
        U16_NEXT(b2, b2Index, b2Len, ch);

        uint16_t result;
        UTRIE_GET16(&profile->sprepTrie,ch,result);

        int16_t value;
        UBool isIndex;
        UStringPrepType type = getValues(result, value, isIndex);

        if( type == USPREP_PROHIBITED ||
            ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
           ){
            *status = U_STRINGPREP_PROHIBITED_ERROR;
            // b2Index and b2Len refer to the normalized text, so report context from b2
            uprv_syntaxError(b2, b2Index-U16_LENGTH(ch), b2Len, parseError);
            return 0;
        }

        if(profile->checkBiDi) {
            direction = ubidi_getClass(ch);
            if(firstCharDir == U_CHAR_DIRECTION_COUNT){
                firstCharDir = direction;
            }
            if(direction == U_LEFT_TO_RIGHT){
                leftToRight = TRUE;
                ltrPos = b2Index-1;
            }
            if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
                rightToLeft = TRUE;
                rtlPos = b2Index-1;
            }
        }
    }
    if(profile->checkBiDi == TRUE){
        // satisfy 2: no mixing of RandALCat and LCat characters
        if( leftToRight == TRUE && rightToLeft == TRUE){
            *status = U_STRINGPREP_CHECK_BIDI_ERROR;
            uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
            return 0;
        }

        //satisfy 3: with any RandALCat character, first and last must be RandALCat
        //(after the loop, direction holds the class of the last character)
        if( rightToLeft == TRUE &&
            !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
              (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
           ){
            *status = U_STRINGPREP_CHECK_BIDI_ERROR;
            uprv_syntaxError(b2, rtlPos, b2Len, parseError);
            return 0;
        }
    }
    return s2.extract(dest, destCapacity, *status);
}
768
769
770
/* data swapping ------------------------------------------------------------ */
771
772
/*
 * Swaps a StringPrep .spp data item between platform byte orders/charsets.
 *
 * The item consists of the data header, 16 int32_t indexes, the serialized
 * UTrie and the uint16_t mapping table. When length is negative only the
 * sizes are computed and nothing is written (pre-flighting).
 *
 * @return header size plus the size of the swapped data, or 0 on failure
 *         with *pErrorCode set.
 */
U_CAPI int32_t U_EXPORT2
usprep_swap(const UDataSwapper *ds,
            const void *inData, int32_t length, void *outData,
            UErrorCode *pErrorCode) {
    /* udata_swapDataHeader checks the arguments */
    const int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    const UDataInfo *pInfo=(const UDataInfo *)((const char *)inData+4);
    const UBool recognized =
        pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
        pInfo->dataFormat[1]==0x50 &&
        pInfo->dataFormat[2]==0x52 &&
        pInfo->dataFormat[3]==0x50 &&
        pInfo->formatVersion[0]==3;
    if(!recognized) {
        udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    const uint8_t *inBytes=(const uint8_t *)inData+headerSize;
    uint8_t *outBytes=(uint8_t *)outData+headerSize;
    const int32_t *inIndexes=(const int32_t *)inBytes;

    if(length>=0) {
        length-=headerSize;
        if(length<16*4) {
            udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
                             length);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
    int32_t indexes[16];
    for(int32_t i=0; i<16; ++i) {
        indexes[i]=udata_readInt32(ds, inIndexes[i]);
    }

    /* calculate the total length of the data */
    const int32_t size=
        16*4+ /* size of indexes[] */
        indexes[_SPREP_INDEX_TRIE_SIZE]+
        indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];

    if(length>=0) {
        if(length<size) {
            udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
                             length);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }

        /* copy the data for inaccessible bytes */
        if(inBytes!=outBytes) {
            uprv_memcpy(outBytes, inBytes, size);
        }

        /* swap the int32_t indexes[] */
        const int32_t indexesBytes=16*4;
        ds->swapArray32(ds, inBytes, indexesBytes, outBytes, pErrorCode);

        /* swap the UTrie, which follows the indexes */
        const int32_t trieOffset=indexesBytes;
        const int32_t trieBytes=indexes[_SPREP_INDEX_TRIE_SIZE];
        utrie_swap(ds, inBytes+trieOffset, trieBytes, outBytes+trieOffset, pErrorCode);

        /* swap the uint16_t mappingTable[], which follows the trie */
        const int32_t mappingOffset=trieOffset+trieBytes;
        const int32_t mappingBytes=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
        ds->swapArray16(ds, inBytes+mappingOffset, mappingBytes, outBytes+mappingOffset, pErrorCode);
    }

    return headerSize+size;
}
869
870
#endif /* #if !UCONFIG_NO_IDNA */