Coverage Report

Created: 2024-04-24 06:23

/src/icu/source/common/ucnv_io.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
******************************************************************************
5
*
6
*   Copyright (C) 1999-2015, International Business Machines
7
*   Corporation and others.  All Rights Reserved.
8
*
9
******************************************************************************
10
*
11
*
12
*  ucnv_io.cpp:
13
*  initializes global variables and defines functions pertaining to converter 
14
*  name resolution aspect of the conversion code.
15
*
16
*   new implementation:
17
*
18
*   created on: 1999nov22
19
*   created by: Markus W. Scherer
20
*
21
*   Use the binary cnvalias.icu (created from convrtrs.txt) to work
22
*   with aliases for converter names.
23
*
24
*   Date        Name        Description
25
*   11/22/1999  markus      Created
26
*   06/28/2002  grhoten     Major overhaul of the converter alias design.
27
*                           Now an alias can map to different converters
28
*                           depending on the specified standard.
29
*******************************************************************************
30
*/
31
32
#include "unicode/utypes.h"
33
34
#if !UCONFIG_NO_CONVERSION
35
36
#include "unicode/ucnv.h"
37
#include "unicode/udata.h"
38
39
#include "umutex.h"
40
#include "uarrsort.h"
41
#include "uassert.h"
42
#include "udataswp.h"
43
#include "cstring.h"
44
#include "cmemory.h"
45
#include "ucnv_io.h"
46
#include "uenumimp.h"
47
#include "ucln_cmn.h"
48
49
/* Format of cnvalias.icu -----------------------------------------------------
50
 *
51
 * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt.
52
 * This binary form contains several tables. All indexes are to uint16_t
53
 * units, and not to the bytes (uint8_t units). Addressing everything on
54
 * 16-bit boundaries allows us to store more information with small index
55
 * numbers, which are also 16-bit in size. The majority of the table (except
56
 * the string table) are 16-bit numbers.
57
 *
58
 * First there is the size of the Table of Contents (TOC). The TOC
59
 * entries contain the size of each section. In order to find the offset
60
 * you just need to sum up the previous offsets.
61
 * The TOC length and entries are an array of uint32_t values.
62
 * The first section after the TOC starts immediately after the TOC.
63
 *
64
 * 1) This section contains a list of converters. This list contains indexes
65
 * into the string table for the converter name. The index of this list is
66
 * also used by other sections, which are mentioned later on.
67
 * This list is not sorted.
68
 *
69
 * 2) This section contains a list of tags. This list contains indexes
70
 * into the string table for the tag name. The index of this list is
71
 * also used by other sections, which are mentioned later on.
72
 * This list is in priority order of standards.
73
 *
74
 * 3) This section contains a list of sorted unique aliases. This
75
 * list contains indexes into the string table for the alias name. The
76
 * index of this list is also used by other sections, like the 4th section.
77
 * The index for the 3rd and 4th section is used to get the
78
 * alias -> converter name mapping. Section 3 and 4 form a two column table.
79
 * Some of the most significant bits of each index may contain other
80
 * information (see findConverter for details).
81
 *
82
 * 4) This section contains a list of mapped converter names. Consider this
83
 * as a table that maps the 3rd section to the 1st section. This list contains
84
 * indexes into the 1st section. The index of this list is the same index in
85
 * the 3rd section. There is also some extra information in the high bits of
86
 * each converter index in this table. Currently it's only used to say that
87
 * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK
88
 * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is
89
 * the predigested form of the 5th section so that an alias lookup can be fast.
90
 *
91
 * 5) This section contains a 2D array with indexes to the 6th section. This
92
 * section is the full form of all alias mappings. The column index is the
93
 * index into the converter list (column header). The row index is the index
94
 * to tag list (row header). This 2D array is the top part of a 3D array. The
95
 * third dimension is in the 6th section.
96
 *
97
 * 6) This is a blob of variable-length arrays. Each array starts with a size,
98
 * and is followed by indexes to alias names in the string table. This is
99
 * the third dimension to the section 5. No other section should be referencing
100
 * this section.
101
 *
102
 * 7) Starting in ICU 3.6, this can be a UConverterAliasOptions struct. Its
103
 * presence indicates that a section 9 exists. UConverterAliasOptions specifies
104
 * what type of string normalization is used among other potential things in the
105
 * future.
106
 *
107
 * 8) This is the string table. All strings are indexed on an even address.
108
 * There are two reasons for this. First many chip architectures locate strings
109
 * faster on even address boundaries. Second, since all indexes are 16-bit
110
 * numbers, this string table can be 128KB in size instead of 64KB when we
111
 * only have strings starting on an even address.
112
 *
113
 * 9) When present this is a set of prenormalized strings from section 8. This
114
 * table contains normalized strings with the dashes and spaces stripped out,
115
 * and all strings lowercased. In the future, the options in section 7 may state
116
 * other types of normalization.
117
 *
118
 * Here is the concept of section 5 and 6. It's a 3D cube. Each tag
119
 * has a unique alias among all converters. That same alias can
120
 * be mentioned in other standards on different converters,
121
 * but only one alias per tag can be unique.
122
 *
123
 *
124
 *              Converter Names (Usually in TR22 form)
125
 *           -------------------------------------------.
126
 *     T    /                                          /|
127
 *     a   /                                          / |
128
 *     g  /                                          /  |
129
 *     s /                                          /   |
130
 *      /                                          /    |
131
 *      ------------------------------------------/     |
132
 *    A |                                         |     |
133
 *    l |                                         |     |
134
 *    i |                                         |    /
135
 *    a |                                         |   /
136
 *    s |                                         |  /
137
 *    e |                                         | /
138
 *    s |                                         |/
139
 *      -------------------------------------------
140
 *
141
 *
142
 *
143
 * Here is what it really looks like. It's like swiss cheese.
144
 * There are holes. Some converters aren't recognized by
145
 * a standard, or they are really old converters that the
146
 * standard doesn't recognize anymore.
147
 *
148
 *              Converter Names (Usually in TR22 form)
149
 *           -------------------------------------------.
150
 *     T    /##########################################/|
151
 *     a   /     #            #                       /#
152
 *     g  /  #      ##     ##     ### # ### ### ### #/
153
 *     s / #             #####  ####        ##  ## #/#
154
 *      / ### # # ##  #  #   #          ### # #   #/##
155
 *      ------------------------------------------/# #
156
 *    A |### # # ##  #  #   #          ### # #   #|# #
157
 *    l |# # #    #     #               ## #     #|# #
158
 *    i |# # #    #     #                #       #|#
159
 *    a |#                                       #|#
160
 *    s |                                        #|#
161
 *    e
162
 *    s
163
 *
164
 */
165
166
/**
167
 * Used by the UEnumeration API
168
 */
169
typedef struct UAliasContext {
    /* Index into gMainTable.taggedAliasLists of the list being enumerated; 0 means "no list". */
    uint32_t listOffset;
    /* Position of the next alias to hand out from that list. */
    uint32_t listIdx;
} UAliasContext;
173
174
static const char DATA_NAME[] = "cnvalias";
175
static const char DATA_TYPE[] = "icu";
176
177
static UDataMemory *gAliasData=NULL;
178
static icu::UInitOnce gAliasDataInitOnce = U_INITONCE_INITIALIZER;
179
180
/* Positions of the entries in the table of contents (an array of uint32_t). */
enum {
    tocLengthIndex,             /* 0: number of TOC entries that follow */
    converterListIndex,         /* 1 */
    tagListIndex,               /* 2 */
    aliasListIndex,             /* 3 */
    untaggedConvArrayIndex,     /* 4 */
    taggedAliasArrayIndex,      /* 5 */
    taggedAliasListsIndex,      /* 6 */
    tableOptionsIndex,          /* 7 */
    stringTableIndex,           /* 8 */
    normalizedStringTableIndex, /* 9 */
    offsetsCount,               /* 10: length of the swapper's temporary offsets[] */
    minTocLength = 8            /* min. tocLength in the file, does not count the tocLengthIndex! */
};
194
195
static const UConverterAliasOptions defaultTableOptions = {
196
    UCNV_IO_UNNORMALIZED,
197
    0 /* containsCnvOptionInfo */
198
};
199
static UConverterAlias gMainTable;
200
201
0
/*
 * Map a 16-bit-unit index to the string stored at that position of the
 * (normalized) string table. The whole expansion is parenthesized so that a
 * postfix operator applied to the macro result, e.g. GET_STRING(i)[0],
 * acts on the char pointer instead of binding before the cast.
 */
#define GET_STRING(idx) ((const char *)(gMainTable.stringTable + (idx)))
#define GET_NORMALIZED_STRING(idx) ((const char *)(gMainTable.normalizedStringTable + (idx)))
203
204
/*
 * Acceptance callback handed to udata_openChoice().
 * Returns true only for data whose header is at least 20 bytes, matches this
 * platform's endianness and charset family, carries the data format "CvAl"
 * and has major format version 3. The first three parameters are unused.
 */
static UBool U_CALLCONV
isAcceptable(void * /*context*/,
             const char * /*type*/, const char * /*name*/,
             const UDataInfo *pInfo) {
    if (pInfo->size < 20) {
        return FALSE;
    }
    if (pInfo->isBigEndian != U_IS_BIG_ENDIAN || pInfo->charsetFamily != U_CHARSET_FAMILY) {
        return FALSE;
    }
    /* dataFormat="CvAl" */
    if (pInfo->dataFormat[0] != 0x43 ||
        pInfo->dataFormat[1] != 0x76 ||
        pInfo->dataFormat[2] != 0x41 ||
        pInfo->dataFormat[3] != 0x6c) {
        return FALSE;
    }
    return (UBool)(pInfo->formatVersion[0] == 3);
}
218
219
/*
 * Cleanup hook registered by initAliasData(): closes the alias data if it is
 * open, re-arms the init-once guard and wipes gMainTable.
 * Always returns TRUE.
 */
static UBool U_CALLCONV ucnv_io_cleanup(void)
{
    if (gAliasData != NULL) {
        udata_close(gAliasData);
        gAliasData = NULL;
    }

    /* Allow initAliasData() to run again on the next lookup. */
    gAliasDataInitOnce.reset();

    /* Drop every section size and pointer that referred to the closed data. */
    uprv_memset(&gMainTable, 0, sizeof(gMainTable));

    return TRUE;
}
231
232
0
/*
 * One-time loader for the alias table (invoked through umtx_initOnce()).
 * Registers the cleanup hook, opens the "cnvalias" data item, checks the
 * length of its table of contents and then records the size and start of
 * every section in gMainTable.
 *
 * errCode receives the udata_openChoice() failure, or U_INVALID_FORMAT_ERROR
 * when the TOC has fewer than minTocLength entries. In both failure cases
 * gAliasData and gMainTable are not modified.
 */
static void U_CALLCONV initAliasData(UErrorCode &errCode) {
    ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup);

    U_ASSERT(gAliasData == NULL);
    UDataMemory *openedData = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &errCode);
    if (U_FAILURE(errCode)) {
        return;
    }

    /* The data starts with the TOC: its length, then one uint32_t size per section. */
    const uint32_t *toc = (const uint32_t *)udata_getMemory(openedData);
    const uint32_t tocLength = toc[tocLengthIndex];
    if (tocLength < minTocLength) {
        errCode = U_INVALID_FORMAT_ERROR;
        udata_close(openedData);
        return;
    }
    gAliasData = openedData;

    gMainTable.converterListSize      = toc[converterListIndex];
    gMainTable.tagListSize            = toc[tagListIndex];
    gMainTable.aliasListSize          = toc[aliasListIndex];
    gMainTable.untaggedConvArraySize  = toc[untaggedConvArrayIndex];
    gMainTable.taggedAliasArraySize   = toc[taggedAliasArrayIndex];
    gMainTable.taggedAliasListsSize   = toc[taggedAliasListsIndex];
    gMainTable.optionTableSize        = toc[tableOptionsIndex];
    gMainTable.stringTableSize        = toc[stringTableIndex];

    /* The normalized string table size is only present in a longer TOC. */
    if (tocLength > minTocLength) {
        gMainTable.normalizedStringTableSize = toc[normalizedStringTableIndex];
    }

    /*
     * Sections are addressed in uint16_t units. The first one starts right
     * after the TOC, i.e. after the length word plus tocLength size words.
     */
    const uint16_t *units = (const uint16_t *)toc;
    uint32_t offset = tocLength * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t));

    gMainTable.converterList = units + offset;
    offset += gMainTable.converterListSize;

    gMainTable.tagList = units + offset;
    offset += gMainTable.tagListSize;

    gMainTable.aliasList = units + offset;
    offset += gMainTable.aliasListSize;

    gMainTable.untaggedConvArray = units + offset;
    offset += gMainTable.untaggedConvArraySize;

    gMainTable.taggedAliasArray = units + offset;
    offset += gMainTable.taggedAliasArraySize;

    /* aliasLists is a 1's based array, but it has a padding character */
    gMainTable.taggedAliasLists = units + offset;
    offset += gMainTable.taggedAliasListsSize;

    /* Use the options from the file only if they exist and name a known normalization type. */
    const UConverterAliasOptions *fileOptions = (const UConverterAliasOptions *)(units + offset);
    if (gMainTable.optionTableSize > 0
        && fileOptions->stringNormalizationType < UCNV_IO_NORM_TYPE_COUNT)
    {
        /* Faster table */
        gMainTable.optionTable = fileOptions;
    }
    else {
        /* Smaller table, or an unsupported normalization mode: use the original slower lookup. */
        gMainTable.optionTable = &defaultTableOptions;
    }
    offset += gMainTable.optionTableSize;

    gMainTable.stringTable = units + offset;
    offset += gMainTable.stringTableSize;

    /* Without normalization the "normalized" strings are simply the plain strings. */
    if (gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED) {
        gMainTable.normalizedStringTable = gMainTable.stringTable;
    } else {
        gMainTable.normalizedStringTable = units + offset;
    }
}
310
311
312
/*
 * Make sure the alias table has been loaded (at most once, through
 * umtx_initOnce()) and report whether *pErrorCode indicates success.
 */
static UBool
haveAliasData(UErrorCode *pErrorCode) {
    umtx_initOnce(gAliasDataInitOnce, &initAliasData, *pErrorCode);
    if (U_FAILURE(*pErrorCode)) {
        return FALSE;
    }
    return TRUE;
}
317
318
/*
 * Tell whether alias is a usable name: non-NULL and non-empty.
 * A NULL alias additionally sets *pErrorCode to U_ILLEGAL_ARGUMENT_ERROR;
 * an empty string just yields FALSE.
 */
static inline UBool
isAlias(const char *alias, UErrorCode *pErrorCode) {
    if (alias != NULL) {
        return (UBool)(alias[0] != 0);
    }
    *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
    return FALSE;
}
326
327
0
static uint32_t getTagNumber(const char *tagname) {
328
0
    if (gMainTable.tagList) {
329
0
        uint32_t tagNum;
330
0
        for (tagNum = 0; tagNum < gMainTable.tagListSize; tagNum++) {
331
0
            if (!uprv_stricmp(GET_STRING(gMainTable.tagList[tagNum]), tagname)) {
332
0
                return tagNum;
333
0
            }
334
0
        }
335
0
    }
336
337
0
    return UINT32_MAX;
338
0
}
339
340
/* character types relevant for ucnv_compareNames() */
341
enum {
    UIGNORE = 0,   /* character is skipped by the comparison */
    ZERO = 1,      /* the digit zero */
    NONZERO = 2,   /* the digits one to nine */
    MINLETTER = 3  /* any values from here on are lowercase letter mappings */
};
347
348
/* character types for ASCII 00..7F */
349
static const uint8_t asciiTypes[128] = {
350
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
351
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
352
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
353
    ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0,
354
    0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
355
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0,
356
    0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
357
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0
358
};
359
360
0
/*
 * Character type of an ASCII byte: bytes 00..7F are looked up in asciiTypes,
 * bytes 80..FF are always UIGNORE. The argument is parenthesized at both
 * uses so that an expression argument is cast as a whole.
 */
#define GET_ASCII_TYPE(c) ((int8_t)(c) >= 0 ? asciiTypes[(uint8_t)(c)] : (uint8_t)UIGNORE)
361
362
/* character types for EBCDIC 80..FF */
363
static const uint8_t ebcdicTypes[128] = {
364
    0,    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
365
    0,    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
366
    0,    0,    0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
367
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
368
    0,    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
369
    0,    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
370
    0,    0,    0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
371
    ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0
372
};
373
374
0
/* Character type of an EBCDIC byte: 00..7F are always UIGNORE, 80..FF are looked up in ebcdicTypes. */
#define GET_EBCDIC_TYPE(c) ((int8_t)(c) >= 0 ? (uint8_t)UIGNORE : ebcdicTypes[(c) & 0x7f])
375
376
/* GET_CHAR_TYPE classifies a character in the charset family this build was compiled for. */
#if (U_CHARSET_FAMILY == U_ASCII_FAMILY)
#   define GET_CHAR_TYPE(c) GET_ASCII_TYPE(c)
#elif (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
#   define GET_CHAR_TYPE(c) GET_EBCDIC_TYPE(c)
#else
#   error U_CHARSET_FAMILY is not valid
#endif
383
384
385
/* @see ucnv_compareNames */
386
/*
 * Write the comparison form of an ASCII name into dst: letters are
 * lowercased, characters that are neither letters nor digits are dropped,
 * and a zero that does not follow a nonzero digit is dropped when another
 * digit comes right after it.
 * At most one character is written per input character, plus the
 * terminating NUL. Returns dst.
 */
U_CAPI char * U_CALLCONV
ucnv_io_stripASCIIForCompare(char *dst, const char *name) {
    char *out = dst;
    UBool prevWasNonzeroDigit = FALSE;

    for (char ch = *name++; ch != 0; ch = *name++) {
        const uint8_t kind = GET_ASCII_TYPE(ch);

        if (kind == UIGNORE) {
            /* ignore all but letters and digits */
            prevWasNonzeroDigit = FALSE;
            continue;
        }

        if (kind == ZERO) {
            if (!prevWasNonzeroDigit) {
                /* name already points at the character after ch */
                const uint8_t following = GET_ASCII_TYPE(*name);
                if (following == ZERO || following == NONZERO) {
                    continue; /* ignore leading zero before another digit */
                }
            }
        } else if (kind == NONZERO) {
            prevWasNonzeroDigit = TRUE;
        } else {
            ch = (char)kind; /* lowercased letter */
            prevWasNonzeroDigit = FALSE;
        }

        *out++ = ch;
    }

    *out = 0;
    return dst;
}
420
421
/*
 * EBCDIC counterpart of ucnv_io_stripASCIIForCompare(): write the comparison
 * form of name into dst, lowercasing letters, dropping characters that are
 * neither letters nor digits, and dropping a zero that does not follow a
 * nonzero digit when another digit comes right after it.
 * At most one character is written per input character, plus the
 * terminating NUL. Returns dst.
 */
U_CAPI char * U_CALLCONV
ucnv_io_stripEBCDICForCompare(char *dst, const char *name) {
    char *out = dst;
    UBool prevWasNonzeroDigit = FALSE;

    for (char ch = *name++; ch != 0; ch = *name++) {
        const uint8_t kind = GET_EBCDIC_TYPE(ch);

        if (kind == UIGNORE) {
            /* ignore all but letters and digits */
            prevWasNonzeroDigit = FALSE;
            continue;
        }

        if (kind == ZERO) {
            if (!prevWasNonzeroDigit) {
                /* name already points at the character after ch */
                const uint8_t following = GET_EBCDIC_TYPE(*name);
                if (following == ZERO || following == NONZERO) {
                    continue; /* ignore leading zero before another digit */
                }
            }
        } else if (kind == NONZERO) {
            prevWasNonzeroDigit = TRUE;
        } else {
            ch = (char)kind; /* lowercased letter */
            prevWasNonzeroDigit = FALSE;
        }

        *out++ = ch;
    }

    *out = 0;
    return dst;
}
455
456
/**
457
 * Do a fuzzy compare of two converter/alias names.
458
 * The comparison is case-insensitive, ignores leading zeroes if they are
459
 * followed by further digits, and ignores all but letters and digits.
460
 * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent.
461
 * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
462
 * at http://www.unicode.org/reports/tr22/
463
 *
464
 * This is a symmetrical (commutative) operation; order of arguments
465
 * is insignificant.  This is an important property for sorting the
466
 * list (when the list is preprocessed into binary form) and for
467
 * performing binary searches on it at run time.
468
 *
469
 * @param name1 a converter name or alias, zero-terminated
470
 * @param name2 a converter name or alias, zero-terminated
471
 * @return 0 if the names match, or a negative value if the name1
472
 * lexically precedes name2, or a positive value if the name1
473
 * lexically follows name2.
474
 *
475
 * @see ucnv_io_stripForCompare
476
 */
477
U_CAPI int U_EXPORT2
ucnv_compareNames(const char *name1, const char *name2) {
    UBool prevNonzeroDigit1 = FALSE, prevNonzeroDigit2 = FALSE;

    /* Each round pulls one significant character from each name and compares the pair. */
    for (;;) {
        char ch1, ch2;
        uint8_t kind, following;

        /* Next significant character of name1; 0 once the name is exhausted. */
        for (;;) {
            ch1 = *name1++;
            if (ch1 == 0) {
                break;
            }
            kind = GET_CHAR_TYPE(ch1);
            if (kind == UIGNORE) {
                prevNonzeroDigit1 = FALSE;
                continue; /* ignore all but letters and digits */
            }
            if (kind == ZERO) {
                if (!prevNonzeroDigit1) {
                    following = GET_CHAR_TYPE(*name1);
                    if (following == ZERO || following == NONZERO) {
                        continue; /* ignore leading zero before another digit */
                    }
                }
            } else if (kind == NONZERO) {
                prevNonzeroDigit1 = TRUE;
            } else {
                ch1 = (char)kind; /* lowercased letter */
                prevNonzeroDigit1 = FALSE;
            }
            break; /* deliver ch1 */
        }

        /* Next significant character of name2; 0 once the name is exhausted. */
        for (;;) {
            ch2 = *name2++;
            if (ch2 == 0) {
                break;
            }
            kind = GET_CHAR_TYPE(ch2);
            if (kind == UIGNORE) {
                prevNonzeroDigit2 = FALSE;
                continue; /* ignore all but letters and digits */
            }
            if (kind == ZERO) {
                if (!prevNonzeroDigit2) {
                    following = GET_CHAR_TYPE(*name2);
                    if (following == ZERO || following == NONZERO) {
                        continue; /* ignore leading zero before another digit */
                    }
                }
            } else if (kind == NONZERO) {
                prevNonzeroDigit2 = TRUE;
            } else {
                ch2 = (char)kind; /* lowercased letter */
                prevNonzeroDigit2 = FALSE;
            }
            break; /* deliver ch2 */
        }

        /* Both names ended at the same time: they match. */
        if (ch1 == 0 && ch2 == 0) {
            return 0;
        }

        /* Letters were lowercased above, so this comparison is case-insensitive. */
        const int difference = (int)(unsigned char)ch1 - (int)(unsigned char)ch2;
        if (difference != 0) {
            return difference;
        }
    }
}
546
547
/*
548
 * search for an alias
549
 * return the converter number index for gConverterList
550
 */
551
static inline uint32_t
552
0
findConverter(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
553
0
    uint32_t mid, start, limit;
554
0
    uint32_t lastMid;
555
0
    int result;
556
0
    int isUnnormalized = (gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED);
557
0
    char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH];
558
559
0
    if (!isUnnormalized) {
560
0
        if (uprv_strlen(alias) >= UCNV_MAX_CONVERTER_NAME_LENGTH) {
561
0
            *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
562
0
            return UINT32_MAX;
563
0
        }
564
565
        /* Lower case and remove ignoreable characters. */
566
0
        ucnv_io_stripForCompare(strippedName, alias);
567
0
        alias = strippedName;
568
0
    }
569
570
    /* do a binary search for the alias */
571
0
    start = 0;
572
0
    limit = gMainTable.untaggedConvArraySize;
573
0
    mid = limit;
574
0
    lastMid = UINT32_MAX;
575
576
0
    for (;;) {
577
0
        mid = (uint32_t)((start + limit) / 2);
578
0
        if (lastMid == mid) {   /* Have we moved? */
579
0
            break;  /* We haven't moved, and it wasn't found. */
580
0
        }
581
0
        lastMid = mid;
582
0
        if (isUnnormalized) {
583
0
            result = ucnv_compareNames(alias, GET_STRING(gMainTable.aliasList[mid]));
584
0
        }
585
0
        else {
586
0
            result = uprv_strcmp(alias, GET_NORMALIZED_STRING(gMainTable.aliasList[mid]));
587
0
        }
588
589
0
        if (result < 0) {
590
0
            limit = mid;
591
0
        } else if (result > 0) {
592
0
            start = mid;
593
0
        } else {
594
            /* Since the gencnval tool folds duplicates into one entry,
595
             * this alias in gAliasList is unique, but different standards
596
             * may map an alias to different converters.
597
             */
598
0
            if (gMainTable.untaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) {
599
0
                *pErrorCode = U_AMBIGUOUS_ALIAS_WARNING;
600
0
            }
601
            /* State whether the canonical converter name contains an option.
602
            This information is contained in this list in order to maintain backward & forward compatibility. */
603
0
            if (containsOption) {
604
0
                UBool containsCnvOptionInfo = (UBool)gMainTable.optionTable->containsCnvOptionInfo;
605
0
                *containsOption = (UBool)((containsCnvOptionInfo
606
0
                    && ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != 0))
607
0
                    || !containsCnvOptionInfo);
608
0
            }
609
0
            return gMainTable.untaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK;
610
0
        }
611
0
    }
612
613
0
    return UINT32_MAX;
614
0
}
615
616
/*
617
 * Is this alias in this list?
618
 * alias and listOffset should be non-NULL.
619
 */
620
/*
 * Scan the alias list stored at listOffset in gMainTable.taggedAliasLists
 * (a count followed by that many string indexes) for a name that
 * ucnv_compareNames() considers equal to alias. Entries whose string index
 * is 0 are skipped. A listOffset of 0 denotes "no list" and yields FALSE.
 */
static inline UBool
isAliasInList(const char *alias, uint32_t listOffset) {
    if (listOffset == 0) {
        return FALSE;
    }

    const uint16_t *entry = gMainTable.taggedAliasLists + listOffset;
    uint32_t remaining = *entry++;  /* first unit is the list length */

    for (; remaining > 0; --remaining, ++entry) {
        if (*entry != 0
            && ucnv_compareNames(alias, GET_STRING(*entry)) == 0)
        {
            return TRUE;
        }
    }
    return FALSE;
}
637
638
/*
639
 * Search for a standard name of an alias (what is the default name
640
 * that this standard uses?)
641
 * return the listOffset for gTaggedAliasLists. If it's 0,
642
 * then it couldn't be found, but the parameters are valid.
643
 */
644
static uint32_t
645
0
findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *pErrorCode) {
646
0
    uint32_t idx;
647
0
    uint32_t listOffset;
648
0
    uint32_t convNum;
649
0
    UErrorCode myErr = U_ZERO_ERROR;
650
0
    uint32_t tagNum = getTagNumber(standard);
651
652
    /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
653
0
    convNum = findConverter(alias, NULL, &myErr);
654
0
    if (myErr != U_ZERO_ERROR) {
655
0
        *pErrorCode = myErr;
656
0
    }
657
658
0
    if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
659
0
        listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
660
0
        if (listOffset && gMainTable.taggedAliasLists[listOffset + 1]) {
661
0
            return listOffset;
662
0
        }
663
0
        if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
664
            /* Uh Oh! They used an ambiguous alias.
665
               We have to search the whole swiss cheese starting
666
               at the highest standard affinity.
667
               This may take a while.
668
            */
669
0
            for (idx = 0; idx < gMainTable.taggedAliasArraySize; idx++) {
670
0
                listOffset = gMainTable.taggedAliasArray[idx];
671
0
                if (listOffset && isAliasInList(alias, listOffset)) {
672
0
                    uint32_t currTagNum = idx/gMainTable.converterListSize;
673
0
                    uint32_t currConvNum = (idx - currTagNum*gMainTable.converterListSize);
674
0
                    uint32_t tempListOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + currConvNum];
675
0
                    if (tempListOffset && gMainTable.taggedAliasLists[tempListOffset + 1]) {
676
0
                        return tempListOffset;
677
0
                    }
678
                    /* else keep on looking */
679
                    /* We could speed this up by starting on the next row
680
                       because an alias is unique per row, right now.
681
                       This would change if alias versioning appears. */
682
0
                }
683
0
            }
684
            /* The standard doesn't know about the alias */
685
0
        }
686
        /* else no default name */
687
0
        return 0;
688
0
    }
689
    /* else converter or tag not found */
690
691
0
    return UINT32_MAX;
692
0
}
693
694
/* Return the canonical name */
695
static uint32_t
696
0
findTaggedConverterNum(const char *alias, const char *standard, UErrorCode *pErrorCode) {
697
0
    uint32_t idx;
698
0
    uint32_t listOffset;
699
0
    uint32_t convNum;
700
0
    UErrorCode myErr = U_ZERO_ERROR;
701
0
    uint32_t tagNum = getTagNumber(standard);
702
703
    /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
704
0
    convNum = findConverter(alias, NULL, &myErr);
705
0
    if (myErr != U_ZERO_ERROR) {
706
0
        *pErrorCode = myErr;
707
0
    }
708
709
0
    if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
710
0
        listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
711
0
        if (listOffset && isAliasInList(alias, listOffset)) {
712
0
            return convNum;
713
0
        }
714
0
        if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
715
            /* Uh Oh! They used an ambiguous alias.
716
               We have to search one slice of the swiss cheese.
717
               We search only in the requested tag, not the whole thing.
718
               This may take a while.
719
            */
720
0
            uint32_t convStart = (tagNum)*gMainTable.converterListSize;
721
0
            uint32_t convLimit = (tagNum+1)*gMainTable.converterListSize;
722
0
            for (idx = convStart; idx < convLimit; idx++) {
723
0
                listOffset = gMainTable.taggedAliasArray[idx];
724
0
                if (listOffset && isAliasInList(alias, listOffset)) {
725
0
                    return idx-convStart;
726
0
                }
727
0
            }
728
            /* The standard doesn't know about the alias */
729
0
        }
730
        /* else no canonical name */
731
0
    }
732
    /* else converter or tag not found */
733
734
0
    return UINT32_MAX;
735
0
}
736
737
/*
 * Resolve alias to the name stored in the converter list.
 * If the first lookup fails and the name starts with "x-", the prefix is
 * removed and the lookup is tried once more.
 * Returns NULL when the alias data cannot be loaded, when alias is NULL or
 * empty, or when neither lookup finds a converter. containsOption and
 * pErrorCode are passed through to findConverter().
 */
U_CAPI const char *
ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
    const char *candidate = alias;
    UBool prefixRemoved = FALSE;

    for (;;) {
        if (!haveAliasData(pErrorCode) || !isAlias(candidate, pErrorCode)) {
            return NULL;
        }

        const uint32_t convNum = findConverter(candidate, containsOption, pErrorCode);
        if (convNum < gMainTable.converterListSize) {
            return GET_STRING(gMainTable.converterList[convNum]);
        }

        /*
         * Converter not found. Only one retry is made, and only for names
         * that begin with 'x-'. This behaviour is similar to how ICU4J does it.
         */
        if (prefixRemoved || candidate[0] != 'x' || candidate[1] != '-') {
            return NULL;
        }
        candidate += 2;
        prefixRemoved = TRUE;
    }
}
767
768
U_CDECL_BEGIN
769
770
771
/*
 * UEnumeration "count" callback for the standard-alias enumerator.
 * Reports the count word stored at the context's list offset in
 * gMainTable.taggedAliasLists, or 0 when the offset is 0 (nothing to
 * enumerate). The error code argument is unused.
 */
static int32_t U_CALLCONV
ucnv_io_countStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
    const UAliasContext *ctx = (const UAliasContext *)(enumerator->context);
    const uint32_t offset = ctx->listOffset;

    if (offset == 0) {
        return 0;
    }
    return gMainTable.taggedAliasLists[offset];
}
782
783
static const char * U_CALLCONV
784
ucnv_io_nextStandardAliases(UEnumeration *enumerator,
785
                            int32_t* resultLength,
786
                            UErrorCode * /*pErrorCode*/)
787
0
{
788
0
    UAliasContext *myContext = (UAliasContext *)(enumerator->context);
789
0
    uint32_t listOffset = myContext->listOffset;
790
791
0
    if (listOffset) {
792
0
        uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
793
0
        const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
794
795
0
        if (myContext->listIdx < listCount) {
796
0
            const char *myStr = GET_STRING(currList[myContext->listIdx++]);
797
0
            if (resultLength) {
798
0
                *resultLength = (int32_t)uprv_strlen(myStr);
799
0
            }
800
0
            return myStr;
801
0
        }
802
0
    }
803
    /* Either we accessed a zero length list, or we enumerated too far. */
804
0
    if (resultLength) {
805
0
        *resultLength = 0;
806
0
    }
807
0
    return NULL;
808
0
}
809
810
static void U_CALLCONV
811
0
ucnv_io_resetStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
812
0
    ((UAliasContext *)(enumerator->context))->listIdx = 0;
813
0
}
814
815
static void U_CALLCONV
816
0
ucnv_io_closeUEnumeration(UEnumeration *enumerator) {
817
0
    uprv_free(enumerator->context);
818
0
    uprv_free(enumerator);
819
0
}
820
821
U_CDECL_END
822
823
/* Enumerate the aliases for the specified converter and standard tag */
824
static const UEnumeration gEnumAliases = {
825
    NULL,
826
    NULL,
827
    ucnv_io_closeUEnumeration,
828
    ucnv_io_countStandardAliases,
829
    uenum_unextDefault,
830
    ucnv_io_nextStandardAliases,
831
    ucnv_io_resetStandardAliases
832
};
833
834
/*
 * Open an enumeration over the aliases that `standard` lists for `convName`.
 *
 * Returns NULL when the alias data is unavailable, when isAlias() rejects
 * convName, or when findTaggedAliasListsOffset() yields an offset that is
 * not below gMainTable.taggedAliasListsSize (converter or tag not found).
 * An offset of 0 is accepted: it produces a valid enumeration that has
 * nothing to enumerate.
 *
 * On allocation failure *pErrorCode is set to U_MEMORY_ALLOCATION_ERROR and
 * NULL is returned. The returned object is a copy of gEnumAliases whose
 * context is a heap-allocated UAliasContext.
 */
U_CAPI UEnumeration * U_EXPORT2
ucnv_openStandardNames(const char *convName,
                       const char *standard,
                       UErrorCode *pErrorCode)
{
    if (!haveAliasData(pErrorCode) || !isAlias(convName, pErrorCode)) {
        return NULL;
    }

    const uint32_t listOffset = findTaggedAliasListsOffset(convName, standard, pErrorCode);
    if (listOffset >= gMainTable.taggedAliasListsSize) {
        /* converter or tag not found */
        return NULL;
    }

    /* listOffset == 0 is fine here: names are valid, list is simply empty. */
    UEnumeration *result = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
    if (result == NULL) {
        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    uprv_memcpy(result, &gEnumAliases, sizeof(UEnumeration));

    UAliasContext *ctx = static_cast<UAliasContext *>(uprv_malloc(sizeof(UAliasContext)));
    if (ctx == NULL) {
        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
        uprv_free(result);
        return NULL;
    }
    ctx->listOffset = listOffset;
    ctx->listIdx = 0;
    result->context = ctx;

    return result;
}
869
870
/*
 * Count the aliases recorded for the converter that `alias` resolves to.
 *
 * The count is read from the list referenced by the last tag row of
 * gMainTable.taggedAliasArray (the "ALL" tag, per the original note).
 * Returns 0 when the alias data is unavailable, the alias is rejected,
 * the converter is not found, or the list offset is 0.
 */
static uint16_t
ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode) {
    if (!haveAliasData(pErrorCode) || !isAlias(alias, pErrorCode)) {
        return 0;
    }

    const uint32_t convNum = findConverter(alias, NULL, pErrorCode);
    if (convNum >= gMainTable.converterListSize) {
        /* converter not found */
        return 0;
    }

    /* tagListSize - 1 is the ALL tag */
    const int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
    if (!listOffset) {
        /* should not happen; would indicate an internal program error */
        return 0;
    }
    return gMainTable.taggedAliasLists[listOffset];
}
887
888
/*
 * Fill `aliases` with the alias strings of the converter that `alias`
 * resolves to, starting at index `start`.
 *
 * Entries aliases[start] .. aliases[listCount-1] are written, so the caller
 * must supply an array with room for the full list. Nothing is written when
 * the alias data is unavailable, the alias is rejected, the converter is not
 * found, or the list offset is 0. This function always returns 0.
 */
static uint16_t
ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode) {
    if (!haveAliasData(pErrorCode) || !isAlias(alias, pErrorCode)) {
        return 0;
    }

    const uint32_t convNum = findConverter(alias, NULL, pErrorCode);
    if (convNum >= gMainTable.converterListSize) {
        /* converter not found */
        return 0;
    }

    /* tagListSize - 1 is the ALL tag */
    const int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
    if (listOffset) {
        const uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
        /* the entries start one word after the count */
        const uint16_t *entries = gMainTable.taggedAliasLists + listOffset + 1;

        uint32_t slot = start;
        while (slot < listCount) {
            aliases[slot] = GET_STRING(entries[slot]);
            ++slot;
        }
    }
    /* a zero offset should not happen; it would be an internal program error */

    return 0;
}
912
913
static const char *
914
0
ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) {
915
0
    if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
916
0
        uint32_t convNum = findConverter(alias, NULL, pErrorCode);
917
0
        if (convNum < gMainTable.converterListSize) {
918
            /* tagListNum - 1 is the ALL tag */
919
0
            int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
920
921
0
            if (listOffset) {
922
0
                uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
923
                /* +1 to skip listCount */
924
0
                const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
925
926
0
                if (n < listCount)  {
927
0
                    return GET_STRING(currList[n]);
928
0
                }
929
0
                *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
930
0
            }
931
            /* else this shouldn't happen. internal program error */
932
0
        }
933
        /* else converter not found */
934
0
    }
935
0
    return NULL;
936
0
}
937
938
/*
 * Number of standards (tags) visible to callers: the tag list size minus
 * UCNV_NUM_HIDDEN_TAGS. Returns 0 when the alias data is unavailable.
 */
static uint16_t
ucnv_io_countStandards(UErrorCode *pErrorCode) {
    if (!haveAliasData(pErrorCode)) {
        return 0;
    }
    /* hidden tags (e.g. the empty list) are not counted */
    return (uint16_t)(gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS);
}
947
948
/*
 * Return the name of the n-th standard (tag).
 *
 * Only indexes below tagListSize - UCNV_NUM_HIDDEN_TAGS are valid; any other
 * index sets *pErrorCode to U_INDEX_OUTOFBOUNDS_ERROR and returns NULL.
 * Returns NULL as well when the alias data is unavailable.
 */
U_CAPI const char * U_EXPORT2
ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) {
    if (!haveAliasData(pErrorCode)) {
        return NULL;
    }

    if (n >= gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) {
        *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
        return NULL;
    }
    return GET_STRING(gMainTable.tagList[n]);
}
959
960
/*
 * Return the preferred name that `standard` uses for `alias`.
 *
 * The preferred name is the first entry of the tagged alias list located by
 * findTaggedAliasListsOffset(). NULL is returned when the alias data is
 * unavailable, the alias is rejected, the offset is 0 or out of range, or
 * the first entry of the list is 0.
 */
U_CAPI const char * U_EXPORT2
ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
    if (!haveAliasData(pErrorCode) || !isAlias(alias, pErrorCode)) {
        return NULL;
    }

    const uint32_t listOffset = findTaggedAliasListsOffset(alias, standard, pErrorCode);
    if (listOffset == 0 || listOffset >= gMainTable.taggedAliasListsSize) {
        return NULL;
    }

    /* skip the count word; the first entry after it is the preferred name */
    const uint16_t *entries = gMainTable.taggedAliasLists + listOffset + 1;
    if (entries[0]) {
        return GET_STRING(entries[0]);
    }

    /* A zero first entry means the alias table is malformed. */
    /* *pErrorCode = U_INVALID_FORMAT_ERROR */
    return NULL;
}
979
980
/* Public entry point; forwards to ucnv_io_countAliases(). */
U_CAPI uint16_t U_EXPORT2
ucnv_countAliases(const char *alias, UErrorCode *pErrorCode)
{
    const uint16_t aliasCount = ucnv_io_countAliases(alias, pErrorCode);
    return aliasCount;
}
985
986
987
/* Public entry point; forwards to ucnv_io_getAlias(). */
U_CAPI const char* U_EXPORT2
ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode)
{
    const char *nthAlias = ucnv_io_getAlias(alias, n, pErrorCode);
    return nthAlias;
}
992
993
/*
 * Public entry point; forwards to ucnv_io_getAliases() with a start index
 * of 0 so the whole list is written into `aliases`. The helper's return
 * value is not used.
 */
U_CAPI void U_EXPORT2
ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
{
    const uint16_t firstIndex = 0;
    (void)ucnv_io_getAliases(alias, firstIndex, aliases, pErrorCode);
}
998
999
/*
 * Public entry point; forwards to ucnv_io_countStandards() with a local
 * error code. Any error is discarded, and the helper returns 0 when the
 * alias data is unavailable.
 */
U_CAPI uint16_t U_EXPORT2
ucnv_countStandards(void)
{
    UErrorCode localStatus = U_ZERO_ERROR;
    const uint16_t standardCount = ucnv_io_countStandards(&localStatus);
    return standardCount;
}
1005
1006
/*
 * Return the converterList name for `alias` as resolved under `standard`
 * by findTaggedConverterNum(). Returns NULL when the alias data is
 * unavailable, the alias is rejected, or the returned converter number is
 * not below gMainTable.converterListSize.
 */
U_CAPI const char * U_EXPORT2
ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
    if (!haveAliasData(pErrorCode) || !isAlias(alias, pErrorCode)) {
        return NULL;
    }

    const uint32_t convNum = findTaggedConverterNum(alias, standard, pErrorCode);
    if (convNum >= gMainTable.converterListSize) {
        return NULL;
    }
    return GET_STRING(gMainTable.converterList[convNum]);
}
1018
1019
U_CDECL_BEGIN
1020
1021
1022
/*
 * UEnumeration "count" callback for the all-converters enumerator:
 * reports gMainTable.converterListSize. Both arguments are unused.
 */
static int32_t U_CALLCONV
ucnv_io_countAllConverters(UEnumeration * /*enumerator*/, UErrorCode * /*pErrorCode*/) {
    const int32_t total = gMainTable.converterListSize;
    return total;
}
1026
1027
static const char * U_CALLCONV
1028
ucnv_io_nextAllConverters(UEnumeration *enumerator,
1029
                            int32_t* resultLength,
1030
                            UErrorCode * /*pErrorCode*/)
1031
0
{
1032
0
    uint16_t *myContext = (uint16_t *)(enumerator->context);
1033
1034
0
    if (*myContext < gMainTable.converterListSize) {
1035
0
        const char *myStr = GET_STRING(gMainTable.converterList[(*myContext)++]);
1036
0
        if (resultLength) {
1037
0
            *resultLength = (int32_t)uprv_strlen(myStr);
1038
0
        }
1039
0
        return myStr;
1040
0
    }
1041
    /* Either we accessed a zero length list, or we enumerated too far. */
1042
0
    if (resultLength) {
1043
0
        *resultLength = 0;
1044
0
    }
1045
0
    return NULL;
1046
0
}
1047
1048
static void U_CALLCONV
1049
0
ucnv_io_resetAllConverters(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
1050
0
    *((uint16_t *)(enumerator->context)) = 0;
1051
0
}
1052
U_CDECL_END
1053
/*
 * Template UEnumeration for walking every entry of gMainTable.converterList.
 * ucnv_openAllNames() copies this template into a freshly allocated
 * UEnumeration and then sets the copy's `context` member, so both leading
 * slots are NULL here.
 * NOTE(review): slot order presumably follows the UEnumeration definition in
 * uenumimp.h -- confirm there before reordering anything.
 */
static const UEnumeration gEnumAllConverters = {
    NULL,
    NULL,
    ucnv_io_closeUEnumeration,      /* frees context and enumeration */
    ucnv_io_countAllConverters,     /* size of the converter list */
    uenum_unextDefault,
    ucnv_io_nextAllConverters,      /* next converter name as a char string */
    ucnv_io_resetAllConverters      /* rewind to the first converter */
};
1062
1063
/*
 * Open an enumeration over all names in gMainTable.converterList.
 *
 * Returns NULL when the alias data is unavailable. On allocation failure
 * *pErrorCode is set to U_MEMORY_ALLOCATION_ERROR and NULL is returned.
 * The returned object is a copy of gEnumAllConverters whose context is a
 * heap-allocated uint16_t cursor initialized to 0.
 */
U_CAPI UEnumeration * U_EXPORT2
ucnv_openAllNames(UErrorCode *pErrorCode) {
    if (!haveAliasData(pErrorCode)) {
        return NULL;
    }

    UEnumeration *result = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
    if (result == NULL) {
        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    uprv_memcpy(result, &gEnumAllConverters, sizeof(UEnumeration));

    uint16_t *cursor = static_cast<uint16_t *>(uprv_malloc(sizeof(uint16_t)));
    if (cursor == NULL) {
        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
        uprv_free(result);
        return NULL;
    }
    *cursor = 0;
    result->context = cursor;

    return result;
}
1086
1087
/*
 * Size of gMainTable.converterList, narrowed to uint16_t.
 * Returns 0 when the alias data is unavailable.
 */
U_CAPI uint16_t
ucnv_io_countKnownConverters(UErrorCode *pErrorCode) {
    if (!haveAliasData(pErrorCode)) {
        return 0;
    }
    return (uint16_t)gMainTable.converterListSize;
}
1094
1095
/* alias table swapping ----------------------------------------------------- */
1096
1097
U_CDECL_BEGIN
1098
1099
typedef char * U_CALLCONV StripForCompareFn(char *dst, const char *name);
1100
U_CDECL_END
1101
1102
1103
/*
1104
 * row of a temporary array
1105
 *
1106
 * gets platform-endian charset string indexes and sorting indexes;
1107
 * after sorting this array by strings, the actual arrays are permutated
1108
 * according to the sorting indexes
1109
 */
1110
typedef struct TempRow {
1111
    uint16_t strIndex, sortIndex;
1112
} TempRow;
1113
1114
typedef struct TempAliasTable {
1115
    const char *chars;
1116
    TempRow *rows;
1117
    uint16_t *resort;
1118
    StripForCompareFn *stripForCompare;
1119
} TempAliasTable;
1120
1121
enum {
1122
    STACK_ROW_CAPACITY=500
1123
};
1124
1125
/*
 * Comparison callback for uprv_sortArray() over TempRow elements.
 *
 * `context` is the TempAliasTable being sorted. Each row's string is found
 * at chars + 2*strIndex (strIndex counts 16-bit units), passed through the
 * table's stripForCompare function into a local buffer, and the two
 * stripped results are compared with uprv_strcmp().
 */
static int32_t U_CALLCONV
io_compareRows(const void *context, const void *left, const void *right) {
    TempAliasTable *table=(TempAliasTable *)context;
    const TempRow *leftRow=(const TempRow *)left;
    const TempRow *rightRow=(const TempRow *)right;

    char leftKey[UCNV_MAX_CONVERTER_NAME_LENGTH];
    char rightKey[UCNV_MAX_CONVERTER_NAME_LENGTH];

    const char *leftName=table->chars+2*leftRow->strIndex;
    const char *rightName=table->chars+2*rightRow->strIndex;

    const char *leftStripped=table->stripForCompare(leftKey, leftName);
    const char *rightStripped=table->stripForCompare(rightKey, rightName);

    return (int32_t)uprv_strcmp(leftStripped, rightStripped);
}
1136
1137
/*
 * Swap a converter alias table ("CvAl", format version 3) for another
 * platform's byte order and/or charset family.
 *
 * ds         swapper providing the read/swap callbacks and charset families
 * inData     input data, starting with the data header
 * length     number of input bytes; when negative, no table data is swapped
 *            by this function and only the size is computed
 * outData    output buffer; the in-place case (same alias-list address for
 *            input and output) is handled with a scratch array
 * pErrorCode in/out error code
 *
 * Returns the header size plus the table size in bytes, or 0 on any error.
 *
 * When the input and output charset families differ, the alias list must be
 * re-sorted because names order differently per family; the untagged
 * converter array is permuted in step with it.
 */
U_CAPI int32_t U_EXPORT2
ucnv_swapAliases(const UDataSwapper *ds,
                 const void *inData, int32_t length, void *outData,
                 UErrorCode *pErrorCode) {
    uint32_t toc[offsetsCount];
    uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */
    uint32_t i, count, tocLength, topOffset;

    /* stack storage used when the alias list is small enough */
    TempRow rows[STACK_ROW_CAPACITY];
    uint16_t resort[STACK_ROW_CAPACITY];
    TempAliasTable tempTable;

    /* udata_swapDataHeader checks the arguments */
    const int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    const UDataInfo *pInfo=(const UDataInfo *)((const char *)inData+4);
    if( pInfo->dataFormat[0]!=0x43 ||   /* dataFormat="CvAl" */
        pInfo->dataFormat[1]!=0x76 ||
        pInfo->dataFormat[2]!=0x41 ||
        pInfo->dataFormat[3]!=0x6c ||
        pInfo->formatVersion[0]!=3
    ) {
        udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    /* an alias table must contain at least the table of contents array */
    if(length>=0 && (length-headerSize)<4*(1+minTocLength)) {
        udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
                         length-headerSize);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* the same memory is viewed as 32-bit section sizes and as 16-bit table units */
    const uint32_t *inSectionSizes=(const uint32_t *)((const char *)inData+headerSize);
    const uint16_t *inTable=(const uint16_t *)inSectionSizes;

    uprv_memset(toc, 0, sizeof(toc));
    tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]);
    toc[tocLengthIndex]=tocLength;
    if(tocLength<minTocLength || offsetsCount<=tocLength) {
        udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength);
        *pErrorCode=U_INVALID_FORMAT_ERROR;
        return 0;
    }

    /* read the known part of the table of contents */
    for(i=converterListIndex; i<=tocLength; ++i) {
        toc[i]=ds->readUInt32(inSectionSizes[i]);
    }

    /* compute offsets: each section starts where the previous one ends */
    uprv_memset(offsets, 0, sizeof(offsets));
    offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */
    for(i=tagListIndex; i<=tocLength; ++i) {
        offsets[i]=offsets[i-1]+toc[i-1];
    }

    /* compute the overall size of the after-header data, in numbers of 16-bit units */
    topOffset=offsets[i-1]+toc[i-1];

    if(length<0) {
        /* size-only request: nothing further to swap */
        return headerSize+2*(int32_t)topOffset;
    }

    if((length-headerSize)<(2*(int32_t)topOffset)) {
        udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
                         length-headerSize);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    uint16_t *outTable=(uint16_t *)((char *)outData+headerSize);

    /* swap the entire table of contents */
    ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode);

    /* swap unnormalized strings & normalized strings */
    ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]),
                         outTable+offsets[stringTableIndex], pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n");
        return 0;
    }

    if(ds->inCharset==ds->outCharset) {
        /* no need to sort, just swap all 16-bit values together */
        ds->swapArray16(ds,
                        inTable+offsets[converterListIndex],
                        2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]),
                        outTable+offsets[converterListIndex],
                        pErrorCode);
        return headerSize+2*(int32_t)topOffset;
    }

    /* charset families differ: set up the temporary table for sorting */
    count=toc[aliasListIndex];

    tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */

    if(count<=STACK_ROW_CAPACITY) {
        tempTable.rows=rows;
        tempTable.resort=resort;
    } else {
        /* one heap block: `count` rows followed by `count` 16-bit resort slots */
        tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2);
        if(tempTable.rows==NULL) {
            udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n",
                             count);
            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
            return 0;
        }
        tempTable.resort=(uint16_t *)(tempTable.rows+count);
    }

    if(ds->outCharset==U_ASCII_FAMILY) {
        tempTable.stripForCompare=ucnv_io_stripASCIIForCompare;
    } else /* U_EBCDIC_FAMILY */ {
        tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare;
    }

    /*
     * Sort unique aliases+mapped names.
     *
     * The list has to be sorted again by its outCharset strings because
     * the two charset families order names differently.
     * Step 1: fill a temporary table with string indexes and original
     *         positions, and sort that table.
     * Step 2: permute and copy/swap the actual values accordingly.
     */
    const uint16_t *inAliases=inTable+offsets[aliasListIndex];
    uint16_t *outAliases=outTable+offsets[aliasListIndex];

    const uint16_t *inUntagged=inTable+offsets[untaggedConvArrayIndex];
    uint16_t *outUntagged=outTable+offsets[untaggedConvArrayIndex];

    uint16_t oldIndex;

    for(i=0; i<count; ++i) {
        tempTable.rows[i].strIndex=ds->readUInt16(inAliases[i]);
        tempTable.rows[i].sortIndex=(uint16_t)i;
    }

    uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow),
                   io_compareRows, &tempTable,
                   FALSE, pErrorCode);

    if(U_SUCCESS(*pErrorCode)) {
        /* copy/swap/permutate items */
        if(inAliases==outAliases) {
            /*
             * Swapping in place: the permutation has to go through the
             * scratch array (tempTable.resort) first, and only then is
             * the result copied over the output.
             */
            uint16_t *scratch=tempTable.resort;

            for(i=0; i<count; ++i) {
                oldIndex=tempTable.rows[i].sortIndex;
                ds->swapArray16(ds, inAliases+oldIndex, 2, scratch+i, pErrorCode);
            }
            uprv_memcpy(outAliases, scratch, 2*(size_t)count);

            for(i=0; i<count; ++i) {
                oldIndex=tempTable.rows[i].sortIndex;
                ds->swapArray16(ds, inUntagged+oldIndex, 2, scratch+i, pErrorCode);
            }
            uprv_memcpy(outUntagged, scratch, 2*(size_t)count);
        } else {
            /* separate buffers: write each item straight to its new slot */
            for(i=0; i<count; ++i) {
                oldIndex=tempTable.rows[i].sortIndex;
                ds->swapArray16(ds, inAliases+oldIndex, 2, outAliases+i, pErrorCode);
                ds->swapArray16(ds, inUntagged+oldIndex, 2, outUntagged+i, pErrorCode);
            }
        }
    }

    /* release the heap block, if one was needed, before any error return */
    if(tempTable.rows!=rows) {
        uprv_free(tempTable.rows);
    }

    if(U_FAILURE(*pErrorCode)) {
        udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n",
                         count);
        return 0;
    }

    /* swap remaining 16-bit values: the sections before and after the two permuted arrays */
    ds->swapArray16(ds,
                    inTable+offsets[converterListIndex],
                    2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]),
                    outTable+offsets[converterListIndex],
                    pErrorCode);
    ds->swapArray16(ds,
                    inTable+offsets[taggedAliasArrayIndex],
                    2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]),
                    outTable+offsets[taggedAliasArrayIndex],
                    pErrorCode);

    return headerSize+2*(int32_t)topOffset;
}
1349
1350
#endif
1351
1352
1353
/*
1354
 * Hey, Emacs, please set the following:
1355
 *
1356
 * Local Variables:
1357
 * indent-tabs-mode: nil
1358
 * End:
1359
 *
1360
 */