Coverage Report

Created: 2026-06-13 06:44

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/icu/source/common/ucnv_io.cpp
Line
Count
Source
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
******************************************************************************
5
*
6
*   Copyright (C) 1999-2015, International Business Machines
7
*   Corporation and others.  All Rights Reserved.
8
*
9
******************************************************************************
10
*
11
*
12
*  ucnv_io.cpp:
13
*  initializes global variables and defines functions pertaining to converter 
14
*  name resolution aspect of the conversion code.
15
*
16
*   new implementation:
17
*
18
*   created on: 1999nov22
19
*   created by: Markus W. Scherer
20
*
21
*   Use the binary cnvalias.icu (created from convrtrs.txt) to work
22
*   with aliases for converter names.
23
*
24
*   Date        Name        Description
25
*   11/22/1999  markus      Created
26
*   06/28/2002  grhoten     Major overhaul of the converter alias design.
27
*                           Now an alias can map to different converters
28
*                           depending on the specified standard.
29
*******************************************************************************
30
*/
31
32
#include "unicode/utypes.h"
33
34
#if !UCONFIG_NO_CONVERSION
35
36
#include "unicode/ucnv.h"
37
#include "unicode/udata.h"
38
39
#include "umutex.h"
40
#include "uarrsort.h"
41
#include "uassert.h"
42
#include "udataswp.h"
43
#include "udatamem.h"
44
#include "cstring.h"
45
#include "cmemory.h"
46
#include "ucnv_io.h"
47
#include "uenumimp.h"
48
#include "ucln_cmn.h"
49
50
/* Format of cnvalias.icu -----------------------------------------------------
51
 *
52
 * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt.
53
 * This binary form contains several tables. All indexes are to uint16_t
54
 * units, and not to the bytes (uint8_t units). Addressing everything on
55
 * 16-bit boundaries allows us to store more information with small index
56
 * numbers, which are also 16-bit in size. The majority of the table (except
57
 * the string table) are 16-bit numbers.
58
 *
59
 * First there is the size of the Table of Contents (TOC). The TOC
60
 * entries contain the size of each section. In order to find the offset
61
 * you just need to sum up the previous offsets.
62
 * The TOC length and entries are an array of uint32_t values.
63
 * The first section after the TOC starts immediately after the TOC.
64
 *
65
 * 1) This section contains a list of converters. This list contains indexes
66
 * into the string table for the converter name. The index of this list is
67
 * also used by other sections, which are mentioned later on.
68
 * This list is not sorted.
69
 *
70
 * 2) This section contains a list of tags. This list contains indexes
71
 * into the string table for the tag name. The index of this list is
72
 * also used by other sections, which are mentioned later on.
73
 * This list is in priority order of standards.
74
 *
75
 * 3) This section contains a list of sorted unique aliases. This
76
 * list contains indexes into the string table for the alias name. The
77
 * index of this list is also used by other sections, like the 4th section.
78
 * The index for the 3rd and 4th section is used to get the
79
 * alias -> converter name mapping. Section 3 and 4 form a two column table.
80
 * Some of the most significant bits of each index may contain other
81
 * information (see findConverter for details).
82
 *
83
 * 4) This section contains a list of mapped converter names. Consider this
84
 * as a table that maps the 3rd section to the 1st section. This list contains
85
 * indexes into the 1st section. The index of this list is the same index in
86
 * the 3rd section. There is also some extra information in the high bits of
87
 * each converter index in this table. Currently it's only used to say that
88
 * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK
89
 * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is
90
 * the predigested form of the 5th section so that an alias lookup can be fast.
91
 *
92
 * 5) This section contains a 2D array with indexes to the 6th section. This
93
 * section is the full form of all alias mappings. The column index is the
94
 * index into the converter list (column header). The row index is the index
95
 * to the tag list (row header). This 2D array is the top part of a 3D array. The
96
 * third dimension is in the 6th section.
97
 *
98
 * 6) This is a blob of variable length arrays. Each array starts with a size,
99
 * and is followed by indexes to alias names in the string table. This is
100
 * the third dimension to the section 5. No other section should be referencing
101
 * this section.
102
 *
103
 * 7) Starting in ICU 3.6, this can be a UConverterAliasOptions struct. Its
104
 * presence indicates that a section 9 exists. UConverterAliasOptions specifies
105
 * what type of string normalization is used among other potential things in the
106
 * future.
107
 *
108
 * 8) This is the string table. All strings are indexed on an even address.
109
 * There are two reasons for this. First many chip architectures locate strings
110
 * faster on even address boundaries. Second, since all indexes are 16-bit
111
 * numbers, this string table can be 128KB in size instead of 64KB when we
112
 * only have strings starting on an even address.
113
 *
114
 * 9) When present this is a set of prenormalized strings from section 8. This
115
 * table contains normalized strings with the dashes and spaces stripped out,
116
 * and all strings lowercased. In the future, the options in section 7 may state
117
 * other types of normalization.
118
 *
119
 * Here is the concept of section 5 and 6. It's a 3D cube. Each tag
120
 * has a unique alias among all converters. That same alias can
121
 * be mentioned in other standards on different converters,
122
 * but only one alias per tag can be unique.
123
 *
124
 *
125
 *              Converter Names (Usually in TR22 form)
126
 *           -------------------------------------------.
127
 *     T    /                                          /|
128
 *     a   /                                          / |
129
 *     g  /                                          /  |
130
 *     s /                                          /   |
131
 *      /                                          /    |
132
 *      ------------------------------------------/     |
133
 *    A |                                         |     |
134
 *    l |                                         |     |
135
 *    i |                                         |    /
136
 *    a |                                         |   /
137
 *    s |                                         |  /
138
 *    e |                                         | /
139
 *    s |                                         |/
140
 *      -------------------------------------------
141
 *
142
 *
143
 *
144
 * Here is what it really looks like. It's like swiss cheese.
145
 * There are holes. Some converters aren't recognized by
146
 * a standard, or they are really old converters that the
147
 * standard doesn't recognize anymore.
148
 *
149
 *              Converter Names (Usually in TR22 form)
150
 *           -------------------------------------------.
151
 *     T    /##########################################/|
152
 *     a   /     #            #                       /#
153
 *     g  /  #      ##     ##     ### # ### ### ### #/
154
 *     s / #             #####  ####        ##  ## #/#
155
 *      / ### # # ##  #  #   #          ### # #   #/##
156
 *      ------------------------------------------/# #
157
 *    A |### # # ##  #  #   #          ### # #   #|# #
158
 *    l |# # #    #     #               ## #     #|# #
159
 *    i |# # #    #     #                #       #|#
160
 *    a |#                                       #|#
161
 *    s |                                        #|#
162
 *    e
163
 *    s
164
 *
165
 */
166
167
/**
 * Iteration state used by the UEnumeration API.
 * NOTE(review): the users of this struct are outside this part of the file;
 * presumably listOffset selects one alias list and listIdx is the current
 * position within it -- confirm against the enumeration callbacks.
 */
typedef struct UAliasContext {
    uint32_t listOffset;    /* which list is being enumerated (see note above) */
    uint32_t listIdx;       /* current index within that list (see note above) */
} UAliasContext;
174
175
static const char DATA_NAME[] = "cnvalias";
176
static const char DATA_TYPE[] = "icu";
177
178
static UDataMemory *gAliasData=nullptr;
179
static icu::UInitOnce gAliasDataInitOnce {};
180
181
/*
 * Positions of the entries in the table of contents (an array of uint32_t)
 * at the start of the alias data. Entry 0 holds the TOC length; entries 1..9
 * hold the sizes of sections 1..9 described in the format comment above.
 */
enum {
    tocLengthIndex             = 0,
    converterListIndex         = 1,
    tagListIndex               = 2,
    aliasListIndex             = 3,
    untaggedConvArrayIndex     = 4,
    taggedAliasArrayIndex      = 5,
    taggedAliasListsIndex      = 6,
    tableOptionsIndex          = 7,
    stringTableIndex           = 8,
    normalizedStringTableIndex = 9,

    /* length of the swapper's temporary offsets[] (one past the last index) */
    offsetsCount,

    /* min. tocLength in the file, does not count the tocLengthIndex! */
    minTocLength               = 8
};
195
196
static const UConverterAliasOptions defaultTableOptions = {
197
    UCNV_IO_UNNORMALIZED,
198
    0 /* containsCnvOptionInfo */
199
};
200
static UConverterAlias gMainTable;
201
202
6.23M
#define GET_STRING(idx) (const char *)(gMainTable.stringTable + (idx))
203
#define GET_NORMALIZED_STRING(idx) (const char *)(gMainTable.normalizedStringTable + (idx))
204
205
/*
 * Acceptance callback handed to udata_openChoice() by initAliasData().
 * Accepts a data item only if its UDataInfo is at least 20 bytes, matches
 * this build's endianness and charset family, has dataFormat "CvAl"
 * and has formatVersion[0] equal to 3.
 */
static UBool U_CALLCONV
isAcceptable(void * /*context*/,
             const char * /*type*/, const char * /*name*/,
             const UDataInfo *pInfo) {
    if (pInfo->size < 20) {
        return false;
    }
    if (pInfo->isBigEndian != U_IS_BIG_ENDIAN ||
        pInfo->charsetFamily != U_CHARSET_FAMILY) {
        return false;
    }

    /* dataFormat="CvAl" */
    const bool isCvAl =
        pInfo->dataFormat[0] == 0x43 &&
        pInfo->dataFormat[1] == 0x76 &&
        pInfo->dataFormat[2] == 0x41 &&
        pInfo->dataFormat[3] == 0x6c;

    return isCvAl && pInfo->formatVersion[0] == 3;
}
219
220
/*
 * Cleanup hook registered by initAliasData(): closes the alias data if it is
 * open, resets the init-once guard and zeroes gMainTable so that a later
 * initialization starts from a clean state.
 */
static UBool U_CALLCONV ucnv_io_cleanup()
{
    if (gAliasData != nullptr) {
        udata_close(gAliasData);
    }
    gAliasData = nullptr;

    gAliasDataInitOnce.reset();
    uprv_memset(&gMainTable, 0, sizeof(gMainTable));

    /* Everything was cleaned up */
    return true;
}
232
233
10
/*
 * One-time initializer (invoked through gAliasDataInitOnce) that opens the
 * "cnvalias" data item and points gMainTable at its sections.
 *
 * The data starts with a table of contents of uint32_t values: entry 0 is the
 * number of section-size entries that follow, and each following entry is the
 * size of one section counted in uint16_t units. The sections follow the TOC
 * back to back.
 *
 * All size validation is done in 64-bit arithmetic and is completed before any
 * global state is touched. On a malformed or truncated item the data is closed,
 * errCode is set to U_INVALID_FORMAT_ERROR, and gAliasData / gMainTable are
 * left unmodified (so the cleanup hook never sees a closed handle).
 *
 * @param errCode  receives the error from udata_openChoice() or
 *                 U_INVALID_FORMAT_ERROR; unchanged on success.
 */
static void U_CALLCONV initAliasData(UErrorCode &errCode) {
    /* Declared up front, without initializers, so the gotos below never
     * jump across an initialization. */
    UDataMemory *data;
    const uint16_t *table;
    const uint32_t *sectionSizes;
    uint32_t tableStart;
    uint32_t currOffset;
    int32_t dataLength;
    uint64_t sizeOfTOC;
    uint64_t sizeOfData;

    ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup);

    U_ASSERT(gAliasData == nullptr);
    data = udata_openChoice(nullptr, DATA_TYPE, DATA_NAME, isAcceptable, nullptr, &errCode);
    if (U_FAILURE(errCode)) {
        return;
    }

    sectionSizes = static_cast<const uint32_t*>(udata_getMemory(data));
    dataLength = udata_getLength(data); // This is the length minus the UDataInfo size
    if (dataLength <= int32_t(sizeof(sectionSizes[0]))) {
        // We don't even have a TOC!
        goto invalidFormat;
    }
    table = reinterpret_cast<const uint16_t*>(sectionSizes);
    tableStart = sectionSizes[0];

    // 64-bit math: tableStart comes from the file and must not be able to wrap this product.
    sizeOfTOC = (uint64_t(tableStart) + 1) * sizeof(sectionSizes[0]);
    if (tableStart < minTocLength || uint64_t(dataLength) <= sizeOfTOC) {
        // We don't have a whole TOC!
        goto invalidFormat;
    }

    // The whole TOC is known to be inside the data now, so every sectionSizes[section] read is in bounds.
    sizeOfData = sizeOfTOC;
    for (uint32_t section = 1; section <= tableStart; section++) {
        sizeOfData += uint64_t(sectionSizes[section]) * sizeof(table[0]);
    }
    if (uint64_t(dataLength) < sizeOfData) {
        // Truncated file!
        goto invalidFormat;
    }
    // There may be some extra padding at the end, or this is a new file format with extra data that we can't read yet.

    // Validation is complete; only now publish the handle and fill in the table.
    gAliasData = data;

    gMainTable.converterListSize      = sectionSizes[1];
    gMainTable.tagListSize            = sectionSizes[2];
    gMainTable.aliasListSize          = sectionSizes[3];
    gMainTable.untaggedConvArraySize  = sectionSizes[4];
    gMainTable.taggedAliasArraySize   = sectionSizes[5];
    gMainTable.taggedAliasListsSize   = sectionSizes[6];
    gMainTable.optionTableSize        = sectionSizes[7];
    gMainTable.stringTableSize        = sectionSizes[8];

    if (tableStart > minTocLength) {
        gMainTable.normalizedStringTableSize = sectionSizes[9];
    }

    // Offsets are counted in uint16_t units; the first section starts right after the uint32_t TOC.
    currOffset = (tableStart + 1) * (sizeof(uint32_t)/sizeof(uint16_t));
    gMainTable.converterList = table + currOffset;

    currOffset += gMainTable.converterListSize;
    gMainTable.tagList = table + currOffset;

    currOffset += gMainTable.tagListSize;
    gMainTable.aliasList = table + currOffset;

    currOffset += gMainTable.aliasListSize;
    gMainTable.untaggedConvArray = table + currOffset;

    currOffset += gMainTable.untaggedConvArraySize;
    gMainTable.taggedAliasArray = table + currOffset;

    /* aliasLists is a 1's based array, but it has a padding character */
    currOffset += gMainTable.taggedAliasArraySize;
    gMainTable.taggedAliasLists = table + currOffset;

    currOffset += gMainTable.taggedAliasListsSize;
    if (gMainTable.optionTableSize > 0
        && reinterpret_cast<const UConverterAliasOptions*>(table + currOffset)->stringNormalizationType < UCNV_IO_NORM_TYPE_COUNT)
    {
        /* Faster table */
        gMainTable.optionTable = reinterpret_cast<const UConverterAliasOptions*>(table + currOffset);
    }
    else {
        /* Smaller table, or I can't handle this normalization mode!
        Use the original slower table lookup. */
        gMainTable.optionTable = &defaultTableOptions;
    }

    currOffset += gMainTable.optionTableSize;
    gMainTable.stringTable = table + currOffset;

    currOffset += gMainTable.stringTableSize;
    // Without normalization the "normalized" strings are simply the plain string table.
    gMainTable.normalizedStringTable = ((gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED)
        ? gMainTable.stringTable : (table + currOffset));

    return;

invalidFormat:
    // Reached only before gAliasData/gMainTable were assigned, so nothing else needs undoing.
    errCode = U_INVALID_FORMAT_ERROR;
    udata_close(data);
}
333
334
335
/*
 * Make sure the alias data has been loaded (initAliasData() is run at most
 * once through gAliasDataInitOnce).
 * @return whether *pErrorCode indicates success afterwards.
 */
static UBool
haveAliasData(UErrorCode *pErrorCode) {
    UErrorCode &status = *pErrorCode;
    umtx_initOnce(gAliasDataInitOnce, &initAliasData, status);
    return U_SUCCESS(status);
}
340
341
/*
 * Check that an alias argument is usable.
 * A nullptr alias sets *pErrorCode to U_ILLEGAL_ARGUMENT_ERROR and yields
 * false; otherwise the result is true exactly when the string is non-empty.
 */
static inline UBool
isAlias(const char *alias, UErrorCode *pErrorCode) {
    if (alias != nullptr) {
        return alias[0] != 0;
    }
    *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
    return false;
}
349
350
0
/*
 * Look up a tag (standard) name in the tag list, comparing with uprv_stricmp().
 * @return the index of the first matching tag, or UINT32_MAX if the tag list
 *         is not available or contains no match.
 */
static uint32_t getTagNumber(const char *tagname) {
    if (gMainTable.tagList == nullptr) {
        return UINT32_MAX;
    }

    const uint32_t tagCount = gMainTable.tagListSize;
    for (uint32_t tagIdx = 0; tagIdx < tagCount; ++tagIdx) {
        const char *candidate = GET_STRING(gMainTable.tagList[tagIdx]);
        if (uprv_stricmp(candidate, tagname) == 0) {
            return tagIdx;
        }
    }
    return UINT32_MAX;
}
362
363
/*
 * Character types relevant for ucnv_compareNames() and the strip functions.
 * The type tables hold one of the first three values, or, for a letter, the
 * code of its lowercase form (always >= MINLETTER).
 */
enum {
    UIGNORE,    /* neither a letter nor a digit: skipped */
    ZERO,       /* the digit zero */
    NONZERO,    /* the digits one through nine */
    MINLETTER /* any values from here on are lowercase letter mappings */
};

/* character types for ASCII 00..7F */
static const uint8_t asciiTypes[128] = {
    /* 00..0F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 10..1F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 20..2F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 30..3F */ ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0,
    /* 40..4F */ 0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    /* 50..5F */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0,
    /* 60..6F */ 0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    /* 70..7F */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0
};

/*
 * Type of an ASCII-family char: bytes with the high bit set are UIGNORE, all
 * others are looked up in asciiTypes. The argument is parenthesized at every
 * use so that the casts apply to the whole argument expression.
 */
#define GET_ASCII_TYPE(c) ((int8_t)(c) >= 0 ? asciiTypes[(uint8_t)(c)] : (uint8_t)UIGNORE)
384
385
/* character types for EBCDIC 80..FF */
386
static const uint8_t ebcdicTypes[128] = {
387
    0,    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
388
    0,    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
389
    0,    0,    0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
390
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
391
    0,    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
392
    0,    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
393
    0,    0,    0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
394
    ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0
395
};
396
397
0
#define GET_EBCDIC_TYPE(c) ((int8_t)(c) < 0 ? ebcdicTypes[(c)&0x7f] : (uint8_t)UIGNORE)
398
399
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
400
0
#   define GET_CHAR_TYPE(c) GET_ASCII_TYPE(c)
401
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
402
#   define GET_CHAR_TYPE(c) GET_EBCDIC_TYPE(c)
403
#else
404
#   error U_CHARSET_FAMILY is not valid
405
#endif
406
407
408
/* @see ucnv_compareNames */
409
U_CAPI char * U_CALLCONV
410
12.4M
ucnv_io_stripASCIIForCompare(char *dst, const char *name) {
411
12.4M
    char *dstItr = dst;
412
12.4M
    uint8_t type, nextType;
413
12.4M
    char c1;
414
12.4M
    UBool afterDigit = false;
415
416
171M
    while ((c1 = *name++) != 0) {
417
158M
        type = GET_ASCII_TYPE(c1);
418
158M
        switch (type) {
419
20.8M
        case UIGNORE:
420
20.8M
            afterDigit = false;
421
20.8M
            continue; /* ignore all but letters and digits */
422
8.61M
        case ZERO:
423
8.61M
            if (!afterDigit) {
424
11.0k
                nextType = GET_ASCII_TYPE(*name);
425
11.0k
                if (nextType == ZERO || nextType == NONZERO) {
426
1.27k
                    continue; /* ignore leading zero before another digit */
427
1.27k
                }
428
11.0k
            }
429
8.61M
            break;
430
62.3M
        case NONZERO:
431
62.3M
            afterDigit = true;
432
62.3M
            break;
433
66.8M
        default:
434
66.8M
            c1 = (char)type; /* lowercased letter */
435
66.8M
            afterDigit = false;
436
66.8M
            break;
437
158M
        }
438
137M
        *dstItr++ = c1;
439
137M
    }
440
12.4M
    *dstItr = 0;
441
12.4M
    return dst;
442
12.4M
}
443
444
/*
 * EBCDIC-family counterpart of ucnv_io_stripASCIIForCompare(): copies name to
 * dst with letters lowercased, non-alphanumerics dropped, and a zero that does
 * not follow a nonzero digit dropped when another digit comes right after it.
 * dst is NUL-terminated; the result is never longer than name, and no bounds
 * check is done here.
 * @return dst
 */
U_CAPI char * U_CALLCONV
ucnv_io_stripEBCDICForCompare(char *dst, const char *name) {
    char *out = dst;
    UBool afterDigit = false;

    for (const char *src = name; *src != 0; ++src) {
        char ch = *src;
        const uint8_t kind = GET_EBCDIC_TYPE(ch);

        if (kind == UIGNORE) {
            /* ignore all but letters and digits */
            afterDigit = false;
            continue;
        }

        if (kind == ZERO) {
            if (!afterDigit) {
                const uint8_t following = GET_EBCDIC_TYPE(src[1]);
                if (following == ZERO || following == NONZERO) {
                    continue; /* ignore leading zero before another digit */
                }
            }
        } else if (kind == NONZERO) {
            afterDigit = true;
        } else {
            ch = (char)kind; /* lowercased letter */
            afterDigit = false;
        }

        *out++ = ch;
    }

    *out = 0;
    return dst;
}
478
479
/**
480
 * Do a fuzzy compare of two converter/alias names.
481
 * The comparison is case-insensitive, ignores leading zeroes if they are not
482
 * followed by further digits, and ignores all but letters and digits.
483
 * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent.
484
 * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
485
 * at http://www.unicode.org/reports/tr22/
486
 *
487
 * This is a symmetrical (commutative) operation; order of arguments
488
 * is insignificant.  This is an important property for sorting the
489
 * list (when the list is preprocessed into binary form) and for
490
 * performing binary searches on it at run time.
491
 *
492
 * @param name1 a converter name or alias, zero-terminated
493
 * @param name2 a converter name or alias, zero-terminated
494
 * @return 0 if the names match, or a negative value if the name1
495
 * lexically precedes name2, or a positive value if the name1
496
 * lexically follows name2.
497
 *
498
 * @see ucnv_io_stripForCompare
499
 */
500
U_CAPI int U_EXPORT2
501
0
ucnv_compareNames(const char *name1, const char *name2) {
502
0
    int rc;
503
0
    uint8_t type, nextType;
504
0
    char c1, c2;
505
0
    UBool afterDigit1 = false, afterDigit2 = false;
506
507
0
    for (;;) {
508
0
        while ((c1 = *name1++) != 0) {
509
0
            type = GET_CHAR_TYPE(c1);
510
0
            switch (type) {
511
0
            case UIGNORE:
512
0
                afterDigit1 = false;
513
0
                continue; /* ignore all but letters and digits */
514
0
            case ZERO:
515
0
                if (!afterDigit1) {
516
0
                    nextType = GET_CHAR_TYPE(*name1);
517
0
                    if (nextType == ZERO || nextType == NONZERO) {
518
0
                        continue; /* ignore leading zero before another digit */
519
0
                    }
520
0
                }
521
0
                break;
522
0
            case NONZERO:
523
0
                afterDigit1 = true;
524
0
                break;
525
0
            default:
526
0
                c1 = (char)type; /* lowercased letter */
527
0
                afterDigit1 = false;
528
0
                break;
529
0
            }
530
0
            break; /* deliver c1 */
531
0
        }
532
0
        while ((c2 = *name2++) != 0) {
533
0
            type = GET_CHAR_TYPE(c2);
534
0
            switch (type) {
535
0
            case UIGNORE:
536
0
                afterDigit2 = false;
537
0
                continue; /* ignore all but letters and digits */
538
0
            case ZERO:
539
0
                if (!afterDigit2) {
540
0
                    nextType = GET_CHAR_TYPE(*name2);
541
0
                    if (nextType == ZERO || nextType == NONZERO) {
542
0
                        continue; /* ignore leading zero before another digit */
543
0
                    }
544
0
                }
545
0
                break;
546
0
            case NONZERO:
547
0
                afterDigit2 = true;
548
0
                break;
549
0
            default:
550
0
                c2 = (char)type; /* lowercased letter */
551
0
                afterDigit2 = false;
552
0
                break;
553
0
            }
554
0
            break; /* deliver c2 */
555
0
        }
556
557
        /* If we reach the ends of both strings then they match */
558
0
        if ((c1|c2)==0) {
559
0
            return 0;
560
0
        }
561
562
        /* Case-insensitive comparison */
563
0
        rc = (int)(unsigned char)c1 - (int)(unsigned char)c2;
564
0
        if (rc != 0) {
565
0
            return rc;
566
0
        }
567
0
    }
568
0
}
569
570
/*
571
 * search for an alias
572
 * return the converter number index for gConverterList
573
 */
574
static inline uint32_t
575
6.23M
findConverter(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
576
6.23M
    uint32_t mid, start, limit;
577
6.23M
    uint32_t lastMid;
578
6.23M
    int result;
579
6.23M
    int isUnnormalized = (gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED);
580
6.23M
    char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH];
581
582
6.23M
    if (!isUnnormalized) {
583
6.23M
        if (uprv_strlen(alias) >= UCNV_MAX_CONVERTER_NAME_LENGTH) {
584
0
            *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
585
0
            return UINT32_MAX;
586
0
        }
587
588
        /* Lower case and remove ignoreable characters. */
589
6.23M
        ucnv_io_stripForCompare(strippedName, alias);
590
6.23M
        alias = strippedName;
591
6.23M
    }
592
593
    /* do a binary search for the alias */
594
6.23M
    start = 0;
595
6.23M
    limit = gMainTable.untaggedConvArraySize;
596
6.23M
    mid = limit;
597
6.23M
    lastMid = UINT32_MAX;
598
599
66.2M
    for (;;) {
600
66.2M
        mid = (start + limit) / 2;
601
66.2M
        if (lastMid == mid) {   /* Have we moved? */
602
3.58k
            break;  /* We haven't moved, and it wasn't found. */
603
3.58k
        }
604
66.2M
        lastMid = mid;
605
66.2M
        if (isUnnormalized) {
606
0
            result = ucnv_compareNames(alias, GET_STRING(gMainTable.aliasList[mid]));
607
0
        }
608
66.2M
        else {
609
66.2M
            result = uprv_strcmp(alias, GET_NORMALIZED_STRING(gMainTable.aliasList[mid]));
610
66.2M
        }
611
612
66.2M
        if (result < 0) {
613
14.6M
            limit = mid;
614
51.5M
        } else if (result > 0) {
615
45.3M
            start = mid;
616
45.3M
        } else {
617
            /* Since the gencnval tool folds duplicates into one entry,
618
             * this alias in gAliasList is unique, but different standards
619
             * may map an alias to different converters.
620
             */
621
6.23M
            if (gMainTable.untaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) {
622
4.15M
                *pErrorCode = U_AMBIGUOUS_ALIAS_WARNING;
623
4.15M
            }
624
            /* State whether the canonical converter name contains an option.
625
            This information is contained in this list in order to maintain backward & forward compatibility. */
626
6.23M
            if (containsOption) {
627
6.23M
                UBool containsCnvOptionInfo = static_cast<UBool>(gMainTable.optionTable->containsCnvOptionInfo);
628
6.23M
                *containsOption = static_cast<UBool>((containsCnvOptionInfo
629
6.23M
                    && ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != 0))
630
6.23M
                    || !containsCnvOptionInfo);
631
6.23M
            }
632
6.23M
            return gMainTable.untaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK;
633
6.23M
        }
634
66.2M
    }
635
636
3.58k
    return UINT32_MAX;
637
6.23M
}
638
639
/*
640
 * Is this alias in this list?
641
 * alias and listOffset should be non-nullptr.
642
 */
643
static inline UBool
644
0
isAliasInList(const char *alias, uint32_t listOffset) {
645
0
    if (listOffset) {
646
0
        uint32_t currAlias;
647
0
        uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
648
        /* +1 to skip listCount */
649
0
        const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
650
0
        for (currAlias = 0; currAlias < listCount; currAlias++) {
651
0
            if (currList[currAlias]
652
0
                && ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0)
653
0
            {
654
0
                return true;
655
0
            }
656
0
        }
657
0
    }
658
0
    return false;
659
0
}
660
661
/*
662
 * Search for an standard name of an alias (what is the default name
663
 * that this standard uses?)
664
 * return the listOffset for gTaggedAliasLists. If it's 0,
665
 * the it couldn't be found, but the parameters are valid.
666
 */
667
static uint32_t
668
0
findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *pErrorCode) {
669
0
    uint32_t idx;
670
0
    uint32_t listOffset;
671
0
    uint32_t convNum;
672
0
    UErrorCode myErr = U_ZERO_ERROR;
673
0
    uint32_t tagNum = getTagNumber(standard);
674
675
    /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
676
0
    convNum = findConverter(alias, nullptr, &myErr);
677
0
    if (myErr != U_ZERO_ERROR) {
678
0
        *pErrorCode = myErr;
679
0
    }
680
681
0
    if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
682
0
        listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
683
0
        if (listOffset && gMainTable.taggedAliasLists[listOffset + 1]) {
684
0
            return listOffset;
685
0
        }
686
0
        if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
687
            /* Uh Oh! They used an ambiguous alias.
688
               We have to search the whole swiss cheese starting
689
               at the highest standard affinity.
690
               This may take a while.
691
            */
692
0
            for (idx = 0; idx < gMainTable.taggedAliasArraySize; idx++) {
693
0
                listOffset = gMainTable.taggedAliasArray[idx];
694
0
                if (listOffset && isAliasInList(alias, listOffset)) {
695
0
                    uint32_t currTagNum = idx/gMainTable.converterListSize;
696
0
                    uint32_t currConvNum = (idx - currTagNum*gMainTable.converterListSize);
697
0
                    uint32_t tempListOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + currConvNum];
698
0
                    if (tempListOffset && gMainTable.taggedAliasLists[tempListOffset + 1]) {
699
0
                        return tempListOffset;
700
0
                    }
701
                    /* else keep on looking */
702
                    /* We could speed this up by starting on the next row
703
                       because an alias is unique per row, right now.
704
                       This would change if alias versioning appears. */
705
0
                }
706
0
            }
707
            /* The standard doesn't know about the alias */
708
0
        }
709
        /* else no default name */
710
0
        return 0;
711
0
    }
712
    /* else converter or tag not found */
713
714
0
    return UINT32_MAX;
715
0
}
716
717
/* Return the canonical name */
718
static uint32_t
719
0
findTaggedConverterNum(const char *alias, const char *standard, UErrorCode *pErrorCode) {
720
0
    uint32_t idx;
721
0
    uint32_t listOffset;
722
0
    uint32_t convNum;
723
0
    UErrorCode myErr = U_ZERO_ERROR;
724
0
    uint32_t tagNum = getTagNumber(standard);
725
726
    /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
727
0
    convNum = findConverter(alias, nullptr, &myErr);
728
0
    if (myErr != U_ZERO_ERROR) {
729
0
        *pErrorCode = myErr;
730
0
    }
731
732
0
    if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
733
0
        listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
734
0
        if (listOffset && isAliasInList(alias, listOffset)) {
735
0
            return convNum;
736
0
        }
737
0
        if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
738
            /* Uh Oh! They used an ambiguous alias.
739
               We have to search one slice of the swiss cheese.
740
               We search only in the requested tag, not the whole thing.
741
               This may take a while.
742
            */
743
0
            uint32_t convStart = (tagNum)*gMainTable.converterListSize;
744
0
            uint32_t convLimit = (tagNum+1)*gMainTable.converterListSize;
745
0
            for (idx = convStart; idx < convLimit; idx++) {
746
0
                listOffset = gMainTable.taggedAliasArray[idx];
747
0
                if (listOffset && isAliasInList(alias, listOffset)) {
748
0
                    return idx-convStart;
749
0
                }
750
0
            }
751
            /* The standard doesn't know about the alias */
752
0
        }
753
        /* else no canonical name */
754
0
    }
755
    /* else converter or tag not found */
756
757
0
    return UINT32_MAX;
758
0
}
759
760
/*
 * Resolve an alias to the name stored in gMainTable.converterList.
 *
 * Makes at most two attempts: first with the alias as given, then, if that
 * lookup found no converter and the name starts with "x-", with that prefix
 * removed (similar to what ICU4J does).
 *
 * containsOption and pErrorCode are handed through to findConverter().
 * Returns nullptr when no converter is found.
 */
U_CAPI const char *
ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
    const char *candidate = alias;

    for (int32_t attempt = 0; attempt < 2; ++attempt) {
        if (attempt == 1) {
            /* Second attempt only makes sense for names with an "x-" prefix. */
            if (!(candidate[0] == 'x' && candidate[1] == '-')) {
                break;
            }
            candidate += 2;
        }

        if (!haveAliasData(pErrorCode) || !isAlias(candidate, pErrorCode)) {
            break;
        }

        const uint32_t convNum = findConverter(candidate, containsOption, pErrorCode);
        if (convNum < gMainTable.converterListSize) {
            return GET_STRING(gMainTable.converterList[convNum]);
        }
        /* else converter not found; maybe retry without the prefix */
    }

    return nullptr;
}
790
791
U_CDECL_BEGIN
792
793
794
/*
 * Count callback for the standard-alias enumeration.
 * The first unit of a tagged alias list holds its length; a list offset of 0
 * means there is nothing to enumerate.
 */
static int32_t U_CALLCONV
ucnv_io_countStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
    const UAliasContext *ctx = static_cast<const UAliasContext *>(enumerator->context);
    const uint32_t listOffset = ctx->listOffset;

    if (listOffset == 0) {
        return 0;
    }
    return gMainTable.taggedAliasLists[listOffset];
}
805
806
static const char * U_CALLCONV
807
ucnv_io_nextStandardAliases(UEnumeration *enumerator,
808
                            int32_t* resultLength,
809
                            UErrorCode * /*pErrorCode*/)
810
0
{
811
0
    UAliasContext *myContext = (UAliasContext *)(enumerator->context);
812
0
    uint32_t listOffset = myContext->listOffset;
813
814
0
    if (listOffset) {
815
0
        uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
816
0
        const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
817
818
0
        if (myContext->listIdx < listCount) {
819
0
            const char *myStr = GET_STRING(currList[myContext->listIdx++]);
820
0
            if (resultLength) {
821
0
                *resultLength = (int32_t)uprv_strlen(myStr);
822
0
            }
823
0
            return myStr;
824
0
        }
825
0
    }
826
    /* Either we accessed a zero length list, or we enumerated too far. */
827
0
    if (resultLength) {
828
0
        *resultLength = 0;
829
0
    }
830
0
    return nullptr;
831
0
}
832
833
static void U_CALLCONV
834
0
ucnv_io_resetStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
835
0
    ((UAliasContext *)(enumerator->context))->listIdx = 0;
836
0
}
837
838
static void U_CALLCONV
839
0
ucnv_io_closeUEnumeration(UEnumeration *enumerator) {
840
0
    uprv_free(enumerator->context);
841
0
    uprv_free(enumerator);
842
0
}
843
844
U_CDECL_END
845
846
/* Enumerate the aliases for the specified converter and standard tag */
847
static const UEnumeration gEnumAliases = {
848
    nullptr,
849
    nullptr,
850
    ucnv_io_closeUEnumeration,
851
    ucnv_io_countStandardAliases,
852
    uenum_unextDefault,
853
    ucnv_io_nextStandardAliases,
854
    ucnv_io_resetStandardAliases
855
};
856
857
/*
 * Open an enumeration over the names that the given standard lists for the
 * given converter name.
 *
 * Returns nullptr when the alias data is unavailable, when convName is not
 * accepted by isAlias(), when the converter/standard lookup yields an offset
 * outside taggedAliasLists, or on allocation failure (in which case
 * *pErrorCode is set to U_MEMORY_ALLOCATION_ERROR).
 * A list offset of 0 still yields a valid, empty enumeration.
 */
U_CAPI UEnumeration * U_EXPORT2
ucnv_openStandardNames(const char *convName,
                       const char *standard,
                       UErrorCode *pErrorCode)
{
    if (!haveAliasData(pErrorCode) || !isAlias(convName, pErrorCode)) {
        return nullptr;
    }

    const uint32_t listOffset = findTaggedAliasListsOffset(convName, standard, pErrorCode);

    /* listOffset == 0 is accepted on purpose: the converter name and standard
       are okay, but there is nothing to enumerate. */
    if (listOffset >= gMainTable.taggedAliasListsSize) {
        /* converter or tag not found */
        return nullptr;
    }

    UEnumeration *result = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
    if (result == nullptr) {
        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    uprv_memcpy(result, &gEnumAliases, sizeof(UEnumeration));

    UAliasContext *ctx = static_cast<UAliasContext *>(uprv_malloc(sizeof(UAliasContext)));
    if (ctx == nullptr) {
        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
        uprv_free(result);
        return nullptr;
    }
    ctx->listOffset = listOffset;
    ctx->listIdx = 0;
    result->context = ctx;

    return result;
}
892
893
/*
 * Number of aliases recorded for the converter that the alias resolves to.
 * Uses the last row of taggedAliasArray (the ALL tag).
 * Returns 0 when the data is unavailable, the alias is rejected, or no
 * converter is found.
 */
static uint16_t
ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode) {
    if (!haveAliasData(pErrorCode) || !isAlias(alias, pErrorCode)) {
        return 0;
    }

    const uint32_t convNum = findConverter(alias, nullptr, pErrorCode);
    if (convNum >= gMainTable.converterListSize) {
        /* converter not found */
        return 0;
    }

    /* tagListSize - 1 is the ALL tag */
    const int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
    if (listOffset == 0) {
        /* this shouldn't happen. internal program error */
        return 0;
    }

    return gMainTable.taggedAliasLists[listOffset];
}
910
911
/*
 * Fill aliases[start..count-1] with pointers to the alias strings of the
 * converter that the alias resolves to (taken from the ALL tag row).
 * The caller must supply an array large enough for all aliases.
 * Always returns 0.
 */
static uint16_t
ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode) {
    if (!haveAliasData(pErrorCode) || !isAlias(alias, pErrorCode)) {
        return 0;
    }

    const uint32_t convNum = findConverter(alias, nullptr, pErrorCode);
    if (convNum >= gMainTable.converterListSize) {
        /* converter not found */
        return 0;
    }

    /* tagListSize - 1 is the ALL tag */
    const int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
    if (listOffset == 0) {
        /* this shouldn't happen. internal program error */
        return 0;
    }

    const uint32_t aliasCount = gMainTable.taggedAliasLists[listOffset];
    /* +1 to skip the count unit */
    const uint16_t *entries = gMainTable.taggedAliasLists + listOffset + 1;

    for (uint32_t slot = start; slot < aliasCount; ++slot) {
        aliases[slot] = GET_STRING(entries[slot]);
    }
    return 0;
}
935
936
static const char *
937
0
ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) {
938
0
    if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
939
0
        uint32_t convNum = findConverter(alias, nullptr, pErrorCode);
940
0
        if (convNum < gMainTable.converterListSize) {
941
            /* tagListNum - 1 is the ALL tag */
942
0
            int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
943
944
0
            if (listOffset) {
945
0
                uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
946
                /* +1 to skip listCount */
947
0
                const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
948
949
0
                if (n < listCount)  {
950
0
                    return GET_STRING(currList[n]);
951
0
                }
952
0
                *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
953
0
            }
954
            /* else this shouldn't happen. internal program error */
955
0
        }
956
        /* else converter not found */
957
0
    }
958
0
    return nullptr;
959
0
}
960
961
/*
 * Number of standards (tags) visible to callers: the tag list size minus
 * UCNV_NUM_HIDDEN_TAGS. Returns 0 when the alias data is unavailable.
 */
static uint16_t
ucnv_io_countStandards(UErrorCode *pErrorCode) {
    if (!haveAliasData(pErrorCode)) {
        return 0;
    }
    /* Don't include the hidden tags */
    return static_cast<uint16_t>(gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS);
}
970
971
/*
 * Return the name of the n-th standard (0-based).
 * Sets *pErrorCode to U_INDEX_OUTOFBOUNDS_ERROR and returns nullptr when n is
 * not below the number of non-hidden tags; returns nullptr without touching
 * the index check when the alias data is unavailable.
 */
U_CAPI const char * U_EXPORT2
ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) {
    if (!haveAliasData(pErrorCode)) {
        return nullptr;
    }

    if (n >= gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) {
        *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
        return nullptr;
    }

    return GET_STRING(gMainTable.tagList[n]);
}
982
983
/*
 * Return the preferred name that the given standard uses for the alias:
 * the first entry of the matching tagged alias list.
 * Returns nullptr when there is no such list or its first entry is 0.
 */
U_CAPI const char * U_EXPORT2
ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
    if (!haveAliasData(pErrorCode) || !isAlias(alias, pErrorCode)) {
        return nullptr;
    }

    const uint32_t listOffset = findTaggedAliasListsOffset(alias, standard, pErrorCode);
    if (listOffset == 0 || listOffset >= gMainTable.taggedAliasListsSize) {
        return nullptr;
    }

    /* +1 skips the count unit; the first entry is the preferred name */
    const uint16_t *entries = gMainTable.taggedAliasLists + listOffset + 1;
    if (entries[0] == 0) {
        /* someone screwed up the alias table. */
        /* *pErrorCode = U_INVALID_FORMAT_ERROR */
        return nullptr;
    }

    return GET_STRING(entries[0]);
}
1002
1003
/* Public entry point; forwards to ucnv_io_countAliases(). */
U_CAPI uint16_t U_EXPORT2
ucnv_countAliases(const char *alias, UErrorCode *pErrorCode)
{
    const uint16_t aliasCount = ucnv_io_countAliases(alias, pErrorCode);
    return aliasCount;
}
1008
1009
1010
/* Public entry point; forwards to ucnv_io_getAlias(). */
U_CAPI const char* U_EXPORT2
ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode)
{
    const char *nthAlias = ucnv_io_getAlias(alias, n, pErrorCode);
    return nthAlias;
}
1015
1016
/*
 * Public entry point; fills the caller's array with all aliases, starting at
 * index 0, via ucnv_io_getAliases(). The helper's return value is not used.
 */
U_CAPI void U_EXPORT2
ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
{
    const uint16_t firstIndex = 0;
    (void)ucnv_io_getAliases(alias, firstIndex, aliases, pErrorCode);
}
1021
1022
/*
 * Public entry point without an error parameter: uses a local status that is
 * discarded, and returns whatever ucnv_io_countStandards() reports.
 */
U_CAPI uint16_t U_EXPORT2
ucnv_countStandards()
{
    UErrorCode discardedStatus = U_ZERO_ERROR;
    return ucnv_io_countStandards(&discardedStatus);
}
1028
1029
/*
 * Return the converter-list name of the converter for which the given
 * standard lists the alias, or nullptr when there is none.
 */
U_CAPI const char * U_EXPORT2
ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
    if (!haveAliasData(pErrorCode) || !isAlias(alias, pErrorCode)) {
        return nullptr;
    }

    const uint32_t convNum = findTaggedConverterNum(alias, standard, pErrorCode);
    if (convNum >= gMainTable.converterListSize) {
        return nullptr;
    }

    return GET_STRING(gMainTable.converterList[convNum]);
}
1041
1042
U_CDECL_BEGIN
1043
1044
1045
/* Count callback for the all-converters enumeration: size of the converter list. */
static int32_t U_CALLCONV
ucnv_io_countAllConverters(UEnumeration * /*enumerator*/, UErrorCode * /*pErrorCode*/) {
    const int32_t total = gMainTable.converterListSize;
    return total;
}
1049
1050
static const char * U_CALLCONV
1051
ucnv_io_nextAllConverters(UEnumeration *enumerator,
1052
                            int32_t* resultLength,
1053
                            UErrorCode * /*pErrorCode*/)
1054
0
{
1055
0
    uint16_t *myContext = (uint16_t *)(enumerator->context);
1056
1057
0
    if (*myContext < gMainTable.converterListSize) {
1058
0
        const char *myStr = GET_STRING(gMainTable.converterList[(*myContext)++]);
1059
0
        if (resultLength) {
1060
0
            *resultLength = (int32_t)uprv_strlen(myStr);
1061
0
        }
1062
0
        return myStr;
1063
0
    }
1064
    /* Either we accessed a zero length list, or we enumerated too far. */
1065
0
    if (resultLength) {
1066
0
        *resultLength = 0;
1067
0
    }
1068
0
    return nullptr;
1069
0
}
1070
1071
static void U_CALLCONV
1072
0
ucnv_io_resetAllConverters(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
1073
0
    *((uint16_t *)(enumerator->context)) = 0;
1074
0
}
1075
U_CDECL_END
1076
/*
 * Template for enumerating every converter name.
 * ucnv_openAllNames() copies this structure into a freshly allocated
 * UEnumeration and then sets that copy's context pointer to a uint16_t cursor.
 */
static const UEnumeration gEnumAllConverters = {
    nullptr,
    nullptr,
    ucnv_io_closeUEnumeration,      /* frees context and enumeration */
    ucnv_io_countAllConverters,     /* size of the converter list */
    uenum_unextDefault,
    ucnv_io_nextAllConverters,      /* next converter name as a char * string */
    ucnv_io_resetAllConverters      /* rewind the cursor to 0 */
};
1085
1086
/*
 * Open an enumeration over all converter names.
 * Returns nullptr when the alias data is unavailable or on allocation failure
 * (in which case *pErrorCode is set to U_MEMORY_ALLOCATION_ERROR).
 */
U_CAPI UEnumeration * U_EXPORT2
ucnv_openAllNames(UErrorCode *pErrorCode) {
    if (!haveAliasData(pErrorCode)) {
        return nullptr;
    }

    UEnumeration *result = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
    if (result == nullptr) {
        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    uprv_memcpy(result, &gEnumAllConverters, sizeof(UEnumeration));

    /* The context is a single uint16_t: the index of the next converter. */
    uint16_t *cursor = static_cast<uint16_t *>(uprv_malloc(sizeof(uint16_t)));
    if (cursor == nullptr) {
        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
        uprv_free(result);
        return nullptr;
    }
    *cursor = 0;
    result->context = cursor;

    return result;
}
1109
1110
/*
 * Size of the converter list, truncated to uint16_t.
 * Returns 0 when the alias data is unavailable.
 */
U_CAPI uint16_t
ucnv_io_countKnownConverters(UErrorCode *pErrorCode) {
    if (!haveAliasData(pErrorCode)) {
        return 0;
    }
    return (uint16_t)gMainTable.converterListSize;
}
1117
1118
/* alias table swapping ----------------------------------------------------- */
1119
1120
U_CDECL_BEGIN
1121
1122
/*
 * Signature of the name-stripping functions used when sorting aliases:
 * takes a destination buffer and a name, returns a char * that is then
 * compared with uprv_strcmp() (see io_compareRows).
 */
typedef char * U_CALLCONV StripForCompareFn(char *dst, const char *name);
1123
U_CDECL_END
1124
1125
1126
/*
1127
 * row of a temporary array
1128
 *
1129
 * gets platform-endian charset string indexes and sorting indexes;
1130
 * after sorting this array by strings, the actual arrays are permutated
1131
 * according to the sorting indexes
1132
 */
1133
typedef struct TempRow {
1134
    uint16_t strIndex, sortIndex;
1135
} TempRow;
1136
1137
typedef struct TempAliasTable {
1138
    const char *chars;
1139
    TempRow *rows;
1140
    uint16_t *resort;
1141
    StripForCompareFn *stripForCompare;
1142
} TempAliasTable;
1143
1144
/* Number of rows that ucnv_swapAliases() keeps on the stack before it allocates. */
enum { STACK_ROW_CAPACITY = 500 };
1147
1148
/*
 * Comparison callback for uprv_sortArray().
 * context is a TempAliasTable; left and right are TempRow items.
 * Each row's string is located at chars + 2*strIndex, passed through the
 * table's stripForCompare function, and the two results are compared with
 * uprv_strcmp().
 */
static int32_t U_CALLCONV
io_compareRows(const void *context, const void *left, const void *right) {
    char leftBuffer[UCNV_MAX_CONVERTER_NAME_LENGTH];
    char rightBuffer[UCNV_MAX_CONVERTER_NAME_LENGTH];

    const TempAliasTable *table = static_cast<const TempAliasTable *>(context);
    const TempRow *leftRow = static_cast<const TempRow *>(left);
    const TempRow *rightRow = static_cast<const TempRow *>(right);
    const char *stringBase = table->chars;

    /* strIndex counts 16-bit units, hence the factor 2 for a byte offset */
    const char *leftKey = table->stripForCompare(leftBuffer, stringBase + 2 * leftRow->strIndex);
    const char *rightKey = table->stripForCompare(rightBuffer, stringBase + 2 * rightRow->strIndex);

    return static_cast<int32_t>(uprv_strcmp(leftKey, rightKey));
}
1160
1161
/*
 * Swap a converter alias table (data format "CvAl", format version 3) between
 * the input and output endianness/charset described by ds.
 *
 * ds         swapper with the read/swap callbacks and in/out charset families
 * inData     start of the data item, including its header
 * length     number of bytes available at inData; when negative, nothing is
 *            written and only the size is computed
 * outData    destination; may be the same memory as inData (in-place swap)
 * pErrorCode in/out ICU error code
 *
 * Returns the size in bytes of header plus table, or 0 on failure with
 * *pErrorCode set:
 *   U_UNSUPPORTED_ERROR        wrong data format or format version
 *   U_INDEX_OUTOFBOUNDS_ERROR  length too small for the table
 *   U_INVALID_FORMAT_ERROR     unsupported number of table-of-contents entries
 *   U_MEMORY_ALLOCATION_ERROR  temporary sorting table could not be allocated
 * Errors from the swapper callbacks and from uprv_sortArray() are passed on.
 */
U_CAPI int32_t U_EXPORT2
ucnv_swapAliases(const UDataSwapper *ds,
                 const void *inData, int32_t length, void *outData,
                 UErrorCode *pErrorCode) {
    const UDataInfo *pInfo;
    int32_t headerSize;

    const uint16_t *inTable;
    const uint32_t *inSectionSizes;
    uint32_t toc[offsetsCount];
    uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */
    uint32_t i, count, tocLength, topOffset;

    TempRow rows[STACK_ROW_CAPACITY];
    uint16_t resort[STACK_ROW_CAPACITY];
    TempAliasTable tempTable;

    /* udata_swapDataHeader checks the arguments */
    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    pInfo=(const UDataInfo *)((const char *)inData+4);
    if(!(
        pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
        pInfo->dataFormat[1]==0x76 &&
        pInfo->dataFormat[2]==0x41 &&
        pInfo->dataFormat[3]==0x6c &&
        pInfo->formatVersion[0]==3
    )) {
        udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    /* an alias table must contain at least the table of contents array */
    if(length>=0 && (length-headerSize)<4*(1+minTocLength)) {
        udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
                         length-headerSize);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    inSectionSizes=(const uint32_t *)((const char *)inData+headerSize);
    inTable=(const uint16_t *)inSectionSizes;
    uprv_memset(toc, 0, sizeof(toc));
    toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]);
    if(tocLength<minTocLength || offsetsCount<=tocLength) {
        udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength);
        *pErrorCode=U_INVALID_FORMAT_ERROR;
        return 0;
    }

    /*
     * The check above only guaranteed room for minTocLength entries, but the
     * loop below reads tocLength of them. Make sure that the declared length
     * covers the whole table of contents before reading it.
     * tocLength<offsetsCount here, so the cast and the arithmetic are safe.
     */
    if(length>=0 && (length-headerSize)<4*(1+(int32_t)tocLength)) {
        udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
                         length-headerSize);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* read the known part of the table of contents */
    for(i=converterListIndex; i<=tocLength; ++i) {
        toc[i]=ds->readUInt32(inSectionSizes[i]);
    }

    /* compute offsets */
    uprv_memset(offsets, 0, sizeof(offsets));
    offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */
    for(i=tagListIndex; i<=tocLength; ++i) {
        offsets[i]=offsets[i-1]+toc[i-1];
    }

    /* compute the overall size of the after-header data, in numbers of 16-bit units */
    topOffset=offsets[i-1]+toc[i-1];

    if(length>=0) {
        uint16_t *outTable;
        const uint16_t *p, *p2;
        uint16_t *q, *q2;
        uint16_t oldIndex;

        /*
         * Same test as (length-headerSize)<2*topOffset, written as an unsigned
         * comparison of 16-bit unit counts so that a bogus, very large
         * topOffset cannot overflow a signed int.
         * length-headerSize is known to be positive at this point.
         */
        if((uint32_t)(length-headerSize)/2<topOffset) {
            udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
                             length-headerSize);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }

        outTable=(uint16_t *)((char *)outData+headerSize);

        /* swap the entire table of contents */
        ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode);

        /* swap unnormalized strings & normalized strings */
        ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]),
                             outTable+offsets[stringTableIndex], pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n");
            return 0;
        }

        if(ds->inCharset==ds->outCharset) {
            /* no need to sort, just swap all 16-bit values together */
            ds->swapArray16(ds,
                            inTable+offsets[converterListIndex],
                            2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]),
                            outTable+offsets[converterListIndex],
                            pErrorCode);
        } else {
            /* allocate the temporary table for sorting */
            count=toc[aliasListIndex];

            tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */

            if(count<=STACK_ROW_CAPACITY) {
                tempTable.rows=rows;
                tempTable.resort=resort;
            } else {
                /* one allocation: count rows followed by count 16-bit resort slots */
                tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2);
                if(tempTable.rows==nullptr) {
                    udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n",
                                     count);
                    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
                    return 0;
                }
                tempTable.resort=(uint16_t *)(tempTable.rows+count);
            }

            if(ds->outCharset==U_ASCII_FAMILY) {
                tempTable.stripForCompare=ucnv_io_stripASCIIForCompare;
            } else /* U_EBCDIC_FAMILY */ {
                tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare;
            }

            /*
             * Sort unique aliases+mapped names.
             *
             * We need to sort the list again by outCharset strings because they
             * sort differently for different charset families.
             * First we set up a temporary table with the string indexes and
             * sorting indexes and sort that.
             * Then we permute and copy/swap the actual values.
             */
            p=inTable+offsets[aliasListIndex];
            q=outTable+offsets[aliasListIndex];

            p2=inTable+offsets[untaggedConvArrayIndex];
            q2=outTable+offsets[untaggedConvArrayIndex];

            for(i=0; i<count; ++i) {
                tempTable.rows[i].strIndex=ds->readUInt16(p[i]);
                tempTable.rows[i].sortIndex=(uint16_t)i;
            }

            uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow),
                           io_compareRows, &tempTable,
                           false, pErrorCode);

            if(U_SUCCESS(*pErrorCode)) {
                /* copy/swap/permute items */
                if(p!=q) {
                    for(i=0; i<count; ++i) {
                        oldIndex=tempTable.rows[i].sortIndex;
                        ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode);
                        ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode);
                    }
                } else {
                    /*
                     * If we swap in-place, then the permutation must use another
                     * temporary array (tempTable.resort)
                     * before the results are copied to the outBundle.
                     */
                    uint16_t *r=tempTable.resort;

                    for(i=0; i<count; ++i) {
                        oldIndex=tempTable.rows[i].sortIndex;
                        ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode);
                    }
                    uprv_memcpy(q, r, 2*(size_t)count);

                    for(i=0; i<count; ++i) {
                        oldIndex=tempTable.rows[i].sortIndex;
                        ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode);
                    }
                    uprv_memcpy(q2, r, 2*(size_t)count);
                }
            }

            /* release the heap table before any error return below */
            if(tempTable.rows!=rows) {
                uprv_free(tempTable.rows);
            }

            if(U_FAILURE(*pErrorCode)) {
                udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n",
                                 count);
                return 0;
            }

            /* swap remaining 16-bit values */
            ds->swapArray16(ds,
                            inTable+offsets[converterListIndex],
                            2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]),
                            outTable+offsets[converterListIndex],
                            pErrorCode);
            ds->swapArray16(ds,
                            inTable+offsets[taggedAliasArrayIndex],
                            2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]),
                            outTable+offsets[taggedAliasArrayIndex],
                            pErrorCode);
        }
    }

    return headerSize+2*(int32_t)topOffset;
}
1373
1374
#endif
1375
1376
1377
/*
1378
 * Hey, Emacs, please set the following:
1379
 *
1380
 * Local Variables:
1381
 * indent-tabs-mode: nil
1382
 * End:
1383
 *
1384
 */