/src/icu/source/common/ucnv_bld.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2016 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | /* |
4 | | ******************************************************************** |
5 | | * COPYRIGHT: |
6 | | * Copyright (c) 1996-2016, International Business Machines Corporation and |
7 | | * others. All Rights Reserved. |
8 | | ******************************************************************** |
9 | | * |
10 | | * ucnv_bld.cpp: |
11 | | * |
12 | | * Defines functions that are used in the creation/initialization/deletion |
13 | | * of converters and related structures. |
14 | | * uses uconv_io.h routines to access disk information |
15 | | * is used by ucnv.h to implement public API create/delete/flushCache routines |
16 | | * Modification History: |
17 | | * |
18 | | * Date Name Description |
19 | | * |
20 | | * 06/20/2000 helena OS/400 port changes; mostly typecast. |
21 | | * 06/29/2000 helena Major rewrite of the callback interface. |
22 | | */ |
23 | | |
24 | | #include "unicode/utypes.h" |
25 | | |
26 | | #if !UCONFIG_NO_CONVERSION |
27 | | |
28 | | #include "unicode/putil.h" |
29 | | #include "unicode/udata.h" |
30 | | #include "unicode/ucnv.h" |
31 | | #include "unicode/uloc.h" |
32 | | #include "mutex.h" |
33 | | #include "putilimp.h" |
34 | | #include "uassert.h" |
35 | | #include "utracimp.h" |
36 | | #include "ucnv_io.h" |
37 | | #include "ucnv_bld.h" |
38 | | #include "ucnvmbcs.h" |
39 | | #include "ucnv_ext.h" |
40 | | #include "ucnv_cnv.h" |
41 | | #include "ucnv_imp.h" |
42 | | #include "uhash.h" |
43 | | #include "umutex.h" |
44 | | #include "cstring.h" |
45 | | #include "cmemory.h" |
46 | | #include "ucln_cmn.h" |
47 | | #include "ustr_cnv.h" |
48 | | |
49 | | |
50 | | #if 0 |
51 | | #include <stdio.h> |
52 | | extern void UCNV_DEBUG_LOG(char *what, char *who, void *p, int l); |
53 | | #define UCNV_DEBUG_LOG(x,y,z) UCNV_DEBUG_LOG(x,y,z,__LINE__) |
54 | | #else |
55 | | # define UCNV_DEBUG_LOG(x,y,z) |
56 | | #endif |
57 | | |
/*
 * Table of the built-in (statically defined) shared data structures.
 * It is indexed with UConverterType values elsewhere in this file
 * (converterData[type]), so the position of every entry matters.
 * A slot is NULL when no built-in shared data is available for it,
 * either unconditionally or because the build configuration
 * (UCONFIG_NO_LEGACY_CONVERSION / UCONFIG_ONLY_HTML_CONVERSION)
 * compiles the converter out. Each #if/#else pair contributes the same
 * number of entries, so the slot positions do not shift between configurations.
 */
static const UConverterSharedData * const
converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={
    NULL, NULL,

#if UCONFIG_NO_LEGACY_CONVERSION
    NULL,
#else
    &_MBCSData,
#endif

    &_Latin1Data,
    &_UTF8Data, &_UTF16BEData, &_UTF16LEData,
#if UCONFIG_ONLY_HTML_CONVERSION
    NULL, NULL,
#else
    &_UTF32BEData, &_UTF32LEData,
#endif
    NULL,

#if UCONFIG_NO_LEGACY_CONVERSION
    NULL,
#else
    &_ISO2022Data,
#endif

#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
    NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL,
    NULL,
#else
    &_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBCSData6,
    &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBCSData19,
    &_HZData,
#endif

#if UCONFIG_ONLY_HTML_CONVERSION
    NULL,
#else
    &_SCSUData,
#endif


#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
    NULL,
#else
    &_ISCIIData,
#endif

    &_ASCIIData,
#if UCONFIG_ONLY_HTML_CONVERSION
    NULL, NULL, &_UTF16Data, NULL, NULL, NULL,
#else
    &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData,
#endif

#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
    NULL,
#else
    &_CompoundTextData
#endif
};
119 | | |
/*
 * Maps stripped converter names to their UConverterType.
 *
 * Keep the entries sorted by name in strcmp() order:
 * getAlgorithmicTypeFromName() performs a binary search over this table.
 * Every name must be in lower case with all spaces, dashes and underscores
 * removed, because the lookup key is normalized with
 * ucnv_io_stripForCompare() before it is compared.
 *
 * The "platformendian"/"oppositeendian" aliases resolve to the big- or
 * little-endian type depending on U_IS_BIG_ENDIAN.
 */
static struct {
  const char *name;
  const UConverterType type;
} const cnvNameType[] = {
#if !UCONFIG_ONLY_HTML_CONVERSION
  { "bocu1", UCNV_BOCU1 },
  { "cesu8", UCNV_CESU8 },
#endif
#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
  { "hz",UCNV_HZ },
#endif
#if !UCONFIG_ONLY_HTML_CONVERSION
  { "imapmailboxname", UCNV_IMAP_MAILBOX },
#endif
#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
  { "iscii", UCNV_ISCII },
#endif
#if !UCONFIG_NO_LEGACY_CONVERSION
  { "iso2022", UCNV_ISO_2022 },
#endif
  { "iso88591", UCNV_LATIN_1 },
#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
  { "lmbcs1", UCNV_LMBCS_1 },
  { "lmbcs11",UCNV_LMBCS_11 },
  { "lmbcs16",UCNV_LMBCS_16 },
  { "lmbcs17",UCNV_LMBCS_17 },
  { "lmbcs18",UCNV_LMBCS_18 },
  { "lmbcs19",UCNV_LMBCS_19 },
  { "lmbcs2", UCNV_LMBCS_2 },
  { "lmbcs3", UCNV_LMBCS_3 },
  { "lmbcs4", UCNV_LMBCS_4 },
  { "lmbcs5", UCNV_LMBCS_5 },
  { "lmbcs6", UCNV_LMBCS_6 },
  { "lmbcs8", UCNV_LMBCS_8 },
#endif
#if !UCONFIG_ONLY_HTML_CONVERSION
  { "scsu", UCNV_SCSU },
#endif
  { "usascii", UCNV_US_ASCII },
  { "utf16", UCNV_UTF16 },
  { "utf16be", UCNV_UTF16_BigEndian },
  { "utf16le", UCNV_UTF16_LittleEndian },
#if U_IS_BIG_ENDIAN
  { "utf16oppositeendian", UCNV_UTF16_LittleEndian },
  { "utf16platformendian", UCNV_UTF16_BigEndian },
#else
  { "utf16oppositeendian", UCNV_UTF16_BigEndian},
  { "utf16platformendian", UCNV_UTF16_LittleEndian },
#endif
#if !UCONFIG_ONLY_HTML_CONVERSION
  { "utf32", UCNV_UTF32 },
  { "utf32be", UCNV_UTF32_BigEndian },
  { "utf32le", UCNV_UTF32_LittleEndian },
#if U_IS_BIG_ENDIAN
  { "utf32oppositeendian", UCNV_UTF32_LittleEndian },
  { "utf32platformendian", UCNV_UTF32_BigEndian },
#else
  { "utf32oppositeendian", UCNV_UTF32_BigEndian },
  { "utf32platformendian", UCNV_UTF32_LittleEndian },
#endif
#endif
#if !UCONFIG_ONLY_HTML_CONVERSION
  { "utf7", UCNV_UTF7 },
#endif
  { "utf8", UCNV_UTF8 },
#if !UCONFIG_ONLY_HTML_CONVERSION
  { "x11compoundtext", UCNV_COMPOUND_TEXT}
#endif
};
193 | | |
194 | | |
195 | | /*initializes some global variables */ |
196 | | static UHashtable *SHARED_DATA_HASHTABLE = NULL; |
197 | | static icu::UMutex cnvCacheMutex; |
/* Note: cnvCacheMutex guards the shared data cache and is   */
/*       also held for reference count updates.              */
200 | | |
201 | | static const char **gAvailableConverters = NULL; |
202 | | static uint16_t gAvailableConverterCount = 0; |
203 | | static icu::UInitOnce gAvailableConvertersInitOnce = U_INITONCE_INITIALIZER; |
204 | | |
205 | | #if !U_CHARSET_IS_UTF8 |
206 | | |
207 | | /* This contains the resolved converter name. So no further alias lookup is needed again. */ |
208 | | static char gDefaultConverterNameBuffer[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; /* +1 for NULL */ |
209 | | static const char *gDefaultConverterName = NULL; |
210 | | |
211 | | /* |
212 | | If the default converter is an algorithmic converter, this is the cached value. |
213 | | We don't cache a full UConverter and clone it because ucnv_clone doesn't have |
214 | | less overhead than an algorithmic open. We don't cache non-algorithmic converters |
215 | | because ucnv_flushCache must be able to unload the default converter and its table. |
216 | | */ |
217 | | static const UConverterSharedData *gDefaultAlgorithmicSharedData = NULL; |
218 | | |
219 | | /* Does gDefaultConverterName have a converter option and require extra parsing? */ |
220 | | static UBool gDefaultConverterContainsOption; |
221 | | |
222 | | #endif /* !U_CHARSET_IS_UTF8 */ |
223 | | |
224 | | static const char DATA_TYPE[] = "cnv"; |
225 | | |
226 | | /* ucnv_flushAvailableConverterCache. This is only called from ucnv_cleanup(). |
227 | | * If it is ever to be called from elsewhere, synchronization |
228 | | * will need to be considered. |
229 | | */ |
230 | | static void |
231 | 0 | ucnv_flushAvailableConverterCache() { |
232 | 0 | gAvailableConverterCount = 0; |
233 | 0 | if (gAvailableConverters) { |
234 | 0 | uprv_free((char **)gAvailableConverters); |
235 | 0 | gAvailableConverters = NULL; |
236 | 0 | } |
237 | 0 | gAvailableConvertersInitOnce.reset(); |
238 | 0 | } |
239 | | |
240 | | /* ucnv_cleanup - delete all storage held by the converter cache, except any */ |
241 | | /* in use by open converters. */ |
242 | | /* Not thread safe. */ |
243 | | /* Not supported API. */ |
244 | 0 | static UBool U_CALLCONV ucnv_cleanup(void) { |
245 | 0 | ucnv_flushCache(); |
246 | 0 | if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) { |
247 | 0 | uhash_close(SHARED_DATA_HASHTABLE); |
248 | 0 | SHARED_DATA_HASHTABLE = NULL; |
249 | 0 | } |
250 | | |
251 | | /* Isn't called from flushCache because other threads may have preexisting references to the table. */ |
252 | 0 | ucnv_flushAvailableConverterCache(); |
253 | |
|
254 | | #if !U_CHARSET_IS_UTF8 |
255 | | gDefaultConverterName = NULL; |
256 | | gDefaultConverterNameBuffer[0] = 0; |
257 | | gDefaultConverterContainsOption = FALSE; |
258 | | gDefaultAlgorithmicSharedData = NULL; |
259 | | #endif |
260 | |
|
261 | 0 | return (SHARED_DATA_HASHTABLE == NULL); |
262 | 0 | } |
263 | | |
/*
 * Registers ucnv_cleanup() with the common library's cleanup registry
 * under the UCLN_COMMON_UCNV slot.
 */
U_CAPI void U_EXPORT2
ucnv_enableCleanup(void) {
    ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup);
}
268 | | |
269 | | static UBool U_CALLCONV |
270 | | isCnvAcceptable(void * /*context*/, |
271 | | const char * /*type*/, const char * /*name*/, |
272 | 0 | const UDataInfo *pInfo) { |
273 | 0 | return (UBool)( |
274 | 0 | pInfo->size>=20 && |
275 | 0 | pInfo->isBigEndian==U_IS_BIG_ENDIAN && |
276 | 0 | pInfo->charsetFamily==U_CHARSET_FAMILY && |
277 | 0 | pInfo->sizeofUChar==U_SIZEOF_UCHAR && |
278 | 0 | pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */ |
279 | 0 | pInfo->dataFormat[1]==0x6e && |
280 | 0 | pInfo->dataFormat[2]==0x76 && |
281 | 0 | pInfo->dataFormat[3]==0x74 && |
282 | 0 | pInfo->formatVersion[0]==6); /* Everything will be version 6 */ |
283 | 0 | } |
284 | | |
285 | | /** |
286 | | * Un flatten shared data from a UDATA.. |
287 | | */ |
288 | | static UConverterSharedData* |
289 | | ucnv_data_unFlattenClone(UConverterLoadArgs *pArgs, UDataMemory *pData, UErrorCode *status) |
290 | 0 | { |
291 | | /* UDataInfo info; -- necessary only if some converters have different formatVersion */ |
292 | 0 | const uint8_t *raw = (const uint8_t *)udata_getMemory(pData); |
293 | 0 | const UConverterStaticData *source = (const UConverterStaticData *) raw; |
294 | 0 | UConverterSharedData *data; |
295 | 0 | UConverterType type = (UConverterType)source->conversionType; |
296 | |
|
297 | 0 | if(U_FAILURE(*status)) |
298 | 0 | return NULL; |
299 | | |
300 | 0 | if( (uint16_t)type >= UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES || |
301 | 0 | converterData[type] == NULL || |
302 | 0 | !converterData[type]->isReferenceCounted || |
303 | 0 | converterData[type]->referenceCounter != 1 || |
304 | 0 | source->structSize != sizeof(UConverterStaticData)) |
305 | 0 | { |
306 | 0 | *status = U_INVALID_TABLE_FORMAT; |
307 | 0 | return NULL; |
308 | 0 | } |
309 | | |
310 | 0 | data = (UConverterSharedData *)uprv_malloc(sizeof(UConverterSharedData)); |
311 | 0 | if(data == NULL) { |
312 | 0 | *status = U_MEMORY_ALLOCATION_ERROR; |
313 | 0 | return NULL; |
314 | 0 | } |
315 | | |
316 | | /* copy initial values from the static structure for this type */ |
317 | 0 | uprv_memcpy(data, converterData[type], sizeof(UConverterSharedData)); |
318 | |
|
319 | 0 | data->staticData = source; |
320 | |
|
321 | 0 | data->sharedDataCached = FALSE; |
322 | | |
323 | | /* fill in fields from the loaded data */ |
324 | 0 | data->dataMemory = (void*)pData; /* for future use */ |
325 | |
|
326 | 0 | if(data->impl->load != NULL) { |
327 | 0 | data->impl->load(data, pArgs, raw + source->structSize, status); |
328 | 0 | if(U_FAILURE(*status)) { |
329 | 0 | uprv_free(data); |
330 | 0 | return NULL; |
331 | 0 | } |
332 | 0 | } |
333 | 0 | return data; |
334 | 0 | } |
335 | | |
336 | | /*Takes an alias name gets an actual converter file name |
337 | | *goes to disk and opens it. |
338 | | *allocates the memory and returns a new UConverter object |
339 | | */ |
340 | | static UConverterSharedData *createConverterFromFile(UConverterLoadArgs *pArgs, UErrorCode * err) |
341 | 0 | { |
342 | 0 | UDataMemory *data; |
343 | 0 | UConverterSharedData *sharedData; |
344 | |
|
345 | 0 | UTRACE_ENTRY_OC(UTRACE_UCNV_LOAD); |
346 | |
|
347 | 0 | if (U_FAILURE (*err)) { |
348 | 0 | UTRACE_EXIT_STATUS(*err); |
349 | 0 | return NULL; |
350 | 0 | } |
351 | | |
352 | 0 | UTRACE_DATA2(UTRACE_OPEN_CLOSE, "load converter %s from package %s", pArgs->name, pArgs->pkg); |
353 | |
|
354 | 0 | data = udata_openChoice(pArgs->pkg, DATA_TYPE, pArgs->name, isCnvAcceptable, NULL, err); |
355 | 0 | if(U_FAILURE(*err)) |
356 | 0 | { |
357 | 0 | UTRACE_EXIT_STATUS(*err); |
358 | 0 | return NULL; |
359 | 0 | } |
360 | | |
361 | 0 | sharedData = ucnv_data_unFlattenClone(pArgs, data, err); |
362 | 0 | if(U_FAILURE(*err)) |
363 | 0 | { |
364 | 0 | udata_close(data); |
365 | 0 | UTRACE_EXIT_STATUS(*err); |
366 | 0 | return NULL; |
367 | 0 | } |
368 | | |
369 | | /* |
370 | | * TODO Store pkg in a field in the shared data so that delta-only converters |
371 | | * can load base converters from the same package. |
372 | | * If the pkg name is longer than the field, then either do not load the converter |
373 | | * in the first place, or just set the pkg field to "". |
374 | | */ |
375 | | |
376 | 0 | UTRACE_EXIT_PTR_STATUS(sharedData, *err); |
377 | 0 | return sharedData; |
378 | 0 | } |
379 | | |
380 | | /*returns a converter type from a string |
381 | | */ |
382 | | static const UConverterSharedData * |
383 | | getAlgorithmicTypeFromName(const char *realName) |
384 | 0 | { |
385 | 0 | uint32_t mid, start, limit; |
386 | 0 | uint32_t lastMid; |
387 | 0 | int result; |
388 | 0 | char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH]; |
389 | | |
390 | | /* Lower case and remove ignoreable characters. */ |
391 | 0 | ucnv_io_stripForCompare(strippedName, realName); |
392 | | |
393 | | /* do a binary search for the alias */ |
394 | 0 | start = 0; |
395 | 0 | limit = UPRV_LENGTHOF(cnvNameType); |
396 | 0 | mid = limit; |
397 | 0 | lastMid = UINT32_MAX; |
398 | |
|
399 | 0 | for (;;) { |
400 | 0 | mid = (uint32_t)((start + limit) / 2); |
401 | 0 | if (lastMid == mid) { /* Have we moved? */ |
402 | 0 | break; /* We haven't moved, and it wasn't found. */ |
403 | 0 | } |
404 | 0 | lastMid = mid; |
405 | 0 | result = uprv_strcmp(strippedName, cnvNameType[mid].name); |
406 | |
|
407 | 0 | if (result < 0) { |
408 | 0 | limit = mid; |
409 | 0 | } else if (result > 0) { |
410 | 0 | start = mid; |
411 | 0 | } else { |
412 | 0 | return converterData[cnvNameType[mid].type]; |
413 | 0 | } |
414 | 0 | } |
415 | | |
416 | 0 | return NULL; |
417 | 0 | } |
418 | | |
419 | | /* |
420 | | * Based on the number of known converters, this determines how many times larger |
421 | | * the shared data hash table should be. When on small platforms, or just a couple |
422 | | * of converters are used, this number should be 2. When memory is plentiful, or |
423 | | * when ucnv_countAvailable is ever used with a lot of available converters, |
424 | | * this should be 4. |
425 | | * Larger numbers reduce the number of hash collisions, but use more memory. |
426 | | */ |
427 | 0 | #define UCNV_CACHE_LOAD_FACTOR 2 |
428 | | |
429 | | /* Puts the shared data in the static hashtable SHARED_DATA_HASHTABLE */ |
430 | | /* Will always be called with the cnvCacheMutex already being held */ |
431 | | /* by the calling function. */ |
432 | | /* Stores the shared data in the SHARED_DATA_HASHTABLE |
433 | | * @param data The shared data |
434 | | */ |
435 | | static void |
436 | | ucnv_shareConverterData(UConverterSharedData * data) |
437 | 0 | { |
438 | 0 | UErrorCode err = U_ZERO_ERROR; |
439 | | /*Lazy evaluates the Hashtable itself */ |
440 | | /*void *sanity = NULL;*/ |
441 | |
|
442 | 0 | if (SHARED_DATA_HASHTABLE == NULL) |
443 | 0 | { |
444 | 0 | SHARED_DATA_HASHTABLE = uhash_openSize(uhash_hashChars, uhash_compareChars, NULL, |
445 | 0 | ucnv_io_countKnownConverters(&err)*UCNV_CACHE_LOAD_FACTOR, |
446 | 0 | &err); |
447 | 0 | ucnv_enableCleanup(); |
448 | |
|
449 | 0 | if (U_FAILURE(err)) |
450 | 0 | return; |
451 | 0 | } |
452 | | |
453 | | /* ### check to see if the element is not already there! */ |
454 | | |
455 | | /* |
456 | | sanity = ucnv_getSharedConverterData (data->staticData->name); |
457 | | if(sanity != NULL) |
458 | | { |
459 | | UCNV_DEBUG_LOG("put:overwrite!",data->staticData->name,sanity); |
460 | | } |
461 | | UCNV_DEBUG_LOG("put:chk",data->staticData->name,sanity); |
462 | | */ |
463 | | |
464 | | /* Mark it shared */ |
465 | 0 | data->sharedDataCached = TRUE; |
466 | |
|
467 | 0 | uhash_put(SHARED_DATA_HASHTABLE, |
468 | 0 | (void*) data->staticData->name, /* Okay to cast away const as long as |
469 | | keyDeleter == NULL */ |
470 | 0 | data, |
471 | 0 | &err); |
472 | 0 | UCNV_DEBUG_LOG("put", data->staticData->name,data); |
473 | |
|
474 | 0 | } |
475 | | |
476 | | /* Look up a converter name in the shared data cache. */ |
477 | | /* cnvCacheMutex must be held by the caller to protect the hash table. */ |
478 | | /* gets the shared data from the SHARED_DATA_HASHTABLE (might return NULL if it isn't there) |
479 | | * @param name The name of the shared data |
480 | | * @return the shared data from the SHARED_DATA_HASHTABLE |
481 | | */ |
482 | | static UConverterSharedData * |
483 | | ucnv_getSharedConverterData(const char *name) |
484 | 0 | { |
485 | | /*special case when no Table has yet been created we return NULL */ |
486 | 0 | if (SHARED_DATA_HASHTABLE == NULL) |
487 | 0 | { |
488 | 0 | return NULL; |
489 | 0 | } |
490 | 0 | else |
491 | 0 | { |
492 | 0 | UConverterSharedData *rc; |
493 | |
|
494 | 0 | rc = (UConverterSharedData*)uhash_get(SHARED_DATA_HASHTABLE, name); |
495 | 0 | UCNV_DEBUG_LOG("get",name,rc); |
496 | 0 | return rc; |
497 | 0 | } |
498 | 0 | } |
499 | | |
500 | | /*frees the string of memory blocks associates with a sharedConverter |
501 | | *if and only if the referenceCounter == 0 |
502 | | */ |
503 | | /* Deletes (frees) the Shared data it's passed. first it checks the referenceCounter to |
504 | | * see if anyone is using it, if not it frees all the memory stemming from sharedConverterData and |
505 | | * returns TRUE, |
506 | | * otherwise returns FALSE |
507 | | * @param sharedConverterData The shared data |
508 | | * @return if not it frees all the memory stemming from sharedConverterData and |
509 | | * returns TRUE, otherwise returns FALSE |
510 | | */ |
511 | | static UBool |
512 | | ucnv_deleteSharedConverterData(UConverterSharedData * deadSharedData) |
513 | 0 | { |
514 | 0 | UTRACE_ENTRY_OC(UTRACE_UCNV_UNLOAD); |
515 | 0 | UTRACE_DATA2(UTRACE_OPEN_CLOSE, "unload converter %s shared data %p", deadSharedData->staticData->name, deadSharedData); |
516 | |
|
517 | 0 | if (deadSharedData->referenceCounter > 0) { |
518 | 0 | UTRACE_EXIT_VALUE((int32_t)FALSE); |
519 | 0 | return FALSE; |
520 | 0 | } |
521 | | |
522 | 0 | if (deadSharedData->impl->unload != NULL) { |
523 | 0 | deadSharedData->impl->unload(deadSharedData); |
524 | 0 | } |
525 | |
|
526 | 0 | if(deadSharedData->dataMemory != NULL) |
527 | 0 | { |
528 | 0 | UDataMemory *data = (UDataMemory*)deadSharedData->dataMemory; |
529 | 0 | udata_close(data); |
530 | 0 | } |
531 | |
|
532 | 0 | uprv_free(deadSharedData); |
533 | |
|
534 | 0 | UTRACE_EXIT_VALUE((int32_t)TRUE); |
535 | 0 | return TRUE; |
536 | 0 | } |
537 | | |
538 | | /** |
539 | | * Load a non-algorithmic converter. |
540 | | * If pkg==NULL, then this function must be called inside umtx_lock(&cnvCacheMutex). |
541 | | */ |
542 | | UConverterSharedData * |
543 | 0 | ucnv_load(UConverterLoadArgs *pArgs, UErrorCode *err) { |
544 | 0 | UConverterSharedData *mySharedConverterData; |
545 | |
|
546 | 0 | if(err == NULL || U_FAILURE(*err)) { |
547 | 0 | return NULL; |
548 | 0 | } |
549 | | |
550 | 0 | if(pArgs->pkg != NULL && *pArgs->pkg != 0) { |
551 | | /* application-provided converters are not currently cached */ |
552 | 0 | return createConverterFromFile(pArgs, err); |
553 | 0 | } |
554 | | |
555 | 0 | mySharedConverterData = ucnv_getSharedConverterData(pArgs->name); |
556 | 0 | if (mySharedConverterData == NULL) |
557 | 0 | { |
558 | | /*Not cached, we need to stream it in from file */ |
559 | 0 | mySharedConverterData = createConverterFromFile(pArgs, err); |
560 | 0 | if (U_FAILURE (*err) || (mySharedConverterData == NULL)) |
561 | 0 | { |
562 | 0 | return NULL; |
563 | 0 | } |
564 | 0 | else if (!pArgs->onlyTestIsLoadable) |
565 | 0 | { |
566 | | /* share it with other library clients */ |
567 | 0 | ucnv_shareConverterData(mySharedConverterData); |
568 | 0 | } |
569 | 0 | } |
570 | 0 | else |
571 | 0 | { |
572 | | /* The data for this converter was already in the cache. */ |
573 | | /* Update the reference counter on the shared data: one more client */ |
574 | 0 | mySharedConverterData->referenceCounter++; |
575 | 0 | } |
576 | | |
577 | 0 | return mySharedConverterData; |
578 | 0 | } |
579 | | |
580 | | /** |
581 | | * Unload a non-algorithmic converter. |
582 | | * It must be sharedData->isReferenceCounted |
583 | | * and this function must be called inside umtx_lock(&cnvCacheMutex). |
584 | | */ |
585 | | U_CAPI void |
586 | 0 | ucnv_unload(UConverterSharedData *sharedData) { |
587 | 0 | if(sharedData != NULL) { |
588 | 0 | if (sharedData->referenceCounter > 0) { |
589 | 0 | sharedData->referenceCounter--; |
590 | 0 | } |
591 | |
|
592 | 0 | if((sharedData->referenceCounter <= 0)&&(sharedData->sharedDataCached == FALSE)) { |
593 | 0 | ucnv_deleteSharedConverterData(sharedData); |
594 | 0 | } |
595 | 0 | } |
596 | 0 | } |
597 | | |
598 | | U_CFUNC void |
599 | | ucnv_unloadSharedDataIfReady(UConverterSharedData *sharedData) |
600 | 0 | { |
601 | 0 | if(sharedData != NULL && sharedData->isReferenceCounted) { |
602 | 0 | umtx_lock(&cnvCacheMutex); |
603 | 0 | ucnv_unload(sharedData); |
604 | 0 | umtx_unlock(&cnvCacheMutex); |
605 | 0 | } |
606 | 0 | } |
607 | | |
608 | | U_CFUNC void |
609 | | ucnv_incrementRefCount(UConverterSharedData *sharedData) |
610 | 0 | { |
611 | 0 | if(sharedData != NULL && sharedData->isReferenceCounted) { |
612 | 0 | umtx_lock(&cnvCacheMutex); |
613 | 0 | sharedData->referenceCounter++; |
614 | 0 | umtx_unlock(&cnvCacheMutex); |
615 | 0 | } |
616 | 0 | } |
617 | | |
/*
 * Splits a converter name of the form "name<SEP>option<SEP>option..." into
 * its pieces, where <SEP> is UCNV_OPTION_SEP_CHAR.
 *
 * *pPieces must be initialized.
 * The name without options will be copied to pPieces->cnvName.
 * The locale and options will be copied to pPieces only if present in inName,
 * otherwise the existing values in pPieces remain.
 * *pArgs will be set to the pPieces values.
 *
 * Recognized options are "locale=<value>", "version=<digit>" and "swaplfnl";
 * anything else is skipped.
 *
 * @param inName  converter name, optionally followed by options
 * @param pPieces receives the name, locale and option bits
 * @param pArgs   name/locale/options are pointed at (or copied from) pPieces
 * @param err     set to U_ILLEGAL_ARGUMENT_ERROR if the name reaches
 *                UCNV_MAX_CONVERTER_NAME_LENGTH characters or a locale value
 *                reaches ULOC_FULLNAME_CAPACITY characters; the corresponding
 *                pPieces string is emptied in that case
 */
static void
parseConverterOptions(const char *inName,
                      UConverterNamePieces *pPieces,
                      UConverterLoadArgs *pArgs,
                      UErrorCode *err)
{
    char *cnvName = pPieces->cnvName;
    char c;
    int32_t len = 0;

    /* pArgs->name stays at the input name if the copy below fails */
    pArgs->name=inName;
    pArgs->locale=pPieces->locale;
    pArgs->options=pPieces->options;

    /* copy the converter name itself to cnvName */
    while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) {
        /* the length check happens before the write, leaving room for the terminator */
        if (++len>=UCNV_MAX_CONVERTER_NAME_LENGTH) {
            *err = U_ILLEGAL_ARGUMENT_ERROR;    /* bad name */
            pPieces->cnvName[0]=0;
            return;
        }
        *cnvName++=c;
        inName++;
    }
    *cnvName=0;
    pArgs->name=pPieces->cnvName;

    /* parse options. No more name copying should occur. */
    while((c=*inName)!=0) {
        if(c==UCNV_OPTION_SEP_CHAR) {
            ++inName;
        }

        /* inName is behind an option separator */
        if(uprv_strncmp(inName, "locale=", 7)==0) {
            /* do not modify locale itself in case we have multiple locale options */
            char *dest=pPieces->locale;

            /* copy the locale option value */
            inName+=7;
            len=0;
            while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) {
                ++inName;

                if(++len>=ULOC_FULLNAME_CAPACITY) {
                    *err=U_ILLEGAL_ARGUMENT_ERROR;    /* bad name */
                    pPieces->locale[0]=0;
                    return;
                }

                *dest++=c;
            }
            *dest=0;
        } else if(uprv_strncmp(inName, "version=", 8)==0) {
            /* copy the version option value into bits 3..0 of pPieces->options */
            inName+=8;
            c=*inName;
            if(c==0) {
                /* "version=" at the very end of the string: clear the version bits and stop */
                pArgs->options=(pPieces->options&=~UCNV_OPTION_VERSION);
                return;
            } else if((uint8_t)(c-'0')<10) {
                /* a single decimal digit; any other character is left for the next pass of the loop */
                pArgs->options=pPieces->options=(pPieces->options&~UCNV_OPTION_VERSION)|(uint32_t)(c-'0');
                ++inName;
            }
        } else if(uprv_strncmp(inName, "swaplfnl", 8)==0) {
            inName+=8;
            pArgs->options=(pPieces->options|=UCNV_OPTION_SWAP_LFNL);
        /* add processing for new options here with another } else if(uprv_strncmp(inName, "option-name=", XX)==0) { */
        } else {
            /* ignore any other options until we define some */
            /* this loop also consumes the separator that ends the unknown option */
            while(((c = *inName++) != 0) && (c != UCNV_OPTION_SEP_CHAR)) {
            }
            if(c==0) {
                return;
            }
        }
    }
}
703 | | |
704 | | /*Logic determines if the converter is Algorithmic AND/OR cached |
705 | | *depending on that: |
706 | | * -we either go to get data from disk and cache it (Data=TRUE, Cached=False) |
707 | | * -Get it from a Hashtable (Data=X, Cached=TRUE) |
708 | | * -Call dataConverter initializer (Data=TRUE, Cached=TRUE) |
709 | | * -Call AlgorithmicConverter initializer (Data=FALSE, Cached=TRUE) |
710 | | */ |
711 | | U_CFUNC UConverterSharedData * |
712 | | ucnv_loadSharedData(const char *converterName, |
713 | | UConverterNamePieces *pPieces, |
714 | | UConverterLoadArgs *pArgs, |
715 | 2 | UErrorCode * err) { |
716 | 2 | UConverterNamePieces stackPieces; |
717 | 2 | UConverterLoadArgs stackArgs; |
718 | 2 | UConverterSharedData *mySharedConverterData = NULL; |
719 | 2 | UErrorCode internalErrorCode = U_ZERO_ERROR; |
720 | 2 | UBool mayContainOption = TRUE; |
721 | 2 | UBool checkForAlgorithmic = TRUE; |
722 | | |
723 | 2 | if (U_FAILURE (*err)) { |
724 | 0 | return NULL; |
725 | 0 | } |
726 | | |
727 | 2 | if(pPieces == NULL) { |
728 | 0 | if(pArgs != NULL) { |
729 | | /* |
730 | | * Bad: We may set pArgs pointers to stackPieces fields |
731 | | * which will be invalid after this function returns. |
732 | | */ |
733 | 0 | *err = U_INTERNAL_PROGRAM_ERROR; |
734 | 0 | return NULL; |
735 | 0 | } |
736 | 0 | pPieces = &stackPieces; |
737 | 0 | } |
738 | 2 | if(pArgs == NULL) { |
739 | 0 | uprv_memset(&stackArgs, 0, sizeof(stackArgs)); |
740 | 0 | stackArgs.size = (int32_t)sizeof(stackArgs); |
741 | 0 | pArgs = &stackArgs; |
742 | 0 | } |
743 | | |
744 | 2 | pPieces->cnvName[0] = 0; |
745 | 2 | pPieces->locale[0] = 0; |
746 | 2 | pPieces->options = 0; |
747 | | |
748 | 2 | pArgs->name = converterName; |
749 | 2 | pArgs->locale = pPieces->locale; |
750 | 2 | pArgs->options = pPieces->options; |
751 | | |
752 | | /* In case "name" is NULL we want to open the default converter. */ |
753 | 2 | if (converterName == NULL) { |
754 | 0 | #if U_CHARSET_IS_UTF8 |
755 | 0 | pArgs->name = "UTF-8"; |
756 | 0 | return (UConverterSharedData *)converterData[UCNV_UTF8]; |
757 | | #else |
758 | | /* Call ucnv_getDefaultName first to query the name from the OS. */ |
759 | | pArgs->name = ucnv_getDefaultName(); |
760 | | if (pArgs->name == NULL) { |
761 | | *err = U_MISSING_RESOURCE_ERROR; |
762 | | return NULL; |
763 | | } |
764 | | mySharedConverterData = (UConverterSharedData *)gDefaultAlgorithmicSharedData; |
765 | | checkForAlgorithmic = FALSE; |
766 | | mayContainOption = gDefaultConverterContainsOption; |
767 | | /* the default converter name is already canonical */ |
768 | | #endif |
769 | 0 | } |
770 | 2 | else if(UCNV_FAST_IS_UTF8(converterName)) { |
771 | | /* fastpath for UTF-8 */ |
772 | 2 | pArgs->name = "UTF-8"; |
773 | 2 | return (UConverterSharedData *)converterData[UCNV_UTF8]; |
774 | 2 | } |
775 | 0 | else { |
776 | | /* separate the converter name from the options */ |
777 | 0 | parseConverterOptions(converterName, pPieces, pArgs, err); |
778 | 0 | if (U_FAILURE(*err)) { |
779 | | /* Very bad name used. */ |
780 | 0 | return NULL; |
781 | 0 | } |
782 | | |
783 | | /* get the canonical converter name */ |
784 | 0 | pArgs->name = ucnv_io_getConverterName(pArgs->name, &mayContainOption, &internalErrorCode); |
785 | 0 | if (U_FAILURE(internalErrorCode) || pArgs->name == NULL) { |
786 | | /* |
787 | | * set the input name in case the converter was added |
788 | | * without updating the alias table, or when there is no alias table |
789 | | */ |
790 | 0 | pArgs->name = pPieces->cnvName; |
791 | 0 | } else if (internalErrorCode == U_AMBIGUOUS_ALIAS_WARNING) { |
792 | 0 | *err = U_AMBIGUOUS_ALIAS_WARNING; |
793 | 0 | } |
794 | 0 | } |
795 | | |
796 | | /* separate the converter name from the options */ |
797 | 0 | if(mayContainOption && pArgs->name != pPieces->cnvName) { |
798 | 0 | parseConverterOptions(pArgs->name, pPieces, pArgs, err); |
799 | 0 | } |
800 | | |
801 | | /* get the shared data for an algorithmic converter, if it is one */ |
802 | 0 | if (checkForAlgorithmic) { |
803 | 0 | mySharedConverterData = (UConverterSharedData *)getAlgorithmicTypeFromName(pArgs->name); |
804 | 0 | } |
805 | 0 | if (mySharedConverterData == NULL) |
806 | 0 | { |
807 | | /* it is a data-based converter, get its shared data. */ |
808 | | /* Hold the cnvCacheMutex through the whole process of checking the */ |
809 | | /* converter data cache, and adding new entries to the cache */ |
810 | | /* to prevent other threads from modifying the cache during the */ |
811 | | /* process. */ |
812 | 0 | pArgs->nestedLoads=1; |
813 | 0 | pArgs->pkg=NULL; |
814 | |
|
815 | 0 | umtx_lock(&cnvCacheMutex); |
816 | 0 | mySharedConverterData = ucnv_load(pArgs, err); |
817 | 0 | umtx_unlock(&cnvCacheMutex); |
818 | 0 | if (U_FAILURE (*err) || (mySharedConverterData == NULL)) |
819 | 0 | { |
820 | 0 | return NULL; |
821 | 0 | } |
822 | 0 | } |
823 | | |
824 | 0 | return mySharedConverterData; |
825 | 0 | } |
826 | | |
827 | | U_CAPI UConverter * |
828 | | ucnv_createConverter(UConverter *myUConverter, const char *converterName, UErrorCode * err) |
829 | 2 | { |
830 | 2 | UConverterNamePieces stackPieces; |
831 | 2 | UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; |
832 | 2 | UConverterSharedData *mySharedConverterData; |
833 | | |
834 | 2 | UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN); |
835 | | |
836 | 2 | if(U_SUCCESS(*err)) { |
837 | 2 | UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open converter %s", converterName); |
838 | | |
839 | 2 | mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err); |
840 | | |
841 | 2 | myUConverter = ucnv_createConverterFromSharedData( |
842 | 2 | myUConverter, mySharedConverterData, |
843 | 2 | &stackArgs, |
844 | 2 | err); |
845 | | |
846 | 2 | if(U_SUCCESS(*err)) { |
847 | 2 | UTRACE_EXIT_PTR_STATUS(myUConverter, *err); |
848 | 2 | return myUConverter; |
849 | 2 | } |
850 | 2 | } |
851 | | |
852 | | /* exit with error */ |
853 | 0 | UTRACE_EXIT_STATUS(*err); |
854 | 0 | return NULL; |
855 | 2 | } |
856 | | |
857 | | U_CFUNC UBool |
858 | 0 | ucnv_canCreateConverter(const char *converterName, UErrorCode *err) { |
859 | 0 | UConverter myUConverter; |
860 | 0 | UConverterNamePieces stackPieces; |
861 | 0 | UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; |
862 | 0 | UConverterSharedData *mySharedConverterData; |
863 | |
|
864 | 0 | UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN); |
865 | |
|
866 | 0 | if(U_SUCCESS(*err)) { |
867 | 0 | UTRACE_DATA1(UTRACE_OPEN_CLOSE, "test if can open converter %s", converterName); |
868 | |
|
869 | 0 | stackArgs.onlyTestIsLoadable=TRUE; |
870 | 0 | mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err); |
871 | 0 | ucnv_createConverterFromSharedData( |
872 | 0 | &myUConverter, mySharedConverterData, |
873 | 0 | &stackArgs, |
874 | 0 | err); |
875 | 0 | ucnv_unloadSharedDataIfReady(mySharedConverterData); |
876 | 0 | } |
877 | |
|
878 | 0 | UTRACE_EXIT_STATUS(*err); |
879 | 0 | return U_SUCCESS(*err); |
880 | 0 | } |
881 | | |
882 | | UConverter * |
883 | | ucnv_createAlgorithmicConverter(UConverter *myUConverter, |
884 | | UConverterType type, |
885 | | const char *locale, uint32_t options, |
886 | 0 | UErrorCode *err) { |
887 | 0 | UConverter *cnv; |
888 | 0 | const UConverterSharedData *sharedData; |
889 | 0 | UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; |
890 | |
|
891 | 0 | UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_ALGORITHMIC); |
892 | 0 | UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open algorithmic converter type %d", (int32_t)type); |
893 | |
|
894 | 0 | if(type<0 || UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES<=type) { |
895 | 0 | *err = U_ILLEGAL_ARGUMENT_ERROR; |
896 | 0 | UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR); |
897 | 0 | return NULL; |
898 | 0 | } |
899 | | |
900 | 0 | sharedData = converterData[type]; |
901 | 0 | if(sharedData == NULL || sharedData->isReferenceCounted) { |
902 | | /* not a valid type, or not an algorithmic converter */ |
903 | 0 | *err = U_ILLEGAL_ARGUMENT_ERROR; |
904 | 0 | UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR); |
905 | 0 | return NULL; |
906 | 0 | } |
907 | | |
908 | 0 | stackArgs.name = ""; |
909 | 0 | stackArgs.options = options; |
910 | 0 | stackArgs.locale=locale; |
911 | 0 | cnv = ucnv_createConverterFromSharedData( |
912 | 0 | myUConverter, (UConverterSharedData *)sharedData, |
913 | 0 | &stackArgs, err); |
914 | |
|
915 | 0 | UTRACE_EXIT_PTR_STATUS(cnv, *err); |
916 | 0 | return cnv; |
917 | 0 | } |
918 | | |
919 | | U_CFUNC UConverter* |
920 | | ucnv_createConverterFromPackage(const char *packageName, const char *converterName, UErrorCode * err) |
921 | 0 | { |
922 | 0 | UConverter *myUConverter; |
923 | 0 | UConverterSharedData *mySharedConverterData; |
924 | 0 | UConverterNamePieces stackPieces; |
925 | 0 | UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; |
926 | |
|
927 | 0 | UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_PACKAGE); |
928 | |
|
929 | 0 | if(U_FAILURE(*err)) { |
930 | 0 | UTRACE_EXIT_STATUS(*err); |
931 | 0 | return NULL; |
932 | 0 | } |
933 | | |
934 | 0 | UTRACE_DATA2(UTRACE_OPEN_CLOSE, "open converter %s from package %s", converterName, packageName); |
935 | | |
936 | | /* first, get the options out of the converterName string */ |
937 | 0 | stackPieces.cnvName[0] = 0; |
938 | 0 | stackPieces.locale[0] = 0; |
939 | 0 | stackPieces.options = 0; |
940 | 0 | parseConverterOptions(converterName, &stackPieces, &stackArgs, err); |
941 | 0 | if (U_FAILURE(*err)) { |
942 | | /* Very bad name used. */ |
943 | 0 | UTRACE_EXIT_STATUS(*err); |
944 | 0 | return NULL; |
945 | 0 | } |
946 | 0 | stackArgs.nestedLoads=1; |
947 | 0 | stackArgs.pkg=packageName; |
948 | | |
949 | | /* open the data, unflatten the shared structure */ |
950 | 0 | mySharedConverterData = createConverterFromFile(&stackArgs, err); |
951 | |
|
952 | 0 | if (U_FAILURE(*err)) { |
953 | 0 | UTRACE_EXIT_STATUS(*err); |
954 | 0 | return NULL; |
955 | 0 | } |
956 | | |
957 | | /* create the actual converter */ |
958 | 0 | myUConverter = ucnv_createConverterFromSharedData(NULL, mySharedConverterData, &stackArgs, err); |
959 | |
|
960 | 0 | if (U_FAILURE(*err)) { |
961 | 0 | ucnv_close(myUConverter); |
962 | 0 | UTRACE_EXIT_STATUS(*err); |
963 | 0 | return NULL; |
964 | 0 | } |
965 | | |
966 | 0 | UTRACE_EXIT_PTR_STATUS(myUConverter, *err); |
967 | 0 | return myUConverter; |
968 | 0 | } |
969 | | |
970 | | |
971 | | U_CFUNC UConverter* |
972 | | ucnv_createConverterFromSharedData(UConverter *myUConverter, |
973 | | UConverterSharedData *mySharedConverterData, |
974 | | UConverterLoadArgs *pArgs, |
975 | | UErrorCode *err) |
976 | 2 | { |
977 | 2 | UBool isCopyLocal; |
978 | | |
979 | 2 | if(U_FAILURE(*err)) { |
980 | 0 | ucnv_unloadSharedDataIfReady(mySharedConverterData); |
981 | 0 | return myUConverter; |
982 | 0 | } |
983 | 2 | if(myUConverter == NULL) |
984 | 2 | { |
985 | 2 | myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter)); |
986 | 2 | if(myUConverter == NULL) |
987 | 0 | { |
988 | 0 | *err = U_MEMORY_ALLOCATION_ERROR; |
989 | 0 | ucnv_unloadSharedDataIfReady(mySharedConverterData); |
990 | 0 | return NULL; |
991 | 0 | } |
992 | 2 | isCopyLocal = FALSE; |
993 | 2 | } else { |
994 | 0 | isCopyLocal = TRUE; |
995 | 0 | } |
996 | | |
997 | | /* initialize the converter */ |
998 | 2 | uprv_memset(myUConverter, 0, sizeof(UConverter)); |
999 | 2 | myUConverter->isCopyLocal = isCopyLocal; |
1000 | | /*myUConverter->isExtraLocal = FALSE;*/ /* Set by the memset call */ |
1001 | 2 | myUConverter->sharedData = mySharedConverterData; |
1002 | 2 | myUConverter->options = pArgs->options; |
1003 | 2 | if(!pArgs->onlyTestIsLoadable) { |
1004 | 2 | myUConverter->preFromUFirstCP = U_SENTINEL; |
1005 | 2 | myUConverter->fromCharErrorBehaviour = UCNV_TO_U_DEFAULT_CALLBACK; |
1006 | 2 | myUConverter->fromUCharErrorBehaviour = UCNV_FROM_U_DEFAULT_CALLBACK; |
1007 | 2 | myUConverter->toUnicodeStatus = mySharedConverterData->toUnicodeStatus; |
1008 | 2 | myUConverter->maxBytesPerUChar = mySharedConverterData->staticData->maxBytesPerChar; |
1009 | 2 | myUConverter->subChar1 = mySharedConverterData->staticData->subChar1; |
1010 | 2 | myUConverter->subCharLen = mySharedConverterData->staticData->subCharLen; |
1011 | 2 | myUConverter->subChars = (uint8_t *)myUConverter->subUChars; |
1012 | 2 | uprv_memcpy(myUConverter->subChars, mySharedConverterData->staticData->subChar, myUConverter->subCharLen); |
1013 | 2 | myUConverter->toUCallbackReason = UCNV_ILLEGAL; /* default reason to invoke (*fromCharErrorBehaviour) */ |
1014 | 2 | } |
1015 | | |
1016 | 2 | if(mySharedConverterData->impl->open != NULL) { |
1017 | 0 | mySharedConverterData->impl->open(myUConverter, pArgs, err); |
1018 | 0 | if(U_FAILURE(*err) && !pArgs->onlyTestIsLoadable) { |
1019 | | /* don't ucnv_close() if onlyTestIsLoadable because not fully initialized */ |
1020 | 0 | ucnv_close(myUConverter); |
1021 | 0 | return NULL; |
1022 | 0 | } |
1023 | 0 | } |
1024 | | |
1025 | 2 | return myUConverter; |
1026 | 2 | } |
1027 | | |
1028 | | /*Frees all shared immutable objects that aren't referred to (reference count = 0) |
1029 | | */ |
1030 | | U_CAPI int32_t U_EXPORT2 |
1031 | | ucnv_flushCache () |
1032 | 0 | { |
1033 | 0 | UConverterSharedData *mySharedData = NULL; |
1034 | 0 | int32_t pos; |
1035 | 0 | int32_t tableDeletedNum = 0; |
1036 | 0 | const UHashElement *e; |
1037 | | /*UErrorCode status = U_ILLEGAL_ARGUMENT_ERROR;*/ |
1038 | 0 | int32_t i, remaining; |
1039 | |
|
1040 | 0 | UTRACE_ENTRY_OC(UTRACE_UCNV_FLUSH_CACHE); |
1041 | | |
1042 | | /* Close the default converter without creating a new one so that everything will be flushed. */ |
1043 | 0 | u_flushDefaultConverter(); |
1044 | | |
1045 | | /*if shared data hasn't even been lazy evaluated yet |
1046 | | * return 0 |
1047 | | */ |
1048 | 0 | if (SHARED_DATA_HASHTABLE == NULL) { |
1049 | 0 | UTRACE_EXIT_VALUE((int32_t)0); |
1050 | 0 | return 0; |
1051 | 0 | } |
1052 | | |
1053 | | /*creates an enumeration to iterate through every element in the |
1054 | | * table |
1055 | | * |
1056 | | * Synchronization: holding cnvCacheMutex will prevent any other thread from |
1057 | | * accessing or modifying the hash table during the iteration. |
1058 | | * The reference count of an entry may be decremented by |
1059 | | * ucnv_close while the iteration is in process, but this is |
1060 | | * benign. It can't be incremented (in ucnv_createConverter()) |
1061 | | * because the sequence of looking up in the cache + incrementing |
1062 | | * is protected by cnvCacheMutex. |
1063 | | */ |
1064 | 0 | umtx_lock(&cnvCacheMutex); |
1065 | | /* |
1066 | | * double loop: A delta/extension-only converter has a pointer to its base table's |
1067 | | * shared data; the first iteration of the outer loop may see the delta converter |
1068 | | * before the base converter, and unloading the delta converter may get the base |
1069 | | * converter's reference counter down to 0. |
1070 | | */ |
1071 | 0 | i = 0; |
1072 | 0 | do { |
1073 | 0 | remaining = 0; |
1074 | 0 | pos = UHASH_FIRST; |
1075 | 0 | while ((e = uhash_nextElement (SHARED_DATA_HASHTABLE, &pos)) != NULL) |
1076 | 0 | { |
1077 | 0 | mySharedData = (UConverterSharedData *) e->value.pointer; |
1078 | | /*deletes only if reference counter == 0 */ |
1079 | 0 | if (mySharedData->referenceCounter == 0) |
1080 | 0 | { |
1081 | 0 | tableDeletedNum++; |
1082 | |
|
1083 | 0 | UCNV_DEBUG_LOG("del",mySharedData->staticData->name,mySharedData); |
1084 | |
|
1085 | 0 | uhash_removeElement(SHARED_DATA_HASHTABLE, e); |
1086 | 0 | mySharedData->sharedDataCached = FALSE; |
1087 | 0 | ucnv_deleteSharedConverterData (mySharedData); |
1088 | 0 | } else { |
1089 | 0 | ++remaining; |
1090 | 0 | } |
1091 | 0 | } |
1092 | 0 | } while(++i == 1 && remaining > 0); |
1093 | 0 | umtx_unlock(&cnvCacheMutex); |
1094 | |
|
1095 | 0 | UTRACE_DATA1(UTRACE_INFO, "ucnv_flushCache() exits with %d converters remaining", remaining); |
1096 | |
|
1097 | 0 | UTRACE_EXIT_VALUE(tableDeletedNum); |
1098 | 0 | return tableDeletedNum; |
1099 | 0 | } |
1100 | | |
1101 | | /* available converters list --------------------------------------------------- */ |
1102 | | |
1103 | 0 | static void U_CALLCONV initAvailableConvertersList(UErrorCode &errCode) { |
1104 | 0 | U_ASSERT(gAvailableConverterCount == 0); |
1105 | 0 | U_ASSERT(gAvailableConverters == NULL); |
1106 | |
|
1107 | 0 | ucnv_enableCleanup(); |
1108 | 0 | UEnumeration *allConvEnum = ucnv_openAllNames(&errCode); |
1109 | 0 | int32_t allConverterCount = uenum_count(allConvEnum, &errCode); |
1110 | 0 | if (U_FAILURE(errCode)) { |
1111 | 0 | return; |
1112 | 0 | } |
1113 | | |
1114 | | /* We can't have more than "*converterTable" converters to open */ |
1115 | 0 | gAvailableConverters = (const char **) uprv_malloc(allConverterCount * sizeof(char*)); |
1116 | 0 | if (!gAvailableConverters) { |
1117 | 0 | errCode = U_MEMORY_ALLOCATION_ERROR; |
1118 | 0 | return; |
1119 | 0 | } |
1120 | | |
1121 | | /* Open the default converter to make sure that it has first dibs in the hash table. */ |
1122 | 0 | UErrorCode localStatus = U_ZERO_ERROR; |
1123 | 0 | UConverter tempConverter; |
1124 | 0 | ucnv_close(ucnv_createConverter(&tempConverter, NULL, &localStatus)); |
1125 | |
|
1126 | 0 | gAvailableConverterCount = 0; |
1127 | |
|
1128 | 0 | for (int32_t idx = 0; idx < allConverterCount; idx++) { |
1129 | 0 | localStatus = U_ZERO_ERROR; |
1130 | 0 | const char *converterName = uenum_next(allConvEnum, NULL, &localStatus); |
1131 | 0 | if (ucnv_canCreateConverter(converterName, &localStatus)) { |
1132 | 0 | gAvailableConverters[gAvailableConverterCount++] = converterName; |
1133 | 0 | } |
1134 | 0 | } |
1135 | |
|
1136 | 0 | uenum_close(allConvEnum); |
1137 | 0 | } |
1138 | | |
1139 | | |
1140 | 0 | static UBool haveAvailableConverterList(UErrorCode *pErrorCode) { |
1141 | 0 | umtx_initOnce(gAvailableConvertersInitOnce, &initAvailableConvertersList, *pErrorCode); |
1142 | 0 | return U_SUCCESS(*pErrorCode); |
1143 | 0 | } |
1144 | | |
1145 | | U_CFUNC uint16_t |
1146 | 0 | ucnv_bld_countAvailableConverters(UErrorCode *pErrorCode) { |
1147 | 0 | if (haveAvailableConverterList(pErrorCode)) { |
1148 | 0 | return gAvailableConverterCount; |
1149 | 0 | } |
1150 | 0 | return 0; |
1151 | 0 | } |
1152 | | |
1153 | | U_CFUNC const char * |
1154 | 0 | ucnv_bld_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) { |
1155 | 0 | if (haveAvailableConverterList(pErrorCode)) { |
1156 | 0 | if (n < gAvailableConverterCount) { |
1157 | 0 | return gAvailableConverters[n]; |
1158 | 0 | } |
1159 | 0 | *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; |
1160 | 0 | } |
1161 | 0 | return NULL; |
1162 | 0 | } |
1163 | | |
1164 | | /* default converter name --------------------------------------------------- */ |
1165 | | |
1166 | | #if !U_CHARSET_IS_UTF8 |
1167 | | /* |
1168 | | Copy the canonical converter name. |
1169 | | ucnv_getDefaultName must be thread safe, which can call this function. |
1170 | | |
1171 | | ucnv_setDefaultName calls this function and it doesn't have to be |
1172 | | thread safe because there is no reliable/safe way to reset the |
1173 | | converter in use in all threads. If you did reset the converter, you |
1174 | | would not be sure that retrieving a default converter for one string |
1175 | | would be the same type of default converter for a successive string. |
1176 | | Since the name is a returned via ucnv_getDefaultName without copying, |
1177 | | you shouldn't be modifying or deleting the string from a separate thread. |
1178 | | */ |
1179 | | static inline void |
1180 | | internalSetName(const char *name, UErrorCode *status) { |
1181 | | UConverterNamePieces stackPieces; |
1182 | | UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; |
1183 | | int32_t length=(int32_t)(uprv_strlen(name)); |
1184 | | UBool containsOption = (UBool)(uprv_strchr(name, UCNV_OPTION_SEP_CHAR) != NULL); |
1185 | | const UConverterSharedData *algorithmicSharedData; |
1186 | | |
1187 | | stackArgs.name = name; |
1188 | | if(containsOption) { |
1189 | | stackPieces.cnvName[0] = 0; |
1190 | | stackPieces.locale[0] = 0; |
1191 | | stackPieces.options = 0; |
1192 | | parseConverterOptions(name, &stackPieces, &stackArgs, status); |
1193 | | if(U_FAILURE(*status)) { |
1194 | | return; |
1195 | | } |
1196 | | } |
1197 | | algorithmicSharedData = getAlgorithmicTypeFromName(stackArgs.name); |
1198 | | |
1199 | | umtx_lock(&cnvCacheMutex); |
1200 | | |
1201 | | gDefaultAlgorithmicSharedData = algorithmicSharedData; |
1202 | | gDefaultConverterContainsOption = containsOption; |
1203 | | uprv_memcpy(gDefaultConverterNameBuffer, name, length); |
1204 | | gDefaultConverterNameBuffer[length]=0; |
1205 | | |
1206 | | /* gDefaultConverterName MUST be the last global var set by this function. */ |
1207 | | /* It is the variable checked in ucnv_getDefaultName() to see if initialization is required. */ |
1208 | | // But there is nothing here preventing that from being reordered, either by the compiler |
1209 | | // or hardware. I'm adding the mutex to ucnv_getDefaultName for now. UMTX_CHECK is not enough. |
1210 | | // -- Andy |
1211 | | gDefaultConverterName = gDefaultConverterNameBuffer; |
1212 | | |
1213 | | ucnv_enableCleanup(); |
1214 | | |
1215 | | umtx_unlock(&cnvCacheMutex); |
1216 | | } |
1217 | | #endif |
1218 | | |
1219 | | /* |
1220 | | * In order to be really thread-safe, the get function would have to take |
1221 | | * a buffer parameter and copy the current string inside a mutex block. |
1222 | | * This implementation only tries to be really thread-safe while |
1223 | | * setting the name. |
1224 | | * It assumes that setting a pointer is atomic. |
1225 | | */ |
1226 | | |
1227 | | U_CAPI const char* U_EXPORT2 |
1228 | 0 | ucnv_getDefaultName() { |
1229 | 0 | #if U_CHARSET_IS_UTF8 |
1230 | 0 | return "UTF-8"; |
1231 | | #else |
1232 | | /* local variable to be thread-safe */ |
1233 | | const char *name; |
1234 | | |
1235 | | /* |
1236 | | Concurrent calls to ucnv_getDefaultName must be thread safe, |
1237 | | but ucnv_setDefaultName is not thread safe. |
1238 | | */ |
1239 | | { |
1240 | | icu::Mutex lock(&cnvCacheMutex); |
1241 | | name = gDefaultConverterName; |
1242 | | } |
1243 | | if(name==NULL) { |
1244 | | UErrorCode errorCode = U_ZERO_ERROR; |
1245 | | UConverter *cnv = NULL; |
1246 | | |
1247 | | name = uprv_getDefaultCodepage(); |
1248 | | |
1249 | | /* if the name is there, test it out and get the canonical name with options */ |
1250 | | if(name != NULL) { |
1251 | | cnv = ucnv_open(name, &errorCode); |
1252 | | if(U_SUCCESS(errorCode) && cnv != NULL) { |
1253 | | name = ucnv_getName(cnv, &errorCode); |
1254 | | } |
1255 | | } |
1256 | | |
1257 | | if(name == NULL || name[0] == 0 |
1258 | | || U_FAILURE(errorCode) || cnv == NULL |
1259 | | || uprv_strlen(name)>=sizeof(gDefaultConverterNameBuffer)) |
1260 | | { |
1261 | | /* Panic time, let's use a fallback. */ |
1262 | | #if (U_CHARSET_FAMILY == U_ASCII_FAMILY) |
1263 | | name = "US-ASCII"; |
1264 | | /* there is no 'algorithmic' converter for EBCDIC */ |
1265 | | #elif U_PLATFORM == U_PF_OS390 |
1266 | | name = "ibm-1047_P100-1995" UCNV_SWAP_LFNL_OPTION_STRING; |
1267 | | #else |
1268 | | name = "ibm-37_P100-1995"; |
1269 | | #endif |
1270 | | } |
1271 | | |
1272 | | internalSetName(name, &errorCode); |
1273 | | |
1274 | | /* The close may make the current name go away. */ |
1275 | | ucnv_close(cnv); |
1276 | | } |
1277 | | |
1278 | | return name; |
1279 | | #endif |
1280 | 0 | } |
1281 | | |
1282 | | #if U_CHARSET_IS_UTF8 |
1283 | 0 | U_CAPI void U_EXPORT2 ucnv_setDefaultName(const char *) {} |
1284 | | #else |
1285 | | /* |
1286 | | This function is not thread safe, and it can't be thread safe. |
1287 | | See internalSetName or the API reference for details. |
1288 | | */ |
1289 | | U_CAPI void U_EXPORT2 |
1290 | | ucnv_setDefaultName(const char *converterName) { |
1291 | | if(converterName==NULL) { |
1292 | | /* reset to the default codepage */ |
1293 | | gDefaultConverterName=NULL; |
1294 | | } else { |
1295 | | UErrorCode errorCode = U_ZERO_ERROR; |
1296 | | UConverter *cnv = NULL; |
1297 | | const char *name = NULL; |
1298 | | |
1299 | | /* if the name is there, test it out and get the canonical name with options */ |
1300 | | cnv = ucnv_open(converterName, &errorCode); |
1301 | | if(U_SUCCESS(errorCode) && cnv != NULL) { |
1302 | | name = ucnv_getName(cnv, &errorCode); |
1303 | | } |
1304 | | |
1305 | | if(U_SUCCESS(errorCode) && name!=NULL) { |
1306 | | internalSetName(name, &errorCode); |
1307 | | } |
1308 | | /* else this converter is bad to use. Don't change it to a bad value. */ |
1309 | | |
1310 | | /* The close may make the current name go away. */ |
1311 | | ucnv_close(cnv); |
1312 | | |
1313 | | /* reset the converter cache */ |
1314 | | u_flushDefaultConverter(); |
1315 | | } |
1316 | | } |
1317 | | #endif |
1318 | | |
1319 | | /* data swapping ------------------------------------------------------------ */ |
1320 | | |
1321 | | /* most of this might belong more properly into ucnvmbcs.c, but that is so large */ |
1322 | | |
1323 | | #if !UCONFIG_NO_LEGACY_CONVERSION |
1324 | | |
1325 | | U_CAPI int32_t U_EXPORT2 |
1326 | | ucnv_swap(const UDataSwapper *ds, |
1327 | | const void *inData, int32_t length, void *outData, |
1328 | 0 | UErrorCode *pErrorCode) { |
1329 | 0 | const UDataInfo *pInfo; |
1330 | 0 | int32_t headerSize; |
1331 | |
|
1332 | 0 | const uint8_t *inBytes; |
1333 | 0 | uint8_t *outBytes; |
1334 | |
|
1335 | 0 | uint32_t offset, count, staticDataSize; |
1336 | 0 | int32_t size; |
1337 | |
|
1338 | 0 | const UConverterStaticData *inStaticData; |
1339 | 0 | UConverterStaticData *outStaticData; |
1340 | |
|
1341 | 0 | const _MBCSHeader *inMBCSHeader; |
1342 | 0 | _MBCSHeader *outMBCSHeader; |
1343 | 0 | _MBCSHeader mbcsHeader; |
1344 | 0 | uint32_t mbcsHeaderLength; |
1345 | 0 | UBool noFromU=FALSE; |
1346 | |
|
1347 | 0 | uint8_t outputType; |
1348 | |
|
1349 | 0 | int32_t maxFastUChar, mbcsIndexLength; |
1350 | |
|
1351 | 0 | const int32_t *inExtIndexes; |
1352 | 0 | int32_t extOffset; |
1353 | | |
1354 | | /* udata_swapDataHeader checks the arguments */ |
1355 | 0 | headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); |
1356 | 0 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
1357 | 0 | return 0; |
1358 | 0 | } |
1359 | | |
1360 | | /* check data format and format version */ |
1361 | 0 | pInfo=(const UDataInfo *)((const char *)inData+4); |
1362 | 0 | if(!( |
1363 | 0 | pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */ |
1364 | 0 | pInfo->dataFormat[1]==0x6e && |
1365 | 0 | pInfo->dataFormat[2]==0x76 && |
1366 | 0 | pInfo->dataFormat[3]==0x74 && |
1367 | 0 | pInfo->formatVersion[0]==6 && |
1368 | 0 | pInfo->formatVersion[1]>=2 |
1369 | 0 | )) { |
1370 | 0 | udata_printError(ds, "ucnv_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not recognized as an ICU .cnv conversion table\n", |
1371 | 0 | pInfo->dataFormat[0], pInfo->dataFormat[1], |
1372 | 0 | pInfo->dataFormat[2], pInfo->dataFormat[3], |
1373 | 0 | pInfo->formatVersion[0], pInfo->formatVersion[1]); |
1374 | 0 | *pErrorCode=U_UNSUPPORTED_ERROR; |
1375 | 0 | return 0; |
1376 | 0 | } |
1377 | | |
1378 | 0 | inBytes=(const uint8_t *)inData+headerSize; |
1379 | 0 | outBytes=(uint8_t *)outData+headerSize; |
1380 | | |
1381 | | /* read the initial UConverterStaticData structure after the UDataInfo header */ |
1382 | 0 | inStaticData=(const UConverterStaticData *)inBytes; |
1383 | 0 | outStaticData=(UConverterStaticData *)outBytes; |
1384 | |
|
1385 | 0 | if(length<0) { |
1386 | 0 | staticDataSize=ds->readUInt32(inStaticData->structSize); |
1387 | 0 | } else { |
1388 | 0 | length-=headerSize; |
1389 | 0 | if( length<(int32_t)sizeof(UConverterStaticData) || |
1390 | 0 | (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize)) |
1391 | 0 | ) { |
1392 | 0 | udata_printError(ds, "ucnv_swap(): too few bytes (%d after header) for an ICU .cnv conversion table\n", |
1393 | 0 | length); |
1394 | 0 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
1395 | 0 | return 0; |
1396 | 0 | } |
1397 | 0 | } |
1398 | | |
1399 | 0 | if(length>=0) { |
1400 | | /* swap the static data */ |
1401 | 0 | if(inStaticData!=outStaticData) { |
1402 | 0 | uprv_memcpy(outStaticData, inStaticData, staticDataSize); |
1403 | 0 | } |
1404 | |
|
1405 | 0 | ds->swapArray32(ds, &inStaticData->structSize, 4, |
1406 | 0 | &outStaticData->structSize, pErrorCode); |
1407 | 0 | ds->swapArray32(ds, &inStaticData->codepage, 4, |
1408 | 0 | &outStaticData->codepage, pErrorCode); |
1409 | |
|
1410 | 0 | ds->swapInvChars(ds, inStaticData->name, (int32_t)uprv_strlen(inStaticData->name), |
1411 | 0 | outStaticData->name, pErrorCode); |
1412 | 0 | if(U_FAILURE(*pErrorCode)) { |
1413 | 0 | udata_printError(ds, "ucnv_swap(): error swapping converter name\n"); |
1414 | 0 | return 0; |
1415 | 0 | } |
1416 | 0 | } |
1417 | | |
1418 | 0 | inBytes+=staticDataSize; |
1419 | 0 | outBytes+=staticDataSize; |
1420 | 0 | if(length>=0) { |
1421 | 0 | length-=(int32_t)staticDataSize; |
1422 | 0 | } |
1423 | | |
1424 | | /* check for supported conversionType values */ |
1425 | 0 | if(inStaticData->conversionType==UCNV_MBCS) { |
1426 | | /* swap MBCS data */ |
1427 | 0 | inMBCSHeader=(const _MBCSHeader *)inBytes; |
1428 | 0 | outMBCSHeader=(_MBCSHeader *)outBytes; |
1429 | |
|
1430 | 0 | if(0<=length && length<(int32_t)sizeof(_MBCSHeader)) { |
1431 | 0 | udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n", |
1432 | 0 | length); |
1433 | 0 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
1434 | 0 | return 0; |
1435 | 0 | } |
1436 | 0 | if(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1) { |
1437 | 0 | mbcsHeaderLength=MBCS_HEADER_V4_LENGTH; |
1438 | 0 | } else if(inMBCSHeader->version[0]==5 && inMBCSHeader->version[1]>=3 && |
1439 | 0 | ((mbcsHeader.options=ds->readUInt32(inMBCSHeader->options))& |
1440 | 0 | MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0 |
1441 | 0 | ) { |
1442 | 0 | mbcsHeaderLength=mbcsHeader.options&MBCS_OPT_LENGTH_MASK; |
1443 | 0 | noFromU=(UBool)((mbcsHeader.options&MBCS_OPT_NO_FROM_U)!=0); |
1444 | 0 | } else { |
1445 | 0 | udata_printError(ds, "ucnv_swap(): unsupported _MBCSHeader.version %d.%d\n", |
1446 | 0 | inMBCSHeader->version[0], inMBCSHeader->version[1]); |
1447 | 0 | *pErrorCode=U_UNSUPPORTED_ERROR; |
1448 | 0 | return 0; |
1449 | 0 | } |
1450 | | |
1451 | 0 | uprv_memcpy(mbcsHeader.version, inMBCSHeader->version, 4); |
1452 | 0 | mbcsHeader.countStates= ds->readUInt32(inMBCSHeader->countStates); |
1453 | 0 | mbcsHeader.countToUFallbacks= ds->readUInt32(inMBCSHeader->countToUFallbacks); |
1454 | 0 | mbcsHeader.offsetToUCodeUnits= ds->readUInt32(inMBCSHeader->offsetToUCodeUnits); |
1455 | 0 | mbcsHeader.offsetFromUTable= ds->readUInt32(inMBCSHeader->offsetFromUTable); |
1456 | 0 | mbcsHeader.offsetFromUBytes= ds->readUInt32(inMBCSHeader->offsetFromUBytes); |
1457 | 0 | mbcsHeader.flags= ds->readUInt32(inMBCSHeader->flags); |
1458 | 0 | mbcsHeader.fromUBytesLength= ds->readUInt32(inMBCSHeader->fromUBytesLength); |
1459 | | /* mbcsHeader.options have been read above */ |
1460 | |
|
1461 | 0 | extOffset=(int32_t)(mbcsHeader.flags>>8); |
1462 | 0 | outputType=(uint8_t)mbcsHeader.flags; |
1463 | 0 | if(noFromU && outputType==MBCS_OUTPUT_1) { |
1464 | 0 | udata_printError(ds, "ucnv_swap(): unsupported combination of makeconv --small with SBCS\n"); |
1465 | 0 | *pErrorCode=U_UNSUPPORTED_ERROR; |
1466 | 0 | return 0; |
1467 | 0 | } |
1468 | | |
1469 | | /* make sure that the output type is known */ |
1470 | 0 | switch(outputType) { |
1471 | 0 | case MBCS_OUTPUT_1: |
1472 | 0 | case MBCS_OUTPUT_2: |
1473 | 0 | case MBCS_OUTPUT_3: |
1474 | 0 | case MBCS_OUTPUT_4: |
1475 | 0 | case MBCS_OUTPUT_3_EUC: |
1476 | 0 | case MBCS_OUTPUT_4_EUC: |
1477 | 0 | case MBCS_OUTPUT_2_SISO: |
1478 | 0 | case MBCS_OUTPUT_EXT_ONLY: |
1479 | | /* OK */ |
1480 | 0 | break; |
1481 | 0 | default: |
1482 | 0 | udata_printError(ds, "ucnv_swap(): unsupported MBCS output type 0x%x\n", |
1483 | 0 | outputType); |
1484 | 0 | *pErrorCode=U_UNSUPPORTED_ERROR; |
1485 | 0 | return 0; |
1486 | 0 | } |
1487 | | |
1488 | | /* calculate the length of the MBCS data */ |
1489 | | |
1490 | | /* |
1491 | | * utf8Friendly MBCS files (mbcsHeader.version 4.3) |
1492 | | * contain an additional mbcsIndex table: |
1493 | | * uint16_t[(maxFastUChar+1)>>6]; |
1494 | | * where maxFastUChar=((mbcsHeader.version[2]<<8)|0xff). |
1495 | | */ |
1496 | 0 | maxFastUChar=0; |
1497 | 0 | mbcsIndexLength=0; |
1498 | 0 | if( outputType!=MBCS_OUTPUT_EXT_ONLY && outputType!=MBCS_OUTPUT_1 && |
1499 | 0 | mbcsHeader.version[1]>=3 && (maxFastUChar=mbcsHeader.version[2])!=0 |
1500 | 0 | ) { |
1501 | 0 | maxFastUChar=(maxFastUChar<<8)|0xff; |
1502 | 0 | mbcsIndexLength=((maxFastUChar+1)>>6)*2; /* number of bytes */ |
1503 | 0 | } |
1504 | |
|
1505 | 0 | if(extOffset==0) { |
1506 | 0 | size=(int32_t)(mbcsHeader.offsetFromUBytes+mbcsIndexLength); |
1507 | 0 | if(!noFromU) { |
1508 | 0 | size+=(int32_t)mbcsHeader.fromUBytesLength; |
1509 | 0 | } |
1510 | | |
1511 | | /* avoid compiler warnings - not otherwise necessary, and the value does not matter */ |
1512 | 0 | inExtIndexes=NULL; |
1513 | 0 | } else { |
1514 | | /* there is extension data after the base data, see ucnv_ext.h */ |
1515 | 0 | if(length>=0 && length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) { |
1516 | 0 | udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n", |
1517 | 0 | length); |
1518 | 0 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
1519 | 0 | return 0; |
1520 | 0 | } |
1521 | | |
1522 | 0 | inExtIndexes=(const int32_t *)(inBytes+extOffset); |
1523 | 0 | size=extOffset+udata_readInt32(ds, inExtIndexes[UCNV_EXT_SIZE]); |
1524 | 0 | } |
1525 | | |
1526 | 0 | if(length>=0) { |
1527 | 0 | if(length<size) { |
1528 | 0 | udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n", |
1529 | 0 | length); |
1530 | 0 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
1531 | 0 | return 0; |
1532 | 0 | } |
1533 | | |
1534 | | /* copy the data for inaccessible bytes */ |
1535 | 0 | if(inBytes!=outBytes) { |
1536 | 0 | uprv_memcpy(outBytes, inBytes, size); |
1537 | 0 | } |
1538 | | |
1539 | | /* swap the MBCSHeader, except for the version field */ |
1540 | 0 | count=mbcsHeaderLength*4; |
1541 | 0 | ds->swapArray32(ds, &inMBCSHeader->countStates, count-4, |
1542 | 0 | &outMBCSHeader->countStates, pErrorCode); |
1543 | |
|
1544 | 0 | if(outputType==MBCS_OUTPUT_EXT_ONLY) { |
1545 | | /* |
1546 | | * extension-only file, |
1547 | | * contains a base name instead of normal base table data |
1548 | | */ |
1549 | | |
1550 | | /* swap the base name, between the header and the extension data */ |
1551 | 0 | const char *inBaseName=(const char *)inBytes+count; |
1552 | 0 | char *outBaseName=(char *)outBytes+count; |
1553 | 0 | ds->swapInvChars(ds, inBaseName, (int32_t)uprv_strlen(inBaseName), |
1554 | 0 | outBaseName, pErrorCode); |
1555 | 0 | } else { |
1556 | | /* normal file with base table data */ |
1557 | | |
1558 | | /* swap the state table, 1kB per state */ |
1559 | 0 | offset=count; |
1560 | 0 | count=mbcsHeader.countStates*1024; |
1561 | 0 | ds->swapArray32(ds, inBytes+offset, (int32_t)count, |
1562 | 0 | outBytes+offset, pErrorCode); |
1563 | | |
1564 | | /* swap the toUFallbacks[] */ |
1565 | 0 | offset+=count; |
1566 | 0 | count=mbcsHeader.countToUFallbacks*8; |
1567 | 0 | ds->swapArray32(ds, inBytes+offset, (int32_t)count, |
1568 | 0 | outBytes+offset, pErrorCode); |
1569 | | |
1570 | | /* swap the unicodeCodeUnits[] */ |
1571 | 0 | offset=mbcsHeader.offsetToUCodeUnits; |
1572 | 0 | count=mbcsHeader.offsetFromUTable-offset; |
1573 | 0 | ds->swapArray16(ds, inBytes+offset, (int32_t)count, |
1574 | 0 | outBytes+offset, pErrorCode); |
1575 | | |
1576 | | /* offset to the stage 1 table, independent of the outputType */ |
1577 | 0 | offset=mbcsHeader.offsetFromUTable; |
1578 | |
|
1579 | 0 | if(outputType==MBCS_OUTPUT_1) { |
1580 | | /* SBCS: swap the fromU tables, all 16 bits wide */ |
1581 | 0 | count=(mbcsHeader.offsetFromUBytes-offset)+mbcsHeader.fromUBytesLength; |
1582 | 0 | ds->swapArray16(ds, inBytes+offset, (int32_t)count, |
1583 | 0 | outBytes+offset, pErrorCode); |
1584 | 0 | } else { |
1585 | | /* otherwise: swap the stage tables separately */ |
1586 | | |
1587 | | /* stage 1 table: uint16_t[0x440 or 0x40] */ |
1588 | 0 | if(inStaticData->unicodeMask&UCNV_HAS_SUPPLEMENTARY) { |
1589 | 0 | count=0x440*2; /* for all of Unicode */ |
1590 | 0 | } else { |
1591 | 0 | count=0x40*2; /* only BMP */ |
1592 | 0 | } |
1593 | 0 | ds->swapArray16(ds, inBytes+offset, (int32_t)count, |
1594 | 0 | outBytes+offset, pErrorCode); |
1595 | | |
1596 | | /* stage 2 table: uint32_t[] */ |
1597 | 0 | offset+=count; |
1598 | 0 | count=mbcsHeader.offsetFromUBytes-offset; |
1599 | 0 | ds->swapArray32(ds, inBytes+offset, (int32_t)count, |
1600 | 0 | outBytes+offset, pErrorCode); |
1601 | | |
1602 | | /* stage 3/result bytes: sometimes uint16_t[] or uint32_t[] */ |
1603 | 0 | offset=mbcsHeader.offsetFromUBytes; |
1604 | 0 | count= noFromU ? 0 : mbcsHeader.fromUBytesLength; |
1605 | 0 | switch(outputType) { |
1606 | 0 | case MBCS_OUTPUT_2: |
1607 | 0 | case MBCS_OUTPUT_3_EUC: |
1608 | 0 | case MBCS_OUTPUT_2_SISO: |
1609 | 0 | ds->swapArray16(ds, inBytes+offset, (int32_t)count, |
1610 | 0 | outBytes+offset, pErrorCode); |
1611 | 0 | break; |
1612 | 0 | case MBCS_OUTPUT_4: |
1613 | 0 | ds->swapArray32(ds, inBytes+offset, (int32_t)count, |
1614 | 0 | outBytes+offset, pErrorCode); |
1615 | 0 | break; |
1616 | 0 | default: |
1617 | | /* just uint8_t[], nothing to swap */ |
1618 | 0 | break; |
1619 | 0 | } |
1620 | | |
1621 | 0 | if(mbcsIndexLength!=0) { |
1622 | 0 | offset+=count; |
1623 | 0 | count=mbcsIndexLength; |
1624 | 0 | ds->swapArray16(ds, inBytes+offset, (int32_t)count, |
1625 | 0 | outBytes+offset, pErrorCode); |
1626 | 0 | } |
1627 | 0 | } |
1628 | 0 | } |
1629 | | |
1630 | 0 | if(extOffset!=0) { |
1631 | | /* swap the extension data */ |
1632 | 0 | inBytes+=extOffset; |
1633 | 0 | outBytes+=extOffset; |
1634 | | |
1635 | | /* swap toUTable[] */ |
1636 | 0 | offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_INDEX]); |
1637 | 0 | length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_LENGTH]); |
1638 | 0 | ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); |
1639 | | |
1640 | | /* swap toUUChars[] */ |
1641 | 0 | offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_INDEX]); |
1642 | 0 | length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_LENGTH]); |
1643 | 0 | ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); |
1644 | | |
1645 | | /* swap fromUTableUChars[] */ |
1646 | 0 | offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_UCHARS_INDEX]); |
1647 | 0 | length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_LENGTH]); |
1648 | 0 | ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); |
1649 | | |
1650 | | /* swap fromUTableValues[] */ |
1651 | 0 | offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_VALUES_INDEX]); |
1652 | | /* same length as for fromUTableUChars[] */ |
1653 | 0 | ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); |
1654 | | |
1655 | | /* no need to swap fromUBytes[] */ |
1656 | | |
1657 | | /* swap fromUStage12[] */ |
1658 | 0 | offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_INDEX]); |
1659 | 0 | length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_LENGTH]); |
1660 | 0 | ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); |
1661 | | |
1662 | | /* swap fromUStage3[] */ |
1663 | 0 | offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_INDEX]); |
1664 | 0 | length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_LENGTH]); |
1665 | 0 | ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); |
1666 | | |
1667 | | /* swap fromUStage3b[] */ |
1668 | 0 | offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_INDEX]); |
1669 | 0 | length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_LENGTH]); |
1670 | 0 | ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); |
1671 | | |
1672 | | /* swap indexes[] */ |
1673 | 0 | length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_INDEXES_LENGTH]); |
1674 | 0 | ds->swapArray32(ds, inBytes, length*4, outBytes, pErrorCode); |
1675 | 0 | } |
1676 | 0 | } |
1677 | 0 | } else { |
1678 | 0 | udata_printError(ds, "ucnv_swap(): unknown conversionType=%d!=UCNV_MBCS\n", |
1679 | 0 | inStaticData->conversionType); |
1680 | 0 | *pErrorCode=U_UNSUPPORTED_ERROR; |
1681 | 0 | return 0; |
1682 | 0 | } |
1683 | | |
1684 | 0 | return headerSize+(int32_t)staticDataSize+size; |
1685 | 0 | } |
1686 | | |
1687 | | #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |
1688 | | |
1689 | | #endif |