/src/icu/source/common/ucnv_bld.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2016 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | /* |
4 | | ******************************************************************** |
5 | | * COPYRIGHT: |
6 | | * Copyright (c) 1996-2016, International Business Machines Corporation and |
7 | | * others. All Rights Reserved. |
8 | | ******************************************************************** |
9 | | * |
10 | | * ucnv_bld.cpp: |
11 | | * |
12 | | * Defines functions that are used in the creation/initialization/deletion |
13 | | * of converters and related structures. |
14 | | * uses uconv_io.h routines to access disk information |
15 | | * is used by ucnv.h to implement public API create/delete/flushCache routines |
16 | | * Modification History: |
17 | | * |
18 | | * Date Name Description |
19 | | * |
20 | | * 06/20/2000 helena OS/400 port changes; mostly typecast. |
21 | | * 06/29/2000 helena Major rewrite of the callback interface. |
22 | | */ |
23 | | |
24 | | #include "unicode/utypes.h" |
25 | | |
26 | | #if !UCONFIG_NO_CONVERSION |
27 | | |
28 | | #include "unicode/putil.h" |
29 | | #include "unicode/udata.h" |
30 | | #include "unicode/ucnv.h" |
31 | | #include "unicode/uloc.h" |
32 | | #include "mutex.h" |
33 | | #include "putilimp.h" |
34 | | #include "uassert.h" |
35 | | #include "utracimp.h" |
36 | | #include "ucnv_io.h" |
37 | | #include "ucnv_bld.h" |
38 | | #include "ucnvmbcs.h" |
39 | | #include "ucnv_ext.h" |
40 | | #include "ucnv_cnv.h" |
41 | | #include "ucnv_imp.h" |
42 | | #include "uhash.h" |
43 | | #include "umutex.h" |
44 | | #include "cstring.h" |
45 | | #include "cmemory.h" |
46 | | #include "ucln_cmn.h" |
47 | | #include "ustr_cnv.h" |
48 | | |
49 | | |
/*
 * Optional tracing hook for the converter cache.
 * The logging variant is disabled with "#if 0"; in the active branch
 * UCNV_DEBUG_LOG(x,y,z) expands to nothing.
 */
#if 0
#include <stdio.h>
extern void UCNV_DEBUG_LOG(char *what, char *who, void *p, int l);
#define UCNV_DEBUG_LOG(x,y,z) UCNV_DEBUG_LOG(x,y,z,__LINE__)
#else
#define UCNV_DEBUG_LOG(x,y,z)
#endif
57 | | |
58 | | static const UConverterSharedData * const |
59 | | converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={ |
60 | | NULL, NULL, |
61 | | |
62 | | #if UCONFIG_NO_LEGACY_CONVERSION |
63 | | NULL, |
64 | | #else |
65 | | &_MBCSData, |
66 | | #endif |
67 | | |
68 | | &_Latin1Data, |
69 | | &_UTF8Data, &_UTF16BEData, &_UTF16LEData, |
70 | | #if UCONFIG_ONLY_HTML_CONVERSION |
71 | | NULL, NULL, |
72 | | #else |
73 | | &_UTF32BEData, &_UTF32LEData, |
74 | | #endif |
75 | | NULL, |
76 | | |
77 | | #if UCONFIG_NO_LEGACY_CONVERSION |
78 | | NULL, |
79 | | #else |
80 | | &_ISO2022Data, |
81 | | #endif |
82 | | |
83 | | #if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION |
84 | | NULL, NULL, NULL, NULL, NULL, NULL, |
85 | | NULL, NULL, NULL, NULL, NULL, NULL, |
86 | | NULL, |
87 | | #else |
88 | | &_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBCSData6, |
89 | | &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBCSData19, |
90 | | &_HZData, |
91 | | #endif |
92 | | |
93 | | #if UCONFIG_ONLY_HTML_CONVERSION |
94 | | NULL, |
95 | | #else |
96 | | &_SCSUData, |
97 | | #endif |
98 | | |
99 | | |
100 | | #if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION |
101 | | NULL, |
102 | | #else |
103 | | &_ISCIIData, |
104 | | #endif |
105 | | |
106 | | &_ASCIIData, |
107 | | #if UCONFIG_ONLY_HTML_CONVERSION |
108 | | NULL, NULL, &_UTF16Data, NULL, NULL, NULL, |
109 | | #else |
110 | | &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData, |
111 | | #endif |
112 | | |
113 | | #if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION |
114 | | NULL, |
115 | | #else |
116 | | &_CompoundTextData |
117 | | #endif |
118 | | }; |
119 | | |
120 | | /* Please keep this in binary sorted order for getAlgorithmicTypeFromName. |
121 | | Also the name should be in lower case and all spaces, dashes and underscores |
122 | | removed |
123 | | */ |
124 | | static struct { |
125 | | const char *name; |
126 | | const UConverterType type; |
127 | | } const cnvNameType[] = { |
128 | | #if !UCONFIG_ONLY_HTML_CONVERSION |
129 | | { "bocu1", UCNV_BOCU1 }, |
130 | | { "cesu8", UCNV_CESU8 }, |
131 | | #endif |
132 | | #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION |
133 | | { "hz",UCNV_HZ }, |
134 | | #endif |
135 | | #if !UCONFIG_ONLY_HTML_CONVERSION |
136 | | { "imapmailboxname", UCNV_IMAP_MAILBOX }, |
137 | | #endif |
138 | | #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION |
139 | | { "iscii", UCNV_ISCII }, |
140 | | #endif |
141 | | #if !UCONFIG_NO_LEGACY_CONVERSION |
142 | | { "iso2022", UCNV_ISO_2022 }, |
143 | | #endif |
144 | | { "iso88591", UCNV_LATIN_1 }, |
145 | | #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION |
146 | | { "lmbcs1", UCNV_LMBCS_1 }, |
147 | | { "lmbcs11",UCNV_LMBCS_11 }, |
148 | | { "lmbcs16",UCNV_LMBCS_16 }, |
149 | | { "lmbcs17",UCNV_LMBCS_17 }, |
150 | | { "lmbcs18",UCNV_LMBCS_18 }, |
151 | | { "lmbcs19",UCNV_LMBCS_19 }, |
152 | | { "lmbcs2", UCNV_LMBCS_2 }, |
153 | | { "lmbcs3", UCNV_LMBCS_3 }, |
154 | | { "lmbcs4", UCNV_LMBCS_4 }, |
155 | | { "lmbcs5", UCNV_LMBCS_5 }, |
156 | | { "lmbcs6", UCNV_LMBCS_6 }, |
157 | | { "lmbcs8", UCNV_LMBCS_8 }, |
158 | | #endif |
159 | | #if !UCONFIG_ONLY_HTML_CONVERSION |
160 | | { "scsu", UCNV_SCSU }, |
161 | | #endif |
162 | | { "usascii", UCNV_US_ASCII }, |
163 | | { "utf16", UCNV_UTF16 }, |
164 | | { "utf16be", UCNV_UTF16_BigEndian }, |
165 | | { "utf16le", UCNV_UTF16_LittleEndian }, |
166 | | #if U_IS_BIG_ENDIAN |
167 | | { "utf16oppositeendian", UCNV_UTF16_LittleEndian }, |
168 | | { "utf16platformendian", UCNV_UTF16_BigEndian }, |
169 | | #else |
170 | | { "utf16oppositeendian", UCNV_UTF16_BigEndian}, |
171 | | { "utf16platformendian", UCNV_UTF16_LittleEndian }, |
172 | | #endif |
173 | | #if !UCONFIG_ONLY_HTML_CONVERSION |
174 | | { "utf32", UCNV_UTF32 }, |
175 | | { "utf32be", UCNV_UTF32_BigEndian }, |
176 | | { "utf32le", UCNV_UTF32_LittleEndian }, |
177 | | #if U_IS_BIG_ENDIAN |
178 | | { "utf32oppositeendian", UCNV_UTF32_LittleEndian }, |
179 | | { "utf32platformendian", UCNV_UTF32_BigEndian }, |
180 | | #else |
181 | | { "utf32oppositeendian", UCNV_UTF32_BigEndian }, |
182 | | { "utf32platformendian", UCNV_UTF32_LittleEndian }, |
183 | | #endif |
184 | | #endif |
185 | | #if !UCONFIG_ONLY_HTML_CONVERSION |
186 | | { "utf7", UCNV_UTF7 }, |
187 | | #endif |
188 | | { "utf8", UCNV_UTF8 }, |
189 | | #if !UCONFIG_ONLY_HTML_CONVERSION |
190 | | { "x11compoundtext", UCNV_COMPOUND_TEXT} |
191 | | #endif |
192 | | }; |
193 | | |
194 | | |
195 | | /*initializes some global variables */ |
196 | | static UHashtable *SHARED_DATA_HASHTABLE = NULL; |
197 | | static UMutex cnvCacheMutex = U_MUTEX_INITIALIZER; /* Mutex for synchronizing cnv cache access. */ |
198 | | /* Note: the global mutex is used for */ |
199 | | /* reference count updates. */ |
200 | | |
201 | | static const char **gAvailableConverters = NULL; |
202 | | static uint16_t gAvailableConverterCount = 0; |
203 | | static icu::UInitOnce gAvailableConvertersInitOnce = U_INITONCE_INITIALIZER; |
204 | | |
205 | | #if !U_CHARSET_IS_UTF8 |
206 | | |
207 | | /* This contains the resolved converter name. So no further alias lookup is needed again. */ |
208 | | static char gDefaultConverterNameBuffer[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; /* +1 for NULL */ |
209 | | static const char *gDefaultConverterName = NULL; |
210 | | |
211 | | /* |
212 | | If the default converter is an algorithmic converter, this is the cached value. |
213 | | We don't cache a full UConverter and clone it because ucnv_clone doesn't have |
214 | | less overhead than an algorithmic open. We don't cache non-algorithmic converters |
215 | | because ucnv_flushCache must be able to unload the default converter and its table. |
216 | | */ |
217 | | static const UConverterSharedData *gDefaultAlgorithmicSharedData = NULL; |
218 | | |
219 | | /* Does gDefaultConverterName have a converter option and require extra parsing? */ |
220 | | static UBool gDefaultConverterContainsOption; |
221 | | |
222 | | #endif /* !U_CHARSET_IS_UTF8 */ |
223 | | |
224 | | static const char DATA_TYPE[] = "cnv"; |
225 | | |
226 | | /* ucnv_flushAvailableConverterCache. This is only called from ucnv_cleanup(). |
227 | | * If it is ever to be called from elsewhere, synchronization |
228 | | * will need to be considered. |
229 | | */ |
230 | | static void |
231 | 1.11k | ucnv_flushAvailableConverterCache() { |
232 | 1.11k | gAvailableConverterCount = 0; |
233 | 1.11k | if (gAvailableConverters) { |
234 | 0 | uprv_free((char **)gAvailableConverters); |
235 | 0 | gAvailableConverters = NULL; |
236 | 0 | } |
237 | 1.11k | gAvailableConvertersInitOnce.reset(); |
238 | 1.11k | } |
239 | | |
240 | | /* ucnv_cleanup - delete all storage held by the converter cache, except any */ |
241 | | /* in use by open converters. */ |
242 | | /* Not thread safe. */ |
243 | | /* Not supported API. */ |
244 | 1.11k | static UBool U_CALLCONV ucnv_cleanup(void) { |
245 | 1.11k | ucnv_flushCache(); |
246 | 1.11k | if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) { |
247 | 0 | uhash_close(SHARED_DATA_HASHTABLE); |
248 | 0 | SHARED_DATA_HASHTABLE = NULL; |
249 | 0 | } |
250 | | |
251 | | /* Isn't called from flushCache because other threads may have preexisting references to the table. */ |
252 | 1.11k | ucnv_flushAvailableConverterCache(); |
253 | | |
254 | 1.11k | #if !U_CHARSET_IS_UTF8 |
255 | 1.11k | gDefaultConverterName = NULL; |
256 | 1.11k | gDefaultConverterNameBuffer[0] = 0; |
257 | 1.11k | gDefaultConverterContainsOption = FALSE; |
258 | 1.11k | gDefaultAlgorithmicSharedData = NULL; |
259 | 1.11k | #endif |
260 | | |
261 | 1.11k | return (SHARED_DATA_HASHTABLE == NULL); |
262 | 1.11k | } |
263 | | |
264 | | static UBool U_CALLCONV |
265 | | isCnvAcceptable(void * /*context*/, |
266 | | const char * /*type*/, const char * /*name*/, |
267 | 0 | const UDataInfo *pInfo) { |
268 | 0 | return (UBool)( |
269 | 0 | pInfo->size>=20 && |
270 | 0 | pInfo->isBigEndian==U_IS_BIG_ENDIAN && |
271 | 0 | pInfo->charsetFamily==U_CHARSET_FAMILY && |
272 | 0 | pInfo->sizeofUChar==U_SIZEOF_UCHAR && |
273 | 0 | pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */ |
274 | 0 | pInfo->dataFormat[1]==0x6e && |
275 | 0 | pInfo->dataFormat[2]==0x76 && |
276 | 0 | pInfo->dataFormat[3]==0x74 && |
277 | 0 | pInfo->formatVersion[0]==6); /* Everything will be version 6 */ |
278 | 0 | } |
279 | | |
280 | | /** |
281 | | * Un flatten shared data from a UDATA.. |
282 | | */ |
283 | | static UConverterSharedData* |
284 | | ucnv_data_unFlattenClone(UConverterLoadArgs *pArgs, UDataMemory *pData, UErrorCode *status) |
285 | 0 | { |
286 | | /* UDataInfo info; -- necessary only if some converters have different formatVersion */ |
287 | 0 | const uint8_t *raw = (const uint8_t *)udata_getMemory(pData); |
288 | 0 | const UConverterStaticData *source = (const UConverterStaticData *) raw; |
289 | 0 | UConverterSharedData *data; |
290 | 0 | UConverterType type = (UConverterType)source->conversionType; |
291 | |
|
292 | 0 | if(U_FAILURE(*status)) |
293 | 0 | return NULL; |
294 | | |
295 | 0 | if( (uint16_t)type >= UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES || |
296 | 0 | converterData[type] == NULL || |
297 | 0 | !converterData[type]->isReferenceCounted || |
298 | 0 | converterData[type]->referenceCounter != 1 || |
299 | 0 | source->structSize != sizeof(UConverterStaticData)) |
300 | 0 | { |
301 | 0 | *status = U_INVALID_TABLE_FORMAT; |
302 | 0 | return NULL; |
303 | 0 | } |
304 | | |
305 | 0 | data = (UConverterSharedData *)uprv_malloc(sizeof(UConverterSharedData)); |
306 | 0 | if(data == NULL) { |
307 | 0 | *status = U_MEMORY_ALLOCATION_ERROR; |
308 | 0 | return NULL; |
309 | 0 | } |
310 | | |
311 | | /* copy initial values from the static structure for this type */ |
312 | 0 | uprv_memcpy(data, converterData[type], sizeof(UConverterSharedData)); |
313 | |
|
314 | 0 | data->staticData = source; |
315 | |
|
316 | 0 | data->sharedDataCached = FALSE; |
317 | | |
318 | | /* fill in fields from the loaded data */ |
319 | 0 | data->dataMemory = (void*)pData; /* for future use */ |
320 | |
|
321 | 0 | if(data->impl->load != NULL) { |
322 | 0 | data->impl->load(data, pArgs, raw + source->structSize, status); |
323 | 0 | if(U_FAILURE(*status)) { |
324 | 0 | uprv_free(data); |
325 | 0 | return NULL; |
326 | 0 | } |
327 | 0 | } |
328 | 0 | return data; |
329 | 0 | } |
330 | | |
331 | | /*Takes an alias name gets an actual converter file name |
332 | | *goes to disk and opens it. |
333 | | *allocates the memory and returns a new UConverter object |
334 | | */ |
335 | | static UConverterSharedData *createConverterFromFile(UConverterLoadArgs *pArgs, UErrorCode * err) |
336 | 0 | { |
337 | 0 | UDataMemory *data; |
338 | 0 | UConverterSharedData *sharedData; |
339 | |
|
340 | 0 | UTRACE_ENTRY_OC(UTRACE_UCNV_LOAD); |
341 | |
|
342 | 0 | if (U_FAILURE (*err)) { |
343 | 0 | UTRACE_EXIT_STATUS(*err); |
344 | 0 | return NULL; |
345 | 0 | } |
346 | | |
347 | 0 | UTRACE_DATA2(UTRACE_OPEN_CLOSE, "load converter %s from package %s", pArgs->name, pArgs->pkg); |
348 | |
|
349 | 0 | data = udata_openChoice(pArgs->pkg, DATA_TYPE, pArgs->name, isCnvAcceptable, NULL, err); |
350 | 0 | if(U_FAILURE(*err)) |
351 | 0 | { |
352 | 0 | UTRACE_EXIT_STATUS(*err); |
353 | 0 | return NULL; |
354 | 0 | } |
355 | | |
356 | 0 | sharedData = ucnv_data_unFlattenClone(pArgs, data, err); |
357 | 0 | if(U_FAILURE(*err)) |
358 | 0 | { |
359 | 0 | udata_close(data); |
360 | 0 | UTRACE_EXIT_STATUS(*err); |
361 | 0 | return NULL; |
362 | 0 | } |
363 | | |
364 | | /* |
365 | | * TODO Store pkg in a field in the shared data so that delta-only converters |
366 | | * can load base converters from the same package. |
367 | | * If the pkg name is longer than the field, then either do not load the converter |
368 | | * in the first place, or just set the pkg field to "". |
369 | | */ |
370 | | |
371 | 0 | UTRACE_EXIT_PTR_STATUS(sharedData, *err); |
372 | 0 | return sharedData; |
373 | 0 | } |
374 | | |
375 | | /*returns a converter type from a string |
376 | | */ |
377 | | static const UConverterSharedData * |
378 | | getAlgorithmicTypeFromName(const char *realName) |
379 | 3.34k | { |
380 | 3.34k | uint32_t mid, start, limit; |
381 | 3.34k | uint32_t lastMid; |
382 | 3.34k | int result; |
383 | 3.34k | char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH]; |
384 | | |
385 | | /* Lower case and remove ignoreable characters. */ |
386 | 3.34k | ucnv_io_stripForCompare(strippedName, realName); |
387 | | |
388 | | /* do a binary search for the alias */ |
389 | 3.34k | start = 0; |
390 | 3.34k | limit = UPRV_LENGTHOF(cnvNameType); |
391 | 3.34k | mid = limit; |
392 | 3.34k | lastMid = UINT32_MAX; |
393 | | |
394 | 15.6k | for (;;) { |
395 | 15.6k | mid = (uint32_t)((start + limit) / 2); |
396 | 15.6k | if (lastMid == mid) { /* Have we moved? */ |
397 | 0 | break; /* We haven't moved, and it wasn't found. */ |
398 | 0 | } |
399 | 15.6k | lastMid = mid; |
400 | 15.6k | result = uprv_strcmp(strippedName, cnvNameType[mid].name); |
401 | | |
402 | 15.6k | if (result < 0) { |
403 | 6.69k | limit = mid; |
404 | 8.92k | } else if (result > 0) { |
405 | 5.58k | start = mid; |
406 | 5.58k | } else { |
407 | 3.34k | return converterData[cnvNameType[mid].type]; |
408 | 3.34k | } |
409 | 15.6k | } |
410 | | |
411 | 0 | return NULL; |
412 | 3.34k | } |
413 | | |
414 | | /* |
415 | | * Based on the number of known converters, this determines how many times larger |
416 | | * the shared data hash table should be. When on small platforms, or just a couple |
417 | | * of converters are used, this number should be 2. When memory is plentiful, or |
418 | | * when ucnv_countAvailable is ever used with a lot of available converters, |
419 | | * this should be 4. |
420 | | * Larger numbers reduce the number of hash collisions, but use more memory. |
421 | | */ |
422 | 0 | #define UCNV_CACHE_LOAD_FACTOR 2 |
423 | | |
424 | | /* Puts the shared data in the static hashtable SHARED_DATA_HASHTABLE */ |
425 | | /* Will always be called with the cnvCacheMutex alrady being held */ |
426 | | /* by the calling function. */ |
427 | | /* Stores the shared data in the SHARED_DATA_HASHTABLE |
428 | | * @param data The shared data |
429 | | */ |
430 | | static void |
431 | | ucnv_shareConverterData(UConverterSharedData * data) |
432 | 0 | { |
433 | 0 | UErrorCode err = U_ZERO_ERROR; |
434 | | /*Lazy evaluates the Hashtable itself */ |
435 | | /*void *sanity = NULL;*/ |
436 | |
|
437 | 0 | if (SHARED_DATA_HASHTABLE == NULL) |
438 | 0 | { |
439 | 0 | SHARED_DATA_HASHTABLE = uhash_openSize(uhash_hashChars, uhash_compareChars, NULL, |
440 | 0 | ucnv_io_countKnownConverters(&err)*UCNV_CACHE_LOAD_FACTOR, |
441 | 0 | &err); |
442 | 0 | ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup); |
443 | |
|
444 | 0 | if (U_FAILURE(err)) |
445 | 0 | return; |
446 | 0 | } |
447 | | |
448 | | /* ### check to see if the element is not already there! */ |
449 | | |
450 | | /* |
451 | | sanity = ucnv_getSharedConverterData (data->staticData->name); |
452 | | if(sanity != NULL) |
453 | | { |
454 | | UCNV_DEBUG_LOG("put:overwrite!",data->staticData->name,sanity); |
455 | | } |
456 | | UCNV_DEBUG_LOG("put:chk",data->staticData->name,sanity); |
457 | | */ |
458 | | |
459 | | /* Mark it shared */ |
460 | 0 | data->sharedDataCached = TRUE; |
461 | |
|
462 | 0 | uhash_put(SHARED_DATA_HASHTABLE, |
463 | 0 | (void*) data->staticData->name, /* Okay to cast away const as long as |
464 | | keyDeleter == NULL */ |
465 | 0 | data, |
466 | 0 | &err); |
467 | 0 | UCNV_DEBUG_LOG("put", data->staticData->name,data); |
468 | |
|
469 | 0 | } |
470 | | |
471 | | /* Look up a converter name in the shared data cache. */ |
472 | | /* cnvCacheMutex must be held by the caller to protect the hash table. */ |
473 | | /* gets the shared data from the SHARED_DATA_HASHTABLE (might return NULL if it isn't there) |
474 | | * @param name The name of the shared data |
475 | | * @return the shared data from the SHARED_DATA_HASHTABLE |
476 | | */ |
477 | | static UConverterSharedData * |
478 | | ucnv_getSharedConverterData(const char *name) |
479 | 0 | { |
480 | | /*special case when no Table has yet been created we return NULL */ |
481 | 0 | if (SHARED_DATA_HASHTABLE == NULL) |
482 | 0 | { |
483 | 0 | return NULL; |
484 | 0 | } |
485 | 0 | else |
486 | 0 | { |
487 | 0 | UConverterSharedData *rc; |
488 | |
|
489 | 0 | rc = (UConverterSharedData*)uhash_get(SHARED_DATA_HASHTABLE, name); |
490 | 0 | UCNV_DEBUG_LOG("get",name,rc); |
491 | 0 | return rc; |
492 | 0 | } |
493 | 0 | } |
494 | | |
495 | | /*frees the string of memory blocks associates with a sharedConverter |
496 | | *if and only if the referenceCounter == 0 |
497 | | */ |
498 | | /* Deletes (frees) the Shared data it's passed. first it checks the referenceCounter to |
499 | | * see if anyone is using it, if not it frees all the memory stemming from sharedConverterData and |
500 | | * returns TRUE, |
501 | | * otherwise returns FALSE |
502 | | * @param sharedConverterData The shared data |
503 | | * @return if not it frees all the memory stemming from sharedConverterData and |
504 | | * returns TRUE, otherwise returns FALSE |
505 | | */ |
506 | | static UBool |
507 | | ucnv_deleteSharedConverterData(UConverterSharedData * deadSharedData) |
508 | 0 | { |
509 | 0 | UTRACE_ENTRY_OC(UTRACE_UCNV_UNLOAD); |
510 | 0 | UTRACE_DATA2(UTRACE_OPEN_CLOSE, "unload converter %s shared data %p", deadSharedData->staticData->name, deadSharedData); |
511 | |
|
512 | 0 | if (deadSharedData->referenceCounter > 0) { |
513 | 0 | UTRACE_EXIT_VALUE((int32_t)FALSE); |
514 | 0 | return FALSE; |
515 | 0 | } |
516 | | |
517 | 0 | if (deadSharedData->impl->unload != NULL) { |
518 | 0 | deadSharedData->impl->unload(deadSharedData); |
519 | 0 | } |
520 | |
|
521 | 0 | if(deadSharedData->dataMemory != NULL) |
522 | 0 | { |
523 | 0 | UDataMemory *data = (UDataMemory*)deadSharedData->dataMemory; |
524 | 0 | udata_close(data); |
525 | 0 | } |
526 | |
|
527 | 0 | uprv_free(deadSharedData); |
528 | |
|
529 | 0 | UTRACE_EXIT_VALUE((int32_t)TRUE); |
530 | 0 | return TRUE; |
531 | 0 | } |
532 | | |
533 | | /** |
534 | | * Load a non-algorithmic converter. |
535 | | * If pkg==NULL, then this function must be called inside umtx_lock(&cnvCacheMutex). |
536 | | */ |
537 | | UConverterSharedData * |
538 | 0 | ucnv_load(UConverterLoadArgs *pArgs, UErrorCode *err) { |
539 | 0 | UConverterSharedData *mySharedConverterData; |
540 | |
|
541 | 0 | if(err == NULL || U_FAILURE(*err)) { |
542 | 0 | return NULL; |
543 | 0 | } |
544 | | |
545 | 0 | if(pArgs->pkg != NULL && *pArgs->pkg != 0) { |
546 | | /* application-provided converters are not currently cached */ |
547 | 0 | return createConverterFromFile(pArgs, err); |
548 | 0 | } |
549 | | |
550 | 0 | mySharedConverterData = ucnv_getSharedConverterData(pArgs->name); |
551 | 0 | if (mySharedConverterData == NULL) |
552 | 0 | { |
553 | | /*Not cached, we need to stream it in from file */ |
554 | 0 | mySharedConverterData = createConverterFromFile(pArgs, err); |
555 | 0 | if (U_FAILURE (*err) || (mySharedConverterData == NULL)) |
556 | 0 | { |
557 | 0 | return NULL; |
558 | 0 | } |
559 | 0 | else if (!pArgs->onlyTestIsLoadable) |
560 | 0 | { |
561 | | /* share it with other library clients */ |
562 | 0 | ucnv_shareConverterData(mySharedConverterData); |
563 | 0 | } |
564 | 0 | } |
565 | 0 | else |
566 | 0 | { |
567 | | /* The data for this converter was already in the cache. */ |
568 | | /* Update the reference counter on the shared data: one more client */ |
569 | 0 | mySharedConverterData->referenceCounter++; |
570 | 0 | } |
571 | | |
572 | 0 | return mySharedConverterData; |
573 | 0 | } |
574 | | |
575 | | /** |
576 | | * Unload a non-algorithmic converter. |
577 | | * It must be sharedData->isReferenceCounted |
578 | | * and this function must be called inside umtx_lock(&cnvCacheMutex). |
579 | | */ |
580 | | U_CAPI void |
581 | 0 | ucnv_unload(UConverterSharedData *sharedData) { |
582 | 0 | if(sharedData != NULL) { |
583 | 0 | if (sharedData->referenceCounter > 0) { |
584 | 0 | sharedData->referenceCounter--; |
585 | 0 | } |
586 | |
|
587 | 0 | if((sharedData->referenceCounter <= 0)&&(sharedData->sharedDataCached == FALSE)) { |
588 | 0 | ucnv_deleteSharedConverterData(sharedData); |
589 | 0 | } |
590 | 0 | } |
591 | 0 | } |
592 | | |
593 | | U_CFUNC void |
594 | | ucnv_unloadSharedDataIfReady(UConverterSharedData *sharedData) |
595 | 0 | { |
596 | 0 | if(sharedData != NULL && sharedData->isReferenceCounted) { |
597 | 0 | umtx_lock(&cnvCacheMutex); |
598 | 0 | ucnv_unload(sharedData); |
599 | 0 | umtx_unlock(&cnvCacheMutex); |
600 | 0 | } |
601 | 0 | } |
602 | | |
603 | | U_CFUNC void |
604 | | ucnv_incrementRefCount(UConverterSharedData *sharedData) |
605 | 0 | { |
606 | 0 | if(sharedData != NULL && sharedData->isReferenceCounted) { |
607 | 0 | umtx_lock(&cnvCacheMutex); |
608 | 0 | sharedData->referenceCounter++; |
609 | 0 | umtx_unlock(&cnvCacheMutex); |
610 | 0 | } |
611 | 0 | } |
612 | | |
613 | | /* |
614 | | * *pPieces must be initialized. |
615 | | * The name without options will be copied to pPieces->cnvName. |
616 | | * The locale and options will be copied to pPieces only if present in inName, |
617 | | * otherwise the existing values in pPieces remain. |
618 | | * *pArgs will be set to the pPieces values. |
619 | | */ |
620 | | static void |
621 | | parseConverterOptions(const char *inName, |
622 | | UConverterNamePieces *pPieces, |
623 | | UConverterLoadArgs *pArgs, |
624 | | UErrorCode *err) |
625 | 2.23k | { |
626 | 2.23k | char *cnvName = pPieces->cnvName; |
627 | 2.23k | char c; |
628 | 2.23k | int32_t len = 0; |
629 | | |
630 | 2.23k | pArgs->name=inName; |
631 | 2.23k | pArgs->locale=pPieces->locale; |
632 | 2.23k | pArgs->options=pPieces->options; |
633 | | |
634 | | /* copy the converter name itself to cnvName */ |
635 | 29.0k | while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) { |
636 | 26.7k | if (++len>=UCNV_MAX_CONVERTER_NAME_LENGTH) { |
637 | 0 | *err = U_ILLEGAL_ARGUMENT_ERROR; /* bad name */ |
638 | 0 | pPieces->cnvName[0]=0; |
639 | 0 | return; |
640 | 0 | } |
641 | 26.7k | *cnvName++=c; |
642 | 26.7k | inName++; |
643 | 26.7k | } |
644 | 2.23k | *cnvName=0; |
645 | 2.23k | pArgs->name=pPieces->cnvName; |
646 | | |
647 | | /* parse options. No more name copying should occur. */ |
648 | 2.23k | while((c=*inName)!=0) { |
649 | 0 | if(c==UCNV_OPTION_SEP_CHAR) { |
650 | 0 | ++inName; |
651 | 0 | } |
652 | | |
653 | | /* inName is behind an option separator */ |
654 | 0 | if(uprv_strncmp(inName, "locale=", 7)==0) { |
655 | | /* do not modify locale itself in case we have multiple locale options */ |
656 | 0 | char *dest=pPieces->locale; |
657 | | |
658 | | /* copy the locale option value */ |
659 | 0 | inName+=7; |
660 | 0 | len=0; |
661 | 0 | while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) { |
662 | 0 | ++inName; |
663 | |
|
664 | 0 | if(++len>=ULOC_FULLNAME_CAPACITY) { |
665 | 0 | *err=U_ILLEGAL_ARGUMENT_ERROR; /* bad name */ |
666 | 0 | pPieces->locale[0]=0; |
667 | 0 | return; |
668 | 0 | } |
669 | | |
670 | 0 | *dest++=c; |
671 | 0 | } |
672 | 0 | *dest=0; |
673 | 0 | } else if(uprv_strncmp(inName, "version=", 8)==0) { |
674 | | /* copy the version option value into bits 3..0 of pPieces->options */ |
675 | 0 | inName+=8; |
676 | 0 | c=*inName; |
677 | 0 | if(c==0) { |
678 | 0 | pArgs->options=(pPieces->options&=~UCNV_OPTION_VERSION); |
679 | 0 | return; |
680 | 0 | } else if((uint8_t)(c-'0')<10) { |
681 | 0 | pArgs->options=pPieces->options=(pPieces->options&~UCNV_OPTION_VERSION)|(uint32_t)(c-'0'); |
682 | 0 | ++inName; |
683 | 0 | } |
684 | 0 | } else if(uprv_strncmp(inName, "swaplfnl", 8)==0) { |
685 | 0 | inName+=8; |
686 | 0 | pArgs->options=(pPieces->options|=UCNV_OPTION_SWAP_LFNL); |
687 | | /* add processing for new options here with another } else if(uprv_strncmp(inName, "option-name=", XX)==0) { */ |
688 | 0 | } else { |
689 | | /* ignore any other options until we define some */ |
690 | 0 | while(((c = *inName++) != 0) && (c != UCNV_OPTION_SEP_CHAR)) { |
691 | 0 | } |
692 | 0 | if(c==0) { |
693 | 0 | return; |
694 | 0 | } |
695 | 0 | } |
696 | 0 | } |
697 | 2.23k | } |
698 | | |
699 | | /*Logic determines if the converter is Algorithmic AND/OR cached |
700 | | *depending on that: |
701 | | * -we either go to get data from disk and cache it (Data=TRUE, Cached=False) |
702 | | * -Get it from a Hashtable (Data=X, Cached=TRUE) |
703 | | * -Call dataConverter initializer (Data=TRUE, Cached=TRUE) |
704 | | * -Call AlgorithmicConverter initializer (Data=FALSE, Cached=TRUE) |
705 | | */ |
706 | | U_CFUNC UConverterSharedData * |
707 | | ucnv_loadSharedData(const char *converterName, |
708 | | UConverterNamePieces *pPieces, |
709 | | UConverterLoadArgs *pArgs, |
710 | 4.46k | UErrorCode * err) { |
711 | 4.46k | UConverterNamePieces stackPieces; |
712 | 4.46k | UConverterLoadArgs stackArgs; |
713 | 4.46k | UConverterSharedData *mySharedConverterData = NULL; |
714 | 4.46k | UErrorCode internalErrorCode = U_ZERO_ERROR; |
715 | 4.46k | UBool mayContainOption = TRUE; |
716 | 4.46k | UBool checkForAlgorithmic = TRUE; |
717 | | |
718 | 4.46k | if (U_FAILURE (*err)) { |
719 | 0 | return NULL; |
720 | 0 | } |
721 | | |
722 | 4.46k | if(pPieces == NULL) { |
723 | 0 | if(pArgs != NULL) { |
724 | | /* |
725 | | * Bad: We may set pArgs pointers to stackPieces fields |
726 | | * which will be invalid after this function returns. |
727 | | */ |
728 | 0 | *err = U_INTERNAL_PROGRAM_ERROR; |
729 | 0 | return NULL; |
730 | 0 | } |
731 | 0 | pPieces = &stackPieces; |
732 | 0 | } |
733 | 4.46k | if(pArgs == NULL) { |
734 | 0 | uprv_memset(&stackArgs, 0, sizeof(stackArgs)); |
735 | 0 | stackArgs.size = (int32_t)sizeof(stackArgs); |
736 | 0 | pArgs = &stackArgs; |
737 | 0 | } |
738 | | |
739 | 4.46k | pPieces->cnvName[0] = 0; |
740 | 4.46k | pPieces->locale[0] = 0; |
741 | 4.46k | pPieces->options = 0; |
742 | | |
743 | 4.46k | pArgs->name = converterName; |
744 | 4.46k | pArgs->locale = pPieces->locale; |
745 | 4.46k | pArgs->options = pPieces->options; |
746 | | |
747 | | /* In case "name" is NULL we want to open the default converter. */ |
748 | 4.46k | if (converterName == NULL) { |
749 | | #if U_CHARSET_IS_UTF8 |
750 | | pArgs->name = "UTF-8"; |
751 | | return (UConverterSharedData *)converterData[UCNV_UTF8]; |
752 | | #else |
753 | | /* Call ucnv_getDefaultName first to query the name from the OS. */ |
754 | 1.11k | pArgs->name = ucnv_getDefaultName(); |
755 | 1.11k | if (pArgs->name == NULL) { |
756 | 0 | *err = U_MISSING_RESOURCE_ERROR; |
757 | 0 | return NULL; |
758 | 0 | } |
759 | 1.11k | mySharedConverterData = (UConverterSharedData *)gDefaultAlgorithmicSharedData; |
760 | 1.11k | checkForAlgorithmic = FALSE; |
761 | 1.11k | mayContainOption = gDefaultConverterContainsOption; |
762 | | /* the default converter name is already canonical */ |
763 | 1.11k | #endif |
764 | 1.11k | } |
765 | 3.34k | else if(UCNV_FAST_IS_UTF8(converterName)) { |
766 | | /* fastpath for UTF-8 */ |
767 | 1.11k | pArgs->name = "UTF-8"; |
768 | 1.11k | return (UConverterSharedData *)converterData[UCNV_UTF8]; |
769 | 1.11k | } |
770 | 2.23k | else { |
771 | | /* separate the converter name from the options */ |
772 | 2.23k | parseConverterOptions(converterName, pPieces, pArgs, err); |
773 | 2.23k | if (U_FAILURE(*err)) { |
774 | | /* Very bad name used. */ |
775 | 0 | return NULL; |
776 | 0 | } |
777 | | |
778 | | /* get the canonical converter name */ |
779 | 2.23k | pArgs->name = ucnv_io_getConverterName(pArgs->name, &mayContainOption, &internalErrorCode); |
780 | 2.23k | if (U_FAILURE(internalErrorCode) || pArgs->name == NULL) { |
781 | | /* |
782 | | * set the input name in case the converter was added |
783 | | * without updating the alias table, or when there is no alias table |
784 | | */ |
785 | 0 | pArgs->name = pPieces->cnvName; |
786 | 2.23k | } else if (internalErrorCode == U_AMBIGUOUS_ALIAS_WARNING) { |
787 | 0 | *err = U_AMBIGUOUS_ALIAS_WARNING; |
788 | 0 | } |
789 | 2.23k | } |
790 | | |
791 | | /* separate the converter name from the options */ |
792 | 3.34k | if(mayContainOption && pArgs->name != pPieces->cnvName) { |
793 | 0 | parseConverterOptions(pArgs->name, pPieces, pArgs, err); |
794 | 0 | } |
795 | | |
796 | | /* get the shared data for an algorithmic converter, if it is one */ |
797 | 3.34k | if (checkForAlgorithmic) { |
798 | 2.23k | mySharedConverterData = (UConverterSharedData *)getAlgorithmicTypeFromName(pArgs->name); |
799 | 2.23k | } |
800 | 3.34k | if (mySharedConverterData == NULL) |
801 | 0 | { |
802 | | /* it is a data-based converter, get its shared data. */ |
803 | | /* Hold the cnvCacheMutex through the whole process of checking the */ |
804 | | /* converter data cache, and adding new entries to the cache */ |
805 | | /* to prevent other threads from modifying the cache during the */ |
806 | | /* process. */ |
807 | 0 | pArgs->nestedLoads=1; |
808 | 0 | pArgs->pkg=NULL; |
809 | |
|
810 | 0 | umtx_lock(&cnvCacheMutex); |
811 | 0 | mySharedConverterData = ucnv_load(pArgs, err); |
812 | 0 | umtx_unlock(&cnvCacheMutex); |
813 | 0 | if (U_FAILURE (*err) || (mySharedConverterData == NULL)) |
814 | 0 | { |
815 | 0 | return NULL; |
816 | 0 | } |
817 | 0 | } |
818 | | |
819 | 3.34k | return mySharedConverterData; |
820 | 3.34k | } |
821 | | |
822 | | U_CAPI UConverter * |
823 | | ucnv_createConverter(UConverter *myUConverter, const char *converterName, UErrorCode * err) |
824 | 4.46k | { |
825 | 4.46k | UConverterNamePieces stackPieces; |
826 | 4.46k | UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; |
827 | 4.46k | UConverterSharedData *mySharedConverterData; |
828 | | |
829 | 4.46k | UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN); |
830 | | |
831 | 4.46k | if(U_SUCCESS(*err)) { |
832 | 4.46k | UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open converter %s", converterName); |
833 | | |
834 | 4.46k | mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err); |
835 | | |
836 | 4.46k | myUConverter = ucnv_createConverterFromSharedData( |
837 | 4.46k | myUConverter, mySharedConverterData, |
838 | 4.46k | &stackArgs, |
839 | 4.46k | err); |
840 | | |
841 | 4.46k | if(U_SUCCESS(*err)) { |
842 | 4.46k | UTRACE_EXIT_PTR_STATUS(myUConverter, *err); |
843 | 4.46k | return myUConverter; |
844 | 4.46k | } |
845 | 4.46k | } |
846 | | |
847 | | /* exit with error */ |
848 | 0 | UTRACE_EXIT_STATUS(*err); |
849 | 0 | return NULL; |
850 | 4.46k | } |
851 | | |
852 | | U_CFUNC UBool |
853 | 0 | ucnv_canCreateConverter(const char *converterName, UErrorCode *err) { |
854 | 0 | UConverter myUConverter; |
855 | 0 | UConverterNamePieces stackPieces; |
856 | 0 | UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; |
857 | 0 | UConverterSharedData *mySharedConverterData; |
858 | |
|
859 | 0 | UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN); |
860 | |
|
861 | 0 | if(U_SUCCESS(*err)) { |
862 | 0 | UTRACE_DATA1(UTRACE_OPEN_CLOSE, "test if can open converter %s", converterName); |
863 | |
|
864 | 0 | stackArgs.onlyTestIsLoadable=TRUE; |
865 | 0 | mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err); |
866 | 0 | ucnv_createConverterFromSharedData( |
867 | 0 | &myUConverter, mySharedConverterData, |
868 | 0 | &stackArgs, |
869 | 0 | err); |
870 | 0 | ucnv_unloadSharedDataIfReady(mySharedConverterData); |
871 | 0 | } |
872 | |
|
873 | 0 | UTRACE_EXIT_STATUS(*err); |
874 | 0 | return U_SUCCESS(*err); |
875 | 0 | } |
876 | | |
877 | | UConverter * |
878 | | ucnv_createAlgorithmicConverter(UConverter *myUConverter, |
879 | | UConverterType type, |
880 | | const char *locale, uint32_t options, |
881 | 0 | UErrorCode *err) { |
882 | 0 | UConverter *cnv; |
883 | 0 | const UConverterSharedData *sharedData; |
884 | 0 | UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; |
885 | |
|
886 | 0 | UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_ALGORITHMIC); |
887 | 0 | UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open algorithmic converter type %d", (int32_t)type); |
888 | |
|
889 | 0 | if(type<0 || UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES<=type) { |
890 | 0 | *err = U_ILLEGAL_ARGUMENT_ERROR; |
891 | 0 | UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR); |
892 | 0 | return NULL; |
893 | 0 | } |
894 | | |
895 | 0 | sharedData = converterData[type]; |
896 | 0 | if(sharedData == NULL || sharedData->isReferenceCounted) { |
897 | | /* not a valid type, or not an algorithmic converter */ |
898 | 0 | *err = U_ILLEGAL_ARGUMENT_ERROR; |
899 | 0 | UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR); |
900 | 0 | return NULL; |
901 | 0 | } |
902 | | |
903 | 0 | stackArgs.name = ""; |
904 | 0 | stackArgs.options = options; |
905 | 0 | stackArgs.locale=locale; |
906 | 0 | cnv = ucnv_createConverterFromSharedData( |
907 | 0 | myUConverter, (UConverterSharedData *)sharedData, |
908 | 0 | &stackArgs, err); |
909 | |
|
910 | 0 | UTRACE_EXIT_PTR_STATUS(cnv, *err); |
911 | 0 | return cnv; |
912 | 0 | } |
913 | | |
914 | | U_CFUNC UConverter* |
915 | | ucnv_createConverterFromPackage(const char *packageName, const char *converterName, UErrorCode * err) |
916 | 0 | { |
917 | 0 | UConverter *myUConverter; |
918 | 0 | UConverterSharedData *mySharedConverterData; |
919 | 0 | UConverterNamePieces stackPieces; |
920 | 0 | UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; |
921 | |
|
922 | 0 | UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_PACKAGE); |
923 | |
|
924 | 0 | if(U_FAILURE(*err)) { |
925 | 0 | UTRACE_EXIT_STATUS(*err); |
926 | 0 | return NULL; |
927 | 0 | } |
928 | | |
929 | 0 | UTRACE_DATA2(UTRACE_OPEN_CLOSE, "open converter %s from package %s", converterName, packageName); |
930 | | |
931 | | /* first, get the options out of the converterName string */ |
932 | 0 | stackPieces.cnvName[0] = 0; |
933 | 0 | stackPieces.locale[0] = 0; |
934 | 0 | stackPieces.options = 0; |
935 | 0 | parseConverterOptions(converterName, &stackPieces, &stackArgs, err); |
936 | 0 | if (U_FAILURE(*err)) { |
937 | | /* Very bad name used. */ |
938 | 0 | UTRACE_EXIT_STATUS(*err); |
939 | 0 | return NULL; |
940 | 0 | } |
941 | 0 | stackArgs.nestedLoads=1; |
942 | 0 | stackArgs.pkg=packageName; |
943 | | |
944 | | /* open the data, unflatten the shared structure */ |
945 | 0 | mySharedConverterData = createConverterFromFile(&stackArgs, err); |
946 | |
|
947 | 0 | if (U_FAILURE(*err)) { |
948 | 0 | UTRACE_EXIT_STATUS(*err); |
949 | 0 | return NULL; |
950 | 0 | } |
951 | | |
952 | | /* create the actual converter */ |
953 | 0 | myUConverter = ucnv_createConverterFromSharedData(NULL, mySharedConverterData, &stackArgs, err); |
954 | |
|
955 | 0 | if (U_FAILURE(*err)) { |
956 | 0 | ucnv_close(myUConverter); |
957 | 0 | UTRACE_EXIT_STATUS(*err); |
958 | 0 | return NULL; |
959 | 0 | } |
960 | | |
961 | 0 | UTRACE_EXIT_PTR_STATUS(myUConverter, *err); |
962 | 0 | return myUConverter; |
963 | 0 | } |
964 | | |
965 | | |
966 | | U_CFUNC UConverter* |
967 | | ucnv_createConverterFromSharedData(UConverter *myUConverter, |
968 | | UConverterSharedData *mySharedConverterData, |
969 | | UConverterLoadArgs *pArgs, |
970 | | UErrorCode *err) |
971 | 4.46k | { |
972 | 4.46k | UBool isCopyLocal; |
973 | | |
974 | 4.46k | if(U_FAILURE(*err)) { |
975 | 0 | ucnv_unloadSharedDataIfReady(mySharedConverterData); |
976 | 0 | return myUConverter; |
977 | 0 | } |
978 | 4.46k | if(myUConverter == NULL) |
979 | 4.46k | { |
980 | 4.46k | myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter)); |
981 | 4.46k | if(myUConverter == NULL) |
982 | 0 | { |
983 | 0 | *err = U_MEMORY_ALLOCATION_ERROR; |
984 | 0 | ucnv_unloadSharedDataIfReady(mySharedConverterData); |
985 | 0 | return NULL; |
986 | 0 | } |
987 | 4.46k | isCopyLocal = FALSE; |
988 | 4.46k | } else { |
989 | 0 | isCopyLocal = TRUE; |
990 | 0 | } |
991 | | |
992 | | /* initialize the converter */ |
993 | 4.46k | uprv_memset(myUConverter, 0, sizeof(UConverter)); |
994 | 4.46k | myUConverter->isCopyLocal = isCopyLocal; |
995 | | /*myUConverter->isExtraLocal = FALSE;*/ /* Set by the memset call */ |
996 | 4.46k | myUConverter->sharedData = mySharedConverterData; |
997 | 4.46k | myUConverter->options = pArgs->options; |
998 | 4.46k | if(!pArgs->onlyTestIsLoadable) { |
999 | 4.46k | myUConverter->preFromUFirstCP = U_SENTINEL; |
1000 | 4.46k | myUConverter->fromCharErrorBehaviour = UCNV_TO_U_DEFAULT_CALLBACK; |
1001 | 4.46k | myUConverter->fromUCharErrorBehaviour = UCNV_FROM_U_DEFAULT_CALLBACK; |
1002 | 4.46k | myUConverter->toUnicodeStatus = mySharedConverterData->toUnicodeStatus; |
1003 | 4.46k | myUConverter->maxBytesPerUChar = mySharedConverterData->staticData->maxBytesPerChar; |
1004 | 4.46k | myUConverter->subChar1 = mySharedConverterData->staticData->subChar1; |
1005 | 4.46k | myUConverter->subCharLen = mySharedConverterData->staticData->subCharLen; |
1006 | 4.46k | myUConverter->subChars = (uint8_t *)myUConverter->subUChars; |
1007 | 4.46k | uprv_memcpy(myUConverter->subChars, mySharedConverterData->staticData->subChar, myUConverter->subCharLen); |
1008 | 4.46k | myUConverter->toUCallbackReason = UCNV_ILLEGAL; /* default reason to invoke (*fromCharErrorBehaviour) */ |
1009 | 4.46k | } |
1010 | | |
1011 | 4.46k | if(mySharedConverterData->impl->open != NULL) { |
1012 | 0 | mySharedConverterData->impl->open(myUConverter, pArgs, err); |
1013 | 0 | if(U_FAILURE(*err) && !pArgs->onlyTestIsLoadable) { |
1014 | | /* don't ucnv_close() if onlyTestIsLoadable because not fully initialized */ |
1015 | 0 | ucnv_close(myUConverter); |
1016 | 0 | return NULL; |
1017 | 0 | } |
1018 | 0 | } |
1019 | | |
1020 | 4.46k | return myUConverter; |
1021 | 4.46k | } |
1022 | | |
1023 | | /*Frees all shared immutable objects that aren't referred to (reference count = 0) |
1024 | | */ |
1025 | | U_CAPI int32_t U_EXPORT2 |
1026 | | ucnv_flushCache () |
1027 | 1.11k | { |
1028 | 1.11k | UConverterSharedData *mySharedData = NULL; |
1029 | 1.11k | int32_t pos; |
1030 | 1.11k | int32_t tableDeletedNum = 0; |
1031 | 1.11k | const UHashElement *e; |
1032 | | /*UErrorCode status = U_ILLEGAL_ARGUMENT_ERROR;*/ |
1033 | 1.11k | int32_t i, remaining; |
1034 | | |
1035 | 1.11k | UTRACE_ENTRY_OC(UTRACE_UCNV_FLUSH_CACHE); |
1036 | | |
1037 | | /* Close the default converter without creating a new one so that everything will be flushed. */ |
1038 | 1.11k | u_flushDefaultConverter(); |
1039 | | |
1040 | | /*if shared data hasn't even been lazy evaluated yet |
1041 | | * return 0 |
1042 | | */ |
1043 | 1.11k | if (SHARED_DATA_HASHTABLE == NULL) { |
1044 | 1.11k | UTRACE_EXIT_VALUE((int32_t)0); |
1045 | 1.11k | return 0; |
1046 | 1.11k | } |
1047 | | |
1048 | | /*creates an enumeration to iterate through every element in the |
1049 | | * table |
1050 | | * |
1051 | | * Synchronization: holding cnvCacheMutex will prevent any other thread from |
1052 | | * accessing or modifying the hash table during the iteration. |
1053 | | * The reference count of an entry may be decremented by |
1054 | | * ucnv_close while the iteration is in process, but this is |
1055 | | * benign. It can't be incremented (in ucnv_createConverter()) |
1056 | | * because the sequence of looking up in the cache + incrementing |
1057 | | * is protected by cnvCacheMutex. |
1058 | | */ |
1059 | 0 | umtx_lock(&cnvCacheMutex); |
1060 | | /* |
1061 | | * double loop: A delta/extension-only converter has a pointer to its base table's |
1062 | | * shared data; the first iteration of the outer loop may see the delta converter |
1063 | | * before the base converter, and unloading the delta converter may get the base |
1064 | | * converter's reference counter down to 0. |
1065 | | */ |
1066 | 0 | i = 0; |
1067 | 0 | do { |
1068 | 0 | remaining = 0; |
1069 | 0 | pos = UHASH_FIRST; |
1070 | 0 | while ((e = uhash_nextElement (SHARED_DATA_HASHTABLE, &pos)) != NULL) |
1071 | 0 | { |
1072 | 0 | mySharedData = (UConverterSharedData *) e->value.pointer; |
1073 | | /*deletes only if reference counter == 0 */ |
1074 | 0 | if (mySharedData->referenceCounter == 0) |
1075 | 0 | { |
1076 | 0 | tableDeletedNum++; |
1077 | |
|
1078 | 0 | UCNV_DEBUG_LOG("del",mySharedData->staticData->name,mySharedData); |
1079 | |
|
1080 | 0 | uhash_removeElement(SHARED_DATA_HASHTABLE, e); |
1081 | 0 | mySharedData->sharedDataCached = FALSE; |
1082 | 0 | ucnv_deleteSharedConverterData (mySharedData); |
1083 | 0 | } else { |
1084 | 0 | ++remaining; |
1085 | 0 | } |
1086 | 0 | } |
1087 | 0 | } while(++i == 1 && remaining > 0); |
1088 | 0 | umtx_unlock(&cnvCacheMutex); |
1089 | |
|
1090 | 0 | UTRACE_DATA1(UTRACE_INFO, "ucnv_flushCache() exits with %d converters remaining", remaining); |
1091 | |
|
1092 | 0 | UTRACE_EXIT_VALUE(tableDeletedNum); |
1093 | 0 | return tableDeletedNum; |
1094 | 1.11k | } |
1095 | | |
1096 | | /* available converters list --------------------------------------------------- */ |
1097 | | |
1098 | 0 | static void U_CALLCONV initAvailableConvertersList(UErrorCode &errCode) { |
1099 | 0 | U_ASSERT(gAvailableConverterCount == 0); |
1100 | 0 | U_ASSERT(gAvailableConverters == NULL); |
1101 | |
|
1102 | 0 | ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup); |
1103 | 0 | UEnumeration *allConvEnum = ucnv_openAllNames(&errCode); |
1104 | 0 | int32_t allConverterCount = uenum_count(allConvEnum, &errCode); |
1105 | 0 | if (U_FAILURE(errCode)) { |
1106 | 0 | return; |
1107 | 0 | } |
1108 | | |
1109 | | /* We can't have more than "*converterTable" converters to open */ |
1110 | 0 | gAvailableConverters = (const char **) uprv_malloc(allConverterCount * sizeof(char*)); |
1111 | 0 | if (!gAvailableConverters) { |
1112 | 0 | errCode = U_MEMORY_ALLOCATION_ERROR; |
1113 | 0 | return; |
1114 | 0 | } |
1115 | | |
1116 | | /* Open the default converter to make sure that it has first dibs in the hash table. */ |
1117 | 0 | UErrorCode localStatus = U_ZERO_ERROR; |
1118 | 0 | UConverter tempConverter; |
1119 | 0 | ucnv_close(ucnv_createConverter(&tempConverter, NULL, &localStatus)); |
1120 | |
|
1121 | 0 | gAvailableConverterCount = 0; |
1122 | |
|
1123 | 0 | for (int32_t idx = 0; idx < allConverterCount; idx++) { |
1124 | 0 | localStatus = U_ZERO_ERROR; |
1125 | 0 | const char *converterName = uenum_next(allConvEnum, NULL, &localStatus); |
1126 | 0 | if (ucnv_canCreateConverter(converterName, &localStatus)) { |
1127 | 0 | gAvailableConverters[gAvailableConverterCount++] = converterName; |
1128 | 0 | } |
1129 | 0 | } |
1130 | |
|
1131 | 0 | uenum_close(allConvEnum); |
1132 | 0 | } |
1133 | | |
1134 | | |
1135 | 0 | static UBool haveAvailableConverterList(UErrorCode *pErrorCode) { |
1136 | 0 | umtx_initOnce(gAvailableConvertersInitOnce, &initAvailableConvertersList, *pErrorCode); |
1137 | 0 | return U_SUCCESS(*pErrorCode); |
1138 | 0 | } |
1139 | | |
1140 | | U_CFUNC uint16_t |
1141 | 0 | ucnv_bld_countAvailableConverters(UErrorCode *pErrorCode) { |
1142 | 0 | if (haveAvailableConverterList(pErrorCode)) { |
1143 | 0 | return gAvailableConverterCount; |
1144 | 0 | } |
1145 | 0 | return 0; |
1146 | 0 | } |
1147 | | |
1148 | | U_CFUNC const char * |
1149 | 0 | ucnv_bld_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) { |
1150 | 0 | if (haveAvailableConverterList(pErrorCode)) { |
1151 | 0 | if (n < gAvailableConverterCount) { |
1152 | 0 | return gAvailableConverters[n]; |
1153 | 0 | } |
1154 | 0 | *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; |
1155 | 0 | } |
1156 | 0 | return NULL; |
1157 | 0 | } |
1158 | | |
1159 | | /* default converter name --------------------------------------------------- */ |
1160 | | |
1161 | | #if !U_CHARSET_IS_UTF8 |
1162 | | /* |
1163 | | Copy the canonical converter name. |
1164 | | ucnv_getDefaultName must be thread safe, which can call this function. |
1165 | | |
1166 | | ucnv_setDefaultName calls this function and it doesn't have to be |
1167 | | thread safe because there is no reliable/safe way to reset the |
1168 | | converter in use in all threads. If you did reset the converter, you |
1169 | | would not be sure that retrieving a default converter for one string |
1170 | | would be the same type of default converter for a successive string. |
1171 | | Since the name is a returned via ucnv_getDefaultName without copying, |
1172 | | you shouldn't be modifying or deleting the string from a separate thread. |
1173 | | */ |
1174 | | static inline void |
1175 | 1.11k | internalSetName(const char *name, UErrorCode *status) { |
1176 | 1.11k | UConverterNamePieces stackPieces; |
1177 | 1.11k | UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; |
1178 | 1.11k | int32_t length=(int32_t)(uprv_strlen(name)); |
1179 | 1.11k | UBool containsOption = (UBool)(uprv_strchr(name, UCNV_OPTION_SEP_CHAR) != NULL); |
1180 | 1.11k | const UConverterSharedData *algorithmicSharedData; |
1181 | | |
1182 | 1.11k | stackArgs.name = name; |
1183 | 1.11k | if(containsOption) { |
1184 | 0 | stackPieces.cnvName[0] = 0; |
1185 | 0 | stackPieces.locale[0] = 0; |
1186 | 0 | stackPieces.options = 0; |
1187 | 0 | parseConverterOptions(name, &stackPieces, &stackArgs, status); |
1188 | 0 | if(U_FAILURE(*status)) { |
1189 | 0 | return; |
1190 | 0 | } |
1191 | 0 | } |
1192 | 1.11k | algorithmicSharedData = getAlgorithmicTypeFromName(stackArgs.name); |
1193 | | |
1194 | 1.11k | umtx_lock(&cnvCacheMutex); |
1195 | | |
1196 | 1.11k | gDefaultAlgorithmicSharedData = algorithmicSharedData; |
1197 | 1.11k | gDefaultConverterContainsOption = containsOption; |
1198 | 1.11k | uprv_memcpy(gDefaultConverterNameBuffer, name, length); |
1199 | 1.11k | gDefaultConverterNameBuffer[length]=0; |
1200 | | |
1201 | | /* gDefaultConverterName MUST be the last global var set by this function. */ |
1202 | | /* It is the variable checked in ucnv_getDefaultName() to see if initialization is required. */ |
1203 | | // But there is nothing here preventing that from being reordered, either by the compiler |
1204 | | // or hardware. I'm adding the mutex to ucnv_getDefaultName for now. UMTX_CHECK is not enough. |
1205 | | // -- Andy |
1206 | 1.11k | gDefaultConverterName = gDefaultConverterNameBuffer; |
1207 | | |
1208 | 1.11k | ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup); |
1209 | | |
1210 | 1.11k | umtx_unlock(&cnvCacheMutex); |
1211 | 1.11k | } |
1212 | | #endif |
1213 | | |
1214 | | /* |
1215 | | * In order to be really thread-safe, the get function would have to take |
1216 | | * a buffer parameter and copy the current string inside a mutex block. |
1217 | | * This implementation only tries to be really thread-safe while |
1218 | | * setting the name. |
1219 | | * It assumes that setting a pointer is atomic. |
1220 | | */ |
1221 | | |
1222 | | U_CAPI const char* U_EXPORT2 |
1223 | 1.11k | ucnv_getDefaultName() { |
1224 | | #if U_CHARSET_IS_UTF8 |
1225 | | return "UTF-8"; |
1226 | | #else |
1227 | | /* local variable to be thread-safe */ |
1228 | 1.11k | const char *name; |
1229 | | |
1230 | | /* |
1231 | | Concurrent calls to ucnv_getDefaultName must be thread safe, |
1232 | | but ucnv_setDefaultName is not thread safe. |
1233 | | */ |
1234 | 1.11k | { |
1235 | 1.11k | icu::Mutex lock(&cnvCacheMutex); |
1236 | 1.11k | name = gDefaultConverterName; |
1237 | 1.11k | } |
1238 | 1.11k | if(name==NULL) { |
1239 | 1.11k | UErrorCode errorCode = U_ZERO_ERROR; |
1240 | 1.11k | UConverter *cnv = NULL; |
1241 | | |
1242 | 1.11k | name = uprv_getDefaultCodepage(); |
1243 | | |
1244 | | /* if the name is there, test it out and get the canonical name with options */ |
1245 | 1.11k | if(name != NULL) { |
1246 | 1.11k | cnv = ucnv_open(name, &errorCode); |
1247 | 1.11k | if(U_SUCCESS(errorCode) && cnv != NULL) { |
1248 | 1.11k | name = ucnv_getName(cnv, &errorCode); |
1249 | 1.11k | } |
1250 | 1.11k | } |
1251 | | |
1252 | 1.11k | if(name == NULL || name[0] == 0 |
1253 | 1.11k | || U_FAILURE(errorCode) || cnv == NULL |
1254 | 1.11k | || uprv_strlen(name)>=sizeof(gDefaultConverterNameBuffer)) |
1255 | 0 | { |
1256 | | /* Panic time, let's use a fallback. */ |
1257 | 0 | #if (U_CHARSET_FAMILY == U_ASCII_FAMILY) |
1258 | 0 | name = "US-ASCII"; |
1259 | | /* there is no 'algorithmic' converter for EBCDIC */ |
1260 | | #elif U_PLATFORM == U_PF_OS390 |
1261 | | name = "ibm-1047_P100-1995" UCNV_SWAP_LFNL_OPTION_STRING; |
1262 | | #else |
1263 | | name = "ibm-37_P100-1995"; |
1264 | | #endif |
1265 | 0 | } |
1266 | | |
1267 | 1.11k | internalSetName(name, &errorCode); |
1268 | | |
1269 | | /* The close may make the current name go away. */ |
1270 | 1.11k | ucnv_close(cnv); |
1271 | 1.11k | } |
1272 | | |
1273 | 1.11k | return name; |
1274 | 1.11k | #endif |
1275 | 1.11k | } |
1276 | | |
1277 | | #if U_CHARSET_IS_UTF8 |
1278 | | U_CAPI void U_EXPORT2 ucnv_setDefaultName(const char *) {} |
1279 | | #else |
1280 | | /* |
1281 | | This function is not thread safe, and it can't be thread safe. |
1282 | | See internalSetName or the API reference for details. |
1283 | | */ |
1284 | | U_CAPI void U_EXPORT2 |
1285 | 0 | ucnv_setDefaultName(const char *converterName) { |
1286 | 0 | if(converterName==NULL) { |
1287 | | /* reset to the default codepage */ |
1288 | 0 | gDefaultConverterName=NULL; |
1289 | 0 | } else { |
1290 | 0 | UErrorCode errorCode = U_ZERO_ERROR; |
1291 | 0 | UConverter *cnv = NULL; |
1292 | 0 | const char *name = NULL; |
1293 | | |
1294 | | /* if the name is there, test it out and get the canonical name with options */ |
1295 | 0 | cnv = ucnv_open(converterName, &errorCode); |
1296 | 0 | if(U_SUCCESS(errorCode) && cnv != NULL) { |
1297 | 0 | name = ucnv_getName(cnv, &errorCode); |
1298 | 0 | } |
1299 | |
|
1300 | 0 | if(U_SUCCESS(errorCode) && name!=NULL) { |
1301 | 0 | internalSetName(name, &errorCode); |
1302 | 0 | } |
1303 | | /* else this converter is bad to use. Don't change it to a bad value. */ |
1304 | | |
1305 | | /* The close may make the current name go away. */ |
1306 | 0 | ucnv_close(cnv); |
1307 | | |
1308 | | /* reset the converter cache */ |
1309 | 0 | u_flushDefaultConverter(); |
1310 | 0 | } |
1311 | 0 | } |
1312 | | #endif |
1313 | | |
1314 | | /* data swapping ------------------------------------------------------------ */ |
1315 | | |
1316 | | /* most of this might belong more properly into ucnvmbcs.c, but that is so large */ |
1317 | | |
1318 | | #if !UCONFIG_NO_LEGACY_CONVERSION |
1319 | | |
1320 | | U_CAPI int32_t U_EXPORT2 |
1321 | | ucnv_swap(const UDataSwapper *ds, |
1322 | | const void *inData, int32_t length, void *outData, |
1323 | 0 | UErrorCode *pErrorCode) { |
1324 | 0 | const UDataInfo *pInfo; |
1325 | 0 | int32_t headerSize; |
1326 | |
|
1327 | 0 | const uint8_t *inBytes; |
1328 | 0 | uint8_t *outBytes; |
1329 | |
|
1330 | 0 | uint32_t offset, count, staticDataSize; |
1331 | 0 | int32_t size; |
1332 | |
|
1333 | 0 | const UConverterStaticData *inStaticData; |
1334 | 0 | UConverterStaticData *outStaticData; |
1335 | |
|
1336 | 0 | const _MBCSHeader *inMBCSHeader; |
1337 | 0 | _MBCSHeader *outMBCSHeader; |
1338 | 0 | _MBCSHeader mbcsHeader; |
1339 | 0 | uint32_t mbcsHeaderLength; |
1340 | 0 | UBool noFromU=FALSE; |
1341 | |
|
1342 | 0 | uint8_t outputType; |
1343 | |
|
1344 | 0 | int32_t maxFastUChar, mbcsIndexLength; |
1345 | |
|
1346 | 0 | const int32_t *inExtIndexes; |
1347 | 0 | int32_t extOffset; |
1348 | | |
1349 | | /* udata_swapDataHeader checks the arguments */ |
1350 | 0 | headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); |
1351 | 0 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
1352 | 0 | return 0; |
1353 | 0 | } |
1354 | | |
1355 | | /* check data format and format version */ |
1356 | 0 | pInfo=(const UDataInfo *)((const char *)inData+4); |
1357 | 0 | if(!( |
1358 | 0 | pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */ |
1359 | 0 | pInfo->dataFormat[1]==0x6e && |
1360 | 0 | pInfo->dataFormat[2]==0x76 && |
1361 | 0 | pInfo->dataFormat[3]==0x74 && |
1362 | 0 | pInfo->formatVersion[0]==6 && |
1363 | 0 | pInfo->formatVersion[1]>=2 |
1364 | 0 | )) { |
1365 | 0 | udata_printError(ds, "ucnv_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not recognized as an ICU .cnv conversion table\n", |
1366 | 0 | pInfo->dataFormat[0], pInfo->dataFormat[1], |
1367 | 0 | pInfo->dataFormat[2], pInfo->dataFormat[3], |
1368 | 0 | pInfo->formatVersion[0], pInfo->formatVersion[1]); |
1369 | 0 | *pErrorCode=U_UNSUPPORTED_ERROR; |
1370 | 0 | return 0; |
1371 | 0 | } |
1372 | | |
1373 | 0 | inBytes=(const uint8_t *)inData+headerSize; |
1374 | 0 | outBytes=(uint8_t *)outData+headerSize; |
1375 | | |
1376 | | /* read the initial UConverterStaticData structure after the UDataInfo header */ |
1377 | 0 | inStaticData=(const UConverterStaticData *)inBytes; |
1378 | 0 | outStaticData=(UConverterStaticData *)outBytes; |
1379 | |
|
1380 | 0 | if(length<0) { |
1381 | 0 | staticDataSize=ds->readUInt32(inStaticData->structSize); |
1382 | 0 | } else { |
1383 | 0 | length-=headerSize; |
1384 | 0 | if( length<(int32_t)sizeof(UConverterStaticData) || |
1385 | 0 | (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize)) |
1386 | 0 | ) { |
1387 | 0 | udata_printError(ds, "ucnv_swap(): too few bytes (%d after header) for an ICU .cnv conversion table\n", |
1388 | 0 | length); |
1389 | 0 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
1390 | 0 | return 0; |
1391 | 0 | } |
1392 | 0 | } |
1393 | | |
1394 | 0 | if(length>=0) { |
1395 | | /* swap the static data */ |
1396 | 0 | if(inStaticData!=outStaticData) { |
1397 | 0 | uprv_memcpy(outStaticData, inStaticData, staticDataSize); |
1398 | 0 | } |
1399 | |
|
1400 | 0 | ds->swapArray32(ds, &inStaticData->structSize, 4, |
1401 | 0 | &outStaticData->structSize, pErrorCode); |
1402 | 0 | ds->swapArray32(ds, &inStaticData->codepage, 4, |
1403 | 0 | &outStaticData->codepage, pErrorCode); |
1404 | |
|
1405 | 0 | ds->swapInvChars(ds, inStaticData->name, (int32_t)uprv_strlen(inStaticData->name), |
1406 | 0 | outStaticData->name, pErrorCode); |
1407 | 0 | if(U_FAILURE(*pErrorCode)) { |
1408 | 0 | udata_printError(ds, "ucnv_swap(): error swapping converter name\n"); |
1409 | 0 | return 0; |
1410 | 0 | } |
1411 | 0 | } |
1412 | | |
1413 | 0 | inBytes+=staticDataSize; |
1414 | 0 | outBytes+=staticDataSize; |
1415 | 0 | if(length>=0) { |
1416 | 0 | length-=(int32_t)staticDataSize; |
1417 | 0 | } |
1418 | | |
1419 | | /* check for supported conversionType values */ |
1420 | 0 | if(inStaticData->conversionType==UCNV_MBCS) { |
1421 | | /* swap MBCS data */ |
1422 | 0 | inMBCSHeader=(const _MBCSHeader *)inBytes; |
1423 | 0 | outMBCSHeader=(_MBCSHeader *)outBytes; |
1424 | |
|
1425 | 0 | if(0<=length && length<(int32_t)sizeof(_MBCSHeader)) { |
1426 | 0 | udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n", |
1427 | 0 | length); |
1428 | 0 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
1429 | 0 | return 0; |
1430 | 0 | } |
1431 | 0 | if(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1) { |
1432 | 0 | mbcsHeaderLength=MBCS_HEADER_V4_LENGTH; |
1433 | 0 | } else if(inMBCSHeader->version[0]==5 && inMBCSHeader->version[1]>=3 && |
1434 | 0 | ((mbcsHeader.options=ds->readUInt32(inMBCSHeader->options))& |
1435 | 0 | MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0 |
1436 | 0 | ) { |
1437 | 0 | mbcsHeaderLength=mbcsHeader.options&MBCS_OPT_LENGTH_MASK; |
1438 | 0 | noFromU=(UBool)((mbcsHeader.options&MBCS_OPT_NO_FROM_U)!=0); |
1439 | 0 | } else { |
1440 | 0 | udata_printError(ds, "ucnv_swap(): unsupported _MBCSHeader.version %d.%d\n", |
1441 | 0 | inMBCSHeader->version[0], inMBCSHeader->version[1]); |
1442 | 0 | *pErrorCode=U_UNSUPPORTED_ERROR; |
1443 | 0 | return 0; |
1444 | 0 | } |
1445 | | |
1446 | 0 | uprv_memcpy(mbcsHeader.version, inMBCSHeader->version, 4); |
1447 | 0 | mbcsHeader.countStates= ds->readUInt32(inMBCSHeader->countStates); |
1448 | 0 | mbcsHeader.countToUFallbacks= ds->readUInt32(inMBCSHeader->countToUFallbacks); |
1449 | 0 | mbcsHeader.offsetToUCodeUnits= ds->readUInt32(inMBCSHeader->offsetToUCodeUnits); |
1450 | 0 | mbcsHeader.offsetFromUTable= ds->readUInt32(inMBCSHeader->offsetFromUTable); |
1451 | 0 | mbcsHeader.offsetFromUBytes= ds->readUInt32(inMBCSHeader->offsetFromUBytes); |
1452 | 0 | mbcsHeader.flags= ds->readUInt32(inMBCSHeader->flags); |
1453 | 0 | mbcsHeader.fromUBytesLength= ds->readUInt32(inMBCSHeader->fromUBytesLength); |
1454 | | /* mbcsHeader.options have been read above */ |
1455 | |
|
1456 | 0 | extOffset=(int32_t)(mbcsHeader.flags>>8); |
1457 | 0 | outputType=(uint8_t)mbcsHeader.flags; |
1458 | 0 | if(noFromU && outputType==MBCS_OUTPUT_1) { |
1459 | 0 | udata_printError(ds, "ucnv_swap(): unsupported combination of makeconv --small with SBCS\n"); |
1460 | 0 | *pErrorCode=U_UNSUPPORTED_ERROR; |
1461 | 0 | return 0; |
1462 | 0 | } |
1463 | | |
1464 | | /* make sure that the output type is known */ |
1465 | 0 | switch(outputType) { |
1466 | 0 | case MBCS_OUTPUT_1: |
1467 | 0 | case MBCS_OUTPUT_2: |
1468 | 0 | case MBCS_OUTPUT_3: |
1469 | 0 | case MBCS_OUTPUT_4: |
1470 | 0 | case MBCS_OUTPUT_3_EUC: |
1471 | 0 | case MBCS_OUTPUT_4_EUC: |
1472 | 0 | case MBCS_OUTPUT_2_SISO: |
1473 | 0 | case MBCS_OUTPUT_EXT_ONLY: |
1474 | | /* OK */ |
1475 | 0 | break; |
1476 | 0 | default: |
1477 | 0 | udata_printError(ds, "ucnv_swap(): unsupported MBCS output type 0x%x\n", |
1478 | 0 | outputType); |
1479 | 0 | *pErrorCode=U_UNSUPPORTED_ERROR; |
1480 | 0 | return 0; |
1481 | 0 | } |
1482 | | |
1483 | | /* calculate the length of the MBCS data */ |
1484 | | |
1485 | | /* |
1486 | | * utf8Friendly MBCS files (mbcsHeader.version 4.3) |
1487 | | * contain an additional mbcsIndex table: |
1488 | | * uint16_t[(maxFastUChar+1)>>6]; |
1489 | | * where maxFastUChar=((mbcsHeader.version[2]<<8)|0xff). |
1490 | | */ |
1491 | 0 | maxFastUChar=0; |
1492 | 0 | mbcsIndexLength=0; |
1493 | 0 | if( outputType!=MBCS_OUTPUT_EXT_ONLY && outputType!=MBCS_OUTPUT_1 && |
1494 | 0 | mbcsHeader.version[1]>=3 && (maxFastUChar=mbcsHeader.version[2])!=0 |
1495 | 0 | ) { |
1496 | 0 | maxFastUChar=(maxFastUChar<<8)|0xff; |
1497 | 0 | mbcsIndexLength=((maxFastUChar+1)>>6)*2; /* number of bytes */ |
1498 | 0 | } |
1499 | |
|
1500 | 0 | if(extOffset==0) { |
1501 | 0 | size=(int32_t)(mbcsHeader.offsetFromUBytes+mbcsIndexLength); |
1502 | 0 | if(!noFromU) { |
1503 | 0 | size+=(int32_t)mbcsHeader.fromUBytesLength; |
1504 | 0 | } |
1505 | | |
1506 | | /* avoid compiler warnings - not otherwise necessary, and the value does not matter */ |
1507 | 0 | inExtIndexes=NULL; |
1508 | 0 | } else { |
1509 | | /* there is extension data after the base data, see ucnv_ext.h */ |
1510 | 0 | if(length>=0 && length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) { |
1511 | 0 | udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n", |
1512 | 0 | length); |
1513 | 0 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
1514 | 0 | return 0; |
1515 | 0 | } |
1516 | | |
1517 | 0 | inExtIndexes=(const int32_t *)(inBytes+extOffset); |
1518 | 0 | size=extOffset+udata_readInt32(ds, inExtIndexes[UCNV_EXT_SIZE]); |
1519 | 0 | } |
1520 | | |
1521 | 0 | if(length>=0) { |
1522 | 0 | if(length<size) { |
1523 | 0 | udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n", |
1524 | 0 | length); |
1525 | 0 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
1526 | 0 | return 0; |
1527 | 0 | } |
1528 | | |
1529 | | /* copy the data for inaccessible bytes */ |
1530 | 0 | if(inBytes!=outBytes) { |
1531 | 0 | uprv_memcpy(outBytes, inBytes, size); |
1532 | 0 | } |
1533 | | |
1534 | | /* swap the MBCSHeader, except for the version field */ |
1535 | 0 | count=mbcsHeaderLength*4; |
1536 | 0 | ds->swapArray32(ds, &inMBCSHeader->countStates, count-4, |
1537 | 0 | &outMBCSHeader->countStates, pErrorCode); |
1538 | |
|
1539 | 0 | if(outputType==MBCS_OUTPUT_EXT_ONLY) { |
1540 | | /* |
1541 | | * extension-only file, |
1542 | | * contains a base name instead of normal base table data |
1543 | | */ |
1544 | | |
1545 | | /* swap the base name, between the header and the extension data */ |
1546 | 0 | const char *inBaseName=(const char *)inBytes+count; |
1547 | 0 | char *outBaseName=(char *)outBytes+count; |
1548 | 0 | ds->swapInvChars(ds, inBaseName, (int32_t)uprv_strlen(inBaseName), |
1549 | 0 | outBaseName, pErrorCode); |
1550 | 0 | } else { |
1551 | | /* normal file with base table data */ |
1552 | | |
1553 | | /* swap the state table, 1kB per state */ |
1554 | 0 | offset=count; |
1555 | 0 | count=mbcsHeader.countStates*1024; |
1556 | 0 | ds->swapArray32(ds, inBytes+offset, (int32_t)count, |
1557 | 0 | outBytes+offset, pErrorCode); |
1558 | | |
1559 | | /* swap the toUFallbacks[] */ |
1560 | 0 | offset+=count; |
1561 | 0 | count=mbcsHeader.countToUFallbacks*8; |
1562 | 0 | ds->swapArray32(ds, inBytes+offset, (int32_t)count, |
1563 | 0 | outBytes+offset, pErrorCode); |
1564 | | |
1565 | | /* swap the unicodeCodeUnits[] */ |
1566 | 0 | offset=mbcsHeader.offsetToUCodeUnits; |
1567 | 0 | count=mbcsHeader.offsetFromUTable-offset; |
1568 | 0 | ds->swapArray16(ds, inBytes+offset, (int32_t)count, |
1569 | 0 | outBytes+offset, pErrorCode); |
1570 | | |
1571 | | /* offset to the stage 1 table, independent of the outputType */ |
1572 | 0 | offset=mbcsHeader.offsetFromUTable; |
1573 | |
|
1574 | 0 | if(outputType==MBCS_OUTPUT_1) { |
1575 | | /* SBCS: swap the fromU tables, all 16 bits wide */ |
1576 | 0 | count=(mbcsHeader.offsetFromUBytes-offset)+mbcsHeader.fromUBytesLength; |
1577 | 0 | ds->swapArray16(ds, inBytes+offset, (int32_t)count, |
1578 | 0 | outBytes+offset, pErrorCode); |
1579 | 0 | } else { |
1580 | | /* otherwise: swap the stage tables separately */ |
1581 | | |
1582 | | /* stage 1 table: uint16_t[0x440 or 0x40] */ |
1583 | 0 | if(inStaticData->unicodeMask&UCNV_HAS_SUPPLEMENTARY) { |
1584 | 0 | count=0x440*2; /* for all of Unicode */ |
1585 | 0 | } else { |
1586 | 0 | count=0x40*2; /* only BMP */ |
1587 | 0 | } |
1588 | 0 | ds->swapArray16(ds, inBytes+offset, (int32_t)count, |
1589 | 0 | outBytes+offset, pErrorCode); |
1590 | | |
1591 | | /* stage 2 table: uint32_t[] */ |
1592 | 0 | offset+=count; |
1593 | 0 | count=mbcsHeader.offsetFromUBytes-offset; |
1594 | 0 | ds->swapArray32(ds, inBytes+offset, (int32_t)count, |
1595 | 0 | outBytes+offset, pErrorCode); |
1596 | | |
1597 | | /* stage 3/result bytes: sometimes uint16_t[] or uint32_t[] */ |
1598 | 0 | offset=mbcsHeader.offsetFromUBytes; |
1599 | 0 | count= noFromU ? 0 : mbcsHeader.fromUBytesLength; |
1600 | 0 | switch(outputType) { |
1601 | 0 | case MBCS_OUTPUT_2: |
1602 | 0 | case MBCS_OUTPUT_3_EUC: |
1603 | 0 | case MBCS_OUTPUT_2_SISO: |
1604 | 0 | ds->swapArray16(ds, inBytes+offset, (int32_t)count, |
1605 | 0 | outBytes+offset, pErrorCode); |
1606 | 0 | break; |
1607 | 0 | case MBCS_OUTPUT_4: |
1608 | 0 | ds->swapArray32(ds, inBytes+offset, (int32_t)count, |
1609 | 0 | outBytes+offset, pErrorCode); |
1610 | 0 | break; |
1611 | 0 | default: |
1612 | | /* just uint8_t[], nothing to swap */ |
1613 | 0 | break; |
1614 | 0 | } |
1615 | | |
1616 | 0 | if(mbcsIndexLength!=0) { |
1617 | 0 | offset+=count; |
1618 | 0 | count=mbcsIndexLength; |
1619 | 0 | ds->swapArray16(ds, inBytes+offset, (int32_t)count, |
1620 | 0 | outBytes+offset, pErrorCode); |
1621 | 0 | } |
1622 | 0 | } |
1623 | 0 | } |
1624 | | |
1625 | 0 | if(extOffset!=0) { |
1626 | | /* swap the extension data */ |
1627 | 0 | inBytes+=extOffset; |
1628 | 0 | outBytes+=extOffset; |
1629 | | |
1630 | | /* swap toUTable[] */ |
1631 | 0 | offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_INDEX]); |
1632 | 0 | length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_LENGTH]); |
1633 | 0 | ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); |
1634 | | |
1635 | | /* swap toUUChars[] */ |
1636 | 0 | offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_INDEX]); |
1637 | 0 | length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_LENGTH]); |
1638 | 0 | ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); |
1639 | | |
1640 | | /* swap fromUTableUChars[] */ |
1641 | 0 | offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_UCHARS_INDEX]); |
1642 | 0 | length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_LENGTH]); |
1643 | 0 | ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); |
1644 | | |
1645 | | /* swap fromUTableValues[] */ |
1646 | 0 | offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_VALUES_INDEX]); |
1647 | | /* same length as for fromUTableUChars[] */ |
1648 | 0 | ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); |
1649 | | |
1650 | | /* no need to swap fromUBytes[] */ |
1651 | | |
1652 | | /* swap fromUStage12[] */ |
1653 | 0 | offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_INDEX]); |
1654 | 0 | length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_LENGTH]); |
1655 | 0 | ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); |
1656 | | |
1657 | | /* swap fromUStage3[] */ |
1658 | 0 | offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_INDEX]); |
1659 | 0 | length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_LENGTH]); |
1660 | 0 | ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); |
1661 | | |
1662 | | /* swap fromUStage3b[] */ |
1663 | 0 | offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_INDEX]); |
1664 | 0 | length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_LENGTH]); |
1665 | 0 | ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); |
1666 | | |
1667 | | /* swap indexes[] */ |
1668 | 0 | length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_INDEXES_LENGTH]); |
1669 | 0 | ds->swapArray32(ds, inBytes, length*4, outBytes, pErrorCode); |
1670 | 0 | } |
1671 | 0 | } |
1672 | 0 | } else { |
1673 | 0 | udata_printError(ds, "ucnv_swap(): unknown conversionType=%d!=UCNV_MBCS\n", |
1674 | 0 | inStaticData->conversionType); |
1675 | 0 | *pErrorCode=U_UNSUPPORTED_ERROR; |
1676 | 0 | return 0; |
1677 | 0 | } |
1678 | | |
1679 | 0 | return headerSize+(int32_t)staticDataSize+size; |
1680 | 0 | } |
1681 | | |
1682 | | #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |
1683 | | |
1684 | | #endif /* #if !UCONFIG_NO_CONVERSION */ |